Merge branch 'ClickHouse:master' into variant_inference

This commit is contained in:
Shaun Struwig 2024-05-28 09:20:14 +02:00 committed by GitHub
commit 97ea093073
260 changed files with 9109 additions and 10419 deletions

View File

@ -136,7 +136,7 @@ jobs:
MarkReleaseReady:
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, Builds_1]
needs: [RunConfig, Builds_1, Builds_2]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Debug

View File

@ -33,6 +33,10 @@ name: Build ClickHouse
additional_envs:
description: additional ENV variables to setup the job
type: string
secrets:
secret_envs:
description: if given, it's passed to the environments
required: false
jobs:
Build:
@ -54,6 +58,7 @@ jobs:
run: |
cat >> "$GITHUB_ENV" << 'EOF'
${{inputs.additional_envs}}
${{secrets.secret_envs}}
DOCKER_TAG<<DOCKER_JSON
${{ toJson(fromJson(inputs.data).docker_data.images) }}
DOCKER_JSON

View File

@ -13,6 +13,10 @@ name: BuildStageWF
description: ci data
type: string
required: true
secrets:
secret_envs:
description: if given, it's passed to the environments
required: false
jobs:
s:
@ -30,3 +34,5 @@ jobs:
# for now let's do a deep checkout for builds
checkout_depth: 0
data: ${{ inputs.data }}
secrets:
secret_envs: ${{ secrets.secret_envs }}

View File

@ -10,6 +10,10 @@ name: StageWF
description: ci data
type: string
required: true
secrets:
secret_envs:
description: if given, it's passed to the environments
required: false
jobs:
s:
@ -23,3 +27,5 @@ jobs:
test_name: ${{ matrix.job_name_and_runner_type.job_name }}
runner_type: ${{ matrix.job_name_and_runner_type.runner_type }}
data: ${{ inputs.data }}
secrets:
secret_envs: ${{ secrets.secret_envs }}

View File

@ -3665,6 +3665,26 @@ Possible values:
Default value: `0`.
## s3_ignore_file_doesnt_exist {#s3_ignore_file_doesnt_exist}
Ignore the absence of the file when reading certain keys.
Possible values:
- 1 — `SELECT` returns empty result.
- 0 — `SELECT` throws an exception.
Default value: `0`.
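A minimal sketch of applying the setting per query, assuming a placeholder bucket URL, format and structure:
```sql
-- Hypothetical S3 key; with the setting enabled, a missing key yields an
-- empty result instead of an exception.
SELECT count()
FROM s3('https://example-bucket.s3.amazonaws.com/data/2024-05-28.tsv', 'TSV', 'line String')
SETTINGS s3_ignore_file_doesnt_exist = 1;
```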
## s3_validate_request_settings {#s3_validate_request_settings}
Enables S3 request settings validation.
Possible values:
- 1 — validate settings.
- 0 — do not validate settings.
Default value: `1`.
## hdfs_truncate_on_insert {#hdfs_truncate_on_insert}
Enables or disables truncation before an insert into HDFS engine tables. If disabled, an exception is thrown on an attempt to insert if the file already exists in HDFS.
@ -3697,6 +3717,56 @@ Possible values:
Default value: `0`.
## hdfs_throw_on_zero_files_match {#hdfs_throw_on_zero_files_match}
Throw an error if zero files are matched according to the glob expansion rules.
Possible values:
- 1 — `SELECT` throws an exception.
- 0 — `SELECT` returns empty result.
Default value: `0`.
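A minimal sketch, assuming a placeholder namenode address and glob; with the setting enabled, an empty glob match becomes an error rather than an empty result:
```sql
-- Hypothetical HDFS URI; the query fails if the glob matches no files.
SELECT count()
FROM hdfs('hdfs://namenode:9000/logs/2024-05-*.tsv', 'TSV', 'line String')
SETTINGS hdfs_throw_on_zero_files_match = 1;
```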
## hdfs_ignore_file_doesnt_exist {#hdfs_ignore_file_doesnt_exist}
Ignore the absence of the file when reading certain keys.
Possible values:
- 1 — `SELECT` returns empty result.
- 0 — `SELECT` throws an exception.
Default value: `0`.
## azure_throw_on_zero_files_match {#azure_throw_on_zero_files_match}
Throw an error if zero files are matched according to the glob expansion rules.
Possible values:
- 1 — `SELECT` throws an exception.
- 0 — `SELECT` returns empty result.
Default value: `0`.
## azure_ignore_file_doesnt_exist {#azure_ignore_file_doesnt_exist}
Ignore the absence of the file when reading certain keys.
Possible values:
- 1 — `SELECT` returns empty result.
- 0 — `SELECT` throws an exception.
Default value: `0`.
## azure_skip_empty_files {#azure_skip_empty_files}
Enables or disables skipping empty files in the Azure engine.
Possible values:
- 0 — `SELECT` throws an exception if empty file is not compatible with requested format.
- 1 — `SELECT` returns empty result for empty file.
Default value: `0`.
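A minimal sketch, assuming a placeholder connection string, container and blob path:
```sql
-- Hypothetical Azure connection details; empty blobs are skipped instead of
-- causing a format-parsing exception.
SELECT count()
FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountName=account;AccountKey=secret;EndpointSuffix=core.windows.net',
                      'my-container', 'exports/*.csv', 'CSV')
SETTINGS azure_skip_empty_files = 1;
```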
## engine_url_skip_empty_files {#engine_url_skip_empty_files}
Enables or disables skipping empty files in [URL](../../engines/table-engines/special/url.md) engine tables.
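A minimal sketch using the `url` table function with a placeholder endpoint:
```sql
-- Hypothetical URL; an empty response body produces an empty result instead
-- of a format-parsing exception.
SELECT count()
FROM url('https://example.com/export.csv', 'CSV', 'c1 UInt32, c2 String')
SETTINGS engine_url_skip_empty_files = 1;
```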

View File

@ -994,25 +994,681 @@ Result:
└─────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64)
## reinterpretAsUInt8
## reinterpretAsInt(8\|16\|32\|64)
Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
## reinterpretAsFloat(32\|64)
**Syntax**
```sql
reinterpretAsUInt8(x)
```
**Parameters**
- `x`: value to byte reinterpret as UInt8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as UInt8. [UInt8](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
**Example**
Query:
```sql
SELECT
toInt8(257) AS x,
toTypeName(x),
reinterpretAsUInt8(x) AS res,
toTypeName(res);
```
Result:
```response
┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 1 │ Int8 │ 1 │ UInt8 │
└───┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsUInt16
Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsUInt16(x)
```
**Parameters**
- `x`: value to byte reinterpret as UInt16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as UInt16. [UInt16](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
**Example**
Query:
```sql
SELECT
toUInt8(257) AS x,
toTypeName(x),
reinterpretAsUInt16(x) AS res,
toTypeName(res);
```
Result:
```response
┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 1 │ UInt8 │ 1 │ UInt16 │
└───┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsUInt32
Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsUInt32(x)
```
**Parameters**
- `x`: value to byte reinterpret as UInt32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as UInt32. [UInt32](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
**Example**
Query:
```sql
SELECT
toUInt16(257) AS x,
toTypeName(x),
reinterpretAsUInt32(x) AS res,
toTypeName(res)
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ UInt16 │ 257 │ UInt32 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsUInt64
Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsUInt64(x)
```
**Parameters**
- `x`: value to byte reinterpret as UInt64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as UInt64. [UInt64](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
**Example**
Query:
```sql
SELECT
toUInt32(257) AS x,
toTypeName(x),
reinterpretAsUInt64(x) AS res,
toTypeName(res)
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ UInt32 │ 257 │ UInt64 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsUInt128
Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsUInt128(x)
```
**Parameters**
- `x`: value to byte reinterpret as UInt128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as UInt128. [UInt128](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
**Example**
Query:
```sql
SELECT
toUInt64(257) AS x,
toTypeName(x),
reinterpretAsUInt128(x) AS res,
toTypeName(res)
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ UInt64 │ 257 │ UInt128 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsUInt256
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsUInt256(x)
```
**Parameters**
- `x`: value to byte reinterpret as UInt256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as UInt256. [UInt256](../data-types/int-uint.md/#uint8-uint16-uint32-uint64-uint128-uint256-int8-int16-int32-int64-int128-int256).
**Example**
Query:
```sql
SELECT
toUInt128(257) AS x,
toTypeName(x),
reinterpretAsUInt256(x) AS res,
toTypeName(res)
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ UInt128 │ 257 │ UInt256 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsInt8
Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsInt8(x)
```
**Parameters**
- `x`: value to byte reinterpret as Int8. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Int8. [Int8](../data-types/int-uint.md/#int-ranges).
**Example**
Query:
```sql
SELECT
toUInt8(257) AS x,
toTypeName(x),
reinterpretAsInt8(x) AS res,
toTypeName(res);
```
Result:
```response
┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 1 │ UInt8 │ 1 │ Int8 │
└───┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsInt16
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsInt16(x)
```
**Parameters**
- `x`: value to byte reinterpret as Int16. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Int16. [Int16](../data-types/int-uint.md/#int-ranges).
**Example**
Query:
```sql
SELECT
toInt8(257) AS x,
toTypeName(x),
reinterpretAsInt16(x) AS res,
toTypeName(res);
```
Result:
```response
┌─x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 1 │ Int8 │ 1 │ Int16 │
└───┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsInt32
Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsInt32(x)
```
**Parameters**
- `x`: value to byte reinterpret as Int32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Int32. [Int32](../data-types/int-uint.md/#int-ranges).
**Example**
Query:
```sql
SELECT
toInt16(257) AS x,
toTypeName(x),
reinterpretAsInt32(x) AS res,
toTypeName(res);
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ Int16 │ 257 │ Int32 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsInt64
Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsInt64(x)
```
**Parameters**
- `x`: value to byte reinterpret as Int64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Int64. [Int64](../data-types/int-uint.md/#int-ranges).
**Example**
Query:
```sql
SELECT
toInt32(257) AS x,
toTypeName(x),
reinterpretAsInt64(x) AS res,
toTypeName(res);
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ Int32 │ 257 │ Int64 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsInt128
Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsInt128(x)
```
**Parameters**
- `x`: value to byte reinterpret as Int128. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Int128. [Int128](../data-types/int-uint.md/#int-ranges).
**Example**
Query:
```sql
SELECT
toInt64(257) AS x,
toTypeName(x),
reinterpretAsInt128(x) AS res,
toTypeName(res);
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ Int64 │ 257 │ Int128 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsInt256
Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsInt256(x)
```
**Parameters**
- `x`: value to byte reinterpret as Int256. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Int256. [Int256](../data-types/int-uint.md/#int-ranges).
**Example**
Query:
```sql
SELECT
toInt128(257) AS x,
toTypeName(x),
reinterpretAsInt256(x) AS res,
toTypeName(res);
```
Result:
```response
┌───x─┬─toTypeName(x)─┬─res─┬─toTypeName(res)─┐
│ 257 │ Int128 │ 257 │ Int256 │
└─────┴───────────────┴─────┴─────────────────┘
```
## reinterpretAsFloat32
Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsFloat32(x)
```
**Parameters**
- `x`: value to reinterpret as Float32. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Float32. [Float32](../data-types/float.md).
**Example**
Query:
```sql
SELECT reinterpretAsUInt32(toFloat32(0.2)) as x, reinterpretAsFloat32(x);
```
Result:
```response
┌──────────x─┬─reinterpretAsFloat32(x)─┐
│ 1045220557 │ 0.2 │
└────────────┴─────────────────────────┘
```
## reinterpretAsFloat64
Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax**
```sql
reinterpretAsFloat64(x)
```
**Parameters**
- `x`: value to reinterpret as Float64. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Reinterpreted value `x` as Float64. [Float64](../data-types/float.md).
**Example**
Query:
```sql
SELECT reinterpretAsUInt64(toFloat64(0.2)) as x, reinterpretAsFloat64(x);
```
Result:
```response
┌───────────────────x─┬─reinterpretAsFloat64(x)─┐
│ 4596373779694328218 │ 0.2 │
└─────────────────────┴─────────────────────────┘
```
## reinterpretAsDate
Accepts a string, fixed string or numeric value and interprets the bytes as a number in host order (little endian). It returns a date from the interpreted number as the number of days since the beginning of the Unix Epoch.
**Syntax**
```sql
reinterpretAsDate(x)
```
**Parameters**
- `x`: number of days since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Date. [Date](../data-types/date.md).
**Implementation details**
:::note
If the provided string isn't long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored.
:::
**Example**
Query:
```sql
SELECT reinterpretAsDate(65), reinterpretAsDate('A');
```
Result:
```response
┌─reinterpretAsDate(65)─┬─reinterpretAsDate('A')─┐
│ 1970-03-07 │ 1970-03-07 │
└───────────────────────┴────────────────────────┘
```
## reinterpretAsDateTime
These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch.
These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). Returns a date with time interpreted as the number of seconds since the beginning of the Unix Epoch.
**Syntax**
```sql
reinterpretAsDateTime(x)
```
**Parameters**
- `x`: number of seconds since the beginning of the Unix Epoch. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md), [UUID](../data-types/uuid.md), [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned value**
- Date and Time. [DateTime](../data-types/datetime.md).
**Implementation details**
:::note
If the provided string isn't long enough, the function works as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored.
:::
**Example**
Query:
```sql
SELECT reinterpretAsDateTime(65), reinterpretAsDateTime('A');
```
Result:
```response
┌─reinterpretAsDateTime(65)─┬─reinterpretAsDateTime('A')─┐
│ 1970-01-01 01:01:05 │ 1970-01-01 01:01:05 │
└───────────────────────────┴────────────────────────────┘
```
## reinterpretAsString
This function accepts a number or date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
This function accepts a number, date or date with time and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
**Syntax**
```sql
reinterpretAsString(x)
```
**Parameters**
- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md).
**Returned value**
- String containing bytes representing `x`. [String](../data-types/string.md).
**Example**
Query:
```sql
SELECT
reinterpretAsString(toDateTime('1970-01-01 01:01:05')),
reinterpretAsString(toDate('1970-03-07'));
```
Result:
```response
┌─reinterpretAsString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsString(toDate('1970-03-07'))─┐
│ A │ A │
└────────────────────────────────────────────────────────┴───────────────────────────────────────────┘
```
## reinterpretAsFixedString
This function accepts a number or date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
This function accepts a number, date or date with time and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
**Syntax**
```sql
reinterpretAsFixedString(x)
```
**Parameters**
- `x`: value to reinterpret to string. [(U)Int*](../data-types/int-uint.md), [Float](../data-types/float.md), [Date](../data-types/date.md), [DateTime](../data-types/datetime.md).
**Returned value**
- Fixed string containing bytes representing `x`. [FixedString](../data-types/fixedstring.md).
**Example**
Query:
```sql
SELECT
reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05')),
reinterpretAsFixedString(toDate('1970-03-07'));
```
Result:
```response
┌─reinterpretAsFixedString(toDateTime('1970-01-01 01:01:05'))─┬─reinterpretAsFixedString(toDate('1970-03-07'))─┐
│ A │ A │
└─────────────────────────────────────────────────────────────┴────────────────────────────────────────────────┘
```
## reinterpretAsUUID
@ -1020,7 +1676,7 @@ This function accepts a number or date or date with time and returns a FixedStri
In addition to the UUID functions listed here, there is dedicated [UUID function documentation](../functions/uuid-functions.md).
:::
Accepts 16 bytes string and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
Accepts a 16 byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax**
@ -1975,143 +2631,3 @@ Result:
│ 2,"good" │
└───────────────────────────────────────────┘
```
## snowflakeToDateTime
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.
**Syntax**
``` sql
snowflakeToDateTime(value[, time_zone])
```
**Arguments**
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function interprets `value` according to the timezone. Optional. [String](../data-types/string.md).
**Returned value**
- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
```
Result:
```response
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:57:56 │
└──────────────────────────────────────────────────────────────────┘
```
## snowflakeToDateTime64
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
**Syntax**
``` sql
snowflakeToDateTime64(value[, time_zone])
```
**Arguments**
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function interprets `value` according to the timezone. Optional. [String](../data-types/string.md).
**Returned value**
- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
```
Result:
```response
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:58:19.841 │
└────────────────────────────────────────────────────────────────────┘
```
## dateTimeToSnowflake
Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
**Syntax**
``` sql
dateTimeToSnowflake(value)
```
**Arguments**
- `value` — Date with time. [DateTime](../data-types/datetime.md).
**Returned value**
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
**Example**
Query:
``` sql
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
```
Result:
```response
┌─dateTimeToSnowflake(dt)─┐
│ 1426860702823350272 │
└─────────────────────────┘
```
## dateTime64ToSnowflake
Converts a [DateTime64](../data-types/datetime64.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
**Syntax**
``` sql
dateTime64ToSnowflake(value)
```
**Arguments**
- `value` — Date with time. [DateTime64](../data-types/datetime64.md).
**Returned value**
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
**Example**
Query:
``` sql
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64);
```
Result:
```response
┌─dateTime64ToSnowflake(dt64)─┐
│ 1426860704886947840 │
└─────────────────────────────┘
```

View File

@ -668,7 +668,7 @@ Result:
└──────────────────────────────────────────────────────────────────────────────────────┘
```
## serverUUID()
## serverUUID
Returns the random UUID generated during the first start of the ClickHouse server. The UUID is stored in file `uuid` in the ClickHouse server directory (e.g. `/var/lib/clickhouse/`) and retained between server restarts.
@ -682,6 +682,275 @@ serverUUID()
- The UUID of the server. [UUID](../data-types/uuid.md).
## generateSnowflakeID
Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits, preceded by a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.
```
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|0|                         timestamp                           |
├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|                   |     machine_id    |    machine_seq_num    |
└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
```
**Syntax**
``` sql
generateSnowflakeID([expr])
```
**Arguments**
- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
**Returned value**
A value of type UInt64.
**Example**
First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
``` sql
CREATE TABLE tab (id UInt64) ENGINE = Memory;
INSERT INTO tab SELECT generateSnowflakeID();
SELECT * FROM tab;
```
Result:
```response
┌──────────────────id─┐
│ 7199081390080409600 │
└─────────────────────┘
```
**Example with multiple Snowflake IDs generated per row**
```sql
SELECT generateSnowflakeID(1), generateSnowflakeID(2);
┌─generateSnowflakeID(1)─┬─generateSnowflakeID(2)─┐
│ 7199081609652224000 │ 7199081609652224001 │
└────────────────────────┴────────────────────────┘
```
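As a minimal sketch of reading the layout back, assuming the plain Unix-epoch millisecond layout shown above: shifting out the 22 low bits (machine ID and counter) leaves the embedded timestamp, which can be converted to `DateTime64`:
```sql
-- The low 22 bits hold machine_id and machine_seq_num; the remaining high bits
-- (below the top zero bit) are the Unix timestamp in milliseconds.
SELECT
    generateSnowflakeID() AS id,
    fromUnixTimestamp64Milli(toInt64(bitShiftRight(id, 22))) AS embedded_time;
```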
## generateSnowflakeIDThreadMonotonic
Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID).
The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 bits, preceded by a top zero bit), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond.
For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes.
In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0.
This function behaves like `generateSnowflakeID` but gives no guarantee of counter monotonicity across different simultaneous requests.
Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.
```
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|0|                         timestamp                           |
├─┼                 ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤
|                   |     machine_id    |    machine_seq_num    |
└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘
```
**Syntax**
``` sql
generateSnowflakeIDThreadMonotonic([expr])
```
**Arguments**
- `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional.
**Returned value**
A value of type UInt64.
**Example**
First, create a table with a column of type UInt64, then insert a generated Snowflake ID into the table.
``` sql
CREATE TABLE tab (id UInt64) ENGINE = Memory;
INSERT INTO tab SELECT generateSnowflakeIDThreadMonotonic();
SELECT * FROM tab;
```
Result:
```response
┌──────────────────id─┐
│ 7199082832006627328 │
└─────────────────────┘
```
**Example with multiple Snowflake IDs generated per row**
```sql
SELECT generateSnowflakeIDThreadMonotonic(1), generateSnowflakeIDThreadMonotonic(2);
┌─generateSnowflakeIDThreadMonotonic(1)─┬─generateSnowflakeIDThreadMonotonic(2)─┐
│ 7199082940311945216 │ 7199082940316139520 │
└───────────────────────────────────────┴───────────────────────────────────────┘
```
## snowflakeToDateTime
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format.
**Syntax**
``` sql
snowflakeToDateTime(value[, time_zone])
```
**Arguments**
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function interprets `value` according to the timezone. Optional. [String](../data-types/string.md).
**Returned value**
- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
```
Result:
```response
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:57:56 │
└──────────────────────────────────────────────────────────────────┘
```
## snowflakeToDateTime64
Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format.
**Syntax**
``` sql
snowflakeToDateTime64(value[, time_zone])
```
**Arguments**
- `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function interprets `value` according to the timezone. Optional. [String](../data-types/string.md).
**Returned value**
- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
```
Result:
```response
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:58:19.841 │
└────────────────────────────────────────────────────────────────────┘
```
## dateTimeToSnowflake
Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
**Syntax**
``` sql
dateTimeToSnowflake(value)
```
**Arguments**
- `value` — Date with time. [DateTime](../data-types/datetime.md).
**Returned value**
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
**Example**
Query:
``` sql
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
```
Result:
```response
┌─dateTimeToSnowflake(dt)─┐
│ 1426860702823350272 │
└─────────────────────────┘
```
## dateTime64ToSnowflake
Converts a [DateTime64](../data-types/datetime64.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.
**Syntax**
``` sql
dateTime64ToSnowflake(value)
```
**Arguments**
- `value` — Date with time. [DateTime64](../data-types/datetime64.md).
**Returned value**
- Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.
**Example**
Query:
``` sql
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64);
```
Result:
```response
┌─dateTime64ToSnowflake(dt64)─┐
│ 1426860704886947840 │
└─────────────────────────────┘
```
## See also
- [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other)

View File

@ -753,10 +753,11 @@ size_t getMaxArraySize()
return 0xFFFFFF;
}
bool hasLimitArraySize()
bool discardOnLimitReached()
{
if (auto context = Context::getGlobalContextInstance())
return context->getServerSettings().aggregate_function_group_array_has_limit_size;
return context->getServerSettings().aggregate_function_group_array_action_when_limit_is_reached
== GroupArrayActionWhenLimitReached::DISCARD;
return false;
}
@ -767,7 +768,7 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
{
assertUnary(name, argument_types);
bool limit_size = hasLimitArraySize();
bool has_limit = discardOnLimitReached();
UInt64 max_elems = getMaxArraySize();
if (parameters.empty())
@ -784,14 +785,14 @@ AggregateFunctionPtr createAggregateFunctionGroupArray(
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive number", name);
limit_size = true;
has_limit = true;
max_elems = parameters[0].get<UInt64>();
}
else
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of parameters for aggregate function {}, should be 0 or 1", name);
if (!limit_size)
if (!has_limit)
{
if (Tlast)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "groupArrayLast make sense only with max_elems (groupArrayLast(max_elems)())");

View File

@ -24,6 +24,9 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat
buffer << std::string(indent, ' ') << "ARRAY_JOIN id: " << format_state.getNodeId(this);
buffer << ", is_left: " << is_left;
if (hasAlias())
buffer << ", alias: " << getAlias();
buffer << '\n' << std::string(indent + 2, ' ') << "TABLE EXPRESSION\n";
getTableExpression()->dumpTreeImpl(buffer, format_state, indent + 4);

View File

@ -99,6 +99,23 @@ bool checkIfGroupAlwaysTrueGraph(const Analyzer::CNF::OrGroup & group, const Com
return false;
}
bool checkIfGroupAlwaysTrueAtoms(const Analyzer::CNF::OrGroup & group)
{
/// Filters out groups containing mutually exclusive atoms,
/// since these groups are always True
for (const auto & atom : group)
{
auto negated(atom);
negated.negative = !atom.negative;
if (group.contains(negated))
{
return true;
}
}
return false;
}
bool checkIfAtomAlwaysFalseFullMatch(const Analyzer::CNF::AtomicFormula & atom, const ConstraintsDescription::QueryTreeData & query_tree_constraints)
{
const auto constraint_atom_ids = query_tree_constraints.getAtomIds(atom.node_with_hash);
@ -644,7 +661,8 @@ void optimizeWithConstraints(Analyzer::CNF & cnf, const QueryTreeNodes & table_e
cnf.filterAlwaysTrueGroups([&](const auto & group)
{
/// remove always true groups from CNF
return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints) && !checkIfGroupAlwaysTrueGraph(group, compare_graph);
return !checkIfGroupAlwaysTrueFullMatch(group, query_tree_constraints)
&& !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group);
})
.filterAlwaysFalseAtoms([&](const Analyzer::CNF::AtomicFormula & atom)
{

View File

@ -607,6 +607,10 @@ struct ScopeAliases
std::unordered_set<QueryTreeNodePtr> nodes_with_duplicated_aliases;
std::vector<QueryTreeNodePtr> cloned_nodes_with_duplicated_aliases;
/// Names which are aliases from ARRAY JOIN.
/// This is needed to properly qualify columns from matchers and avoid name collision.
std::unordered_set<std::string> array_join_aliases;
std::unordered_map<std::string, QueryTreeNodePtr> & getAliasMap(IdentifierLookupContext lookup_context)
{
switch (lookup_context)
@ -1526,7 +1530,7 @@ private:
ProjectionNames resolveFunction(QueryTreeNodePtr & function_node, IdentifierResolveScope & scope);
ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression);
ProjectionNames resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias = false);
ProjectionNames resolveExpressionNodeList(QueryTreeNodePtr & node_list, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression);
@ -2858,7 +2862,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromExpressionArguments(cons
bool QueryAnalyzer::tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope)
{
return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr;
return scope.aliases.find(identifier_lookup, ScopeAliases::FindOption::FIRST_NAME) != nullptr || scope.aliases.array_join_aliases.contains(identifier_lookup.identifier.front());
}
/** Resolve identifier from scope aliases.
@ -3889,12 +3893,39 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromArrayJoin(const Identifi
{
auto & array_join_column_expression_typed = array_join_column_expression->as<ColumnNode &>();
if (array_join_column_expression_typed.getAlias() == identifier_lookup.identifier.getFullName())
IdentifierView identifier_view(identifier_lookup.identifier);
if (identifier_view.isCompound() && from_array_join_node.hasAlias() && identifier_view.front() == from_array_join_node.getAlias())
identifier_view.popFirst();
const auto & alias_or_name = array_join_column_expression_typed.hasAlias()
? array_join_column_expression_typed.getAlias()
: array_join_column_expression_typed.getColumnName();
if (identifier_view.front() == alias_or_name)
identifier_view.popFirst();
else if (identifier_view.getFullName() == alias_or_name)
identifier_view.popFirst(identifier_view.getPartsSize()); /// Clear
else
continue;
if (identifier_view.empty())
{
auto array_join_column = std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(),
array_join_column_expression_typed.getColumnSource());
return array_join_column;
}
auto compound_expr = tryResolveIdentifierFromCompoundExpression(
identifier_lookup.identifier,
identifier_lookup.identifier.getPartsSize() - identifier_view.getPartsSize() /*identifier_bind_size*/,
array_join_column_expression,
{} /* compound_expression_source */,
scope,
true /* can_be_not_found */);
if (compound_expr)
return compound_expr;
}
if (!resolved_identifier)
@ -6284,7 +6315,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
*
* 4. If node has alias, update its value in scope alias map. Deregister alias from expression_aliases_in_resolve_process.
*/
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression)
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, IdentifierResolveScope & scope, bool allow_lambda_expression, bool allow_table_expression, bool ignore_alias)
{
checkStackSize();
@ -6334,7 +6365,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
* To support both (SELECT 1) AS expression in projection and (SELECT 1) as subquery in IN, do not use
* alias table because in alias table subquery could be evaluated as scalar.
*/
bool use_alias_table = true;
bool use_alias_table = !ignore_alias;
if (is_duplicated_alias || (allow_table_expression && isSubqueryNodeType(node->getNodeType())))
use_alias_table = false;
@ -6634,7 +6665,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
if (is_duplicated_alias)
scope.non_cached_identifier_lookups_during_expression_resolve.erase({Identifier{node_alias}, IdentifierLookupContext::EXPRESSION});
resolved_expressions.emplace(node, result_projection_names);
if (!ignore_alias)
resolved_expressions.emplace(node, result_projection_names);
scope.popExpressionNode();
bool expression_was_root = scope.expressions_in_resolve_process_stack.empty();
@ -7569,22 +7601,25 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
for (auto & array_join_expression : array_join_nodes)
{
auto array_join_expression_alias = array_join_expression->getAlias();
if (!array_join_expression_alias.empty() && scope.aliases.alias_name_to_expression_node->contains(array_join_expression_alias))
throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS,
"ARRAY JOIN expression {} with duplicate alias {}. In scope {}",
array_join_expression->formatASTForErrorMessage(),
array_join_expression_alias,
scope.scope_node->formatASTForErrorMessage());
/// Add array join expression into scope
expressions_visitor.visit(array_join_expression);
for (const auto & elem : array_join_nodes)
{
if (elem->hasAlias())
scope.aliases.array_join_aliases.insert(elem->getAlias());
for (auto & child : elem->getChildren())
{
if (child)
expressions_visitor.visit(child);
}
}
std::string identifier_full_name;
if (auto * identifier_node = array_join_expression->as<IdentifierNode>())
identifier_full_name = identifier_node->getIdentifier().getFullName();
resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
resolveExpressionNode(array_join_expression, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/, true /*ignore_alias*/);
auto process_array_join_expression = [&](QueryTreeNodePtr & expression)
{
@ -7651,27 +7686,7 @@ void QueryAnalyzer::resolveArrayJoin(QueryTreeNodePtr & array_join_node, Identif
}
}
/** Allow to resolve ARRAY JOIN columns from aliases with types after ARRAY JOIN only after ARRAY JOIN expression list is resolved, because
* during resolution of ARRAY JOIN expression list we must use column type before ARRAY JOIN.
*
* Example: SELECT id, value_element FROM test_table ARRAY JOIN [[1,2,3]] AS value_element, value_element AS value
* It is expected that `value_element AS value` expression inside ARRAY JOIN expression list will be
* resolved as `value_element` expression with type before ARRAY JOIN.
* And it is expected that `value_element` inside projection expression list will be resolved as `value_element` expression
* with type after ARRAY JOIN.
*/
array_join_nodes = std::move(array_join_column_expressions);
for (auto & array_join_column_expression : array_join_nodes)
{
auto it = scope.aliases.alias_name_to_expression_node->find(array_join_column_expression->getAlias());
if (it != scope.aliases.alias_name_to_expression_node->end())
{
auto & array_join_column_expression_typed = array_join_column_expression->as<ColumnNode &>();
auto array_join_column = std::make_shared<ColumnNode>(array_join_column_expression_typed.getColumn(),
array_join_column_expression_typed.getColumnSource());
it->second = std::move(array_join_column);
}
}
}
void QueryAnalyzer::checkDuplicateTableNamesOrAlias(const QueryTreeNodePtr & join_node, QueryTreeNodePtr & left_table_expr, QueryTreeNodePtr & right_table_expr, IdentifierResolveScope & scope)

View File

@ -6,7 +6,6 @@
#include <Interpreters/Context.h>
#include <IO/SharedThreadPools.h>
#include <IO/HTTPHeaderEntries.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Disks/IO/ReadBufferFromAzureBlobStorage.h>
#include <Disks/IO/WriteBufferFromAzureBlobStorage.h>
#include <IO/AzureBlobStorage/copyAzureBlobStorageFile.h>
@ -30,7 +29,7 @@ namespace ErrorCodes
}
BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const StorageAzureConfiguration & configuration_,
bool allow_azure_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
@ -39,15 +38,14 @@ BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage(
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false);
auto client_ptr = configuration.createClient(/* is_readonly */false, /* attempt_to_create_container */true);
client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
object_storage = std::make_unique<AzureObjectStorage>(
"BackupReaderAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration.container,
configuration.getConnectionURL().toString());
object_storage = std::make_unique<AzureObjectStorage>("BackupReaderAzureBlobStorage",
std::move(client_ptr),
configuration.createSettings(context_),
configuration_.container,
configuration.getConnectionURL().toString());
client = object_storage->getAzureBlobStorageClient();
auto settings_copy = *object_storage->getSettings();
@ -121,7 +119,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup,
BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const StorageAzureConfiguration & configuration_,
bool allow_azure_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
@ -131,13 +129,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURL().toString(), false, false}
, configuration(configuration_)
{
auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container);
auto client_ptr = configuration.createClient(/* is_readonly */false, attempt_to_create_container);
client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true});
object_storage = std::make_unique<AzureObjectStorage>("BackupWriterAzureBlobStorage",
std::move(client_ptr),
StorageAzureBlob::createSettings(context_),
configuration_.container,
configuration.createSettings(context_),
configuration.container,
configuration_.getConnectionURL().toString());
client = object_storage->getAzureBlobStorageClient();
auto settings_copy = *object_storage->getSettings();
@ -145,8 +143,13 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage(
settings = std::make_unique<const AzureObjectStorageSettings>(settings_copy);
}
void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length)
void BackupWriterAzureBlobStorage::copyFileFromDisk(
const String & path_in_backup,
DiskPtr src_disk,
const String & src_path,
bool copy_encrypted,
UInt64 start_pos,
UInt64 length)
{
/// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible.
auto source_data_source_description = src_disk->getDataSourceDescription();
@ -196,10 +199,16 @@ void BackupWriterAzureBlobStorage::copyFile(const String & destination, const St
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
void BackupWriterAzureBlobStorage::copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length)
void BackupWriterAzureBlobStorage::copyDataToFile(
const String & path_in_backup,
const CreateReadBufferFunction & create_read_buffer,
UInt64 start_pos,
UInt64 length)
{
copyDataToAzureBlobStorageFile(create_read_buffer, start_pos, length, client, configuration.container, fs::path(configuration.blob_path) / path_in_backup, settings,
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
copyDataToAzureBlobStorageFile(
create_read_buffer, start_pos, length, client, configuration.container,
fs::path(configuration.blob_path) / path_in_backup, settings,
threadPoolCallbackRunnerUnsafe<void>(getBackupsIOThreadPool().get(), "BackupWRAzure"));
}
BackupWriterAzureBlobStorage::~BackupWriterAzureBlobStorage() = default;
@ -217,7 +226,7 @@ UInt64 BackupWriterAzureBlobStorage::getFileSize(const String & file_name)
object_storage->listObjects(key,children,/*max_keys*/0);
if (children.empty())
throw Exception(ErrorCodes::AZURE_BLOB_STORAGE_ERROR, "Object must exist");
return children[0].metadata.size_bytes;
return children[0]->metadata->size_bytes;
}
std::unique_ptr<ReadBuffer> BackupWriterAzureBlobStorage::readFile(const String & file_name, size_t /*expected_file_size*/)

View File

@ -5,8 +5,8 @@
#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_Default.h>
#include <Disks/DiskType.h>
#include <Storages/StorageAzureBlobCluster.h>
#include <Interpreters/Context_fwd.h>
#include <Storages/ObjectStorage/Azure/Configuration.h>
namespace DB
@ -17,24 +17,30 @@ class BackupReaderAzureBlobStorage : public BackupReaderDefault
{
public:
BackupReaderAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const StorageAzureConfiguration & configuration_,
bool allow_azure_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_);
~BackupReaderAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<SeekableReadBuffer> readFile(const String & file_name) override;
void copyFileToDisk(const String & path_in_backup, size_t file_size, bool encrypted_in_backup,
DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) override;
void copyFileToDisk(
const String & path_in_backup,
size_t file_size,
bool encrypted_in_backup,
DiskPtr destination_disk,
const String & destination_path,
WriteMode write_mode) override;
private:
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
StorageAzureConfiguration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};
@ -43,21 +49,32 @@ class BackupWriterAzureBlobStorage : public BackupWriterDefault
{
public:
BackupWriterAzureBlobStorage(
StorageAzureBlob::Configuration configuration_,
const StorageAzureConfiguration & configuration_,
bool allow_azure_native_copy,
const ReadSettings & read_settings_,
const WriteSettings & write_settings_,
const ContextPtr & context_,
bool attempt_to_create_container);
~BackupWriterAzureBlobStorage() override;
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr<WriteBuffer> writeFile(const String & file_name) override;
void copyDataToFile(const String & path_in_backup, const CreateReadBufferFunction & create_read_buffer, UInt64 start_pos, UInt64 length) override;
void copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
bool copy_encrypted, UInt64 start_pos, UInt64 length) override;
void copyDataToFile(
const String & path_in_backup,
const CreateReadBufferFunction & create_read_buffer,
UInt64 start_pos,
UInt64 length) override;
void copyFileFromDisk(
const String & path_in_backup,
DiskPtr src_disk,
const String & src_path,
bool copy_encrypted,
UInt64 start_pos,
UInt64 length) override;
void copyFile(const String & destination, const String & source, size_t size) override;
@ -67,9 +84,10 @@ public:
private:
std::unique_ptr<ReadBuffer> readFile(const String & file_name, size_t expected_file_size) override;
void removeFilesBatch(const Strings & file_names);
const DataSourceDescription data_source_description;
std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient> client;
StorageAzureBlob::Configuration configuration;
StorageAzureConfiguration configuration;
std::unique_ptr<AzureObjectStorage> object_storage;
std::shared_ptr<const AzureObjectStorageSettings> settings;
};

View File

@ -131,10 +131,10 @@ BackupReaderS3::BackupReaderS3(
: BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3"))
, s3_uri(s3_uri_)
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
{
auto & request_settings = s3_settings.request_settings;
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
request_settings.allow_native_copy = allow_s3_native_copy;
client = makeS3Client(s3_uri_, access_key_id_, secret_access_key_, s3_settings, context_);
@ -222,10 +222,10 @@ BackupWriterS3::BackupWriterS3(
: BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3"))
, s3_uri(s3_uri_)
, data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false}
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup))
, s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup).value_or(S3Settings{}))
{
auto & request_settings = s3_settings.request_settings;
request_settings.updateFromSettings(context_->getSettingsRef());
request_settings.updateFromSettingsIfChanged(context_->getSettingsRef());
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
request_settings.allow_native_copy = allow_s3_native_copy;
request_settings.setStorageClassName(storage_class_name);

View File

@ -5,11 +5,11 @@
#if USE_AZURE_BLOB_STORAGE
#include <Backups/BackupIO_AzureBlobStorage.h>
#include <Storages/StorageAzureBlob.h>
#include <Backups/BackupImpl.h>
#include <IO/Archives/hasRegisteredArchiveFileExtension.h>
#include <Interpreters/Context.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Storages/ObjectStorage/Azure/Configuration.h>
#include <filesystem>
#endif
@ -49,7 +49,7 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
const String & id_arg = params.backup_info.id_arg;
const auto & args = params.backup_info.args;
StorageAzureBlob::Configuration configuration;
StorageAzureConfiguration configuration;
if (!id_arg.empty())
{
@ -81,10 +81,11 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
}
if (args.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Backup AzureBlobStorage requires 1 or 2 arguments: named_collection, [filename]");
if (args.size() == 1)
configuration.blob_path = args[0].safeGet<String>();
configuration.setPath(args[0].safeGet<String>());
}
else
@ -116,12 +117,16 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory)
}
BackupImpl::ArchiveParams archive_params;
if (hasRegisteredArchiveFileExtension(configuration.blob_path))
if (hasRegisteredArchiveFileExtension(configuration.getPath()))
{
if (params.is_internal_backup)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Using archives with backups on clusters is disabled");
archive_params.archive_name = removeFileNameFromURL(configuration.blob_path);
auto path = configuration.getPath();
auto filename = removeFileNameFromURL(path);
configuration.setPath(path);
archive_params.archive_name = filename;
archive_params.compression_method = params.compression_method;
archive_params.compression_level = params.compression_level;
archive_params.password = params.password;

View File

@ -115,8 +115,11 @@ if (TARGET ch_contrib::nats_io)
add_headers_and_sources(dbms Storages/NATS)
endif()
add_headers_and_sources(dbms Storages/DataLakes)
add_headers_and_sources(dbms Storages/DataLakes/Iceberg)
add_headers_and_sources(dbms Storages/ObjectStorage)
add_headers_and_sources(dbms Storages/ObjectStorage/Azure)
add_headers_and_sources(dbms Storages/ObjectStorage/S3)
add_headers_and_sources(dbms Storages/ObjectStorage/HDFS)
add_headers_and_sources(dbms Storages/ObjectStorage/DataLakes)
add_headers_and_sources(dbms Common/NamedCollections)
if (TARGET ch_contrib::amqp_cpp)
@ -144,7 +147,6 @@ if (TARGET ch_contrib::azure_sdk)
endif()
if (TARGET ch_contrib::hdfs)
add_headers_and_sources(dbms Storages/HDFS)
add_headers_and_sources(dbms Disks/ObjectStorages/HDFS)
endif()

View File

@ -168,6 +168,9 @@
M(ObjectStorageS3Threads, "Number of threads in the S3ObjectStorage thread pool.") \
M(ObjectStorageS3ThreadsActive, "Number of threads in the S3ObjectStorage thread pool running a task.") \
M(ObjectStorageS3ThreadsScheduled, "Number of queued or active jobs in the S3ObjectStorage thread pool.") \
M(StorageObjectStorageThreads, "Number of threads in the remote table engines thread pools.") \
M(StorageObjectStorageThreadsActive, "Number of threads in the remote table engines thread pool running a task.") \
M(StorageObjectStorageThreadsScheduled, "Number of queued or active jobs in remote table engines thread pool.") \
M(ObjectStorageAzureThreads, "Number of threads in the AzureObjectStorage thread pool.") \
M(ObjectStorageAzureThreadsActive, "Number of threads in the AzureObjectStorage thread pool running a task.") \
M(ObjectStorageAzureThreadsScheduled, "Number of queued or active jobs in the AzureObjectStorage thread pool.") \

View File

@ -5,6 +5,7 @@
#include <Common/ThreadPool.h>
#include <Common/callOnce.h>
#include <Disks/IO/IOUringReader.h>
#include <Storages/StorageS3Settings.h>
#include <Disks/IO/getIOUringReader.h>
#include <Core/ServerSettings.h>
@ -145,9 +146,10 @@ struct ContextSharedPart : boost::noncopyable
mutable ThrottlerPtr local_read_throttler; /// A server-wide throttler for local IO reads
mutable ThrottlerPtr local_write_throttler; /// A server-wide throttler for local IO writes
std::optional<StorageS3Settings> storage_s3_settings TSA_GUARDED_BY(mutex); /// Settings of S3 storage
mutable std::mutex keeper_dispatcher_mutex;
mutable std::shared_ptr<KeeperDispatcher> keeper_dispatcher TSA_GUARDED_BY(keeper_dispatcher_mutex);
};
ContextData::ContextData() = default;
@ -453,6 +455,19 @@ std::shared_ptr<zkutil::ZooKeeper> Context::getZooKeeper() const
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Cannot connect to ZooKeeper from Keeper");
}
const StorageS3Settings & Context::getStorageS3Settings() const
{
std::lock_guard lock(shared->mutex);
if (!shared->storage_s3_settings)
{
const auto & config = shared->config ? *shared->config : Poco::Util::Application::instance().config();
shared->storage_s3_settings.emplace().loadFromConfig("s3", config, getSettingsRef());
}
return *shared->storage_s3_settings;
}
const ServerSettings & Context::getServerSettings() const
{
return shared->server_settings;

View File

@ -37,6 +37,7 @@ class FilesystemCacheLog;
class FilesystemReadPrefetchesLog;
class BlobStorageLog;
class IOUringReader;
class StorageS3Settings;
/// A small class which owns ContextShared.
/// We don't use something like unique_ptr directly to allow ContextShared type to be incomplete.
@ -162,6 +163,10 @@ public:
zkutil::ZooKeeperPtr getZooKeeper() const;
const StorageS3Settings & getStorageS3Settings() const;
const String & getUserName() const { static std::string user; return user; }
const ServerSettings & getServerSettings() const;
bool hasTraceCollector() const;

View File

@ -3,6 +3,7 @@
#include <Core/BaseSettings.h>
#include <Core/Defines.h>
#include <Core/SettingsEnums.h>
namespace Poco::Util
@ -51,7 +52,7 @@ namespace DB
M(UInt64, max_temporary_data_on_disk_size, 0, "The maximum amount of storage that could be used for external aggregation, joins or sorting.", 0) \
M(String, temporary_data_in_cache, "", "Cache disk name for temporary data.", 0) \
M(UInt64, aggregate_function_group_array_max_element_size, 0xFFFFFF, "Max array element size in bytes for groupArray function. This limit is checked at serialization and help to avoid large state size.", 0) \
M(Bool, aggregate_function_group_array_has_limit_size, false, "When the max array element size is exceeded, a `Too large array size` exception will be thrown by default. When set to true, no exception will be thrown, and the excess elements will be discarded.", 0) \
M(GroupArrayActionWhenLimitReached, aggregate_function_group_array_action_when_limit_is_reached, GroupArrayActionWhenLimitReached::THROW, "Action to execute when max array element size is exceeded in groupArray: `throw` exception, or `discard` extra values", 0) \
M(UInt64, max_server_memory_usage, 0, "Maximum total memory usage of the server in bytes. Zero means unlimited.", 0) \
M(Double, max_server_memory_usage_to_ram_ratio, 0.9, "Same as max_server_memory_usage but in to RAM ratio. Allows to lower max memory on low-memory systems.", 0) \
M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Maximum total memory usage for merges and mutations in bytes. Zero means unlimited.", 0) \

View File

@ -116,6 +116,12 @@ class IColumn;
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
M(Bool, azure_allow_parallel_part_upload, true, "Use multiple threads for azure multipart upload.", 0) \
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(Bool, hdfs_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(Bool, azure_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(Bool, s3_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in S3 table engine", 0) \
M(Bool, hdfs_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in HDFS table engine", 0) \
M(Bool, azure_ignore_file_doesnt_exist, false, "Return 0 rows when the requested files don't exist, instead of throwing an exception in AzureBlobStorage table engine", 0) \
M(Bool, s3_validate_request_settings, true, "Validate S3 request settings", 0) \
M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
@ -128,6 +134,7 @@ class IColumn;
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in hdfs engine tables", 0) \
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
M(Bool, hdfs_skip_empty_files, false, "Allow to skip empty files in hdfs table engine", 0) \
M(Bool, azure_skip_empty_files, false, "Allow to skip empty files in azure table engine", 0) \
M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \

View File

@ -85,6 +85,14 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.6", {{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
{"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"},
{"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"},
{"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"},
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
}},
{"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
{"input_format_tsv_crlf_end_of_line", false, false, "Enables reading of CRLF line endings with TSV formats"},

View File

@ -229,4 +229,9 @@ IMPLEMENT_SETTING_ENUM(SQLSecurityType, ErrorCodes::BAD_ARGUMENTS,
{{"DEFINER", SQLSecurityType::DEFINER},
{"INVOKER", SQLSecurityType::INVOKER},
{"NONE", SQLSecurityType::NONE}})
IMPLEMENT_SETTING_ENUM(
GroupArrayActionWhenLimitReached,
ErrorCodes::BAD_ARGUMENTS,
{{"throw", GroupArrayActionWhenLimitReached::THROW}, {"discard", GroupArrayActionWhenLimitReached::DISCARD}})
}

View File

@ -370,4 +370,12 @@ DECLARE_SETTING_ENUM(SchemaInferenceMode)
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOverflowBehavior, FormatSettings::DateTimeOverflowBehavior)
DECLARE_SETTING_ENUM(SQLSecurityType)
enum class GroupArrayActionWhenLimitReached : uint8_t
{
THROW,
DISCARD
};
DECLARE_SETTING_ENUM(GroupArrayActionWhenLimitReached)
}
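The IMPLEMENT_SETTING_ENUM / DECLARE_SETTING_ENUM pair above wires the new server setting to the string values "throw" and "discard". A minimal standalone sketch of that mapping, for illustration only (the parse helper is hypothetical; only the enum and the two strings come from this patch, and the real macro-generated code reports errors through ErrorCodes::BAD_ARGUMENTS):

#include <cstdint>
#include <stdexcept>
#include <string>

enum class GroupArrayActionWhenLimitReached : uint8_t { THROW, DISCARD };

// Hypothetical helper mirroring the mapping declared in the patch.
GroupArrayActionWhenLimitReached parseGroupArrayAction(const std::string & value)
{
    if (value == "throw")
        return GroupArrayActionWhenLimitReached::THROW;
    if (value == "discard")
        return GroupArrayActionWhenLimitReached::DISCARD;
    throw std::invalid_argument("Unknown groupArray limit action: " + value);
}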

View File

@ -516,8 +516,14 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams(
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * /* cache */) const
SubstreamsCache * cache) const
{
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
{
column = cached_column;
return;
}
auto mutable_column = column->assumeMutable();
ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(*mutable_column);
@ -671,6 +677,7 @@ void SerializationLowCardinality::deserializeBinaryBulkWithMultipleStreams(
}
column = std::move(mutable_column);
addToSubstreamsCache(cache, settings.path, column);
}
void SerializationLowCardinality::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -11,7 +11,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#include <Storages/IStorage.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Common/re2.h>

View File

@ -13,7 +13,7 @@
#include <azure/core/io/body_stream.hpp>
#include <Common/ThreadPoolTaskTracker.h>
#include <Common/BufferAllocationPolicy.h>
#include <Storages/StorageAzureBlob.h>
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
namespace Poco
{

View File

@ -79,14 +79,14 @@ private:
for (const auto & blob : blobs_list)
{
batch.emplace_back(
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(
blob.Name,
ObjectMetadata{
static_cast<uint64_t>(blob.BlobSize),
Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(
static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
{}});
{}}));
}
if (!blob_list_response.NextPageToken.HasValue() || blob_list_response.NextPageToken.Value().empty())
@ -148,15 +148,15 @@ bool AzureObjectStorage::exists(const StoredObject & object) const
return false;
}
ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix) const
ObjectStorageIteratorPtr AzureObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
{
auto settings_ptr = settings.get();
auto client_ptr = client.get();
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, settings_ptr->list_object_keys_size);
return std::make_shared<AzureIteratorAsync>(path_prefix, client_ptr, max_keys);
}
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
{
auto client_ptr = client.get();
@ -179,19 +179,19 @@ void AzureObjectStorage::listObjects(const std::string & path, RelativePathsWith
for (const auto & blob : blobs_list)
{
children.emplace_back(
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
blob.Name,
ObjectMetadata{
static_cast<uint64_t>(blob.BlobSize),
Poco::Timestamp::fromEpochTime(
std::chrono::duration_cast<std::chrono::seconds>(
static_cast<std::chrono::system_clock::time_point>(blob.Details.LastModified).time_since_epoch()).count()),
{}});
{}}));
}
if (max_keys)
{
int keys_left = max_keys - static_cast<int>(children.size());
size_t keys_left = max_keys > children.size() ? max_keys - children.size() : 0;
if (keys_left == 0)
break;
options.PageSizeHint = keys_left;
@ -346,10 +346,11 @@ void AzureObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
{
auto client_ptr = client.get();
for (const auto & object : objects)
{
removeObjectImpl(object, client_ptr, true);
}
}
ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) const
{
auto client_ptr = client.get();
@ -366,9 +367,9 @@ ObjectMetadata AzureObjectStorage::getObjectMetadata(const std::string & path) c
{
result.attributes.emplace();
for (const auto & [key, value] : properties.Metadata)
(*result.attributes)[key] = value;
result.attributes[key] = value;
}
result.last_modified.emplace(static_cast<std::chrono::system_clock::time_point>(properties.LastModified).time_since_epoch().count());
result.last_modified = static_cast<std::chrono::system_clock::time_point>(properties.LastModified).time_since_epoch().count();
return result;
}
@ -397,7 +398,9 @@ void AzureObjectStorage::copyObject( /// NOLINT
dest_blob_client.CopyFromUri(source_blob_client.GetUrl(), copy_options);
}
void AzureObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
void AzureObjectStorage::applyNewSettings(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix,
ContextPtr context, const ApplyNewSettingsOptions &)
{
auto new_settings = getAzureBlobStorageSettings(config, config_prefix, context);
settings.set(std::move(new_settings));

View File

@ -85,9 +85,9 @@ public:
const String & object_namespace_,
const String & description_);
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override;
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;
std::string getName() const override { return "AzureObjectStorage"; }
@ -144,7 +144,8 @@ public:
void applyNewSettings(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context) override;
ContextPtr context,
const ApplyNewSettingsOptions & options) override;
String getObjectsNamespace() const override { return object_namespace ; }

View File

@ -176,7 +176,7 @@ std::unique_ptr<IObjectStorage> CachedObjectStorage::cloneObjectStorage(
return object_storage->cloneObjectStorage(new_namespace, config, config_prefix, context);
}
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
void CachedObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
{
object_storage->listObjects(path, children, max_keys);
}
@ -192,9 +192,10 @@ void CachedObjectStorage::shutdown()
}
void CachedObjectStorage::applyNewSettings(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix,
ContextPtr context, const ApplyNewSettingsOptions & options)
{
object_storage->applyNewSettings(config, config_prefix, context);
object_storage->applyNewSettings(config, config_prefix, context, options);
}
String CachedObjectStorage::getObjectsNamespace() const

View File

@ -80,7 +80,7 @@ public:
const std::string & config_prefix,
ContextPtr context) override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
ObjectMetadata getObjectMetadata(const std::string & path) const override;
@ -91,7 +91,8 @@ public:
void applyNewSettings(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context) override;
ContextPtr context,
const ApplyNewSettingsOptions & options) override;
String getObjectsNamespace() const override;

View File

@ -544,7 +544,7 @@ void DiskObjectStorage::applyNewSettings(
{
/// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name
const auto config_prefix = "storage_configuration.disks." + name;
object_storage->applyNewSettings(config, config_prefix, context_);
object_storage->applyNewSettings(config, config_prefix, context_, IObjectStorage::ApplyNewSettingsOptions{ .allow_client_change = true });
{
std::unique_lock lock(resource_mutex);

View File

@ -364,18 +364,18 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage *
for (const auto & object : objects)
{
LOG_INFO(disk->log, "Calling restore for key for disk {}", object.relative_path);
LOG_INFO(disk->log, "Calling restore for key for disk {}", object->relative_path);
/// Skip file operations objects. They will be processed separately.
if (object.relative_path.find("/operations/") != String::npos)
if (object->relative_path.find("/operations/") != String::npos)
continue;
const auto [revision, _] = extractRevisionAndOperationFromKey(object.relative_path);
const auto [revision, _] = extractRevisionAndOperationFromKey(object->relative_path);
/// Filter early if it's possible to get revision from key.
if (revision > restore_information.revision)
continue;
keys_names.push_back(object.relative_path);
keys_names.push_back(object->relative_path);
}
if (!keys_names.empty())
@ -405,26 +405,20 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles(
{
for (const auto & key : keys)
{
auto meta = source_object_storage->getObjectMetadata(key);
auto object_attributes = meta.attributes;
auto metadata = source_object_storage->getObjectMetadata(key);
auto object_attributes = metadata.attributes;
String path;
if (object_attributes.has_value())
/// Restore file if object has 'path' in metadata.
auto path_entry = object_attributes.find("path");
if (path_entry == object_attributes.end())
{
/// Restore file if object has 'path' in metadata.
auto path_entry = object_attributes->find("path");
if (path_entry == object_attributes->end())
{
/// Such keys can remain after migration, we can skip them.
LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key);
continue;
}
path = path_entry->second;
}
else
/// Such keys can remain after migration, we can skip them.
LOG_WARNING(disk->log, "Skip key {} because it doesn't have 'path' in metadata", key);
continue;
}
path = path_entry->second;
disk->createDirectories(directoryPath(path));
auto object_key = ObjectStorageKey::createAsRelative(disk->object_key_prefix, shrinkKey(source_path, key));
@ -436,7 +430,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::processRestoreFiles(
source_object_storage->copyObjectToAnotherObjectStorage(object_from, object_to, read_settings, write_settings, *disk->object_storage);
auto tx = disk->metadata_storage->createTransaction();
tx->addBlobToMetadata(path, object_key, meta.size_bytes);
tx->addBlobToMetadata(path, object_key, metadata.size_bytes);
tx->commit();
LOG_TRACE(disk->log, "Restored file {}", path);
@ -475,10 +469,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
for (const auto & object : objects)
{
const auto [revision, operation] = extractRevisionAndOperationFromKey(object.relative_path);
const auto [revision, operation] = extractRevisionAndOperationFromKey(object->relative_path);
if (revision == UNKNOWN_REVISION)
{
LOG_WARNING(disk->log, "Skip key {} with unknown revision", object.relative_path);
LOG_WARNING(disk->log, "Skip key {} with unknown revision", object->relative_path);
continue;
}
@ -491,7 +485,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFileOperations(IObject
if (send_metadata)
revision_counter = revision - 1;
auto object_attributes = *(source_object_storage->getObjectMetadata(object.relative_path).attributes);
auto object_attributes = source_object_storage->getObjectMetadata(object->relative_path).attributes;
if (operation == rename)
{
auto from_path = object_attributes["from_path"];

View File

@ -1,12 +1,13 @@
#include <Disks/ObjectStorages/HDFS/HDFSObjectStorage.h>
#include <IO/copyData.h>
#include <Storages/HDFS/WriteBufferFromHDFS.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/WriteBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#include <Storages/HDFS/ReadBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
#include <Disks/IO/ReadBufferFromRemoteFSGather.h>
#include <Common/getRandomASCIIString.h>
#include <Common/logger_useful.h>
#if USE_HDFS
@ -18,28 +19,57 @@ namespace ErrorCodes
{
extern const int UNSUPPORTED_METHOD;
extern const int HDFS_ERROR;
extern const int ACCESS_DENIED;
extern const int LOGICAL_ERROR;
}
void HDFSObjectStorage::shutdown()
void HDFSObjectStorage::initializeHDFSFS() const
{
if (initialized)
return;
std::lock_guard lock(init_mutex);
if (initialized)
return;
hdfs_builder = createHDFSBuilder(url, config);
hdfs_fs = createHDFSFS(hdfs_builder.get());
initialized = true;
}
void HDFSObjectStorage::startup()
std::string HDFSObjectStorage::extractObjectKeyFromURL(const StoredObject & object) const
{
/// This is very unfortunate, but for disk HDFS we made a mistake
/// and now its behaviour is inconsistent with S3 and Azure disks.
/// The mistake is that for HDFS we write the whole URL + data directory + key into metadata files,
/// while for S3 and Azure we write only data_directory + key there.
/// This leads to an ambiguity: for StorageHDFS object.remote_path contains just the key,
/// but for DiskHDFS it contains the URL as well.
auto path = object.remote_path;
if (path.starts_with(url))
path = path.substr(url.size());
if (path.starts_with("/"))
path = path.substr(1);
return path;
}
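A minimal, self-contained sketch of the string handling described in the comment above, using hypothetical endpoint and key values (the real code keeps `url` as a member and operates on StoredObject::remote_path):

#include <cassert>
#include <string>

// Strip the endpoint URL (DiskHDFS-style metadata) and a leading slash,
// leaving only the object key relative to the data directory.
std::string extractKey(const std::string & url, std::string path)
{
    if (path.starts_with(url))
        path = path.substr(url.size());
    if (path.starts_with("/"))
        path = path.substr(1);
    return path;
}

int main()
{
    // DiskHDFS metadata: whole URL + data directory + key.
    assert(extractKey("hdfs://namenode:9000/data/", "hdfs://namenode:9000/data/part_0.bin") == "part_0.bin");
    // StorageHDFS metadata: already just the key.
    assert(extractKey("hdfs://namenode:9000/data/", "part_0.bin") == "part_0.bin");
}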
ObjectStorageKey HDFSObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const
{
initializeHDFSFS();
/// Whatever the data_source_description.description value is, consider that key a relative key
return ObjectStorageKey::createAsRelative(hdfs_root_path, getRandomASCIIString(32));
chassert(data_directory.starts_with("/"));
return ObjectStorageKey::createAsRelative(
fs::path(url_without_path) / data_directory.substr(1), getRandomASCIIString(32));
}
bool HDFSObjectStorage::exists(const StoredObject & object) const
{
const auto & path = object.remote_path;
const size_t begin_of_path = path.find('/', path.find("//") + 2);
const String remote_fs_object_path = path.substr(begin_of_path);
return (0 == hdfsExists(hdfs_fs.get(), remote_fs_object_path.c_str()));
initializeHDFSFS();
std::string path = object.remote_path;
if (path.starts_with(url_without_path))
path = path.substr(url_without_path.size());
return (0 == hdfsExists(hdfs_fs.get(), path.c_str()));
}
std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLINT
@ -48,7 +78,10 @@ std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObject( /// NOLIN
std::optional<size_t>,
std::optional<size_t>) const
{
return std::make_unique<ReadBufferFromHDFS>(object.remote_path, object.remote_path, config, patchSettings(read_settings));
initializeHDFSFS();
auto path = extractObjectKeyFromURL(object);
return std::make_unique<ReadBufferFromHDFS>(
fs::path(url_without_path) / "", fs::path(data_directory) / path, config, patchSettings(read_settings));
}
std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObjects( /// NOLINT
@ -57,18 +90,15 @@ std::unique_ptr<ReadBufferFromFileBase> HDFSObjectStorage::readObjects( /// NOLI
std::optional<size_t>,
std::optional<size_t>) const
{
initializeHDFSFS();
auto disk_read_settings = patchSettings(read_settings);
auto read_buffer_creator =
[this, disk_read_settings]
(bool /* restricted_seek */, const StoredObject & object_) -> std::unique_ptr<ReadBufferFromFileBase>
{
const auto & path = object_.remote_path;
size_t begin_of_path = path.find('/', path.find("//") + 2);
auto hdfs_path = path.substr(begin_of_path);
auto hdfs_uri = path.substr(0, begin_of_path);
auto path = extractObjectKeyFromURL(object_);
return std::make_unique<ReadBufferFromHDFS>(
hdfs_uri, hdfs_path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true);
fs::path(url_without_path) / "", fs::path(data_directory) / path, config, disk_read_settings, /* read_until_position */0, /* use_external_buffer */true);
};
return std::make_unique<ReadBufferFromRemoteFSGather>(
@ -82,14 +112,21 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
size_t buf_size,
const WriteSettings & write_settings)
{
initializeHDFSFS();
if (attributes.has_value())
throw Exception(
ErrorCodes::UNSUPPORTED_METHOD,
"HDFS API doesn't support custom attributes/metadata for stored objects");
std::string path = object.remote_path;
if (path.starts_with("/"))
path = path.substr(1);
if (!path.starts_with(url))
path = fs::path(url) / path;
/// Single O_WRONLY in libhdfs adds O_TRUNC
return std::make_unique<WriteBufferFromHDFS>(
object.remote_path, config, settings->replication, patchSettings(write_settings), buf_size,
path, config, settings->replication, patchSettings(write_settings), buf_size,
mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND);
}
@ -97,11 +134,13 @@ std::unique_ptr<WriteBufferFromFileBase> HDFSObjectStorage::writeObject( /// NOL
/// Remove file. Throws exception if file doesn't exist or it's a directory.
void HDFSObjectStorage::removeObject(const StoredObject & object)
{
const auto & path = object.remote_path;
const size_t begin_of_path = path.find('/', path.find("//") + 2);
initializeHDFSFS();
auto path = object.remote_path;
if (path.starts_with(url_without_path))
path = path.substr(url_without_path.size());
/// Add path from root to file name
int res = hdfsDelete(hdfs_fs.get(), path.substr(begin_of_path).c_str(), 0);
int res = hdfsDelete(hdfs_fs.get(), path.c_str(), 0);
if (res == -1)
throw Exception(ErrorCodes::HDFS_ERROR, "HDFSDelete failed with path: {}", path);
@ -109,27 +148,85 @@ void HDFSObjectStorage::removeObject(const StoredObject & object)
void HDFSObjectStorage::removeObjects(const StoredObjects & objects)
{
initializeHDFSFS();
for (const auto & object : objects)
removeObject(object);
}
void HDFSObjectStorage::removeObjectIfExists(const StoredObject & object)
{
initializeHDFSFS();
if (exists(object))
removeObject(object);
}
void HDFSObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
{
initializeHDFSFS();
for (const auto & object : objects)
removeObjectIfExists(object);
}
ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string &) const
ObjectMetadata HDFSObjectStorage::getObjectMetadata(const std::string & path) const
{
throw Exception(
ErrorCodes::UNSUPPORTED_METHOD,
"HDFS API doesn't support custom attributes/metadata for stored objects");
initializeHDFSFS();
auto * file_info = hdfsGetPathInfo(hdfs_fs.get(), path.data());
if (!file_info)
throw Exception(ErrorCodes::HDFS_ERROR,
"Cannot get file info for: {}. Error: {}", path, hdfsGetLastError());
ObjectMetadata metadata;
metadata.size_bytes = static_cast<size_t>(file_info->mSize);
metadata.last_modified = Poco::Timestamp::fromEpochTime(file_info->mLastMod);
hdfsFreeFileInfo(file_info, 1);
return metadata;
}
void HDFSObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
{
initializeHDFSFS();
LOG_TEST(log, "Trying to list files for {}", path);
HDFSFileInfo ls;
ls.file_info = hdfsListDirectory(hdfs_fs.get(), path.data(), &ls.length);
if (ls.file_info == nullptr && errno != ENOENT) // NOLINT
{
// Ignore "file not found" and keep throwing other exceptions;
// libhdfs3 has no function to get the exception type, so use errno.
throw Exception(ErrorCodes::ACCESS_DENIED, "Cannot list directory {}: {}",
path, String(hdfsGetLastError()));
}
if (!ls.file_info && ls.length > 0)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "file_info shouldn't be null");
}
LOG_TEST(log, "Listed {} files for {}", ls.length, path);
for (int i = 0; i < ls.length; ++i)
{
const String file_path = fs::path(ls.file_info[i].mName).lexically_normal();
const bool is_directory = ls.file_info[i].mKind == 'D';
if (is_directory)
{
listObjects(fs::path(file_path) / "", children, max_keys);
}
else
{
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
String(file_path),
ObjectMetadata{
static_cast<uint64_t>(ls.file_info[i].mSize),
Poco::Timestamp::fromEpochTime(ls.file_info[i].mLastMod),
{}}));
}
if (max_keys && children.size() >= max_keys)
break;
}
}
void HDFSObjectStorage::copyObject( /// NOLINT
@ -139,6 +236,7 @@ void HDFSObjectStorage::copyObject( /// NOLINT
const WriteSettings & write_settings,
std::optional<ObjectAttributes> object_to_attributes)
{
initializeHDFSFS();
if (object_to_attributes.has_value())
throw Exception(
ErrorCodes::UNSUPPORTED_METHOD,
@ -151,7 +249,10 @@ void HDFSObjectStorage::copyObject( /// NOLINT
}
std::unique_ptr<IObjectStorage> HDFSObjectStorage::cloneObjectStorage(const std::string &, const Poco::Util::AbstractConfiguration &, const std::string &, ContextPtr)
std::unique_ptr<IObjectStorage> HDFSObjectStorage::cloneObjectStorage(
const std::string &,
const Poco::Util::AbstractConfiguration &,
const std::string &, ContextPtr)
{
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "HDFS object storage doesn't support cloning");
}

View File

@ -6,7 +6,7 @@
#include <Disks/IDisk.h>
#include <Disks/ObjectStorages/IObjectStorage.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#include <Core/UUID.h>
#include <memory>
#include <Poco/Util/AbstractConfiguration.h>
@ -16,21 +16,13 @@ namespace DB
struct HDFSObjectStorageSettings
{
HDFSObjectStorageSettings() = default;
size_t min_bytes_for_seek;
int objects_chunk_size_to_delete;
int replication;
HDFSObjectStorageSettings(
int min_bytes_for_seek_,
int objects_chunk_size_to_delete_,
int replication_)
HDFSObjectStorageSettings(int min_bytes_for_seek_, int replication_)
: min_bytes_for_seek(min_bytes_for_seek_)
, objects_chunk_size_to_delete(objects_chunk_size_to_delete_)
, replication(replication_)
{}
size_t min_bytes_for_seek;
int replication;
};
@ -43,20 +35,29 @@ public:
HDFSObjectStorage(
const String & hdfs_root_path_,
SettingsPtr settings_,
const Poco::Util::AbstractConfiguration & config_)
const Poco::Util::AbstractConfiguration & config_,
bool lazy_initialize)
: config(config_)
, hdfs_builder(createHDFSBuilder(hdfs_root_path_, config))
, hdfs_fs(createHDFSFS(hdfs_builder.get()))
, settings(std::move(settings_))
, hdfs_root_path(hdfs_root_path_)
, log(getLogger("HDFSObjectStorage(" + hdfs_root_path_ + ")"))
{
const size_t begin_of_path = hdfs_root_path_.find('/', hdfs_root_path_.find("//") + 2);
url = hdfs_root_path_;
url_without_path = url.substr(0, begin_of_path);
if (begin_of_path < url.size())
data_directory = url.substr(begin_of_path);
else
data_directory = "/";
if (!lazy_initialize)
initializeHDFSFS();
}
std::string getName() const override { return "HDFSObjectStorage"; }
std::string getCommonKeyPrefix() const override { return hdfs_root_path; }
std::string getCommonKeyPrefix() const override { return url; }
std::string getDescription() const override { return hdfs_root_path; }
std::string getDescription() const override { return url; }
ObjectStorageType getType() const override { return ObjectStorageType::HDFS; }
@ -100,9 +101,7 @@ public:
const WriteSettings & write_settings,
std::optional<ObjectAttributes> object_to_attributes = {}) override;
void shutdown() override;
void startup() override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
String getObjectsNamespace() const override { return ""; }
@ -116,13 +115,28 @@ public:
bool isRemote() const override { return true; }
void startup() override { }
void shutdown() override { }
private:
void initializeHDFSFS() const;
std::string extractObjectKeyFromURL(const StoredObject & object) const;
const Poco::Util::AbstractConfiguration & config;
HDFSBuilderWrapper hdfs_builder;
HDFSFSPtr hdfs_fs;
mutable HDFSBuilderWrapper hdfs_builder;
mutable HDFSFSPtr hdfs_fs;
mutable std::mutex init_mutex;
mutable std::atomic_bool initialized{false};
SettingsPtr settings;
const std::string hdfs_root_path;
std::string url;
std::string url_without_path;
std::string data_directory;
LoggerPtr log;
};
}

View File

@ -25,16 +25,16 @@ bool IObjectStorage::existsOrHasAnyChild(const std::string & path) const
return !files.empty();
}
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, int) const
void IObjectStorage::listObjects(const std::string &, RelativePathsWithMetadata &, size_t) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "listObjects() is not supported");
}
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix) const
ObjectStorageIteratorPtr IObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
{
RelativePathsWithMetadata files;
listObjects(path_prefix, files, 0);
listObjects(path_prefix, files, max_keys);
return std::make_shared<ObjectStorageIteratorFromList>(std::move(files));
}

View File

@ -37,6 +37,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
}
class ReadBufferFromFileBase;
@ -47,21 +48,28 @@ using ObjectAttributes = std::map<std::string, std::string>;
struct ObjectMetadata
{
uint64_t size_bytes = 0;
std::optional<Poco::Timestamp> last_modified;
std::optional<ObjectAttributes> attributes;
Poco::Timestamp last_modified;
ObjectAttributes attributes;
};
struct RelativePathWithMetadata
{
String relative_path;
ObjectMetadata metadata;
std::optional<ObjectMetadata> metadata;
RelativePathWithMetadata() = default;
RelativePathWithMetadata(String relative_path_, ObjectMetadata metadata_)
explicit RelativePathWithMetadata(String relative_path_, std::optional<ObjectMetadata> metadata_ = std::nullopt)
: relative_path(std::move(relative_path_))
, metadata(std::move(metadata_))
{}
virtual ~RelativePathWithMetadata() = default;
virtual std::string getFileName() const { return std::filesystem::path(relative_path).filename(); }
virtual std::string getPath() const { return relative_path; }
virtual bool isArchive() const { return false; }
virtual std::string getPathToArchive() const { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not an archive"); }
};
struct ObjectKeyWithMetadata
@ -77,7 +85,8 @@ struct ObjectKeyWithMetadata
{}
};
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadata>;
using RelativePathWithMetadataPtr = std::shared_ptr<RelativePathWithMetadata>;
using RelativePathsWithMetadata = std::vector<RelativePathWithMetadataPtr>;
using ObjectKeysWithMetadata = std::vector<ObjectKeyWithMetadata>;
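Since RelativePathsWithMetadata now holds shared pointers and the metadata is optional, call sites switch from `elem.relative_path` to `elem->relative_path`, as the later hunks in this patch do. A hedged sketch of consuming the new container (the helper function and prefix are hypothetical, only the types and the listObjects signature come from this patch):

#include <cstdint>
#include <string>
#include <utility>
#include <vector>
#include <Disks/ObjectStorages/IObjectStorage.h>

// Collect (path, size) pairs; size falls back to 0 when the listing
// did not attach metadata to an entry.
std::vector<std::pair<std::string, uint64_t>> listNamesAndSizes(
    const DB::IObjectStorage & storage, const std::string & prefix)
{
    DB::RelativePathsWithMetadata children;
    storage.listObjects(prefix, children, /* max_keys */ 0);  // 0 means "no limit"

    std::vector<std::pair<std::string, uint64_t>> result;
    for (const auto & entry : children)
        result.emplace_back(entry->relative_path, entry->metadata ? entry->metadata->size_bytes : 0);
    return result;
}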
class IObjectStorageIterator;
@ -111,9 +120,9 @@ public:
/// /, /a, /a/b, /a/b/c, /a/b/c/d while exists will return true only for /a/b/c/d
virtual bool existsOrHasAnyChild(const std::string & path) const;
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const;
virtual void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const;
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const;
virtual ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const;
/// Get object metadata if supported. It should be possible to receive
/// at least size of object
@ -190,11 +199,15 @@ public:
virtual void startup() = 0;
/// Apply new settings; in most cases this reinitializes the client and some other state.
struct ApplyNewSettingsOptions
{
bool allow_client_change = true;
};
virtual void applyNewSettings(
const Poco::Util::AbstractConfiguration &,
const Poco::Util::AbstractConfiguration & /* config */,
const std::string & /*config_prefix*/,
ContextPtr)
{}
ContextPtr /* context */,
const ApplyNewSettingsOptions & /* options */) {}
/// Sometimes object storages have something similar to chroot or namespace, for example
/// buckets in S3. If the object storage doesn't have any namespaces, return an empty string.

View File

@ -10,4 +10,7 @@ using ObjectStoragePtr = std::shared_ptr<IObjectStorage>;
class IMetadataStorage;
using MetadataStoragePtr = std::shared_ptr<IMetadataStorage>;
class IObjectStorageIterator;
using ObjectStorageIteratorPtr = std::shared_ptr<IObjectStorageIterator>;
}

View File

@ -172,7 +172,7 @@ ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) c
return object_metadata;
}
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const
void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t/* max_keys */) const
{
for (const auto & entry : fs::directory_iterator(path))
{
@ -182,8 +182,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith
continue;
}
auto metadata = getObjectMetadata(entry.path());
children.emplace_back(entry.path(), std::move(metadata));
children.emplace_back(std::make_shared<RelativePathWithMetadata>(entry.path(), getObjectMetadata(entry.path())));
}
}
@ -223,11 +222,6 @@ std::unique_ptr<IObjectStorage> LocalObjectStorage::cloneObjectStorage(
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "cloneObjectStorage() is not implemented for LocalObjectStorage");
}
void LocalObjectStorage::applyNewSettings(
const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */)
{
}
ObjectStorageKey LocalObjectStorage::generateObjectKeyForPath(const std::string & /* path */) const
{
constexpr size_t key_name_total_size = 32;

View File

@ -58,7 +58,7 @@ public:
ObjectMetadata getObjectMetadata(const std::string & path) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
bool existsOrHasAnyChild(const std::string & path) const override;
@ -73,11 +73,6 @@ public:
void startup() override;
void applyNewSettings(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context) override;
String getObjectsNamespace() const override { return ""; }
std::unique_ptr<IObjectStorage> cloneObjectStorage(

View File

@ -105,7 +105,7 @@ std::vector<std::string> MetadataStorageFromPlainObjectStorage::getDirectChildre
std::unordered_set<std::string> duplicates_filter;
for (const auto & elem : remote_paths)
{
const auto & path = elem.relative_path;
const auto & path = elem->relative_path;
chassert(path.find(storage_key) == 0);
const auto child_pos = storage_key.size();
/// string::npos is ok.

View File

@ -26,11 +26,11 @@ MetadataStorageFromPlainObjectStorage::PathMap loadPathPrefixMap(const std::stri
object_storage->listObjects(root, files, 0);
for (const auto & file : files)
{
auto remote_path = std::filesystem::path(file.relative_path);
auto remote_path = std::filesystem::path(file->relative_path);
if (remote_path.filename() != PREFIX_PATH_FILE_NAME)
continue;
StoredObject object{file.relative_path};
StoredObject object{file->relative_path};
auto read_buf = object_storage->readObject(object);
String local_path;
@ -88,7 +88,7 @@ std::vector<std::string> getDirectChildrenOnRewritableDisk(
auto skip_list = std::set<std::string>{PREFIX_PATH_FILE_NAME};
for (const auto & elem : remote_paths)
{
const auto & path = elem.relative_path;
const auto & path = elem->relative_path;
chassert(path.find(storage_key) == 0);
const auto child_pos = storage_key.size();

View File

@ -9,7 +9,7 @@
#endif
#if USE_HDFS && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
#include <Disks/ObjectStorages/HDFS/HDFSObjectStorage.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#endif
#if USE_AZURE_BLOB_STORAGE && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD)
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
@ -183,7 +183,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory)
auto uri = getS3URI(config, config_prefix, context);
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto client = getClient(config, config_prefix, context, *settings, true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = createObjectStorage<S3ObjectStorage>(
@ -219,7 +219,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory)
auto uri = getS3URI(config, config_prefix, context);
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto client = getClient(config, config_prefix, context, *settings, true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<PlainObjectStorage<S3ObjectStorage>>(
@ -253,7 +253,7 @@ void registerS3PlainRewritableObjectStorage(ObjectStorageFactory & factory)
auto uri = getS3URI(config, config_prefix, context);
auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix);
auto settings = getSettings(config, config_prefix, context);
auto client = getClient(config, config_prefix, context, *settings);
auto client = getClient(config, config_prefix, context, *settings, true);
auto key_generator = getKeyGenerator(uri, config, config_prefix);
auto object_storage = std::make_shared<PlainRewritableObjectStorage<S3ObjectStorage>>(
@ -287,10 +287,9 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory)
std::unique_ptr<HDFSObjectStorageSettings> settings = std::make_unique<HDFSObjectStorageSettings>(
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
context->getSettingsRef().hdfs_replication);
return createObjectStorage<HDFSObjectStorage>(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config);
return createObjectStorage<HDFSObjectStorage>(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config, /* lazy_initialize */false);
});
}
#endif

View File

@ -9,7 +9,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
RelativePathWithMetadata ObjectStorageIteratorFromList::current()
RelativePathWithMetadataPtr ObjectStorageIteratorFromList::current()
{
if (!isValid())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");

View File

@ -12,9 +12,9 @@ public:
virtual void next() = 0;
virtual void nextBatch() = 0;
virtual bool isValid() = 0;
virtual RelativePathWithMetadata current() = 0;
virtual RelativePathWithMetadataPtr current() = 0;
virtual RelativePathsWithMetadata currentBatch() = 0;
virtual std::optional<RelativePathsWithMetadata> getCurrrentBatchAndScheduleNext() = 0;
virtual std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() = 0;
virtual size_t getAccumulatedSize() const = 0;
virtual ~IObjectStorageIterator() = default;
@ -27,9 +27,7 @@ class ObjectStorageIteratorFromList : public IObjectStorageIterator
public:
explicit ObjectStorageIteratorFromList(RelativePathsWithMetadata && batch_)
: batch(std::move(batch_))
, batch_iterator(batch.begin())
{
}
, batch_iterator(batch.begin()) {}
void next() override
{
@ -37,32 +35,26 @@ public:
++batch_iterator;
}
void nextBatch() override
void nextBatch() override { batch_iterator = batch.end(); }
bool isValid() override { return batch_iterator != batch.end(); }
RelativePathWithMetadataPtr current() override;
RelativePathsWithMetadata currentBatch() override { return batch; }
std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() override
{
batch_iterator = batch.end();
if (batch.empty())
return {};
auto current_batch = std::move(batch);
batch = {};
return current_batch;
}
bool isValid() override
{
return batch_iterator != batch.end();
}
size_t getAccumulatedSize() const override { return batch.size(); }
RelativePathWithMetadata current() override;
RelativePathsWithMetadata currentBatch() override
{
return batch;
}
std::optional<RelativePathsWithMetadata> getCurrrentBatchAndScheduleNext() override
{
return std::nullopt;
}
size_t getAccumulatedSize() const override
{
return batch.size();
}
private:
RelativePathsWithMetadata batch;
RelativePathsWithMetadata::iterator batch_iterator;

View File

@ -11,10 +11,37 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
IObjectStorageIteratorAsync::IObjectStorageIteratorAsync(
CurrentMetrics::Metric threads_metric,
CurrentMetrics::Metric threads_active_metric,
CurrentMetrics::Metric threads_scheduled_metric,
const std::string & thread_name)
: list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1)
, list_objects_scheduler(threadPoolCallbackRunnerUnsafe<BatchAndHasNext>(list_objects_pool, thread_name))
{
}
IObjectStorageIteratorAsync::~IObjectStorageIteratorAsync()
{
if (!deactivated)
deactivate();
}
void IObjectStorageIteratorAsync::deactivate()
{
list_objects_pool.wait();
deactivated = true;
}
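deactivate() exists so derived iterators can stop the background listing before tearing down the members it reads; S3IteratorAsync further down in this patch calls it in its destructor before resetting the request. A generic, self-contained sketch of that teardown order (not ClickHouse code):

#include <memory>
#include <thread>

struct AsyncLister
{
    std::unique_ptr<int> request = std::make_unique<int>(100);  // stands in for the listing request
    std::thread worker;

    AsyncLister() : worker([this] { int page_size = *request; (void)page_size; }) {}

    ~AsyncLister()
    {
        worker.join();    // analogous to deactivate(): wait for in-flight listing work
        request.reset();  // only now is it safe to release state the worker used
    }
};

int main()
{
    AsyncLister lister;
}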
void IObjectStorageIteratorAsync::nextBatch()
{
std::lock_guard lock(mutex);
if (!is_finished)
if (is_finished)
{
current_batch.clear();
current_batch_iterator = current_batch.begin();
}
else
{
if (!is_initialized)
{
@ -22,19 +49,27 @@ void IObjectStorageIteratorAsync::nextBatch()
is_initialized = true;
}
BatchAndHasNext next_batch = outcome_future.get();
current_batch = std::move(next_batch.batch);
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
current_batch_iterator = current_batch.begin();
if (next_batch.has_next)
outcome_future = scheduleBatch();
else
is_finished = true;
}
else
{
current_batch.clear();
chassert(outcome_future.valid());
BatchAndHasNext result;
try
{
result = outcome_future.get();
}
catch (...)
{
is_finished = true;
throw;
}
current_batch = std::move(result.batch);
current_batch_iterator = current_batch.begin();
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
if (result.has_next)
outcome_future = scheduleBatch();
else
is_finished = true;
}
}
@ -42,24 +77,10 @@ void IObjectStorageIteratorAsync::next()
{
std::lock_guard lock(mutex);
if (current_batch_iterator != current_batch.end())
{
if (current_batch_iterator == current_batch.end())
nextBatch();
else
++current_batch_iterator;
}
else if (!is_finished)
{
if (outcome_future.valid())
{
BatchAndHasNext next_batch = outcome_future.get();
current_batch = std::move(next_batch.batch);
accumulated_size.fetch_add(current_batch.size(), std::memory_order_relaxed);
current_batch_iterator = current_batch.begin();
if (next_batch.has_next)
outcome_future = scheduleBatch();
else
is_finished = true;
}
}
}
std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIteratorAsync::scheduleBatch()
@ -72,7 +93,6 @@ std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIterator
}, Priority{});
}
bool IObjectStorageIteratorAsync::isValid()
{
if (!is_initialized)
@ -82,7 +102,7 @@ bool IObjectStorageIteratorAsync::isValid()
return current_batch_iterator != current_batch.end();
}
RelativePathWithMetadata IObjectStorageIteratorAsync::current()
RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current()
{
if (!isValid())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
@ -101,20 +121,20 @@ RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch()
return current_batch;
}
std::optional<RelativePathsWithMetadata> IObjectStorageIteratorAsync::getCurrrentBatchAndScheduleNext()
std::optional<RelativePathsWithMetadata> IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext()
{
std::lock_guard lock(mutex);
if (!is_initialized)
nextBatch();
if (current_batch_iterator != current_batch.end())
if (current_batch_iterator == current_batch.end())
{
auto temp_current_batch = current_batch;
nextBatch();
return temp_current_batch;
return std::nullopt;
}
return std::nullopt;
auto temp_current_batch = std::move(current_batch);
nextBatch();
return temp_current_batch;
}
size_t IObjectStorageIteratorAsync::getAccumulatedSize() const

View File

@ -17,24 +17,22 @@ public:
CurrentMetrics::Metric threads_metric,
CurrentMetrics::Metric threads_active_metric,
CurrentMetrics::Metric threads_scheduled_metric,
const std::string & thread_name)
: list_objects_pool(threads_metric, threads_active_metric, threads_scheduled_metric, 1)
, list_objects_scheduler(threadPoolCallbackRunnerUnsafe<BatchAndHasNext>(list_objects_pool, thread_name))
{
}
const std::string & thread_name);
~IObjectStorageIteratorAsync() override;
bool isValid() override;
RelativePathWithMetadataPtr current() override;
RelativePathsWithMetadata currentBatch() override;
void next() override;
void nextBatch() override;
bool isValid() override;
RelativePathWithMetadata current() override;
RelativePathsWithMetadata currentBatch() override;
size_t getAccumulatedSize() const override;
std::optional<RelativePathsWithMetadata> getCurrrentBatchAndScheduleNext() override;
~IObjectStorageIteratorAsync() override
{
list_objects_pool.wait();
}
size_t getAccumulatedSize() const override;
std::optional<RelativePathsWithMetadata> getCurrentBatchAndScheduleNext() override;
void deactivate();
protected:
@ -50,6 +48,7 @@ protected:
bool is_initialized{false};
bool is_finished{false};
bool deactivated{false};
mutable std::recursive_mutex mutex;
ThreadPool list_objects_pool;

View File

@ -61,7 +61,10 @@ void throwIfError(const Aws::Utils::Outcome<Result, Error> & response)
if (!response.IsSuccess())
{
const auto & err = response.GetError();
throw S3Exception(fmt::format("{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType())), err.GetErrorType());
throw S3Exception(
fmt::format("{} (Code: {}, s3 exception: {})",
err.GetMessage(), static_cast<size_t>(err.GetErrorType()), err.GetExceptionName()),
err.GetErrorType());
}
}
@ -111,10 +114,19 @@ public:
CurrentMetrics::ObjectStorageS3ThreadsScheduled,
"ListObjectS3")
, client(client_)
, request(std::make_unique<S3::ListObjectsV2Request>())
{
request.SetBucket(bucket_);
request.SetPrefix(path_prefix);
request.SetMaxKeys(static_cast<int>(max_list_size));
request->SetBucket(bucket_);
request->SetPrefix(path_prefix);
request->SetMaxKeys(static_cast<int>(max_list_size));
}
~S3IteratorAsync() override
{
/// Deactivate background threads before resetting the request to avoid data race.
deactivate();
request.reset();
client.reset();
}
private:
@ -123,34 +135,32 @@ private:
ProfileEvents::increment(ProfileEvents::S3ListObjects);
ProfileEvents::increment(ProfileEvents::DiskS3ListObjects);
bool result = false;
auto outcome = client->ListObjectsV2(request);
auto outcome = client->ListObjectsV2(*request);
/// Outcome failure will be handled on the caller side.
if (outcome.IsSuccess())
{
request->SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
auto objects = outcome.GetResult().GetContents();
result = !objects.empty();
for (const auto & object : objects)
batch.emplace_back(
object.GetKey(),
ObjectMetadata{static_cast<uint64_t>(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}}
);
{
ObjectMetadata metadata{static_cast<uint64_t>(object.GetSize()), Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()), {}};
batch.emplace_back(std::make_shared<RelativePathWithMetadata>(object.GetKey(), std::move(metadata)));
}
if (result)
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
return result;
/// It returns false when all objects were returned
return outcome.GetResult().GetIsTruncated();
}
throw S3Exception(outcome.GetError().GetErrorType(), "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
throw S3Exception(outcome.GetError().GetErrorType(),
"Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
quoteString(request->GetBucket()), quoteString(request->GetPrefix()),
backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
}
std::shared_ptr<const S3::Client> client;
S3::ListObjectsV2Request request;
std::unique_ptr<S3::ListObjectsV2Request> request;
};
}
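The explicit destructor above encodes an ordering requirement: background listing work must be stopped before the request and client it uses are destroyed. A minimal, self-contained sketch of that pattern, with std::thread and unique_ptr standing in for the real thread pool, S3 request and client (all names illustrative, not the real types):
#include <atomic>
#include <memory>
#include <thread>
struct AsyncLister
{
    std::atomic<bool> stop{false};
    std::unique_ptr<int> request = std::make_unique<int>(0);   /// stands in for ListObjectsV2Request
    std::thread worker{[this] { while (!stop) { /* the real code would use *request here */ } }};
    ~AsyncLister()
    {
        stop = true;
        worker.join();      /// corresponds to deactivate(): no background task may touch `request` after this
        request.reset();    /// only now is it safe to release the request/client state
    }
};
int main() { AsyncLister lister; }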
@ -248,12 +258,16 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
if (mode != WriteMode::Rewrite)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 doesn't support append to files");
auto settings_ptr = s3_settings.get();
S3Settings::RequestSettings request_settings = s3_settings.get()->request_settings;
if (auto query_context = CurrentThread::getQueryContext())
{
request_settings.updateFromSettingsIfChanged(query_context->getSettingsRef());
}
ThreadPoolCallbackRunnerUnsafe<void> scheduler;
if (write_settings.s3_allow_parallel_part_upload)
scheduler = threadPoolCallbackRunnerUnsafe<void>(getThreadPoolWriter(), "VFSWrite");
auto blob_storage_log = BlobStorageLogWriter::create(disk_name);
if (blob_storage_log)
blob_storage_log->local_path = object.local_path;
@ -263,7 +277,7 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
uri.bucket,
object.remote_path,
buf_size,
settings_ptr->request_settings,
request_settings,
std::move(blob_storage_log),
attributes,
std::move(scheduler),
@ -271,13 +285,13 @@ std::unique_ptr<WriteBufferFromFileBase> S3ObjectStorage::writeObject( /// NOLIN
}
ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const
ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix, size_t max_keys) const
{
auto settings_ptr = s3_settings.get();
return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), settings_ptr->list_object_keys_size);
return std::make_shared<S3IteratorAsync>(uri.bucket, path_prefix, client.get(), max_keys);
}
void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const
void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const
{
auto settings_ptr = s3_settings.get();
@ -285,7 +299,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
request.SetBucket(uri.bucket);
request.SetPrefix(path);
if (max_keys)
request.SetMaxKeys(max_keys);
request.SetMaxKeys(static_cast<int>(max_keys));
else
request.SetMaxKeys(settings_ptr->list_object_keys_size);
@ -305,19 +319,19 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
break;
for (const auto & object : objects)
children.emplace_back(
children.emplace_back(std::make_shared<RelativePathWithMetadata>(
object.GetKey(),
ObjectMetadata{
static_cast<uint64_t>(object.GetSize()),
Poco::Timestamp::fromEpochTime(object.GetLastModified().Seconds()),
{}});
{}}));
if (max_keys)
{
int keys_left = max_keys - static_cast<int>(children.size());
size_t keys_left = max_keys - children.size();
if (keys_left <= 0)
break;
request.SetMaxKeys(keys_left);
request.SetMaxKeys(static_cast<int>(keys_left));
}
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
@ -425,14 +439,16 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects)
std::optional<ObjectMetadata> S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const
{
auto settings_ptr = s3_settings.get();
auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* throw_on_error= */ false);
auto object_info = S3::getObjectInfo(
*client.get(), uri.bucket, path, {}, settings_ptr->request_settings,
/* with_metadata= */ true, /* throw_on_error= */ false);
if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty())
return {};
ObjectMetadata result;
result.size_bytes = object_info.size;
result.last_modified = object_info.last_modification_time;
result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time);
result.attributes = object_info.metadata;
return result;
@ -441,11 +457,20 @@ std::optional<ObjectMetadata> S3ObjectStorage::tryGetObjectMetadata(const std::s
ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const
{
auto settings_ptr = s3_settings.get();
auto object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true);
S3::ObjectInfo object_info;
try
{
object_info = S3::getObjectInfo(*client.get(), uri.bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true);
}
catch (DB::Exception & e)
{
e.addMessage("while reading " + path);
throw;
}
ObjectMetadata result;
result.size_bytes = object_info.size;
result.last_modified = object_info.last_modification_time;
result.last_modified = Poco::Timestamp::fromEpochTime(object_info.last_modification_time);
result.attributes = object_info.metadata;
return result;
@ -544,19 +569,38 @@ void S3ObjectStorage::startup()
const_cast<S3::Client &>(*client.get()).EnableRequestProcessing();
}
void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
void S3ObjectStorage::applyNewSettings(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context,
const ApplyNewSettingsOptions & options)
{
auto new_s3_settings = getSettings(config, config_prefix, context);
auto new_client = getClient(config, config_prefix, context, *new_s3_settings);
s3_settings.set(std::move(new_s3_settings));
client.set(std::move(new_client));
auto settings_from_config = getSettings(config, config_prefix, context, context->getSettingsRef().s3_validate_request_settings);
auto modified_settings = std::make_unique<S3ObjectStorageSettings>(*s3_settings.get());
modified_settings->auth_settings.updateFrom(settings_from_config->auth_settings);
modified_settings->request_settings = settings_from_config->request_settings;
if (auto endpoint_settings = context->getStorageS3Settings().getSettings(uri.uri.toString(), context->getUserName()))
modified_settings->auth_settings.updateFrom(endpoint_settings->auth_settings);
auto current_settings = s3_settings.get();
if (options.allow_client_change
&& (current_settings->auth_settings.hasUpdates(modified_settings->auth_settings) || for_disk_s3))
{
auto new_client = getClient(config, config_prefix, context, *modified_settings, for_disk_s3, &uri);
client.set(std::move(new_client));
}
s3_settings.set(std::move(modified_settings));
}
std::unique_ptr<IObjectStorage> S3ObjectStorage::cloneObjectStorage(
const std::string & new_namespace, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context)
const std::string & new_namespace,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context)
{
auto new_s3_settings = getSettings(config, config_prefix, context);
auto new_client = getClient(config, config_prefix, context, *new_s3_settings);
auto new_client = getClient(config, config_prefix, context, *new_s3_settings, true);
auto new_uri{uri};
new_uri.bucket = new_namespace;

View File

@ -21,11 +21,13 @@ struct S3ObjectStorageSettings
S3ObjectStorageSettings(
const S3Settings::RequestSettings & request_settings_,
const S3::AuthSettings & auth_settings_,
uint64_t min_bytes_for_seek_,
int32_t list_object_keys_size_,
int32_t objects_chunk_size_to_delete_,
bool read_only_)
: request_settings(request_settings_)
, auth_settings(auth_settings_)
, min_bytes_for_seek(min_bytes_for_seek_)
, list_object_keys_size(list_object_keys_size_)
, objects_chunk_size_to_delete(objects_chunk_size_to_delete_)
@ -33,6 +35,7 @@ struct S3ObjectStorageSettings
{}
S3Settings::RequestSettings request_settings;
S3::AuthSettings auth_settings;
uint64_t min_bytes_for_seek;
int32_t list_object_keys_size;
@ -50,7 +53,8 @@ private:
S3::URI uri_,
const S3Capabilities & s3_capabilities_,
ObjectStorageKeysGeneratorPtr key_generator_,
const String & disk_name_)
const String & disk_name_,
bool for_disk_s3_ = true)
: uri(uri_)
, disk_name(disk_name_)
, client(std::move(client_))
@ -58,6 +62,7 @@ private:
, s3_capabilities(s3_capabilities_)
, key_generator(std::move(key_generator_))
, log(getLogger(logger_name))
, for_disk_s3(for_disk_s3_)
{
}
@ -98,9 +103,9 @@ public:
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
const WriteSettings & write_settings = {}) override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override;
void listObjects(const std::string & path, RelativePathsWithMetadata & children, size_t max_keys) const override;
ObjectStorageIteratorPtr iterate(const std::string & path_prefix) const override;
ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override;
/// Uses `DeleteObjectRequest`.
void removeObject(const StoredObject & object) override;
@ -142,7 +147,8 @@ public:
void applyNewSettings(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context) override;
ContextPtr context,
const ApplyNewSettingsOptions & options) override;
std::string getObjectsNamespace() const override { return uri.bucket; }
@ -179,6 +185,8 @@ private:
ObjectStorageKeysGeneratorPtr key_generator;
LoggerPtr log;
const bool for_disk_s3;
};
}

View File

@ -25,19 +25,29 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace ErrorCodes
{
extern const int NO_ELEMENTS_IN_CONFIG;
}
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context)
std::unique_ptr<S3ObjectStorageSettings> getSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
bool validate_settings)
{
const Settings & settings = context->getSettingsRef();
S3Settings::RequestSettings request_settings(config, config_prefix, settings, "s3_");
auto request_settings = S3Settings::RequestSettings(config, config_prefix, settings, "s3_", validate_settings);
auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config);
return std::make_unique<S3ObjectStorageSettings>(
request_settings,
auth_settings,
config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024),
config.getInt(config_prefix + ".list_object_keys_size", 1000),
config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000),
@ -48,82 +58,99 @@ std::unique_ptr<S3::Client> getClient(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
const S3ObjectStorageSettings & settings)
const S3ObjectStorageSettings & settings,
bool for_disk_s3,
const S3::URI * url_)
{
const Settings & global_settings = context->getGlobalContext()->getSettingsRef();
const Settings & local_settings = context->getSettingsRef();
const String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
S3::URI uri(endpoint);
if (!uri.key.ends_with('/'))
uri.key.push_back('/');
const auto & auth_settings = settings.auth_settings;
const auto & request_settings = settings.request_settings;
if (S3::isS3ExpressEndpoint(endpoint) && !config.has(config_prefix + ".region"))
S3::URI url;
if (for_disk_s3)
{
String endpoint = context->getMacros()->expand(config.getString(config_prefix + ".endpoint"));
url = S3::URI(endpoint);
if (!url.key.ends_with('/'))
url.key.push_back('/');
}
else
{
if (!url_)
throw Exception(ErrorCodes::LOGICAL_ERROR, "URL not passed");
url = *url_;
}
const bool is_s3_express_bucket = S3::isS3ExpressEndpoint(url.endpoint);
if (is_s3_express_bucket && !config.has(config_prefix + ".region"))
{
throw Exception(
ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Region should be explicitly specified for directory buckets ({})", config_prefix);
}
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
config.getString(config_prefix + ".region", ""),
auth_settings.region,
context->getRemoteHostFilter(),
static_cast<int>(global_settings.s3_max_redirects),
static_cast<int>(global_settings.s3_retry_attempts),
global_settings.enable_s3_requests_logging,
/* for_disk_s3 = */ true,
for_disk_s3,
settings.request_settings.get_request_throttler,
settings.request_settings.put_request_throttler,
uri.uri.getScheme());
url.uri.getScheme());
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS);
client_configuration.connectTimeoutMs = config.getUInt64(config_prefix + ".connect_timeout_ms", local_settings.s3_connect_timeout_ms.value);
client_configuration.requestTimeoutMs = config.getUInt64(config_prefix + ".request_timeout_ms", local_settings.s3_request_timeout_ms.value);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", static_cast<unsigned>(request_settings.max_connections));
client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT);
client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS);
client_configuration.endpointOverride = uri.endpoint;
client_configuration.endpointOverride = url.endpoint;
client_configuration.s3_use_adaptive_timeouts = config.getBool(
config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts);
/*
* Override proxy configuration for backwards compatibility with old configuration format.
* */
auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
ProxyConfiguration::protocolFromString(uri.uri.getScheme()),
config_prefix,
config
);
if (proxy_config)
if (for_disk_s3)
{
client_configuration.per_request_configuration
= [proxy_config]() { return proxy_config->resolve(); };
client_configuration.error_report
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
/*
* Override proxy configuration for backwards compatibility with old configuration format.
* */
if (auto proxy_config = DB::ProxyConfigurationResolverProvider::getFromOldSettingsFormat(
ProxyConfiguration::protocolFromString(url.uri.getScheme()), config_prefix, config))
{
client_configuration.per_request_configuration
= [proxy_config]() { return proxy_config->resolve(); };
client_configuration.error_report
= [proxy_config](const auto & request_config) { proxy_config->errorReport(request_config); };
}
}
HTTPHeaderEntries headers = S3::getHTTPHeaders(config_prefix, config);
S3::ServerSideEncryptionKMSConfig sse_kms_config = S3::getSSEKMSConfig(config_prefix, config);
S3::ClientSettings client_settings{
.use_virtual_addressing = uri.is_virtual_hosted_style,
.use_virtual_addressing = url.is_virtual_hosted_style,
.disable_checksum = local_settings.s3_disable_checksum,
.gcs_issue_compose_request = config.getBool("s3.gcs_issue_compose_request", false),
.is_s3express_bucket = S3::isS3ExpressEndpoint(endpoint),
.is_s3express_bucket = is_s3_express_bucket,
};
auto credentials_configuration = S3::CredentialsConfiguration
{
auth_settings.use_environment_credentials.value_or(context->getConfigRef().getBool("s3.use_environment_credentials", true)),
auth_settings.use_insecure_imds_request.value_or(context->getConfigRef().getBool("s3.use_insecure_imds_request", false)),
auth_settings.expiration_window_seconds.value_or(context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)),
};
return S3::ClientFactory::instance().create(
client_configuration,
client_settings,
config.getString(config_prefix + ".access_key_id", ""),
config.getString(config_prefix + ".secret_access_key", ""),
config.getString(config_prefix + ".server_side_encryption_customer_key_base64", ""),
auth_settings.access_key_id,
auth_settings.secret_access_key,
auth_settings.server_side_encryption_customer_key_base64,
std::move(sse_kms_config),
std::move(headers),
S3::CredentialsConfiguration
{
config.getBool(config_prefix + ".use_environment_credentials", config.getBool("s3.use_environment_credentials", true)),
config.getBool(config_prefix + ".use_insecure_imds_request", config.getBool("s3.use_insecure_imds_request", false)),
config.getUInt64(config_prefix + ".expiration_window_seconds", config.getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)),
config.getBool(config_prefix + ".no_sign_request", config.getBool("s3.no_sign_request", false))
});
auth_settings.headers,
credentials_configuration,
auth_settings.session_token);
}
}

View File

@ -14,9 +14,19 @@ namespace DB
struct S3ObjectStorageSettings;
std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context);
std::unique_ptr<S3ObjectStorageSettings> getSettings(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
bool validate_settings = true);
std::unique_ptr<S3::Client> getClient(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context, const S3ObjectStorageSettings & settings);
std::unique_ptr<S3::Client> getClient(
const Poco::Util::AbstractConfiguration & config,
const String & config_prefix,
ContextPtr context,
const S3ObjectStorageSettings & settings,
bool for_disk_s3,
const S3::URI * url_ = nullptr);
}

View File

@ -344,11 +344,6 @@ void WebObjectStorage::startup()
{
}
void WebObjectStorage::applyNewSettings(
const Poco::Util::AbstractConfiguration & /* config */, const std::string & /* config_prefix */, ContextPtr /* context */)
{
}
ObjectMetadata WebObjectStorage::getObjectMetadata(const std::string & /* path */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for {}", getName());

View File

@ -72,11 +72,6 @@ public:
void startup() override;
void applyNewSettings(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context) override;
String getObjectsNamespace() const override { return ""; }
std::unique_ptr<IObjectStorage> cloneObjectStorage(

View File

@ -21,8 +21,6 @@ namespace ErrorCodes
const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
{
if (!column)
return {};
if (!isColumnConst(*column))
return {};

View File

@ -0,0 +1,255 @@
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionsRandom.h>
#include <Functions/FunctionHelpers.h>
#include <Core/ServerUUID.h>
#include <Common/Logger.h>
#include <Common/logger_useful.h>
#include "base/types.h"
namespace DB
{
namespace
{
/* Snowflake ID
https://en.wikipedia.org/wiki/Snowflake_ID
 0                   1                   2                   3
 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|0|                         timestamp                           |
|                   |     machine_id    |    machine_seq_num    |
- The first 41 bits (plus 1 top zero bit) are the timestamp (milliseconds since the Unix epoch, 1 Jan 1970)
- The middle 10 bits are the machine ID
- The last 12 bits are a counter to disambiguate multiple Snowflake IDs generated within the same millisecond by different processes
*/
/// bit counts
constexpr auto timestamp_bits_count = 41;
constexpr auto machine_id_bits_count = 10;
constexpr auto machine_seq_num_bits_count = 12;
/// bits masks for Snowflake ID components
constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count;
constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1;
/// max values
constexpr uint64_t max_machine_seq_num = machine_seq_num_mask;
uint64_t getTimestamp()
{
auto now = std::chrono::system_clock::now();
auto ticks_since_epoch = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()).count();
return static_cast<uint64_t>(ticks_since_epoch) & ((1ull << timestamp_bits_count) - 1);
}
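A quick capacity check on the 41-bit timestamp field masked off in getTimestamp(): 2^41 ms = 2,199,023,255,552 ms ≈ 2.2 * 10^9 s ≈ 69.7 years, so millisecond timestamps counted from the Unix epoch (1 Jan 1970) stay within the mask until roughly the year 2039, after which the value would wrap.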
uint64_t getMachineIdImpl()
{
UUID server_uuid = ServerUUID::get();
/// hash into 64 bits
uint64_t hi = UUIDHelpers::getHighBytes(server_uuid);
uint64_t lo = UUIDHelpers::getLowBytes(server_uuid);
/// return only 10 bits
return (((hi * 11) ^ (lo * 17)) & machine_id_mask) >> machine_seq_num_bits_count;
}
uint64_t getMachineId()
{
static uint64_t machine_id = getMachineIdImpl();
return machine_id;
}
struct SnowflakeId
{
uint64_t timestamp;
uint64_t machine_id;
uint64_t machine_seq_num;
};
SnowflakeId toSnowflakeId(uint64_t snowflake)
{
return {.timestamp = (snowflake >> (machine_id_bits_count + machine_seq_num_bits_count)),
.machine_id = ((snowflake & machine_id_mask) >> machine_seq_num_bits_count),
.machine_seq_num = (snowflake & machine_seq_num_mask)};
}
uint64_t fromSnowflakeId(SnowflakeId components)
{
return (components.timestamp << (machine_id_bits_count + machine_seq_num_bits_count) |
components.machine_id << (machine_seq_num_bits_count) |
components.machine_seq_num);
}
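The packing above is plain shifting and masking; a self-contained check of the round trip, using illustrative values that fit the 41/10/12-bit fields (not part of the patch):
#include <cassert>
#include <cstdint>
int main()
{
    const uint64_t timestamp = 0x123456789;  /// fits in 41 bits
    const uint64_t machine_id = 0x2AB;       /// fits in 10 bits
    const uint64_t seq = 0xFED;              /// fits in 12 bits
    const uint64_t id = (timestamp << 22) | (machine_id << 12) | seq;  /// same packing as fromSnowflakeId
    assert((id >> 22) == timestamp);                          /// toSnowflakeId: timestamp component
    assert(((id >> 12) & ((1ull << 10) - 1)) == machine_id);  /// machine_id component
    assert((id & ((1ull << 12) - 1)) == seq);                 /// machine_seq_num component
}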
struct SnowflakeIdRange
{
SnowflakeId begin; /// inclusive
SnowflakeId end; /// exclusive
};
/// To get the range of `input_rows_count` Snowflake IDs from `max(available, now)`:
/// 1. calculate Snowflake ID by current timestamp (`now`)
/// 2. `begin = max(available, now)`
/// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow
SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count)
{
/// 1. `now`
SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0};
/// 2. `begin`
if (begin.timestamp <= available.timestamp)
{
begin.timestamp = available.timestamp;
begin.machine_seq_num = available.machine_seq_num;
}
/// 3. `end = begin + input_rows_count`
SnowflakeId end;
const uint64_t seq_nums_in_current_timestamp_left = (max_machine_seq_num - begin.machine_seq_num + 1);
if (input_rows_count >= seq_nums_in_current_timestamp_left)
/// if the sequence numbers left in the current timestamp are not enough for all rows, forward the timestamp by at least 1 tick, depending on by how many elements input_rows_count overflows
end.timestamp = begin.timestamp + 1 + (input_rows_count - seq_nums_in_current_timestamp_left) / (max_machine_seq_num + 1);
else
end.timestamp = begin.timestamp;
end.machine_id = begin.machine_id;
end.machine_seq_num = (begin.machine_seq_num + input_rows_count) & machine_seq_num_mask;
return {begin, end};
}
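A worked example of the overflow branch, with illustrative numbers: suppose begin.machine_seq_num = 4094 and input_rows_count = 3. Then seq_nums_in_current_timestamp_left = 4095 - 4094 + 1 = 2, which is less than 3, so end.timestamp = begin.timestamp + 1 + (3 - 2) / 4096 = begin.timestamp + 1 and end.machine_seq_num = (4094 + 3) & 0xFFF = 1. The three reserved IDs are (ts, 4094), (ts, 4095) and (ts + 1, 0), and the exclusive end of the range is (ts + 1, 1).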
struct GlobalCounterPolicy
{
static constexpr auto name = "generateSnowflakeID";
static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
/// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously.
struct Data
{
static inline std::atomic<uint64_t> lowest_available_snowflake_id = 0;
SnowflakeId reserveRange(size_t input_rows_count)
{
uint64_t available_snowflake_id = lowest_available_snowflake_id.load();
SnowflakeIdRange range;
do
{
range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count);
}
while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end)));
/// if the CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try
/// else --> our thread reserved the ID range [begin, end) and we return the beginning of the range
return range.begin;
}
};
};
struct ThreadLocalCounterPolicy
{
static constexpr auto name = "generateSnowflakeIDThreadMonotonic";
static constexpr auto description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine ID (10 bits) and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. This function behaves like generateSnowflakeID but gives no guarantee on counter monotonicity across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate Snowflake IDs.)";
/// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
struct Data
{
static inline thread_local uint64_t lowest_available_snowflake_id = 0;
SnowflakeId reserveRange(size_t input_rows_count)
{
SnowflakeIdRange range = getRangeOfAvailableIds(toSnowflakeId(lowest_available_snowflake_id), input_rows_count);
lowest_available_snowflake_id = fromSnowflakeId(range.end);
return range.begin;
}
};
};
}
template <typename FillPolicy>
class FunctionGenerateSnowflakeID : public IFunction, public FillPolicy
{
public:
static FunctionPtr create(ContextPtr /*context*/) { return std::make_shared<FunctionGenerateSnowflakeID>(); }
String getName() const override { return FillPolicy::name; }
size_t getNumberOfArguments() const override { return 0; }
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const override { return false; }
bool useDefaultImplementationForNulls() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
bool isVariadic() const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args;
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary expression"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
return std::make_shared<DataTypeUInt64>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override
{
auto col_res = ColumnVector<UInt64>::create();
typename ColumnVector<UInt64>::Container & vec_to = col_res->getData();
if (input_rows_count != 0)
{
vec_to.resize(input_rows_count);
typename FillPolicy::Data data;
SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range
for (UInt64 & to_row : vec_to)
{
to_row = fromSnowflakeId(snowflake_id);
if (snowflake_id.machine_seq_num == max_machine_seq_num)
{
/// handle overflow
snowflake_id.machine_seq_num = 0;
++snowflake_id.timestamp;
}
else
{
++snowflake_id.machine_seq_num;
}
}
}
return col_res;
}
};
template<typename FillPolicy>
void registerSnowflakeIDGenerator(auto & factory)
{
static constexpr auto doc_syntax_format = "{}([expression])";
static constexpr auto example_format = "SELECT {}()";
static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
FunctionDocumentation::Description description = FillPolicy::description;
FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64";
FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories categories = {"Snowflake ID"};
factory.template registerFunction<FunctionGenerateSnowflakeID<FillPolicy>>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive);
}
REGISTER_FUNCTION(GenerateSnowflakeID)
{
registerSnowflakeIDGenerator<GlobalCounterPolicy>(factory);
registerSnowflakeIDGenerator<ThreadLocalCounterPolicy>(factory);
}
}

View File

@ -76,7 +76,7 @@ void setVariant(UUID & uuid)
struct FillAllRandomPolicy
{
static constexpr auto name = "generateUUIDv7NonMonotonic";
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)";
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), and a random field (74 bit, including a 2-bit variant field "2") to distinguish UUIDs within a millisecond. This function is the fastest generateUUIDv7* function but it gives no monotonicity guarantees within a timestamp.)";
struct Data
{
void generate(UUID & uuid, uint64_t ts)
@ -136,7 +136,7 @@ struct CounterFields
struct GlobalCounterPolicy
{
static constexpr auto name = "generateUUIDv7";
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. Function generateUUIDv7 guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)";
/// Guarantee counter monotonicity within one timestamp across all threads generating UUIDv7 simultaneously.
struct Data
@ -159,7 +159,7 @@ struct GlobalCounterPolicy
struct ThreadLocalCounterPolicy
{
static constexpr auto name = "generateUUIDv7ThreadMonotonic";
static constexpr auto doc_description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)";
static constexpr auto description = R"(Generates a UUID of version 7. The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bit, including a variant field "2", 2 bit) to distinguish UUIDs within a millisecond, and a random field (32 bits). For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. This function behaves like generateUUIDv7 but gives no guarantee on counter monotony across different simultaneous requests. Monotonicity within one timestamp is guaranteed only within the same thread calling this function to generate UUIDs.)";
/// Guarantee counter monotonicity within one timestamp within the same thread. Faster than GlobalCounterPolicy if a query uses multiple threads.
struct Data
@ -186,7 +186,6 @@ class FunctionGenerateUUIDv7Base : public IFunction, public FillPolicy
{
public:
String getName() const final { return FillPolicy::name; }
size_t getNumberOfArguments() const final { return 0; }
bool isDeterministic() const override { return false; }
bool isDeterministicInScopeOfQuery() const final { return false; }
@ -198,7 +197,7 @@ public:
{
FunctionArgumentDescriptors mandatory_args;
FunctionArgumentDescriptors optional_args{
{"expr", nullptr, nullptr, "Arbitrary Expression"}
{"expr", nullptr, nullptr, "Arbitrary expression"}
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
@ -264,20 +263,20 @@ private:
};
template<typename FillPolicy>
void registerUUIDv7Generator(auto& factory)
void registerUUIDv7Generator(auto & factory)
{
static constexpr auto doc_syntax_format = "{}([expression])";
static constexpr auto example_format = "SELECT {}()";
static constexpr auto multiple_example_format = "SELECT {f}(1), {f}(2)";
FunctionDocumentation::Description doc_description = FillPolicy::doc_description;
FunctionDocumentation::Syntax doc_syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments doc_arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue doc_returned_value = "A value of type UUID version 7.";
FunctionDocumentation::Examples doc_examples = {{"uuid", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories doc_categories = {"UUID"};
FunctionDocumentation::Description description = FillPolicy::description;
FunctionDocumentation::Syntax syntax = fmt::format(doc_syntax_format, FillPolicy::name);
FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}};
FunctionDocumentation::ReturnedValue returned_value = "A value of type UUID version 7.";
FunctionDocumentation::Examples examples = {{"single", fmt::format(example_format, FillPolicy::name), ""}, {"multiple", fmt::format(multiple_example_format, fmt::arg("f", FillPolicy::name)), ""}};
FunctionDocumentation::Categories categories = {"UUID"};
factory.template registerFunction<FunctionGenerateUUIDv7Base<FillPolicy>>({doc_description, doc_syntax, doc_arguments, doc_returned_value, doc_examples, doc_categories}, FunctionFactory::CaseInsensitive);
factory.template registerFunction<FunctionGenerateUUIDv7Base<FillPolicy>>({description, syntax, arguments, returned_value, examples, categories}, FunctionFactory::CaseInsensitive);
}
REGISTER_FUNCTION(GenerateUUIDv7)

View File

@ -4,8 +4,7 @@
#if USE_AZURE_BLOB_STORAGE
#include <Storages/StorageAzureBlobCluster.h>
#include <Storages/StorageAzureBlob.h>
#include <Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h>
#include <Common/threadPoolCallbackRunner.h>
#include <base/types.h>
#include <functional>

View File

@ -29,6 +29,7 @@ struct URI
std::string key;
std::string version_id;
std::string storage_name;
/// Path (or path pattern) in archive if uri is an archive.
std::optional<std::string> archive_pattern;
std::string uri_str;

View File

@ -53,7 +53,7 @@ namespace
const auto & result = outcome.GetResult();
ObjectInfo object_info;
object_info.size = static_cast<size_t>(result.GetContentLength());
object_info.last_modification_time = result.GetLastModified().Millis() / 1000;
object_info.last_modification_time = result.GetLastModified().Seconds();
if (with_metadata)
object_info.metadata = result.GetMetadata();

View File

@ -174,8 +174,11 @@ void AuthSettings::updateFrom(const AuthSettings & from)
if (!from.session_token.empty())
session_token = from.session_token;
headers = from.headers;
region = from.region;
if (!from.headers.empty())
headers = from.headers;
if (!from.region.empty())
region = from.region;
server_side_encryption_customer_key_base64 = from.server_side_encryption_customer_key_base64;
server_side_encryption_kms_config = from.server_side_encryption_kms_config;
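The change above turns updateFrom() into a partial merge: empty fields in `from` no longer wipe values that are already configured. A minimal self-contained sketch of that rule, with std::string standing in for the real header/region types (all names illustrative):
#include <cassert>
#include <string>
struct Auth { std::string region, headers; };
void updateFrom(Auth & to, const Auth & from)
{
    if (!from.headers.empty()) to.headers = from.headers;  /// keep the existing value when `from` is empty
    if (!from.region.empty())  to.region  = from.region;
}
int main()
{
    Auth disk{.region = "us-east-1", .headers = "x-amz-meta-a: 1"};
    Auth endpoint{};                    /// endpoint config leaves both fields empty
    updateFrom(disk, endpoint);
    assert(disk.region == "us-east-1"); /// previously this would have been overwritten with ""
}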

View File

@ -2,7 +2,7 @@
#include <string>
#include <IO/WriteBufferFromFile.h>
#include <IO/copyData.h>
#include <Storages/HDFS/ReadBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
#include <base/types.h>
#include <Common/Config/ConfigProcessor.h>

View File

@ -547,7 +547,7 @@ public:
std::unique_ptr<WriteBufferFromS3> getWriteBuffer(String file_name = "file")
{
S3Settings::RequestSettings request_settings;
request_settings.updateFromSettings(settings);
request_settings.updateFromSettingsIfChanged(settings);
client->resetCounters();

View File

@ -126,6 +126,11 @@ bool astContainsSystemTables(ASTPtr ast, ContextPtr context)
namespace
{
bool isQueryCacheRelatedSetting(const String & setting_name)
{
return setting_name.starts_with("query_cache_") || setting_name.ends_with("_query_cache");
}
class RemoveQueryCacheSettingsMatcher
{
public:
@ -141,7 +146,7 @@ public:
auto is_query_cache_related_setting = [](const auto & change)
{
return change.name.starts_with("query_cache_") || change.name.ends_with("_query_cache");
return isQueryCacheRelatedSetting(change.name);
};
std::erase_if(set_clause->changes, is_query_cache_related_setting);
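For concreteness, a self-contained check of the predicate introduced above (the helper is re-declared locally so the snippet stands alone; the setting names are just examples):
#include <cassert>
#include <string>
static bool isQueryCacheRelatedSetting(const std::string & name)
{
    return name.starts_with("query_cache_") || name.ends_with("_query_cache");
}
int main()
{
    assert(isQueryCacheRelatedSetting("query_cache_ttl"));               /// prefix match
    assert(isQueryCacheRelatedSetting("enable_reads_from_query_cache")); /// suffix match
    assert(!isQueryCacheRelatedSetting("max_threads"));                  /// unrelated settings are kept
}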
@ -177,11 +182,11 @@ ASTPtr removeQueryCacheSettings(ASTPtr ast)
return transformed_ast;
}
IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database)
IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database, const Settings & settings)
{
ast = removeQueryCacheSettings(ast);
/// Hash the AST, it must consider aliases (issue #56258)
/// Hash the AST; we must consider aliases (issue #56258)
SipHash hash;
ast->updateTreeHash(hash, /*ignore_aliases=*/ false);
@ -189,6 +194,25 @@ IAST::Hash calculateAstHash(ASTPtr ast, const String & current_database)
/// tables (issue #64136)
hash.update(current_database);
/// Finally, hash the (changed) settings as they might affect the query result (e.g. think of settings `additional_table_filters` and `limit`).
/// Note: allChanged() returns the settings in random order. Also, update()-s of the composite hash must be done in deterministic order.
/// Therefore, collect and sort the settings first, then hash them.
Settings::Range changed_settings = settings.allChanged();
std::vector<std::pair<String, String>> changed_settings_sorted; /// (name, value)
for (const auto & setting : changed_settings)
{
const String & name = setting.getName();
const String & value = setting.getValueString();
if (!isQueryCacheRelatedSetting(name)) /// see removeQueryCacheSettings() for why this is a good idea
changed_settings_sorted.push_back({name, value});
}
std::sort(changed_settings_sorted.begin(), changed_settings_sorted.end(), [](auto & lhs, auto & rhs) { return lhs.first < rhs.first; });
for (const auto & setting : changed_settings_sorted)
{
hash.update(setting.first);
hash.update(setting.second);
}
return getSipHash128AsPair(hash);
}
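The sort step above is what makes the cache key independent of the iteration order of allChanged(). A self-contained demonstration of that property, with std::hash standing in for SipHash and illustrative setting names and values:
#include <algorithm>
#include <cassert>
#include <string>
#include <utility>
#include <vector>
static size_t hashSettings(std::vector<std::pair<std::string, std::string>> changed)
{
    std::sort(changed.begin(), changed.end());   /// same ordering step as calculateAstHash()
    std::string buf;
    for (const auto & [name, value] : changed)
        buf += name + "=" + value + ";";
    return std::hash<std::string>{}(buf);
}
int main()
{
    std::vector<std::pair<std::string, std::string>> a = {{"limit", "10"}, {"additional_table_filters", "x"}};
    std::vector<std::pair<std::string, std::string>> b = {{"additional_table_filters", "x"}, {"limit", "10"}};
    assert(hashSettings(a) == hashSettings(b));  /// iteration order of allChanged() no longer matters
}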
@ -204,12 +228,13 @@ String queryStringFromAST(ASTPtr ast)
QueryCache::Key::Key(
ASTPtr ast_,
const String & current_database,
const Settings & settings,
Block header_,
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
bool is_shared_,
std::chrono::time_point<std::chrono::system_clock> expires_at_,
bool is_compressed_)
: ast_hash(calculateAstHash(ast_, current_database))
: ast_hash(calculateAstHash(ast_, current_database, settings))
, header(header_)
, user_id(user_id_)
, current_user_roles(current_user_roles_)
@ -220,8 +245,8 @@ QueryCache::Key::Key(
{
}
QueryCache::Key::Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, current_database, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
QueryCache::Key::Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_)
: QueryCache::Key(ast_, current_database, settings, {}, user_id_, current_user_roles_, false, std::chrono::system_clock::from_time_t(1), false) /// dummy values for everything != AST, current database, user name/roles
{
}

View File

@ -14,6 +14,8 @@
namespace DB
{
struct Settings;
/// Does AST contain non-deterministic functions like rand() and now()?
bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context);
@ -89,6 +91,7 @@ public:
/// Ctor to construct a Key for writing into query cache.
Key(ASTPtr ast_,
const String & current_database,
const Settings & settings,
Block header_,
std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_,
bool is_shared_,
@ -96,7 +99,7 @@ public:
bool is_compressed);
/// Ctor to construct a Key for reading from query cache (this operation only needs the AST + user name).
Key(ASTPtr ast_, const String & current_database, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
Key(ASTPtr ast_, const String & current_database, const Settings & settings, std::optional<UUID> user_id_, const std::vector<UUID> & current_user_roles_);
bool operator==(const Key & other) const;
};

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
extern const int TABLE_IS_READ_ONLY;
extern const int SUPPORT_IS_DISABLED;
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
}
@ -107,7 +108,19 @@ BlockIO InterpreterDeleteQuery::execute()
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "DELETE query is not supported for table {}", table->getStorageID().getFullTableName());
/// For now this only provides a better exception for the case of a table with projections;
/// it could act differently according to a setting.
if (table->hasProjection())
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"DELETE query is not supported for table {} as it has projections. "
"User should drop all the projections manually before running the query",
table->getStorageID().getFullTableName());
}
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"DELETE query is not supported for table {}",
table->getStorageID().getFullTableName());
}
}

View File

@ -51,11 +51,12 @@
#include <Storages/Freeze.h>
#include <Storages/StorageFactory.h>
#include <Storages/StorageFile.h>
#include <Storages/StorageS3.h>
#include <Storages/StorageURL.h>
#include <Storages/StorageAzureBlob.h>
#include <Storages/ObjectStorage/StorageObjectStorage.h>
#include <Storages/ObjectStorage/S3/Configuration.h>
#include <Storages/ObjectStorage/HDFS/Configuration.h>
#include <Storages/ObjectStorage/Azure/Configuration.h>
#include <Storages/MaterializedView/RefreshTask.h>
#include <Storages/HDFS/StorageHDFS.h>
#include <Storages/System/StorageSystemFilesystemCache.h>
#include <Parsers/ASTSystemQuery.h>
#include <Parsers/ASTCreateQuery.h>
@ -500,17 +501,17 @@ BlockIO InterpreterSystemQuery::execute()
StorageFile::getSchemaCache(getContext()).clear();
#if USE_AWS_S3
if (caches_to_drop.contains("S3"))
StorageS3::getSchemaCache(getContext()).clear();
StorageObjectStorage::getSchemaCache(getContext(), StorageS3Configuration::type_name).clear();
#endif
#if USE_HDFS
if (caches_to_drop.contains("HDFS"))
StorageHDFS::getSchemaCache(getContext()).clear();
StorageObjectStorage::getSchemaCache(getContext(), StorageHDFSConfiguration::type_name).clear();
#endif
if (caches_to_drop.contains("URL"))
StorageURL::getSchemaCache(getContext()).clear();
#if USE_AZURE_BLOB_STORAGE
if (caches_to_drop.contains("AZURE"))
StorageAzureBlob::getSchemaCache(getContext()).clear();
StorageObjectStorage::getSchemaCache(getContext(), StorageAzureConfiguration::type_name).clear();
#endif
break;
}

View File

@ -164,6 +164,12 @@ public:
void pushNotIn(CNFQuery::AtomicFormula & atom);
/// Reduces CNF groups by removing mutually exclusive atoms
/// found across groups, in case other atoms are identical.
/// Might require multiple passes to complete reduction.
///
/// Example:
/// (x OR y) AND (x OR !y) -> x
template <typename TAndGroup>
TAndGroup reduceOnceCNFStatements(const TAndGroup & groups)
{
@ -175,10 +181,19 @@ TAndGroup reduceOnceCNFStatements(const TAndGroup & groups)
bool inserted = false;
for (const auto & atom : group)
{
copy.erase(atom);
using AtomType = std::decay_t<decltype(atom)>;
AtomType negative_atom(atom);
negative_atom.negative = !atom.negative;
// Skipping erase-insert for mutually exclusive atoms within a
// single group, since it would not insert the negative atom, which
// would break the logic of this rule
if (copy.contains(negative_atom))
{
continue;
}
copy.erase(atom);
copy.insert(negative_atom);
if (groups.contains(copy))
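A worked instance of the reduction named in the comment above: given the groups (x OR y) AND (x OR NOT y), replacing y by its negation in the first group yields (x OR NOT y), which is already present among the groups, so the pair collapses because (x OR y) AND (x OR NOT y) = x OR (y AND NOT y) = x. filterCNFSubsets() below then drops any remaining group that has (x) as a subset, e.g. (x OR y) AND (x) -> (x).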
@ -209,6 +224,10 @@ bool isCNFGroupSubset(const TOrGroup & left, const TOrGroup & right)
return true;
}
/// Removes CNF groups if subset group is found in CNF.
///
/// Example:
/// (x OR y) AND (x) -> x
template <typename TAndGroup>
TAndGroup filterCNFSubsets(const TAndGroup & groups)
{

View File

@ -91,6 +91,22 @@ bool checkIfGroupAlwaysTrueGraph(const CNFQuery::OrGroup & group, const Comparis
return false;
}
bool checkIfGroupAlwaysTrueAtoms(const CNFQuery::OrGroup & group)
{
/// Filters out groups containing mutually exclusive atoms,
/// since these groups are always True
for (const auto & atom : group)
{
auto negated(atom);
negated.negative = !atom.negative;
if (group.contains(negated))
{
return true;
}
}
return false;
}
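A worked example for the new check: the group (x OR NOT x) contains an atom together with its own negation, so it is true for every row; checkIfGroupAlwaysTrueAtoms() flags exactly this pattern so the group can be dropped from the CNF, e.g. WHERE (x OR NOT x) AND y reduces to WHERE y.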
bool checkIfAtomAlwaysFalseFullMatch(const CNFQuery::AtomicFormula & atom, const ConstraintsDescription & constraints_description)
{
@ -158,7 +174,8 @@ void WhereConstraintsOptimizer::perform()
.filterAlwaysTrueGroups([&compare_graph, this](const auto & group)
{
/// remove always true groups from CNF
return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints()) && !checkIfGroupAlwaysTrueGraph(group, compare_graph);
return !checkIfGroupAlwaysTrueFullMatch(group, metadata_snapshot->getConstraints())
&& !checkIfGroupAlwaysTrueGraph(group, compare_graph) && !checkIfGroupAlwaysTrueAtoms(group);
})
.filterAlwaysFalseAtoms([&compare_graph, this](const auto & atom)
{

View File

@ -1093,6 +1093,15 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
&& (ast->as<ASTSelectQuery>() || ast->as<ASTSelectWithUnionQuery>());
QueryCache::Usage query_cache_usage = QueryCache::Usage::None;
/// If the query runs with "use_query_cache = 1", we first probe if the query cache already contains the query result (if yes:
/// return the result from the cache). If it doesn't, we execute the query normally and write the result into the query cache. Both steps use a
/// hash of the AST, the current database and the settings as cache key. Unfortunately, the settings are in some places internally
/// modified between steps 1 and 2 (= during query execution) - this is silly but hard to forbid. As a result, the hashes no longer
/// match and the cache is rendered ineffective. Therefore make a copy of the settings and use it for steps 1 and 2.
std::optional<Settings> settings_copy;
if (can_use_query_cache)
settings_copy = settings;
if (!async_insert)
{
/// If it is a non-internal SELECT, and passive (read) use of the query cache is enabled, and the cache knows the query, then set
@ -1101,7 +1110,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
{
if (can_use_query_cache && settings.enable_reads_from_query_cache)
{
QueryCache::Key key(ast, context->getCurrentDatabase(), context->getUserID(), context->getCurrentRoles());
QueryCache::Key key(ast, context->getCurrentDatabase(), *settings_copy, context->getUserID(), context->getCurrentRoles());
QueryCache::Reader reader = query_cache->createReader(key);
if (reader.hasCacheEntryForKey())
{
@ -1224,7 +1233,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
&& (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save))
{
QueryCache::Key key(
ast, context->getCurrentDatabase(), res.pipeline.getHeader(),
ast, context->getCurrentDatabase(), *settings_copy, res.pipeline.getHeader(),
context->getUserID(), context->getCurrentRoles(),
settings.query_cache_share_between_users,
std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl),

View File

@ -381,7 +381,7 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
auto processor = std::make_unique<MergeTreeSelectProcessor>(
pool, std::move(algorithm), storage_snapshot, prewhere_info,
pool, std::move(algorithm), prewhere_info,
actions_settings, block_size_copy, reader_settings);
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
@ -480,7 +480,7 @@ Pipe ReadFromMergeTree::readFromPool(
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
auto processor = std::make_unique<MergeTreeSelectProcessor>(
pool, std::move(algorithm), storage_snapshot, prewhere_info,
pool, std::move(algorithm), prewhere_info,
actions_settings, block_size_copy, reader_settings);
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
@ -592,7 +592,7 @@ Pipe ReadFromMergeTree::readInOrder(
algorithm = std::make_unique<MergeTreeInOrderSelectAlgorithm>(i);
auto processor = std::make_unique<MergeTreeSelectProcessor>(
pool, std::move(algorithm), storage_snapshot, prewhere_info,
pool, std::move(algorithm), prewhere_info,
actions_settings, block_size, reader_settings);
processor->addPartLevelToChunk(isQueryWithFinal());

View File

@ -34,6 +34,7 @@
#include <Server/TCPServer.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
#include <Storages/ObjectStorage/StorageObjectStorageCluster.h>
#include <Core/ExternalTable.h>
#include <Core/ServerSettings.h>
#include <Access/AccessControl.h>

View File

@ -1,22 +0,0 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Core/Types.h>
namespace DB
{
template <typename Configuration, typename MetadataReadHelper>
struct DeltaLakeMetadataParser
{
public:
DeltaLakeMetadataParser<Configuration, MetadataReadHelper>();
Strings getFiles(const Configuration & configuration, ContextPtr context);
private:
struct Impl;
std::shared_ptr<Impl> impl;
};
}

View File

@ -1,116 +0,0 @@
#include <Storages/DataLakes/HudiMetadataParser.h>
#include <Common/logger_useful.h>
#include <ranges>
#include <base/find_symbols.h>
#include <Poco/String.h>
#include "config.h"
#include <filesystem>
#include <IO/ReadHelpers.h>
#if USE_AWS_S3
#include <Storages/DataLakes/S3MetadataReader.h>
#include <Storages/StorageS3.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
template <typename Configuration, typename MetadataReadHelper>
struct HudiMetadataParser<Configuration, MetadataReadHelper>::Impl
{
/**
* Useful links:
* - https://hudi.apache.org/tech-specs/
* - https://hudi.apache.org/docs/file_layouts/
*/
/**
* Hudi tables store metadata files and data files.
* Metadata files are stored in .hoodie/metadata directory. Though unlike DeltaLake and Iceberg,
* metadata is not required in order to understand which files we need to read, moreover,
* for Hudi metadata does not always exist.
*
* There can be two types of data files
* 1. base files (columnar file formats like Apache Parquet/Orc)
* 2. log files
* Currently we support reading only `base files`.
* Data file name format:
* [File Id]_[File Write Token]_[Transaction timestamp].[File Extension]
*
* To find needed parts we need to find out latest part file for every file group for every partition.
* Explanation why:
* Hudi reads in and overwrites the entire table/partition with each update.
* Hudi controls the number of file groups under a single partition according to the
* hoodie.parquet.max.file.size option. Once a single Parquet file is too large, Hudi creates a second file group.
* Each file group is identified by File Id.
*/
Strings processMetadataFiles(const Configuration & configuration)
{
auto log = getLogger("HudiMetadataParser");
const auto keys = MetadataReadHelper::listFiles(configuration, "", Poco::toLower(configuration.format));
using Partition = std::string;
using FileID = std::string;
struct FileInfo
{
String key;
UInt64 timestamp = 0;
};
std::unordered_map<Partition, std::unordered_map<FileID, FileInfo>> data_files;
for (const auto & key : keys)
{
auto key_file = std::filesystem::path(key);
Strings file_parts;
const String stem = key_file.stem();
splitInto<'_'>(file_parts, stem);
if (file_parts.size() != 3)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected format for file: {}", key);
const auto partition = key_file.parent_path().stem();
const auto & file_id = file_parts[0];
const auto timestamp = parse<UInt64>(file_parts[2]);
auto & file_info = data_files[partition][file_id];
if (file_info.timestamp == 0 || file_info.timestamp < timestamp)
{
file_info.key = std::move(key);
file_info.timestamp = timestamp;
}
}
Strings result;
for (auto & [partition, partition_data] : data_files)
{
LOG_TRACE(log, "Adding {} data files from partition {}", partition, partition_data.size());
for (auto & [file_id, file_data] : partition_data)
result.push_back(std::move(file_data.key));
}
return result;
}
};
template <typename Configuration, typename MetadataReadHelper>
HudiMetadataParser<Configuration, MetadataReadHelper>::HudiMetadataParser() : impl(std::make_unique<Impl>())
{
}
template <typename Configuration, typename MetadataReadHelper>
Strings HudiMetadataParser<Configuration, MetadataReadHelper>::getFiles(const Configuration & configuration, ContextPtr)
{
return impl->processMetadataFiles(configuration);
}
template HudiMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>::HudiMetadataParser();
template Strings HudiMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>::getFiles(
const StorageS3::Configuration & configuration, ContextPtr);
}
#endif

View File

@ -1,22 +0,0 @@
#pragma once
#include <Interpreters/Context_fwd.h>
#include <Core/Types.h>
namespace DB
{
template <typename Configuration, typename MetadataReadHelper>
struct HudiMetadataParser
{
public:
HudiMetadataParser();
Strings getFiles(const Configuration & configuration, ContextPtr context);
private:
struct Impl;
std::shared_ptr<Impl> impl;
};
}

View File

@ -1,136 +0,0 @@
#pragma once
#include "config.h"
#if USE_AWS_S3
#include <Storages/IStorage.h>
#include <Common/logger_useful.h>
#include <Databases/LoadingStrictnessLevel.h>
#include <Storages/StorageFactory.h>
#include <Formats/FormatFactory.h>
#include <filesystem>
namespace DB
{
template <typename Storage, typename Name, typename MetadataParser>
class IStorageDataLake : public Storage
{
public:
static constexpr auto name = Name::name;
using Configuration = typename Storage::Configuration;
template <class ...Args>
explicit IStorageDataLake(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args)
: Storage(getConfigurationForDataRead(configuration_, context_, {}, mode), context_, std::forward<Args>(args)...)
, base_configuration(configuration_)
, log(getLogger(getName())) {} // NOLINT(clang-analyzer-optin.cplusplus.VirtualCall)
template <class ...Args>
static StoragePtr create(const Configuration & configuration_, ContextPtr context_, LoadingStrictnessLevel mode, Args && ...args)
{
return std::make_shared<IStorageDataLake<Storage, Name, MetadataParser>>(configuration_, context_, mode, std::forward<Args>(args)...);
}
String getName() const override { return name; }
static ColumnsDescription getTableStructureFromData(
Configuration & base_configuration,
const std::optional<FormatSettings> & format_settings,
const ContextPtr & local_context)
{
auto configuration = getConfigurationForDataRead(base_configuration, local_context);
return Storage::getTableStructureFromData(configuration, format_settings, local_context);
}
static Configuration getConfiguration(ASTs & engine_args, const ContextPtr & local_context)
{
return Storage::getConfiguration(engine_args, local_context, /* get_format_from_file */false);
}
Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override
{
std::lock_guard lock(configuration_update_mutex);
updateConfigurationImpl(local_context);
return Storage::getConfigurationCopy();
}
void updateConfiguration(const ContextPtr & local_context) override
{
std::lock_guard lock(configuration_update_mutex);
updateConfigurationImpl(local_context);
}
private:
static Configuration getConfigurationForDataRead(
const Configuration & base_configuration, const ContextPtr & local_context, const Strings & keys = {},
LoadingStrictnessLevel mode = LoadingStrictnessLevel::CREATE)
{
auto configuration{base_configuration};
configuration.update(local_context);
configuration.static_configuration = true;
try
{
if (keys.empty())
configuration.keys = getDataFiles(configuration, local_context);
else
configuration.keys = keys;
LOG_TRACE(
getLogger("DataLake"),
"New configuration path: {}, keys: {}",
configuration.getPath(), fmt::join(configuration.keys, ", "));
configuration.connect(local_context);
return configuration;
}
catch (...)
{
if (mode <= LoadingStrictnessLevel::CREATE)
throw;
tryLogCurrentException(__PRETTY_FUNCTION__);
return configuration;
}
}
static Strings getDataFiles(const Configuration & configuration, const ContextPtr & local_context)
{
return MetadataParser().getFiles(configuration, local_context);
}
void updateConfigurationImpl(const ContextPtr & local_context)
{
const bool updated = base_configuration.update(local_context);
auto new_keys = getDataFiles(base_configuration, local_context);
if (!updated && new_keys == Storage::getConfigurationCopy().keys)
return;
Storage::useConfiguration(getConfigurationForDataRead(base_configuration, local_context, new_keys));
}
Configuration base_configuration;
std::mutex configuration_update_mutex;
LoggerPtr log;
};
template <typename DataLake>
static StoragePtr createDataLakeStorage(const StorageFactory::Arguments & args)
{
auto configuration = DataLake::getConfiguration(args.engine_args, args.getLocalContext());
/// Data lakes use the Parquet format, so there is no need to infer the format from the data files.
if (configuration.format == "auto")
configuration.format = "Parquet";
return DataLake::create(configuration, args.getContext(), args.mode, args.table_id, args.columns, args.constraints,
args.comment, getFormatSettings(args.getContext()));
}
}
#endif
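A minimal sketch (not part of this diff) of the extension point IStorageDataLake exposes: a hypothetical new lake format needs only a name struct and a default-constructible metadata parser with getFiles(), mirroring the StorageHudi and StorageDeltaLake headers further below. Every "MyLake" identifier here is invented for illustration.

#include <Storages/DataLakes/IStorageDataLake.h>
#include <Storages/DataLakes/S3MetadataReader.h>
#include <Storages/StorageS3.h>

namespace DB
{

struct StorageMyLakeName
{
    static constexpr auto name = "MyLake";
};

/// The parser only has to return the list of data files that should be read.
template <typename Configuration, typename MetadataReadHelper>
struct MyLakeMetadataParser
{
    Strings getFiles(const Configuration & configuration, ContextPtr)
    {
        /// A real parser would read the format's own metadata files;
        /// here we simply list all Parquet files under the table path.
        return MetadataReadHelper::listFiles(configuration, "", "parquet");
    }
};

using StorageMyLakeS3 = IStorageDataLake<
    StorageS3, StorageMyLakeName,
    MyLakeMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>>;

}

Registration would then follow the same REGISTER_DATA_LAKE_STORAGE pattern shown later in this diff.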

View File

@ -1,90 +0,0 @@
#include <Storages/DataLakes/Iceberg/StorageIceberg.h>
#if USE_AWS_S3 && USE_AVRO
namespace DB
{
StoragePtr StorageIceberg::create(
const DB::StorageIceberg::Configuration & base_configuration,
DB::ContextPtr context_,
LoadingStrictnessLevel mode,
const DB::StorageID & table_id_,
const DB::ColumnsDescription & columns_,
const DB::ConstraintsDescription & constraints_,
const String & comment,
std::optional<FormatSettings> format_settings_)
{
auto configuration{base_configuration};
configuration.update(context_);
std::unique_ptr<IcebergMetadata> metadata;
NamesAndTypesList schema_from_metadata;
try
{
metadata = parseIcebergMetadata(configuration, context_);
schema_from_metadata = metadata->getTableSchema();
configuration.keys = metadata->getDataFiles();
}
catch (...)
{
if (mode <= LoadingStrictnessLevel::CREATE)
throw;
tryLogCurrentException(__PRETTY_FUNCTION__);
}
return std::make_shared<StorageIceberg>(
std::move(metadata),
configuration,
context_,
table_id_,
columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_,
constraints_,
comment,
format_settings_);
}
StorageIceberg::StorageIceberg(
std::unique_ptr<IcebergMetadata> metadata_,
const Configuration & configuration_,
ContextPtr context_,
const StorageID & table_id_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
const String & comment,
std::optional<FormatSettings> format_settings_)
: StorageS3(configuration_, context_, table_id_, columns_, constraints_, comment, format_settings_)
, current_metadata(std::move(metadata_))
, base_configuration(configuration_)
{
}
ColumnsDescription StorageIceberg::getTableStructureFromData(
Configuration & base_configuration,
const std::optional<FormatSettings> &,
const ContextPtr & local_context)
{
auto configuration{base_configuration};
configuration.update(local_context);
auto metadata = parseIcebergMetadata(configuration, local_context);
return ColumnsDescription(metadata->getTableSchema());
}
void StorageIceberg::updateConfigurationImpl(const ContextPtr & local_context)
{
const bool updated = base_configuration.update(local_context);
auto new_metadata = parseIcebergMetadata(base_configuration, local_context);
if (!current_metadata || new_metadata->getVersion() != current_metadata->getVersion())
current_metadata = std::move(new_metadata);
else if (!updated)
return;
auto updated_configuration{base_configuration};
/// If metadata wasn't changed, we won't list data files again.
updated_configuration.keys = current_metadata->getDataFiles();
StorageS3::useConfiguration(updated_configuration);
}
}
#endif

View File

@ -1,85 +0,0 @@
#pragma once
#include "config.h"
#if USE_AWS_S3 && USE_AVRO
# include <filesystem>
# include <Formats/FormatFactory.h>
# include <Storages/DataLakes/Iceberg/IcebergMetadata.h>
# include <Storages/IStorage.h>
# include <Storages/StorageFactory.h>
# include <Storages/StorageS3.h>
# include <Common/logger_useful.h>
namespace DB
{
/// Storage for read-only integration with Apache Iceberg tables in Amazon S3 (see https://iceberg.apache.org/)
/// It is currently implemented on top of StorageS3 and does not yet support
/// many Iceberg features, such as schema evolution, partitioning, and positional and equality deletes.
/// TODO: Implement Iceberg as a separate storage using IObjectStorage
/// (to support all object storages, not only S3) and add support for missing Iceberg features.
class StorageIceberg : public StorageS3
{
public:
static constexpr auto name = "Iceberg";
using Configuration = StorageS3::Configuration;
static StoragePtr create(const Configuration & base_configuration,
ContextPtr context_,
LoadingStrictnessLevel mode,
const StorageID & table_id_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
const String & comment,
std::optional<FormatSettings> format_settings_);
StorageIceberg(
std::unique_ptr<IcebergMetadata> metadata_,
const Configuration & configuration_,
ContextPtr context_,
const StorageID & table_id_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
const String & comment,
std::optional<FormatSettings> format_settings_);
String getName() const override { return name; }
static ColumnsDescription getTableStructureFromData(
Configuration & base_configuration,
const std::optional<FormatSettings> &,
const ContextPtr & local_context);
static Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context)
{
return StorageS3::getConfiguration(engine_args, local_context, /* get_format_from_file */false);
}
Configuration updateConfigurationAndGetCopy(const ContextPtr & local_context) override
{
std::lock_guard lock(configuration_update_mutex);
updateConfigurationImpl(local_context);
return StorageS3::getConfigurationCopy();
}
void updateConfiguration(const ContextPtr & local_context) override
{
std::lock_guard lock(configuration_update_mutex);
updateConfigurationImpl(local_context);
}
private:
void updateConfigurationImpl(const ContextPtr & local_context);
std::unique_ptr<IcebergMetadata> current_metadata;
Configuration base_configuration;
std::mutex configuration_update_mutex;
};
}
#endif

View File

@ -1,87 +0,0 @@
#include <config.h>
#if USE_AWS_S3
#include <IO/ReadBufferFromS3.h>
#include <IO/S3/Requests.h>
#include <Interpreters/Context.h>
#include <Storages/DataLakes/S3MetadataReader.h>
#include <aws/core/auth/AWSCredentials.h>
#include <aws/s3/S3Client.h>
#include <aws/s3/model/ListObjectsV2Request.h>
#include <filesystem>
namespace DB
{
namespace ErrorCodes
{
extern const int S3_ERROR;
}
std::shared_ptr<ReadBuffer>
S3DataLakeMetadataReadHelper::createReadBuffer(const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration)
{
S3Settings::RequestSettings request_settings;
request_settings.max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries;
return std::make_shared<ReadBufferFromS3>(
base_configuration.client,
base_configuration.url.bucket,
key,
base_configuration.url.version_id,
request_settings,
context->getReadSettings());
}
bool S3DataLakeMetadataReadHelper::exists(const String & key, const StorageS3::Configuration & configuration)
{
return S3::objectExists(*configuration.client, configuration.url.bucket, key);
}
std::vector<String> S3DataLakeMetadataReadHelper::listFiles(
const StorageS3::Configuration & base_configuration, const String & prefix, const String & suffix)
{
const auto & table_path = base_configuration.url.key;
const auto & bucket = base_configuration.url.bucket;
const auto & client = base_configuration.client;
std::vector<String> res;
S3::ListObjectsV2Request request;
Aws::S3::Model::ListObjectsV2Outcome outcome;
request.SetBucket(bucket);
request.SetPrefix(std::filesystem::path(table_path) / prefix);
bool is_finished{false};
while (!is_finished)
{
outcome = client->ListObjectsV2(request);
if (!outcome.IsSuccess())
throw S3Exception(
outcome.GetError().GetErrorType(),
"Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}",
quoteString(bucket),
quoteString(base_configuration.url.key),
backQuote(outcome.GetError().GetExceptionName()),
quoteString(outcome.GetError().GetMessage()));
const auto & result_batch = outcome.GetResult().GetContents();
for (const auto & obj : result_batch)
{
const auto & filename = obj.GetKey();
if (filename.ends_with(suffix))
res.push_back(filename);
}
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
is_finished = !outcome.GetResult().GetIsTruncated();
}
LOG_TRACE(getLogger("S3DataLakeMetadataReadHelper"), "Listed {} files", res.size());
return res;
}
}
#endif

View File

@ -1,25 +0,0 @@
#pragma once
#include <config.h>
#if USE_AWS_S3
#include <Storages/StorageS3.h>
namespace DB
{
class ReadBuffer;
struct S3DataLakeMetadataReadHelper
{
static std::shared_ptr<ReadBuffer> createReadBuffer(
const String & key, ContextPtr context, const StorageS3::Configuration & base_configuration);
static bool exists(const String & key, const StorageS3::Configuration & configuration);
static std::vector<String> listFiles(const StorageS3::Configuration & configuration, const std::string & prefix = "", const std::string & suffix = "");
};
}
#endif

View File

@ -1,25 +0,0 @@
#pragma once
#include <Storages/IStorage.h>
#include <Storages/DataLakes/IStorageDataLake.h>
#include <Storages/DataLakes/DeltaLakeMetadataParser.h>
#include "config.h"
#if USE_AWS_S3
#include <Storages/DataLakes/S3MetadataReader.h>
#include <Storages/StorageS3.h>
#endif
namespace DB
{
struct StorageDeltaLakeName
{
static constexpr auto name = "DeltaLake";
};
#if USE_AWS_S3 && USE_PARQUET
using StorageDeltaLakeS3 = IStorageDataLake<StorageS3, StorageDeltaLakeName, DeltaLakeMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>>;
#endif
}

View File

@ -1,25 +0,0 @@
#pragma once
#include <Storages/IStorage.h>
#include <Storages/DataLakes/IStorageDataLake.h>
#include <Storages/DataLakes/HudiMetadataParser.h>
#include "config.h"
#if USE_AWS_S3
#include <Storages/DataLakes/S3MetadataReader.h>
#include <Storages/StorageS3.h>
#endif
namespace DB
{
struct StorageHudiName
{
static constexpr auto name = "Hudi";
};
#if USE_AWS_S3
using StorageHudiS3 = IStorageDataLake<StorageS3, StorageHudiName, HudiMetadataParser<StorageS3::Configuration, S3DataLakeMetadataReadHelper>>;
#endif
}

View File

@ -1,50 +0,0 @@
#include <Storages/DataLakes/IStorageDataLake.h>
#include "config.h"
#if USE_AWS_S3
#include <Storages/DataLakes/StorageDeltaLake.h>
#include <Storages/DataLakes/Iceberg/StorageIceberg.h>
#include <Storages/DataLakes/StorageHudi.h>
namespace DB
{
#define REGISTER_DATA_LAKE_STORAGE(STORAGE, NAME) \
factory.registerStorage( \
NAME, \
[](const StorageFactory::Arguments & args) \
{ \
return createDataLakeStorage<STORAGE>(args);\
}, \
{ \
.supports_settings = false, \
.supports_schema_inference = true, \
.source_access_type = AccessType::S3, \
});
#if USE_PARQUET
void registerStorageDeltaLake(StorageFactory & factory)
{
REGISTER_DATA_LAKE_STORAGE(StorageDeltaLakeS3, StorageDeltaLakeName::name)
}
#endif
#if USE_AVRO /// StorageIceberg depends on Avro to parse metadata stored in the Avro format.
void registerStorageIceberg(StorageFactory & factory)
{
REGISTER_DATA_LAKE_STORAGE(StorageIceberg, StorageIceberg::name)
}
#endif
void registerStorageHudi(StorageFactory & factory)
{
REGISTER_DATA_LAKE_STORAGE(StorageHudiS3, StorageHudiName::name)
}
}
#endif

File diff suppressed because it is too large.

View File

@ -1,190 +0,0 @@
#pragma once
#include "config.h"
#if USE_HDFS
#include <Processors/ISource.h>
#include <Storages/IStorage.h>
#include <Storages/Cache/SchemaCache.h>
#include <Storages/prepareReadingFromFormat.h>
#include <Storages/SelectQueryInfo.h>
#include <Poco/URI.h>
namespace DB
{
class IInputFormat;
/**
* This class represents the table engine for external HDFS files.
* The read method is supported for now.
*/
class StorageHDFS final : public IStorage, WithContext
{
public:
struct PathInfo
{
time_t last_mod_time;
size_t size;
};
struct PathWithInfo
{
PathWithInfo() = default;
PathWithInfo(const String & path_, const std::optional<PathInfo> & info_) : path(path_), info(info_) {}
String path;
std::optional<PathInfo> info;
};
StorageHDFS(
const String & uri_,
const StorageID & table_id_,
const String & format_name_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
const String & comment,
const ContextPtr & context_,
const String & compression_method_ = "",
bool distributed_processing_ = false,
ASTPtr partition_by = nullptr);
String getName() const override { return "HDFS"; }
void read(
QueryPlan & query_plan,
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams) override;
SinkToStoragePtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, ContextPtr context, bool async_insert) override;
void truncate(
const ASTPtr & query,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr local_context,
TableExclusiveLockHolder &) override;
bool supportsPartitionBy() const override { return true; }
/// Check if the format is column-oriented.
/// It is useful because column-oriented formats can effectively skip unknown columns,
/// so we can create a header of only the required columns in the read method and ask the
/// format to read only them. Note: this hack cannot be done with ordinary formats like TSV.
bool supportsSubsetOfColumns(const ContextPtr & context_) const;
bool supportsSubcolumns() const override { return true; }
bool supportsDynamicSubcolumns() const override { return true; }
static ColumnsDescription getTableStructureFromData(
const String & format,
const String & uri,
const String & compression_method,
const ContextPtr & ctx);
static std::pair<ColumnsDescription, String> getTableStructureAndFormatFromData(
const String & uri,
const String & compression_method,
const ContextPtr & ctx);
static SchemaCache & getSchemaCache(const ContextPtr & ctx);
bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
protected:
friend class HDFSSource;
friend class ReadFromHDFS;
private:
static std::pair<ColumnsDescription, String> getTableStructureAndFormatFromDataImpl(
std::optional<String> format,
const String & uri,
const String & compression_method,
const ContextPtr & ctx);
std::vector<String> uris;
String format_name;
String compression_method;
const bool distributed_processing;
ASTPtr partition_by;
bool is_path_with_globs;
LoggerPtr log = getLogger("StorageHDFS");
};
class PullingPipelineExecutor;
class HDFSSource : public ISource, WithContext
{
public:
class DisclosedGlobIterator
{
public:
DisclosedGlobIterator(const String & uri_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
StorageHDFS::PathWithInfo next();
private:
class Impl;
/// shared_ptr to have copy constructor
std::shared_ptr<Impl> pimpl;
};
class URISIterator
{
public:
URISIterator(const std::vector<String> & uris_, const ActionsDAG::Node * predicate, const NamesAndTypesList & virtual_columns, const ContextPtr & context);
StorageHDFS::PathWithInfo next();
private:
class Impl;
/// shared_ptr to have copy constructor
std::shared_ptr<Impl> pimpl;
};
using IteratorWrapper = std::function<StorageHDFS::PathWithInfo()>;
using StorageHDFSPtr = std::shared_ptr<StorageHDFS>;
HDFSSource(
const ReadFromFormatInfo & info,
StorageHDFSPtr storage_,
const ContextPtr & context_,
UInt64 max_block_size_,
std::shared_ptr<IteratorWrapper> file_iterator_,
bool need_only_count_);
~HDFSSource() override;
String getName() const override;
Chunk generate() override;
private:
void addNumRowsToCache(const String & path, size_t num_rows);
std::optional<size_t> tryGetNumRowsFromCache(const StorageHDFS::PathWithInfo & path_with_info);
StorageHDFSPtr storage;
Block block_for_format;
NamesAndTypesList requested_columns;
NamesAndTypesList requested_virtual_columns;
UInt64 max_block_size;
std::shared_ptr<IteratorWrapper> file_iterator;
ColumnsDescription columns_description;
bool need_only_count;
size_t total_rows_in_file = 0;
std::unique_ptr<ReadBuffer> read_buf;
std::shared_ptr<IInputFormat> input_format;
std::unique_ptr<QueryPipeline> pipeline;
std::unique_ptr<PullingPipelineExecutor> reader;
String current_path;
std::optional<size_t> current_file_size;
/// Recreate ReadBuffer and PullingPipelineExecutor for each file.
bool initialize();
};
}
#endif
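A small sketch (not part of this diff) of the idea behind supportsSubsetOfColumns() in the header above: for column-oriented formats the source can build a header containing only the requested columns and ask the input format to read just those. The helper name makeHeaderForRequestedColumns is hypothetical; Block, Names, and ColumnWithTypeAndName::cloneEmpty() are existing ClickHouse types.

#include <Core/Block.h>
#include <Core/Names.h>

namespace DB
{

/// Build a header that contains only the requested columns, preserving their names and types.
/// A column-oriented input format given this header can skip all other columns in the file.
Block makeHeaderForRequestedColumns(const Block & full_sample_header, const Names & requested_columns)
{
    Block header;
    for (const auto & name : requested_columns)
        header.insert(full_sample_header.getByName(name).cloneEmpty());
    return header;
}

}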

View File

@ -1,98 +0,0 @@
#include "config.h"
#include "Interpreters/Context_fwd.h"
#if USE_HDFS
#include <Storages/HDFS/StorageHDFSCluster.h>
#include <Core/QueryProcessingStage.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/getHeaderForProcessingStage.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <QueryPipeline/RemoteQueryExecutor.h>
#include <Processors/Transforms/AddingDefaultsTransform.h>
#include <Processors/Sources/RemoteSource.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/queryToString.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/IStorage.h>
#include <Storages/SelectQueryInfo.h>
#include <Storages/extractTableFunctionArgumentsFromSelectQuery.h>
#include <Storages/VirtualColumnUtils.h>
#include <TableFunctions/TableFunctionHDFSCluster.h>
#include <memory>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
StorageHDFSCluster::StorageHDFSCluster(
ContextPtr context_,
const String & cluster_name_,
const String & uri_,
const StorageID & table_id_,
const String & format_name_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
const String & compression_method)
: IStorageCluster(cluster_name_, table_id_, getLogger("StorageHDFSCluster (" + table_id_.table_name + ")"))
, uri(uri_)
, format_name(format_name_)
{
checkHDFSURL(uri_);
context_->getRemoteHostFilter().checkURL(Poco::URI(uri_));
StorageInMemoryMetadata storage_metadata;
if (columns_.empty())
{
ColumnsDescription columns;
if (format_name == "auto")
std::tie(columns, format_name) = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_);
else
columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_);
storage_metadata.setColumns(columns);
}
else
{
if (format_name == "auto")
format_name = StorageHDFS::getTableStructureAndFormatFromData(uri_, compression_method, context_).second;
storage_metadata.setColumns(columns_);
}
storage_metadata.setConstraints(constraints_);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}
void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)
{
ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query);
if (!expression_list)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query));
TableFunctionHDFSCluster::updateStructureAndFormatArgumentsIfNeeded(
expression_list->children, storage_snapshot->metadata->getColumns().getAll().toNamesAndTypesDescription(), format_name, context);
}
RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
{
auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, getVirtualsList(), context);
auto callback = std::make_shared<std::function<String()>>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; });
return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
}
}
#endif

View File

@ -1,53 +0,0 @@
#pragma once
#include "config.h"
#if USE_HDFS
#include <memory>
#include <optional>
#include <Client/Connection.h>
#include <Interpreters/Cluster.h>
#include <Storages/IStorageCluster.h>
#include <Storages/HDFS/StorageHDFS.h>
namespace DB
{
class Context;
class StorageHDFSCluster : public IStorageCluster
{
public:
StorageHDFSCluster(
ContextPtr context_,
const String & cluster_name_,
const String & uri_,
const StorageID & table_id_,
const String & format_name_,
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
const String & compression_method);
std::string getName() const override { return "HDFSCluster"; }
RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
bool supportsSubcolumns() const override { return true; }
bool supportsDynamicSubcolumns() const override { return true; }
bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; }
private:
void updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context) override;
String uri;
String format_name;
};
}
#endif

View File

@ -12,7 +12,7 @@
#include <base/types.h>
#include <Common/CacheBase.h>
#include <Common/PoolBase.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#include <Storages/Hive/HiveFile.h>

View File

@ -14,7 +14,7 @@
#include <Core/Block.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/Hive/HiveSettings.h>
#include <Storages/HDFS/ReadBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
namespace orc
{

View File

@ -38,8 +38,8 @@
#include <Processors/QueryPlan/SourceStepWithFilter.h>
#include <Processors/Sources/NullSource.h>
#include <Storages/AlterCommands.h>
#include <Storages/HDFS/ReadBufferFromHDFS.h>
#include <Storages/HDFS/AsynchronousReadBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/ReadBufferFromHDFS.h>
#include <Storages/ObjectStorage/HDFS/AsynchronousReadBufferFromHDFS.h>
#include <Storages/Hive/HiveSettings.h>
#include <Storages/Hive/StorageHiveMetadata.h>
#include <Storages/MergeTree/KeyCondition.h>

View File

@ -9,7 +9,7 @@
#include <Interpreters/Context.h>
#include <Storages/IStorage.h>
#include <Storages/HDFS/HDFSCommon.h>
#include <Storages/ObjectStorage/HDFS/HDFSCommon.h>
#include <Storages/Hive/HiveCommon.h>
#include <Storages/Hive/HiveFile.h>

View File

@ -27,11 +27,14 @@ namespace ErrorCodes
extern const int CANNOT_RESTORE_TABLE;
}
IStorage::IStorage(StorageID storage_id_)
IStorage::IStorage(StorageID storage_id_, std::unique_ptr<StorageInMemoryMetadata> metadata_)
: storage_id(std::move(storage_id_))
, metadata(std::make_unique<StorageInMemoryMetadata>())
, virtuals(std::make_unique<VirtualColumnsDescription>())
{
if (metadata_)
metadata.set(std::move(metadata_));
else
metadata.set(std::make_unique<StorageInMemoryMetadata>());
}
bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const

View File

@ -99,7 +99,7 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
public:
IStorage() = delete;
/// Storage metadata can be set separately in setInMemoryMetadata method
explicit IStorage(StorageID storage_id_);
explicit IStorage(StorageID storage_id_, std::unique_ptr<StorageInMemoryMetadata> metadata_ = nullptr);
IStorage(const IStorage &) = delete;
IStorage & operator=(const IStorage &) = delete;
@ -261,6 +261,9 @@ public:
/// Return true if storage can execute lightweight delete mutations.
virtual bool supportsLightweightDelete() const { return false; }
/// Return true if storage has any projection.
virtual bool hasProjection() const { return false; }
/// Return true if storage can execute 'DELETE FROM' mutations. This is different from lightweight delete
/// because those are internally translated into 'ALTER UPDATE' mutations.
virtual bool supportsDelete() const { return false; }

View File

@ -43,7 +43,6 @@ class IReservation;
using ReservationPtr = std::unique_ptr<IReservation>;
class IMergeTreeReader;
class IMergeTreeDataPartWriter;
class MarkCache;
class UncompressedCache;
class MergeTreeTransaction;
@ -74,7 +73,6 @@ public:
using VirtualFields = std::unordered_map<String, Field>;
using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
using MergeTreeWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
using ColumnSizeByName = std::unordered_map<std::string, ColumnSize>;
using NameToNumber = std::unordered_map<std::string, size_t>;
@ -106,15 +104,6 @@ public:
const ValueSizeMap & avg_value_size_hints_,
const ReadBufferFromFileBase::ProfileCallback & profile_callback_) const = 0;
virtual MergeTreeWriterPtr getWriter(
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const Statistics & stats_to_recalc_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity) = 0;
virtual bool isStoredOnDisk() const = 0;
virtual bool isStoredOnRemoteDisk() const = 0;
@ -172,6 +161,8 @@ public:
const SerializationInfoByName & getSerializationInfos() const { return serialization_infos; }
const SerializationByName & getSerializations() const { return serializations; }
SerializationPtr getSerialization(const String & column_name) const;
SerializationPtr tryGetSerialization(const String & column_name) const;
@ -201,6 +192,7 @@ public:
/// take place, you must take original name of column for this part from
/// storage and pass it to this method.
std::optional<size_t> getColumnPosition(const String & column_name) const;
const NameToNumber & getColumnPositions() const { return column_name_to_position; }
/// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
/// If no checksums are present returns the name of the first physically existing column.
@ -446,6 +438,8 @@ public:
bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); }
bool hasProjection() const { return !projection_parts.empty(); }
bool hasBrokenProjection(const String & projection_name) const;
/// Return true, if all projections were loaded successfully and none was marked as broken.

View File

@ -3,6 +3,13 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NO_SUCH_COLUMN_IN_TABLE;
}
Block getBlockAndPermute(const Block & block, const Names & names, const IColumn::Permutation * permutation)
{
Block result;
@ -38,18 +45,27 @@ Block permuteBlockIfNeeded(const Block & block, const IColumn::Permutation * per
}
IMergeTreeDataPartWriter::IMergeTreeDataPartWriter(
const MergeTreeMutableDataPartPtr & data_part_,
const String & data_part_name_,
const SerializationByName & serializations_,
MutableDataPartStoragePtr data_part_storage_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
const MergeTreeSettingsPtr & storage_settings_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const VirtualsDescriptionPtr & virtual_columns_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_)
: data_part(data_part_)
, storage(data_part_->storage)
: data_part_name(data_part_name_)
, serializations(serializations_)
, index_granularity_info(index_granularity_info_)
, storage_settings(storage_settings_)
, metadata_snapshot(metadata_snapshot_)
, virtual_columns(virtual_columns_)
, columns_list(columns_list_)
, settings(settings_)
, index_granularity(index_granularity_)
, with_final_mark(settings.can_use_adaptive_granularity)
, data_part_storage(data_part_storage_)
, index_granularity(index_granularity_)
{
}
@ -60,6 +76,102 @@ Columns IMergeTreeDataPartWriter::releaseIndexColumns()
std::make_move_iterator(index_columns.end()));
}
SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const
{
auto it = serializations.find(column_name);
if (it == serializations.end())
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
"There is no column or subcolumn {} in part {}", column_name, data_part_name);
return it->second;
}
ASTPtr IMergeTreeDataPartWriter::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
{
auto get_codec_or_default = [&](const auto & column_desc)
{
return column_desc.codec ? column_desc.codec : default_codec->getFullCodecDesc();
};
const auto & columns = metadata_snapshot->getColumns();
if (const auto * column_desc = columns.tryGet(column_name))
return get_codec_or_default(*column_desc);
if (const auto * virtual_desc = virtual_columns->tryGetDescription(column_name))
return get_codec_or_default(*virtual_desc);
return default_codec->getFullCodecDesc();
}
IMergeTreeDataPartWriter::~IMergeTreeDataPartWriter() = default;
MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter(
const String & data_part_name_,
const String & logger_name_,
const SerializationByName & serializations_,
MutableDataPartStoragePtr data_part_storage_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
const MergeTreeSettingsPtr & storage_settings_,
const NamesAndTypesList & columns_list,
const ColumnPositions & column_positions,
const StorageMetadataPtr & metadata_snapshot,
const VirtualsDescriptionPtr & virtual_columns,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const Statistics & stats_to_recalc_,
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity);
MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter(
const String & data_part_name_,
const String & logger_name_,
const SerializationByName & serializations_,
MutableDataPartStoragePtr data_part_storage_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
const MergeTreeSettingsPtr & storage_settings_,
const NamesAndTypesList & columns_list,
const StorageMetadataPtr & metadata_snapshot,
const VirtualsDescriptionPtr & virtual_columns,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const Statistics & stats_to_recalc_,
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity);
MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
MergeTreeDataPartType part_type,
const String & data_part_name_,
const String & logger_name_,
const SerializationByName & serializations_,
MutableDataPartStoragePtr data_part_storage_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
const MergeTreeSettingsPtr & storage_settings_,
const NamesAndTypesList & columns_list,
const ColumnPositions & column_positions,
const StorageMetadataPtr & metadata_snapshot,
const VirtualsDescriptionPtr & virtual_columns,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const Statistics & stats_to_recalc_,
const String & marks_file_extension_,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity)
{
if (part_type == MergeTreeDataPartType::Compact)
return createMergeTreeDataPartCompactWriter(data_part_name_, logger_name_, serializations_, data_part_storage_,
index_granularity_info_, storage_settings_, columns_list, column_positions, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_,
marks_file_extension_, default_codec_, writer_settings, computed_index_granularity);
else if (part_type == MergeTreeDataPartType::Wide)
return createMergeTreeDataPartWideWriter(data_part_name_, logger_name_, serializations_, data_part_storage_,
index_granularity_info_, storage_settings_, columns_list, metadata_snapshot, virtual_columns, indices_to_recalc, stats_to_recalc_,
marks_file_extension_, default_codec_, writer_settings, computed_index_granularity);
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown part type: {}", part_type.toString());
}
}

View File

@ -1,12 +1,13 @@
#pragma once
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileBase.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/HashingWriteBuffer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Disks/IDisk.h>
#include <Storages/MergeTree/MergeTreeDataPartType.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
#include <Storages/MergeTree/MergeTreeIndices.h>
#include <Storages/MergeTree/IDataPartStorage.h>
#include <Storages/Statistics/Statistics.h>
#include <Storages/VirtualColumnsDescription.h>
namespace DB
@ -22,9 +23,14 @@ class IMergeTreeDataPartWriter : private boost::noncopyable
{
public:
IMergeTreeDataPartWriter(
const MergeTreeMutableDataPartPtr & data_part_,
const String & data_part_name_,
const SerializationByName & serializations_,
MutableDataPartStoragePtr data_part_storage_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
const MergeTreeSettingsPtr & storage_settings_,
const NamesAndTypesList & columns_list_,
const StorageMetadataPtr & metadata_snapshot_,
const VirtualsDescriptionPtr & virtual_columns_,
const MergeTreeWriterSettings & settings_,
const MergeTreeIndexGranularity & index_granularity_ = {});
@ -32,7 +38,7 @@ public:
virtual void write(const Block & block, const IColumn::Permutation * permutation) = 0;
virtual void fillChecksums(IMergeTreeDataPart::Checksums & checksums, NameSet & checksums_to_remove) = 0;
virtual void fillChecksums(MergeTreeDataPartChecksums & checksums, NameSet & checksums_to_remove) = 0;
virtual void finish(bool sync) = 0;
@ -40,16 +46,48 @@ public:
const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; }
protected:
SerializationPtr getSerialization(const String & column_name) const;
const MergeTreeMutableDataPartPtr data_part;
const MergeTreeData & storage;
ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;
IDataPartStorage & getDataPartStorage() { return *data_part_storage; }
const String data_part_name;
/// Serializations for every columns and subcolumns by their names.
const SerializationByName serializations;
const MergeTreeIndexGranularityInfo index_granularity_info;
const MergeTreeSettingsPtr storage_settings;
const StorageMetadataPtr metadata_snapshot;
const VirtualsDescriptionPtr virtual_columns;
const NamesAndTypesList columns_list;
const MergeTreeWriterSettings settings;
MergeTreeIndexGranularity index_granularity;
const bool with_final_mark;
MutableDataPartStoragePtr data_part_storage;
MutableColumns index_columns;
MergeTreeIndexGranularity index_granularity;
};
using MergeTreeDataPartWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
using ColumnPositions = std::unordered_map<std::string, size_t>;
MergeTreeDataPartWriterPtr createMergeTreeDataPartWriter(
MergeTreeDataPartType part_type,
const String & data_part_name_,
const String & logger_name_,
const SerializationByName & serializations_,
MutableDataPartStoragePtr data_part_storage_,
const MergeTreeIndexGranularityInfo & index_granularity_info_,
const MergeTreeSettingsPtr & storage_settings_,
const NamesAndTypesList & columns_list,
const ColumnPositions & column_positions,
const StorageMetadataPtr & metadata_snapshot,
const VirtualsDescriptionPtr & virtual_columns_,
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
const Statistics & stats_to_recalc_,
const String & marks_file_extension,
const CompressionCodecPtr & default_codec_,
const MergeTreeWriterSettings & writer_settings,
const MergeTreeIndexGranularity & computed_index_granularity);
}

View File

@ -7,20 +7,21 @@ namespace DB
{
IMergedBlockOutputStream::IMergedBlockOutputStream(
const MergeTreeMutableDataPartPtr & data_part,
const MergeTreeSettingsPtr & storage_settings_,
MutableDataPartStoragePtr data_part_storage_,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list,
bool reset_columns_)
: storage(data_part->storage)
: storage_settings(storage_settings_)
, metadata_snapshot(metadata_snapshot_)
, data_part_storage(data_part->getDataPartStoragePtr())
, data_part_storage(data_part_storage_)
, reset_columns(reset_columns_)
{
if (reset_columns)
{
SerializationInfo::Settings info_settings =
{
.ratio_of_defaults_for_sparse = storage.getSettings()->ratio_of_defaults_for_sparse_serialization,
.ratio_of_defaults_for_sparse = storage_settings->ratio_of_defaults_for_sparse_serialization,
.choose_kind = false,
};
@ -42,7 +43,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
return {};
for (const auto & column : empty_columns)
LOG_TRACE(storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name);
LOG_TRACE(data_part->storage.log, "Skipping expired/empty column {} for part {}", column, data_part->name);
/// Collect counts for shared streams of different columns. As an example, Nested columns have shared stream with array sizes.
std::map<String, size_t> stream_counts;
@ -91,7 +92,7 @@ NameSet IMergedBlockOutputStream::removeEmptyColumnsFromPart(
}
else /// If we have no file in checksums it doesn't exist on disk
{
LOG_TRACE(storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr);
LOG_TRACE(data_part->storage.log, "Files {} doesn't exist in checksums so it doesn't exist on disk, will not try to remove it", *itr);
itr = remove_files.erase(itr);
}
}

View File

@ -1,10 +1,12 @@
#pragma once
#include "Storages/MergeTree/IDataPartStorage.h"
#include <Storages/MergeTree/IDataPartStorage.h>
#include <Storages/MergeTree/MergeTreeSettings.h>
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>
#include <Common/Logger.h>
namespace DB
{
@ -13,7 +15,8 @@ class IMergedBlockOutputStream
{
public:
IMergedBlockOutputStream(
const MergeTreeMutableDataPartPtr & data_part,
const MergeTreeSettingsPtr & storage_settings_,
MutableDataPartStoragePtr data_part_storage_,
const StorageMetadataPtr & metadata_snapshot_,
const NamesAndTypesList & columns_list,
bool reset_columns_);
@ -39,11 +42,13 @@ protected:
SerializationInfoByName & serialization_infos,
MergeTreeData::DataPart::Checksums & checksums);
const MergeTreeData & storage;
MergeTreeSettingsPtr storage_settings;
LoggerPtr log;
StorageMetadataPtr metadata_snapshot;
MutableDataPartStoragePtr data_part_storage;
IMergeTreeDataPart::MergeTreeWriterPtr writer;
MergeTreeDataPartWriterPtr writer;
bool reset_columns = false;
SerializationInfoByName new_serialization_infos;

View File

@ -2664,6 +2664,13 @@ BoolMask KeyCondition::checkInHyperrectangle(
else if (element.function == RPNElement::FUNCTION_IN_RANGE
|| element.function == RPNElement::FUNCTION_NOT_IN_RANGE)
{
if (element.key_column >= hyperrectangle.size())
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Hyperrectangle size is {}, but requested element at posittion {} ({})",
hyperrectangle.size(), element.key_column, element.toString());
}
const Range * key_range = &hyperrectangle[element.key_column];
/// The case when the column is wrapped in a chain of possibly monotonic functions.

View File

@ -9,7 +9,7 @@
#include <Common/ActionBlocker.h>
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Compression/CompressedWriteBuffer.h>
#include <DataTypes/ObjectUtils.h>
#include <DataTypes/Serializations/SerializationInfo.h>
#include <IO/IReadableWriteBuffer.h>
@ -34,6 +34,7 @@
#include <Processors/Transforms/DistinctTransform.h>
#include <Processors/QueryPlan/CreatingSetsStep.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/MergeTreeTransaction.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
namespace DB
@ -378,7 +379,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()),
MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()),
ctx->compression_codec,
global_ctx->txn,
global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID,
/*reset_columns=*/ true,
ctx->blocks_are_granules_size,
global_ctx->context->getWriteSettings());

View File

@ -6135,6 +6135,21 @@ bool MergeTreeData::supportsLightweightDelete() const
return true;
}
bool MergeTreeData::hasProjection() const
{
auto lock = lockParts();
for (const auto & part : data_parts_by_info)
{
if (part->getState() == MergeTreeDataPartState::Outdated
|| part->getState() == MergeTreeDataPartState::Deleting)
continue;
if (part->hasProjection())
return true;
}
return false;
}
MergeTreeData::ProjectionPartsVector MergeTreeData::getAllProjectionPartsVector(MergeTreeData::DataPartStateVector * out_states) const
{
ProjectionPartsVector res;
@ -8477,7 +8492,7 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::createE
MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns,
index_factory.getMany(metadata_snapshot->getSecondaryIndices()),
Statistics{},
compression_codec, txn);
compression_codec, txn ? txn->tid : Tx::PrehistoricTID);
bool sync_on_insert = settings->fsync_after_insert;

Some files were not shown because too many files have changed in this diff.