Merge remote-tracking branch 'origin/master' into pr-local-plan

Igor Nikonov 2024-08-15 20:48:56 +00:00
commit 49fc4cb87b
82 changed files with 1666 additions and 395 deletions

.github/actions/debug/action.yml (vendored, new file, 18 lines added)

@@ -0,0 +1,18 @@
name: DebugInfo
description: Prints workflow debug info
runs:
  using: "composite"
  steps:
    - name: Print envs
      shell: bash
      run: |
        echo "::group::Envs"
        env
        echo "::endgroup::"
    - name: Print Event.json
      shell: bash
      run: |
        echo "::group::Event.json"
        python3 -m json.tool "$GITHUB_EVENT_PATH"
        echo "::endgroup::"

.github/workflows/auto_releases.yml (vendored, new file, 109 lines added)

@@ -0,0 +1,109 @@
name: AutoReleases

env:
  PYTHONUNBUFFERED: 1

concurrency:
  group: autoreleases

on:
  # schedule:
  #   - cron: '0 9 * * *'
  workflow_dispatch:
    inputs:
      dry-run:
        description: 'Dry run'
        required: false
        default: true
        type: boolean

jobs:
  AutoReleaseInfo:
    runs-on: [self-hosted, style-checker-aarch64]
    outputs:
      data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }}
      dry_run: ${{ steps.info.outputs.DRY_RUN }}
    steps:
      - name: Debug Info
        uses: ./.github/actions/debug
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
          RCSK
          EOF
          echo "DRY_RUN=true" >> "$GITHUB_ENV"
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
      - name: Prepare Info
        id: info
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --prepare
          echo "::group::Auto Release Info"
          python3 -m json.tool /tmp/autorelease_info.json
          echo "::endgroup::"
          {
            echo 'AUTO_RELEASE_PARAMS<<EOF'
            cat /tmp/autorelease_info.json
            echo 'EOF'
          } >> "$GITHUB_ENV"
          {
            echo 'AUTO_RELEASE_PARAMS<<EOF'
            cat /tmp/autorelease_info.json
            echo 'EOF'
          } >> "$GITHUB_OUTPUT"
          echo "DRY_RUN=true" >> "$GITHUB_OUTPUT"
      - name: Post Release Branch statuses
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --post-status
      - name: Clean up
        uses: ./.github/actions/clean

  Release_0:
    needs: AutoReleaseInfo
    name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }}
    if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }}
    uses: ./.github/workflows/create_release.yml
    with:
      ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }}
      type: patch
      dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }}
#
#  Release_1:
#    needs: [AutoReleaseInfo, Release_0]
#    name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }}
#    if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }}
#    uses: ./.github/workflows/create_release.yml
#    with:
#      ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }}
#      type: patch
#      dry-run: ${{ env.DRY_RUN }}
#
#  Release_2:
#    needs: [AutoReleaseInfo, Release_1]
#    name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }}
#    if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }}
#    uses: ./.github/workflow/create_release.yml
#    with:
#      ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }}
#      type: patch
#      dry-run: ${{ env.DRY_RUN }}
#
#  Release_3:
#    needs: [AutoReleaseInfo, Release_2]
#    name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }}
#    if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }}
#    uses: ./.github/workflow/create_release.yml
#    with:
#      ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }}
#      type: patch
#      dry-run: ${{ env.DRY_RUN }}
#
#  - name: Post Slack Message
#    if: ${{ !cancelled() }}
#    run: |
#      cd "$GITHUB_WORKSPACE/tests/ci"
#      python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}

View File

@@ -2,6 +2,7 @@ name: CreateRelease
concurrency:
  group: release
'on':
  workflow_dispatch:
    inputs:
@@ -26,6 +27,26 @@ concurrency:
        required: false
        default: false
        type: boolean
+  workflow_call:
+    inputs:
+      ref:
+        description: 'Git reference (branch or commit sha) from which to create the release'
+        required: true
+        type: string
+      type:
+        description: 'The type of release: "new" for a new release or "patch" for a patch release'
+        required: true
+        type: string
+      only-repo:
+        description: 'Run only repos updates including docker (repo-recovery, tests)'
+        required: false
+        default: false
+        type: boolean
+      dry-run:
+        description: 'Dry run'
+        required: false
+        default: false
+        type: boolean

jobs:
  CreateRelease:

View File

@@ -359,13 +359,14 @@ DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World
Dates, DateTimes:
```sql
-DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}')
+DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}')
```
```response
-┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ date     │ Nullable(Date)          │              │                    │         │                  │                │
-│ datetime │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
-└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ date       │ Nullable(Date)          │              │                    │         │                  │                │
+│ datetime   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ datetime64 │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Arrays:
@@ -759,12 +760,13 @@ DESC format(CSV, 'Hello world!,World hello!')
Dates, DateTimes:
```sql
-DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"')
+DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00","2022-01-01 00:00:00.000"')
```
```response
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1   │ Nullable(Date)          │              │                    │         │                  │                │
-│ c2   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
@@ -956,12 +958,13 @@ DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n')
Dates, DateTimes:
```sql
-DESC format(TSV, '2020-01-01 2020-01-01 00:00:00')
+DESC format(TSV, '2020-01-01 2020-01-01 00:00:00 2022-01-01 00:00:00.000')
```
```response
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1   │ Nullable(Date)          │              │                    │         │                  │                │
-│ c2   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
@@ -1126,12 +1129,13 @@ DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$)
Dates, DateTimes:
```sql
-DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$)
+DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00', '2022-01-01 00:00:00.000')$$)
```
```response
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1   │ Nullable(Date)          │              │                    │         │                  │                │
-│ c2   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
@@ -1504,8 +1508,8 @@ DESC format(JSONEachRow, $$
#### input_format_try_infer_datetimes
-If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
-If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
+If enabled, ClickHouse will try to infer type `DateTime` or `DateTime64` from string fields in schema inference for text formats.
+If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime` or `DateTime64(9)` (if any datetime had fractional part),
if at least one field was not parsed as datetime, the result type will be `String`.
Enabled by default.
@@ -1513,39 +1517,66 @@ Enabled by default.
**Examples**
```sql
-SET input_format_try_infer_datetimes = 0
+SET input_format_try_infer_datetimes = 0;
DESC format(JSONEachRow, $$
-{"datetime" : "2021-01-01 00:00:00.000"}
-{"datetime" : "2022-01-01 00:00:00.000"}
+{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
+{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
$$)
```
```response
-┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ datetime │ Nullable(String) │              │                    │         │                  │                │
-└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ datetime   │ Nullable(String) │              │                    │         │                  │                │
+│ datetime64 │ Nullable(String) │              │                    │         │                  │                │
+└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
```sql
-SET input_format_try_infer_datetimes = 1
+SET input_format_try_infer_datetimes = 1;
DESC format(JSONEachRow, $$
-{"datetime" : "2021-01-01 00:00:00.000"}
-{"datetime" : "2022-01-01 00:00:00.000"}
+{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
+{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
$$)
```
```response
-┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ datetime │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
-└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ datetime   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ datetime64 │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
```sql
DESC format(JSONEachRow, $$
-{"datetime" : "2021-01-01 00:00:00.000"}
-{"datetime" : "unknown"}
+{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
+{"datetime" : "unknown", "datetime64" : "unknown"}
$$)
```
```response
-┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ datetime │ Nullable(String) │              │                    │         │                  │                │
-└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ datetime   │ Nullable(String) │              │                    │         │                  │                │
+│ datetime64 │ Nullable(String) │              │                    │         │                  │                │
+└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_try_infer_datetimes_only_datetime64
If enabled, ClickHouse will always infer `DateTime64(9)` when `input_format_try_infer_datetimes` is enabled even if datetime values don't contain fractional part.
Disabled by default.
**Examples**
```sql
SET input_format_try_infer_datetimes = 1;
SET input_format_try_infer_datetimes_only_datetime64 = 1;
DESC format(JSONEachRow, $$
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
$$)
```
```response
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │
│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format)

View File

@@ -1042,10 +1042,23 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%.
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values.
High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
-### deduplicate_merge_projection_mode
+## lightweight_mutation_projection_mode
+
+By default, lightweight `DELETE` does not work for tables with projections, because rows in a projection may be affected by a `DELETE` operation; the default value is therefore `throw`.
+Setting this option to `drop` or `rebuild` makes deletes work with projections. `drop` removes the projection, which keeps the current query fast but slows down future queries that could have used it.
+`rebuild` rebuilds the projection, which may slow down the current query but can speed up future ones. Both options work at the part level:
+projections in parts that are not touched by the delete stay intact and are neither dropped nor rebuilt.
+
+Possible values:
+
+- throw, drop, rebuild
+
+Default value: throw
+
+## deduplicate_merge_projection_mode
Whether to allow create projection for the table with non-classic MergeTree, that is not (Replicated, Shared) MergeTree. If allowed, what is the action when merge projections, either drop or rebuild. So classic MergeTree would ignore this setting.
-It also controls `OPTIMIZE DEDUPLICATE` as well, but has effect on all MergeTree family members.
+It also controls `OPTIMIZE DEDUPLICATE`, but has effect on all MergeTree family members. Similar to `lightweight_mutation_projection_mode`, it also applies at the part level.
Possible values:

View File

@@ -5654,3 +5654,9 @@ Possible values:
- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled.
Default value: `0`.
+
+## create_if_not_exists
+
+Enable `IF NOT EXISTS` for `CREATE` statements by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
+
+Default value: `false`.

View File

@@ -38,8 +38,7 @@ If you anticipate frequent deletes, consider using a [custom partitioning key](/
### Lightweight `DELETE`s with projections
-By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation and may require the projection to be rebuilt, negatively affecting `DELETE` performance.
-However, there is an option to change this behavior. By changing setting `lightweight_mutation_projection_mode = 'drop'`, deletes will work with projections.
+By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. However, the [MergeTree setting](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings) `lightweight_mutation_projection_mode` can change this behavior.
## Performance considerations when using lightweight `DELETE`

View File

@@ -490,8 +490,6 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
    /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously.
    auto process_list_element = context_in_use->getProcessListElement();

-   /// Update context to preserve query information in processlist (settings, current_database)
-   process_list_element->updateContext(context_in_use);

    thread_pool.scheduleOrThrowOnError(
        [this,
@@ -855,8 +853,6 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
    /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously.
    auto process_list_element = context_in_use->getProcessListElement();

-   /// Update context to preserve query information in processlist (settings, current_database)
-   process_list_element->updateContext(context_in_use);

    thread_pool.scheduleOrThrowOnError(
        [this,

View File

@@ -244,33 +244,43 @@ const char * analyzeImpl(
                is_trivial = false;
                if (!in_square_braces)
                {
-                   /// Check for case-insensitive flag.
-                   if (pos + 1 < end && pos[1] == '?')
-                   {
-                       for (size_t offset = 2; pos + offset < end; ++offset)
-                       {
-                           if (pos[offset] == '-' /// it means flag negation
-                               /// various possible flags, actually only imsU are supported by re2
-                               || (pos[offset] >= 'a' && pos[offset] <= 'z')
-                               || (pos[offset] >= 'A' && pos[offset] <= 'Z'))
-                           {
-                               if (pos[offset] == 'i')
-                               {
-                                   /// Actually it can be negated case-insensitive flag. But we don't care.
-                                   has_case_insensitive_flag = true;
-                                   break;
-                               }
-                           }
-                           else
-                               break;
-                       }
-                   }
+                   /// it means flag negation
+                   /// there are various possible flags
+                   /// actually only imsU are supported by re2
+                   auto is_flag_char = [](char x)
+                   {
+                       return x == '-' || x == 'i' || x == 'm' || x == 's' || x == 'U' || x == 'u';
+                   };
+                   /// Check for case-insensitive flag.
+                   if (pos + 2 < end && pos[1] == '?' && is_flag_char(pos[2]))
+                   {
+                       size_t offset = 2;
+                       for (; pos + offset < end; ++offset)
+                       {
+                           if (pos[offset] == 'i')
+                           {
+                               /// Actually it can be negated case-insensitive flag. But we don't care.
+                               has_case_insensitive_flag = true;
+                           }
+                           else if (!is_flag_char(pos[offset]))
+                               break;
+                       }
+                       pos += offset;
+                       if (pos == end)
+                           return pos;
+                       /// if this group only contains flags, we have nothing to do.
+                       if (*pos == ')')
+                       {
+                           ++pos;
+                           break;
+                       }
+                   }
                    /// (?:regex) means non-capturing parentheses group
-                   if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
+                   else if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
                    {
                        pos += 2;
                    }
-                   if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
+                   else if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
                    {
                        pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end);
                    }
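
To make the new control flow easier to follow outside of `analyzeImpl`, here is a minimal standalone sketch of the same flag-group handling. `skipFlagGroup` is a hypothetical name used only for illustration, not part of the ClickHouse source:

```cpp
#include <cassert>
#include <cstddef>
#include <string_view>

// Sketch: given a position at '(' in a pattern, detect a flag group such as
// "(?i)", "(?-s)" or "(?im-sU)", record whether case-insensitive matching was
// requested, and return the position right after the group (or the original
// position if this is not a flag group).
const char * skipFlagGroup(const char * pos, const char * end, bool & case_insensitive)
{
    auto is_flag_char = [](char x)
    {
        return x == '-' || x == 'i' || x == 'm' || x == 's' || x == 'U' || x == 'u';
    };

    if (pos + 2 >= end || pos[0] != '(' || pos[1] != '?' || !is_flag_char(pos[2]))
        return pos; /// e.g. "(?:...)" or an ordinary capturing group

    size_t offset = 2;
    for (; pos + offset < end; ++offset)
    {
        if (pos[offset] == 'i')
            case_insensitive = true; /// may actually be a negated flag, ignored here as well
        else if (!is_flag_char(pos[offset]))
            break;
    }

    pos += offset;
    if (pos < end && *pos == ')')
        ++pos; /// the group contained only flags, consume the closing parenthesis
    return pos;
}

int main()
{
    std::string_view s = "(?-s)bob";
    bool ci = false;
    const char * p = skipFlagGroup(s.data(), s.data() + s.size(), ci);
    assert(std::string_view(p) == "bob" && !ci);

    std::string_view s2 = "(?i)abc";
    ci = false;
    p = skipFlagGroup(s2.data(), s2.data() + s2.size(), ci);
    assert(std::string_view(p) == "abc" && ci);
    return 0;
}
```

This mirrors the behaviour exercised by the new `(?-s)bob` / `(?s)bob` unit tests in the next hunk: the flag group is skipped, so the required substring `bob` can still be extracted.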

View File

@@ -19,6 +19,9 @@ TEST(OptimizeRE, analyze)
    };
    test_f("abc", "abc", {}, true, true);
    test_f("c([^k]*)de", "");
+   test_f("(?-s)bob", "bob", {}, false, true);
+   test_f("(?s)bob", "bob", {}, false, true);
+   test_f("(?ssss", "");
    test_f("abc(de)fg", "abcdefg", {}, false, true);
    test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true);
    test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true);

View File

@@ -897,6 +897,7 @@ class IColumn;
    M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \
    M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \
    M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \
+   M(Bool, create_if_not_exists, false, "Enable IF NOT EXISTS for CREATE statements by default", 0) \
    \
    \
    /* ###################################### */ \
@@ -1137,6 +1138,7 @@ class IColumn;
    M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
    M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
    M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
+   M(Bool, input_format_try_infer_datetimes_only_datetime64, false, "When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types", 0) \
    M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)", 0) \
    M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \
    M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \

View File

@@ -75,6 +75,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
    },
    {"24.8",
        {
+           {"create_if_not_exists", false, false, "New setting."},
            {"rows_before_aggregation", true, true, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
            {"restore_replace_external_table_functions_to_null", false, false, "New setting."},
            {"restore_replace_external_engines_to_null", false, false, "New setting."},
@@ -90,6 +91,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
            {"parallel_replicas_local_plan", false, false, "Use local plan for local replica in a query with parallel replicas"},
            {"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
            {"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
+           {"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"}
        }
    },
    {"24.7",

View File

@@ -645,8 +645,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)
        ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);

+       std::string failure_reason;
        bool continue_predownload = file_segment.reserve(
-           current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds);
+           current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, failure_reason);
        if (continue_predownload)
        {
            LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
@@ -1002,7 +1003,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
    {
        chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right);

-       bool success = file_segment.reserve(size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds);
+       std::string failure_reason;
+       bool success = file_segment.reserve(size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, failure_reason);
        if (success)
        {
            chassert(file_segment.getCurrentWriteOffset() == static_cast<size_t>(implementation_buffer->getPosition()));
@@ -1028,7 +1030,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
            LOG_TRACE(log, "Bypassing cache because writeCache method failed");
        }
        else
-           LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size);
+           LOG_TRACE(log, "No space left in cache to reserve {} bytes, reason: {}, "
+               "will continue without cache download", size, failure_reason);

        if (!success)
        {
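
For callers, the change is only an extra out-parameter on `reserve`. A minimal sketch of the new calling convention, using a stub type in place of the real `FileSegment` (names and behaviour are illustrative only, not the ClickHouse cache API):

```cpp
#include <cstddef>
#include <iostream>
#include <string>

// Stub standing in for the cache's file segment; only the signature matters here.
struct FileSegmentStub
{
    bool reserve(size_t /*size*/, size_t /*lock_wait_timeout_ms*/, std::string & failure_reason)
    {
        failure_reason = "cache is full";  // illustrative failure
        return false;
    }
};

int main()
{
    FileSegmentStub segment;
    std::string failure_reason;
    size_t size = 4096;

    if (!segment.reserve(size, /*lock_wait_timeout_ms=*/1000, failure_reason))
        std::cout << "No space left in cache to reserve " << size
                  << " bytes, reason: " << failure_reason
                  << ", will continue without cache download\n";
    return 0;
}
```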

View File

@@ -91,7 +91,8 @@ bool FileSegmentRangeWriter::write(char * data, size_t size, size_t offset, File
    size_t size_to_write = std::min(available_size, size);

-   bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds);
+   std::string failure_reason;
+   bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds, failure_reason);
    if (!reserved)
    {
        appendFilesystemCacheLog(*file_segment);

View File

@@ -63,7 +63,7 @@ void throwIfError(const Aws::Utils::Outcome<Result, Error> & response)
    {
        const auto & err = response.GetError();
        throw S3Exception(
-           fmt::format("{} (Code: {}, s3 exception: {})",
+           fmt::format("{} (Code: {}, S3 exception: '{}')",
                err.GetMessage(), static_cast<size_t>(err.GetErrorType()), err.GetExceptionName()),
            err.GetErrorType());
    }

View File

@@ -419,10 +419,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo
    String result = getAdditionalFormatInfoForAllRowBasedFormats(settings);
    /// First, settings that are common for all text formats:
    result += fmt::format(
-       ", try_infer_integers={}, try_infer_dates={}, try_infer_datetimes={}",
+       ", try_infer_integers={}, try_infer_dates={}, try_infer_datetimes={}, try_infer_datetimes_only_datetime64={}",
        settings.try_infer_integers,
        settings.try_infer_dates,
-       settings.try_infer_datetimes);
+       settings.try_infer_datetimes,
+       settings.try_infer_datetimes_only_datetime64);

    /// Second, format-specific settings:
    switch (escaping_rule)

View File

@@ -266,6 +266,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
    format_settings.try_infer_integers = settings.input_format_try_infer_integers;
    format_settings.try_infer_dates = settings.input_format_try_infer_dates;
    format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes;
+   format_settings.try_infer_datetimes_only_datetime64 = settings.input_format_try_infer_datetimes_only_datetime64;
    format_settings.try_infer_exponent_floats = settings.input_format_try_infer_exponent_floats;
    format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters;
    format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string;

View File

@@ -46,6 +46,7 @@ struct FormatSettings
    bool try_infer_integers = true;
    bool try_infer_dates = true;
    bool try_infer_datetimes = true;
+   bool try_infer_datetimes_only_datetime64 = false;
    bool try_infer_exponent_floats = false;

    enum class DateTimeInputFormat : uint8_t

View File

@@ -306,37 +306,45 @@ namespace
        type_indexes.erase(TypeIndex::UInt64);
    }

-   /// If we have only Date and DateTime types, convert Date to DateTime,
-   /// otherwise, convert all Date and DateTime to String.
+   /// If we have only date/datetimes types (Date/DateTime/DateTime64), convert all of them to the common type,
+   /// otherwise, convert all Date, DateTime and DateTime64 to String.
    void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
        bool have_dates = type_indexes.contains(TypeIndex::Date);
-       bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64);
-       bool all_dates_or_datetimes = (type_indexes.size() == (static_cast<size_t>(have_dates) + static_cast<size_t>(have_datetimes)));
+       bool have_datetimes = type_indexes.contains(TypeIndex::DateTime);
+       bool have_datetimes64 = type_indexes.contains(TypeIndex::DateTime64);
+       bool all_dates_or_datetimes = (type_indexes.size() == (static_cast<size_t>(have_dates) + static_cast<size_t>(have_datetimes) + static_cast<size_t>(have_datetimes64)));

-       if (!all_dates_or_datetimes && (have_dates || have_datetimes))
+       if (!all_dates_or_datetimes && (have_dates || have_datetimes || have_datetimes64))
        {
            for (auto & type : data_types)
            {
-               if (isDate(type) || isDateTime64(type))
+               if (isDate(type) || isDateTime(type) || isDateTime64(type))
                    type = std::make_shared<DataTypeString>();
            }

            type_indexes.erase(TypeIndex::Date);
            type_indexes.erase(TypeIndex::DateTime);
+           type_indexes.erase(TypeIndex::DateTime64);
            type_indexes.insert(TypeIndex::String);
            return;
        }

-       if (have_dates && have_datetimes)
+       for (auto & type : data_types)
        {
-           for (auto & type : data_types)
+           if (isDate(type) && (have_datetimes || have_datetimes64))
            {
-               if (isDate(type))
-                   type = std::make_shared<DataTypeDateTime64>(9);
+               if (have_datetimes64)
+                   type = std::make_shared<DataTypeDateTime64>(9);
+               else
+                   type = std::make_shared<DataTypeDateTime>();
+
+               type_indexes.erase(TypeIndex::Date);
            }
-
-           type_indexes.erase(TypeIndex::Date);
+           else if (isDateTime(type) && have_datetimes64)
+           {
+               type = std::make_shared<DataTypeDateTime64>(9);
+               type_indexes.erase(TypeIndex::DateTime);
+           }
        }
    }
@ -697,55 +705,87 @@ namespace
bool tryInferDate(std::string_view field) bool tryInferDate(std::string_view field)
{ {
if (field.empty()) /// Minimum length of Date text representation is 8 (YYYY-M-D) and maximum is 10 (YYYY-MM-DD)
if (field.size() < 8 || field.size() > 10)
return false; return false;
ReadBufferFromString buf(field);
Float64 tmp_float;
/// Check if it's just a number, and if so, don't try to infer Date from it, /// Check if it's just a number, and if so, don't try to infer Date from it,
/// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01) /// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01)
/// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases. /// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases.
if (tryReadFloatText(tmp_float, buf) && buf.eof()) if (std::all_of(field.begin(), field.end(), isNumericASCII))
return false;
buf.seek(0, SEEK_SET); /// Return position to the beginning
DayNum tmp;
return tryReadDateText(tmp, buf) && buf.eof();
}
bool tryInferDateTime(std::string_view field, const FormatSettings & settings)
{
if (field.empty())
return false; return false;
ReadBufferFromString buf(field); ReadBufferFromString buf(field);
Float64 tmp_float; DayNum tmp;
return tryReadDateText(tmp, buf, DateLUT::instance(), /*allowed_delimiters=*/"-/:") && buf.eof();
}
DataTypePtr tryInferDateTimeOrDateTime64(std::string_view field, const FormatSettings & settings)
{
/// Don't try to infer DateTime if string is too long.
/// It's difficult to say what is the real maximum length of
/// DateTime we can parse using BestEffort approach.
/// 50 symbols is more or less valid limit for date times that makes sense.
if (field.empty() || field.size() > 50)
return nullptr;
/// Check that we have at least one digit, don't infer datetime form strings like "Apr"/"May"/etc.
if (!std::any_of(field.begin(), field.end(), isNumericASCII))
return nullptr;
/// Check if it's just a number, and if so, don't try to infer DateTime from it, /// Check if it's just a number, and if so, don't try to infer DateTime from it,
/// because we can interpret this number as a timestamp and it will lead to /// because we can interpret this number as a timestamp and it will lead to
/// inferring DateTime instead of simple Int64/Float64 in some cases. /// inferring DateTime instead of simple Int64 in some cases.
if (std::all_of(field.begin(), field.end(), isNumericASCII))
return nullptr;
ReadBufferFromString buf(field);
Float64 tmp_float;
/// Check if it's a float value, and if so, don't try to infer DateTime from it,
/// because it will lead to inferring DateTime instead of simple Float64 in some cases.
if (tryReadFloatText(tmp_float, buf) && buf.eof()) if (tryReadFloatText(tmp_float, buf) && buf.eof())
return false; return nullptr;
buf.seek(0, SEEK_SET); /// Return position to the beginning
if (!settings.try_infer_datetimes_only_datetime64)
{
time_t tmp;
switch (settings.date_time_input_format)
{
case FormatSettings::DateTimeInputFormat::Basic:
if (tryReadDateTimeText(tmp, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof())
return std::make_shared<DataTypeDateTime>();
break;
case FormatSettings::DateTimeInputFormat::BestEffort:
if (tryParseDateTimeBestEffortStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof())
return std::make_shared<DataTypeDateTime>();
break;
case FormatSettings::DateTimeInputFormat::BestEffortUS:
if (tryParseDateTimeBestEffortUSStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof())
return std::make_shared<DataTypeDateTime>();
break;
}
}
buf.seek(0, SEEK_SET); /// Return position to the beginning buf.seek(0, SEEK_SET); /// Return position to the beginning
DateTime64 tmp; DateTime64 tmp;
switch (settings.date_time_input_format) switch (settings.date_time_input_format)
{ {
case FormatSettings::DateTimeInputFormat::Basic: case FormatSettings::DateTimeInputFormat::Basic:
if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) if (tryReadDateTime64Text(tmp, 9, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof())
return true; return std::make_shared<DataTypeDateTime64>(9);
break; break;
case FormatSettings::DateTimeInputFormat::BestEffort: case FormatSettings::DateTimeInputFormat::BestEffort:
if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) if (tryParseDateTime64BestEffortStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof())
return true; return std::make_shared<DataTypeDateTime64>(9);
break; break;
case FormatSettings::DateTimeInputFormat::BestEffortUS: case FormatSettings::DateTimeInputFormat::BestEffortUS:
if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) if (tryParseDateTime64BestEffortUSStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof())
return true; return std::make_shared<DataTypeDateTime64>(9);
break; break;
} }
return false; return nullptr;
} }
template <bool is_json> template <bool is_json>
@ -1439,8 +1479,11 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma
if (settings.try_infer_dates && tryInferDate(field)) if (settings.try_infer_dates && tryInferDate(field))
return std::make_shared<DataTypeDate>(); return std::make_shared<DataTypeDate>();
if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) if (settings.try_infer_datetimes)
return std::make_shared<DataTypeDateTime64>(9); {
if (auto type = tryInferDateTimeOrDateTime64(field, settings))
return type;
}
return nullptr; return nullptr;
} }
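
The unification rule that `transformDatesAndDateTimes` implements, together with the widened inference in `tryInferDateTimeOrDateTime64`, can be summarised in a small self-contained sketch. The enum and helper below are illustrative stand-ins for the real `IDataType`/`TypeIndex` machinery, not ClickHouse code:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Illustrative type tags; the real code works on data type pointers and a TypeIndex set.
enum class Tag { Date, DateTime, DateTime64, String, Int64 };

// Sketch of the rule: mixed with non-date types -> date-like types become String;
// otherwise Date widens to DateTime/DateTime64 and DateTime widens to DateTime64
// when at least one value needed sub-second precision.
void unifyDatesAndDateTimes(std::vector<Tag> & types)
{
    auto is_date_like = [](Tag t) { return t == Tag::Date || t == Tag::DateTime || t == Tag::DateTime64; };

    bool have_date_like = std::any_of(types.begin(), types.end(), is_date_like);
    bool all_date_like = std::all_of(types.begin(), types.end(), is_date_like);

    if (have_date_like && !all_date_like)
    {
        for (auto & t : types)
            if (is_date_like(t))
                t = Tag::String;
        return;
    }

    bool have_datetime = std::find(types.begin(), types.end(), Tag::DateTime) != types.end();
    bool have_datetime64 = std::find(types.begin(), types.end(), Tag::DateTime64) != types.end();

    for (auto & t : types)
    {
        if (t == Tag::Date && (have_datetime || have_datetime64))
            t = have_datetime64 ? Tag::DateTime64 : Tag::DateTime;
        else if (t == Tag::DateTime && have_datetime64)
            t = Tag::DateTime64;
    }
}

int main()
{
    std::vector<Tag> a{Tag::Date, Tag::DateTime};      // -> DateTime, DateTime
    unifyDatesAndDateTimes(a);
    assert(a == (std::vector<Tag>{Tag::DateTime, Tag::DateTime}));

    std::vector<Tag> b{Tag::Date, Tag::DateTime64};    // -> DateTime64, DateTime64
    unifyDatesAndDateTimes(b);
    assert(b == (std::vector<Tag>{Tag::DateTime64, Tag::DateTime64}));

    std::vector<Tag> c{Tag::Date, Tag::Int64};          // mixed -> String, Int64
    unifyDatesAndDateTimes(c);
    assert(c == (std::vector<Tag>{Tag::String, Tag::Int64}));
    return 0;
}
```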

View File

@ -1271,7 +1271,7 @@ template void readJSONArrayInto<PaddedPODArray<UInt8>, void>(PaddedPODArray<UInt
template bool readJSONArrayInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf); template bool readJSONArrayInto<PaddedPODArray<UInt8>, bool>(PaddedPODArray<UInt8> & s, ReadBuffer & buf);
template <typename ReturnType> template <typename ReturnType>
ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters)
{ {
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
@ -1318,6 +1318,9 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
} }
else else
{ {
if (!isSymbolIn(*buf.position(), allowed_delimiters))
return error();
++buf.position(); ++buf.position();
if (!append_digit(month)) if (!append_digit(month))
@ -1325,7 +1328,11 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
append_digit(month); append_digit(month);
if (!buf.eof() && !isNumericASCII(*buf.position())) if (!buf.eof() && !isNumericASCII(*buf.position()))
{
if (!isSymbolIn(*buf.position(), allowed_delimiters))
return error();
++buf.position(); ++buf.position();
}
else else
return error(); return error();
@ -1338,12 +1345,12 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf)
return ReturnType(true); return ReturnType(true);
} }
template void readDateTextFallback<void>(LocalDate &, ReadBuffer &); template void readDateTextFallback<void>(LocalDate &, ReadBuffer &, const char * allowed_delimiters);
template bool readDateTextFallback<bool>(LocalDate &, ReadBuffer &); template bool readDateTextFallback<bool>(LocalDate &, ReadBuffer &, const char * allowed_delimiters);
template <typename ReturnType, bool dt64_mode> template <typename ReturnType, bool dt64_mode>
ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters, const char * allowed_time_delimiters)
{ {
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
@ -1413,6 +1420,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3])
|| !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9]))
return false; return false;
if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters))
return false;
} }
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
@ -1443,6 +1453,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4]) if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4])
|| !isNumericASCII(s[6]) || !isNumericASCII(s[7])) || !isNumericASCII(s[6]) || !isNumericASCII(s[7]))
return false; return false;
if (!isSymbolIn(s[2], allowed_time_delimiters) || !isSymbolIn(s[5], allowed_time_delimiters))
return false;
} }
hour = (s[0] - '0') * 10 + (s[1] - '0'); hour = (s[0] - '0') * 10 + (s[1] - '0');
@ -1488,10 +1501,10 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D
return ReturnType(true); return ReturnType(true);
} }
template void readDateTimeTextFallback<void, false>(time_t &, ReadBuffer &, const DateLUTImpl &); template void readDateTimeTextFallback<void, false>(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *);
template void readDateTimeTextFallback<void, true>(time_t &, ReadBuffer &, const DateLUTImpl &); template void readDateTimeTextFallback<void, true>(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *);
template bool readDateTimeTextFallback<bool, false>(time_t &, ReadBuffer &, const DateLUTImpl &); template bool readDateTimeTextFallback<bool, false>(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *);
template bool readDateTimeTextFallback<bool, true>(time_t &, ReadBuffer &, const DateLUTImpl &); template bool readDateTimeTextFallback<bool, true>(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *);
template <typename ReturnType> template <typename ReturnType>

View File

@ -703,13 +703,28 @@ struct NullOutput
}; };
template <typename ReturnType> template <typename ReturnType>
ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf); ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters);
inline bool isSymbolIn(char symbol, const char * symbols)
{
if (symbols == nullptr)
return true;
const char * pos = symbols;
while (*pos)
{
if (*pos == symbol)
return true;
++pos;
}
return false;
}
/// In YYYY-MM-DD format. /// In YYYY-MM-DD format.
/// For convenience, Month and Day parts can have single digit instead of two digits. /// For convenience, Month and Day parts can have single digit instead of two digits.
/// Any separators other than '-' are supported. /// Any separators other than '-' are supported.
template <typename ReturnType = void> template <typename ReturnType = void>
inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr)
{ {
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
@ -753,6 +768,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
} }
else else
{ {
if (!isSymbolIn(pos[-1], allowed_delimiters))
return error();
if (!isNumericASCII(pos[0])) if (!isNumericASCII(pos[0]))
return error(); return error();
@ -768,6 +786,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0])) if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0]))
return error(); return error();
if (!isSymbolIn(pos[-1], allowed_delimiters))
return error();
day = pos[0] - '0'; day = pos[0] - '0';
if (isNumericASCII(pos[1])) if (isNumericASCII(pos[1]))
{ {
@ -783,7 +804,7 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf)
return ReturnType(true); return ReturnType(true);
} }
else else
return readDateTextFallback<ReturnType>(date, buf); return readDateTextFallback<ReturnType>(date, buf, allowed_delimiters);
} }
inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) inline void convertToDayNum(DayNum & date, ExtendedDayNum & from)
@ -797,15 +818,15 @@ inline void convertToDayNum(DayNum & date, ExtendedDayNum & from)
} }
template <typename ReturnType = void> template <typename ReturnType = void>
inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr)
{ {
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
LocalDate local_date; LocalDate local_date;
if constexpr (throw_exception) if constexpr (throw_exception)
readDateTextImpl<ReturnType>(local_date, buf); readDateTextImpl<ReturnType>(local_date, buf, allowed_delimiters);
else if (!readDateTextImpl<ReturnType>(local_date, buf)) else if (!readDateTextImpl<ReturnType>(local_date, buf, allowed_delimiters))
return false; return false;
ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day()); ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day());
@ -814,15 +835,15 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLU
} }
template <typename ReturnType = void> template <typename ReturnType = void>
inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr)
{ {
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
LocalDate local_date; LocalDate local_date;
if constexpr (throw_exception) if constexpr (throw_exception)
readDateTextImpl<ReturnType>(local_date, buf); readDateTextImpl<ReturnType>(local_date, buf, allowed_delimiters);
else if (!readDateTextImpl<ReturnType>(local_date, buf)) else if (!readDateTextImpl<ReturnType>(local_date, buf, allowed_delimiters))
return false; return false;
/// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01.
@ -846,19 +867,19 @@ inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTI
readDateTextImpl<void>(date, buf, date_lut); readDateTextImpl<void>(date, buf, date_lut);
} }
inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr)
{ {
return readDateTextImpl<bool>(date, buf); return readDateTextImpl<bool>(date, buf, allowed_delimiters);
} }
inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr)
{ {
return readDateTextImpl<bool>(date, buf, time_zone); return readDateTextImpl<bool>(date, buf, time_zone, allowed_delimiters);
} }
inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr)
{ {
return readDateTextImpl<bool>(date, buf, time_zone); return readDateTextImpl<bool>(date, buf, time_zone, allowed_delimiters);
} }
UUID parseUUID(std::span<const UInt8> src); UUID parseUUID(std::span<const UInt8> src);
@ -975,13 +996,13 @@ inline T parseFromString(std::string_view str)
template <typename ReturnType = void, bool dt64_mode = false> template <typename ReturnType = void, bool dt64_mode = false>
-ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut);
+ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr);

/** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone.
  * As an exception, also supported parsing of unix timestamp in form of decimal number.
  */
template <typename ReturnType = void, bool dt64_mode = false>
-inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut)
+inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr)
{
    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

@ -1014,6 +1035,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
            if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3])
                || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9]))
                return ReturnType(false);
+
+            if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters))
+                return ReturnType(false);
        }

        UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');

@ -1033,6 +1057,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
            if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15])
                || !isNumericASCII(s[17]) || !isNumericASCII(s[18]))
                return ReturnType(false);
+
+            if (!isSymbolIn(s[13], allowed_time_delimiters) || !isSymbolIn(s[16], allowed_time_delimiters))
+                return ReturnType(false);
        }

        hour = (s[11] - '0') * 10 + (s[12] - '0');

@ -1057,11 +1084,11 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons
            return readIntTextImpl<time_t, ReturnType, ReadIntTextCheckOverflow::CHECK_OVERFLOW>(datetime, buf);
    }
    else
-        return readDateTimeTextFallback<ReturnType, dt64_mode>(datetime, buf, date_lut);
+        return readDateTimeTextFallback<ReturnType, dt64_mode>(datetime, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters);
}

template <typename ReturnType>
-inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut)
+inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr)
{
    static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;

@ -1075,7 +1102,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
    {
        try
        {
-            readDateTimeTextImpl<ReturnType, true>(whole, buf, date_lut);
+            readDateTimeTextImpl<ReturnType, true>(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters);
        }
        catch (const DB::Exception &)
        {

@ -1085,7 +1112,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
    }
    else
    {
-        auto ok = readDateTimeTextImpl<ReturnType, true>(whole, buf, date_lut);
+        auto ok = readDateTimeTextImpl<ReturnType, true>(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters);
        if (!ok && (buf.eof() || *buf.position() != '.'))
            return ReturnType(false);
    }

@ -1168,14 +1195,14 @@ inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer
    readDateTimeTextImpl<void>(datetime64, scale, buf, date_lut);
}

-inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance())
+inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr)
{
-    return readDateTimeTextImpl<bool>(datetime, buf, time_zone);
+    return readDateTimeTextImpl<bool>(datetime, buf, time_zone, allowed_date_delimiters, allowed_time_delimiters);
}

-inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance())
+inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr)
{
-    return readDateTimeTextImpl<bool>(datetime64, scale, buf, date_lut);
+    return readDateTimeTextImpl<bool>(datetime64, scale, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters);
}

inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf)
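The new checks rely on an isSymbolIn helper together with the allowed_date_delimiters / allowed_time_delimiters parameters. A minimal self-contained sketch of the intended semantics (an assumption: a null allowed set accepts any delimiter, which preserves the old default behaviour; the name is hypothetical so it is not confused with the real helper):

#include <cassert>
#include <cstring>

/// Hypothetical stand-in for isSymbolIn(): nullptr accepts everything,
/// otherwise the character must be one of the allowed delimiters.
static bool isSymbolInSketch(char symbol, const char * allowed)
{
    if (allowed == nullptr)
        return true;
    return std::strchr(allowed, symbol) != nullptr;
}

int main()
{
    assert(isSymbolInSketch('/', nullptr));   /// default: everything allowed
    assert(isSymbolInSketch('-', "-"));       /// "2024-08-15" passes with allowed_date_delimiters = "-"
    assert(!isSymbolInSketch('/', "-"));      /// "2024/08/15" is rejected
}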
View File
@ -145,12 +145,16 @@ Aws::String AWSEC2MetadataClient::getDefaultCredentialsSecurely() const
{
    String user_agent_string = awsComputeUserAgentString();
    auto [new_token, response_code] = getEC2MetadataToken(user_agent_string);
-    if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST)
+    if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST
+        || response_code == Aws::Http::HttpResponseCode::REQUEST_NOT_MADE)
+    {
+        /// At least the host should be available and reply, otherwise neither IMDSv2 nor IMDSv1 is usable.
        return {};
+    }
    else if (response_code != Aws::Http::HttpResponseCode::OK || new_token.empty())
    {
        LOG_TRACE(logger, "Calling EC2MetadataService to get token failed, "
-            "falling back to less secure way. HTTP response code: {}", response_code);
+            "falling back to a less secure way. HTTP response code: {}", response_code);
        return getDefaultCredentials();
    }

@ -247,7 +251,7 @@ static Aws::String getAWSMetadataEndpoint()
    return ec2_metadata_service_endpoint;
}

-std::shared_ptr<AWSEC2MetadataClient> InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
+std::shared_ptr<AWSEC2MetadataClient> createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration)
{
    auto endpoint = getAWSMetadataEndpoint();
    return std::make_shared<AWSEC2MetadataClient>(client_configuration, endpoint.c_str());

@ -781,11 +785,13 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
        /// EC2MetadataService throttles by delaying the response so the service client should set a large read timeout.
        /// EC2MetadataService delay is in order of seconds so it only makes sense to retry after a couple of seconds.
-        aws_client_configuration.connectTimeoutMs = 1000;
+        /// But the connection timeout should be small, because there are cases when there is no IMDS at all,
+        /// for example outside of the cloud, on your own machines.
+        aws_client_configuration.connectTimeoutMs = 10;
        aws_client_configuration.requestTimeoutMs = 1000;

        aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);
-        auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration);
+        auto ec2_metadata_client = createEC2MetadataClient(aws_client_configuration);
        auto config_loader = std::make_shared<AWSEC2InstanceProfileConfigLoader>(ec2_metadata_client, !credentials_configuration.use_insecure_imds_request);

        AddProvider(std::make_shared<AWSInstanceProfileCredentialsProvider>(config_loader));
View File
@ -70,7 +70,7 @@ private:
    LoggerPtr logger;
};

-std::shared_ptr<AWSEC2MetadataClient> InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration);
+std::shared_ptr<AWSEC2MetadataClient> createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration);

class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLoader
{
View File
@ -128,7 +128,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion()
        }
        else
        {
-            /// In global mode AWS C++ SDK send `us-east-1` but accept switching to another one if being suggested.
+            /// In global mode AWS C++ SDK sends `us-east-1` but accepts switching to another one if being suggested.
            region = Aws::Region::AWS_GLOBAL;
        }
    }
View File
@ -1,8 +1,8 @@
#include <IO/S3/URI.h>
-#include <Interpreters/Context.h>
-#include <Storages/NamedCollectionsHelpers.h>
-#include "Common/Macros.h"
#if USE_AWS_S3
+#include <Interpreters/Context.h>
+#include <Common/Macros.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Common/re2.h>

@ -10,6 +10,7 @@
#include <boost/algorithm/string/case_conv.hpp>

namespace DB
{

@ -40,21 +41,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
    /// Case when AWS Private Link Interface is being used
    /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key)
    /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html
-    static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com(:\d{1,5})?)");
+    static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce\.amazonaws\.com(:\d{1,5})?)");

-    /// Case when bucket name and key represented in path of S3 URL.
+    /// Case when bucket name and key are represented in the path of the S3 URL.
    /// E.g. (https://s3.region.amazonaws.com/bucket-name/key)
    /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access
    static const RE2 path_style_pattern("^/([^/]*)/(.*)");

-    static constexpr auto S3 = "S3";
-    static constexpr auto S3EXPRESS = "S3EXPRESS";
-    static constexpr auto COSN = "COSN";
-    static constexpr auto COS = "COS";
-    static constexpr auto OBS = "OBS";
-    static constexpr auto OSS = "OSS";
-    static constexpr auto EOS = "EOS";

    if (allow_archive_path_syntax)
        std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_);
    else

@ -85,7 +78,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
        URIConverter::modifyURI(uri, mapper);
    }

-    storage_name = S3;
+    storage_name = "S3";

    if (uri.getHost().empty())
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI.");

@ -93,11 +86,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
    /// Extract object version ID from query string.
    bool has_version_id = false;
    for (const auto & [query_key, query_value] : uri.getQueryParameters())
+    {
        if (query_key == "versionId")
        {
            version_id = query_value;
            has_version_id = true;
        }
+    }

    /// Poco::URI will ignore '?' when parsing the path, but if there is a versionId in the http parameter,
    /// '?' cannot be used as a wildcard, otherwise it will be ambiguous.

@ -129,15 +124,8 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
        }

        boost::to_upper(name);
-        /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info
-        if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS)
-            throw Exception(
-                ErrorCodes::BAD_ARGUMENTS,
-                "Object storage system name is unrecognized in virtual hosted style S3 URI: {}",
-                quoteString(name));
-        if (name == COS)
-            storage_name = COSN;
+        if (name == "COS")
+            storage_name = "COSN";
        else
            storage_name = name;
    }

@ -148,13 +136,22 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax)
        validateBucket(bucket, uri);
    }
    else
-        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI.");
+    {
+        /// Custom endpoint, e.g. a public domain of Cloudflare R2,
+        /// which could be served by custom server-side code.
+        storage_name = "S3";
+        bucket = "default";
+        is_virtual_hosted_style = false;
+        endpoint = uri.getScheme() + "://" + uri.getAuthority();
+        if (!uri.getPath().empty())
+            key = uri.getPath().substr(1);
+    }
}

void URI::addRegionToURI(const std::string &region)
{
-    if (auto pos = endpoint.find("amazonaws.com"); pos != std::string::npos)
-        endpoint = endpoint.substr(0, pos) + region + "." + endpoint.substr(pos);
+    if (auto pos = endpoint.find(".amazonaws.com"); pos != std::string::npos)
+        endpoint = endpoint.substr(0, pos) + "." + region + endpoint.substr(pos);
}

void URI::validateBucket(const String & bucket, const Poco::URI & uri)
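A hedged usage sketch of the new fallback branch (assumptions: a ClickHouse build with USE_AWS_S3, and a hypothetical Cloudflare-R2-style host; whether this branch is taken also depends on the virtual-hosted-style and path-style checks above, so the path here deliberately has no second slash):

#include <IO/S3/URI.h>
#include <iostream>

int main()
{
    /// A custom endpoint that matches neither the virtual-hosted-style nor the
    /// path-style pattern no longer throws; it falls back to bucket = "default".
    DB::S3::URI uri("https://pub-example.r2.dev/key.csv");  /// hypothetical host
    std::cout << uri.endpoint << ' ' << uri.bucket << ' ' << uri.key << '\n';
    /// Expected under the change above: https://pub-example.r2.dev default key.csv
}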
View File
@ -1,14 +1,14 @@
#pragma once

-#include <optional>
-#include <string>
#include "config.h"

#if USE_AWS_S3

+#include <optional>
+#include <string>
#include <Poco/URI.h>

namespace DB::S3
{

@ -23,7 +23,7 @@ namespace DB::S3
struct URI
{
    Poco::URI uri;
-    // Custom endpoint if URI scheme is not S3.
+    // Custom endpoint, if the URI scheme is not S3.
    std::string endpoint;
    std::string bucket;
    std::string key;
View File
@ -82,13 +82,14 @@ struct DateTimeSubsecondPart
    UInt8 digits;
};

-template <typename ReturnType, bool is_us_style>
+template <typename ReturnType, bool is_us_style, bool strict = false, bool is_64 = false>
ReturnType parseDateTimeBestEffortImpl(
    time_t & res,
    ReadBuffer & in,
    const DateLUTImpl & local_time_zone,
    const DateLUTImpl & utc_time_zone,
-    DateTimeSubsecondPart * fractional)
+    DateTimeSubsecondPart * fractional,
+    const char * allowed_date_delimiters = nullptr)
{
    auto on_error = [&]<typename... FmtArgs>(int error_code [[maybe_unused]],
                                             FormatStringHelper<FmtArgs...> fmt_string [[maybe_unused]],

@ -170,22 +171,36 @@ ReturnType parseDateTimeBestEffortImpl(
                fractional->digits = 3;
                readDecimalNumber<3>(fractional->value, digits + 10);
            }
+            else if constexpr (strict)
+            {
+                /// Fractional part is not allowed.
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part");
+            }
            return ReturnType(true);
        }
        else if (num_digits == 10 && !year && !has_time)
        {
+            if (strict && month)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated");
+
            /// This is unix timestamp.
            readDecimalNumber<10>(res, digits);
            return ReturnType(true);
        }
        else if (num_digits == 9 && !year && !has_time)
        {
+            if (strict && month)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated");
+
            /// This is unix timestamp.
            readDecimalNumber<9>(res, digits);
            return ReturnType(true);
        }
        else if (num_digits == 14 && !year && !has_time)
        {
+            if (strict && month)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated");
+
            /// This is YYYYMMDDhhmmss
            readDecimalNumber<4>(year, digits);
            readDecimalNumber<2>(month, digits + 4);

@ -197,6 +212,9 @@ ReturnType parseDateTimeBestEffortImpl(
        }
        else if (num_digits == 8 && !year)
        {
+            if (strict && month)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated");
+
            /// This is YYYYMMDD
            readDecimalNumber<4>(year, digits);
            readDecimalNumber<2>(month, digits + 4);

@ -272,6 +290,9 @@ ReturnType parseDateTimeBestEffortImpl(
                else
                    return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after year and month: {}", num_digits);
            }
+
+            if (!isSymbolIn(delimiter_after_year, allowed_date_delimiters))
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: '{}' delimiter between date parts is not allowed", delimiter_after_year);
        }
    }
    else if (num_digits == 2 || num_digits == 1)

@ -403,9 +424,16 @@ ReturnType parseDateTimeBestEffortImpl(
            else
            {
                if (day_of_month)
+                {
+                    if (strict && hour)
+                        return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: hour component is duplicated");
+
                    hour = hour_or_day_of_month_or_month;
+                }
                else
+                {
                    day_of_month = hour_or_day_of_month_or_month;
+                }
            }
        }
        else if (num_digits != 0)

@ -446,6 +474,11 @@ ReturnType parseDateTimeBestEffortImpl(
                fractional->digits = num_digits;
                readDecimalNumber(fractional->value, num_digits, digits);
            }
+            else if (strict)
+            {
+                /// Fractional part is not allowed.
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part");
+            }
        }
        else if (c == '+' || c == '-')
        {

@ -582,12 +615,24 @@ ReturnType parseDateTimeBestEffortImpl(
        return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: neither Date nor Time was parsed successfully");

    if (!day_of_month)
+    {
+        if constexpr (strict)
+            return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: day of month is required");
        day_of_month = 1;
+    }
+
    if (!month)
+    {
+        if constexpr (strict)
+            return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month is required");
        month = 1;
+    }

    if (!year)
    {
+        if constexpr (strict)
+            return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year is required");
+
        /// If year is not specified, it will be the current year if the date is unknown or not greater than today,
        /// otherwise it will be the previous year.
        /// This convoluted logic is needed to parse the syslog format, which looks as follows: "Mar 3 01:33:48".

@ -641,6 +686,20 @@ ReturnType parseDateTimeBestEffortImpl(
        }
    };

+    if constexpr (strict)
+    {
+        if constexpr (is_64)
+        {
+            if (year < 1900)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime64: year {} is less than minimum supported year 1900", year);
+        }
+        else
+        {
+            if (year < 1970)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year {} is less than minimum supported year 1970", year);
+        }
+    }
+
    if (has_time_zone_offset)
    {
        res = utc_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second);

@ -654,20 +713,20 @@ ReturnType parseDateTimeBestEffortImpl(
    return ReturnType(true);
}

-template <typename ReturnType, bool is_us_style>
-ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone)
+template <typename ReturnType, bool is_us_style, bool strict = false>
+ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters = nullptr)
{
    time_t whole;
    DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized since it could be missing from the input string

    if constexpr (std::is_same_v<ReturnType, bool>)
    {
-        if (!parseDateTimeBestEffortImpl<bool, is_us_style>(whole, in, local_time_zone, utc_time_zone, &subsecond))
+        if (!parseDateTimeBestEffortImpl<bool, is_us_style, strict, true>(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters))
            return false;
    }
    else
    {
-        parseDateTimeBestEffortImpl<ReturnType, is_us_style>(whole, in, local_time_zone, utc_time_zone, &subsecond);
+        parseDateTimeBestEffortImpl<ReturnType, is_us_style, strict, true>(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters);
    }

@ -730,4 +789,24 @@ bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer &
    return parseDateTime64BestEffortImpl<bool, true>(res, scale, in, local_time_zone, utc_time_zone);
}

+bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters)
+{
+    return parseDateTimeBestEffortImpl<bool, false, true>(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters);
+}
+
+bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters)
+{
+    return parseDateTimeBestEffortImpl<bool, true, true>(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters);
+}
+
+bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters)
+{
+    return parseDateTime64BestEffortImpl<bool, false, true>(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters);
+}
+
+bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters)
+{
+    return parseDateTime64BestEffortImpl<bool, true, true>(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters);
+}
+
}
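A minimal usage sketch of the new strict entry points (header paths and the "UTC" time zone name are assumptions; the strict variant requires an explicit year, month and day and restricts date delimiters to the given set):

#include <IO/ReadBufferFromString.h>
#include <IO/parseDateTimeBestEffort.h>
#include <Common/DateLUT.h>
#include <iostream>

int main()
{
    const DateLUTImpl & utc = DateLUT::instance("UTC");
    time_t res = 0;

    DB::ReadBufferFromString ok_in("2024-08-15 20:48:56");
    /// Only '-' is allowed between date components here.
    std::cout << DB::tryParseDateTimeBestEffortStrict(res, ok_in, utc, utc, "-") << '\n';   /// expected: 1

    DB::ReadBufferFromString bad_in("15/08/2024");
    /// '/' is not in the allowed set, so strict parsing should fail.
    std::cout << DB::tryParseDateTimeBestEffortStrict(res, bad_in, utc, utc, "-") << '\n';  /// expected: 0
}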
View File
@ -63,4 +63,12 @@ void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in,
bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone);
void parseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone);
bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone);

+/// More strict version of best-effort parsing. Requires day, month and year to be present, checks for allowed
+/// delimiters between date components and makes additional correctness checks. Used in schema inference of date times.
+bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters);
+bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters);
+bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters);
+bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters);

}
View File
@ -206,11 +206,6 @@ TEST(S3UriTest, validPatterns)
    }
}

-TEST_P(S3UriTest, invalidPatterns)
-{
-    ASSERT_ANY_THROW(S3::URI new_uri(GetParam()));
-}
-
TEST(S3UriTest, versionIdChecks)
{
    for (const auto& test_case : TestCases)

@ -223,19 +218,5 @@ TEST(S3UriTest, versionIdChecks)
    }
}

-INSTANTIATE_TEST_SUITE_P(
-    S3,
-    S3UriTest,
-    testing::Values(
-        "https:///",
-        "https://.s3.amazonaws.com/key",
-        "https://s3.amazonaws.com/key",
-        "https://jokserfn.s3amazonaws.com/key",
-        "https://s3.amazonaws.com//",
-        "https://amazonaws.com/",
-        "https://amazonaws.com//",
-        "https://amazonaws.com//key"));
-
}

#endif
View File
@ -804,7 +804,8 @@ bool FileCache::tryReserve(
    const size_t size,
    FileCacheReserveStat & reserve_stat,
    const UserInfo & user,
-    size_t lock_wait_timeout_milliseconds)
+    size_t lock_wait_timeout_milliseconds,
+    std::string & failure_reason)
{
    ProfileEventTimeIncrement<Microseconds> watch(ProfileEvents::FilesystemCacheReserveMicroseconds);

@ -817,6 +818,7 @@ bool FileCache::tryReserve(
    if (cache_is_being_resized.load(std::memory_order_relaxed))
    {
        ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize);
+        failure_reason = "cache is being resized";
        return false;
    }

@ -824,6 +826,7 @@ bool FileCache::tryReserve(
    if (!cache_lock)
    {
        ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention);
+        failure_reason = "cache contention";
        return false;
    }

@ -847,6 +850,7 @@ bool FileCache::tryReserve(
            LOG_TEST(log, "Query limit exceeded, space reservation failed, "
                     "recache_on_query_limit_exceeded is disabled (while reserving for {}:{})",
                     file_segment.key(), file_segment.offset());
+            failure_reason = "query limit exceeded";
            return false;
        }

@ -877,6 +881,7 @@ bool FileCache::tryReserve(
        if (!query_priority->collectCandidatesForEviction(
                size, required_elements_num, reserve_stat, eviction_candidates, {}, user.user_id, cache_lock))
        {
+            failure_reason = "cannot evict enough space for query limit";
            return false;
        }

@ -891,11 +896,15 @@ bool FileCache::tryReserve(
    if (!main_priority->collectCandidatesForEviction(
            size, required_elements_num, reserve_stat, eviction_candidates, queue_iterator, user.user_id, cache_lock))
    {
+        failure_reason = "cannot evict enough space";
        return false;
    }

    if (!file_segment.getKeyMetadata()->createBaseDirectory())
+    {
+        failure_reason = "not enough space on device";
        return false;
+    }

    if (eviction_candidates.size() > 0)
    {
View File
@ -165,7 +165,8 @@ public:
        size_t size,
        FileCacheReserveStat & stat,
        const UserInfo & user,
-        size_t lock_wait_timeout_milliseconds);
+        size_t lock_wait_timeout_milliseconds,
+        std::string & failure_reason);

    std::vector<FileSegment::Info> getFileSegmentInfos(const UserID & user_id);
View File
@ -502,7 +502,11 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const
    return metadata->tryLock();
}

-bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat)
+bool FileSegment::reserve(
+    size_t size_to_reserve,
+    size_t lock_wait_timeout_milliseconds,
+    std::string & failure_reason,
+    FileCacheReserveStat * reserve_stat)
{
    if (!size_to_reserve)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed");

@ -554,7 +558,7 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli
    if (!reserve_stat)
        reserve_stat = &dummy_stat;

-    bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds);
+    bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds, failure_reason);

    if (!reserved)
        setDownloadFailedUnlocked(lock());
View File
@ -201,7 +201,11 @@ public:
    /// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded).
    /// Returns true if reservation was successful, false otherwise.
-    bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr);
+    bool reserve(
+        size_t size_to_reserve,
+        size_t lock_wait_timeout_milliseconds,
+        std::string & failure_reason,
+        FileCacheReserveStat * reserve_stat = nullptr);

    /// Write data into reserved space.
    void write(char * from, size_t size, size_t offset_in_file);
View File
@ -705,7 +705,8 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optional<Memor
    {
        auto size = reader->available();

-        if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds))
+        std::string failure_reason;
+        if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds, failure_reason))
        {
            LOG_TEST(
                log, "Failed to reserve space during background download "
View File
@ -75,7 +75,8 @@ void WriteBufferToFileSegment::nextImpl()
    FileCacheReserveStat reserve_stat;
    /// In case of an error, we don't need to finalize the file segment
    /// because it will be deleted soon and completed in the holder's destructor.
-    bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, &reserve_stat);
+    std::string failure_reason;
+    bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, failure_reason, &reserve_stat);

    if (!ok)
    {

@ -84,9 +85,10 @@ void WriteBufferToFileSegment::nextImpl()
            reserve_stat_msg += fmt::format("{} hold {}, can release {}; ",
                toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size));

-        throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})",
+        throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: reason {}, {}(segment info: {})",
            bytes_to_write,
            file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache",
+            failure_reason,
            reserve_stat_msg,
            file_segment->getInfoForLog()
            );
View File
@ -1944,6 +1944,8 @@ BlockIO InterpreterCreateQuery::execute()
    FunctionNameNormalizer::visit(query_ptr.get());
    auto & create = query_ptr->as<ASTCreateQuery &>();

+    create.if_not_exists |= getContext()->getSettingsRef().create_if_not_exists;
+
    bool is_create_database = create.database && !create.table;
    if (!create.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext()))
    {
View File
@ -244,9 +246,6 @@ public:
    /// Same as checkTimeLimit but it never throws
    [[nodiscard]] bool checkTimeLimitSoft();

-    /// Use it in case of the query left in background to execute asynchronously
-    void updateContext(ContextWeakPtr weak_context) { context = std::move(weak_context); }
-
    /// Get the reference for the start of the query. Used to synchronize with other Stopwatches
    UInt64 getQueryCPUStartTime() { return watch.getStart(); }
};
View File
@ -246,7 +246,8 @@ void download(FileSegment & file_segment)
    ASSERT_EQ(file_segment.state(), State::DOWNLOADING);
    ASSERT_EQ(file_segment.getDownloadedSize(), 0);

-    ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000));
+    std::string failure_reason;
+    ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000, failure_reason));
    download(cache_base_path, file_segment);
    ASSERT_EQ(file_segment.state(), State::DOWNLOADING);

@ -258,7 +259,8 @@ void assertDownloadFails(FileSegment & file_segment)
{
    ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId());
    ASSERT_EQ(file_segment.getDownloadedSize(), 0);
-    ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000));
+    std::string failure_reason;
+    ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000, failure_reason));
    file_segment.complete();
}

@ -957,10 +959,11 @@ TEST_F(FileCacheTest, temporaryData)
    {
        ASSERT_EQ(some_data_holder->size(), 5);

+        std::string failure_reason;
        for (auto & segment : *some_data_holder)
        {
            ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId());
-            ASSERT_TRUE(segment->reserve(segment->range().size(), 1000));
+            ASSERT_TRUE(segment->reserve(segment->range().size(), 1000, failure_reason));
            download(*segment);
            segment->complete();
        }
View File
@ -96,7 +96,7 @@ bool ExecutingGraph::addEdges(uint64_t node)
    return was_edge_added;
}

-bool ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
+ExecutingGraph::UpdateNodeStatus ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
{
    auto & cur_node = *nodes[pid];
    Processors new_processors;

@ -108,7 +108,7 @@ bool ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
    catch (...)
    {
        cur_node.exception = std::current_exception();
-        return false;
+        return UpdateNodeStatus::Exception;
    }

    {

@ -118,7 +118,7 @@ bool ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
        {
            for (auto & processor : new_processors)
                processor->cancel();
-            return false;
+            return UpdateNodeStatus::Cancelled;
        }

        processors->insert(processors->end(), new_processors.begin(), new_processors.end());

@ -178,7 +178,7 @@ bool ExecutingGraph::expandPipeline(std::stack<uint64_t> & stack, uint64_t pid)
        }
    }

-    return true;
+    return UpdateNodeStatus::Done;
}

void ExecutingGraph::initializeExecution(Queue & queue)

@ -213,7 +213,7 @@ void ExecutingGraph::initializeExecution(Queue & queue)
}

-bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue)
+ExecutingGraph::UpdateNodeStatus ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue)
{
    std::stack<Edge *> updated_edges;
    std::stack<uint64_t> updated_processors;

@ -309,7 +309,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
            catch (...)
            {
                node.exception = std::current_exception();
-                return false;
+                return UpdateNodeStatus::Exception;
            }

#ifndef NDEBUG

@ -386,8 +386,9 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
            read_lock.unlock();
            {
                std::unique_lock lock(nodes_mutex);
-                if (!expandPipeline(updated_processors, pid))
-                    return false;
+                auto status = expandPipeline(updated_processors, pid);
+                if (status != UpdateNodeStatus::Done)
+                    return status;
            }
            read_lock.lock();

@ -397,7 +398,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue
        }
    }

-    return true;
+    return UpdateNodeStatus::Done;
}

void ExecutingGraph::cancel(bool cancel_all_processors)
View File
@ -138,10 +138,17 @@ public:
    /// Traverse graph the first time to update all the childless nodes.
    void initializeExecution(Queue & queue);

+    enum class UpdateNodeStatus
+    {
+        Done,
+        Exception,
+        Cancelled,
+    };
+
    /// Update processor with pid number (call IProcessor::prepare).
    /// Check parents and children of current processor and push them to stacks if they also need to be updated.
    /// If processor wants to be expanded, lock will be upgraded to get write access to pipeline.
-    bool updateNode(uint64_t pid, Queue & queue, Queue & async_queue);
+    UpdateNodeStatus updateNode(uint64_t pid, Queue & queue, Queue & async_queue);

    void cancel(bool cancel_all_processors = true);

@ -155,7 +162,7 @@ private:
    /// Update graph after processor (pid) returned ExpandPipeline status.
    /// All new nodes and nodes with updated ports are pushed into stack.
-    bool expandPipeline(std::stack<uint64_t> & stack, uint64_t pid);
+    UpdateNodeStatus expandPipeline(std::stack<uint64_t> & stack, uint64_t pid);

    std::shared_ptr<Processors> processors;
    std::vector<bool> source_processors;
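The tri-state result replaces the old bool so that callers can tell an exception apart from a cancellation. A self-contained sketch of the calling pattern (illustrative names only, not the real executor):

#include <cstdio>

enum class UpdateNodeStatus { Done, Exception, Cancelled };

/// Illustrative stand-in for ExecutingGraph::updateNode().
UpdateNodeStatus updateNodeSketch(bool throws, bool cancelled)
{
    if (throws)
        return UpdateNodeStatus::Exception;
    if (cancelled)
        return UpdateNodeStatus::Cancelled;
    return UpdateNodeStatus::Done;
}

int main()
{
    switch (updateNodeSketch(false, true))
    {
        case UpdateNodeStatus::Done:      std::puts("push follow-up tasks"); break;
        case UpdateNodeStatus::Exception: std::puts("cancel with Exception status"); break;
        case UpdateNodeStatus::Cancelled: std::puts("stop quietly, no new tasks"); break;
    }
}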
View File
@ -77,9 +77,9 @@ const Processors & PipelineExecutor::getProcessors() const
    return graph->getProcessors();
}

-void PipelineExecutor::cancel()
+void PipelineExecutor::cancel(ExecutionStatus reason)
{
-    cancelled = true;
+    tryUpdateExecutionStatus(ExecutionStatus::Executing, reason);
    finish();
    graph->cancel();
}

@ -98,6 +98,11 @@ void PipelineExecutor::finish()
    tasks.finish();
}

+bool PipelineExecutor::tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired)
+{
+    return execution_status.compare_exchange_strong(expected, desired);
+}
+
void PipelineExecutor::execute(size_t num_threads, bool concurrency_control)
{
    checkTimeLimit();

@ -120,7 +125,7 @@ void PipelineExecutor::execute(size_t num_threads, bool concurrency_control)
    }
    catch (...)
    {
-        span.addAttribute(ExecutionStatus::fromCurrentException());
+        span.addAttribute(DB::ExecutionStatus::fromCurrentException());

#ifndef NDEBUG
        LOG_TRACE(log, "Exception while executing query. Current state:\n{}", dumpPipeline());

@ -169,7 +174,7 @@ bool PipelineExecutor::checkTimeLimitSoft()
    // We call cancel here so that all processors are notified and tasks are woken up
    // so that the "break" is faster and doesn't wait for long events
    if (!continuing)
-        cancel();
+        cancel(ExecutionStatus::CancelledByTimeout);

    return continuing;
}

@ -195,7 +200,8 @@ void PipelineExecutor::finalizeExecution()
{
    checkTimeLimit();

-    if (cancelled)
+    auto status = execution_status.load();
+    if (status == ExecutionStatus::CancelledByTimeout || status == ExecutionStatus::CancelledByUser)
        return;

    bool all_processors_finished = true;

@ -271,7 +277,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie
                break;

            if (!context.executeTask())
-                cancel();
+                cancel(ExecutionStatus::Exception);

            if (tasks.isFinished())
                break;

@ -289,11 +295,13 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie
                Queue async_queue;

                /// Prepare processor after execution.
-                if (!graph->updateNode(context.getProcessorID(), queue, async_queue))
-                    cancel();
+                auto status = graph->updateNode(context.getProcessorID(), queue, async_queue);
+                if (status == ExecutingGraph::UpdateNodeStatus::Exception)
+                    cancel(ExecutionStatus::Exception);

                /// Push other tasks to global queue.
-                tasks.pushTasks(queue, async_queue, context);
+                if (status == ExecutingGraph::UpdateNodeStatus::Done)
+                    tasks.pushTasks(queue, async_queue, context);
            }

#ifndef NDEBUG

@ -309,7 +317,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie
    {
        /// spawnThreads can throw an exception, for example CANNOT_SCHEDULE_TASK.
        /// We should cancel execution properly before rethrow.
-        cancel();
+        cancel(ExecutionStatus::Exception);
        throw;
    }

@ -328,6 +336,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie
void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_control)
{
    is_execution_initialized = true;
+    tryUpdateExecutionStatus(ExecutionStatus::NotStarted, ExecutionStatus::Executing);

    size_t use_threads = num_threads;

@ -393,7 +402,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control)
    {
        /// If finished_flag is not set, there was an exception.
        /// Cancel execution in this case.
-        cancel();
+        cancel(ExecutionStatus::Exception);
        if (pool)
            pool->wait();
    }
View File
@ -48,8 +48,20 @@ public:

    const Processors & getProcessors() const;

+    enum class ExecutionStatus
+    {
+        NotStarted,
+        Executing,
+        Finished,
+        Exception,
+        CancelledByUser,
+        CancelledByTimeout,
+    };
+
    /// Cancel execution. May be called from another thread.
-    void cancel();
+    void cancel() { cancel(ExecutionStatus::CancelledByUser); }
+
+    ExecutionStatus getExecutionStatus() const { return execution_status.load(); }

    /// Cancel processors which only read data from source. May be called from another thread.
    void cancelReading();

@ -81,7 +93,7 @@ private:
    /// system.opentelemetry_span_log
    bool trace_processors = false;

-    std::atomic_bool cancelled = false;
+    std::atomic<ExecutionStatus> execution_status = ExecutionStatus::NotStarted;
    std::atomic_bool cancelled_reading = false;

    LoggerPtr log = getLogger("PipelineExecutor");

@ -105,6 +117,10 @@ private:
    void executeStepImpl(size_t thread_num, std::atomic_bool * yield_flag = nullptr);
    void executeSingleThread(size_t thread_num);
    void finish();
+    void cancel(ExecutionStatus reason);
+
+    /// If execution_status == expected, change it to desired.
+    bool tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired);

    String dumpPipeline() const;
};
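execution_status is advanced with compare_exchange_strong, so a transition only happens from the exact expected state; for example, a later timeout cannot overwrite a status that already moved past Executing. A self-contained sketch of the idiom (illustrative, not the executor itself):

#include <atomic>
#include <cstdio>

enum class ExecutionStatus { NotStarted, Executing, Finished, Exception, CancelledByUser, CancelledByTimeout };

std::atomic<ExecutionStatus> execution_status{ExecutionStatus::NotStarted};

/// Returns true only if the status was exactly `expected` and was switched to `desired`.
bool tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired)
{
    return execution_status.compare_exchange_strong(expected, desired);
}

int main()
{
    tryUpdateExecutionStatus(ExecutionStatus::NotStarted, ExecutionStatus::Executing);                        /// succeeds
    bool by_user = tryUpdateExecutionStatus(ExecutionStatus::Executing, ExecutionStatus::CancelledByUser);    /// succeeds
    bool by_timeout = tryUpdateExecutionStatus(ExecutionStatus::Executing, ExecutionStatus::CancelledByTimeout); /// fails: already cancelled
    std::printf("%d %d\n", by_user, by_timeout);
}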
View File
@ -15,6 +15,7 @@ namespace DB
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
+    extern const int QUERY_WAS_CANCELLED;
}

class PushingAsyncSource : public ISource

@ -176,6 +177,16 @@ void PushingAsyncPipelineExecutor::start()
    data->thread = ThreadFromGlobalPool(std::move(func));
}

+[[noreturn]] static void throwOnExecutionStatus(PipelineExecutor::ExecutionStatus status)
+{
+    if (status == PipelineExecutor::ExecutionStatus::CancelledByTimeout
+        || status == PipelineExecutor::ExecutionStatus::CancelledByUser)
+        throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled");
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR,
+        "Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted");
+}
+
void PushingAsyncPipelineExecutor::push(Chunk chunk)
{
    if (!started)

@ -185,8 +196,7 @@ void PushingAsyncPipelineExecutor::push(Chunk chunk)
    data->rethrowExceptionIfHas();

    if (!is_pushed)
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
-            "Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted");
+        throwOnExecutionStatus(data->executor->getExecutionStatus());
}

void PushingAsyncPipelineExecutor::push(Block block)
View File
@ -11,6 +11,7 @@ namespace DB
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
+    extern const int QUERY_WAS_CANCELLED;
}

class PushingSource : public ISource

@ -80,6 +81,15 @@ const Block & PushingPipelineExecutor::getHeader() const
    return pushing_source->getPort().getHeader();
}

+[[noreturn]] static void throwOnExecutionStatus(PipelineExecutor::ExecutionStatus status)
+{
+    if (status == PipelineExecutor::ExecutionStatus::CancelledByTimeout
+        || status == PipelineExecutor::ExecutionStatus::CancelledByUser)
+        throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled");
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR,
+        "Pipeline for PushingPipelineExecutor was finished before all data was inserted");
+}
+
void PushingPipelineExecutor::start()
{

@ -91,8 +101,7 @@ void PushingPipelineExecutor::start()
    executor->setReadProgressCallback(pipeline.getReadProgressCallback());

    if (!executor->executeStep(&input_wait_flag))
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
-            "Pipeline for PushingPipelineExecutor was finished before all data was inserted");
+        throwOnExecutionStatus(executor->getExecutionStatus());
}

void PushingPipelineExecutor::push(Chunk chunk)

@ -103,8 +112,7 @@ void PushingPipelineExecutor::push(Chunk chunk)
    pushing_source->setData(std::move(chunk));

    if (!executor->executeStep(&input_wait_flag))
-        throw Exception(ErrorCodes::LOGICAL_ERROR,
-            "Pipeline for PushingPipelineExecutor was finished before all data was inserted");
+        throwOnExecutionStatus(executor->getExecutionStatus());
}

void PushingPipelineExecutor::push(Block block)
View File
@ -386,10 +386,11 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit
    /// We have a special logic for local replica. It has to read less data, because in some cases it should
    /// merge states of aggregate functions or do some other important stuff other than reading from Disk.
    auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier;
-    if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits<Int64>::max())
+    const auto min_marks_for_concurrent_read_limit = std::numeric_limits<Int64>::max() >> 1;
+    if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit)
    {
-        /// limit min marks to read in case it's big, happened in test since due to settings randomzation
-        pool_settings.min_marks_for_concurrent_read = std::numeric_limits<Int64>::max();
+        /// Limit min marks to read in case it's big; this happened in tests due to settings randomization.
+        pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit;
        multiplier = 1.0f;
    }

@ -562,10 +563,11 @@ Pipe ReadFromMergeTree::readInOrder(
    };

    auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier;
-    if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits<Int64>::max())
+    const auto min_marks_for_concurrent_read_limit = std::numeric_limits<Int64>::max() >> 1;
+    if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit)
    {
-        /// limit min marks to read in case it's big, happened in test since due to settings randomzation
-        pool_settings.min_marks_for_concurrent_read = std::numeric_limits<Int64>::max();
+        /// Limit min marks to read in case it's big; this happened in tests due to settings randomization.
+        pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit;
        multiplier = 1.0f;
    }
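Clamping to std::numeric_limits<Int64>::max() >> 1 instead of max() presumably leaves headroom so that later arithmetic on the mark count (for example doubling it, or adding it to another count) cannot overflow Int64. A small self-contained check of that property:

#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
    const int64_t limit = std::numeric_limits<int64_t>::max() >> 1;

    int64_t doubled = 0;
    bool overflow_at_limit = __builtin_mul_overflow(limit, int64_t{2}, &doubled);
    bool overflow_at_max = __builtin_mul_overflow(std::numeric_limits<int64_t>::max(), int64_t{2}, &doubled);

    std::printf("overflow at max()>>1: %d, overflow at max(): %d\n", overflow_at_limit, overflow_at_max);
    /// Expected: 0 and 1 -- only the unclamped value overflows when multiplied by 2.
}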
View File
@ -105,13 +105,16 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num
    /// See comment in IMergeTreeSelectAlgorithm.
    if (min_marks_to_read)
    {
-        // check that ...
-        bool overflow = ((1ULL << 63) & min_marks_to_read); // further multiplication by 2 will not overflow
-        if (!overflow)
-            overflow = (std::numeric_limits<size_t>::max() - from_mark) < 2 * min_marks_to_read;
-        if (!overflow && from_mark + 2 * min_marks_to_read <= to_mark)
-            to_mark = from_mark + min_marks_to_read;
+        // check overflow
+        size_t min_marks_to_read_2 = 0;
+        bool overflow = common::mulOverflow(min_marks_to_read, 2, min_marks_to_read_2);
+        // further addition will not overflow
+        size_t to_mark_overwrite = 0;
+        if (!overflow)
+            overflow = common::addOverflow(from_mark, min_marks_to_read_2, to_mark_overwrite);
+        if (!overflow && to_mark_overwrite < to_mark)
+            to_mark = to_mark_overwrite;
    }

    return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows;
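common::mulOverflow and common::addOverflow are used here as checked arithmetic; a self-contained sketch of the same pattern written directly with the GCC/Clang overflow builtins (an assumption about what those wrappers do, not their actual implementation):

#include <cstddef>
#include <cstdio>

int main()
{
    size_t from_mark = 100, min_marks_to_read = 8, to_mark = 1000;

    size_t min_marks_to_read_2 = 0;
    bool overflow = __builtin_mul_overflow(min_marks_to_read, size_t{2}, &min_marks_to_read_2);

    size_t to_mark_overwrite = 0;
    if (!overflow)
        overflow = __builtin_add_overflow(from_mark, min_marks_to_read_2, &to_mark_overwrite);

    if (!overflow && to_mark_overwrite < to_mark)
        to_mark = to_mark_overwrite;  /// 100 + 16 = 116 here

    std::printf("to_mark = %zu\n", to_mark);
}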
View File
@ -895,7 +895,7 @@ void StorageMergeTree::loadDeduplicationLog()
    std::string path = fs::path(relative_data_path) / "deduplication_logs";

    /// Either there is already a deduplication log, or we will be able to create and use one.
-    if (disk->exists(path) || !disk->isReadOnly())
+    if (!disk->isReadOnly() || disk->exists(path))
    {
        deduplication_log = std::make_unique<MergeTreeDeduplicationLog>(path, settings->non_replicated_deduplication_window, format_version, disk);
        deduplication_log->load();
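The reordered condition relies on short-circuit evaluation: on a writable disk the exists() probe is presumably skipped entirely, and it is only consulted on a read-only disk, where the log can only be used if it already exists. A small self-contained illustration (the Disk type is a stand-in, not the real interface):

#include <cstdio>

struct FakeDisk
{
    bool read_only = false;
    bool isReadOnly() const { return read_only; }
    bool exists(const char * path) const
    {
        std::printf("probing %s\n", path);  /// may be slow or undesirable on some disk types
        return false;
    }
};

int main()
{
    FakeDisk disk;
    /// New order: !isReadOnly() short-circuits, so exists() is not called on a writable disk.
    if (!disk.isReadOnly() || disk.exists("deduplication_logs"))
        std::puts("loading deduplication log");
    else
        std::puts("skipping deduplication log");
}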
View File
@ -570,6 +570,8 @@ class ClickHouseCluster:
        self.spark_session = None

        self.with_azurite = False
+        self.azurite_container = "azurite-container"
+        self.blob_service_client = None
        self._azurite_port = 0

        # available when with_hdfs == True

@ -2692,6 +2694,32 @@ class ClickHouseCluster:
                    connection_string
                )
                logging.debug(blob_service_client.get_account_information())
+
+                containers = [
+                    c
+                    for c in blob_service_client.list_containers(
+                        name_starts_with=self.azurite_container
+                    )
+                    if c.name == self.azurite_container
+                ]
+                if len(containers) > 0:
+                    for c in containers:
+                        blob_service_client.delete_container(c)
+
+                container_client = blob_service_client.get_container_client(
+                    self.azurite_container
+                )
+                if container_client.exists():
+                    logging.debug(
+                        f"azurite container '{self.azurite_container}' exists, deleting all blobs"
+                    )
+                    for b in container_client.list_blobs():
+                        container_client.delete_blob(b.name)
+                else:
+                    logging.debug(
+                        f"azurite container '{self.azurite_container}' doesn't exist, creating it"
+                    )
+                    container_client.create_container()
+
                self.blob_service_client = blob_service_client
                return
            except Exception as ex:
View File
@@ -202,6 +202,10 @@ def test_create_table():
        f"S3Queue('http://minio1:9001/root/data/', 'CSV', 'gzip') settings mode = 'ordered'",
        f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV') settings mode = 'ordered'",
        f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV', 'gzip') settings mode = 'ordered'",
+        (
+            f"Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
+            "DNS_ERROR",
+        ),
    ]

    def make_test_case(i):
@@ -266,6 +270,7 @@ def test_create_table():
            # due to sensitive data substituion the query will be normalized, so not "settings" but "SETTINGS"
            "CREATE TABLE table19 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV') SETTINGS mode = 'ordered'",
            "CREATE TABLE table20 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV', 'gzip') SETTINGS mode = 'ordered'",
+            "CREATE TABLE table21 (`x` int) ENGINE = Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
        ],
        must_not_contain=[password],
    )
@@ -387,6 +392,7 @@ def test_table_functions():
        f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none', 'auto')",
        f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
        f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')",
+        f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')",
    ]

    def make_test_case(i):
@@ -478,6 +484,7 @@ def test_table_functions():
            f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')",
            f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')",
            f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')",
+            "CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')",
        ],
        must_not_contain=[password],
    )


@@ -51,9 +51,9 @@ create_table_sql_nullable_template = """
"""


-def skip_test_msan(instance):
-    if instance.is_built_with_memory_sanitizer():
-        pytest.skip("Memory Sanitizer cannot work with third-party shared libraries")
+def skip_test_sanitizers(instance):
+    if instance.is_built_with_sanitizer():
+        pytest.skip("Sanitizers cannot work with third-party shared libraries")


def get_mysql_conn():
@@ -208,7 +208,7 @@ def started_cluster():


def test_mysql_odbc_select_nullable(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    mysql_setup = node1.odbc_drivers["MySQL"]
    table_name = "test_insert_nullable_select"
@@ -248,7 +248,7 @@ def test_mysql_odbc_select_nullable(started_cluster):


def test_mysql_simple_select_works(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    mysql_setup = node1.odbc_drivers["MySQL"]
@@ -331,7 +331,7 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, column_x Nulla


def test_mysql_insert(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    mysql_setup = node1.odbc_drivers["MySQL"]
    table_name = "test_insert"
@@ -374,7 +374,7 @@ def test_mysql_insert(started_cluster):


def test_sqlite_simple_select_function_works(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    sqlite_setup = node1.odbc_drivers["SQLite3"]
    sqlite_db = sqlite_setup["Database"]
@@ -438,7 +438,7 @@ def test_sqlite_simple_select_function_works(started_cluster):


def test_sqlite_table_function(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    sqlite_setup = node1.odbc_drivers["SQLite3"]
    sqlite_db = sqlite_setup["Database"]
@@ -470,7 +470,7 @@ def test_sqlite_table_function(started_cluster):


def test_sqlite_simple_select_storage_works(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    sqlite_setup = node1.odbc_drivers["SQLite3"]
    sqlite_db = sqlite_setup["Database"]
@@ -503,7 +503,7 @@ def test_sqlite_simple_select_storage_works(started_cluster):


def test_sqlite_odbc_hashed_dictionary(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    sqlite_db = node1.odbc_drivers["SQLite3"]["Database"]
    node1.exec_in_container(
@@ -586,7 +586,7 @@ def test_sqlite_odbc_hashed_dictionary(started_cluster):


def test_sqlite_odbc_cached_dictionary(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    sqlite_db = node1.odbc_drivers["SQLite3"]["Database"]
    node1.exec_in_container(
@@ -635,7 +635,7 @@ def test_sqlite_odbc_cached_dictionary(started_cluster):


def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    try:
        conn = get_postgres_conn(started_cluster)
@@ -663,7 +663,7 @@ def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster):


def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    try:
        conn = get_postgres_conn(started_cluster)
@@ -685,7 +685,7 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster):


def test_no_connection_pooling(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    try:
        conn = get_postgres_conn(started_cluster)
@@ -717,7 +717,7 @@ def test_no_connection_pooling(started_cluster):


def test_postgres_insert(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    conn = get_postgres_conn(started_cluster)
@@ -754,7 +754,7 @@ def test_postgres_insert(started_cluster):


def test_odbc_postgres_date_data_type(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    try:
        conn = get_postgres_conn(started_cluster)
@@ -783,7 +783,7 @@ def test_odbc_postgres_date_data_type(started_cluster):


def test_odbc_postgres_conversions(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    try:
        conn = get_postgres_conn(started_cluster)
@@ -841,7 +841,7 @@ def test_odbc_postgres_conversions(started_cluster):


def test_odbc_cyrillic_with_varchar(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    conn = get_postgres_conn(started_cluster)
    cursor = conn.cursor()
@@ -868,7 +868,7 @@ def test_odbc_cyrillic_with_varchar(started_cluster):


def test_many_connections(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    conn = get_postgres_conn(started_cluster)
    cursor = conn.cursor()
@@ -894,7 +894,7 @@ def test_many_connections(started_cluster):


def test_concurrent_queries(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    conn = get_postgres_conn(started_cluster)
    cursor = conn.cursor()
@@ -948,7 +948,7 @@ def test_concurrent_queries(started_cluster):


def test_odbc_long_column_names(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    conn = get_postgres_conn(started_cluster)
    cursor = conn.cursor()
@@ -986,7 +986,7 @@ def test_odbc_long_column_names(started_cluster):


def test_odbc_long_text(started_cluster):
-    skip_test_msan(node1)
+    skip_test_sanitizers(node1)

    conn = get_postgres_conn(started_cluster)
    cursor = conn.cursor()


@@ -56,7 +56,7 @@ def test_credentials_from_metadata():
    )

    expected_logs = [
-        "Calling EC2MetadataService to get token failed, falling back to less secure way",
+        "Calling EC2MetadataService to get token failed, falling back to a less secure way",
        "Getting default credentials for ec2 instance from resolver:8080",
        "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential string myrole",
        "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole",


@@ -1,6 +1,7 @@
import io
import logging
import random
+import string
import time

import pytest
@@ -13,7 +14,6 @@ from uuid import uuid4
AVAILABLE_MODES = ["unordered", "ordered"]
DEFAULT_AUTH = ["'minio'", "'minio123'"]
NO_AUTH = ["NOSIGN"]
-AZURE_CONTAINER_NAME = "cont"


def prepare_public_s3_bucket(started_cluster):
@@ -68,13 +68,24 @@ def s3_queue_setup_teardown(started_cluster):
    instance = started_cluster.instances["instance"]
    instance_2 = started_cluster.instances["instance2"]

-    instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;")
-    instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;")
+    instance.query("DROP DATABASE IF EXISTS default; CREATE DATABASE default;")
+    instance_2.query("DROP DATABASE IF EXISTS default; CREATE DATABASE default;")

    minio = started_cluster.minio_client
    objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True))
    for obj in objects:
        minio.remove_object(started_cluster.minio_bucket, obj.object_name)

+    container_client = started_cluster.blob_service_client.get_container_client(
+        started_cluster.azurite_container
+    )
+
+    if container_client.exists():
+        blob_names = [b.name for b in container_client.list_blobs()]
+        logging.debug(f"Deleting blobs: {blob_names}")
+        for b in blob_names:
+            container_client.delete_blob(b)
+
    yield  # run test
@@ -129,11 +140,6 @@ def started_cluster():
        cluster.start()
        logging.info("Cluster started")

-        container_client = cluster.blob_service_client.get_container_client(
-            AZURE_CONTAINER_NAME
-        )
-        container_client.create_container()
-
        yield cluster
    finally:
        cluster.shutdown()
@@ -190,7 +196,7 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None):

def put_azure_file_content(started_cluster, filename, data, bucket=None):
    client = started_cluster.blob_service_client.get_blob_client(
-        AZURE_CONTAINER_NAME, filename
+        started_cluster.azurite_container, filename
    )
    buf = io.BytesIO(data)
    client.upload_blob(buf, "BlockBlob", len(data))
@@ -226,7 +232,7 @@ def create_table(
        url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/"
        engine_def = f"{engine_name}('{url}', {auth_params}, {file_format})"
    else:
-        engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{files_path}/', 'CSV')"
+        engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', '{started_cluster.azurite_container}', '{files_path}/', 'CSV')"

    node.query(f"DROP TABLE IF EXISTS {table_name}")
    create_query = f"""
@@ -262,15 +268,21 @@ def create_mv(
    )


+def generate_random_string(length=6):
+    return "".join(random.choice(string.ascii_lowercase) for i in range(length))
+
+
@pytest.mark.parametrize("mode", ["unordered", "ordered"])
@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"])
def test_delete_after_processing(started_cluster, mode, engine_name):
    node = started_cluster.instances["instance"]
-    table_name = f"test.delete_after_processing_{mode}_{engine_name}"
+    table_name = f"delete_after_processing_{mode}_{engine_name}"
    dst_table_name = f"{table_name}_dst"
    files_path = f"{table_name}_data"
    files_num = 5
    row_num = 10
+    # A unique path is necessary for repeatable tests
+    keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
    if engine_name == "S3Queue":
        storage = "s3"
    else:
@@ -285,7 +297,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name):
        table_name,
        mode,
        files_path,
-        additional_settings={"after_processing": "delete"},
+        additional_settings={"after_processing": "delete", "keeper_path": keeper_path},
        engine_name=engine_name,
    )
    create_mv(node, table_name, dst_table_name)
@@ -313,7 +325,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name):
        assert len(objects) == 0
    else:
        client = started_cluster.blob_service_client.get_container_client(
-            AZURE_CONTAINER_NAME
+            started_cluster.azurite_container
        )
        objects_iterator = client.list_blobs(files_path)
        for objects in objects_iterator:
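The helper introduced above is the basis for the keeper_path changes in the rest of this file: every run gets a fresh ZooKeeper path so a rerun does not collide with metadata left by a failed attempt. A small sketch of that pattern, taken directly from the diff:

import random
import string

def generate_random_string(length=6):
    return "".join(random.choice(string.ascii_lowercase) for i in range(length))

def unique_keeper_path(table_name: str) -> str:
    # A unique path is necessary for repeatable tests.
    return f"/clickhouse/test_{table_name}_{generate_random_string()}"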
@@ -324,11 +336,12 @@ def test_delete_after_processing(started_cluster, mode, engine_name):
@pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"])
def test_failed_retry(started_cluster, mode, engine_name):
    node = started_cluster.instances["instance"]
-    table_name = f"test.failed_retry_{mode}_{engine_name}"
+    table_name = f"failed_retry_{mode}_{engine_name}"
    dst_table_name = f"{table_name}_dst"
    files_path = f"{table_name}_data"
    file_path = f"{files_path}/trash_test.csv"
-    keeper_path = f"/clickhouse/test_{table_name}"
+    # A unique path is necessary for repeatable tests
+    keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
    retries_num = 3

    values = [
@ -385,8 +398,9 @@ def test_failed_retry(started_cluster, mode, engine_name):
@pytest.mark.parametrize("mode", AVAILABLE_MODES) @pytest.mark.parametrize("mode", AVAILABLE_MODES)
def test_direct_select_file(started_cluster, mode): def test_direct_select_file(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"test.direct_select_file_{mode}" table_name = f"direct_select_file_{mode}"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{mode}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
file_path = f"{files_path}/test.csv" file_path = f"{files_path}/test.csv"
@ -447,7 +461,7 @@ def test_direct_select_file(started_cluster, mode):
] == [] ] == []
# New table with different zookeeper path # New table with different zookeeper path
keeper_path = f"/clickhouse/test_{table_name}_{mode}_2" keeper_path = f"{keeper_path}_2"
create_table( create_table(
started_cluster, started_cluster,
node, node,
@ -491,8 +505,17 @@ def test_direct_select_multiple_files(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"direct_select_multiple_files_{mode}" table_name = f"direct_select_multiple_files_{mode}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
# A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
create_table(started_cluster, node, table_name, mode, files_path) create_table(
started_cluster,
node,
table_name,
mode,
files_path,
additional_settings={"keeper_path": keeper_path},
)
for i in range(5): for i in range(5):
rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)] rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)]
values_csv = ( values_csv = (
@ -515,14 +538,23 @@ def test_direct_select_multiple_files(started_cluster, mode):
@pytest.mark.parametrize("mode", AVAILABLE_MODES) @pytest.mark.parametrize("mode", AVAILABLE_MODES)
def test_streaming_to_view_(started_cluster, mode): def test_streaming_to_view(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"streaming_to_view_{mode}" table_name = f"streaming_to_view_{mode}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
# A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
total_values = generate_random_files(started_cluster, files_path, 10) total_values = generate_random_files(started_cluster, files_path, 10)
create_table(started_cluster, node, table_name, mode, files_path) create_table(
started_cluster,
node,
table_name,
mode,
files_path,
additional_settings={"keeper_path": keeper_path},
)
create_mv(node, table_name, dst_table_name) create_mv(node, table_name, dst_table_name)
expected_values = set([tuple(i) for i in total_values]) expected_values = set([tuple(i) for i in total_values])
@ -544,7 +576,8 @@ def test_streaming_to_many_views(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"streaming_to_many_views_{mode}" table_name = f"streaming_to_many_views_{mode}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
for i in range(3): for i in range(3):
@ -582,7 +615,8 @@ def test_streaming_to_many_views(started_cluster, mode):
def test_multiple_tables_meta_mismatch(started_cluster): def test_multiple_tables_meta_mismatch(started_cluster):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"multiple_tables_meta_mismatch" table_name = f"multiple_tables_meta_mismatch"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
create_table( create_table(
@ -675,7 +709,8 @@ def test_multiple_tables_streaming_sync(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"multiple_tables_streaming_sync_{mode}" table_name = f"multiple_tables_streaming_sync_{mode}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 300 files_to_generate = 300
@ -756,7 +791,10 @@ def test_multiple_tables_streaming_sync(started_cluster, mode):
def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): def test_multiple_tables_streaming_sync_distributed(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
node_2 = started_cluster.instances["instance2"] node_2 = started_cluster.instances["instance2"]
table_name = f"multiple_tables_streaming_sync_distributed_{mode}" # A unique table name is necessary for repeatable tests
table_name = (
f"multiple_tables_streaming_sync_distributed_{mode}_{generate_random_string()}"
)
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" keeper_path = f"/clickhouse/test_{table_name}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
@ -833,7 +871,8 @@ def test_max_set_age(started_cluster):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = "max_set_age" table_name = "max_set_age"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
max_age = 20 max_age = 20
files_to_generate = 10 files_to_generate = 10
@ -944,10 +983,9 @@ def test_max_set_age(started_cluster):
def test_max_set_size(started_cluster): def test_max_set_size(started_cluster):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"max_set_size" table_name = f"max_set_size"
dst_table_name = f"{table_name}_dst" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}" keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
max_age = 10
files_to_generate = 10 files_to_generate = 10
create_table( create_table(
@ -991,7 +1029,8 @@ def test_drop_table(started_cluster):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"test_drop" table_name = f"test_drop"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 300 files_to_generate = 300
@ -1021,9 +1060,11 @@ def test_drop_table(started_cluster):
def test_s3_client_reused(started_cluster): def test_s3_client_reused(started_cluster):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"test.test_s3_client_reused" table_name = f"test_s3_client_reused"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
# A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
row_num = 10 row_num = 10
def get_created_s3_clients_count(): def get_created_s3_clients_count():
@ -1057,6 +1098,7 @@ def test_s3_client_reused(started_cluster):
additional_settings={ additional_settings={
"after_processing": "delete", "after_processing": "delete",
"s3queue_processing_threads_num": 1, "s3queue_processing_threads_num": 1,
"keeper_path": keeper_path,
}, },
auth=NO_AUTH, auth=NO_AUTH,
bucket=started_cluster.minio_public_bucket, bucket=started_cluster.minio_public_bucket,
@ -1114,7 +1156,8 @@ def test_processing_threads(started_cluster, mode):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"processing_threads_{mode}" table_name = f"processing_threads_{mode}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 300 files_to_generate = 300
processing_threads = 32 processing_threads = 32
@ -1181,7 +1224,8 @@ def test_shards(started_cluster, mode, processing_threads):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"test_shards_{mode}_{processing_threads}" table_name = f"test_shards_{mode}_{processing_threads}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 300 files_to_generate = 300
shards_num = 3 shards_num = 3
@ -1300,7 +1344,7 @@ where zookeeper_path ilike '%{table_name}%' and status = 'Processed' and rows_pr
pytest.param("unordered", 1), pytest.param("unordered", 1),
pytest.param("unordered", 8), pytest.param("unordered", 8),
pytest.param("ordered", 1), pytest.param("ordered", 1),
pytest.param("ordered", 8), pytest.param("ordered", 2),
], ],
) )
def test_shards_distributed(started_cluster, mode, processing_threads): def test_shards_distributed(started_cluster, mode, processing_threads):
@ -1308,10 +1352,11 @@ def test_shards_distributed(started_cluster, mode, processing_threads):
node_2 = started_cluster.instances["instance2"] node_2 = started_cluster.instances["instance2"]
table_name = f"test_shards_distributed_{mode}_{processing_threads}" table_name = f"test_shards_distributed_{mode}_{processing_threads}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 300 files_to_generate = 300
row_num = 50 row_num = 300
total_rows = row_num * files_to_generate total_rows = row_num * files_to_generate
shards_num = 2 shards_num = 2
@ -1461,8 +1506,8 @@ def test_settings_check(started_cluster):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
node_2 = started_cluster.instances["instance2"] node_2 = started_cluster.instances["instance2"]
table_name = f"test_settings_check" table_name = f"test_settings_check"
dst_table_name = f"{table_name}_dst" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}" keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
mode = "ordered" mode = "ordered"
@ -1504,7 +1549,10 @@ def test_processed_file_setting(started_cluster, processing_threads):
node = started_cluster.instances["instance"] node = started_cluster.instances["instance"]
table_name = f"test_processed_file_setting_{processing_threads}" table_name = f"test_processed_file_setting_{processing_threads}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}_{processing_threads}" # A unique path is necessary for repeatable tests
keeper_path = (
f"/clickhouse/test_{table_name}_{processing_threads}_{generate_random_string()}"
)
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 10 files_to_generate = 10
@ -1555,7 +1603,10 @@ def test_processed_file_setting_distributed(started_cluster, processing_threads)
node_2 = started_cluster.instances["instance2"] node_2 = started_cluster.instances["instance2"]
table_name = f"test_processed_file_setting_distributed_{processing_threads}" table_name = f"test_processed_file_setting_distributed_{processing_threads}"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = (
f"/clickhouse/test_{table_name}_{processing_threads}_{generate_random_string()}"
)
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 10 files_to_generate = 10
@ -1609,7 +1660,8 @@ def test_upgrade(started_cluster):
table_name = f"test_upgrade" table_name = f"test_upgrade"
dst_table_name = f"{table_name}_dst" dst_table_name = f"{table_name}_dst"
keeper_path = f"/clickhouse/test_{table_name}" # A unique path is necessary for repeatable tests
keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}"
files_path = f"{table_name}_data" files_path = f"{table_name}_data"
files_to_generate = 10 files_to_generate = 10
@@ -1648,7 +1700,8 @@ def test_upgrade(started_cluster):


def test_exception_during_insert(started_cluster):
    node = started_cluster.instances["instance_too_many_parts"]
-    table_name = f"test_exception_during_insert"
+    # A unique table name is necessary for repeatable tests
+    table_name = f"test_exception_during_insert_{generate_random_string()}"
    dst_table_name = f"{table_name}_dst"
    keeper_path = f"/clickhouse/test_{table_name}"
    files_path = f"{table_name}_data"
@@ -1664,6 +1717,7 @@ def test_exception_during_insert(started_cluster):
            "keeper_path": keeper_path,
        },
    )
+    node.rotate_logs()
    total_values = generate_random_files(
        started_cluster, files_path, files_to_generate, start_ind=0, row_num=1
    )
@@ -1680,33 +1734,49 @@
    )
    assert "Too many parts" in exception

+    original_parts_to_throw_insert = 0
+    modified_parts_to_throw_insert = 10
    node.replace_in_config(
        "/etc/clickhouse-server/config.d/merge_tree.xml",
-        "parts_to_throw_insert>0",
-        "parts_to_throw_insert>10",
+        f"parts_to_throw_insert>{original_parts_to_throw_insert}",
+        f"parts_to_throw_insert>{modified_parts_to_throw_insert}",
    )
-    node.restart_clickhouse()
+    try:
+        node.restart_clickhouse()

-    def get_count():
-        return int(node.query(f"SELECT count() FROM {dst_table_name}"))
+        def get_count():
+            return int(node.query(f"SELECT count() FROM {dst_table_name}"))

-    expected_rows = 10
-    for _ in range(20):
-        if expected_rows == get_count():
-            break
-        time.sleep(1)
-    assert expected_rows == get_count()
+        expected_rows = 10
+        for _ in range(20):
+            if expected_rows == get_count():
+                break
+            time.sleep(1)
+        assert expected_rows == get_count()
+    finally:
+        node.replace_in_config(
+            "/etc/clickhouse-server/config.d/merge_tree.xml",
+            f"parts_to_throw_insert>{modified_parts_to_throw_insert}",
+            f"parts_to_throw_insert>{original_parts_to_throw_insert}",
+        )
+        node.restart_clickhouse()


def test_commit_on_limit(started_cluster):
    node = started_cluster.instances["instance"]
-    table_name = f"test_commit_on_limit"
+    # A unique table name is necessary for repeatable tests
+    table_name = f"test_commit_on_limit_{generate_random_string()}"
    dst_table_name = f"{table_name}_dst"
    keeper_path = f"/clickhouse/test_{table_name}"
    files_path = f"{table_name}_data"
    files_to_generate = 10

+    failed_files_event_before = int(
+        node.query(
+            "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1"
+        )
+    )
    create_table(
        started_cluster,
        node,
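The try/finally above follows a general "temporarily patch the server config, run the checks, always roll back" shape. A hedged sketch of that pattern, assuming a ClickHouseInstance-like node exposing replace_in_config and restart_clickhouse as used in the diff:

def with_temporary_config(node, config_path, old_value, new_value, check):
    # Apply the temporary value, run the assertions, and restore the original
    # value even if the assertions fail.
    node.replace_in_config(config_path, old_value, new_value)
    try:
        node.restart_clickhouse()
        check()
    finally:
        node.replace_in_config(config_path, new_value, old_value)
        node.restart_clickhouse()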
@@ -1782,6 +1852,9 @@ def test_commit_on_limit(started_cluster):
    assert "test_999999.csv" in get_processed_files()

    assert 1 == int(
+        node.count_in_log(f"Setting file {files_path}/test_9999.csv as failed")
+    )
+    assert failed_files_event_before + 1 == int(
        node.query(
            "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1"
        )
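Recording failed_files_event_before and asserting on the increase avoids flakiness: system.events counters are server-wide and cumulative, so only the delta belongs to this test. A minimal sketch of that delta-based assertion, using node.query as in the diff:

def failed_files_count(node) -> int:
    return int(
        node.query(
            "SELECT value FROM system.events "
            "WHERE name = 'ObjectStorageQueueFailedFiles' "
            "SETTINGS system_events_show_zero_values=1"
        )
    )

# before = failed_files_count(node)
# ... run the scenario that is expected to fail exactly one file ...
# assert failed_files_count(node) == before + 1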


@@ -0,0 +1,30 @@
<test>
<fill_query>
INSERT INTO FUNCTION file('test_pq_index', Parquet) SELECT * FROM generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Tuple(i UInt32, j UInt32)),array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 SETTINGS output_format_parquet_use_custom_encoder=false, output_format_parquet_write_page_index=true
</fill_query>
<query>
SELECT * FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null
</query>
<query>
SELECT tuple_column.a FROM file('test_pq_index', Parquet) Format Null
</query>
<query>
SELECT tuple_column.a FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null
</query>
<query>
SELECT tuple_column.c.i FROM file('test_pq_index', Parquet) Format Null
</query>
<query>
SELECT * FROM file('test_pq_index', Parquet, 'array_tuple_column Array (Tuple(a Nullable(String)))') Format Null
</query>
<query>
SELECT * FROM file('test_pq_index', Parquet, 'map_tuple_column Map(String, Tuple(a Nullable(String)))') Format Null
</query>
</test>


@@ -14,4 +14,4 @@
========
201902_4_5_1 1
========
-201801_1_1_0 1
+201801_1_1_2 1


@@ -39,6 +39,6 @@ CHECK TABLE mt_table PARTITION 201902 SETTINGS max_threads = 1;
SELECT '========';
-CHECK TABLE mt_table PART '201801_1_1_0';
+CHECK TABLE mt_table PART '201801_1_1_2';

DROP TABLE IF EXISTS mt_table;


@@ -11,33 +11,40 @@ function query()
    ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*"
}

-# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
-verify_sql="SELECT
-(SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics)
- = (SELECT sum(active), sum(NOT active) FROM
-(SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))"
-
# The query is not atomic - it can compare states between system.parts and system.metrics from different points in time.
# So, there is inherent race condition. But it should get expected result eventually.
# In case of test failure, this code will do infinite loop and timeout.
verify()
{
-    for i in {1..5000}
-    do
-        result=$( query "$verify_sql" )
-        [ "$result" = "1" ] && echo "$result" && break
-        sleep 0.1
+    local result

-        if [[ $i -eq 5000 ]]
-        then
-            query "
-                SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics;
-                SELECT sum(active), sum(NOT active) FROM system.parts;
-                SELECT sum(active), sum(NOT active) FROM system.projection_parts;
-                SELECT count() FROM system.dropped_tables_parts;
-            "
+    for _ in {1..100}; do
+        # NOTE: database = $CLICKHOUSE_DATABASE is unwanted
+        result=$( query "SELECT
+            (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics)
+            =
+            (SELECT sum(active), sum(NOT active) FROM (
+                SELECT active FROM system.parts
+                UNION ALL SELECT active FROM system.projection_parts
+                UNION ALL SELECT 1 FROM system.dropped_tables_parts
+            ))"
+        )
+
+        if [ "$result" = "1" ]; then
+            echo "$result"
+            return
        fi
+
+        sleep 0.5
    done
+
+    $CLICKHOUSE_CLIENT -q "
+        SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics;
+        SELECT sum(active), sum(NOT active) FROM system.parts;
+        SELECT sum(active), sum(NOT active) FROM system.projection_parts;
+        SELECT count() FROM system.dropped_tables_parts;
+    "
}

query "DROP TABLE IF EXISTS test_table"


@@ -1 +1 @@
-c1 Nullable(DateTime64(9))
+c1 Nullable(DateTime)


@ -1,29 +1,29 @@
JSONEachRow JSONEachRow
x Nullable(Date) x Nullable(Date)
x Nullable(DateTime64(9)) x Nullable(DateTime64(9))
x Nullable(DateTime64(9)) x Nullable(DateTime)
x Array(Nullable(Date)) x Array(Nullable(Date))
x Array(Nullable(DateTime64(9))) x Array(Nullable(DateTime))
x Array(Nullable(DateTime64(9))) x Array(Nullable(DateTime))
x Tuple(\n date1 Nullable(DateTime64(9)),\n date2 Nullable(Date)) x Tuple(\n date1 Nullable(DateTime),\n date2 Nullable(Date))
x Array(Nullable(DateTime64(9))) x Array(Nullable(DateTime))
x Array(Nullable(DateTime64(9))) x Array(Nullable(DateTime))
x Nullable(DateTime64(9)) x Nullable(DateTime)
x Array(Nullable(String)) x Array(Nullable(String))
x Nullable(String) x Nullable(String)
x Array(Nullable(String)) x Array(Nullable(String))
x Tuple(\n key1 Array(Array(Nullable(DateTime64(9)))),\n key2 Array(Array(Nullable(String)))) x Tuple(\n key1 Array(Array(Nullable(DateTime))),\n key2 Array(Array(Nullable(String))))
CSV CSV
c1 Nullable(Date) c1 Nullable(Date)
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime64(9))
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime)
c1 Array(Nullable(Date)) c1 Array(Nullable(Date))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Map(String, Nullable(DateTime64(9))) c1 Map(String, Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime)
c1 Array(Nullable(String)) c1 Array(Nullable(String))
c1 Nullable(String) c1 Nullable(String)
c1 Array(Nullable(String)) c1 Array(Nullable(String))
@ -31,14 +31,14 @@ c1 Map(String, Array(Array(Nullable(String))))
TSV TSV
c1 Nullable(Date) c1 Nullable(Date)
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime64(9))
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime)
c1 Array(Nullable(Date)) c1 Array(Nullable(Date))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Map(String, Nullable(DateTime64(9))) c1 Map(String, Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime)
c1 Array(Nullable(String)) c1 Array(Nullable(String))
c1 Nullable(String) c1 Nullable(String)
c1 Array(Nullable(String)) c1 Array(Nullable(String))
@ -46,14 +46,14 @@ c1 Map(String, Array(Array(Nullable(String))))
Values Values
c1 Nullable(Date) c1 Nullable(Date)
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime64(9))
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime)
c1 Array(Nullable(Date)) c1 Array(Nullable(Date))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Map(String, Nullable(DateTime64(9))) c1 Map(String, Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Array(Nullable(DateTime64(9))) c1 Array(Nullable(DateTime))
c1 Nullable(DateTime64(9)) c1 Nullable(DateTime)
c1 Array(Nullable(String)) c1 Array(Nullable(String))
c1 Nullable(String) c1 Nullable(String)
c1 Array(Nullable(String)) c1 Array(Nullable(String))


@@ -0,0 +1,10 @@
0,"1970-01-01"
1,"1970-01-02"
2,"1970-01-03"
3,"1970-01-04"
4,"1970-01-05"
5,"1970-01-06"
6,"1970-01-07"
7,"1970-01-08"
8,"1970-01-09"
9,"1970-01-10"
1 0 1970-01-01
2 1 1970-01-02
3 2 1970-01-03
4 3 1970-01-04
5 4 1970-01-05
6 5 1970-01-06
7 6 1970-01-07
8 7 1970-01-08
9 8 1970-01-09
10 9 1970-01-10


@@ -0,0 +1,11 @@
"number","toDate(number)"
0,"1970-01-01"
1,"1970-01-02"
2,"1970-01-03"
3,"1970-01-04"
4,"1970-01-05"
5,"1970-01-06"
6,"1970-01-07"
7,"1970-01-08"
8,"1970-01-09"
9,"1970-01-10"


@@ -0,0 +1,10 @@
0 1970-01-01
1 1970-01-02
2 1970-01-03
3 1970-01-04
4 1970-01-05
5 1970-01-06
6 1970-01-07
7 1970-01-08
8 1970-01-09
9 1970-01-10


@@ -0,0 +1,10 @@
["0", "1970-01-01"]
["1", "1970-01-02"]
["2", "1970-01-03"]
["3", "1970-01-04"]
["4", "1970-01-05"]
["5", "1970-01-06"]
["6", "1970-01-07"]
["7", "1970-01-08"]
["8", "1970-01-09"]
["9", "1970-01-10"]


@@ -0,0 +1,10 @@
{"number":"0","toDate(number)":"1970-01-01"}
{"number":"1","toDate(number)":"1970-01-02"}
{"number":"2","toDate(number)":"1970-01-03"}
{"number":"3","toDate(number)":"1970-01-04"}
{"number":"4","toDate(number)":"1970-01-05"}
{"number":"5","toDate(number)":"1970-01-06"}
{"number":"6","toDate(number)":"1970-01-07"}
{"number":"7","toDate(number)":"1970-01-08"}
{"number":"8","toDate(number)":"1970-01-09"}
{"number":"9","toDate(number)":"1970-01-10"}


@@ -0,0 +1,10 @@
number=0 toDate(number)=1970-01-01
number=1 toDate(number)=1970-01-02
number=2 toDate(number)=1970-01-03
number=3 toDate(number)=1970-01-04
number=4 toDate(number)=1970-01-05
number=5 toDate(number)=1970-01-06
number=6 toDate(number)=1970-01-07
number=7 toDate(number)=1970-01-08
number=8 toDate(number)=1970-01-09
number=9 toDate(number)=1970-01-10


@@ -0,0 +1,10 @@
0 1970-01-01
1 1970-01-02
2 1970-01-03
3 1970-01-04
4 1970-01-05
5 1970-01-06
6 1970-01-07
7 1970-01-08
8 1970-01-09
9 1970-01-10
1 0 1970-01-01
2 1 1970-01-02
3 2 1970-01-03
4 3 1970-01-04
5 4 1970-01-05
6 5 1970-01-06
7 6 1970-01-07
8 7 1970-01-08
9 8 1970-01-09
10 9 1970-01-10


@@ -0,0 +1,11 @@
number toDate(number)
0 1970-01-01
1 1970-01-02
2 1970-01-03
3 1970-01-04
4 1970-01-05
5 1970-01-06
6 1970-01-07
7 1970-01-08
8 1970-01-09
9 1970-01-10


@@ -0,0 +1 @@
(0,'1970-01-01'),(1,'1970-01-02'),(2,'1970-01-03'),(3,'1970-01-04'),(4,'1970-01-05'),(5,'1970-01-06'),(6,'1970-01-07'),(7,'1970-01-08'),(8,'1970-01-09'),(9,'1970-01-10')


@ -4,7 +4,7 @@ c2 Nullable(Date)
c1 Nullable(Float64) c1 Nullable(Float64)
c2 Nullable(Date) c2 Nullable(Date)
c1 Nullable(Int64) c1 Nullable(Int64)
c2 Nullable(DateTime64(9)) c2 Nullable(DateTime)
c1 UInt8 c1 UInt8
c2 Nullable(Date) c2 Nullable(Date)
4 4
@ -14,7 +14,7 @@ toDate(number) Nullable(Date)
number Nullable(Float64) number Nullable(Float64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(DateTime64(9)) toDate(number) Nullable(DateTime)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
4 4
@ -24,7 +24,7 @@ c2 Nullable(Date)
c1 Nullable(Float64) c1 Nullable(Float64)
c2 Nullable(Date) c2 Nullable(Date)
c1 Nullable(Int64) c1 Nullable(Int64)
c2 Nullable(DateTime64(9)) c2 Nullable(DateTime)
c1 UInt8 c1 UInt8
c2 Nullable(Date) c2 Nullable(Date)
4 4
@ -34,7 +34,7 @@ toDate(number) Nullable(Date)
number Nullable(Float64) number Nullable(Float64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(DateTime64(9)) toDate(number) Nullable(DateTime)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
4 4
@ -44,7 +44,7 @@ toDate(number) Nullable(Date)
number Nullable(Float64) number Nullable(Float64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(DateTime64(9)) toDate(number) Nullable(DateTime)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
4 4
@ -54,7 +54,7 @@ c2 Nullable(Date)
c1 Nullable(Float64) c1 Nullable(Float64)
c2 Nullable(Date) c2 Nullable(Date)
c1 Nullable(Int64) c1 Nullable(Int64)
c2 Nullable(DateTime64(9)) c2 Nullable(DateTime)
c1 UInt8 c1 UInt8
c2 Nullable(Date) c2 Nullable(Date)
4 4
@ -64,7 +64,7 @@ toDate(number) Nullable(Date)
number Nullable(Float64) number Nullable(Float64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(DateTime64(9)) toDate(number) Nullable(DateTime)
number Nullable(Int64) number Nullable(Int64)
toDate(number) Nullable(Date) toDate(number) Nullable(Date)
4 4
@ -74,7 +74,7 @@ c2 Nullable(Date)
c1 Nullable(Float64) c1 Nullable(Float64)
c2 Nullable(Date) c2 Nullable(Date)
c1 Nullable(Int64) c1 Nullable(Int64)
c2 Nullable(DateTime64(9)) c2 Nullable(DateTime)
c1 UInt8 c1 UInt8
c2 Nullable(Date) c2 Nullable(Date)
4 4
@ -84,7 +84,7 @@ c2 Nullable(Date)
c1 Nullable(Float64) c1 Nullable(Float64)
c2 Nullable(Date) c2 Nullable(Date)
c1 Nullable(Int64) c1 Nullable(Int64)
c2 Nullable(DateTime64(9)) c2 Nullable(DateTime)
c1 UInt8 c1 UInt8
c2 Nullable(Date) c2 Nullable(Date)
4 4


@@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ]
then
    echo "$RES"
else
+    echo "$RES"
    cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log"
fi


@ -4,7 +4,7 @@ JSON
{"d":"str","dynamicType(d)":"String"} {"d":"str","dynamicType(d)":"String"}
{"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} {"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"}
{"d":"2020-01-01","dynamicType(d)":"Date"} {"d":"2020-01-01","dynamicType(d)":"Date"}
{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} {"d":"2020-01-01 10:00:00","dynamicType(d)":"DateTime"}
{"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} {"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"}
{"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} {"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"}
{"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} {"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"}
@ -22,7 +22,7 @@ CSV
"str","String" "str","String"
"[1,2,3]","Array(Int64)" "[1,2,3]","Array(Int64)"
"2020-01-01","Date" "2020-01-01","Date"
"2020-01-01 10:00:00.000000000","DateTime64(9)" "2020-01-01 10:00:00","DateTime"
"[1, 'str', [1, 2, 3]]","String" "[1, 'str', [1, 2, 3]]","String"
\N,"None" \N,"None"
true,"Bool" true,"Bool"
@ -32,24 +32,24 @@ TSV
str String str String
[1,2,3] Array(Int64) [1,2,3] Array(Int64)
2020-01-01 Date 2020-01-01 Date
2020-01-01 10:00:00.000000000 DateTime64(9) 2020-01-01 10:00:00 DateTime
[1, \'str\', [1, 2, 3]] String [1, \'str\', [1, 2, 3]] String
\N None \N None
true Bool true Bool
Values Values
(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') (42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00','DateTime'),(NULL,'None'),(true,'Bool')
Cast using parsing Cast using parsing
42 Int64 42 Int64
42.42 Float64 42.42 Float64
[1,2,3] Array(Int64) [1,2,3] Array(Int64)
2020-01-01 Date 2020-01-01 Date
2020-01-01 10:00:00.000000000 DateTime64(9) 2020-01-01 10:00:00 DateTime
NULL String NULL String
true Bool true Bool
42 Int64 false 42 Int64 false
42.42 Float64 false 42.42 Float64 false
[1,2,3] Array(Int64) false [1,2,3] Array(Int64) false
2020-01-01 Date true 2020-01-01 Date true
2020-01-01 10:00:00.000000000 DateTime64(9) true 2020-01-01 10:00:00 DateTime true
NULL String true NULL String true
true Bool true true Bool true


@@ -12,7 +12,7 @@ Hello String
[1,2,3] Array(Nullable(Int64))
['str1','str2','str3'] Array(Nullable(String))
[[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64))))
-['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9)))
+['2020-01-01 00:00:00','2020-01-01 00:00:00'] Array(Nullable(DateTime))
['2020-01-01','2020-01-01 date'] Array(Nullable(String))
['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String))
['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String))


@@ -1,2 +1,2 @@
x Nullable(Int64)
-schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false
+schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false


@ -0,0 +1,24 @@
-- Tags: no-parallel
SET create_if_not_exists=0; -- Default
DROP TABLE IF EXISTS example_table;
CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id;
CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -- { serverError TABLE_ALREADY_EXISTS }
DROP DATABASE IF EXISTS example_database;
CREATE DATABASE example_database;
CREATE DATABASE example_database; -- { serverError DATABASE_ALREADY_EXISTS }
SET create_if_not_exists=1;
DROP TABLE IF EXISTS example_table;
CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id;
CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id;
DROP DATABASE IF EXISTS example_database;
CREATE DATABASE example_database;
CREATE DATABASE example_database;
DROP DATABASE IF EXISTS example_database;
DROP TABLE IF EXISTS example_table;


@@ -0,0 +1,2 @@
QUERY_WAS_CANCELLED
QUERY_WAS_CANCELLED


@@ -0,0 +1,8 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_CLIENT} --query "create table null_t (number UInt64) engine = Null;"
${CLICKHOUSE_CLIENT} --query "select sleep(0.1) from system.numbers settings max_block_size = 1 format Native" 2>/dev/null | ${CLICKHOUSE_CLIENT} --max_execution_time 0.3 --timeout_overflow_mode break --query "insert into null_t format Native" 2>&1 | grep -o "QUERY_WAS_CANCELLED"


@@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Tags: no-fasttest, no-asan, no-msan, no-tsan
# ^ requires S3
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# Inaccessible IMDS should not introduce large delays, so this query should reply quickly at least sometimes:
while true
do
# This host (likely) drops packets sent to it (does not reply), so it is good for testing timeouts.
# At the same time, we expect that the clickhouse host does not drop packets and quickly replies with 4xx, which is a non-retriable error for S3.
AWS_EC2_METADATA_SERVICE_ENDPOINT='https://10.255.255.255/' ${CLICKHOUSE_LOCAL} --time --query "SELECT * FROM s3('${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/nonexistent')" |& grep -v -F 404 |
${CLICKHOUSE_LOCAL} --input-format TSV "SELECT c1::Float64 < 1 FROM table" | grep 1 && break
done


@@ -0,0 +1,253 @@
Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
2020-01-01 Date
String
2020_01_01 String
2020_1_01 String
2020_01_1 String
2020_1_1 String
2020a01a01 String
2020a1a01 String
2020a01a1 String
2020a1a1 String
20200101 String
DateTime
2020-01-02 18:42:42 DateTime
2020-01-02 18:42:42 DateTime
2020-01-02 18:42:42 DateTime
String
2020_01_01 42:42:42 String
2020a01a01 42:42:42 String
2020-01-01 42.42.42 String
2020-01-01 42 42 42 String
2020-01-01 42a42a42 String
DateTime64
2020-01-02 18:42:42.424200000 DateTime64(9)
2020-01-02 18:42:42.424200000 DateTime64(9)
2020-01-02 18:42:42.424200000 DateTime64(9)
String
2020_01_01 42:42:42.4242 String
2020a01a01 42:42:42.4242 String
2020-01-01 42.42.42.4242 String
2020-01-01 42 42 42.4242 String
2020-01-01 42a42a42.4242 String
DateTime/DateTime64 best effort
2000-01-01 00:00:00 DateTime
2000-01-01 01:00:00 DateTime
2000-01-01 01:00:00.000000000 DateTime64(9)
2017-01-01 22:02:03 DateTime
2017-01-01 22:02:03.000000000 DateTime64(9)
2017-01-01 21:02:03 DateTime
2017-01-01 21:02:03.000000000 DateTime64(9)
2017-01-01 22:02:03 DateTime
2017-01-01 22:02:03.000000000 DateTime64(9)
2017-01-02 01:02:03 DateTime
2017-01-02 01:02:03.000000000 DateTime64(9)
1970-01-02 01:02:03 DateTime
1970-01-02 01:02:03.000000000 DateTime64(9)
1970-01-02 01:02:03 DateTime
1970-01-02 01:02:03.000000000 DateTime64(9)
2018-02-11 03:40:50 DateTime
2018-02-11 03:40:50.000000000 DateTime64(9)
2000-04-17 01:02:03 DateTime
2000-04-17 01:02:03.000000000 DateTime64(9)
1970-01-02 01:00:00 DateTime
1970-01-02 01:00:00.000000000 DateTime64(9)
1970-01-02 01:02:03 DateTime
1970-01-02 01:02:03.000000000 DateTime64(9)
1970-01-02 01:02:03 DateTime
1970-01-02 01:02:03.000000000 DateTime64(9)
2015-12-31 20:00:00 DateTime
2015-12-31 20:00:00 DateTime
2016-01-01 00:00:00 DateTime
2016-01-01 00:00:00 DateTime
2017-01-01 22:02:03 DateTime
2017-01-01 22:02:03.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-02 04:04:05 DateTime
2017-01-02 04:04:05.000000000 DateTime64(9)
2017-01-02 02:34:05 DateTime
2017-01-02 02:34:05.000000000 DateTime64(9)
2017-01-02 00:04:05 DateTime
2017-01-02 00:04:05.000000000 DateTime64(9)
2017-01-02 02:04:05 DateTime
2017-01-02 02:04:05.000000000 DateTime64(9)
2017-01-02 00:04:05 DateTime
2017-01-02 00:04:05.000000000 DateTime64(9)
2017-01-01 18:04:05 DateTime
2017-01-01 18:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-01 23:04:05 DateTime
2017-01-01 23:04:05.000000000 DateTime64(9)
2017-02-01 23:04:05 DateTime
2017-02-01 23:04:05.000000000 DateTime64(9)
2017-06-01 23:04:05 DateTime
2017-06-01 23:04:05.000000000 DateTime64(9)
2017-01-02 00:04:05 DateTime
2017-01-02 00:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-01-02 04:04:05 DateTime
2017-01-02 04:04:05.000000000 DateTime64(9)
2017-01-02 04:04:05 DateTime
2017-01-02 04:04:05.000000000 DateTime64(9)
2017-01-02 02:04:05 DateTime
2017-01-02 02:04:05.000000000 DateTime64(9)
2017-01-02 03:04:05 DateTime
2017-01-02 03:04:05.000000000 DateTime64(9)
2017-04-01 11:22:33 DateTime
2017-04-01 11:22:33.000000000 DateTime64(9)
2017-04-01 22:02:03 DateTime
2017-04-01 22:02:03.000000000 DateTime64(9)
2017-04-01 22:02:03 DateTime
2017-04-01 22:02:03.000000000 DateTime64(9)
2017-04-02 01:02:03 DateTime
2017-04-02 01:02:03.000000000 DateTime64(9)
2017-04-02 11:22:33 DateTime
2017-04-02 11:22:33.000000000 DateTime64(9)
2017-04-02 01:02:03 DateTime
2017-04-02 01:02:03.000000000 DateTime64(9)
2017-04-02 01:22:33 DateTime
2017-04-02 01:22:33.000000000 DateTime64(9)
2017-04-02 01:02:03 DateTime
2017-04-02 01:02:03.000000000 DateTime64(9)
2017-04-02 01:02:33 DateTime
2017-04-02 01:02:33.000000000 DateTime64(9)
2017-04-01 22:02:03 DateTime
2017-04-01 22:02:03.000000000 DateTime64(9)
2017-04-02 01:02:03 DateTime
2017-04-02 01:02:03.000000000 DateTime64(9)
2017-04-01 22:02:03 DateTime
2017-04-01 22:02:03.000000000 DateTime64(9)
2017-04-01 21:02:03 DateTime
2017-04-01 21:02:03.000000000 DateTime64(9)
2017-04-02 01:02:03 DateTime
2017-04-02 01:02:03.000000000 DateTime64(9)
2017-01-01 22:02:03 DateTime
2017-01-01 22:02:03.000000000 DateTime64(9)
2017-04-25 01:02:03 DateTime
2017-04-25 01:02:03.000000000 DateTime64(9)
2017-04-25 01:02:03 DateTime
2017-04-25 01:02:03.000000000 DateTime64(9)
2017-01-25 01:02:03 DateTime
2017-01-25 01:02:03.000000000 DateTime64(9)
2017-01-24 22:02:03 DateTime
2017-01-24 22:02:03.000000000 DateTime64(9)
2017-01-25 13:02:03 DateTime
2017-01-25 13:02:03.000000000 DateTime64(9)
2017-01-25 01:02:03 DateTime
2017-01-25 01:02:03.000000000 DateTime64(9)
2017-01-25 01:02:03 DateTime
2017-01-25 01:02:03.000000000 DateTime64(9)
2017-01-24 22:02:03 DateTime
2017-01-24 22:02:03.000000000 DateTime64(9)
2017-01-24 22:02:03 DateTime
2017-01-24 22:02:03.000000000 DateTime64(9)
2017-01-25 10:02:03 DateTime
2017-01-25 10:02:03.000000000 DateTime64(9)
2017-01-25 10:02:03 DateTime
2017-01-25 10:02:03.000000000 DateTime64(9)
2017-01-25 10:02:03 DateTime
2017-01-25 10:02:03.000000000 DateTime64(9)
2017-01-25 09:32:03 DateTime
2017-01-25 09:32:03.000000000 DateTime64(9)
2017-01-25 01:02:03 DateTime
2017-01-25 01:02:03.000000000 DateTime64(9)
2017-01-25 13:02:03 DateTime
2017-01-25 13:02:03.000000000 DateTime64(9)
2017-01-25 13:02:03 DateTime
2017-01-25 13:02:03.000000000 DateTime64(9)
2017-01-25 10:02:03 DateTime
2017-01-25 10:02:03.000000000 DateTime64(9)
2018-02-11 03:40:50 DateTime
2018-02-11 03:40:50.000000000 DateTime64(9)
2018-02-11 03:40:50 DateTime
2018-02-11 03:40:50.000000000 DateTime64(9)
String
2 String
20 String
200 String
2000 String
20000 String
200001 String
2000010 String
20000101 String
200001010 String
2000010101 String
20000101010 String
200001010101 String
2000010101010 String
20000101010101 String
2.1 String
20.1 String
200.1 String
2000.1 String
20000.1 String
200001.1 String
2000010.1 String
20000101.1 String
200001010.1 String
2000010101.1 String
20000101010.1 String
200001010101.1 String
2000010101010.1 String
20000101010101.1 String
Mar String
Mar1 String
Mar 1 String
Mar01 String
Mar 01 String
Mar2020 String
Mar 2020 String
Mar012020 String
Mar 012020 String
Mar01012020 String
Mar 01012020 String
Mar0101202001 String
Mar 0101202001 String
Mar010120200101 String
Mar 010120200101 String
Mar01012020010101 String
Mar 01012020010101 String
Mar01012020010101.000 String
Mar 0101202001010101.000 String
2000 01 01 01:00:00 String
2000 01 01 01:00:00.000 String
2000a01a01 01:00:00 String
2000a01a01 01:00:00.000 String
2000-01-01 01 00 00 String
2000-01-01 01 00 00.000 String
2000-01-01 01-00-00 String
2000-01-01 01-00-00.000 String
2000-01-01 01a00a00 String
2000-01-01 01a00a00.000 String
2000-01 01:00:00 String
2000-01 01:00:00.000 String
2000 01 String
2000-01 String
Mar 2000 00:00:00 String
Mar 2000 00:00:00.000 String
2000 00:00:00 String
2000 00:00:00.000 String
Mar 2000-01-01 00:00:00 String
Mar 2000-01-01 00:00:00.000 String

View File

@ -0,0 +1,269 @@
set input_format_try_infer_datetimes = 1;
set input_format_try_infer_dates = 1;
set schema_inference_make_columns_nullable = 0;
set input_format_json_try_infer_numbers_from_strings = 0;
set session_timezone = 'UTC';
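-- The cases below check which date/time-like strings are inferred as Date, DateTime or DateTime64 and which ambiguous forms fall back to String (see the matching .reference output above).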
select 'Date';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/1"}');
select 'String';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20200101"}');
select 'DateTime';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42"}');
select 'String';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42"}');
select 'DateTime64';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42.4242"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42.4242"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42.4242"}');
select 'String';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42.4242"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42.4242"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42.4242"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42.4242"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42.4242"}');
set date_time_input_format='best_effort';
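-- With best_effort parsing, many additional formats (textual months, time zone abbreviations, numeric offsets, AM/PM) are inferred as DateTime/DateTime64, as the reference output above shows.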
select 'DateTime/DateTime64 best effort';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 00:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK+0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK+0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203.000Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203.000Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50 +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50.000 +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203.000Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01MSD"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01 MSD"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01UTC"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203.000 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 -0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 -0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+030"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+030"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+900"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+900"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05GMT"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000GMT"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Feb"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Feb"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Jun"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Jun"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05 -0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000 -0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05-0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000-0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+0100"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203.000 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3.000 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0400"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0400"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203.000 UTC+0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 MSK"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:00 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:00 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:30 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:30 PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z Mon"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z Mon"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z PM"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM +03:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM +03:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50 +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50.000 +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50 +0300"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50.000 +0300"}');
select 'String';
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101.1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 1"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar2020"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2020"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar012020"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 012020"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar0101202001"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar010120200101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 010120200101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020010101"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001010101.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00.000"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00"}');
select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00.000"}');

View File

@ -1,3 +1,14 @@
1006
1007
1008
1009
101
1010
1011
1012
1013
1014
---
100 100
101 101
102 102

View File

@ -6,10 +6,18 @@ INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(10_000);
SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost';
SELECT v
FROM test__fuzz_22
ORDER BY v
LIMIT 10, 10
SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806;
SELECT '---';
SELECT k, v
FROM test__fuzz_22
ORDER BY k
LIMIT 100, 10
-SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806;
+SETTINGS optimize_read_in_order=1, merge_tree_min_rows_for_concurrent_read = 9223372036854775806;
DROP TABLE test__fuzz_22 SYNC;