diff --git a/.github/actions/debug/action.yml b/.github/actions/debug/action.yml new file mode 100644 index 00000000000..e1fe3f28024 --- /dev/null +++ b/.github/actions/debug/action.yml @@ -0,0 +1,18 @@ +name: DebugInfo +description: Prints workflow debug info + +runs: + using: "composite" + steps: + - name: Print envs + shell: bash + run: | + echo "::group::Envs" + env + echo "::endgroup::" + - name: Print Event.json + shell: bash + run: | + echo "::group::Event.json" + python3 -m json.tool "$GITHUB_EVENT_PATH" + echo "::endgroup::" diff --git a/.github/workflows/auto_releases.yml b/.github/workflows/auto_releases.yml new file mode 100644 index 00000000000..c159907187c --- /dev/null +++ b/.github/workflows/auto_releases.yml @@ -0,0 +1,109 @@ +name: AutoReleases + +env: + PYTHONUNBUFFERED: 1 + +concurrency: + group: autoreleases + +on: + # schedule: + # - cron: '0 9 * * *' + workflow_dispatch: + inputs: + dry-run: + description: 'Dry run' + required: false + default: true + type: boolean + +jobs: + AutoReleaseInfo: + runs-on: [self-hosted, style-checker-aarch64] + outputs: + data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }} + dry_run: ${{ steps.info.outputs.DRY_RUN }} + steps: + - name: Debug Info + uses: ./.github/actions/debug + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Prepare Info + id: info + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --prepare + echo "::group::Auto Release Info" + python3 -m json.tool /tmp/autorelease_info.json + echo "::endgroup::" + { + echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_ENV" + { + echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_OUTPUT" + echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" + - name: Post Release Branch statuses + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-status + - name: Clean up + uses: ./.github/actions/clean + + Release_0: + needs: AutoReleaseInfo + name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }} + if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }} + uses: ./.github/workflows/create_release.yml + with: + ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} + type: patch + dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }} +# +# Release_1: +# needs: [AutoReleaseInfo, Release_0] +# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }} +# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }} +# uses: ./.github/workflows/create_release.yml +# with: +# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }} +# type: patch +# dry-run: ${{ env.DRY_RUN }} +# +# Release_2: +# needs: [AutoReleaseInfo, Release_1] +# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }} +# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }} +# uses: ./.github/workflow/create_release.yml +# with: +# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} +# type: patch +# dry-run: ${{ env.DRY_RUN }} +# +# Release_3: +# needs: [AutoReleaseInfo, Release_2] +# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }} +# if: ${{ 
fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }} +# uses: ./.github/workflow/create_release.yml +# with: +# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }} +# type: patch +# dry-run: ${{ env.DRY_RUN }} + +# - name: Post Slack Message +# if: ${{ !cancelled() }} +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index eb16c25f604..1553d689227 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -2,6 +2,7 @@ name: CreateRelease concurrency: group: release + 'on': workflow_dispatch: inputs: @@ -26,6 +27,26 @@ concurrency: required: false default: false type: boolean + workflow_call: + inputs: + ref: + description: 'Git reference (branch or commit sha) from which to create the release' + required: true + type: string + type: + description: 'The type of release: "new" for a new release or "patch" for a patch release' + required: true + type: string + only-repo: + description: 'Run only repos updates including docker (repo-recovery, tests)' + required: false + default: false + type: boolean + dry-run: + description: 'Dry run' + required: false + default: false + type: boolean jobs: CreateRelease: diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 05fae994cbe..4afba20d76c 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -359,13 +359,14 @@ DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World Dates, DateTimes: ```sql -DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}') +DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}') ``` ```response -┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ date │ Nullable(Date) │ │ │ │ │ │ -│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ -└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +│ datetime │ Nullable(DateTime) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` Arrays: @@ -759,12 +760,13 @@ DESC format(CSV, 'Hello world!,World hello!') Dates, DateTimes: ```sql -DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"') +DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00","2022-01-01 00:00:00.000"') ``` ```response ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ c1 │ Nullable(Date) │ │ │ │ │ │ -│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime) │ │ │ │ │ │ +│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │ └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` @@ -956,12 +958,13 @@ DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n') Dates, DateTimes: ```sql -DESC format(TSV, '2020-01-01 2020-01-01 00:00:00') 
+DESC format(TSV, '2020-01-01 2020-01-01 00:00:00 2022-01-01 00:00:00.000') ``` ```response ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ c1 │ Nullable(Date) │ │ │ │ │ │ -│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime) │ │ │ │ │ │ +│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │ └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` @@ -1126,12 +1129,13 @@ DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$) Dates, DateTimes: ```sql -DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$) -``` + DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00', '2022-01-01 00:00:00.000')$$) + ``` ```response ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ c1 │ Nullable(Date) │ │ │ │ │ │ -│ c2 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ c2 │ Nullable(DateTime) │ │ │ │ │ │ +│ c3 │ Nullable(DateTime64(9)) │ │ │ │ │ │ └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` @@ -1504,8 +1508,8 @@ DESC format(JSONEachRow, $$ #### input_format_try_infer_datetimes -If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. -If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`, +If enabled, ClickHouse will try to infer type `DateTime` or `DateTime64` from string fields in schema inference for text formats. +If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime` or `DateTime64(9)` (if any datetime had fractional part), if at least one field was not parsed as datetime, the result type will be `String`. Enabled by default. @@ -1513,39 +1517,66 @@ Enabled by default. 
**Examples** ```sql -SET input_format_try_infer_datetimes = 0 +SET input_format_try_infer_datetimes = 0; DESC format(JSONEachRow, $$ - {"datetime" : "2021-01-01 00:00:00.000"} - {"datetime" : "2022-01-01 00:00:00.000"} + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} $$) ``` ```response -┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ datetime │ Nullable(String) │ │ │ │ │ │ -└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +│ datetime64 │ Nullable(String) │ │ │ │ │ │ +└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql -SET input_format_try_infer_datetimes = 1 +SET input_format_try_infer_datetimes = 1; DESC format(JSONEachRow, $$ - {"datetime" : "2021-01-01 00:00:00.000"} - {"datetime" : "2022-01-01 00:00:00.000"} + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} $$) ``` ```response -┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ -└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql DESC format(JSONEachRow, $$ - {"datetime" : "2021-01-01 00:00:00.000"} - {"datetime" : "unknown"} + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "unknown", "datetime64" : "unknown"} $$) ``` ```response -┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ datetime │ Nullable(String) │ │ │ │ │ │ -└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(String) │ │ │ │ │ │ +│ datetime64 │ Nullable(String) │ │ │ │ │ │ +└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +#### input_format_try_infer_datetimes_only_datetime64 + +If enabled, ClickHouse will always infer `DateTime64(9)` when `input_format_try_infer_datetimes` is enabled even if datetime values don't contain fractional part. + +Disabled by default. 
+ +**Examples** + +```sql +SET input_format_try_infer_datetimes = 1; +SET input_format_try_infer_datetimes_only_datetime64 = 1; +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} + $$) +``` + +```text +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index a3bd919d3ce..a13aacc76e6 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -1042,10 +1042,23 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%. This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values. High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting. -### deduplicate_merge_projection_mode +## lightweight_mutation_projection_mode + +By default, lightweight delete `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. So the default value would be `throw`. +However, this option can change the behavior. With the value either `drop` or `rebuild`, deletes will work with projections. `drop` would delete the projection so it might be fast in the current query as projection gets deleted but slow in future queries as no projection attached. +`rebuild` would rebuild the projection which might affect the performance of the current query, but might speedup for future queries. A good thing is that these options would only work in the part level, +which means projections in the part that don't get touched would stay intact instead of triggering any action like drop or rebuild. + +Possible values: + +- throw, drop, rebuild + +Default value: throw + +## deduplicate_merge_projection_mode Whether to allow create projection for the table with non-classic MergeTree, that is not (Replicated, Shared) MergeTree. If allowed, what is the action when merge projections, either drop or rebuild. So classic MergeTree would ignore this setting. -It also controls `OPTIMIZE DEDUPLICATE` as well, but has effect on all MergeTree family members. +It also controls `OPTIMIZE DEDUPLICATE` as well, but has effect on all MergeTree family members. Similar to the option `lightweight_mutation_projection_mode`, it is also part level. Possible values: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 03ff6067a8f..de601fe02dc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5654,3 +5654,9 @@ Possible values: - 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled. Default value: `0`. + +## create_if_not_exists + +Enable `IF NOT EXISTS` for `CREATE` statement by default. 
If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown. + +Default value: `false`. diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index 88a9c933519..78142f880fe 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -38,8 +38,7 @@ If you anticipate frequent deletes, consider using a [custom partitioning key](/ ### Lightweight `DELETE`s with projections -By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation and may require the projection to be rebuilt, negatively affecting `DELETE` performance. -However, there is an option to change this behavior. By changing setting `lightweight_mutation_projection_mode = 'drop'`, deletes will work with projections. +By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. But there is a [MergeTree setting](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings) `lightweight_mutation_projection_mode` can change the behavior. ## Performance considerations when using lightweight `DELETE` diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 8b45c816817..0b93ae6d547 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -490,8 +490,6 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context /// process_list_element_holder is used to make an element in ProcessList live while BACKUP is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); - /// Update context to preserve query information in processlist (settings, current_database) - process_list_element->updateContext(context_in_use); thread_pool.scheduleOrThrowOnError( [this, @@ -855,8 +853,6 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt /// process_list_element_holder is used to make an element in ProcessList live while RESTORE is working asynchronously. auto process_list_element = context_in_use->getProcessListElement(); - /// Update context to preserve query information in processlist (settings, current_database) - process_list_element->updateContext(context_in_use); thread_pool.scheduleOrThrowOnError( [this, diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 712cab80aff..2cdb3409487 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -244,33 +244,43 @@ const char * analyzeImpl( is_trivial = false; if (!in_square_braces) { - /// Check for case-insensitive flag. - if (pos + 1 < end && pos[1] == '?') + /// it means flag negation + /// there are various possible flags + /// actually only imsU are supported by re2 + auto is_flag_char = [](char x) { - for (size_t offset = 2; pos + offset < end; ++offset) + return x == '-' || x == 'i' || x == 'm' || x == 's' || x == 'U' || x == 'u'; + }; + /// Check for case-insensitive flag. + if (pos + 2 < end && pos[1] == '?' 
&& is_flag_char(pos[2])) + { + size_t offset = 2; + for (; pos + offset < end; ++offset) { - if (pos[offset] == '-' /// it means flag negation - /// various possible flags, actually only imsU are supported by re2 - || (pos[offset] >= 'a' && pos[offset] <= 'z') - || (pos[offset] >= 'A' && pos[offset] <= 'Z')) + if (pos[offset] == 'i') { - if (pos[offset] == 'i') - { - /// Actually it can be negated case-insensitive flag. But we don't care. - has_case_insensitive_flag = true; - break; - } + /// Actually it can be negated case-insensitive flag. But we don't care. + has_case_insensitive_flag = true; } - else + else if (!is_flag_char(pos[offset])) break; } + pos += offset; + if (pos == end) + return pos; + /// if this group only contains flags, we have nothing to do. + if (*pos == ')') + { + ++pos; + break; + } } /// (?:regex) means non-capturing parentheses group - if (pos + 2 < end && pos[1] == '?' && pos[2] == ':') + else if (pos + 2 < end && pos[1] == '?' && pos[2] == ':') { pos += 2; } - if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<'))) + else if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<'))) { pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end); } diff --git a/src/Common/tests/gtest_optimize_re.cpp b/src/Common/tests/gtest_optimize_re.cpp index a9fcb918b24..d6735c3ccfe 100644 --- a/src/Common/tests/gtest_optimize_re.cpp +++ b/src/Common/tests/gtest_optimize_re.cpp @@ -19,6 +19,9 @@ TEST(OptimizeRE, analyze) }; test_f("abc", "abc", {}, true, true); test_f("c([^k]*)de", ""); + test_f("(?-s)bob", "bob", {}, false, true); + test_f("(?s)bob", "bob", {}, false, true); + test_f("(?ssss", ""); test_f("abc(de)fg", "abcdefg", {}, false, true); test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true); test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cd246876fcc..7fc10a9917d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -897,6 +897,7 @@ class IColumn; M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \ M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. 
Useful for testing purposes", 0) \ + M(Bool, create_if_not_exists, false, "Enable IF NOT EXISTS for CREATE statements by default", 0) \ \ \ /* ###################################### */ \ @@ -1137,6 +1138,7 @@ class IColumn; M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ + M(Bool, input_format_try_infer_datetimes_only_datetime64, false, "When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types", 0) \ M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)", 0) \ M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0bc79d6ff57..5524cadd19a 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -75,6 +75,7 @@ static std::initializer_list(implementation_buffer->getPosition())); @@ -1028,7 +1030,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() LOG_TRACE(log, "Bypassing cache because writeCache method failed"); } else - LOG_TRACE(log, "No space left in cache to reserve {} bytes, will continue without cache download", size); + LOG_TRACE(log, "No space left in cache to reserve {} bytes, reason: {}, " + "will continue without cache download", failure_reason, size); if (!success) { diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp index 382c4a80cc4..103ae0e1832 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -91,7 +91,8 @@ bool FileSegmentRangeWriter::write(char * data, size_t size, size_t offset, File size_t size_to_write = std::min(available_size, size); - bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds); + std::string failure_reason; + bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds, failure_reason); if (!reserved) { appendFilesystemCacheLog(*file_segment); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 7205b5b3294..8de80971238 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -63,7 +63,7 @@ void throwIfError(const Aws::Utils::Outcome & response) { const auto & err = response.GetError(); throw S3Exception( - fmt::format("{} (Code: {}, s3 exception: {})", + fmt::format("{} (Code: {}, S3 exception: '{}')", err.GetMessage(), static_cast(err.GetErrorType()), err.GetExceptionName()), err.GetErrorType()); } diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 58407a810c5..e7d9be39ec9 100644 --- 
a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -419,10 +419,11 @@ String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, Fo String result = getAdditionalFormatInfoForAllRowBasedFormats(settings); /// First, settings that are common for all text formats: result += fmt::format( - ", try_infer_integers={}, try_infer_dates={}, try_infer_datetimes={}", + ", try_infer_integers={}, try_infer_dates={}, try_infer_datetimes={}, try_infer_datetimes_only_datetime64={}", settings.try_infer_integers, settings.try_infer_dates, - settings.try_infer_datetimes); + settings.try_infer_datetimes, + settings.try_infer_datetimes_only_datetime64); /// Second, format-specific settings: switch (escaping_rule) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index a78836ff63c..da57c59bdfc 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -266,6 +266,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.try_infer_integers = settings.input_format_try_infer_integers; format_settings.try_infer_dates = settings.input_format_try_infer_dates; format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; + format_settings.try_infer_datetimes_only_datetime64 = settings.input_format_try_infer_datetimes_only_datetime64; format_settings.try_infer_exponent_floats = settings.input_format_try_infer_exponent_floats; format_settings.markdown.escape_special_characters = settings.output_format_markdown_escape_special_characters; format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index f0359218775..3970c776ad2 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -46,6 +46,7 @@ struct FormatSettings bool try_infer_integers = true; bool try_infer_dates = true; bool try_infer_datetimes = true; + bool try_infer_datetimes_only_datetime64 = false; bool try_infer_exponent_floats = false; enum class DateTimeInputFormat : uint8_t diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 3c374ada9e6..5bd41e33f58 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -306,37 +306,45 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// If we have only Date and DateTime types, convert Date to DateTime, - /// otherwise, convert all Date and DateTime to String. + /// If we have only date/datetimes types (Date/DateTime/DateTime64), convert all of them to the common type, + /// otherwise, convert all Date, DateTime and DateTime64 to String. 
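+    /// For example: {Date, DateTime} -> DateTime, {Date, DateTime64} or {DateTime, DateTime64} -> DateTime64(9), while {Date, String} -> String.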
void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_dates = type_indexes.contains(TypeIndex::Date); - bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64); - bool all_dates_or_datetimes = (type_indexes.size() == (static_cast(have_dates) + static_cast(have_datetimes))); + bool have_datetimes = type_indexes.contains(TypeIndex::DateTime); + bool have_datetimes64 = type_indexes.contains(TypeIndex::DateTime64); + bool all_dates_or_datetimes = (type_indexes.size() == (static_cast(have_dates) + static_cast(have_datetimes) + static_cast(have_datetimes64))); - if (!all_dates_or_datetimes && (have_dates || have_datetimes)) + if (!all_dates_or_datetimes && (have_dates || have_datetimes || have_datetimes64)) { for (auto & type : data_types) { - if (isDate(type) || isDateTime64(type)) + if (isDate(type) || isDateTime(type) || isDateTime64(type)) type = std::make_shared(); } type_indexes.erase(TypeIndex::Date); type_indexes.erase(TypeIndex::DateTime); + type_indexes.erase(TypeIndex::DateTime64); type_indexes.insert(TypeIndex::String); return; } - if (have_dates && have_datetimes) + for (auto & type : data_types) { - for (auto & type : data_types) + if (isDate(type) && (have_datetimes || have_datetimes64)) { - if (isDate(type)) + if (have_datetimes64) type = std::make_shared(9); + else + type = std::make_shared(); + type_indexes.erase(TypeIndex::Date); + } + else if (isDateTime(type) && have_datetimes64) + { + type = std::make_shared(9); + type_indexes.erase(TypeIndex::DateTime); } - - type_indexes.erase(TypeIndex::Date); } } @@ -697,55 +705,87 @@ namespace bool tryInferDate(std::string_view field) { - if (field.empty()) + /// Minimum length of Date text representation is 8 (YYYY-M-D) and maximum is 10 (YYYY-MM-DD) + if (field.size() < 8 || field.size() > 10) return false; - ReadBufferFromString buf(field); - Float64 tmp_float; /// Check if it's just a number, and if so, don't try to infer Date from it, /// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01) /// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases. - if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; - - buf.seek(0, SEEK_SET); /// Return position to the beginning - - DayNum tmp; - return tryReadDateText(tmp, buf) && buf.eof(); - } - - bool tryInferDateTime(std::string_view field, const FormatSettings & settings) - { - if (field.empty()) + if (std::all_of(field.begin(), field.end(), isNumericASCII)) return false; ReadBufferFromString buf(field); - Float64 tmp_float; + DayNum tmp; + return tryReadDateText(tmp, buf, DateLUT::instance(), /*allowed_delimiters=*/"-/:") && buf.eof(); + } + + DataTypePtr tryInferDateTimeOrDateTime64(std::string_view field, const FormatSettings & settings) + { + /// Don't try to infer DateTime if string is too long. + /// It's difficult to say what is the real maximum length of + /// DateTime we can parse using BestEffort approach. + /// 50 symbols is more or less valid limit for date times that makes sense. + if (field.empty() || field.size() > 50) + return nullptr; + + /// Check that we have at least one digit, don't infer datetime form strings like "Apr"/"May"/etc. + if (!std::any_of(field.begin(), field.end(), isNumericASCII)) + return nullptr; + /// Check if it's just a number, and if so, don't try to infer DateTime from it, /// because we can interpret this number as a timestamp and it will lead to - /// inferring DateTime instead of simple Int64/Float64 in some cases. 
+ /// inferring DateTime instead of simple Int64 in some cases. + if (std::all_of(field.begin(), field.end(), isNumericASCII)) + return nullptr; + + ReadBufferFromString buf(field); + Float64 tmp_float; + /// Check if it's a float value, and if so, don't try to infer DateTime from it, + /// because it will lead to inferring DateTime instead of simple Float64 in some cases. if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; + return nullptr; + + buf.seek(0, SEEK_SET); /// Return position to the beginning + if (!settings.try_infer_datetimes_only_datetime64) + { + time_t tmp; + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(tmp, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof()) + return std::make_shared(); + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffortStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(); + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUSStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(); + break; + } + } buf.seek(0, SEEK_SET); /// Return position to the beginning DateTime64 tmp; switch (settings.date_time_input_format) { case FormatSettings::DateTimeInputFormat::Basic: - if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) - return true; + if (tryReadDateTime64Text(tmp, 9, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof()) + return std::make_shared(9); break; case FormatSettings::DateTimeInputFormat::BestEffort: - if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; + if (tryParseDateTime64BestEffortStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(9); break; case FormatSettings::DateTimeInputFormat::BestEffortUS: - if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; + if (tryParseDateTime64BestEffortUSStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(9); break; } - return false; + return nullptr; } template @@ -1439,8 +1479,11 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma if (settings.try_infer_dates && tryInferDate(field)) return std::make_shared(); - if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) - return std::make_shared(9); + if (settings.try_infer_datetimes) + { + if (auto type = tryInferDateTimeOrDateTime64(field, settings)) + return type; + } return nullptr; } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9559462e62b..48d788512e4 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1271,7 +1271,7 @@ template void readJSONArrayInto, void>(PaddedPODArray, bool>(PaddedPODArray & s, ReadBuffer & buf); template -ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) +ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters) { static constexpr bool throw_exception = std::is_same_v; @@ -1318,6 +1318,9 @@ ReturnType 
readDateTextFallback(LocalDate & date, ReadBuffer & buf) } else { + if (!isSymbolIn(*buf.position(), allowed_delimiters)) + return error(); + ++buf.position(); if (!append_digit(month)) @@ -1325,7 +1328,11 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) append_digit(month); if (!buf.eof() && !isNumericASCII(*buf.position())) + { + if (!isSymbolIn(*buf.position(), allowed_delimiters)) + return error(); ++buf.position(); + } else return error(); @@ -1338,12 +1345,12 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) return ReturnType(true); } -template void readDateTextFallback(LocalDate &, ReadBuffer &); -template bool readDateTextFallback(LocalDate &, ReadBuffer &); +template void readDateTextFallback(LocalDate &, ReadBuffer &, const char * allowed_delimiters); +template bool readDateTextFallback(LocalDate &, ReadBuffer &, const char * allowed_delimiters); template -ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) +ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters, const char * allowed_time_delimiters) { static constexpr bool throw_exception = std::is_same_v; @@ -1413,6 +1420,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) return false; + + if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters)) + return false; } UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -1443,6 +1453,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4]) || !isNumericASCII(s[6]) || !isNumericASCII(s[7])) return false; + + if (!isSymbolIn(s[2], allowed_time_delimiters) || !isSymbolIn(s[5], allowed_time_delimiters)) + return false; } hour = (s[0] - '0') * 10 + (s[1] - '0'); @@ -1488,10 +1501,10 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D return ReturnType(true); } -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); template diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index ffba4fafb5c..39e1cb12b5c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -703,13 +703,28 @@ struct NullOutput }; template -ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf); +ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters); + +inline bool 
isSymbolIn(char symbol, const char * symbols) +{ + if (symbols == nullptr) + return true; + + const char * pos = symbols; + while (*pos) + { + if (*pos == symbol) + return true; + ++pos; + } + return false; +} /// In YYYY-MM-DD format. /// For convenience, Month and Day parts can have single digit instead of two digits. /// Any separators other than '-' are supported. template -inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) +inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -753,6 +768,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) } else { + if (!isSymbolIn(pos[-1], allowed_delimiters)) + return error(); + if (!isNumericASCII(pos[0])) return error(); @@ -768,6 +786,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0])) return error(); + if (!isSymbolIn(pos[-1], allowed_delimiters)) + return error(); + day = pos[0] - '0'; if (isNumericASCII(pos[1])) { @@ -783,7 +804,7 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) return ReturnType(true); } else - return readDateTextFallback(date, buf); + return readDateTextFallback(date, buf, allowed_delimiters); } inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) @@ -797,15 +818,15 @@ inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) } template -inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; LocalDate local_date; if constexpr (throw_exception) - readDateTextImpl(local_date, buf); - else if (!readDateTextImpl(local_date, buf)) + readDateTextImpl(local_date, buf, allowed_delimiters); + else if (!readDateTextImpl(local_date, buf, allowed_delimiters)) return false; ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day()); @@ -814,15 +835,15 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLU } template -inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; LocalDate local_date; if constexpr (throw_exception) - readDateTextImpl(local_date, buf); - else if (!readDateTextImpl(local_date, buf)) + readDateTextImpl(local_date, buf, allowed_delimiters); + else if (!readDateTextImpl(local_date, buf, allowed_delimiters)) return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. 
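As a side note for this hunk, here is a small illustration (not part of the patch) of what the new `allowed_delimiters` whitelist does; `isParsableDate` is a hypothetical helper written only for this sketch, and the includes assume the usual ClickHouse source tree:

```cpp
#include <string>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>

/// Accepts a date string only if its separators are in the same whitelist
/// that schema inference passes to tryReadDateText ('-', '/' and ':').
bool isParsableDate(const std::string & s)
{
    DB::ReadBufferFromString buf(s);
    DayNum date;
    return DB::tryReadDateText(date, buf, DateLUT::instance(), /*allowed_delimiters=*/"-/:") && buf.eof();
}

/// isParsableDate("2020-01-01") -> true
/// isParsableDate("2020.01.01") -> false, '.' is not in the whitelist
```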
@@ -846,19 +867,19 @@ inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTI readDateTextImpl(date, buf, date_lut); } -inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) +inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, allowed_delimiters); } -inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf, time_zone); + return readDateTextImpl(date, buf, time_zone, allowed_delimiters); } -inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf, time_zone); + return readDateTextImpl(date, buf, time_zone, allowed_delimiters); } UUID parseUUID(std::span src); @@ -975,13 +996,13 @@ inline T parseFromString(std::string_view str) template -ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut); +ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr); /** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone. * As an exception, also supported parsing of unix timestamp in form of decimal number. 
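  * The optional allowed_date_delimiters / allowed_time_delimiters whitelists restrict which separator characters are accepted; nullptr (the default) accepts any separator, as before.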
*/ template -inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -1014,6 +1035,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) return ReturnType(false); + + if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters)) + return ReturnType(false); } UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -1033,6 +1057,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15]) || !isNumericASCII(s[17]) || !isNumericASCII(s[18])) return ReturnType(false); + + if (!isSymbolIn(s[13], allowed_time_delimiters) || !isSymbolIn(s[16], allowed_time_delimiters)) + return ReturnType(false); } hour = (s[11] - '0') * 10 + (s[12] - '0'); @@ -1057,11 +1084,11 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons return readIntTextImpl(datetime, buf); } else - return readDateTimeTextFallback(datetime, buf, date_lut); + return readDateTimeTextFallback(datetime, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } template -inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -1075,7 +1102,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re { try { - readDateTimeTextImpl(whole, buf, date_lut); + readDateTimeTextImpl(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } catch (const DB::Exception &) { @@ -1085,7 +1112,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re } else { - auto ok = readDateTimeTextImpl(whole, buf, date_lut); + auto ok = readDateTimeTextImpl(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); if (!ok && (buf.eof() || *buf.position() != '.')) return ReturnType(false); } @@ -1168,14 +1195,14 @@ inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer readDateTimeTextImpl(datetime64, scale, buf, date_lut); } -inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { - return readDateTimeTextImpl(datetime, buf, time_zone); + return readDateTimeTextImpl(datetime, buf, time_zone, allowed_date_delimiters, allowed_time_delimiters); } -inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & 
date_lut = DateLUT::instance()) +inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { - return readDateTimeTextImpl(datetime64, scale, buf, date_lut); + return readDateTimeTextImpl(datetime64, scale, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index dfb7727fca4..d6f7542da6b 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -145,12 +145,16 @@ Aws::String AWSEC2MetadataClient::getDefaultCredentialsSecurely() const { String user_agent_string = awsComputeUserAgentString(); auto [new_token, response_code] = getEC2MetadataToken(user_agent_string); - if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST) + if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST + || response_code == Aws::Http::HttpResponseCode::REQUEST_NOT_MADE) + { + /// At least the host should be available and reply, otherwise neither IMDSv2 nor IMDSv1 are usable. return {}; + } else if (response_code != Aws::Http::HttpResponseCode::OK || new_token.empty()) { LOG_TRACE(logger, "Calling EC2MetadataService to get token failed, " - "falling back to less secure way. HTTP response code: {}", response_code); + "falling back to a less secure way. HTTP response code: {}", response_code); return getDefaultCredentials(); } @@ -247,7 +251,7 @@ static Aws::String getAWSMetadataEndpoint() return ec2_metadata_service_endpoint; } -std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) +std::shared_ptr createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) { auto endpoint = getAWSMetadataEndpoint(); return std::make_shared(client_configuration, endpoint.c_str()); @@ -781,11 +785,13 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( /// EC2MetadataService throttles by delaying the response so the service client should set a large read timeout. /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds. - aws_client_configuration.connectTimeoutMs = 1000; + /// But the connection timeout should be small because there is the case when there is no IMDS at all, + /// like outside of the cloud, on your own machines. 
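+    /// 10 ms is enough to reach the link-local instance metadata endpoint when it exists, while failing almost immediately when it does not.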
+ aws_client_configuration.connectTimeoutMs = 10; aws_client_configuration.requestTimeoutMs = 1000; aws_client_configuration.retryStrategy = std::make_shared(1, 1000); - auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration); + auto ec2_metadata_client = createEC2MetadataClient(aws_client_configuration); auto config_loader = std::make_shared(ec2_metadata_client, !credentials_configuration.use_insecure_imds_request); AddProvider(std::make_shared(config_loader)); diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 95297ab0538..042c48ec15a 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -70,7 +70,7 @@ private: LoggerPtr logger; }; -std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); +std::shared_ptr createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLoader { diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index aab7a39534d..de43f34d838 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -128,7 +128,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() } else { - /// In global mode AWS C++ SDK send `us-east-1` but accept switching to another one if being suggested. + /// In global mode AWS C++ SDK sends `us-east-1` but accepts switching to another one if being suggested. region = Aws::Region::AWS_GLOBAL; } } diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index fead18315d8..9c80b377661 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -1,8 +1,8 @@ #include -#include -#include -#include "Common/Macros.h" + #if USE_AWS_S3 +#include +#include #include #include #include @@ -10,6 +10,7 @@ #include + namespace DB { @@ -40,21 +41,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html - static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com(:\d{1,5})?)"); + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce\.amazonaws\.com(:\d{1,5})?)"); - /// Case when bucket name and key represented in path of S3 URL. + /// Case when bucket name and key represented in the path of S3 URL. /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access static const RE2 path_style_pattern("^/([^/]*)/(.*)"); - static constexpr auto S3 = "S3"; - static constexpr auto S3EXPRESS = "S3EXPRESS"; - static constexpr auto COSN = "COSN"; - static constexpr auto COS = "COS"; - static constexpr auto OBS = "OBS"; - static constexpr auto OSS = "OSS"; - static constexpr auto EOS = "EOS"; - if (allow_archive_path_syntax) std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_); else @@ -85,7 +78,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) URIConverter::modifyURI(uri, mapper); } - storage_name = S3; + storage_name = "S3"; if (uri.getHost().empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI."); @@ -93,11 +86,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Extract object version ID from query string. 
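    /// e.g. "https://s3.region.amazonaws.com/bucket-name/key?versionId=<some-id>"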
bool has_version_id = false; for (const auto & [query_key, query_value] : uri.getQueryParameters()) + { if (query_key == "versionId") { version_id = query_value; has_version_id = true; } + } /// Poco::URI will ignore '?' when parsing the path, but if there is a versionId in the http parameter, /// '?' can not be used as a wildcard, otherwise it will be ambiguous. @@ -129,15 +124,8 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) } boost::to_upper(name); - /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info - if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", - quoteString(name)); - - if (name == COS) - storage_name = COSN; + if (name == "COS") + storage_name = "COSN"; else storage_name = name; } @@ -148,13 +136,22 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) validateBucket(bucket, uri); } else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI."); + { + /// Custom endpoint, e.g. a public domain of Cloudflare R2, + /// which could be served by a custom server-side code. + storage_name = "S3"; + bucket = "default"; + is_virtual_hosted_style = false; + endpoint = uri.getScheme() + "://" + uri.getAuthority(); + if (!uri.getPath().empty()) + key = uri.getPath().substr(1); + } } void URI::addRegionToURI(const std::string ®ion) { - if (auto pos = endpoint.find("amazonaws.com"); pos != std::string::npos) - endpoint = endpoint.substr(0, pos) + region + "." + endpoint.substr(pos); + if (auto pos = endpoint.find(".amazonaws.com"); pos != std::string::npos) + endpoint = endpoint.substr(0, pos) + "." + region + endpoint.substr(pos); } void URI::validateBucket(const String & bucket, const Poco::URI & uri) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 80e2da96cd4..c8d0b28cd15 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -1,14 +1,14 @@ #pragma once -#include -#include - #include "config.h" #if USE_AWS_S3 +#include +#include #include + namespace DB::S3 { @@ -23,7 +23,7 @@ namespace DB::S3 struct URI { Poco::URI uri; - // Custom endpoint if URI scheme is not S3. + // Custom endpoint if URI scheme, if not S3. std::string endpoint; std::string bucket; std::string key; diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index e046e837689..f220577f2cb 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -82,13 +82,14 @@ struct DateTimeSubsecondPart UInt8 digits; }; -template +template ReturnType parseDateTimeBestEffortImpl( time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, - DateTimeSubsecondPart * fractional) + DateTimeSubsecondPart * fractional, + const char * allowed_date_delimiters = nullptr) { auto on_error = [&](int error_code [[maybe_unused]], FormatStringHelper fmt_string [[maybe_unused]], @@ -170,22 +171,36 @@ ReturnType parseDateTimeBestEffortImpl( fractional->digits = 3; readDecimalNumber<3>(fractional->value, digits + 10); } + else if constexpr (strict) + { + /// Fractional part is not allowed. 
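+                    /// (a value with fractional seconds should be inferred as DateTime64, so strict DateTime parsing rejects it here)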
+ return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part"); + } return ReturnType(true); } else if (num_digits == 10 && !year && !has_time) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is unix timestamp. readDecimalNumber<10>(res, digits); return ReturnType(true); } else if (num_digits == 9 && !year && !has_time) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is unix timestamp. readDecimalNumber<9>(res, digits); return ReturnType(true); } else if (num_digits == 14 && !year && !has_time) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is YYYYMMDDhhmmss readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); @@ -197,6 +212,9 @@ ReturnType parseDateTimeBestEffortImpl( } else if (num_digits == 8 && !year) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is YYYYMMDD readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); @@ -272,6 +290,9 @@ ReturnType parseDateTimeBestEffortImpl( else return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after year and month: {}", num_digits); } + + if (!isSymbolIn(delimiter_after_year, allowed_date_delimiters)) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: '{}' delimiter between date parts is not allowed", delimiter_after_year); } } else if (num_digits == 2 || num_digits == 1) @@ -403,9 +424,16 @@ ReturnType parseDateTimeBestEffortImpl( else { if (day_of_month) + { + if (strict && hour) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: hour component is duplicated"); + hour = hour_or_day_of_month_or_month; + } else + { day_of_month = hour_or_day_of_month_or_month; + } } } else if (num_digits != 0) @@ -446,6 +474,11 @@ ReturnType parseDateTimeBestEffortImpl( fractional->digits = num_digits; readDecimalNumber(fractional->value, num_digits, digits); } + else if (strict) + { + /// Fractional part is not allowed. + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part"); + } } else if (c == '+' || c == '-') { @@ -582,12 +615,24 @@ ReturnType parseDateTimeBestEffortImpl( return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: neither Date nor Time was parsed successfully"); if (!day_of_month) + { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: day of month is required"); day_of_month = 1; + } + if (!month) + { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month is required"); month = 1; + } if (!year) { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year is required"); + /// If year is not specified, it will be the current year if the date is unknown or not greater than today, /// otherwise it will be the previous year. /// This convoluted logic is needed to parse the syslog format, which looks as follows: "Mar 3 01:33:48". 
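For context between these two hunks, a caller-side sketch (illustration only, not part of the patch) of the strict entry point added at the end of this file; `looksLikeDateTime` is a hypothetical helper written only for this example:

```cpp
#include <ctime>
#include <string>
#include <IO/ReadBufferFromString.h>
#include <IO/parseDateTimeBestEffort.h>

/// Returns true only for strings the strict best-effort parser accepts:
/// year, month and day must all be present, and date parts may only be
/// separated by the characters in the whitelist used by schema inference.
bool looksLikeDateTime(const std::string & s)
{
    DB::ReadBufferFromString buf(s);
    time_t result = 0;
    return DB::tryParseDateTimeBestEffortStrict(
               result, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:")
        && buf.eof();
}

/// looksLikeDateTime("2021-01-01 00:00:00") -> true
/// looksLikeDateTime("2021.01.01 00:00:00") -> false, '.' is not an allowed date delimiter
/// looksLikeDateTime("Mar 3 01:33:48")      -> false, strict parsing requires an explicit year
```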
@@ -641,6 +686,20 @@ ReturnType parseDateTimeBestEffortImpl( } }; + if constexpr (strict) + { + if constexpr (is_64) + { + if (year < 1900) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime64: year {} is less than minimum supported year 1900", year); + } + else + { + if (year < 1970) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year {} is less than minimum supported year 1970", year); + } + } + if (has_time_zone_offset) { res = utc_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second); @@ -654,20 +713,20 @@ ReturnType parseDateTimeBestEffortImpl( return ReturnType(true); } -template -ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) +template +ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters = nullptr) { time_t whole; DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized sine it could be missing from input string if constexpr (std::is_same_v) { - if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) + if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters)) return false; } else { - parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); + parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters); } @@ -730,4 +789,24 @@ bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } +bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters); +} + +bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters); +} + +bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters); +} + +bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters); +} + } diff --git a/src/IO/parseDateTimeBestEffort.h b/src/IO/parseDateTimeBestEffort.h index 22af44f9e76..6dd052b67a3 100644 --- a/src/IO/parseDateTimeBestEffort.h +++ b/src/IO/parseDateTimeBestEffort.h @@ -63,4 +63,12 @@ void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); void 
parseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); + +/// More strict version of best effort parsing. Requires day, month and year to be present, checks for allowed +/// delimiters between date components, makes additional correctness checks. Used in schema inference if date times. +bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); + } diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 0ec28f80072..c0bf7fcb28a 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -206,11 +206,6 @@ TEST(S3UriTest, validPatterns) } } -TEST_P(S3UriTest, invalidPatterns) -{ - ASSERT_ANY_THROW(S3::URI new_uri(GetParam())); -} - TEST(S3UriTest, versionIdChecks) { for (const auto& test_case : TestCases) @@ -223,19 +218,5 @@ TEST(S3UriTest, versionIdChecks) } } -INSTANTIATE_TEST_SUITE_P( - S3, - S3UriTest, - testing::Values( - "https:///", - "https://.s3.amazonaws.com/key", - "https://s3.amazonaws.com/key", - "https://jokserfn.s3amazonaws.com/key", - "https://s3.amazonaws.com//", - "https://amazonaws.com/", - "https://amazonaws.com//", - "https://amazonaws.com//key")); - } - #endif diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 13c70b38543..b1a1f629b00 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -804,7 +804,8 @@ bool FileCache::tryReserve( const size_t size, FileCacheReserveStat & reserve_stat, const UserInfo & user, - size_t lock_wait_timeout_milliseconds) + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); @@ -817,6 +818,7 @@ bool FileCache::tryReserve( if (cache_is_being_resized.load(std::memory_order_relaxed)) { ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfCacheResize); + failure_reason = "cache is being resized"; return false; } @@ -824,6 +826,7 @@ bool FileCache::tryReserve( if (!cache_lock) { ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + failure_reason = "cache contention"; return false; } @@ -847,6 +850,7 @@ bool FileCache::tryReserve( LOG_TEST(log, "Query limit exceeded, space reservation failed, " "recache_on_query_limit_exceeded is disabled (while reserving for {}:{})", file_segment.key(), file_segment.offset()); + failure_reason = "query limit exceeded"; return false; } @@ -877,6 +881,7 @@ bool FileCache::tryReserve( if (!query_priority->collectCandidatesForEviction( size, required_elements_num, 
reserve_stat, eviction_candidates, {}, user.user_id, cache_lock)) { + failure_reason = "cannot evict enough space for query limit"; return false; } @@ -891,11 +896,15 @@ bool FileCache::tryReserve( if (!main_priority->collectCandidatesForEviction( size, required_elements_num, reserve_stat, eviction_candidates, queue_iterator, user.user_id, cache_lock)) { + failure_reason = "cannot evict enough space"; return false; } if (!file_segment.getKeyMetadata()->createBaseDirectory()) + { + failure_reason = "not enough space on device"; return false; + } if (eviction_candidates.size() > 0) { diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 07be802a940..efa504689eb 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -165,7 +165,8 @@ public: size_t size, FileCacheReserveStat & stat, const UserInfo & user, - size_t lock_wait_timeout_milliseconds); + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason); std::vector getFileSegmentInfos(const UserID & user_id); diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index c46fb978ae4..cfbdfbaa257 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -502,7 +502,11 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const return metadata->tryLock(); } -bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat) +bool FileSegment::reserve( + size_t size_to_reserve, + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason, + FileCacheReserveStat * reserve_stat) { if (!size_to_reserve) throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed"); @@ -554,7 +558,7 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli if (!reserve_stat) reserve_stat = &dummy_stat; - bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds); + bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds, failure_reason); if (!reserved) setDownloadFailedUnlocked(lock()); diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 25ffb880b45..e90ebdbf8fe 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -201,7 +201,11 @@ public: /// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded). /// Returns true if reservation was successful, false otherwise. - bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); + bool reserve( + size_t size_to_reserve, + size_t lock_wait_timeout_milliseconds, + std::string & failure_reason, + FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. 
void write(char * from, size_t size, size_t offset_in_file); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 7e4b76d3cc6..6399691bcf6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -705,7 +705,8 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalavailable(); - if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds)) + std::string failure_reason; + if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds, failure_reason)) { LOG_TEST( log, "Failed to reserve space during background download " diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index e6ebf6ad50c..e43bbacdbc5 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -75,7 +75,8 @@ void WriteBufferToFileSegment::nextImpl() FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. - bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, &reserve_stat); + std::string failure_reason; + bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, failure_reason, &reserve_stat); if (!ok) { @@ -84,9 +85,10 @@ void WriteBufferToFileSegment::nextImpl() reserve_stat_msg += fmt::format("{} hold {}, can release {}; ", toString(kind), ReadableSize(stat.non_releasable_size), ReadableSize(stat.releasable_size)); - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: {}(segment info: {})", + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Failed to reserve {} bytes for {}: reason {}, {}(segment info: {})", bytes_to_write, file_segment->getKind() == FileSegmentKind::Temporary ? "temporary file" : "the file in cache", + failure_reason, reserve_stat_msg, file_segment->getInfoForLog() ); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 95143031707..f30c0585663 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1944,6 +1944,8 @@ BlockIO InterpreterCreateQuery::execute() FunctionNameNormalizer::visit(query_ptr.get()); auto & create = query_ptr->as(); + create.if_not_exists |= getContext()->getSettingsRef().create_if_not_exists; + bool is_create_database = create.database && !create.table; if (!create.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext())) { diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 248ba947bc1..accb73e12df 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -244,9 +244,6 @@ public: /// Same as checkTimeLimit but it never throws [[nodiscard]] bool checkTimeLimitSoft(); - /// Use it in case of the query left in background to execute asynchronously - void updateContext(ContextWeakPtr weak_context) { context = std::move(weak_context); } - /// Get the reference for the start of the query. 
Used to synchronize with other Stopwatches UInt64 getQueryCPUStartTime() { return watch.getStart(); } }; diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 36acc319f4e..fd602ab5918 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -246,7 +246,8 @@ void download(FileSegment & file_segment) ASSERT_EQ(file_segment.state(), State::DOWNLOADING); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000)); + std::string failure_reason; + ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000, failure_reason)); download(cache_base_path, file_segment); ASSERT_EQ(file_segment.state(), State::DOWNLOADING); @@ -258,7 +259,8 @@ void assertDownloadFails(FileSegment & file_segment) { ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId()); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000)); + std::string failure_reason; + ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000, failure_reason)); file_segment.complete(); } @@ -957,10 +959,11 @@ TEST_F(FileCacheTest, temporaryData) { ASSERT_EQ(some_data_holder->size(), 5); + std::string failure_reason; for (auto & segment : *some_data_holder) { ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); - ASSERT_TRUE(segment->reserve(segment->range().size(), 1000)); + ASSERT_TRUE(segment->reserve(segment->range().size(), 1000, failure_reason)); download(*segment); segment->complete(); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index 6d5b60d8159..10470325bb8 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -96,7 +96,7 @@ bool ExecutingGraph::addEdges(uint64_t node) return was_edge_added; } -bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) +ExecutingGraph::UpdateNodeStatus ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) { auto & cur_node = *nodes[pid]; Processors new_processors; @@ -108,7 +108,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) catch (...) { cur_node.exception = std::current_exception(); - return false; + return UpdateNodeStatus::Exception; } { @@ -118,7 +118,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) { for (auto & processor : new_processors) processor->cancel(); - return false; + return UpdateNodeStatus::Cancelled; } processors->insert(processors->end(), new_processors.begin(), new_processors.end()); @@ -178,7 +178,7 @@ bool ExecutingGraph::expandPipeline(std::stack & stack, uint64_t pid) } } - return true; + return UpdateNodeStatus::Done; } void ExecutingGraph::initializeExecution(Queue & queue) @@ -213,7 +213,7 @@ void ExecutingGraph::initializeExecution(Queue & queue) } -bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue) +ExecutingGraph::UpdateNodeStatus ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue) { std::stack updated_edges; std::stack updated_processors; @@ -309,7 +309,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue catch (...) 
{ node.exception = std::current_exception(); - return false; + return UpdateNodeStatus::Exception; } #ifndef NDEBUG @@ -386,8 +386,9 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue read_lock.unlock(); { std::unique_lock lock(nodes_mutex); - if (!expandPipeline(updated_processors, pid)) - return false; + auto status = expandPipeline(updated_processors, pid); + if (status != UpdateNodeStatus::Done) + return status; } read_lock.lock(); @@ -397,7 +398,7 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue } } - return true; + return UpdateNodeStatus::Done; } void ExecutingGraph::cancel(bool cancel_all_processors) diff --git a/src/Processors/Executors/ExecutingGraph.h b/src/Processors/Executors/ExecutingGraph.h index 71dcd360a2c..e1a6ac96203 100644 --- a/src/Processors/Executors/ExecutingGraph.h +++ b/src/Processors/Executors/ExecutingGraph.h @@ -138,10 +138,17 @@ public: /// Traverse graph the first time to update all the childless nodes. void initializeExecution(Queue & queue); + enum class UpdateNodeStatus + { + Done, + Exception, + Cancelled, + }; + /// Update processor with pid number (call IProcessor::prepare). /// Check parents and children of current processor and push them to stacks if they also need to be updated. /// If processor wants to be expanded, lock will be upgraded to get write access to pipeline. - bool updateNode(uint64_t pid, Queue & queue, Queue & async_queue); + UpdateNodeStatus updateNode(uint64_t pid, Queue & queue, Queue & async_queue); void cancel(bool cancel_all_processors = true); @@ -155,7 +162,7 @@ private: /// Update graph after processor (pid) returned ExpandPipeline status. /// All new nodes and nodes with updated ports are pushed into stack. - bool expandPipeline(std::stack & stack, uint64_t pid); + UpdateNodeStatus expandPipeline(std::stack & stack, uint64_t pid); std::shared_ptr processors; std::vector source_processors; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 82cad471a29..23b3a6d9f5f 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -77,9 +77,9 @@ const Processors & PipelineExecutor::getProcessors() const return graph->getProcessors(); } -void PipelineExecutor::cancel() +void PipelineExecutor::cancel(ExecutionStatus reason) { - cancelled = true; + tryUpdateExecutionStatus(ExecutionStatus::Executing, reason); finish(); graph->cancel(); } @@ -98,6 +98,11 @@ void PipelineExecutor::finish() tasks.finish(); } +bool PipelineExecutor::tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired) +{ + return execution_status.compare_exchange_strong(expected, desired); +} + void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) { checkTimeLimit(); @@ -120,7 +125,7 @@ void PipelineExecutor::execute(size_t num_threads, bool concurrency_control) } catch (...) { - span.addAttribute(ExecutionStatus::fromCurrentException()); + span.addAttribute(DB::ExecutionStatus::fromCurrentException()); #ifndef NDEBUG LOG_TRACE(log, "Exception while executing query. 
Current state:\n{}", dumpPipeline()); @@ -169,7 +174,7 @@ bool PipelineExecutor::checkTimeLimitSoft() // We call cancel here so that all processors are notified and tasks waken up // so that the "break" is faster and doesn't wait for long events if (!continuing) - cancel(); + cancel(ExecutionStatus::CancelledByTimeout); return continuing; } @@ -195,7 +200,8 @@ void PipelineExecutor::finalizeExecution() { checkTimeLimit(); - if (cancelled) + auto status = execution_status.load(); + if (status == ExecutionStatus::CancelledByTimeout || status == ExecutionStatus::CancelledByUser) return; bool all_processors_finished = true; @@ -271,7 +277,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie break; if (!context.executeTask()) - cancel(); + cancel(ExecutionStatus::Exception); if (tasks.isFinished()) break; @@ -289,11 +295,13 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie Queue async_queue; /// Prepare processor after execution. - if (!graph->updateNode(context.getProcessorID(), queue, async_queue)) - cancel(); + auto status = graph->updateNode(context.getProcessorID(), queue, async_queue); + if (status == ExecutingGraph::UpdateNodeStatus::Exception) + cancel(ExecutionStatus::Exception); /// Push other tasks to global queue. - tasks.pushTasks(queue, async_queue, context); + if (status == ExecutingGraph::UpdateNodeStatus::Done) + tasks.pushTasks(queue, async_queue, context); } #ifndef NDEBUG @@ -309,7 +317,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie { /// spawnThreads can throw an exception, for example CANNOT_SCHEDULE_TASK. /// We should cancel execution properly before rethrow. - cancel(); + cancel(ExecutionStatus::Exception); throw; } @@ -328,6 +336,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie void PipelineExecutor::initializeExecution(size_t num_threads, bool concurrency_control) { is_execution_initialized = true; + tryUpdateExecutionStatus(ExecutionStatus::NotStarted, ExecutionStatus::Executing); size_t use_threads = num_threads; @@ -393,7 +402,7 @@ void PipelineExecutor::executeImpl(size_t num_threads, bool concurrency_control) { /// If finished_flag is not set, there was an exception. /// Cancel execution in this case. - cancel(); + cancel(ExecutionStatus::Exception); if (pool) pool->wait(); } diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index ae119355cb5..79d0a29d4e1 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -48,8 +48,20 @@ public: const Processors & getProcessors() const; + enum class ExecutionStatus + { + NotStarted, + Executing, + Finished, + Exception, + CancelledByUser, + CancelledByTimeout, + }; + /// Cancel execution. May be called from another thread. - void cancel(); + void cancel() { cancel(ExecutionStatus::CancelledByUser); } + + ExecutionStatus getExecutionStatus() const { return execution_status.load(); } /// Cancel processors which only read data from source. May be called from another thread. 
void cancelReading(); @@ -81,7 +93,7 @@ private: /// system.opentelemetry_span_log bool trace_processors = false; - std::atomic_bool cancelled = false; + std::atomic execution_status = ExecutionStatus::NotStarted; std::atomic_bool cancelled_reading = false; LoggerPtr log = getLogger("PipelineExecutor"); @@ -105,6 +117,10 @@ private: void executeStepImpl(size_t thread_num, std::atomic_bool * yield_flag = nullptr); void executeSingleThread(size_t thread_num); void finish(); + void cancel(ExecutionStatus reason); + + /// If execution_status == from, change it to desired. + bool tryUpdateExecutionStatus(ExecutionStatus expected, ExecutionStatus desired); String dumpPipeline() const; }; diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 830a96533ed..866d224a08d 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; } class PushingAsyncSource : public ISource @@ -176,6 +177,16 @@ void PushingAsyncPipelineExecutor::start() data->thread = ThreadFromGlobalPool(std::move(func)); } +[[noreturn]] static void throwOnExecutionStatus(PipelineExecutor::ExecutionStatus status) +{ + if (status == PipelineExecutor::ExecutionStatus::CancelledByTimeout + || status == PipelineExecutor::ExecutionStatus::CancelledByUser) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); + + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} + void PushingAsyncPipelineExecutor::push(Chunk chunk) { if (!started) @@ -185,8 +196,7 @@ void PushingAsyncPipelineExecutor::push(Chunk chunk) data->rethrowExceptionIfHas(); if (!is_pushed) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingAsyncPipelineExecutor was finished before all data was inserted"); + throwOnExecutionStatus(data->executor->getExecutionStatus()); } void PushingAsyncPipelineExecutor::push(Block block) diff --git a/src/Processors/Executors/PushingPipelineExecutor.cpp b/src/Processors/Executors/PushingPipelineExecutor.cpp index 696932932df..7a1c0111a3a 100644 --- a/src/Processors/Executors/PushingPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingPipelineExecutor.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; } class PushingSource : public ISource @@ -80,6 +81,15 @@ const Block & PushingPipelineExecutor::getHeader() const return pushing_source->getPort().getHeader(); } +[[noreturn]] static void throwOnExecutionStatus(PipelineExecutor::ExecutionStatus status) +{ + if (status == PipelineExecutor::ExecutionStatus::CancelledByTimeout + || status == PipelineExecutor::ExecutionStatus::CancelledByUser) + throw Exception(ErrorCodes::QUERY_WAS_CANCELLED, "Query was cancelled"); + + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); +} void PushingPipelineExecutor::start() { @@ -91,8 +101,7 @@ void PushingPipelineExecutor::start() executor->setReadProgressCallback(pipeline.getReadProgressCallback()); if (!executor->executeStep(&input_wait_flag)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); + 
throwOnExecutionStatus(executor->getExecutionStatus()); } void PushingPipelineExecutor::push(Chunk chunk) @@ -103,8 +112,7 @@ void PushingPipelineExecutor::push(Chunk chunk) pushing_source->setData(std::move(chunk)); if (!executor->executeStep(&input_wait_flag)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Pipeline for PushingPipelineExecutor was finished before all data was inserted"); + throwOnExecutionStatus(executor->getExecutionStatus()); } void PushingPipelineExecutor::push(Block block) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 523d4ce386a..909d025358f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -386,10 +386,11 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(RangesInDataParts parts_wit /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; - if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + const auto min_marks_for_concurrent_read_limit = std::numeric_limits::max() >> 1; + if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit) { /// limit min marks to read in case it's big, happened in test since due to settings randomzation - pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); + pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit; multiplier = 1.0f; } @@ -562,10 +563,11 @@ Pipe ReadFromMergeTree::readInOrder( }; auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; - if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + const auto min_marks_for_concurrent_read_limit = std::numeric_limits::max() >> 1; + if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit) { /// limit min marks to read in case it's big, happened in test since due to settings randomzation - pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); + pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit; multiplier = 1.0f; } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 2f9a4a47b11..2b924284857 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -105,13 +105,16 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num /// See comment in IMergeTreeSelectAlgorithm. if (min_marks_to_read) { - // check that ... 
- bool overflow = ((1ULL << 63) & min_marks_to_read); // further multiplication by 2 will not overflow - if (!overflow) - overflow = (std::numeric_limits::max() - from_mark) < 2 * min_marks_to_read; // further addition will not overflow + // check overflow + size_t min_marks_to_read_2 = 0; + bool overflow = common::mulOverflow(min_marks_to_read, 2, min_marks_to_read_2); - if (!overflow && from_mark + 2 * min_marks_to_read <= to_mark) - to_mark = from_mark + min_marks_to_read; + size_t to_mark_overwrite = 0; + if (!overflow) + overflow = common::addOverflow(from_mark, min_marks_to_read_2, to_mark_overwrite); + + if (!overflow && to_mark_overwrite < to_mark) + to_mark = to_mark_overwrite; } return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 78dbb72c199..f7701a2aab8 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -895,7 +895,7 @@ void StorageMergeTree::loadDeduplicationLog() std::string path = fs::path(relative_data_path) / "deduplication_logs"; /// If either there is already a deduplication log, or we will be able to use it. - if (disk->exists(path) || !disk->isReadOnly()) + if (!disk->isReadOnly() || disk->exists(path)) { deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version, disk); deduplication_log->load(); diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 215718463e8..53f4f1e1f26 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -570,6 +570,8 @@ class ClickHouseCluster: self.spark_session = None self.with_azurite = False + self.azurite_container = "azurite-container" + self.blob_service_client = None self._azurite_port = 0 # available when with_hdfs == True @@ -2692,6 +2694,32 @@ class ClickHouseCluster: connection_string ) logging.debug(blob_service_client.get_account_information()) + containers = [ + c + for c in blob_service_client.list_containers( + name_starts_with=self.azurite_container + ) + if c.name == self.azurite_container + ] + if len(containers) > 0: + for c in containers: + blob_service_client.delete_container(c) + + container_client = blob_service_client.get_container_client( + self.azurite_container + ) + if container_client.exists(): + logging.debug( + f"azurite container '{self.azurite_container}' exist, deleting all blobs" + ) + for b in container_client.list_blobs(): + container_client.delete_blob(b.name) + else: + logging.debug( + f"azurite container '{self.azurite_container}' doesn't exist, creating it" + ) + container_client.create_container() + self.blob_service_client = blob_service_client return except Exception as ex: diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 6f6dc4d287f..8d5345082ff 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -202,6 +202,10 @@ def test_create_table(): f"S3Queue('http://minio1:9001/root/data/', 'CSV', 'gzip') settings mode = 'ordered'", f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV') settings mode = 'ordered'", f"S3Queue('http://minio1:9001/root/data/', 'minio', '{password}', 'CSV', 'gzip') settings mode = 'ordered'", + ( + f"Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", + "DNS_ERROR", + ), ] def make_test_case(i): @@ -266,6 +270,7 
@@ def test_create_table(): # due to sensitive data substituion the query will be normalized, so not "settings" but "SETTINGS" "CREATE TABLE table19 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV') SETTINGS mode = 'ordered'", "CREATE TABLE table20 (`x` int) ENGINE = S3Queue('http://minio1:9001/root/data/', 'minio', '[HIDDEN]', 'CSV', 'gzip') SETTINGS mode = 'ordered'", + "CREATE TABLE table21 (`x` int) ENGINE = Iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", ], must_not_contain=[password], ) @@ -387,6 +392,7 @@ def test_table_functions(): f"azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '{azure_account_key}', 'CSV', 'none', 'auto')", f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", f"azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '{azure_account_key}')", + f"iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", ] def make_test_case(i): @@ -478,6 +484,7 @@ def test_table_functions(): f"CREATE TABLE tablefunc48 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', '{azure_storage_account_url}', 'cont', 'test_simple_15.csv', '{azure_account_name}', '[HIDDEN]', 'CSV', 'none', 'auto')", f"CREATE TABLE tablefunc49 (x int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, connection_string = '{azure_conn_string}', container = 'cont', blob_path = 'test_simple_16.csv', format = 'CSV')", f"CREATE TABLE tablefunc50 (`x` int) AS azureBlobStorageCluster('test_shard_localhost', named_collection_2, storage_account_url = '{azure_storage_account_url}', container = 'cont', blob_path = 'test_simple_17.csv', account_name = '{azure_account_name}', account_key = '[HIDDEN]')", + "CREATE TABLE tablefunc51 (`x` int) AS iceberg('http://minio1:9001/root/data/test11.csv.gz', 'minio', '[HIDDEN]')", ], must_not_contain=[password], ) diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 0d0d7a0afb1..9d4ca5ad49f 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -51,9 +51,9 @@ create_table_sql_nullable_template = """ """ -def skip_test_msan(instance): - if instance.is_built_with_memory_sanitizer(): - pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") +def skip_test_sanitizers(instance): + if instance.is_built_with_sanitizer(): + pytest.skip("Sanitizers cannot work with third-party shared libraries") def get_mysql_conn(): @@ -208,7 +208,7 @@ def started_cluster(): def test_mysql_odbc_select_nullable(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] table_name = "test_insert_nullable_select" @@ -248,7 +248,7 @@ def test_mysql_odbc_select_nullable(started_cluster): def test_mysql_simple_select_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] @@ -331,7 +331,7 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, column_x Nulla def test_mysql_insert(started_cluster): - skip_test_msan(node1) + 
skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] table_name = "test_insert" @@ -374,7 +374,7 @@ def test_mysql_insert(started_cluster): def test_sqlite_simple_select_function_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -438,7 +438,7 @@ def test_sqlite_simple_select_function_works(started_cluster): def test_sqlite_table_function(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -470,7 +470,7 @@ def test_sqlite_table_function(started_cluster): def test_sqlite_simple_select_storage_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -503,7 +503,7 @@ def test_sqlite_simple_select_storage_works(started_cluster): def test_sqlite_odbc_hashed_dictionary(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] node1.exec_in_container( @@ -586,7 +586,7 @@ def test_sqlite_odbc_hashed_dictionary(started_cluster): def test_sqlite_odbc_cached_dictionary(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] node1.exec_in_container( @@ -635,7 +635,7 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -663,7 +663,7 @@ def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -685,7 +685,7 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): def test_no_connection_pooling(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -717,7 +717,7 @@ def test_no_connection_pooling(started_cluster): def test_postgres_insert(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) @@ -754,7 +754,7 @@ def test_postgres_insert(started_cluster): def test_odbc_postgres_date_data_type(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -783,7 +783,7 @@ def test_odbc_postgres_date_data_type(started_cluster): def test_odbc_postgres_conversions(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -841,7 +841,7 @@ def test_odbc_postgres_conversions(started_cluster): def test_odbc_cyrillic_with_varchar(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -868,7 +868,7 @@ def test_odbc_cyrillic_with_varchar(started_cluster): def test_many_connections(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -894,7 +894,7 @@ def test_many_connections(started_cluster): def test_concurrent_queries(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor 
= conn.cursor() @@ -948,7 +948,7 @@ def test_concurrent_queries(started_cluster): def test_odbc_long_column_names(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -986,7 +986,7 @@ def test_odbc_long_column_names(started_cluster): def test_odbc_long_text(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() diff --git a/tests/integration/test_s3_imds/test_simple.py b/tests/integration/test_s3_imds/test_simple.py index 0dacac2b0b9..4884c824f99 100644 --- a/tests/integration/test_s3_imds/test_simple.py +++ b/tests/integration/test_s3_imds/test_simple.py @@ -56,7 +56,7 @@ def test_credentials_from_metadata(): ) expected_logs = [ - "Calling EC2MetadataService to get token failed, falling back to less secure way", + "Calling EC2MetadataService to get token failed, falling back to a less secure way", "Getting default credentials for ec2 instance from resolver:8080", "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential string myrole", "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole", diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index 8f197e09e61..9e3ee19179a 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -1,6 +1,7 @@ import io import logging import random +import string import time import pytest @@ -13,7 +14,6 @@ from uuid import uuid4 AVAILABLE_MODES = ["unordered", "ordered"] DEFAULT_AUTH = ["'minio'", "'minio123'"] NO_AUTH = ["NOSIGN"] -AZURE_CONTAINER_NAME = "cont" def prepare_public_s3_bucket(started_cluster): @@ -68,13 +68,24 @@ def s3_queue_setup_teardown(started_cluster): instance = started_cluster.instances["instance"] instance_2 = started_cluster.instances["instance2"] - instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") - instance_2.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + instance.query("DROP DATABASE IF EXISTS default; CREATE DATABASE default;") + instance_2.query("DROP DATABASE IF EXISTS default; CREATE DATABASE default;") minio = started_cluster.minio_client objects = list(minio.list_objects(started_cluster.minio_bucket, recursive=True)) for obj in objects: minio.remove_object(started_cluster.minio_bucket, obj.object_name) + + container_client = started_cluster.blob_service_client.get_container_client( + started_cluster.azurite_container + ) + + if container_client.exists(): + blob_names = [b.name for b in container_client.list_blobs()] + logging.debug(f"Deleting blobs: {blob_names}") + for b in blob_names: + container_client.delete_blob(b) + yield # run test @@ -129,11 +140,6 @@ def started_cluster(): cluster.start() logging.info("Cluster started") - container_client = cluster.blob_service_client.get_container_client( - AZURE_CONTAINER_NAME - ) - container_client.create_container() - yield cluster finally: cluster.shutdown() @@ -190,7 +196,7 @@ def put_s3_file_content(started_cluster, filename, data, bucket=None): def put_azure_file_content(started_cluster, filename, data, bucket=None): client = started_cluster.blob_service_client.get_blob_client( - AZURE_CONTAINER_NAME, filename + started_cluster.azurite_container, filename ) buf = io.BytesIO(data) client.upload_blob(buf, "BlockBlob", len(data)) @@ -226,7 +232,7 @@ def create_table( url = 
f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{files_path}/" engine_def = f"{engine_name}('{url}', {auth_params}, {file_format})" else: - engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', '{files_path}/', 'CSV')" + engine_def = f"{engine_name}('{started_cluster.env_variables['AZURITE_CONNECTION_STRING']}', '{started_cluster.azurite_container}', '{files_path}/', 'CSV')" node.query(f"DROP TABLE IF EXISTS {table_name}") create_query = f""" @@ -262,15 +268,21 @@ def create_mv( ) +def generate_random_string(length=6): + return "".join(random.choice(string.ascii_lowercase) for i in range(length)) + + @pytest.mark.parametrize("mode", ["unordered", "ordered"]) @pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) def test_delete_after_processing(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.delete_after_processing_{mode}_{engine_name}" + table_name = f"delete_after_processing_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" files_num = 5 row_num = 10 + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" if engine_name == "S3Queue": storage = "s3" else: @@ -285,7 +297,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name): table_name, mode, files_path, - additional_settings={"after_processing": "delete"}, + additional_settings={"after_processing": "delete", "keeper_path": keeper_path}, engine_name=engine_name, ) create_mv(node, table_name, dst_table_name) @@ -313,7 +325,7 @@ def test_delete_after_processing(started_cluster, mode, engine_name): assert len(objects) == 0 else: client = started_cluster.blob_service_client.get_container_client( - AZURE_CONTAINER_NAME + started_cluster.azurite_container ) objects_iterator = client.list_blobs(files_path) for objects in objects_iterator: @@ -324,11 +336,12 @@ def test_delete_after_processing(started_cluster, mode, engine_name): @pytest.mark.parametrize("engine_name", ["S3Queue", "AzureQueue"]) def test_failed_retry(started_cluster, mode, engine_name): node = started_cluster.instances["instance"] - table_name = f"test.failed_retry_{mode}_{engine_name}" + table_name = f"failed_retry_{mode}_{engine_name}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" file_path = f"{files_path}/trash_test.csv" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" retries_num = 3 values = [ @@ -385,8 +398,9 @@ def test_failed_retry(started_cluster, mode, engine_name): @pytest.mark.parametrize("mode", AVAILABLE_MODES) def test_direct_select_file(started_cluster, mode): node = started_cluster.instances["instance"] - table_name = f"test.direct_select_file_{mode}" - keeper_path = f"/clickhouse/test_{table_name}" + table_name = f"direct_select_file_{mode}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{mode}_{generate_random_string()}" files_path = f"{table_name}_data" file_path = f"{files_path}/test.csv" @@ -447,7 +461,7 @@ def test_direct_select_file(started_cluster, mode): ] == [] # New table with different zookeeper path - keeper_path = f"/clickhouse/test_{table_name}_{mode}_2" + keeper_path = f"{keeper_path}_2" create_table( started_cluster, node, @@ -491,8 +505,17 @@ def 
test_direct_select_multiple_files(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"direct_select_multiple_files_{mode}" files_path = f"{table_name}_data" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" - create_table(started_cluster, node, table_name, mode, files_path) + create_table( + started_cluster, + node, + table_name, + mode, + files_path, + additional_settings={"keeper_path": keeper_path}, + ) for i in range(5): rand_values = [[random.randint(0, 50) for _ in range(3)] for _ in range(10)] values_csv = ( @@ -515,14 +538,23 @@ def test_direct_select_multiple_files(started_cluster, mode): @pytest.mark.parametrize("mode", AVAILABLE_MODES) -def test_streaming_to_view_(started_cluster, mode): +def test_streaming_to_view(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"streaming_to_view_{mode}" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" total_values = generate_random_files(started_cluster, files_path, 10) - create_table(started_cluster, node, table_name, mode, files_path) + create_table( + started_cluster, + node, + table_name, + mode, + files_path, + additional_settings={"keeper_path": keeper_path}, + ) create_mv(node, table_name, dst_table_name) expected_values = set([tuple(i) for i in total_values]) @@ -544,7 +576,8 @@ def test_streaming_to_many_views(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"streaming_to_many_views_{mode}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" for i in range(3): @@ -582,7 +615,8 @@ def test_streaming_to_many_views(started_cluster, mode): def test_multiple_tables_meta_mismatch(started_cluster): node = started_cluster.instances["instance"] table_name = f"multiple_tables_meta_mismatch" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" create_table( @@ -675,7 +709,8 @@ def test_multiple_tables_streaming_sync(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"multiple_tables_streaming_sync_{mode}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 @@ -756,7 +791,10 @@ def test_multiple_tables_streaming_sync(started_cluster, mode): def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): node = started_cluster.instances["instance"] node_2 = started_cluster.instances["instance2"] - table_name = f"multiple_tables_streaming_sync_distributed_{mode}" + # A unique table name is necessary for repeatable tests + table_name = ( + f"multiple_tables_streaming_sync_distributed_{mode}_{generate_random_string()}" + ) dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" @@ -833,7 +871,8 @@ def test_max_set_age(started_cluster): node = 
started_cluster.instances["instance"] table_name = "max_set_age" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" max_age = 20 files_to_generate = 10 @@ -944,10 +983,9 @@ def test_max_set_age(started_cluster): def test_max_set_size(started_cluster): node = started_cluster.instances["instance"] table_name = f"max_set_size" - dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" - max_age = 10 files_to_generate = 10 create_table( @@ -991,7 +1029,8 @@ def test_drop_table(started_cluster): node = started_cluster.instances["instance"] table_name = f"test_drop" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 @@ -1021,9 +1060,11 @@ def test_drop_table(started_cluster): def test_s3_client_reused(started_cluster): node = started_cluster.instances["instance"] - table_name = f"test.test_s3_client_reused" + table_name = f"test_s3_client_reused" dst_table_name = f"{table_name}_dst" files_path = f"{table_name}_data" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" row_num = 10 def get_created_s3_clients_count(): @@ -1057,6 +1098,7 @@ def test_s3_client_reused(started_cluster): additional_settings={ "after_processing": "delete", "s3queue_processing_threads_num": 1, + "keeper_path": keeper_path, }, auth=NO_AUTH, bucket=started_cluster.minio_public_bucket, @@ -1114,7 +1156,8 @@ def test_processing_threads(started_cluster, mode): node = started_cluster.instances["instance"] table_name = f"processing_threads_{mode}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 processing_threads = 32 @@ -1181,7 +1224,8 @@ def test_shards(started_cluster, mode, processing_threads): node = started_cluster.instances["instance"] table_name = f"test_shards_{mode}_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 shards_num = 3 @@ -1300,7 +1344,7 @@ where zookeeper_path ilike '%{table_name}%' and status = 'Processed' and rows_pr pytest.param("unordered", 1), pytest.param("unordered", 8), pytest.param("ordered", 1), - pytest.param("ordered", 8), + pytest.param("ordered", 2), ], ) def test_shards_distributed(started_cluster, mode, processing_threads): @@ -1308,10 +1352,11 @@ def test_shards_distributed(started_cluster, mode, processing_threads): node_2 = started_cluster.instances["instance2"] table_name = f"test_shards_distributed_{mode}_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable 
tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 300 - row_num = 50 + row_num = 300 total_rows = row_num * files_to_generate shards_num = 2 @@ -1461,8 +1506,8 @@ def test_settings_check(started_cluster): node = started_cluster.instances["instance"] node_2 = started_cluster.instances["instance2"] table_name = f"test_settings_check" - dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" mode = "ordered" @@ -1504,7 +1549,10 @@ def test_processed_file_setting(started_cluster, processing_threads): node = started_cluster.instances["instance"] table_name = f"test_processed_file_setting_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}_{processing_threads}" + # A unique path is necessary for repeatable tests + keeper_path = ( + f"/clickhouse/test_{table_name}_{processing_threads}_{generate_random_string()}" + ) files_path = f"{table_name}_data" files_to_generate = 10 @@ -1555,7 +1603,10 @@ def test_processed_file_setting_distributed(started_cluster, processing_threads) node_2 = started_cluster.instances["instance2"] table_name = f"test_processed_file_setting_distributed_{processing_threads}" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = ( + f"/clickhouse/test_{table_name}_{processing_threads}_{generate_random_string()}" + ) files_path = f"{table_name}_data" files_to_generate = 10 @@ -1609,7 +1660,8 @@ def test_upgrade(started_cluster): table_name = f"test_upgrade" dst_table_name = f"{table_name}_dst" - keeper_path = f"/clickhouse/test_{table_name}" + # A unique path is necessary for repeatable tests + keeper_path = f"/clickhouse/test_{table_name}_{generate_random_string()}" files_path = f"{table_name}_data" files_to_generate = 10 @@ -1648,7 +1700,8 @@ def test_upgrade(started_cluster): def test_exception_during_insert(started_cluster): node = started_cluster.instances["instance_too_many_parts"] - table_name = f"test_exception_during_insert" + # A unique table name is necessary for repeatable tests + table_name = f"test_exception_during_insert_{generate_random_string()}" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" @@ -1664,6 +1717,7 @@ def test_exception_during_insert(started_cluster): "keeper_path": keeper_path, }, ) + node.rotate_logs() total_values = generate_random_files( started_cluster, files_path, files_to_generate, start_ind=0, row_num=1 ) @@ -1680,33 +1734,49 @@ def test_exception_during_insert(started_cluster): ) assert "Too many parts" in exception + original_parts_to_throw_insert = 0 + modified_parts_to_throw_insert = 10 node.replace_in_config( "/etc/clickhouse-server/config.d/merge_tree.xml", - "parts_to_throw_insert>0", - "parts_to_throw_insert>10", + f"parts_to_throw_insert>{original_parts_to_throw_insert}", + f"parts_to_throw_insert>{modified_parts_to_throw_insert}", ) - node.restart_clickhouse() + try: + node.restart_clickhouse() - def get_count(): - return int(node.query(f"SELECT count() FROM {dst_table_name}")) + def get_count(): + return int(node.query(f"SELECT count() FROM {dst_table_name}")) - expected_rows = 10 - for _ in range(20): - if expected_rows == 
get_count(): - break - time.sleep(1) - assert expected_rows == get_count() + expected_rows = 10 + for _ in range(20): + if expected_rows == get_count(): + break + time.sleep(1) + assert expected_rows == get_count() + finally: + node.replace_in_config( + "/etc/clickhouse-server/config.d/merge_tree.xml", + f"parts_to_throw_insert>{modified_parts_to_throw_insert}", + f"parts_to_throw_insert>{original_parts_to_throw_insert}", + ) + node.restart_clickhouse() def test_commit_on_limit(started_cluster): node = started_cluster.instances["instance"] - table_name = f"test_commit_on_limit" + # A unique table name is necessary for repeatable tests + table_name = f"test_commit_on_limit_{generate_random_string()}" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" files_to_generate = 10 + failed_files_event_before = int( + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + ) + ) create_table( started_cluster, node, @@ -1782,6 +1852,9 @@ def test_commit_on_limit(started_cluster): assert "test_999999.csv" in get_processed_files() assert 1 == int( + node.count_in_log(f"Setting file {files_path}/test_9999.csv as failed") + ) + assert failed_files_event_before + 1 == int( node.query( "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" ) diff --git a/tests/performance/parquet_read_with_index.xml b/tests/performance/parquet_read_with_index.xml new file mode 100644 index 00000000000..1bb2d8eb4a2 --- /dev/null +++ b/tests/performance/parquet_read_with_index.xml @@ -0,0 +1,30 @@ + + + INSERT INTO FUNCTION file('test_pq_index', Parquet) SELECT * FROM generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Tuple(i UInt32, j UInt32)),array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 SETTINGS output_format_parquet_use_custom_encoder=false, output_format_parquet_write_page_index=true + + + + SELECT * FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null + + + + SELECT tuple_column.a FROM file('test_pq_index', Parquet) Format Null + + + + SELECT tuple_column.a FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null + + + + SELECT tuple_column.c.i FROM file('test_pq_index', Parquet) Format Null + + + + SELECT * FROM file('test_pq_index', Parquet, 'array_tuple_column Array (Tuple(a Nullable(String)))') Format Null + + + + SELECT * FROM file('test_pq_index', Parquet, 'map_tuple_column Map(String, Tuple(a Nullable(String)))') Format Null + + + diff --git a/tests/queries/0_stateless/00961_check_table.reference b/tests/queries/0_stateless/00961_check_table.reference index a0a054898b9..686285bb6aa 100644 --- a/tests/queries/0_stateless/00961_check_table.reference +++ b/tests/queries/0_stateless/00961_check_table.reference @@ -14,4 +14,4 @@ ======== 201902_4_5_1 1 ======== -201801_1_1_0 1 +201801_1_1_2 1 diff --git a/tests/queries/0_stateless/00961_check_table.sql b/tests/queries/0_stateless/00961_check_table.sql index a6abe8103d5..fc3c5435670 100644 --- a/tests/queries/0_stateless/00961_check_table.sql +++ b/tests/queries/0_stateless/00961_check_table.sql @@ -39,6 +39,6 @@ CHECK TABLE mt_table PARTITION 201902 SETTINGS max_threads = 1; 
SELECT '========'; -CHECK TABLE mt_table PART '201801_1_1_0'; +CHECK TABLE mt_table PART '201801_1_1_2'; DROP TABLE IF EXISTS mt_table; diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index a07dd306b3e..0a9f94cc451 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -11,33 +11,40 @@ function query() ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*" } -# NOTE: database = $CLICKHOUSE_DATABASE is unwanted -verify_sql="SELECT - (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) - = (SELECT sum(active), sum(NOT active) FROM - (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition. But it should get expected result eventually. # In case of test failure, this code will do infinite loop and timeout. verify() { - for i in {1..5000} - do - result=$( query "$verify_sql" ) - [ "$result" = "1" ] && echo "$result" && break - sleep 0.1 + local result - if [[ $i -eq 5000 ]] - then - query " - SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; - SELECT sum(active), sum(NOT active) FROM system.parts; - SELECT sum(active), sum(NOT active) FROM system.projection_parts; - SELECT count() FROM system.dropped_tables_parts; - " + for _ in {1..100}; do + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + result=$( query "SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = + (SELECT sum(active), sum(NOT active) FROM ( + SELECT active FROM system.parts + UNION ALL SELECT active FROM system.projection_parts + UNION ALL SELECT 1 FROM system.dropped_tables_parts + ))" + ) + + if [ "$result" = "1" ]; then + echo "$result" + return fi + + sleep 0.5 done + + $CLICKHOUSE_CLIENT -q " + SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; + SELECT sum(active), sum(NOT active) FROM system.parts; + SELECT sum(active), sum(NOT active) FROM system.projection_parts; + SELECT count() FROM system.dropped_tables_parts; + " } query "DROP TABLE IF EXISTS test_table" diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference index be82d744a3b..56293ca0e5d 100644 --- a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -1 +1 @@ -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) diff --git a/tests/queries/0_stateless/02325_dates_schema_inference.reference b/tests/queries/0_stateless/02325_dates_schema_inference.reference index c8eebd3262e..124f105220d 100644 --- a/tests/queries/0_stateless/02325_dates_schema_inference.reference +++ b/tests/queries/0_stateless/02325_dates_schema_inference.reference @@ -1,29 +1,29 @@ JSONEachRow x Nullable(Date) x Nullable(DateTime64(9)) -x Nullable(DateTime64(9)) +x Nullable(DateTime) x Array(Nullable(Date)) -x Array(Nullable(DateTime64(9))) -x 
Array(Nullable(DateTime64(9))) -x Tuple(\n date1 Nullable(DateTime64(9)),\n date2 Nullable(Date)) -x Array(Nullable(DateTime64(9))) -x Array(Nullable(DateTime64(9))) -x Nullable(DateTime64(9)) +x Array(Nullable(DateTime)) +x Array(Nullable(DateTime)) +x Tuple(\n date1 Nullable(DateTime),\n date2 Nullable(Date)) +x Array(Nullable(DateTime)) +x Array(Nullable(DateTime)) +x Nullable(DateTime) x Array(Nullable(String)) x Nullable(String) x Array(Nullable(String)) -x Tuple(\n key1 Array(Array(Nullable(DateTime64(9)))),\n key2 Array(Array(Nullable(String)))) +x Tuple(\n key1 Array(Array(Nullable(DateTime))),\n key2 Array(Array(Nullable(String)))) CSV c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) @@ -31,14 +31,14 @@ c1 Map(String, Array(Array(Nullable(String)))) TSV c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) @@ -46,14 +46,14 @@ c1 Map(String, Array(Array(Nullable(String)))) Values c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) diff --git a/tests/queries/0_stateless/02404_data.CSV b/tests/queries/0_stateless/02404_data.CSV new file mode 100644 index 00000000000..2d8b5c8daa8 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CSV @@ -0,0 +1,10 @@ +0,"1970-01-01" +1,"1970-01-02" +2,"1970-01-03" +3,"1970-01-04" +4,"1970-01-05" +5,"1970-01-06" +6,"1970-01-07" +7,"1970-01-08" +8,"1970-01-09" +9,"1970-01-10" diff --git a/tests/queries/0_stateless/02404_data.CSVWithNames b/tests/queries/0_stateless/02404_data.CSVWithNames new file mode 100644 index 00000000000..34647008916 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CSVWithNames @@ -0,0 +1,11 @@ +"number","toDate(number)" +0,"1970-01-01" +1,"1970-01-02" +2,"1970-01-03" +3,"1970-01-04" +4,"1970-01-05" +5,"1970-01-06" +6,"1970-01-07" +7,"1970-01-08" +8,"1970-01-09" +9,"1970-01-10" diff --git a/tests/queries/0_stateless/02404_data.CustomSeparated b/tests/queries/0_stateless/02404_data.CustomSeparated new file mode 100644 index 00000000000..f3ae1663536 --- 
/dev/null +++ b/tests/queries/0_stateless/02404_data.CustomSeparated @@ -0,0 +1,10 @@ +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.JSONCompactEachRow b/tests/queries/0_stateless/02404_data.JSONCompactEachRow new file mode 100644 index 00000000000..de2e0986aab --- /dev/null +++ b/tests/queries/0_stateless/02404_data.JSONCompactEachRow @@ -0,0 +1,10 @@ +["0", "1970-01-01"] +["1", "1970-01-02"] +["2", "1970-01-03"] +["3", "1970-01-04"] +["4", "1970-01-05"] +["5", "1970-01-06"] +["6", "1970-01-07"] +["7", "1970-01-08"] +["8", "1970-01-09"] +["9", "1970-01-10"] diff --git a/tests/queries/0_stateless/02404_data.JSONEachRow b/tests/queries/0_stateless/02404_data.JSONEachRow new file mode 100644 index 00000000000..e77256ac7fc --- /dev/null +++ b/tests/queries/0_stateless/02404_data.JSONEachRow @@ -0,0 +1,10 @@ +{"number":"0","toDate(number)":"1970-01-01"} +{"number":"1","toDate(number)":"1970-01-02"} +{"number":"2","toDate(number)":"1970-01-03"} +{"number":"3","toDate(number)":"1970-01-04"} +{"number":"4","toDate(number)":"1970-01-05"} +{"number":"5","toDate(number)":"1970-01-06"} +{"number":"6","toDate(number)":"1970-01-07"} +{"number":"7","toDate(number)":"1970-01-08"} +{"number":"8","toDate(number)":"1970-01-09"} +{"number":"9","toDate(number)":"1970-01-10"} diff --git a/tests/queries/0_stateless/02404_data.TSKV b/tests/queries/0_stateless/02404_data.TSKV new file mode 100644 index 00000000000..70f7ad33c8b --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSKV @@ -0,0 +1,10 @@ +number=0 toDate(number)=1970-01-01 +number=1 toDate(number)=1970-01-02 +number=2 toDate(number)=1970-01-03 +number=3 toDate(number)=1970-01-04 +number=4 toDate(number)=1970-01-05 +number=5 toDate(number)=1970-01-06 +number=6 toDate(number)=1970-01-07 +number=7 toDate(number)=1970-01-08 +number=8 toDate(number)=1970-01-09 +number=9 toDate(number)=1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.TSV b/tests/queries/0_stateless/02404_data.TSV new file mode 100644 index 00000000000..f3ae1663536 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSV @@ -0,0 +1,10 @@ +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.TSVWithNames b/tests/queries/0_stateless/02404_data.TSVWithNames new file mode 100644 index 00000000000..23310234a8c --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSVWithNames @@ -0,0 +1,11 @@ +number toDate(number) +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.Values b/tests/queries/0_stateless/02404_data.Values new file mode 100644 index 00000000000..d9a621d7ec9 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.Values @@ -0,0 +1 @@ +(0,'1970-01-01'),(1,'1970-01-02'),(2,'1970-01-03'),(3,'1970-01-04'),(4,'1970-01-05'),(5,'1970-01-06'),(6,'1970-01-07'),(7,'1970-01-08'),(8,'1970-01-09'),(9,'1970-01-10') \ No newline at end of file diff --git a/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference b/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference index 049603328d9..3d6b1021916 100644 --- a/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference +++ 
b/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference @@ -4,7 +4,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -14,7 +14,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -24,7 +24,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -34,7 +34,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -44,7 +44,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -54,7 +54,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -64,7 +64,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -74,7 +74,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -84,7 +84,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh index f747b3156a5..df7e9386662 100755 --- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh +++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh @@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ] then echo "$RES" else + echo "$RES" cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" fi diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference index 9fc356cc5e6..f949d5e9baf 100644 --- a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference @@ -4,7 +4,7 @@ JSON {"d":"str","dynamicType(d)":"String"} {"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} {"d":"2020-01-01","dynamicType(d)":"Date"} -{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} +{"d":"2020-01-01 10:00:00","dynamicType(d)":"DateTime"} {"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} {"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} {"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} @@ -22,7 +22,7 @@ CSV "str","String" "[1,2,3]","Array(Int64)" "2020-01-01","Date" -"2020-01-01 10:00:00.000000000","DateTime64(9)" +"2020-01-01 10:00:00","DateTime" "[1, 'str', [1, 2, 3]]","String" \N,"None" true,"Bool" @@ -32,24 +32,24 @@ TSV str String [1,2,3] 
Array(Int64) 2020-01-01 Date -2020-01-01 10:00:00.000000000 DateTime64(9) +2020-01-01 10:00:00 DateTime [1, \'str\', [1, 2, 3]] String \N None true Bool Values -(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') +(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00','DateTime'),(NULL,'None'),(true,'Bool') Cast using parsing 42 Int64 42.42 Float64 [1,2,3] Array(Int64) 2020-01-01 Date -2020-01-01 10:00:00.000000000 DateTime64(9) +2020-01-01 10:00:00 DateTime NULL String true Bool 42 Int64 false 42.42 Float64 false [1,2,3] Array(Int64) false 2020-01-01 Date true -2020-01-01 10:00:00.000000000 DateTime64(9) true +2020-01-01 10:00:00 DateTime true NULL String true true Bool true diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference index 759b7763cd1..955106946ea 100644 --- a/tests/queries/0_stateless/03199_json_extract_dynamic.reference +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -12,7 +12,7 @@ Hello String [1,2,3] Array(Nullable(Int64)) ['str1','str2','str3'] Array(Nullable(String)) [[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) -['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01 00:00:00','2020-01-01 00:00:00'] Array(Nullable(DateTime)) ['2020-01-01','2020-01-01 date'] Array(Nullable(String)) ['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) ['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference index cd109daac52..13b1138d1c4 100644 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference @@ -1,2 +1,2 @@ x Nullable(Int64) -schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false +schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference 
b/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql new file mode 100644 index 00000000000..18b3ed7bcec --- /dev/null +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql @@ -0,0 +1,24 @@ +-- Tags: no-parallel + +SET create_if_not_exists=0; -- Default + +DROP TABLE IF EXISTS example_table; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -- { serverError TABLE_ALREADY_EXISTS } + +DROP DATABASE IF EXISTS example_database; +CREATE DATABASE example_database; +CREATE DATABASE example_database; -- { serverError DATABASE_ALREADY_EXISTS } + +SET create_if_not_exists=1; + +DROP TABLE IF EXISTS example_table; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; + +DROP DATABASE IF EXISTS example_database; +CREATE DATABASE example_database; +CREATE DATABASE example_database; + +DROP DATABASE IF EXISTS example_database; +DROP TABLE IF EXISTS example_table; \ No newline at end of file diff --git a/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.reference b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.reference new file mode 100644 index 00000000000..68538c3f75b --- /dev/null +++ b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.reference @@ -0,0 +1,2 @@ +QUERY_WAS_CANCELLED +QUERY_WAS_CANCELLED diff --git a/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.sh b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.sh new file mode 100755 index 00000000000..db943a665cb --- /dev/null +++ b/tests/queries/0_stateless/03221_insert_timeout_overflow_mode.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "create table null_t (number UInt64) engine = Null;" +${CLICKHOUSE_CLIENT} --query "select sleep(0.1) from system.numbers settings max_block_size = 1 format Native" 2>/dev/null | ${CLICKHOUSE_CLIENT} --max_execution_time 0.3 --timeout_overflow_mode break --query "insert into null_t format Native" 2>&1 | grep -o "QUERY_WAS_CANCELLED" diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh new file mode 100755 index 00000000000..021278955cd --- /dev/null +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-asan, no-msan, no-tsan +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Inaccessible IMDS should not introduce large delays, so this query should reply quickly at least sometimes: +while true +do + # This host (likely) drops packets sent to it (does not reply), so it is good for testing timeouts. 
+ # At the same time, we expect that the clickhouse host does not drop packets and quickly replies with 4xx, which is a non-retriable error for S3. + AWS_EC2_METADATA_SERVICE_ENDPOINT='https://10.255.255.255/' ${CLICKHOUSE_LOCAL} --time --query "SELECT * FROM s3('${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/nonexistent')" |& grep -v -F 404 | + ${CLICKHOUSE_LOCAL} --input-format TSV "SELECT c1::Float64 < 1 FROM table" | grep 1 && break +done diff --git a/tests/queries/0_stateless/03222_date_time_inference.reference b/tests/queries/0_stateless/03222_date_time_inference.reference new file mode 100644 index 00000000000..3288308a1d0 --- /dev/null +++ b/tests/queries/0_stateless/03222_date_time_inference.reference @@ -0,0 +1,253 @@ +Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +String +2020_01_01 String +2020_1_01 String +2020_01_1 String +2020_1_1 String +2020a01a01 String +2020a1a01 String +2020a01a1 String +2020a1a1 String +20200101 String +DateTime +2020-01-02 18:42:42 DateTime +2020-01-02 18:42:42 DateTime +2020-01-02 18:42:42 DateTime +String +2020_01_01 42:42:42 String +2020a01a01 42:42:42 String +2020-01-01 42.42.42 String +2020-01-01 42 42 42 String +2020-01-01 42a42a42 String +DateTime64 +2020-01-02 18:42:42.424200000 DateTime64(9) +2020-01-02 18:42:42.424200000 DateTime64(9) +2020-01-02 18:42:42.424200000 DateTime64(9) +String +2020_01_01 42:42:42.4242 String +2020a01a01 42:42:42.4242 String +2020-01-01 42.42.42.4242 String +2020-01-01 42 42 42.4242 String +2020-01-01 42a42a42.4242 String +DateTime/DateTime64 best effort +2000-01-01 00:00:00 DateTime +2000-01-01 01:00:00 DateTime +2000-01-01 01:00:00.000000000 DateTime64(9) +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-01-01 21:02:03 DateTime +2017-01-01 21:02:03.000000000 DateTime64(9) +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-01-02 01:02:03 DateTime +2017-01-02 01:02:03.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +2000-04-17 01:02:03 DateTime +2000-04-17 01:02:03.000000000 DateTime64(9) +1970-01-02 01:00:00 DateTime +1970-01-02 01:00:00.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +2015-12-31 20:00:00 DateTime +2015-12-31 20:00:00 DateTime +2016-01-01 00:00:00 DateTime +2016-01-01 00:00:00 DateTime +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 02:34:05 DateTime +2017-01-02 02:34:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-02 02:04:05 DateTime +2017-01-02 02:04:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) 
+2017-01-01 18:04:05 DateTime +2017-01-01 18:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-01 23:04:05 DateTime +2017-01-01 23:04:05.000000000 DateTime64(9) +2017-02-01 23:04:05 DateTime +2017-02-01 23:04:05.000000000 DateTime64(9) +2017-06-01 23:04:05 DateTime +2017-06-01 23:04:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 02:04:05 DateTime +2017-01-02 02:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-04-01 11:22:33 DateTime +2017-04-01 11:22:33.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 11:22:33 DateTime +2017-04-02 11:22:33.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 01:22:33 DateTime +2017-04-02 01:22:33.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 01:02:33 DateTime +2017-04-02 01:02:33.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-01 21:02:03 DateTime +2017-04-01 21:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-04-25 01:02:03 DateTime +2017-04-25 01:02:03.000000000 DateTime64(9) +2017-04-25 01:02:03 DateTime +2017-04-25 01:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 09:32:03 DateTime +2017-01-25 09:32:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +String +2 String +20 String +200 String +2000 String +20000 String +200001 String +2000010 String 
+20000101 String +200001010 String +2000010101 String +20000101010 String +200001010101 String +2000010101010 String +20000101010101 String +2.1 String +20.1 String +200.1 String +2000.1 String +20000.1 String +200001.1 String +2000010.1 String +20000101.1 String +200001010.1 String +2000010101.1 String +20000101010.1 String +200001010101.1 String +2000010101010.1 String +20000101010101.1 String +Mar String +Mar1 String +Mar 1 String +Mar01 String +Mar 01 String +Mar2020 String +Mar 2020 String +Mar012020 String +Mar 012020 String +Mar01012020 String +Mar 01012020 String +Mar0101202001 String +Mar 0101202001 String +Mar010120200101 String +Mar 010120200101 String +Mar01012020010101 String +Mar 01012020010101 String +Mar01012020010101.000 String +Mar 0101202001010101.000 String +2000 01 01 01:00:00 String +2000 01 01 01:00:00.000 String +2000a01a01 01:00:00 String +2000a01a01 01:00:00.000 String +2000-01-01 01 00 00 String +2000-01-01 01 00 00.000 String +2000-01-01 01-00-00 String +2000-01-01 01-00-00.000 String +2000-01-01 01a00a00 String +2000-01-01 01a00a00.000 String +2000-01 01:00:00 String +2000-01 01:00:00.000 String +2000 01 String +2000-01 String +Mar 2000 00:00:00 String +Mar 2000 00:00:00.000 String +2000 00:00:00 String +2000 00:00:00.000 String +Mar 2000-01-01 00:00:00 String +Mar 2000-01-01 00:00:00.000 String diff --git a/tests/queries/0_stateless/03222_date_time_inference.sql b/tests/queries/0_stateless/03222_date_time_inference.sql new file mode 100644 index 00000000000..ebd472294be --- /dev/null +++ b/tests/queries/0_stateless/03222_date_time_inference.sql @@ -0,0 +1,269 @@ +set input_format_try_infer_datetimes = 1; +set input_format_try_infer_dates = 1; +set schema_inference_make_columns_nullable = 0; +set input_format_json_try_infer_numbers_from_strings = 0; +set session_timezone = 'UTC'; + +select 'Date'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/1"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20200101"}'); + +select 'DateTime'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42"}'); +select x, 
toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42"}'); + +select 'DateTime64'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42.4242"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42.4242"}'); + +set date_time_input_format='best_effort'; +select 'DateTime/DateTime64 best effort'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50.000 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203Z"}'); +select x, toTypeName(x) from 
format(JSONEachRow, '{"x" : "1970/01/02 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01UTC"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+030"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+030"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+900"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+900"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05GMT"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000GMT"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Feb"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Feb"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Jun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Jun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+00"}'); +select x, toTypeName(x) from 
format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05-0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000-0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0400"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0400"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03"}'); +select x, toTypeName(x) 
from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:30 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:30 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z Mon"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z Mon"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM +03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM +03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50.000 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50.000 +0300"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : 
"200"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : 
"2000a01a01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00.000"}'); + + diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference index 7fafd4d13ea..b6c452ba328 100644 --- a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference @@ -1,3 +1,14 @@ +1006 +1007 +1008 +1009 +101 +1010 +1011 +1012 +1013 +1014 +--- 100 100 101 101 102 102 diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql index 112373e5db2..6f486f8f0fe 100644 --- a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql @@ -6,10 +6,18 @@ INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(10_000); SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SELECT v +FROM test__fuzz_22 +ORDER BY v +LIMIT 10, 10 +SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; + +SELECT '---'; + SELECT k, v FROM test__fuzz_22 ORDER BY k LIMIT 100, 10 -SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; +SETTINGS optimize_read_in_order=1, merge_tree_min_rows_for_concurrent_read = 9223372036854775806; DROP TABLE test__fuzz_22 SYNC;