From 0d46e7555b066298603b5e0cd4dc122e74863ebf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 20 Feb 2024 19:08:33 +0100 Subject: [PATCH 001/363] Fix unexpected behavior with FORMAT and SETTINGS parsing --- src/Parsers/ParserQueryWithOutput.cpp | 80 ++++++++++++++++++--------- 1 file changed, 54 insertions(+), 26 deletions(-) diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 7a627ae5f6a..340abf27c31 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -150,37 +150,65 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } + /// These two sections are allowed in an arbitrary order. ParserKeyword s_format("FORMAT"); - - if (s_format.ignore(pos, expected)) - { - ParserIdentifier format_p; - - if (!format_p.parse(pos, query_with_output.format, expected)) - return false; - setIdentifierSpecial(query_with_output.format); - - query_with_output.children.push_back(query_with_output.format); - } - - // SETTINGS key1 = value1, key2 = value2, ... ParserKeyword s_settings("SETTINGS"); - if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) - { - ParserSetQuery parser_settings(true); - if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) - return false; - query_with_output.children.push_back(query_with_output.settings_ast); - // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) - // Pass them manually, to apply in InterpreterSelectQuery::initSettings() - if (query->as()) + /** Why: let's take the following example: + * SELECT 1 UNION ALL SELECT 2 FORMAT TSV + * Each subquery can be put in parentheses and have its own settings: + * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV + * And the whole query can have settings: + * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV SETTINGS e=f + * A single query with output is parsed in the same way as the UNION ALL chain: + * SELECT 1 SETTINGS a=b FORMAT TSV SETTINGS e=f + * So while these forms have a slightly different meaning, they both exist: + * SELECT 1 SETTINGS a=b FORMAT TSV + * SELECT 1 FORMAT TSV SETTINGS e=f + * And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. + * But while this work: + * (SELECT 1) UNION ALL (SELECT 2) FORMAT TSV SETTINGS d=f + * This does not work automatically, unless we explicitly allow different orders: + * (SELECT 1) UNION ALL (SELECT 2) SETTINGS d=f FORMAT TSV + * Inevitably, we also allow this: + * SELECT 1 SETTINGS a=b SETTINGS d=f FORMAT TSV + * ^^^^^^^^^^^^^^^^^^^^^ + * Because this part is consumed into ASTSelectWithUnionQuery + * and the rest into ASTQueryWithOutput. + */ + + for (size_t i = 0; i < 2; ++i) + { + if (!query_with_output.format && s_format.ignore(pos, expected)) { - auto settings = query_with_output.settings_ast->clone(); - assert_cast(settings.get())->print_in_format = false; - QueryWithOutputSettingsPushDownVisitor::Data data{settings}; - QueryWithOutputSettingsPushDownVisitor(data).visit(query); + ParserIdentifier format_p; + + if (!format_p.parse(pos, query_with_output.format, expected)) + return false; + setIdentifierSpecial(query_with_output.format); + + query_with_output.children.push_back(query_with_output.format); } + else if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) + { + // SETTINGS key1 = value1, key2 = value2, ... 
+ ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) + return false; + query_with_output.children.push_back(query_with_output.settings_ast); + + // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) + // Pass them manually, to apply in InterpreterSelectQuery::initSettings() + if (query->as()) + { + auto settings = query_with_output.settings_ast->clone(); + assert_cast(settings.get())->print_in_format = false; + QueryWithOutputSettingsPushDownVisitor::Data data{settings}; + QueryWithOutputSettingsPushDownVisitor(data).visit(query); + } + } + else + break; } node = std::move(query); From 3fb45ff1762867cbeb53e31cd4492dda5cf8dbb7 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 14 May 2024 19:17:56 +0200 Subject: [PATCH 002/363] Add setting and implementation --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Formats/SchemaInferenceUtils.cpp | 44 ++++++++++++++++++- .../0_stateless/03150_infer_type_variant.sql | 4 ++ 5 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03150_infer_type_variant.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a0de354a03..0c0614550e5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1066,6 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_json_infer_variant_from_multitype_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 43ccee173f0..e027c693094 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,6 +137,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; + format_settings.json.infer_variant_from_multitype_array = settings.input_format_json_infer_variant_from_multitype_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index d5fedf99adb..6cac41bd63e 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -223,6 +223,7 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; + bool infer_variant_from_multitype_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 02c0aa6dd77..fc1a26f9b2f 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -307,6 +308,22 @@ namespace type_indexes.erase(TypeIndex::UInt64); } + /// if setting input_format_json_infer_variant_from_multitype_array is true + /// and nested types are not equal then we convert to type variant. + void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + auto variant_type = std::make_shared(data_types); + /// replace separate types with a single variant type + data_types.clear(); + type_indexes.clear(); + data_types.push_back(variant_type); + type_indexes.insert(TypeIndex::Variant); + + // push it back again + data_types.push_back(variant_type); + type_indexes.insert(TypeIndex::Variant); + } + /// If we have only Date and DateTime types, convert Date to DateTime, /// otherwise, convert all Date and DateTime to String. void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -649,6 +666,12 @@ namespace /// Check settings specific for JSON formats. 
+ if (settings.json.infer_variant_from_multitype_array) + { + transformVariant(data_types, type_indexes); + return; + } + /// Convert numbers inferred from strings back to strings if needed. if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); @@ -677,6 +700,12 @@ namespace if constexpr (!is_json) return; + if (settings.json.infer_variant_from_multitype_array) + { + transformVariant(data_types, type_indexes); + return; + } + /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -822,7 +851,6 @@ namespace if (checkIfTypesAreEqual(nested_types_copy)) return std::make_shared(nested_types_copy.back()); - return std::make_shared(nested_types); } else @@ -1482,6 +1510,20 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) return nested_type ? std::make_shared(nested_type) : nullptr; } + if (which.isVariant()) + { + const auto * variant_type = assert_cast(type.get()); + DataTypes nested_types; + for (const auto & nested_type: variant_type->getVariants()) + { + /// unlike tuple or array, here we do not want to make any of the variants nullable + /// so we do not call makeNullableRecursively + nested_types.push_back(nested_type); + } + + return std::make_shared(nested_types); + } + if (which.isTuple()) { const auto * tuple_type = assert_cast(type.get()); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql new file mode 100644 index 00000000000..ac544d04f6e --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -0,0 +1,4 @@ +SET input_format_json_infer_variant_from_multitype_array=1; +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 467366af990215e11b4b0309b90b3e6d9ebca5fd Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 14 May 2024 21:55:56 +0200 Subject: [PATCH 003/363] Fix unimplemented serialization error and update reference file --- src/Formats/SchemaInferenceUtils.cpp | 13 ++++++++--- .../03150_infer_type_variant.reference | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03150_infer_type_variant.reference diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index fc1a26f9b2f..d0d29892dec 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -319,7 +319,7 @@ namespace data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); - // push it back again + // make the second type variant as well data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); } @@ -669,7 +669,6 @@ namespace if (settings.json.infer_variant_from_multitype_array) { transformVariant(data_types, type_indexes); - return; } /// Convert numbers inferred from strings back to strings if needed. @@ -703,7 +702,6 @@ namespace if (settings.json.infer_variant_from_multitype_array) { transformVariant(data_types, type_indexes); - return; } /// Convert JSON tuples with same nested types to arrays. 
@@ -1440,6 +1438,15 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F return; } + + if (const auto * variant_type = typeid_cast(data_type.get())) + { + auto nested_types = variant_type->getVariants(); + for (auto & nested_type : nested_types) + transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types); + data_type = std::make_shared(nested_types); + return; + } } void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference new file mode 100644 index 00000000000..ffb4209eadb --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -0,0 +1,22 @@ + ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ + │ │… a Int64))) │ + └──────────────────┴─────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Variant(Int64, String) │ + ├───────┼────────────────────────┤ +2. │ Hello │ Variant(Int64, String) │ + └───────┴────────────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Int64), Tuple(…│ + │ │… a Int64)) │ + ├─────────┼──────────────────────────────┤ +2. │ (42) │ Variant(Array(Int64), Tuple(…│ + │ │… a Int64)) │ + └─────────┴──────────────────────────────┘ From 4066c6bc548979703f45ba264437f5966c403d6a Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 02:13:53 +0000 Subject: [PATCH 004/363] Update setting name --- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 6 +++--- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0c0614550e5..ffc337b674f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1066,7 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_infer_variant_from_multitype_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ + M(Bool, input_format_json_infer_variant_from_multi_type_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index e027c693094..792ac08a5df 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,7 +137,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; - format_settings.json.infer_variant_from_multitype_array = settings.input_format_json_infer_variant_from_multitype_array; + format_settings.json.infer_variant_from_multi_type_array = settings.input_format_json_infer_variant_from_multi_type_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 6cac41bd63e..d2c75872326 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -223,7 +223,7 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; - bool infer_variant_from_multitype_array = false; + bool infer_variant_from_multi_type_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index d0d29892dec..f693916c584 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -308,7 +308,7 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting input_format_json_infer_variant_from_multitype_array is true + /// if setting input_format_json_infer_variant_from_multi_type_array is true /// and nested types are not equal then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { @@ -666,7 +666,7 @@ namespace /// Check settings specific for JSON formats. 
- if (settings.json.infer_variant_from_multitype_array) + if (settings.json.infer_variant_from_multi_type_array) { transformVariant(data_types, type_indexes); } @@ -699,7 +699,7 @@ namespace if constexpr (!is_json) return; - if (settings.json.infer_variant_from_multitype_array) + if (settings.json.infer_variant_from_multi_type_array) { transformVariant(data_types, type_indexes); } diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index ac544d04f6e..2ea849248f7 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multitype_array=1; +SET input_format_json_infer_variant_from_multi_type_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 2762cf86d2ba3f5c1ac86040b6ef484feb40837b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 14:19:17 +0200 Subject: [PATCH 005/363] fix test file --- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 2ea849248f7..ac544d04f6e 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multi_type_array=1; +SET input_format_json_infer_variant_from_multitype_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 4800aa6a6cdf5a8431fcc3d6fd96672590da0fff Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 14:21:52 +0200 Subject: [PATCH 006/363] rename setting in test file --- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index ac544d04f6e..2ea849248f7 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multitype_array=1; +SET input_format_json_infer_variant_from_multi_type_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From fbf34519a5e72bb03c57ee6bc0feea1adddcb309 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 16:03:23 +0200 Subject: [PATCH 007/363] Add setting to SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ece48620618..0665d1d6ca6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett 
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, + {"input_format_json_infer_variant_from_multi_type_array", 0, 0, "Allows inference of variant type if columns/arrays have multiple types."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From 777e0b313a48975fa51d645fb09a4f6ebfac1d1e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 18:51:29 +0200 Subject: [PATCH 008/363] Update 03150_infer_type_variant.sql Fix failing test --- tests/queries/0_stateless/03150_infer_type_variant.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 2ea849248f7..3253ddfe179 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ SET input_format_json_infer_variant_from_multi_type_array=1; -SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); -SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); -SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; From dd8d5c46c4678f570b9357dbdd912bcd6f4a267e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 18:55:34 +0200 Subject: [PATCH 009/363] Fix style --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index f693916c584..2cbb680af97 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -319,7 +319,7 @@ namespace data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); - // make the second type variant as well + /// make the second type variant as well data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); } From 04800f596c4471d10e15c40a533c539c6b549b06 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 21:20:20 +0200 Subject: [PATCH 010/363] Incorporate review changes --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 60 +++++++++++++------ .../03150_infer_type_variant.reference | 29 +++++---- .../0_stateless/03150_infer_type_variant.sql | 3 +- 7 files changed, 67 insertions(+), 33 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ffc337b674f..be7564794e9 100644 --- a/src/Core/Settings.h 
+++ b/src/Core/Settings.h @@ -1066,7 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_infer_variant_from_multi_type_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ + M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 0665d1d6ca6..75fcb538b2b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,7 +93,7 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, - {"input_format_json_infer_variant_from_multi_type_array", 0, 0, "Allows inference of variant type if columns/arrays have multiple types."}, + {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 792ac08a5df..2854802453e 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,7 +137,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; - format_settings.json.infer_variant_from_multi_type_array = settings.input_format_json_infer_variant_from_multi_type_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; 
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; @@ -266,6 +265,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; + format_settings.try_infer_variant = settings.input_format_try_infer_variants; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 927a7e691d8..a2a9e75bd44 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -36,6 +36,7 @@ struct FormatSettings bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; bool is_writing_to_terminal = false; + bool try_infer_variant = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 25000; @@ -223,7 +224,6 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; - bool infer_variant_from_multi_type_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 2cbb680af97..298127cad68 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -239,6 +239,16 @@ namespace return true; } + bool checkIfTypesContainVariant(const DataTypes & types) + { + for (size_t i = 0; i < types.size(); ++i) + { + if (isVariant(types[i])) + return true; + } + return false; + } + void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -308,20 +318,31 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting input_format_json_infer_variant_from_multi_type_array is true + /// if setting try_infer_variant is true /// and nested types are not equal then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { + auto typesAreEqual = checkIfTypesAreEqual(data_types); + auto typesContainVariant = checkIfTypesContainVariant(data_types); + if (typesAreEqual || typesContainVariant) + return; + + DataTypes new_data_types; + TypeIndexesSet new_type_indexes; + auto variant_type = std::make_shared(data_types); - /// replace separate types with a single variant type + size_t i = 0; + while (i != data_types.size()) + { + new_data_types.push_back(variant_type); + new_type_indexes.insert(TypeIndex::Variant); + i++; + } + data_types.clear(); type_indexes.clear(); - data_types.push_back(variant_type); - type_indexes.insert(TypeIndex::Variant); - - /// make the second type variant as well - data_types.push_back(variant_type); - type_indexes.insert(TypeIndex::Variant); + data_types = new_data_types; + type_indexes = new_type_indexes; } /// If we have only Date and DateTime types, convert Date to DateTime, @@ -661,16 +682,14 @@ namespace if (settings.try_infer_dates || settings.try_infer_datetimes) transformDatesAndDateTimes(data_types, type_indexes); + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + if constexpr (!is_json) return; /// Check settings specific for JSON formats. 
- if (settings.json.infer_variant_from_multi_type_array) - { - transformVariant(data_types, type_indexes); - } - /// Convert numbers inferred from strings back to strings if needed. if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); @@ -685,6 +704,10 @@ namespace if (settings.json.try_infer_objects_as_tuples) mergeJSONPaths(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + }; auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -696,14 +719,12 @@ namespace /// If there is at least one non Nothing type, change all Nothing types to it. transformNothingComplexTypes(data_types, type_indexes); + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + if constexpr (!is_json) return; - if (settings.json.infer_variant_from_multi_type_array) - { - transformVariant(data_types, type_indexes); - } - /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -715,6 +736,9 @@ namespace if (json_info && json_info->allow_merging_named_tuples) mergeNamedTuples(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); }; transformTypesRecursively(types, transform_simple_types, transform_complex_types); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index ffb4209eadb..f3f53057845 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -2,7 +2,7 @@ ┃ arr ┃ toTypeName(arr) ┃ ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ - │ │… a Int64))) │ + │ │… a Nullable(Int64)))) │ └──────────────────┴─────────────────────────────────────┘ ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ @@ -11,12 +11,21 @@ ├───────┼────────────────────────┤ 2. │ Hello │ Variant(Int64, String) │ └───────┴────────────────────────┘ - ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,2,3] │ Variant(Array(Int64), Tuple(…│ - │ │… a Int64)) │ - ├─────────┼──────────────────────────────┤ -2. │ (42) │ Variant(Array(Int64), Tuple(…│ - │ │… a Int64)) │ - └─────────┴──────────────────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple(…│ + │ │… a Nullable(Int64))) │ + ├─────────┼────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple(…│ + │ │… a Nullable(Int64))) │ + └─────────┴────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Nullable(String) │ + ├────┼─────────────────┼──────────────┼──────────────────┤ +2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Nullable(String) │ + ├────┼─────────────────┼──────────────┼──────────────────┤ +3. 
│ 3 │ Nullable(Int64) │ 2020-01-01 │ Nullable(String) │ + └────┴─────────────────┴──────────────┴──────────────────┘ diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 3253ddfe179..45126ccd471 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,5 @@ -SET input_format_json_infer_variant_from_multi_type_array=1; +SET input_format_try_infer_variants=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; +SELECT c1, toTypeName(c1), c2, toTypeName(c2) FROM format('CSV', '1,Hello World!\n2,"[1,2,3]"\n3,"2020-01-01"\n') FORMAT Pretty; \ No newline at end of file From 655262d1a1b21d85f4fbe284e0835065bcca379b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 5 Jun 2024 14:34:17 +0200 Subject: [PATCH 011/363] Fix issue with nullables --- src/Formats/SchemaInferenceUtils.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 298127cad68..43120cb7b22 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1547,11 +1547,13 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) DataTypes nested_types; for (const auto & nested_type: variant_type->getVariants()) { - /// unlike tuple or array, here we do not want to make any of the variants nullable - /// so we do not call makeNullableRecursively - nested_types.push_back(nested_type); + auto is_low_cardinality = nested_type->lowCardinality(); + auto has_sub_types = nested_type->haveSubtypes(); + if (!is_low_cardinality && has_sub_types) + nested_types.push_back(makeNullableRecursively(nested_type)); + else + nested_types.push_back(nested_type); } - return std::make_shared(nested_types); } From 418fc7f4438abd25eae4928f36ff0c3fef2395f8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 6 Jun 2024 10:02:19 +0200 Subject: [PATCH 012/363] Fix incorrect inference for other formats --- src/Formats/SchemaInferenceUtils.cpp | 29 +++++++++-- .../03150_infer_type_variant.reference | 48 +++++++++---------- 2 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 43120cb7b22..a8b5d4343f5 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -318,19 +318,40 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting try_infer_variant is true - /// and nested types are not equal then we convert to type variant. + /// if setting 'try_infer_variant' is true then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { auto typesAreEqual = checkIfTypesAreEqual(data_types); auto typesContainVariant = checkIfTypesContainVariant(data_types); - if (typesAreEqual || typesContainVariant) + if (typesAreEqual) return; DataTypes new_data_types; TypeIndexesSet new_type_indexes; + std::shared_ptr variant_type; + + /// extract the nested types of variant and make a new variant with the nested types and the other type. + /// eg. Type 1: variant, Type 2: Date -> variant. 
+ if (typesContainVariant) + { + DataTypes extracted_types; + for (size_t i=0; i(data_types[i].get())) + extracted_types = variant->getVariants(); + } + else + extracted_types.push_back(data_types[i]); + } + variant_type = std::make_shared(extracted_types); + } + else + { + variant_type = std::make_shared(data_types); + } - auto variant_type = std::make_shared(data_types); size_t i = 0; while (i != data_types.size()) { diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index f3f53057845..a5f56cb3618 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -1,9 +1,9 @@ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ arr ┃ toTypeName(arr) ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ - │ │… a Nullable(Int64)))) │ - └──────────────────┴─────────────────────────────────────┘ + ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple( + a Nullable(Int64)))) │ + └──────────────────┴─────────────────────────────────────────────────────────────┘ ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -11,21 +11,21 @@ ├───────┼────────────────────────┤ 2. │ Hello │ Variant(Int64, String) │ └───────┴────────────────────────┘ - ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple(…│ - │ │… a Nullable(Int64))) │ - ├─────────┼────────────────────────────────────────┤ -2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple(…│ - │ │… a Nullable(Int64))) │ - └─────────┴────────────────────────────────────────┘ - ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ - ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ - ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ -1. │ 1 │ Nullable(Int64) │ Hello World! │ Nullable(String) │ - ├────┼─────────────────┼──────────────┼──────────────────┤ -2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Nullable(String) │ - ├────┼─────────────────┼──────────────┼──────────────────┤ -3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Nullable(String) │ - └────┴─────────────────┴──────────────┴──────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + ├─────────┼───────────────────────────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + └─────────┴───────────────────────────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +2. 
│ 2 │ Nullable(Int64) │ [1,2,3] │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Variant(Array(Nullable(Int64)), String) │ + └────┴─────────────────┴──────────────┴─────────────────────────────────────────┘ From 9cf11a210f07110676b373b864ea098583d87ff6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 11 Jun 2024 11:11:06 +0200 Subject: [PATCH 013/363] Review changes --- src/Core/SettingsChangesHistory.h | 3 +- src/Formats/SchemaInferenceUtils.cpp | 73 ++++++------------- .../03150_infer_type_variant.reference | 24 +++--- 3 files changed, 35 insertions(+), 65 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3f743ef42bf..661ecc607ba 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,6 +92,7 @@ static std::map sett {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, @@ -103,8 +104,6 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, - {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index a8b5d4343f5..b7c71a95b29 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -239,16 +239,6 @@ namespace return true; } - bool checkIfTypesContainVariant(const DataTypes & types) - { - for (size_t i = 0; i < types.size(); ++i) - { - if (isVariant(types[i])) - return true; - } - return false; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -321,49 +311,28 @@ namespace /// if setting 'try_infer_variant' is true then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { - auto typesAreEqual = checkIfTypesAreEqual(data_types); - auto typesContainVariant = checkIfTypesContainVariant(data_types); - if (typesAreEqual) + if (checkIfTypesAreEqual(data_types)) return; - DataTypes new_data_types; - TypeIndexesSet new_type_indexes; - std::shared_ptr variant_type; - - /// extract the nested types of variant and make a new variant with the nested types and the other type. - /// eg. Type 1: variant, Type 2: Date -> variant. 
- if (typesContainVariant) + DataTypes variant_types; + for (const auto & type : data_types) { - DataTypes extracted_types; - for (size_t i=0; i(type.get())) { - if (isVariant(data_types[i])) - { - if (const auto * variant = typeid_cast(data_types[i].get())) - extracted_types = variant->getVariants(); - } - else - extracted_types.push_back(data_types[i]); + const auto & current_variants = variant_type->getVariants(); + variant_types.insert(variant_types.end(), current_variants.begin(), current_variants.end()); + } + else + { + variant_types.push_back(type); } - variant_type = std::make_shared(extracted_types); - } - else - { - variant_type = std::make_shared(data_types); } - size_t i = 0; - while (i != data_types.size()) - { - new_data_types.push_back(variant_type); - new_type_indexes.insert(TypeIndex::Variant); - i++; - } + auto variant_type = std::make_shared(variant_types); - data_types.clear(); - type_indexes.clear(); - data_types = new_data_types; - type_indexes = new_type_indexes; + for (auto & type : data_types) + type = variant_type; + type_indexes = {TypeIndex::Variant}; } /// If we have only Date and DateTime types, convert Date to DateTime, @@ -703,11 +672,12 @@ namespace if (settings.try_infer_dates || settings.try_infer_datetimes) transformDatesAndDateTimes(data_types, type_indexes); - if (settings.try_infer_variant) - transformVariant(data_types, type_indexes); - if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Check settings specific for JSON formats. @@ -740,11 +710,12 @@ namespace /// If there is at least one non Nothing type, change all Nothing types to it. transformNothingComplexTypes(data_types, type_indexes); - if (settings.try_infer_variant) - transformVariant(data_types, type_indexes); - if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index a5f56cb3618..a43fa1e1227 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -1,16 +1,16 @@ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ arr ┃ toTypeName(arr) ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple( + ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ ['1','Hello',(32)] │ Array(Variant(String, Tuple( a Nullable(Int64)))) │ - └──────────────────┴─────────────────────────────────────────────────────────────┘ - ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ 42 │ Variant(Int64, String) │ - ├───────┼────────────────────────┤ -2. │ Hello │ Variant(Int64, String) │ - └───────┴────────────────────────┘ + └────────────────────┴──────────────────────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Nullable(String) │ + ├───────┼──────────────────┤ +2. 
│ Hello │ Nullable(String) │ + └───────┴──────────────────┘ ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ From 548c90901020317669a10d191a0b6f8a7d0a0511 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 11 Jun 2024 12:14:36 +0200 Subject: [PATCH 014/363] Add documentation --- docs/en/operations/settings/settings-formats.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6aae1ea62e5..8bbb469547b 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -194,6 +194,17 @@ If enabled, ClickHouse will try to infer type `DateTime64` from string fields in Enabled by default. +## input_format_try_infer_variants {#input_format_try_infer_variants} + +If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + ## date_time_input_format {#date_time_input_format} Allows choosing a parser of the text representation of date and time. From bad5e27bbffa9c1f6727a0416edcb135dadcc1fe Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 11 Jun 2024 13:32:34 +0200 Subject: [PATCH 015/363] Update src/Formats/SchemaInferenceUtils.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Formats/SchemaInferenceUtils.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 0ac8b32f8aa..240830013c6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1539,9 +1539,7 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) DataTypes nested_types; for (const auto & nested_type: variant_type->getVariants()) { - auto is_low_cardinality = nested_type->lowCardinality(); - auto has_sub_types = nested_type->haveSubtypes(); - if (!is_low_cardinality && has_sub_types) + if (!nested_type->lowCardinality() && nested_type->haveSubtypes()) nested_types.push_back(makeNullableRecursively(nested_type)); else nested_types.push_back(nested_type); From e91dd71d4e55fe80d1c230a87eee7ad84333d9c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:01:06 +0200 Subject: [PATCH 016/363] Settings normalization --- src/Interpreters/InterpreterSetQuery.cpp | 26 ++++++++++-------------- src/Parsers/ParserQueryWithOutput.cpp | 11 ---------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 7e68fc5c4c1..cac44c7747b 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -9,6 +9,7 @@ #include #include + namespace DB { @@ -45,9 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. 
const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - { InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); - } } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -55,6 +54,16 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (!ast) return; + /// First apply the outermost settings. Then they could be overridden by deeper settings. + if (const auto * query_with_output = dynamic_cast(ast.get())) + { + if (query_with_output->settings_ast) + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + + if (const auto * create_query = ast->as(); create_query && create_query->select) + applySettingsFromSelectWithUnion(create_query->select->as(), context_); + } + if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) @@ -71,19 +80,6 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } - else if (const auto * query_with_output = dynamic_cast(ast.get())) - { - if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); - - if (const auto * create_query = ast->as()) - { - if (create_query->select) - { - applySettingsFromSelectWithUnion(create_query->select->as(), context_); - } - } - } else if (auto * insert_query = ast->as()) { context_->setInsertFormat(insert_query->format); diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 6d8a1258555..ac8f7d560e0 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -198,16 +197,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) return false; query_with_output.children.push_back(query_with_output.settings_ast); - - // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) - // Pass them manually, to apply in InterpreterSelectQuery::initSettings() - if (query->as()) - { - auto settings = query_with_output.settings_ast->clone(); - assert_cast(settings.get())->print_in_format = false; - QueryWithOutputSettingsPushDownVisitor::Data data{settings}; - QueryWithOutputSettingsPushDownVisitor(data).visit(query); - } } else break; From f5da9e424075fa755edd1a869e199f4861011be2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:01:44 +0200 Subject: [PATCH 017/363] Add a test --- ...QueryWithOutputSettingsPushDownVisitor.cpp | 56 ------------------- .../QueryWithOutputSettingsPushDownVisitor.h | 39 ------------- .../03172_format_settings_clauses.reference | 14 +++++ .../03172_format_settings_clauses.sql | 30 ++++++++++ 4 files changed, 44 insertions(+), 95 deletions(-) delete mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp delete mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.h create mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.reference create mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.sql diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp deleted file mode 100644 index 8cf0d0063ae..00000000000 --- 
a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) -{ - if (node->as()) - return true; - if (node->as()) - return true; - if (child->as()) - return true; - return false; -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data) -{ - if (auto * select_query = ast->as()) - visit(*select_query, ast, data); -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data) -{ - ASTPtr select_settings_ast = select_query.settings(); - if (!select_settings_ast) - { - select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone()); - return; - } - - SettingsChanges & select_settings = select_settings_ast->as().changes; - SettingsChanges & settings = data.settings_ast->as().changes; - - for (auto & setting : settings) - { - auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting) - { - return select_setting.name == setting.name; - }); - if (it == select_settings.end()) - select_settings.push_back(setting); - else - it->value = setting.value; - } -} - -} diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h deleted file mode 100644 index fde8a07b555..00000000000 --- a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTSelectQuery; -struct SettingChange; -class SettingsChanges; - -/// Pushdown SETTINGS clause that goes after FORMAT to the SELECT query: -/// (since settings after FORMAT parsed separately not in the ParserSelectQuery but in ParserQueryWithOutput) -/// -/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 -> -/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1 -/// -/// Otherwise settings after FORMAT will not be applied. 
-class QueryWithOutputSettingsPushDownMatcher -{ -public: - using Visitor = InDepthNodeVisitor; - - struct Data - { - const ASTPtr & settings_ast; - }; - - static bool needChildVisit(ASTPtr & node, const ASTPtr & child); - static void visit(ASTPtr & ast, Data & data); - -private: - static void visit(ASTSelectQuery &, ASTPtr &, Data &); -}; - -using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor; - -} diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.reference b/tests/queries/0_stateless/03172_format_settings_clauses.reference new file mode 100644 index 00000000000..8a98b137f4b --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.reference @@ -0,0 +1,14 @@ +1 +2 +1 +2 +1 +2 +1 +1 +3 +3 +3 +3 +3 +1 diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.sql b/tests/queries/0_stateless/03172_format_settings_clauses.sql new file mode 100644 index 00000000000..0d1aa4dcfbb --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.sql @@ -0,0 +1,30 @@ +SET max_block_size = 10, max_threads = 1; + +-- Take the following example: +SELECT 1 UNION ALL SELECT 2 FORMAT TSV; + +-- Each subquery can be put in parentheses and have its own settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV; + +-- And the whole query can have settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV SETTINGS max_block_size = 3; + +-- A single query with output is parsed in the same way as the UNION ALL chain: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3; + +-- So while these forms have a slightly different meaning, they both exist: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV; +SELECT getSetting('max_block_size') FORMAT TSV SETTINGS max_block_size = 3; + +-- And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. +-- But while this work: +(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) FORMAT TSV SETTINGS max_block_size = 3; + +-- This does not work automatically, unless we explicitly allow different orders: +(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) SETTINGS max_block_size = 3 FORMAT TSV; + +-- Inevitably, we allow this: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 SETTINGS max_block_size = 3 FORMAT TSV; +/*^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^*/ +-- Because this part is consumed into ASTSelectWithUnionQuery +-- and the rest into ASTQueryWithOutput. 
From 778807a8883901debc1bfeb72e17b37eb06bcaf0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:22:36 +0200 Subject: [PATCH 018/363] Fix error; remove garbage --- src/Client/ClientBase.cpp | 33 +++------------------------------ 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 854cc3fef8b..ad4964b4b7c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -1937,41 +1938,13 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin global_context->setSettings(*old_settings); }); - auto apply_query_settings = [&](const IAST & settings_ast) - { - if (!old_settings) - old_settings.emplace(global_context->getSettingsRef()); - global_context->applySettingsChanges(settings_ast.as()->changes); - global_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); - }; - - const auto * insert = parsed_query->as(); - if (const auto * select = parsed_query->as(); select && select->settings()) - apply_query_settings(*select->settings()); - else if (const auto * select_with_union = parsed_query->as()) - { - const ASTs & children = select_with_union->list_of_selects->children; - if (!children.empty()) - { - // On the client it is enough to apply settings only for the - // last SELECT, since the only thing that is important to apply - // on the client is format settings. - const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); From 87a2e5f39018ef16ab6fdd79ad934dce6c45aaf0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:30:55 +0200 Subject: [PATCH 019/363] Fix error --- src/Client/ClientBase.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index ad4964b4b7c..958a1f50813 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1932,10 +1932,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { /// Temporarily apply query settings to context. 
- std::optional old_settings; + Settings old_settings = global_context->getSettings(); SCOPE_EXIT_SAFE({ - if (old_settings) - global_context->setSettings(*old_settings); + global_context->setSettings(old_settings); }); InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); From 67a539292e5e62f0bf470d11a5b224ef105bea02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:33:11 +0200 Subject: [PATCH 020/363] Update test --- tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference | 4 ++-- tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference index 22405bf1866..a8b99666654 100644 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference @@ -1,7 +1,7 @@ 1 1 1 -1 -1 +2 +1 2 2 diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh index b70c28422c9..173cc949500 100755 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) FORMAT CSV SETTINGS max_block_size = 1' # push down append ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_compress_block_size = 1 FORMAT CSV SETTINGS max_block_size = 1' -# overwrite on push down (since these settings go latest) +# do not overwrite on push down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 2 FORMAT CSV SETTINGS max_block_size = 1' # on push-down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 1 FORMAT CSV' From 4817657375a37c374eb4be72793cba434c16a815 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 17 Jun 2024 23:43:03 +0200 Subject: [PATCH 021/363] Revert "Revert "Fix AWS ECS"" --- contrib/aws-crt-cpp | 2 +- .../ProxyConfigurationResolverProvider.cpp | 35 +++++++------------ .../ProxyConfigurationResolverProvider.h | 5 ++- src/IO/ReadWriteBufferFromHTTP.cpp | 2 +- src/IO/S3/Client.cpp | 6 ++-- src/IO/S3/PocoHTTPClient.cpp | 20 +++++++++-- src/IO/S3/PocoHTTPClient.h | 14 ++++---- src/IO/S3/PocoHTTPClientFactory.cpp | 5 ++- .../0_stateless/03170_ecs_crash.reference | 4 +++ tests/queries/0_stateless/03170_ecs_crash.sh | 9 +++++ 10 files changed, 61 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/03170_ecs_crash.reference create mode 100755 tests/queries/0_stateless/03170_ecs_crash.sh diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index f532d6abc0d..0217761556a 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 +Subproject commit 0217761556a7ba7ec537fe933d0ab1159096746e diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index b06073121e7..a46837bfdb9 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -112,9 +112,8 @@ namespace return configuration.has(config_prefix + ".uri"); } -
/* - * New syntax requires protocol prefix " or " - * */ + /* New syntax requires protocol prefix " or " + */ std::optional getProtocolPrefix( ProxyConfiguration::Protocol request_protocol, const String & config_prefix, @@ -130,22 +129,18 @@ namespace return protocol_prefix; } - template std::optional calculatePrefixBasedOnSettingsSyntax( + bool new_syntax, ProxyConfiguration::Protocol request_protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration ) { if (!configuration.has(config_prefix)) - { return std::nullopt; - } - if constexpr (new_syntax) - { + if (new_syntax) return getProtocolPrefix(request_protocol, config_prefix, configuration); - } return config_prefix; } @@ -155,24 +150,21 @@ std::shared_ptr ProxyConfigurationResolverProvider:: Protocol request_protocol, const Poco::Util::AbstractConfiguration & configuration) { - if (auto resolver = getFromSettings(request_protocol, "proxy", configuration)) - { + if (auto resolver = getFromSettings(true, request_protocol, "proxy", configuration)) return resolver; - } return std::make_shared( request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } -template std::shared_ptr ProxyConfigurationResolverProvider::getFromSettings( + bool new_syntax, Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(request_protocol, config_prefix, configuration); + auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(new_syntax, request_protocol, config_prefix, configuration); if (!prefix_opt) { @@ -195,20 +187,17 @@ std::shared_ptr ProxyConfigurationResolverProvider:: std::shared_ptr ProxyConfigurationResolverProvider::getFromOldSettingsFormat( Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - /* - * First try to get it from settings only using the combination of config_prefix and configuration. + /* First try to get it from settings only using the combination of config_prefix and configuration. * This logic exists for backward compatibility with old S3 storage specific proxy configuration. * */ - if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(request_protocol, config_prefix + ".proxy", configuration)) + if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(false, request_protocol, config_prefix + ".proxy", configuration)) { return resolver; } - /* - * In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. + /* In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. * Falls back to Environment resolver if no configuration is found. 
* */ return ProxyConfigurationResolverProvider::get(request_protocol, configuration); diff --git a/src/Common/ProxyConfigurationResolverProvider.h b/src/Common/ProxyConfigurationResolverProvider.h index ebf22f7e92a..357b218e499 100644 --- a/src/Common/ProxyConfigurationResolverProvider.h +++ b/src/Common/ProxyConfigurationResolverProvider.h @@ -33,12 +33,11 @@ public: ); private: - template static std::shared_ptr getFromSettings( + bool is_new_syntax, Protocol protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration - ); + const Poco::Util::AbstractConfiguration & configuration); }; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 303ffb744b5..4f883a9b4ed 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -221,7 +221,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( if (iter == http_header_entries.end()) { - http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}", VERSION_STRING)); + http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}{}", VERSION_STRING, VERSION_OFFICIAL)); } if (!delay_initialization && use_external_buffer) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 9229342b8c1..cbb61deea9f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -972,10 +972,10 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT { auto context = Context::getGlobalContextInstance(); chassert(context); - auto proxy_configuration_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); + auto proxy_configuration_resolver = ProxyConfigurationResolverProvider::get(ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); - auto per_request_configuration = [=] () { return proxy_configuration_resolver->resolve(); }; - auto error_report = [=] (const DB::ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; + auto per_request_configuration = [=]{ return proxy_configuration_resolver->resolve(); }; + auto error_report = [=](const ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; auto config = PocoHTTPClientConfiguration( per_request_configuration, diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1cef43530e0..04982f14f36 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,4 +1,5 @@ #include +#include #include "config.h" #if USE_AWS_S3 @@ -17,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include + static const int SUCCESS_RESPONSE_MIN = 200; static const int SUCCESS_RESPONSE_MAX = 299; @@ -84,7 +87,7 @@ namespace DB::S3 { PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -94,7 +97,7 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_) + std::function error_report_) : per_request_configuration(per_request_configuration_) , force_region(force_region_) , remote_host_filter(remote_host_filter_) @@ -107,6 +110,8 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( , 
s3_use_adaptive_timeouts(s3_use_adaptive_timeouts_) , error_report(error_report_) { + /// This is used to identify configurations created by us. + userAgent = std::string(VERSION_FULL) + VERSION_OFFICIAL; } void PocoHTTPClientConfiguration::updateSchemeAndRegion() @@ -166,6 +171,17 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config { } +PocoHTTPClient::PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration) + : timeouts(ConnectionTimeouts() + .withConnectionTimeout(Poco::Timespan(client_configuration.connectTimeoutMs * 1000)) + .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withTCPKeepAliveTimeout(Poco::Timespan( + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0))), + remote_host_filter(Context::getGlobalContextInstance()->getRemoteHostFilter()) +{ +} + std::shared_ptr PocoHTTPClient::MakeRequest( const std::shared_ptr & request, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 88251b964e2..18a21649167 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -38,7 +38,7 @@ class PocoHTTPClient; struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration { - std::function per_request_configuration; + std::function per_request_configuration; String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; @@ -54,13 +54,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; - std::function error_report; + std::function error_report; void updateSchemeAndRegion(); private: PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -70,8 +70,7 @@ private: bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_ - ); + std::function error_report_); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. 
friend ClientFactory; @@ -120,6 +119,7 @@ class PocoHTTPClient : public Aws::Http::HttpClient { public: explicit PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration); + explicit PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration); ~PocoHTTPClient() override = default; std::shared_ptr MakeRequest( @@ -166,8 +166,8 @@ protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; - std::function per_request_configuration; - std::function error_report; + std::function per_request_configuration; + std::function error_report; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index ef7af2d01ba..abec907778c 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -15,7 +15,10 @@ namespace DB::S3 std::shared_ptr PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const { - return std::make_shared(static_cast(client_configuration)); + if (client_configuration.userAgent.starts_with("ClickHouse")) + return std::make_shared(static_cast(client_configuration)); + else /// This client is created inside the AWS SDK with default settings to obtain ECS credentials from localhost. + return std::make_shared(client_configuration); } std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest( diff --git a/tests/queries/0_stateless/03170_ecs_crash.reference b/tests/queries/0_stateless/03170_ecs_crash.reference new file mode 100644 index 00000000000..acd7c60768b --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.reference @@ -0,0 +1,4 @@ +1 2 3 +4 5 6 +7 8 9 +0 0 0 diff --git a/tests/queries/0_stateless/03170_ecs_crash.sh b/tests/queries/0_stateless/03170_ecs_crash.sh new file mode 100755 index 00000000000..fa6870c4cf2 --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Previous versions crashed in attempt to use this authentication method (regardless of whether it was able to authenticate): +AWS_CONTAINER_CREDENTIALS_FULL_URI=http://localhost:1338/latest/meta-data/container/security-credentials $CLICKHOUSE_LOCAL -q "select * from s3('http://localhost:11111/test/a.tsv')" From 1df895f3dadcbb65d246927b79f42144e5fc1af2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jun 2024 14:04:40 +0200 Subject: [PATCH 022/363] Minor changes --- src/Access/SettingsConstraints.cpp | 8 ++++---- src/Interpreters/InterpreterSetQuery.cpp | 10 +++++----- src/Interpreters/InterpreterSetQuery.h | 2 +- .../03003_compatibility_setting_bad_value.sql | 1 - 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index a274f6b54f2..7506e365035 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -219,8 +219,8 @@ void SettingsConstraints::clamp(const Settings & current_settings, SettingsChang }); } -template -bool getNewValueToCheck(const T & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) +template +bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) { Field current_value; bool has_current_value = current_settings.tryGet(change.name, current_value); @@ -230,12 +230,12 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel return false; if (throw_on_failure) - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); else { try { - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); } catch (...) { diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index cac44c7747b..15d4ba56d8d 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -46,7 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. 
const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(false); } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -58,7 +58,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * query_with_output = dynamic_cast(ast.get())) { if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(false); if (const auto * create_query = ast->as(); create_query && create_query->select) applySettingsFromSelectWithUnion(create_query->select->as(), context_); @@ -67,7 +67,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(false); } else if (const auto * select_with_union_query = ast->as()) { @@ -76,7 +76,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(false); applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } @@ -84,7 +84,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(false); } } diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index 2438762f347..f50105c39f4 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -23,7 +23,7 @@ public: /** Set setting for current context (query context). * It is used for interpretation of SETTINGS clause in SELECT query. 
*/ - void executeForCurrentContext(bool ignore_setting_constraints = false); + void executeForCurrentContext(bool ignore_setting_constraints); bool supportsTransactions() const override { return true; } diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 48e98798c51..b9fbfd917fc 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1,2 +1 @@ select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} - From 11d54f4809a8b58773f13664e6b842cb6c7dce48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jun 2024 15:18:53 +0200 Subject: [PATCH 023/363] Pass-through settings from the client --- programs/client/Client.cpp | 3 +++ programs/server/Server.cpp | 2 +- src/Access/AccessControl.cpp | 8 +++++++- src/Access/AccessControl.h | 5 ++++- src/Client/ClientBase.cpp | 1 - .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 +- 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index efe23d57478..22a035fbd71 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1184,6 +1184,9 @@ void Client::processConfig() global_context->setQueryKindInitial(); global_context->setQuotaClientKey(config().getString("quota_key", "")); global_context->setQueryKind(query_kind); + + /// Allow to pass-through unknown settings to the server. + global_context->getAccessControl().allowAllSettings(); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 6414f7f6ea5..2f1d07790e1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1824,7 +1824,7 @@ try auto & access_control = global_context->getAccessControl(); try { - access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); + access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); } catch (...) { diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..9831621d6ac 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -264,7 +264,7 @@ AccessControl::AccessControl() AccessControl::~AccessControl() = default; -void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, +void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_) { if (config_.has("custom_settings_prefixes")) @@ -852,4 +852,10 @@ const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const return *external_authenticators; } + +void AccessControl::allowAllSettings() +{ + custom_settings_prefixes->registerPrefixes({""}); +} + } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..f408f6dfb0d 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -54,7 +54,7 @@ public: ~AccessControl() override; /// Initializes access storage (user directories). 
- void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, + void setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); /// Parses access entities from a configuration loaded from users.xml. @@ -235,6 +235,9 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); + /// Allow all setting names - this can be used in clients to pass-through unknown settings to the server. + void allowAllSettings(); + private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 958a1f50813..617a56cfd95 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2958,7 +2958,6 @@ void ClientBase::init(int argc, char ** argv) boost::replace_all(arg, "−", "--"); } - OptionsDescription options_description; options_description.main_description.emplace(createOptionsDescription("Main options", terminal_width)); diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index b9fbfd917fc..3a09eec7452 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1 +1 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} +select 42 settings compatibility=NULL; -- {clientError BAD_GET} From 16c3e36b5a2f3203eb87161f4320ea5e70865fc4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jun 2024 22:50:52 +0200 Subject: [PATCH 024/363] Simplification --- src/Client/ClientBase.cpp | 142 +++++++++++++++++------------------ src/Client/ClientBase.h | 1 + src/Interpreters/Session.cpp | 8 +- src/Interpreters/Session.h | 3 +- 4 files changed, 74 insertions(+), 80 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 617a56cfd95..490a560de2d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -439,7 +439,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query) std::vector data; for (auto & table : external_tables) - data.emplace_back(table.getData(global_context)); + data.emplace_back(table.getData(query_context)); connection->sendExternalTablesData(data); } @@ -652,10 +652,10 @@ try /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. if (!extras_into_stdout || select_only_into_file) - output_format = global_context->getOutputFormatParallelIfPossible( + output_format = query_context->getOutputFormatParallelIfPossible( current_format, out_file_buf ? *out_file_buf : *out_buf, block); else - output_format = global_context->getOutputFormat( + output_format = query_context->getOutputFormat( current_format, out_file_buf ? *out_file_buf : *out_buf, block); output_format->setAutoFlush(); @@ -949,7 +949,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, global_context)) + if (insert && isSyncInsertWithData(*insert, query_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; @@ -1067,7 +1067,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } } - const auto & settings = global_context->getSettingsRef(); + const auto & settings = query_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; int retries_left = 10; @@ -1082,10 +1082,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + query_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &query_context->getSettingsRef(), + &query_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1275,7 +1275,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - global_context->setSetting("session_timezone", tz); + query_context->setSetting("session_timezone", tz); } @@ -1471,13 +1471,13 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query) { - if (!global_context->hasInsertionTable() && insert_query.table) + if (!query_context->hasInsertionTable() && insert_query.table) { String table = insert_query.table->as().shortName(); if (!table.empty()) { String database = insert_query.database ? insert_query.database->as().shortName() : ""; - global_context->setInsertionTable(StorageID(database, table)); + query_context->setInsertionTable(StorageID(database, table)); } } } @@ -1528,7 +1528,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars const auto & parsed_insert_query = parsed_query->as(); if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in)))) { - const auto & settings = global_context->getSettingsRef(); + const auto & settings = query_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else @@ -1542,10 +1542,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + query_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &query_context->getSettingsRef(), + &query_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1593,7 +1593,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Set callback to be called on file progress. 
if (tty_buf) - progress_indication.setFileProgressCallback(global_context, *tty_buf); + progress_indication.setFileProgressCallback(query_context, *tty_buf); } /// If data fetched from file (maybe compressed file) @@ -1627,10 +1627,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des } StorageFile::CommonArguments args{ - WithContext(global_context), + WithContext(query_context), parsed_insert_query->table_id, current_format, - getFormatSettings(global_context), + getFormatSettings(query_context), compression_method, columns_for_storage_file, ConstraintsDescription{}, @@ -1638,7 +1638,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des {}, String{}, }; - StoragePtr storage = std::make_shared(in_file, global_context->getUserFilesPath(), args); + StoragePtr storage = std::make_shared(in_file, query_context->getUserFilesPath(), args); storage->startup(); SelectQueryInfo query_info; @@ -1647,18 +1647,18 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des auto metadata = storage->getInMemoryMetadataPtr(); QueryPlan plan; storage->read( - plan, - sample.getNames(), - storage->getStorageSnapshot(metadata, global_context), - query_info, - global_context, - {}, - global_context->getSettingsRef().max_block_size, - getNumberOfPhysicalCPUCores()); + plan, + sample.getNames(), + storage->getStorageSnapshot(metadata, query_context), + query_info, + query_context, + {}, + query_context->getSettingsRef().max_block_size, + getNumberOfPhysicalCPUCores()); auto builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(global_context), - BuildQueryPipelineSettings::fromContext(global_context)); + QueryPlanOptimizationSettings::fromContext(query_context), + BuildQueryPipelineSettings::fromContext(query_context)); QueryPlanResourceHolder resources; auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources); @@ -1719,14 +1719,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes current_format = insert->format; } - auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); + auto source = query_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); Pipe pipe(source); if (columns_description.hasDefaults()) { pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *source, global_context); + return std::make_shared(header, columns_description, *source, query_context); }); } @@ -1872,6 +1872,9 @@ void ClientBase::cancelQuery() void ClientBase::processParsedSingleQuery(const String & full_query, const String & query_to_execute, ASTPtr parsed_query, std::optional echo_query_, bool report_error) { + query_context = Context::createCopy(global_context); + query_context->makeQueryContext(); + resetOutput(); have_error = false; cancelled = false; @@ -1888,12 +1891,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - global_context->setCurrentQueryId(""); + query_context->setCurrentQueryId(""); // Generate a new query_id for (const auto & query_id_format : query_id_formats) { writeString(query_id_format.first, std_out); - writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); + writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", query_context->getCurrentQueryId())), 
std_out); writeChar('\n', std_out); std_out.next(); } @@ -1920,7 +1923,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin auto password = auth_data->getPassword(); if (password) - global_context->getAccessControl().checkPasswordComplexityRules(*password); + query_context->getAccessControl().checkPasswordComplexityRules(*password); } } } @@ -1930,47 +1933,40 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.resetProgress(); profile_events.watch.restart(); + /// Apply query settings to context, as they can affect the behavior on client-side. + InterpreterSetQuery::applySettingsFromQuery(parsed_query, query_context); + + if (!connection->checkConnected(connection_parameters.timeouts)) + connect(); + + ASTPtr input_function; + const auto * insert = parsed_query->as(); + if (insert && insert->select) + insert->tryFindInputFunction(input_function); + + bool is_async_insert_with_inlined_data = query_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); + + if (is_async_insert_with_inlined_data) { - /// Temporarily apply query settings to context. - Settings old_settings = global_context->getSettings(); - SCOPE_EXIT_SAFE({ - global_context->setSettings(old_settings); - }); + bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); + bool have_external_data = have_data_in_stdin || insert->infile; - InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); - - if (!connection->checkConnected(connection_parameters.timeouts)) - connect(); - - ASTPtr input_function; - const auto * insert = parsed_query->as(); - if (insert && insert->select) - insert->tryFindInputFunction(input_function); - - bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); - - if (is_async_insert_with_inlined_data) - { - bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); - bool have_external_data = have_data_in_stdin || insert->infile; - - if (have_external_data) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Processing async inserts with both inlined and external data (from stdin or infile) is not supported"); - } - - /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. - if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) - { - if (input_function && insert->format.empty()) - throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); - - processInsertQuery(query_to_execute, parsed_query); - } - else - processOrdinaryQuery(query_to_execute, parsed_query); + if (have_external_data) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Processing async inserts with both inlined and external data (from stdin or infile) is not supported"); } + /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. + if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) + { + if (input_function && insert->format.empty()) + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); + + processInsertQuery(query_to_execute, parsed_query); + } + else + processOrdinaryQuery(query_to_execute, parsed_query); + /// Do not change context (current DB, settings) in case of an exception. 
if (!have_error) { @@ -2651,10 +2647,8 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) if (!has_log_comment) { - Settings settings = global_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" - settings.log_comment = fs::absolute(fs::path(file_name)); - global_context->setSettings(settings); + global_context->setSetting("log_comment", String(fs::absolute(fs::path(file_name)))); } return executeMultiQuery(queries_from_file); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 220fcddc038..228a9d65ea7 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -198,6 +198,7 @@ protected: /// since other members can use them. SharedContextHolder shared_context; ContextMutablePtr global_context; + ContextMutablePtr query_context; bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..9dd686290db 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -532,7 +532,7 @@ ContextMutablePtr Session::makeSessionContext() session_context->checkSettingsConstraints(settings_from_auth_server, SettingSource::QUERY); session_context->applySettingsChanges(settings_from_auth_server); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -596,7 +596,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: { session_name_ }, max_sessions_for_user); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -672,13 +672,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t user = query_context->getUser(); /// Interserver does not create session context - recordLoginSucess(query_context); + recordLoginSuccess(query_context); return query_context; } -void Session::recordLoginSucess(ContextPtr login_context) const +void Session::recordLoginSuccess(ContextPtr login_context) const { if (notified_session_log_about_login) return; diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..fc41c78e666 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -102,8 +102,7 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; - void recordLoginSucess(ContextPtr login_context) const; - + void recordLoginSuccess(ContextPtr login_context) const; mutable bool notified_session_log_about_login = false; const UUID auth_id; From fa5d4cfea183c64e3ff088f922c0960a3c3951e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Jun 2024 00:41:14 +0200 Subject: [PATCH 025/363] Fix error --- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 490a560de2d..8fcb9632be0 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -949,7 +949,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, query_context)) + if (insert && isSyncInsertWithData(*insert, global_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; From e064171a68fcac110d27ff36a51b7a6bb4fbb251 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 03:58:11 +0200 Subject: [PATCH 026/363] Fix errors --- programs/client/Client.cpp | 10 ---------- programs/local/LocalServer.cpp | 3 --- src/Client/ClientBase.cpp | 4 +++- src/Client/LocalConnection.cpp | 1 - src/Client/LocalConnection.h | 2 -- .../0_stateless/00857_global_joinsavel_table_alias.sql | 1 - 6 files changed, 3 insertions(+), 18 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 22a035fbd71..ab02d9fac74 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -311,7 +310,6 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; - auto & thread_status = MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -326,14 +324,6 @@ try initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); - { - // All that just to set DB::CurrentThread::get().getGlobalContext() - // which is required for client timezone (pushed from server) to work. - auto thread_group = std::make_shared(); - const_cast(thread_group->global_context) = global_context; - thread_status.attachToGroup(thread_group, false); - } - /// Includes delayed_interactive. if (is_interactive) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb1c35743b2..e5f4bac852c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -27,10 +27,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -48,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 38aa0ed8b14..03f088f2b61 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1872,7 +1872,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin ASTPtr parsed_query, std::optional echo_query_, bool report_error) { query_context = Context::createCopy(global_context); - query_context->makeQueryContext(); + CurrentThread::QueryScope query_scope(query_context); resetOutput(); have_error = false; @@ -2926,6 +2926,8 @@ void ClientBase::init(int argc, char ** argv) /// Don't parse options with Poco library, we prefer neat boost::program_options. 
stopOptionsProcessing(); + MainThreadStatus::getInstance(); + stdin_is_a_tty = isatty(STDIN_FILENO); stdout_is_a_tty = isatty(STDOUT_FILENO); stderr_is_a_tty = isatty(STDERR_FILENO); diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index c7494e31605..e63e5793505 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -125,7 +125,6 @@ void LocalConnection::sendQuery( state->query_id = query_id; state->query = query; - state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); state->profile_queue = std::make_shared(std::numeric_limits::max()); CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 899d134cce5..bdd0b481529 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -61,8 +61,6 @@ struct LocalQueryState /// Time after the last check to stop the request and send the progress. Stopwatch after_send_progress; Stopwatch after_send_profile_events; - - std::unique_ptr query_scope_holder; }; diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql index 2044a9b8d22..092b071cb48 100644 --- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql +++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql @@ -1,4 +1,3 @@ - DROP TABLE IF EXISTS local_table; DROP TABLE IF EXISTS other_table; From 84f81c61853f34d765475309932d73af55e25d0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 17:18:32 +0200 Subject: [PATCH 027/363] Fix error --- src/Client/Suggest.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 0188ebc8173..c1f163939e8 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -96,6 +96,10 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { ThreadStatus thread_status; + my_context->makeQueryContext(); + auto group = ThreadGroup::createForQuery(my_context); + CurrentThread::attachToGroup(group); + for (size_t retry = 0; retry < 10; ++retry) { try From 602fa5cbadba2924bc4e57f26f5f37b00d7b086e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 17:28:54 +0200 Subject: [PATCH 028/363] Fix error --- programs/local/LocalServer.cpp | 1 - src/Client/ClientBase.h | 3 --- 2 files changed, 4 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index e5f4bac852c..45641b999b6 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -428,7 +428,6 @@ int LocalServer::main(const std::vector & /*args*/) try { UseSSL use_ssl; - thread_status.emplace(); StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true)); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 228a9d65ea7..83b99696373 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -244,9 +244,6 @@ protected: Settings cmd_settings; MergeTreeSettings cmd_merge_tree_settings; - /// thread status should be destructed before shared context because it relies on process list. 
- std::optional thread_status; - ServerConnectionPtr connection; ConnectionParameters connection_parameters; From e0ef26285a5cda87e5073c6a0aaecb67f9609a96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 19:46:00 +0200 Subject: [PATCH 029/363] Update submodule --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index 1c2946bfcb7..6463c9cbf47 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf +Subproject commit 6463c9cbf47cab78e4a4fa97a866942f201c6a58 From 9948150b87c6ee4531e0130de095035e2f228ec1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jun 2024 00:19:20 +0200 Subject: [PATCH 030/363] Fix error --- src/IO/S3/PocoHTTPClient.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 18a21649167..3b7ec4d1d9c 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -172,7 +172,7 @@ protected: const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; bool s3_use_adaptive_timeouts = true; - bool enable_s3_requests_logging; + bool enable_s3_requests_logging = false; bool for_disk_s3; /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler From 384aa9feb90bbf95c5bc0e5498af4aca769c2531 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:51:20 +0200 Subject: [PATCH 031/363] Move setting to 24.7 changes --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 1ab7dc69f60..deaeba2a7de 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,6 +86,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static const std::map settings_changes_history = { + {"24.7", {{"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, + }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, @@ -97,7 +99,6 @@ static const std::map Date: Tue, 25 Jun 2024 02:49:40 +0200 Subject: [PATCH 032/363] Update submodule --- contrib/aws | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws b/contrib/aws index 1c2946bfcb7..d5450d76abd 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf +Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59 From 49634db3ba2961dadbdc1689f0a4ef1ecdb8bea1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jun 2024 02:51:57 +0200 Subject: [PATCH 033/363] Update submodule --- contrib/aws-crt-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index f532d6abc0d..e5aa45cacfd 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject 
commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 +Subproject commit e5aa45cacfdcda7719ead38760e7c61076f5745f From d7b3c3e8a97835c9f7987a5852ef9469770f8560 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Jun 2024 21:59:04 +0200 Subject: [PATCH 034/363] Add review suggestion --- src/Interpreters/InterpreterSetQuery.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 15d4ba56d8d..2ae35c4313b 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -46,7 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(false); + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(/* ignore_setting_constraints= */ false); } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -58,7 +58,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * query_with_output = dynamic_cast(ast.get())) { if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(false); + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); if (const auto * create_query = ast->as(); create_query && create_query->select) applySettingsFromSelectWithUnion(create_query->select->as(), context_); @@ -67,7 +67,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(false); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); } else if (const auto * select_with_union_query = ast->as()) { @@ -76,7 +76,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(false); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } @@ -84,7 +84,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(false); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); } } From b14823d07ee1c059d2f278c0cc4068474c79ad2a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Jul 2024 09:33:57 +0200 Subject: [PATCH 035/363] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 7719fe1e837..828031f4c23 100644 --- 
a/src/Core/SettingsChangesHistory.cpp
+++ b/src/Core/SettingsChangesHistory.cpp
@@ -60,6 +60,7 @@ static std::initializer_list
Date: Mon, 22 Jul 2024 05:46:53 +0200
Subject: [PATCH 036/363] What if we tighten limits for functional tests?

---
 tests/config/install.sh | 1 +
 tests/config/users.d/limits.xml | 8 ++++++++
 2 files changed, 9 insertions(+)
 create mode 100644 tests/config/users.d/limits.xml

diff --git a/tests/config/install.sh b/tests/config/install.sh
index 1b0edc5fc16..265b9248f4a 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -93,6 +93,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/
+ln -sf $SRC_PATH/users.d/limits.xml $DEST_SERVER_PATH/users.d/
 if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then
     ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/
diff --git a/tests/config/users.d/limits.xml b/tests/config/users.d/limits.xml
new file mode 100644
index 00000000000..f44c73241ab
--- /dev/null
+++ b/tests/config/users.d/limits.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <max_memory_usage>5G</max_memory_usage>
+            <max_rows_to_read>20000000</max_rows_to_read>
+        </default>
+    </profiles>
+</clickhouse>

From 8c264230e30bf97f6bac999401cd594b31e09977 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 22 Jul 2024 07:20:33 +0200
Subject: [PATCH 037/363] Loosen the limit

---
 docker/test/stateful/run.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index 857385f4715..72a8f31ab71 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -118,8 +118,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
     clickhouse-client --query "CREATE TABLE test.hits AS datasets.hits_v1"
     clickhouse-client --query "CREATE TABLE test.visits AS datasets.visits_v1"
-    clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
-    clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
     clickhouse-client --query "DROP TABLE datasets.hits_v1"
     clickhouse-client --query "DROP TABLE datasets.visits_v1"
@@ -191,16 +191,16 @@ else
     ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
-    clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
-    clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+    clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
     clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
     clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
 else
     clickhouse-client --query "RENAME TABLE datasets.hits_v1
TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi - clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, 
Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi clickhouse-client --query "SHOW TABLES FROM test" From 4978869d2f709a9ad93cefc04cda43bcc739fb22 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 18:04:38 +0800 Subject: [PATCH 038/363] stash --- src/Functions/FunctionOverlay.cpp | 481 ++++++++++++++++++++++++++++++ 1 file changed, 481 insertions(+) create mode 100644 src/Functions/FunctionOverlay.cpp diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp new file mode 100644 index 00000000000..6160335ad79 --- /dev/null +++ b/src/Functions/FunctionOverlay.cpp @@ -0,0 +1,481 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include 
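/// (Illustrative aside, not part of the patch: expected behaviour of the function
/// added below, derived from its prefix/replace/suffix copying logic. The offset
/// is 1-based, and when `length` is omitted it defaults to the length of the
/// replacement, so the replaced region and the replacement coincide.)
///
///     SELECT overlay('Txxxxas', 'hom', 2, 4);  -- 'Thomas'
///     SELECT overlay('Hello', 'World', 2);     -- 'HWorld'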
+#include
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int ILLEGAL_COLUMN;
+extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+extern const int ZERO_ARRAY_OR_TUPLE_INDEX;
+}
+
+namespace
+{
+
+/// If 'is_utf8' - measure offset and length in code points instead of bytes.
+/// Syntax: overlay(input, replace, offset[, length])
+template <bool is_utf8>
+class FunctionOverlay : public IFunction
+{
+public:
+    static constexpr auto name = is_utf8 ? "OverlayUTF8" : "Overlay";
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionOverlay>(); }
+    String getName() const override { return name; }
+    bool isVariadic() const override { return true; }
+    size_t getNumberOfArguments() const override { return 0; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        const size_t number_of_arguments = arguments.size();
+        if (number_of_arguments < 3 || number_of_arguments > 4)
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Number of arguments for function {} doesn't match: "
+                "passed {}, should be 3 or 4",
+                getName(),
+                number_of_arguments);
+
+        /// first argument is string
+        if (!isString(arguments[0]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of first argument of function {}, expected String",
+                arguments[0]->getName(),
+                getName());
+
+        /// second argument is string
+        if (!isString(arguments[1]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of second argument of function {}, expected String",
+                arguments[1]->getName(),
+                getName());
+
+        if (!isNativeNumber(arguments[2]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of third argument of function {}, expected (U)Int8|16|32|64",
+                arguments[2]->getName(),
+                getName());
+
+        if (number_of_arguments == 4 && !isNativeNumber(arguments[3]))
+            throw Exception(
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Illegal type {} of second argument of function {}, expected (U)Int8|16|32|64",
+                arguments[3]->getName(),
+                getName());
+
+        return std::make_shared<DataTypeString>();
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        const size_t number_of_arguments = arguments.size();
+
+        ColumnPtr column_string = arguments[0].column;
+        ColumnPtr column_offset = arguments[1].column;
+        ColumnPtr column_length;
+        if (number_of_arguments == 3)
+            column_length = arguments[2].column;
+
+        const ColumnConst * column_offset_const = checkAndGetColumn<ColumnConst>(column_offset.get());
+        const ColumnConst * column_length_const = nullptr;
+        if (number_of_arguments == 3)
+            column_length_const = checkAndGetColumn<ColumnConst>(column_length.get());
+
+        Int64 offset = 0;
+        Int64 length = 0;
+        if (column_offset_const)
+            offset = column_offset_const->getInt(0);
+        if (column_length_const)
+            length = column_length_const->getInt(0);
+
+        auto res_col = ColumnString::create();
+        auto & res_data = res_col->getChars();
+        auto & res_offsets = res_col->getOffsets();
+    }
+
+private:
+    template <bool three_args, bool offset_is_const, bool length_is_const>
+    void constantConstant(
+        size_t rows,
+        const StringRef & input,
+        const StringRef & replace,
+        const ColumnPtr & column_offset,
+        const ColumnPtr & column_length,
+        Int64 const_offset,
+        Int64 const_length,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets
& res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + constantConstant(input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + size_t replace_size = replace.size; + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (three_args) + { + valid_length = replace_size; + } + + size_t res_offset = 0; + size_t input_size = input.size; + for (size_t i = 0; i < rows; ++i) + { + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (!three_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorConstant( + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const StringRef & replace, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + vectorConstant(input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? 
(offset - 1) : (-offset); + } + + size_t replace_size = replace.size; + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (three_args) + { + valid_length = replace_size; + } + + size_t rows = input_offsets.size(); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (!three_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void constantVector( + const StringRef & input, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + constantVector(input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + size_t rows = replace_offsets.size(); + size_t input_size = input.size; + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (three_args) + { + // length = replace_size; + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? 
length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorVector( + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + vectorVector( + input_data, + input_offsets, + replace_data, + replace_offsets, + column_offset, + column_length, + const_offset, + -1, + res_data, + res_offsets); + return; + } + + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + size_t rows = input_offsets.size(); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (three_args) + { + // length = replace_size; + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } +}; + +} + +REGISTER_FUNCTION(Overlay) +{ + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::CaseSensitive); +} + +} From 81688e0efdf75a6a3923d6b95f09579d37e93e2a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 20:29:35 +0800 Subject: [PATCH 039/363] almost finish --- src/Functions/FunctionOverlay.cpp | 154 ++++++++++++++++++++++++++---- 1 file changed, 134 insertions(+), 20 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 6160335ad79..65af4d811f5 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -15,10 +15,8 @@ namespace DB namespace ErrorCodes { -extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int ZERO_ARRAY_OR_TUPLE_INDEX; } namespace @@ -86,32 +84,145 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const size_t number_of_arguments = arguments.size(); + bool three_args = number_of_arguments == 3; - ColumnPtr column_string = arguments[0].column; - ColumnPtr column_offset = arguments[1].column; + ColumnPtr column_offset = arguments[2].column; ColumnPtr column_length; - if (number_of_arguments == 3) - column_length = arguments[2].column; + if (!three_args) + column_length = arguments[3].column; const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (number_of_arguments == 3) + if (!three_args) column_length_const = checkAndGetColumn(column_length.get()); - Int64 offset = 0; - Int64 length = 0; + bool offset_is_const = false; + bool length_is_const = false; + Int64 offset = -1; + Int64 length = -1; if (column_offset_const) + { offset = column_offset_const->getInt(0); + offset_is_const = true; + } + if (column_length_const) + { length = column_length_const->getInt(0); + length_is_const = true; + } + auto res_col = ColumnString::create(); auto & res_data = res_col->getChars(); auto & res_offsets = res_col->getOffsets(); + res_offsets.resize_exact(input_rows_count); + + ColumnPtr column_input = arguments[0].column; + ColumnPtr column_replace = arguments[1].column; + + const auto * column_input_const = checkAndGetColumn(column_input.get()); + const auto * column_input_string = checkAndGetColumn(column_input.get()); + if (column_input_const) + { + StringRef input = column_input_const->getDataAt(0); + res_data.reserve(input.size * 
input_rows_count); + } + else + { + res_data.reserve(column_input_string->getChars().size()); + } + + const auto * column_replace_const = checkAndGetColumn(column_replace.get()); + const auto * column_replace_string = checkAndGetColumn(column_replace.get()); + bool input_is_const = column_input_const != nullptr; + bool replace_is_const = column_replace_const != nullptr; + +#define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ + if (input_is_const && replace_is_const) \ + constantConstant( \ + input_rows_count, \ + column_input_const->getDataAt(0), \ + column_replace_const->getDataAt(0), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (input_is_const) \ + constantVector( \ + column_input_const->getDataAt(0), \ + column_replace_string->getChars(), \ + column_replace_string->getOffsets(), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (replace_is_const) \ + vectorConstant( \ + column_input_string->getChars(), \ + column_input_string->getOffsets(), \ + column_replace_const->getDataAt(0), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else \ + vectorVector( \ + column_input_string->getChars(), \ + column_input_string->getOffsets(), \ + column_replace_string->getChars(), \ + column_replace_string->getOffsets(), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); + + if (three_args) + { + if (offset_is_const) + { + OVERLAY_EXECUTE_CASE(true, true, false) + } + else + { + OVERLAY_EXECUTE_CASE(true, false, false) + } + } + else + { + if (offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, true) + } + else if (offset_is_const && !length_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, false) + } + else if (!offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(false, false, true) + } + else + { + OVERLAY_EXECUTE_CASE(false, false, false) + } + } +#undef OVERLAY_EXECUTE_CASE + + return res_col; } + private: -template + template void constantConstant( size_t rows, const StringRef & input, @@ -121,11 +232,12 @@ template ( + rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -194,7 +306,7 @@ template + template void vectorConstant( const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, @@ -204,11 +316,12 @@ template ( + input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -281,7 +394,7 @@ template + template void constantVector( const StringRef & input, const ColumnString::Chars & replace_data, @@ -291,11 +404,12 @@ template ( + input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -379,11 +493,11 @@ template ( + vectorVector( input_data, input_offsets, replace_data, From 9785f85ca32e3af5760a9bc24e998e7d050fc073 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 21:08:02 +0800 Subject: [PATCH 040/363] fix style --- src/Functions/FunctionOverlay.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 65af4d811f5..7d0e2e86de2 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -1,14 +1,10 @@ #include -#include #include -#include -#include #include #include 
#include #include -#include - +#include namespace DB { From f4138ee6c67bbdb82269a9087b1b054f33cb35a8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 10:20:27 +0800 Subject: [PATCH 041/363] fix bugs about corner cases --- src/Functions/FunctionOverlay.cpp | 89 +++++++++++++++++-------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 7d0e2e86de2..d3ee7e1df6d 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -218,6 +218,26 @@ public: private: + /// input offset is 1-based, maybe negative + /// output result is 0-based valid offset, within [0, input_size] + static size_t getValidOffset(Int64 offset, size_t input_size) + { + if (offset > 0) + { + if (static_cast(offset) > input_size + 1) [[unlikely]] + return input_size; + else + return offset - 1; + } + else + { + if (input_size < -static_cast(offset)) [[unlikely]] + return 0; + else + return input_size + offset; + } + } + template void constantConstant( size_t rows, @@ -237,13 +257,10 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative + size_t input_size = input.size; size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } + valid_offset = getValidOffset(const_offset, input_size); size_t replace_size = replace.size; Int64 length = 0; // maybe negative @@ -258,14 +275,14 @@ private: valid_length = replace_size; } + Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; - size_t input_size = input.size; for (size_t i = 0; i < rows; ++i) { if constexpr (!offset_is_const) { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (!three_args && !length_is_const) @@ -274,7 +291,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -321,14 +338,6 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative - size_t valid_offset = 0; // start from 0, not negative - if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } - size_t replace_size = replace.size; Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative @@ -343,16 +352,22 @@ private: } size_t rows = input_offsets.size(); + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; - if constexpr (!offset_is_const) + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (!three_args && !length_is_const) @@ -361,7 +376,7 @@ private: valid_length = length >= 0 ? 
length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -409,13 +424,10 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative + size_t input_size = input.size; size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } + valid_offset = getValidOffset(const_offset, input_size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative @@ -426,7 +438,7 @@ private: } size_t rows = replace_offsets.size(); - size_t input_size = input.size; + Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -436,12 +448,11 @@ private: if constexpr (!offset_is_const) { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (three_args) { - // length = replace_size; valid_length = replace_size; } else if constexpr (!length_is_const) @@ -450,7 +461,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -507,15 +518,6 @@ private: return; } - - Int64 offset = 0; // start from 1, maybe negative - size_t valid_offset = 0; // start from 0, not negative - if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } - Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) @@ -525,6 +527,8 @@ private: } size_t rows = input_offsets.size(); + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -533,15 +537,18 @@ private: size_t replace_offset = replace_offsets[i - 1]; size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; - if constexpr (!offset_is_const) + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (three_args) { - // length = replace_size; valid_length = replace_size; } else if constexpr (!length_is_const) @@ -550,7 +557,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); From b42069cfa80e66ab59669bb7ccb93c2944d91170 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:27:11 +0200 Subject: [PATCH 042/363] Adjust some tests --- .../0_stateless/00111_shard_external_sort_distributed.sql | 6 +++--- .../0_stateless/00463_long_sessions_in_http_interface.sh | 2 +- tests/queries/0_stateless/00601_kill_running_query.sh | 2 +- tests/queries/0_stateless/00976_max_execution_speed.sql | 2 +- .../0_stateless/01119_optimize_trivial_insert_select.sql | 5 +++-- .../queries/0_stateless/01245_limit_infinite_sources.sql | 1 + tests/queries/0_stateless/01249_flush_interactive.sh | 4 ++-- tests/queries/0_stateless/01293_show_settings.reference | 1 + .../01301_aggregate_state_exception_memory_leak.sh | 2 +- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 2 +- .../0_stateless/02021_exponential_sum_shard.reference | 1 - tests/queries/0_stateless/02021_exponential_sum_shard.sql | 1 - tests/queries/0_stateless/02136_kill_scalar_queries.sh | 2 +- tests/queries/0_stateless/02293_ttest_large_samples.sql | 2 ++ .../02294_floating_point_second_in_settings.sh | 6 +++--- tests/queries/0_stateless/02343_aggregation_pipeline.sql | 8 +++----- .../0_stateless/02697_stop_reading_on_first_cancel.sh | 2 +- tests/queries/0_stateless/02700_s3_part_INT_MAX.sh | 4 +++- .../02896_max_execution_time_with_break_overflow_mode.sql | 2 ++ tests/queries/0_stateless/02915_sleep_large_uint.sql | 1 + ...p_virtual_columns_with_non_deterministic_functions.sql | 1 + 21 files changed, 32 insertions(+), 25 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 112f5edae36..88a05f59111 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,10 +1,10 @@ -- Tags: distributed -SET max_memory_usage = 300000000; -SET max_bytes_before_external_sort = 20000000; +SET max_memory_usage = 150000000; +SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; -CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 10000000; +CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index d41d6409315..bb77a88820a 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -74,7 +74,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE t" echo "A session cannot be used by concurrent connections:" -${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT count() FROM system.numbers" >/dev/null & +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9&max_rows_to_read=0" --data-binary "SELECT count() FROM system.numbers" >/dev/null & # An infinite loop is required to make the test reliable. 
We will ensure that at least once the query on the line above has started before this check while true diff --git a/tests/queries/0_stateless/00601_kill_running_query.sh b/tests/queries/0_stateless/00601_kill_running_query.sh index 3163f8146d0..be0fff49129 100755 --- a/tests/queries/0_stateless/00601_kill_running_query.sh +++ b/tests/queries/0_stateless/00601_kill_running_query.sh @@ -11,7 +11,7 @@ function wait_for_query_to_start() while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done } -${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k)' > /dev/null & +${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0' > /dev/null & wait_for_query_to_start "test_00601_$CLICKHOUSE_DATABASE" $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_00601_$CLICKHOUSE_DATABASE'" wait diff --git a/tests/queries/0_stateless/00976_max_execution_speed.sql b/tests/queries/0_stateless/00976_max_execution_speed.sql index 52c3f05ff43..41374712724 100644 --- a/tests/queries/0_stateless/00976_max_execution_speed.sql +++ b/tests/queries/0_stateless/00976_max_execution_speed.sql @@ -1,2 +1,2 @@ -SET max_execution_speed = 1, max_execution_time = 3; +SET max_execution_speed = 1, max_execution_time = 3, max_rows_to_read = 0; SELECT count() FROM system.numbers; -- { serverError TIMEOUT_EXCEEDED } diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql index a53b60a5ad3..2b301d7aced 100644 --- a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql @@ -1,8 +1,9 @@ drop table if exists t; create table t(n int, a Int64, s String) engine = MergeTree() order by a; -set enable_positional_arguments=0; -set optimize_trivial_insert_select=1; +set enable_positional_arguments = 0; +set optimize_trivial_insert_select = 1; +set max_rows_to_read = 0; -- due to aggregate functions, optimize_trivial_insert_select will not be applied insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) settings max_insert_threads=4, max_threads=2; diff --git a/tests/queries/0_stateless/01245_limit_infinite_sources.sql b/tests/queries/0_stateless/01245_limit_infinite_sources.sql index 05680d86a33..69c93baf8a8 100644 --- a/tests/queries/0_stateless/01245_limit_infinite_sources.sql +++ b/tests/queries/0_stateless/01245_limit_infinite_sources.sql @@ -9,3 +9,4 @@ FROM ) WHERE number = 1 LIMIT 1 +SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/01249_flush_interactive.sh b/tests/queries/0_stateless/01249_flush_interactive.sh index 551e11c8c8d..775b7825a16 100755 --- a/tests/queries/0_stateless/01249_flush_interactive.sh +++ b/tests/queries/0_stateless/01249_flush_interactive.sh @@ -14,10 +14,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test() { - timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query " + timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --max_rows_to_read 0 --query " SELECT DISTINCT number % 5 FROM 
system.numbers" ||: echo -e '---' - timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary " + timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10&max_rows_to_read=0" --data-binary " SELECT DISTINCT number % 5 FROM system.numbers" ||: echo -e '---' } diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 187f55697e4..9d326f16a3b 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -5,5 +5,6 @@ connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 +max_memory_usage UInt64 5000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 47fe7a9c7d9..9dd800ceb09 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) start=$SECONDS # If the memory leak exists, it will lead to OOM fairly quickly. for _ in {1..1000}; do - $CLICKHOUSE_CLIENT --max_memory_usage 1G <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; + $CLICKHOUSE_CLIENT --max_memory_usage 1G --max_rows_to_read 0 <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; # NOTE: we cannot use timeout here since this will not guarantee that the query will be executed at least once. 
# (since graceful wait of clickhouse-client had been reverted) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 1cf52c0288b..ec14f637c01 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -9,7 +9,7 @@ create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index INSERT INTO t SELECT number, if(number < (8129 * 1024), arrayStringConcat(arrayMap(x -> toString(x), range(number % 128)), ' '), '') -FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8; +FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8, max_rows_to_read=0; -- optimize table t final; diff --git a/tests/queries/0_stateless/02021_exponential_sum_shard.reference b/tests/queries/0_stateless/02021_exponential_sum_shard.reference index 8453706a05a..c28e5d7a132 100644 --- a/tests/queries/0_stateless/02021_exponential_sum_shard.reference +++ b/tests/queries/0_stateless/02021_exponential_sum_shard.reference @@ -2,4 +2,3 @@ 0.009775171065493644 0.009775171065493644 0.009775171065493644 -0.009775171065493644 diff --git a/tests/queries/0_stateless/02021_exponential_sum_shard.sql b/tests/queries/0_stateless/02021_exponential_sum_shard.sql index 49fde0fe217..8e91637e41d 100644 --- a/tests/queries/0_stateless/02021_exponential_sum_shard.sql +++ b/tests/queries/0_stateless/02021_exponential_sum_shard.sql @@ -3,4 +3,3 @@ WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1) WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(10000)); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(100000)); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(1000000)); -WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(10000000)); diff --git a/tests/queries/0_stateless/02136_kill_scalar_queries.sh b/tests/queries/0_stateless/02136_kill_scalar_queries.sh index c8691b62360..f8bd5a42756 100755 --- a/tests/queries/0_stateless/02136_kill_scalar_queries.sh +++ b/tests/queries/0_stateless/02136_kill_scalar_queries.sh @@ -10,7 +10,7 @@ function wait_for_query_to_start() } QUERY_1_ID="${CLICKHOUSE_DATABASE}_TEST02132KILL_QUERY1" -(${CLICKHOUSE_CLIENT} --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." || echo 'FAIL') & +(${CLICKHOUSE_CLIENT} --max_rows_to_read 0 --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." 
|| echo 'FAIL') & wait_for_query_to_start "${QUERY_1_ID}" ${CLICKHOUSE_CLIENT} --query="KILL QUERY WHERE query_id='${QUERY_1_ID}' SYNC" diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 14baa3fddfe..826bd483fe9 100644 --- a/tests/queries/0_stateless/02293_ttest_large_samples.sql +++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql @@ -15,6 +15,8 @@ SELECT FROM system.numbers limit 500000)); +SET max_rows_to_read = 0; + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5 ) FROM ( SELECT studentTTest(sample, variant) as result diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh index 7a18b8fea29..27dbd3e3de6 100755 --- a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh @@ -23,16 +23,16 @@ function check_output() { # TCP CLIENT echo "TCP CLIENT" -OUTPUT=$($CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) check_output "${OUTPUT}" echo "TCP CLIENT WITH SETTINGS IN QUERY" -OUTPUT=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) check_output "${OUTPUT}" # HTTP CLIENT echo "HTTP CLIENT" -OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=$MAX_TIMEOUT" -d \ +OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=${MAX_TIMEOUT}&max_rows_to_read=0" -d \ "SELECT count() FROM system.numbers" || true) check_output "${OUTPUT}" diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index 0f9dbd0247d..24d54293313 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -13,11 +13,9 @@ set allow_prefetched_read_pool_for_local_filesystem = 0; -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; explain pipeline select number from remote('127.0.0.{1,2,3}', system, numbers_mt) group by number settings distributed_aggregation_memory_efficient = 1; diff --git a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh index 2be13588453..5a2cec08eca 100755 --- a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh +++ b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_read_with_cancel" -$CLICKHOUSE_CLIENT -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & +$CLICKHOUSE_CLIENT --max_rows_to_read 0 -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & pid=$! for _ in {0..60} diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index a34a480a078..c431686b594 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -10,7 +10,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: .sh test is used over .sql because it needs $CLICKHOUSE_DATABASE to # avoid truncation, since seems that the version of MinIO that is used on CI # too slow with this. -$CLICKHOUSE_CLIENT -nm -q " +# +# Unfortunately, the test has to buffer it in memory. +$CLICKHOUSE_CLIENT --max_memory_usage 10G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index ec86a66c7dd..3e131cad0f0 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; + -- Query stops after timeout without an error SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; diff --git a/tests/queries/0_stateless/02915_sleep_large_uint.sql b/tests/queries/0_stateless/02915_sleep_large_uint.sql index f7c04ab6d1f..08b6c580a28 100644 --- a/tests/queries/0_stateless/02915_sleep_large_uint.sql +++ b/tests/queries/0_stateless/02915_sleep_large_uint.sql @@ -1,6 +1,7 @@ SELECT sleep(3.40282e+44); -- { serverError BAD_ARGUMENTS } SELECT sleep((pow(2, 64) / 1000000) - 1); -- { serverError BAD_ARGUMENTS } SELECT sleepEachRow(184467440737095516) from numbers(10000); -- { serverError BAD_ARGUMENTS } +SET max_rows_to_read = 0; SELECT sleepEachRow(pow(2, 31)) from numbers(9007199254740992) settings function_sleep_max_microseconds_per_block = 8589934592000000000; -- { serverError TOO_SLOW } -- Another corner case, but it requires lots of memory to run (huge block size) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 8ccc3cf61da..6ef8c5a8656 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); select ignore(number) from test where RAND() > 4292390314 limit 10; From 016888e29a828870d5cdb50a0eb5e1514bafc97c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:32:20 +0200 Subject: [PATCH 
043/363] Adjust some tests

---
 tests/queries/1_stateful/00067_union_all.sql | 3 ++-
 .../00088_global_in_one_shard_and_rows_before_limit.sql | 2 +-
 .../queries/1_stateful/00147_global_in_aggregate_function.sql | 1 +
 .../queries/1_stateful/00149_quantiles_timing_distributed.sql | 1 +
 tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 1 +
 .../1_stateful/00171_grouping_aggregated_transform_bug.sql | 1 +
 .../1_stateful/00182_simple_squashing_transform_bug.sql | 1 +
 7 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tests/queries/1_stateful/00067_union_all.sql b/tests/queries/1_stateful/00067_union_all.sql
index 2a1d00e975d..9ee14b36b03 100644
--- a/tests/queries/1_stateful/00067_union_all.sql
+++ b/tests/queries/1_stateful/00067_union_all.sql
@@ -10,4 +10,5 @@ UNION ALL
     ORDER BY id DESC
     LIMIT 10
 )
-ORDER BY id, event;
+ORDER BY id, event
+SETTINGS max_rows_to_read = 40_000_000;
diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
index 52f9c46997f..443808e7bed 100644
--- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
+++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
@@ -1,4 +1,4 @@
 -- Tags: shard
-SET output_format_write_statistics = 0;
+SET output_format_write_statistics = 0, max_rows_to_read = 20_000_000;
 SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact;
diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
index 075c01530c6..c156f073573 100644
--- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
+++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
@@ -1,4 +1,5 @@
 -- Tags: global
+SET max_rows_to_read = 40_000_000;
 SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits);
 SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits);
diff --git a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql
index 6f910646fb7..16b565985ea 100644
--- a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql
+++ b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql
@@ -1,4 +1,5 @@
 -- Tags: distributed
+SET max_rows_to_read = 100_000_000;
 SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID);
 SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1;
diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
index 7b3f50f8141..1a98a531067 100644
--- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
+++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
@@ -1,5 +1,6 @@
 -- Tags: no-random-settings

+SET max_memory_usage = '10G';
 SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;

 -- We had a bug which lead to additional compressed data read. 
test.hits compressed size is about 1.2Gb, but we read more then 3Gb. diff --git a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql index 7068780a1b1..b3e4d749328 100644 --- a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql +++ b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql @@ -1,4 +1,5 @@ -- Tags: distributed +SET max_rows_to_read = '100M'; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS max_block_size = 63169; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1, max_block_size = 63169; diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index e73de4b33fb..85bad651090 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,6 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; +set max_rows_to_read = '20M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From e569a305ba07225ee641ae4af07ba9c88e4608d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:46:31 +0200 Subject: [PATCH 044/363] Adjust some tests --- .../00002_log_and_exception_messages_formatting.sql | 2 ++ .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- .../00376_shard_group_uniq_array_of_int_array.sql | 2 +- .../00377_shard_group_uniq_array_of_string_array.sql | 2 +- tests/queries/0_stateless/00600_replace_running_query.sh | 6 +++--- .../00834_cancel_http_readonly_queries_on_client_close.sh | 2 +- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 +- tests/queries/0_stateless/01304_direct_io_long.sh | 4 ++-- tests/queries/0_stateless/02021_exponential_sum.reference | 1 - tests/queries/0_stateless/02021_exponential_sum.sql | 1 - tests/queries/0_stateless/02234_cast_to_ip_address.sql | 2 +- .../0_stateless/02450_kill_distributed_query_deadlock.sh | 2 +- tests/queries/0_stateless/02585_query_status_deadlock.sh | 3 +-- tests/queries/0_stateless/02786_max_execution_time_leaf.sql | 1 + .../0_stateless/02844_subquery_timeout_with_break.sql | 2 +- tests/queries/0_stateless/02916_glogal_in_cancel.sql | 2 +- 16 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 07c42d6d039..c158406c0da 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -7,6 +7,8 @@ system flush logs; drop table if exists logs; create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time; +SET max_rows_to_read = 0; + -- 
Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 WITH 0.001 AS threshold diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8a310cb8fc9..f32a64cd30f 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index abd0e6e6a45..43066880102 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -6,7 +6,7 @@ CREATE TABLE group_uniq_arr_int ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index e9cfff211f8..1c4376ad577 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -6,6 +6,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 7a71d17f19b..e7022875086 100755 --- 
a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -17,7 +17,7 @@ function wait_for_query_to_start() } -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1&max_rows_to_read=0" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & wait_for_query_to_start 'hello' # Replace it @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --max_rows_to_read=0 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id @@ -38,7 +38,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=42&replace_running_query=1" -d 'S $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null wait -${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT} --query_id=42 --max_rows_to_read=0 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait diff --git a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh index 5c21c70e06a..dd3735f27b1 100755 --- a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh +++ b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' +${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&max_rows_to_read=0&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' i=0 retries=300 while [[ $i -lt $retries ]]; do diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 55eacd0db44..15a53841761 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,5 @@ drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into lc_00906 select '0123456789' from numbers(100000000); +insert into lc_00906 select '0123456789' from numbers(100000000) SETTINGS max_rows_to_read = '100M'; select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 2e27c2f7728..a66239058ab 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --max_rows_to_read 50M --multiquery " DROP TABLE IF EXISTS bug; CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi', merge_max_block_size = 8192; @@ -18,5 +18,5 @@ cat "$LOG" | grep Loaded rm "$LOG" -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --multiquery " DROP TABLE bug;" diff --git a/tests/queries/0_stateless/02021_exponential_sum.reference b/tests/queries/0_stateless/02021_exponential_sum.reference index 5bd77479cf7..c9dcee51173 100644 --- a/tests/queries/0_stateless/02021_exponential_sum.reference +++ b/tests/queries/0_stateless/02021_exponential_sum.reference @@ -5,4 +5,3 @@ 0.0009775171065493646 0.0009775171065493646 0.0009775171065493646 -0.0009775171065493646 diff --git a/tests/queries/0_stateless/02021_exponential_sum.sql b/tests/queries/0_stateless/02021_exponential_sum.sql index 8ab7638029c..62ec7dcf9f1 100644 --- a/tests/queries/0_stateless/02021_exponential_sum.sql +++ b/tests/queries/0_stateless/02021_exponential_sum.sql @@ -6,4 +6,3 @@ WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1) WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(100000); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(1000000); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(10000000); -WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(100000000); diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql 
b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 28f1afff57f..51e953da905 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -67,7 +67,7 @@ SELECT toIPv6('::.1.2.3'); --{serverError CANNOT_PARSE_IPV6} SELECT toIPv6OrDefault('::.1.2.3'); SELECT toIPv6OrNull('::.1.2.3'); -SELECT count() FROM numbers_mt(100000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); +SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); SELECT '--'; diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 0cd520d8d5d..445f907bcc5 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -9,7 +9,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # this can trigger a hung/deadlock in ProcessorList. for i in {1..50}; do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & while :; do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" if [[ "$killed_queries" -ge 1 ]]; then diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index e3e34109cdb..6321ac0064a 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" -$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " +$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " create temporary table tmp as select * from numbers(500000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & @@ -23,4 +23,3 @@ do done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null - diff --git a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql index f678c913b46..2e4623f4ac6 100644 --- a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql +++ b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) SETTINGS max_execution_time_leaf = 1; -- { serverError TIMEOUT_EXCEEDED } -- Can return partial result SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) FORMAT Null SETTINGS max_execution_time_leaf = 1, timeout_overflow_mode_leaf = 'break'; diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql index 511ed0c59de..00b527a9378 100644 --- a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql +++ b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql @@ -4,7 +4,7 @@ CREATE TABLE t (key UInt64, value UInt64, INDEX value_idx value TYPE bloom_filte INSERT INTO t SELECT 
number, rand()%1000 FROM numbers(10000); SET timeout_overflow_mode='break'; -SET max_execution_time=0.1; +SET max_execution_time=0.1, max_rows_to_read=0; SELECT * FROM t WHERE value IN (SELECT number FROM numbers(1000000000)); DROP TABLE t; diff --git a/tests/queries/0_stateless/02916_glogal_in_cancel.sql b/tests/queries/0_stateless/02916_glogal_in_cancel.sql index ad54f1ecdec..dd61795947a 100644 --- a/tests/queries/0_stateless/02916_glogal_in_cancel.sql +++ b/tests/queries/0_stateless/02916_glogal_in_cancel.sql @@ -1,2 +1,2 @@ -set max_execution_time = 0.5, timeout_overflow_mode = 'break'; +set max_execution_time = 0.5, timeout_overflow_mode = 'break', max_rows_to_read = 0; SELECT number FROM remote('127.0.0.{3|2}', numbers(1)) WHERE number GLOBAL IN (SELECT number FROM numbers(10000000000.)) format Null; From 26650dcb2e39b0d28cae4029b6adde2b6c01fda2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:59:13 +0200 Subject: [PATCH 045/363] More limits --- tests/config/install.sh | 2 +- tests/config/users.d/limits.xml | 8 ----- tests/config/users.d/limits.yaml | 57 ++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 9 deletions(-) delete mode 100644 tests/config/users.d/limits.xml create mode 100644 tests/config/users.d/limits.yaml diff --git a/tests/config/install.sh b/tests/config/install.sh index 265b9248f4a..c5fb3cc92c7 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -93,7 +93,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/ -ln -sf $SRC_PATH/users.d/limits.xml $DEST_SERVER_PATH/users.d/ +ln -sf $SRC_PATH/users.d/limits.yaml $DEST_SERVER_PATH/users.d/ if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/ diff --git a/tests/config/users.d/limits.xml b/tests/config/users.d/limits.xml deleted file mode 100644 index f44c73241ab..00000000000 --- a/tests/config/users.d/limits.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - 5G - 20000000 - - - diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml new file mode 100644 index 00000000000..4f3f439a997 --- /dev/null +++ b/tests/config/users.d/limits.yaml @@ -0,0 +1,57 @@ +profiles: + default: + max_memory_usage: 5G + max_rows_to_read: 20000000 + + # Also set every other limit to a high value, so it will not limit anything, but we will test that code around it. 
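+    # With these defaults in place, any test that needs to read more than
+    # 20 million rows has to opt out explicitly, for example with
+    # `SETTINGS max_rows_to_read = 0` or a higher per-query value, which is
+    # what the test adjustments in the surrounding commits do.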
+ s3_max_get_rps: 1000000 + s3_max_get_burst: 2000000 + s3_max_put_rps: 1000000 + s3_max_put_burst: 2000000 + max_remote_read_network_bandwidth: 1T + max_remote_write_network_bandwidth: 1T + max_local_read_bandwidth: 1T + max_local_write_bandwidth: 1T + use_index_for_in_with_subqueries_max_values: 1G + max_bytes_to_read: 1T + max_bytes_to_read_leaf: 1T + max_rows_to_group_by: 10G + max_bytes_before_external_group_by: 10G + max_rows_to_sort: 10G + max_bytes_to_sort: 10G + max_bytes_before_external_sort: 10G + max_result_rows: 1G + max_result_bytes: 1G + max_execution_time: 600 + max_execution_time_leaf: 600 + max_execution_speed: 100G + max_execution_speed_bytes: 10T + max_estimated_execution_time: 600 + max_columns_to_read: 10K + max_temporary_columns: 10K + max_temporary_non_const_columns: 10K + max_sessions_for_user: 1K + max_rows_in_set: 10G + max_bytes_in_set: 10G + max_rows_in_join: 10G + max_bytes_in_join: 10G + max_rows_in_set_to_optimize_join: 1G + max_rows_to_transfer: 1G + max_bytes_to_transfer: 1G + max_rows_in_distinct: 10G + max_bytes_in_distinct: 10G + max_memory_usage_for_user: 10G + max_network_bandwidth: 100G + max_network_bytes: 1T + max_network_bandwidth_for_user: 100G + max_network_bandwidth_for_all_users: 100G + max_temporary_data_on_disk_size_for_user: 100G + max_temporary_data_on_disk_size_for_query: 100G + max_backup_bandwidth: 100G + max_hyperscan_regexp_length: 1M + max_hyperscan_regexp_total_length: 10M + query_cache_max_size_in_bytes: 10M + query_cache_max_entries: 100K + external_storage_max_read_rows: 10G + external_storage_max_read_bytes: 10G + max_streams_for_merge_tree_reading: 1000 From fd3f0cf92b7800b171c5723541a329748a0dad1b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 14:17:58 +0800 Subject: [PATCH 046/363] support overlayUTF8 --- src/Functions/FunctionOverlay.cpp | 281 +++++++++++++----- .../0_stateless/03205_overlay.reference | 168 +++++++++++ tests/queries/0_stateless/03205_overlay.sql | 60 ++++ .../0_stateless/03206_overlay_utf8.reference | 168 +++++++++++ .../0_stateless/03206_overlay_utf8.sql | 60 ++++ 5 files changed, 665 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/03205_overlay.reference create mode 100644 tests/queries/0_stateless/03205_overlay.sql create mode 100644 tests/queries/0_stateless/03206_overlay_utf8.reference create mode 100644 tests/queries/0_stateless/03206_overlay_utf8.sql diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index d3ee7e1df6d..61d2df88ab1 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include namespace DB { @@ -15,6 +17,8 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +using namespace GatherUtils; + namespace { @@ -24,7 +28,7 @@ template class FunctionOverlay : public IFunction { public: - static constexpr auto name = is_utf8 ? "OverlayUTF8" : "Overlay"; + static constexpr auto name = is_utf8 ? 
"overlayUTF8" : "overlay"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -238,6 +242,15 @@ private: } } + /// get character count of a slice [data, data+bytes) + static size_t getSliceSize(const UInt8 * data, size_t bytes) + { + if constexpr (is_utf8) + return UTF8::countCodePoints(data, bytes); + else + return bytes; + } + template void constantConstant( size_t rows, @@ -257,13 +270,12 @@ private: return; } - size_t input_size = input.size; + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) valid_offset = getValidOffset(const_offset, input_size); - size_t replace_size = replace.size; - Int64 length = 0; // maybe negative + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) { @@ -276,6 +288,9 @@ private: } Int64 offset = 0; // start from 1, maybe negative + Int64 length = 0; // maybe negative + const UInt8 * input_begin = reinterpret_cast(input.data); + const UInt8 * input_end = reinterpret_cast(input.data + input.size); size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -293,28 +308,57 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; + + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -338,7 +382,7 @@ private: return; } - size_t replace_size = replace.size; + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) @@ -358,7 +402,8 @@ private: for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; - size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); if constexpr (offset_is_const) { @@ -378,29 +423,59 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15( - &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
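+                /// In this non-UTF-8 branch, prefix_size, valid_length and suffix_size
+                /// are byte counts, so the suffix is copied directly from the input bytes.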
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -424,7 +499,7 @@ private: return; } - size_t input_size = input.size; + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) valid_offset = getValidOffset(const_offset, input_size); @@ -438,12 +513,15 @@ private: } size_t rows = replace_offsets.size(); + const auto * input_begin = reinterpret_cast(input.data); + const auto * input_end = reinterpret_cast(input.data + input.size); Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { size_t replace_offset = replace_offsets[i - 1]; - size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); if constexpr (!offset_is_const) { @@ -463,28 +541,55 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
- if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input.size : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -533,9 +638,12 @@ private: for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; - size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); + size_t replace_offset = replace_offsets[i - 1]; - size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); if constexpr (offset_is_const) { @@ -559,29 +667,58 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
- if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15( - &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. 
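+                    /// Here suffix_bytes is the byte length of the trailing suffix_size
+                    /// code points, as computed by skipCodePointsBackward above.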
+ if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference new file mode 100644 index 00000000000..9e79db2e131 --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -0,0 +1,168 @@ +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql new file mode 100644 index 00000000000..b131312c934 --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -0,0 +1,60 @@ +SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) 
from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlay('Spark SQL', '_', 6) from numbers(3); +SELECT overlay(materialize('Spark SQL'), '_', 6) from numbers(3); +SELECT overlay('Spark SQL', materialize('_'), 6) from numbers(3); +SELECT overlay('Spark SQL', '_', materialize(6)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6) from numbers(3); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)) from numbers(3); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)) from numbers(3); + +SELECT overlay('Spark SQL', 'CORE', 7) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'CORE', 7) from numbers(3); +SELECT overlay('Spark SQL', materialize('CORE'), 7) from numbers(3); +SELECT overlay('Spark SQL', 'CORE', materialize(7)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)) from numbers(3); +SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)) from numbers(3); + +SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlay('Spark SQL', 'tructured', 2, 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), 2, 4) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', materialize(2), 4) from numbers(3); 
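+-- Each 'tructured' variant here replaces length=4 bytes starting at 1-based
+-- position 2 ('park') with 'tructured', so all of them should produce
+-- 'Structured SQL' (see 03205_overlay.reference).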
+SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); diff --git a/tests/queries/0_stateless/03206_overlay_utf8.reference b/tests/queries/0_stateless/03206_overlay_utf8.reference new file mode 100644 index 00000000000..19878c97184 --- /dev/null +++ b/tests/queries/0_stateless/03206_overlay_utf8.reference @@ -0,0 +1,168 @@ +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH 
+Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH diff --git a/tests/queries/0_stateless/03206_overlay_utf8.sql b/tests/queries/0_stateless/03206_overlay_utf8.sql new file mode 100644 index 00000000000..00b756c8b5b --- /dev/null +++ b/tests/queries/0_stateless/03206_overlay_utf8.sql @@ -0,0 +1,60 @@ +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', '_', 6) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', 6) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('_'), 6) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', '_', materialize(6)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), 7) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from 
numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); From c09c22b17575396e38fb45cb385dcc8a49f9a183 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 14:45:47 +0800 Subject: [PATCH 047/363] finish doc --- .../functions/string-replace-functions.md | 72 +++++++++++++++++++ ...new_functions_must_be_documented.reference | 2 + 2 files changed, 74 insertions(+) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 8793ebdd1a3..4e1f89fd974 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -223,3 +223,75 @@ SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res; │ Munchener Strase │ └──────────────────┘ ``` + +## overlay + +Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. + +**Syntax** + +```sql +overlay(s, replace, position[, length]) +``` + +**Parameters** + +- `s`: A string type [String](../data-types/string.md). +- `replace`: A string type [String](../data-types/string.md). +- `position`: An integer type [Int](../data-types/int.md). +- `length`: Optional. An integer type [Int](../data-types/int.md). + +**Returned value** + +- A [String](../data-types/string.md) data type value. 
+
+## overlayUTF8
+
+Replaces part of the string `s` with the string `replace`, starting at the 1-based `position`, for `length` UTF-8 characters. If `length` is omitted or negative, it defaults to the length of `replace`.
+
+Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+**Syntax**
+
+```sql
+overlayUTF8(s, replace, position[, length])
+```
+
+**Parameters**
+
+- `s`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `position`: An integer type [Int](../data-types/int-uint.md).
+- `length`: Optional. An integer type [Int](../data-types/int-uint.md).
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value. If `position` is negative, it is counted from the end of the string. `length` specifies the length of the snippet within the input to be replaced.
+
+**Example**
+
+```sql
+SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res;
+```
+
+Result:
+
+```text
+┌─res────────────────────────┐
+│ ClickHouse是开源OLAP数据库 │
+└────────────────────────────┘
+```
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index a152066a460..ba9d3fb7a83 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -512,6 +512,8 @@ nullIf
 nullIn
 nullInIgnoreSet
 or
+overlay
+overlayUTF8
 parseDateTime
 parseDateTime32BestEffort
 parseDateTime32BestEffortOrNull

From 63e586d17af1e8a92fc4d2e8af71f1dba4996fea Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 24 Jul 2024 09:57:56 +0200
Subject: [PATCH 048/363] Adjust tests

---
 tests/config/users.d/limits.yaml              |  6 +--
 ..._shard_external_sort_distributed.reference | 40 +++++++++----------
 .../00111_shard_external_sort_distributed.sql |  2 +-
 ...00375_shard_group_uniq_array_of_string.sql |  2 +-
 ...76_shard_group_uniq_array_of_int_array.sql |  2 +-
 ...shard_group_uniq_array_of_string_array.sql |  2 +-
 .../00600_replace_running_query.sh            |  4 +-
 .../00601_kill_running_query.reference        |  2 +-
 .../00906_low_cardinality_cache.sql           |  3 +-
 .../01091_query_profiler_does_not_hang.sql    |  2 +-
 .../0_stateless/01293_show_settings.reference |  1 +
 .../0_stateless/01485_256_bit_multiply.sql    |  2 +
 .../01603_read_with_backoff_bug.sql           |  1 +
 .../01961_roaring_memory_tracking.sql         |  2 +-
 .../02003_memory_limit_in_client.sh           |  6 +--
 .../02161_addressToLineWithInlines.sql        |  2 +-
 .../02226_analyzer_or_like_combine.sql        |  2 +
 .../02234_cast_to_ip_address.reference        |  8 ++--
 .../02343_aggregation_pipeline.reference      |  6 +--
 .../02353_simdjson_buffer_overflow.sql        |  1 +
 .../0_stateless/02372_now_in_block.sql        |  1 +
 .../02536_delta_gorilla_corruption.sql        |  2 +-
 22 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml
index 4f3f439a997..1c9fff9f4c8 100644
--- a/tests/config/users.d/limits.yaml
+++ b/tests/config/users.d/limits.yaml
@@ -27,9 +27,9 @@ profiles:
       max_execution_speed: 100G
       max_execution_speed_bytes: 10T
max_estimated_execution_time: 600 - max_columns_to_read: 10K - max_temporary_columns: 10K - max_temporary_non_const_columns: 10K + max_columns_to_read: 20K + max_temporary_columns: 20K + max_temporary_non_const_columns: 20K max_sessions_for_user: 1K max_rows_in_set: 10G max_bytes_in_set: 10G diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference index df5aa77af60..7534c12a0d8 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference @@ -1,20 +1,20 @@ -7040546 -7040546 -4327029 -4327029 -1613512 -1613512 -8947307 -8947307 -6233790 -6233790 -3520273 -3520273 -806756 -806756 -8140551 -8140551 -5427034 -5427034 -2713517 -2713517 +4437158 +4437158 +1723641 +1723641 +3630402 +3630402 +916885 +916885 +2823646 +2823646 +110129 +110129 +4730407 +4730407 +2016890 +2016890 +3923651 +3923651 +1210134 +1210134 diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 88a05f59111..ef9c0f9f9d0 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -6,6 +6,6 @@ SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; -SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; +SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 4999980, 20; DROP TABLE numbers10m; diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index f32a64cd30f..445ffe66f64 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '60M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 43066880102..7593e1e1580 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -6,7 +6,7 @@ CREATE TABLE group_uniq_arr_int ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int 
GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 1c4376ad577..8b48ee673f3 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -6,6 +6,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index e7022875086..8f21443d589 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) TEST_PREFIX=$RANDOM ${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" -${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1, max_rows_to_read=0" ${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --max_rows_to_read=0 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id diff --git a/tests/queries/0_stateless/00601_kill_running_query.reference b/tests/queries/0_stateless/00601_kill_running_query.reference index 3917ff89482..7824d5804bc 100644 --- a/tests/queries/0_stateless/00601_kill_running_query.reference +++ b/tests/queries/0_stateless/00601_kill_running_query.reference @@ -1 +1 @@ -waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) +waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0 diff --git 
a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 15a53841761..efd96746dc4 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,6 @@ +SET max_rows_to_read = '100M' drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into lc_00906 select '0123456789' from numbers(100000000) SETTINGS max_rows_to_read = '100M'; +insert into lc_00906 select '0123456789' from numbers(100000000); select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql index 21a84bdd691..45f1a00ae23 100644 --- a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql +++ b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql @@ -1,4 +1,4 @@ -- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug -SET query_profiler_cpu_time_period_ns = 1; +SET query_profiler_cpu_time_period_ns = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(1000000000); diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 9d326f16a3b..8b383813c9f 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -6,5 +6,6 @@ external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_memory_usage UInt64 5000000000 +max_memory_usage_for_user UInt64 10000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 5c8c47c9127..18be2b11599 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,5 +1,7 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug +SET max_rows_to_read = '100M' + select count() from ( select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index ec14f637c01..b68d15a2200 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -3,6 +3,7 @@ set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; +set max_rows_to_read = '30M'; drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 485c8192f69..79c722bd629 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M'; +SET max_memory_usage = '100M', max_rows_to_read = '1B'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) 
GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.sh b/tests/queries/0_stateless/02003_memory_limit_in_client.sh index 96028f4847a..94eba8f25be 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.sh +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.sh @@ -4,11 +4,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" $CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index cf400ed34c5..d7ce133f38c 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -6,7 +6,7 @@ SELECT addressToLineWithInlines(1); -- { serverError FUNCTION_NOT_ALLOWED } SET allow_introspection_functions = 1; SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; -SET log_queries = 1; +SET log_queries = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(10000000000) SETTINGS log_comment='02161_test_case'; SET log_queries = 0; SET query_profiler_cpu_time_period_ns = 0; diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql index fbebfc6d281..1cd6b8a4e4d 100644 --- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql +++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql @@ -1,3 +1,5 @@ +SET allow_hyperscan = 1, max_hyperscan_regexp_length = 0, max_hyperscan_regexp_total_length = 0; + EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0; EXPLAIN QUERY TREE run_passes=1 SELECT 
materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1; diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index fa9c6bd0f94..3dd306477b9 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -26,9 +26,9 @@ IPv4 functions IPv6 functions \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \N -\0\0\0\0\0\0\0\0\0\0\0\0 -\0\0\0\0\0\0\0\0\0\0\0\0 -\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 -- :: \N @@ -37,7 +37,7 @@ IPv6 functions ::ffff:127.0.0.1 :: \N -100000000 +20000000 -- ::ffff:127.0.0.1 -- diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index bf61eb6da0a..eb013200a17 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -1,6 +1,6 @@ -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -16,7 +16,7 @@ ExpressionTransform × 16 ExpressionTransform (ReadFromSystemNumbers) NumbersRange 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -32,7 +32,7 @@ ExpressionTransform × 16 ExpressionTransform × 16 (ReadFromSystemNumbers) NumbersRange × 16 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; (Expression) ExpressionTransform (Sorting) diff --git a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql index b324f834053..e7c6c272102 100644 --- a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql +++ b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql @@ -2,5 +2,6 @@ SET max_execution_time = 3; SET timeout_overflow_mode = 'break'; +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM system.numbers_mt WHERE NOT ignore(JSONExtract('{' || repeat('"a":"b",', rand() % 10) || '"c":"d"}', 'a', 'String')) FORMAT Null; diff --git a/tests/queries/0_stateless/02372_now_in_block.sql b/tests/queries/0_stateless/02372_now_in_block.sql index aee4572ce8d..d0aec471801 100644 --- a/tests/queries/0_stateless/02372_now_in_block.sql +++ b/tests/queries/0_stateless/02372_now_in_block.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM (SELECT DISTINCT nowInBlock(), nowInBlock('Pacific/Pitcairn') FROM system.numbers LIMIT 2); SELECT nowInBlock(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT nowInBlock(NULL) IS NULL; diff --git 
a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql index a4e0965e329..3accc726d08 100644 --- a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql @@ -12,7 +12,7 @@ create table bug_delta_gorilla (value_bug UInt64 codec (Delta, Gorilla)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' -as (select 0 from numbers(30000000)); +as (select 0 from numbers(20000000)); select count(*) from bug_delta_gorilla From c2238c57231fe86883eb9b9a7042a72d28d31eae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:45:33 +0200 Subject: [PATCH 049/363] Fix tests --- .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- .../00376_shard_group_uniq_array_of_int_array.sql | 4 +++- .../00377_shard_group_uniq_array_of_string_array.sql | 3 ++- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 +- tests/queries/0_stateless/01485_256_bit_multiply.sql | 2 +- tests/queries/0_stateless/01961_roaring_memory_tracking.sql | 2 +- .../queries/0_stateless/02234_cast_to_ip_address.reference | 6 +++--- tests/queries/0_stateless/02234_cast_to_ip_address.sql | 2 +- 8 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 445ffe66f64..8db91904a6a 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '60M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '100M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 7593e1e1580..24b7f1c30a6 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,12 +1,14 @@ -- Tags: shard +SET max_rows_to_read = '55M'; + DROP TABLE IF EXISTS group_uniq_arr_int; CREATE TABLE group_uniq_arr_int ENGINE = Memory AS SELECT g as id, if(c == 0, [v], if(c == 1, emptyArrayInt64(), [v, v])) as v FROM (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id 
ORDER BY id; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 8b48ee673f3..180a6a04861 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -1,4 +1,5 @@ -- Tags: shard +SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; CREATE TABLE group_uniq_arr_str ENGINE = Memory AS @@ -6,6 +7,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index efd96746dc4..9c1abe1b6df 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,4 +1,4 @@ -SET max_rows_to_read = '100M' +SET max_rows_to_read = '100M'; drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_00906 select '0123456789' from numbers(100000000); diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 18be2b11599..a4e99d51970 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,6 +1,6 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug -SET max_rows_to_read = '100M' +SET max_rows_to_read = '100M'; select count() from ( diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 79c722bd629..22eb8e887f2 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M', max_rows_to_read = '1B'; +SET max_memory_usage = '100M', max_rows_to_read = '1G'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index 3dd306477b9..b9f0a49ec4d 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -26,9 +26,9 @@ IPv4 functions IPv6 functions \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \N -\0\0\0\0\0\0\0\0\0\0��\0\0 -\0\0\0\0\0\0\0\0\0\0��\0\0 -\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 
+\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 -- :: \N diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 51e953da905..c851cfde927 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -71,7 +71,7 @@ SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomStr SELECT '--'; -SELECT cast('test' , 'IPv6'); --{serverError CANNOT_PARSE_IPV6} +SELECT cast('test' , 'IPv6'); -- { serverError CANNOT_PARSE_IPV6 } SELECT cast('::ffff:127.0.0.1', 'IPv6'); SELECT '--'; From 2106d4769ac4fca6604dec3b66831aef4b12e943 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:53:28 +0200 Subject: [PATCH 050/363] Fix tests --- tests/config/users.d/limits.yaml | 1 - tests/queries/0_stateless/02346_additional_filters.sql | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 1c9fff9f4c8..23aaccf9298 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -30,7 +30,6 @@ profiles: max_columns_to_read: 20K max_temporary_columns: 20K max_temporary_non_const_columns: 20K - max_sessions_for_user: 1K max_rows_in_set: 10G max_bytes_in_set: 10G max_rows_in_join: 10G diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index f6b665713ec..5a799e1c8c1 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -4,6 +4,8 @@ drop table if exists table_2; drop table if exists v_numbers; drop table if exists mv_table; +SET max_rows_to_read = 0; + create table table_1 (x UInt32, y String) engine = MergeTree order by x; insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); From 878a340317863e94aec61476e105342aef997c7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 11:47:07 +0200 Subject: [PATCH 051/363] Fix tests --- docker/test/stateful/run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 304bfd7b533..5532df40fdf 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -200,7 +200,8 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, 
SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + # AWS S3 is very inefficient, so increase memory even further: + clickhouse-client --max_memory_usage 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" From 036485a657a35d764ad33f912d0d10b37d05e59b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 12:15:48 +0200 Subject: [PATCH 052/363] Fix error --- docker/test/stateful/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 5532df40fdf..bd2e38ff00f 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -201,7 +201,7 @@ else fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, 
UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" # AWS S3 is very inefficient, so increase memory even further: - clickhouse-client --max_memory_usage 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 20G --max_memory_usage_for_user 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" From e2c78844a005aa5c04e454df6ff7e0508c967d18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 14:53:09 +0200 Subject: [PATCH 053/363] Fix tests --- docker/test/stateful/run.sh | 2 +- tests/queries/0_stateless/02177_issue_31009.sql | 2 ++ .../00088_global_in_one_shard_and_rows_before_limit.sql | 2 +- tests/queries/1_stateful/00147_global_in_aggregate_function.sql | 2 +- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 2 +- .../queries/1_stateful/00182_simple_squashing_transform_bug.sql | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index bd2e38ff00f..fde8b8ae529 100755 --- 
a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -201,7 +201,7 @@ else fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" # AWS S3 is very inefficient, so increase memory even further: - clickhouse-client --max_memory_usage 20G --max_memory_usage_for_user 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 30G --max_memory_usage_for_user 30G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS 
enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/tests/queries/0_stateless/02177_issue_31009.sql b/tests/queries/0_stateless/02177_issue_31009.sql index f25df59f4b4..5c62b5a9c2f 100644 --- a/tests/queries/0_stateless/02177_issue_31009.sql +++ b/tests/queries/0_stateless/02177_issue_31009.sql @@ -8,6 +8,8 @@ DROP TABLE IF EXISTS right; CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; +SET max_rows_to_read = '50M'; + INSERT INTO left SELECT number, toString(number) FROM numbers(25367182); INSERT INTO right SELECT number, toString(number) FROM numbers(23124707); diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql index 443808e7bed..8f18f3740e4 100644 --- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql +++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql @@ -1,4 +1,4 @@ -- Tags: shard -SET output_format_write_statistics = 0, max_rows_to_read = 20_000_000; +SET output_format_write_statistics = 0, max_rows_to_read = 50_000_000; SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact; diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql index c156f073573..f0b249e9af4 100644 --- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql +++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql @@ -1,5 +1,5 @@ -- Tags: global -SET max_rows_to_read = 40_000_000; +SET max_rows_to_read = 100_000_000; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index 1a98a531067..184a8edcbcb 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -1,6 +1,6 @@ -- Tags: no-random-settings -SET max_memory_usage = '10G' +SET max_memory_usage = '10G'; SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; -- We had a bug which lead to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more then 3Gb. 
diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index 85bad651090..26e112cff04 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,7 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; -set max_rows_to_read = '20M'; +set max_rows_to_read = '100M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From c837541a7783f14780a7d2535dd6fa2cbf5effd5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 10:11:53 +0800 Subject: [PATCH 054/363] fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 943caf918d6..fa26cc0ff1f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2166,6 +2166,7 @@ outfile overcommit overcommitted overfitting +overlayUTF overparallelization packetpool packetsize From 6725546b312ef52675f8fbd5f41d0ef5327a3e8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 19:35:21 +0200 Subject: [PATCH 055/363] Update some tests --- .../02884_parallel_window_functions.sql | 18 ++++++++++-------- .../03143_asof_join_ddb_long.reference | 4 ++-- .../0_stateless/03143_asof_join_ddb_long.sql | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index c5ab013a198..ea1cd458c65 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -1,6 +1,6 @@ -- Tags: long, no-tsan, no-asan, no-ubsan, no-msan, no-debug -CREATE TABLE window_funtion_threading +CREATE TABLE window_function_threading Engine = MergeTree ORDER BY (ac, nw) AS SELECT @@ -20,7 +20,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -40,7 +40,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -58,7 +58,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -66,6 +66,8 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; +SET max_rows_to_read = 30000000; + SELECT nw, sum(WR) AS R, @@ -77,7 +79,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -88,7 +90,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -99,7 +101,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 GROUP BY ac, @@ -110,7 +112,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, diff --git 
a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference index 2850a8aba98..ae7f7c805f2 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -1,2 +1,2 @@ -49999983751397 10000032 -49999983751397 10000032 +7999995751397 4000032 +7999995751397 4000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 17a67511030..a635fd2e86a 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -11,7 +11,7 @@ AS toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, number % 4 AS key, number AS value - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) AS @@ -33,7 +33,7 @@ AS SELECT toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, 3 AS key - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); SELECT SUM(value), COUNT(*) From a7441669aa87b1551d2211ec4c3e550aaaa86b41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 21:43:03 +0200 Subject: [PATCH 056/363] Update some tests --- ...675_profile_events_from_query_log_and_client.sh | 2 +- .../02884_parallel_window_functions.sql | 2 +- ...967_parallel_replicas_join_algo_and_analyzer.sh | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index e346d9893a7..1cf65ed8120 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "INSERT TO S3" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | sort +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | grep -v 'RequestThrottlerCount' | sort echo "CHECK WITH query_log" $CLICKHOUSE_CLIENT -nq " diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index ea1cd458c65..2207c90a4ee 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -66,7 +66,7 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; -SET max_rows_to_read = 30000000; +SET max_rows_to_read = 40000000; SELECT nw, diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh index 2840482da6d..8cefa873940 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh @@ -90,7 +90,7 @@ $CLICKHOUSE_CLIENT -q " 
select * from (select key, value from num_1) l inner join (select key, value from num_2) r on l.key = r.key order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -128,7 +128,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -154,7 +154,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -179,7 +179,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -205,7 +205,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, 
parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -230,7 +230,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -255,7 +255,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | From ba5a07bcc7b78673e58d0501e85b59e929215bac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 21:47:41 +0200 Subject: [PATCH 057/363] Better tests --- .../01301_aggregate_state_exception_memory_leak.reference | 2 +- .../0_stateless/01301_aggregate_state_exception_memory_leak.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference index b20e7415f52..6282bf366d0 100644 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference @@ -1,2 +1,2 @@ -Memory limit (for query) exceeded +Memory limit exceeded Ok diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 9dd800ceb09..266518d11d4 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -16,5 +16,5 @@ for _ in {1..1000}; do if [[ $elapsed -gt 30 ]]; then break fi -done 2>&1 | grep -o -F 'Memory limit (for query) exceeded' | uniq +done 2>&1 | grep -o -P 'Memory limit .+ exceeded' | sed -r -e 's/(Memory limit)(.+)( exceeded)/\1\3/' | uniq echo 'Ok' From f81e8aa345d64f5fbcae103f92cdc649f0d82d24 Mon Sep 17 00:00:00 2001 
From: Alexey Milovidov Date: Thu, 25 Jul 2024 22:08:32 +0200 Subject: [PATCH 058/363] Update tests --- .../queries/0_stateless/01010_pmj_right_table_memory_limits.sql | 2 ++ tests/queries/0_stateless/01304_direct_io_long.sh | 2 +- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- tests/queries/0_stateless/02151_lc_prefetch.sql | 1 + .../queries/0_stateless/02344_insert_profile_events_stress.sql | 1 + ...02354_distributed_with_external_aggregation_memory_usage.sql | 2 ++ .../02481_parquet_list_monotonically_increasing_offsets.sh | 2 +- tests/queries/0_stateless/02497_remote_disk_fat_column.sql | 2 +- .../02896_max_execution_time_with_break_overflow_mode.sql | 2 +- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 1 + 10 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql index a090be85221..b8f2596f3d5 100644 --- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql +++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel, no-fasttest, no-random-settings +SET max_bytes_in_join = 0; +SET max_rows_in_join = 0; SET max_memory_usage = 32000000; SET join_on_disk_max_files_to_merge = 4; diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index a66239058ab..35d1440bcb5 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT --max_rows_to_read 50M --multiquery " INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" LOG="$CLICKHOUSE_TMP/err-$CLICKHOUSE_DATABASE" -$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG" +$CLICKHOUSE_BENCHMARK --max_rows_to_read 51M --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG" cat "$LOG" | grep Exception cat "$LOG" | grep Loaded diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6625ad916e8..6172afbc699 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. 
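For readers new to the harness, the `-- { serverError MEMORY_LIMIT_EXCEEDED }` tail in the hunk below is the stateless-test convention for "this statement must fail with exactly this error"; the change itself only appends max_rows_to_read=0, presumably so the harness-wide row cap cannot fire before the memory check the test is asserting on. A toy example of the same annotation style, built on an always-throwing query chosen for illustration:

SELECT throwIf(number = 5, 'boom') FROM numbers(10); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }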
-select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296, max_rows_to_read=0; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, -- since they don't need to wait until the aggregation will be finished, diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql index c2b97231145..f8c76038120 100644 --- a/tests/queries/0_stateless/02151_lc_prefetch.sql +++ b/tests/queries/0_stateless/02151_lc_prefetch.sql @@ -3,5 +3,6 @@ drop table if exists tab_lc; CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into tab_lc select number, toString(number % 10) from numbers(20000000); optimize table tab_lc; +SET max_rows_to_read = '21M'; select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1; drop table if exists tab_lc; diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql index e9a790bea5d..902e1da543c 100644 --- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql +++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan +SET max_rows_to_read = 0; create table data_02344 (key Int) engine=Null; -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung. 
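A recurring pattern across these patches: max_rows_to_read caps how many rows a query may pull from storage, 0 disables the check entirely, and string shorthands such as '21M' or '51M' expand to millions of rows. A hedged sketch of the three states, assuming the default read_overflow_mode = 'throw':

SET max_rows_to_read = 1000000;
SELECT count() FROM numbers(2000000); -- fails with TOO_MANY_ROWS: 2M rows against a 1M cap
SET max_rows_to_read = '3M';          -- string shorthand for 3000000
SELECT count() FROM numbers(2000000); -- passes: 2M rows under the 3M cap
SET max_rows_to_read = 0;             -- 0 means "unlimited", as the hunks above use it
SELECT count() FROM numbers(2000000);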
diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 105fb500461..82eb4c93e3d 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,5 +1,7 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage +SET max_rows_to_read = '51M'; + DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 55e6ac2f758..3e28e76d6da 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -11,7 +11,7 @@ echo "Parquet" DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" diff --git a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql index d97109b66f3..65519296602 100644 --- a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql +++ b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql @@ -2,7 +2,7 @@ set allow_suspicious_fixed_string_types=1; create table fat_granularity (x UInt32, fat FixedString(160000)) engine = MergeTree order by x settings storage_policy = 's3_cache'; -insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 8192, max_insert_threads=8; +insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 3000, max_insert_threads = 8, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; -- Too large sizes of FixedString to deserialize select x from fat_granularity prewhere fat like '256\_%' settings max_threads=2; diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index 3e131cad0f0..ecaad62b35a 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,6 +1,6 @@ -- Tags: no-fasttest -SET max_rows_to_read = 0; +SET max_rows_to_read = 0, max_execution_time = 0, max_estimated_execution_time = 0; -- Query stops after timeout without an error SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; diff --git 
a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index a635fd2e86a..18d98dbdfe4 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -35,6 +35,7 @@ AS 3 AS key FROM numbers(0, 4000000); +SET max_rows_to_read = 0; SELECT SUM(value), COUNT(*) FROM skewed_probe From 9c7078bcf7edfc79dbea2cf6e065aa594810ccaf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 02:54:11 +0200 Subject: [PATCH 059/363] Update tests --- tests/queries/0_stateless/00974_query_profiler.sql | 1 + ...0_distributed_group_by_no_merge_order_by_long.sql | 3 ++- .../0_stateless/02122_join_group_by_timeout.sh | 8 ++++---- ...ibuted_with_external_aggregation_memory_usage.sql | 2 +- ..._parquet_list_monotonically_increasing_offsets.sh | 2 +- .../02884_parallel_window_functions.reference | 12 ++++++------ 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index 24e4241b813..71ea14c3d64 100644 --- a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -15,6 +15,7 @@ SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FRO SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; SET log_queries = 1; +SET max_rows_to_read = 0; SELECT count(), ignore('test cpu time query profiler') FROM numbers_mt(10000000000); SET log_queries = 0; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6172afbc699..e980f367de7 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,8 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. 
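The hunk below reworks the previous patch's fix for this file: instead of tacking max_rows_to_read=0 onto the statement's own settings clause, it hoists the override into a session-level SET so the remaining statements in the file inherit it too. For per-query settings like this one the two spellings are equivalent for a single statement; a minimal sketch of the difference, with an illustrative row count:

-- per-statement: the override lives and dies with this one query
SELECT count() FROM numbers(1e6) SETTINGS max_rows_to_read = 0;
-- session-level: every statement that follows inherits the override
SET max_rows_to_read = 0;
SELECT count() FROM numbers(1e6);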
-select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296, max_rows_to_read=0; -- { serverError MEMORY_LIMIT_EXCEEDED } +SET max_rows_to_read = 0; +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, -- since they don't need to wait until the aggregation will be finished, diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 59719f75d7c..79c4f01c98a 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -14,7 +14,7 @@ MAX_PROCESS_WAIT=5 # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --max_execution_time 1 -q \ "SELECT * FROM ( SELECT a.name as n @@ -31,7 +31,7 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 -q \ "SELECT a.name as n FROM ( @@ -48,7 +48,7 @@ echo $? 
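Both client blocks in this script exercise the same contrast: with the default timeout_overflow_mode = 'throw' the server aborts with Code: 159 (TIMEOUT_EXCEEDED) once max_execution_time elapses, while 'break' — the mode the 02896 test above relies on — stops reading and returns whatever was produced in time. A self-contained sketch mirroring the 02896 pattern, where max_block_size = 1 only serves to throttle the scan so the one-second budget reliably expires:

-- default 'throw': the query fails once the budget is spent
SELECT count() FROM numbers(100000000) SETTINGS max_block_size = 1, max_execution_time = 1; -- { serverError TIMEOUT_EXCEEDED }
-- 'break': the query stops early and returns a partial count instead of an error
SELECT count() FROM numbers(100000000) SETTINGS max_block_size = 1, max_execution_time = 1, timeout_overflow_mode = 'break';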
# HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0&max_execution_time=1" -d \ "SELECT * FROM ( SELECT a.name as n @@ -66,7 +66,7 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0" -d \ "SELECT a.name as n FROM ( diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 82eb4c93e3d..5eea6f149b5 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,6 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage -SET max_rows_to_read = '51M'; +SET max_rows_to_read = '101M'; DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh index 3e28e76d6da..2f512697868 100755 --- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh +++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh @@ -12,6 +12,6 @@ DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory" cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum +${CLICKHOUSE_CLIENT} --max_result_rows 0 --max_result_bytes 0 --query="SELECT * FROM parquet_load" | md5sum ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load" ${CLICKHOUSE_CLIENT} --query="drop table parquet_load" diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference index bac15838dc2..a2cc96dda74 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.reference +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -12,7 +12,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -32,7 +32,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -53,7 +53,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -64,7 +64,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -75,7 +75,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 
GROUP BY ac, @@ -86,7 +86,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, From a03f12fe76969bc2002bc76ce04f7097cb02295b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:17:17 +0200 Subject: [PATCH 060/363] Fix some tests --- ...2354_distributed_with_external_aggregation_memory_usage.sql | 2 +- .../0_stateless/02884_parallel_window_functions.reference | 1 + tests/queries/0_stateless/replication.lib | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 5eea6f149b5..f9da5b3a73c 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -25,6 +25,6 @@ select a, b, c, sum(a) as s from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null -settings max_memory_usage = '5Gi'; +settings max_memory_usage = '5Gi', max_result_rows = 0, max_result_bytes = 0; DROP TABLE t_2354_dist_with_external_aggr; diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference index a2cc96dda74..1f5346a1484 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.reference +++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference @@ -42,6 +42,7 @@ SETTINGS max_threads = 1; 0 2 0 1 2 0 2 2 0 +SET max_rows_to_read = 40000000; SELECT nw, sum(WR) AS R, diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 05651531fba..dcac721859e 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -114,7 +114,8 @@ function check_replication_consistency() # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-syncrhonized state of replicas - res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q \ + # also, disable the limit that is set for tests globally + res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_rows_to_read=0 -q \ "SELECT if((countDistinct(data) as c) == 0, 1, c) FROM From 7f4eb59c42aa4f432c99fa7e4f8240056fa26b91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:33:40 +0200 Subject: [PATCH 061/363] Fix some tests --- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index e980f367de7..805e0b4fedb 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. 
-SET max_rows_to_read = 0; +SET max_rows_to_read = 0, max_result_rows = 0; select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, From 80b925ec75983ea558be536d657bfd2d277f1fbc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:47:58 +0200 Subject: [PATCH 062/363] Update test --- ...arallel_replicas_join_algo_and_analyzer.sh | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh index 8cefa873940..2c5b5a2a07b 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh @@ -86,11 +86,11 @@ SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2) r on l.key = r.key order by l.key limit 10 offset 700000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -123,12 +123,12 @@ SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -149,12 +149,12 @@ SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, 
max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -174,12 +174,12 @@ SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -200,12 +200,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -225,12 +225,12 @@ SETTINGS allow_experimental_analyzer=1, 
parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -250,12 +250,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q " select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | From 342bbb53c65d32698d55649252e6bc29f2a557b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 26 Jul 2024 14:49:36 +0200 Subject: [PATCH 063/363] It was an optimization, not a limit --- tests/config/users.d/limits.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 23aaccf9298..63c4e884a9d 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -34,7 +34,6 @@ profiles: max_bytes_in_set: 10G max_rows_in_join: 10G max_bytes_in_join: 10G - max_rows_in_set_to_optimize_join: 1G max_rows_to_transfer: 1G max_bytes_to_transfer: 1G max_rows_in_distinct: 10G From fd5934d0ad8954a263554ce48402849deafa5341 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 01:28:38 +0200 Subject: [PATCH 064/363] Update test --- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql 
b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 805e0b4fedb..6eb55839f5e 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,6 +1,7 @@ -- Tags: long, distributed, no-random-settings drop table if exists data_01730; +SET max_rows_to_read = 0, max_result_rows = 0; -- does not use 127.1 due to prefer_localhost_replica @@ -12,7 +13,6 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, -- so the initiator will first receive all blocks from remotes and only after start merging, -- and will hit the memory limit. -SET max_rows_to_read = 0, max_result_rows = 0; select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED } -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, From 16e84d1e3678b56d0b32dff1377b6daadf870dd2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 02:37:54 +0200 Subject: [PATCH 065/363] Update test --- tests/queries/0_stateless/01651_lc_insert_tiny_log.sql | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index d11c9120c61..bc5553ad227 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -1,7 +1,7 @@ -set allow_suspicious_low_cardinality_types=1; +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '21M'; drop table if exists perf_lc_num; -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog; +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog; INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); @@ -16,7 +16,7 @@ select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = Log; +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = Log; INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); @@ -31,7 +31,7 @@ select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; -CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = StripeLog; +CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = StripeLog; INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000); @@ -44,5 +44,3 @@ select sum(length(arr)) from perf_lc_num; select sum(length(arr)), sum(num) from perf_lc_num; drop table if exists perf_lc_num; - - From 59ce7ec1c6f478b3ac700a612fea46e7fc12756b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 04:26:46 +0200 Subject: [PATCH 066/363] Update 01651_lc_insert_tiny_log.sql --- tests/queries/0_stateless/01651_lc_insert_tiny_log.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql 
b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql index bc5553ad227..b1d6a39d5c9 100644 --- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql +++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql @@ -1,4 +1,4 @@ -set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '21M'; +set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '31M'; drop table if exists perf_lc_num; CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog; From 9a6de84559cdf927e8747e4cd536676fd3b1c513 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 27 Jul 2024 10:39:55 +0200 Subject: [PATCH 067/363] Update tests --- .../00086_concat_nary_const_with_nonconst_segfault.sql | 2 +- .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql index 2f0ef648983..4b87b2af28d 100644 --- a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql +++ b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql @@ -1 +1 @@ -SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 +SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 6eb55839f5e..83a26c83005 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,7 +1,7 @@ -- Tags: long, distributed, no-random-settings drop table if exists data_01730; -SET max_rows_to_read = 0, max_result_rows = 0; +SET max_rows_to_read = 0, max_result_rows = 0, max_bytes_before_external_group_by = 0; -- does not use 127.1 due to prefer_localhost_replica From 3db505a1327fc5bf96c93f2a510436402be13f3b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:53:38 +0200 Subject: [PATCH 068/363] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 1dda9e72084..dc3bf984cc6 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -67,6 +67,7 @@ static std::initializer_list Date: Sat, 27 Jul 2024 16:10:15 +0200 Subject: [PATCH 069/363] Better limits --- tests/config/users.d/limits.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 63c4e884a9d..46cff73142c 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -38,7 +38,7 @@ profiles: max_bytes_to_transfer: 1G max_rows_in_distinct: 10G max_bytes_in_distinct: 10G - max_memory_usage_for_user: 10G + max_memory_usage_for_user: 32G max_network_bandwidth: 100G max_network_bytes: 1T max_network_bandwidth_for_user: 100G From b1eaec0d49b326df09c46ea3107e98fa083c220d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 
27 Jul 2024 23:29:52 +0200 Subject: [PATCH 070/363] Update 01293_show_settings.reference --- tests/queries/0_stateless/01293_show_settings.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 8b383813c9f..c4c3473ee18 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -6,6 +6,6 @@ external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_memory_usage UInt64 5000000000 -max_memory_usage_for_user UInt64 10000000000 +max_memory_usage_for_user UInt64 32000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 From cd1350c0f380d2c2754231c12aefb891c299b3fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 14:07:36 +0200 Subject: [PATCH 071/363] Mark some tests as long --- .../0_stateless/00111_shard_external_sort_distributed.sql | 2 +- .../00377_shard_group_uniq_array_of_string_array.sql | 2 +- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 ++ tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index ef9c0f9f9d0..93efc317bfa 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,4 +1,4 @@ --- Tags: distributed +-- Tags: distributed, long SET max_memory_usage = 150000000; SET max_bytes_before_external_sort = 10000000; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 180a6a04861..1ec91ac2396 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: shard, long SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 9c1abe1b6df..337fba865fd 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,3 +1,5 @@ +-- Tags: long + SET max_rows_to_read = '100M'; drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index b68d15a2200..85be5082d92 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan +-- Tags: no-tsan, long -- Tag no-tsan: Too long for TSan set enable_filesystem_cache=0; From 1042fc68c2969ee4963268a97daf522d0e163ac5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 16:41:43 +0200 Subject: [PATCH 072/363] Update test --- .../queries/0_stateless/00632_get_sample_block_cache.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) 
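PATCH 069 and PATCH 070 travel together by necessity: the shared test profile raises max_memory_usage_for_user from 10 GB to 32 GB, and 01293_show_settings pins the rendered value, so its reference has to move in the same series. The row the reference asserts on can be reproduced with SHOW SETTINGS; the exact LIKE pattern here is an assumption, not copied from the test:

SHOW SETTINGS LIKE 'max_memory_usage_for_user';
-- name                        type    value
-- max_memory_usage_for_user   UInt64  32000000000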
diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index ae9b6bb7b2c..6a226c4912a 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -2,6 +2,9 @@ SET joined_subquery_requires_alias = 0; +-- We are no longer interested in the old analyzer. +SET allow_experimental_analyzer = 1; + -- This test (SELECT) without cache can take tens minutes DROP TABLE IF EXISTS dict_string; DROP TABLE IF EXISTS dict_ui64; @@ -41,8 +44,6 @@ SETTINGS index_granularity = 8192; CREATE TABLE dict_string (entityIri String) ENGINE = Memory; CREATE TABLE dict_ui64 (learnerId UInt64) ENGINE = Memory; ---SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, 
`repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM `CloM8CwMR2`) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN `CloM8CwMR2` WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY 
JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) FORMAT JSON; - SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, 
`time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM dict_string) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM 
`video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN dict_string WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as 
`time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`); @@ -55,7 +56,7 @@ DROP TABLE video_views; --- Test for tsan: Ensure cache used from one thread +-- Test for tsan: Ensure cache is used from one thread SET max_threads = 32; DROP TABLE IF EXISTS sample_00632; @@ -173,7 +174,6 @@ FROM UNION ALL SELECT * FROM ( SELECT * FROM sample_00632 WHERE x > 0 ) ) GROUP BY x - --HAVING c = 1 ORDER BY x ASC ); DROP TABLE sample_00632; From 81714ce561c99710c3350a40d662966d5bb1a86a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 16:44:53 +0200 Subject: [PATCH 073/363] Make test simpler --- tests/queries/0_stateless/02585_query_status_deadlock.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index 6321ac0064a..932cf593393 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" $CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " -create temporary table tmp as select * from numbers(500000000); +create temporary table tmp as select * from numbers(100000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" @@ -19,7 +19,7 @@ do if [ -n "$res" ]; then break fi - sleep 1 + sleep 1 done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null From aec346676127abba85886a828e663ebf05cfa81e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 23:07:28 +0200 Subject: [PATCH 074/363] Update test --- tests/queries/0_stateless/01161_all_system_tables.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01161_all_system_tables.sh b/tests/queries/0_stateless/01161_all_system_tables.sh index 739df782a39..d4a80d074dc 100755 --- a/tests/queries/0_stateless/01161_all_system_tables.sh +++ b/tests/queries/0_stateless/01161_all_system_tables.sh @@ -19,7 +19,7 @@ function run_selects() thread_num=$1 readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' 
|| name FROM system.tables WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name != 'zookeeper' and name != 'models' - AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher'") + AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher' AND name != 'coverage_log'") for t in "${tables_arr[@]}" do From e7cd07510aee8769cd31c4cbfa6a86d6198d37f5 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 29 Jul 2024 13:03:21 +0200 Subject: [PATCH 075/363] Move setting to 24.8 version --- src/Core/SettingsChangesHistory.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index dc3bf984cc6..41319ac7645 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,6 +57,8 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. static std::initializer_list> settings_changes_history_initializer = { + {"24.8", {{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, + }}, {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"}, @@ -66,7 +68,6 @@ static std::initializer_list Date: Mon, 29 Jul 2024 21:16:19 +0200 Subject: [PATCH 076/363] Update tests --- tests/queries/0_stateless/replication.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index dcac721859e..36309cf0331 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -89,7 +89,7 @@ function check_replication_consistency() # Touch all data to check that it's readable (and trigger PartCheckThread if needed) # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-syncrhonized state of replicas - while ! $CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do + while ! 
$CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_result_rows 0 --max_result_bytes 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do sleep 1; num_tries=$((num_tries+1)) if [ $num_tries -eq 250 ]; then From d89019293e955452ede3e0abbe4b11ab2a3471bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 06:15:13 +0200 Subject: [PATCH 077/363] Update tests --- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 4 ++-- tests/queries/0_stateless/02700_s3_part_INT_MAX.sh | 2 +- tests/queries/1_stateful/00157_cache_dictionary.sql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 85be5082d92..f41b336be46 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,5 +1,5 @@ --- Tags: no-tsan, long --- Tag no-tsan: Too long for TSan +-- Tags: no-tsan, no-msan, long +-- too long. set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index c431686b594..cfb38c60615 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -12,7 +12,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # too slow with this. # # Unfortunately, the test has to buffer it in memory. -$CLICKHOUSE_CLIENT --max_memory_usage 10G -nm -q " +$CLICKHOUSE_CLIENT --max_memory_usage 16G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 3621ff82126..a7c6c099de6 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,7 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 56ba7c5d48cfa648f1d496d55cdfd50450da0299 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 09:33:17 +0200 Subject: [PATCH 078/363] Update a test --- tests/queries/0_stateless/00632_get_sample_block_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index 6a226c4912a..a631cbb8b86 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -57,7 +57,7 @@ DROP TABLE video_views; -- Test for tsan: Ensure cache is used from one thread -SET max_threads = 32; +SET max_threads = 32, max_memory_usage = '10G'; DROP TABLE IF EXISTS sample_00632; From 458509909da0e6702dbb6023775112e4fa1c9a83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 
2024 23:21:14 +0200 Subject: [PATCH 079/363] Update test --- tests/queries/1_stateful/00157_cache_dictionary.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index a7c6c099de6..bb5a21d0779 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,7 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 33ed33d2af0821785165f0ffe80e3c8086c637e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 080/363] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From af324f69e955310f54e451d1120e8526ac3150fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:56:20 +0200 Subject: [PATCH 081/363] Update test --- tests/queries/1_stateful/00158_cache_dictionary_has.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 32c109417de..631a7751550 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -10,6 +10,8 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PA LIFETIME(MIN 300 MAX 600) LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +SET timeout_before_checking_execution_speed = 300; + SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; @@ -20,4 +22,4 @@ SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) a SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; -DROP DATABASE IF EXISTS db_dict; +DROP DATABASE IF EXISTS db_dict; From 461251b519120dd6b0cb64471bfba70160137e50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 19:21:47 +0200 Subject: [PATCH 082/363] Update a test --- .../02450_kill_distributed_query_deadlock.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git 
a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 445f907bcc5..96692ba325a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -5,20 +5,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Test that running distributed query and cancel it ASAP, -# this can trigger a hung/deadlock in ProcessorList. -for i in {1..50}; do +# Test that runs a distributed query and cancels it ASAP, +# this has a chance to trigger a hung/deadlock in ProcessorList. +for i in {1..50} +do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & - while :; do + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 --max_bytes_to_read 0 --max_result_rows 0 --max_result_bytes 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + while true + do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" - if [[ "$killed_queries" -ge 1 ]]; then + if [[ "$killed_queries" -ge 1 ]] + then break fi done wait -n query_return_status=$? - if [[ $query_return_status -eq 0 ]]; then + if [[ $query_return_status -eq 0 ]] + then echo "Query $query_id should be cancelled, however it returns successfully" fi done From af30e72e187713f4e435ff4f1f2382ea6671ba21 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Aug 2024 23:58:57 +0200 Subject: [PATCH 083/363] Better limits --- tests/config/users.d/limits.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 46cff73142c..53cbbfa744a 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -26,6 +26,7 @@ profiles: max_execution_time_leaf: 600 max_execution_speed: 100G max_execution_speed_bytes: 10T + timeout_before_checking_execution_speed: 300 max_estimated_execution_time: 600 max_columns_to_read: 20K max_temporary_columns: 20K From fb23cbdef6b38d1ba90aa1bb304487a3bb947e5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 084/363] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 15e9f8d9cbe4c2f0a1d9973650d93fa195a56276 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 22:52:01 +0200 Subject: [PATCH 085/363] Fix `02481_async_insert_race_long` --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index b0088017d32..def97409bc4 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -29,11 +29,8 @@ function insert3() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & - sleep 0.05 + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" done - - wait } function select1() From 13b435d281ce39a4f8eee889da482dbf272dd1cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 086/363] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 113f7e0c8c9cc61ab72f5ccd8bb6e5fac58cbaea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 19:37:38 +0200 Subject: [PATCH 087/363] Maybe better --- tests/queries/1_stateful/00157_cache_dictionary.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index bb5a21d0779..f1bee538828 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,8 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 + SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192, max_insert_threads = 1, max_threads = 1; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 1b251fe08837aee56fe541bf3ef54ff647650869 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:10:31 +0000 Subject: [PATCH 088/363] Allow to specify min and max for random settings in the test --- docs/en/development/tests.md | 22 +++++ tests/clickhouse-test | 92 ++++++++++++++++--- ...mic_read_subcolumns_compact_merge_tree.sql | 1 + ...ynamic_read_subcolumns_wide_merge_tree.sql | 1 + ...merges_1_horizontal_compact_merge_tree.sql | 4 + ..._merges_1_horizontal_compact_wide_tree.sql | 2 + ...c_merges_1_vertical_compact_merge_tree.sql | 2 + ...amic_merges_1_vertical_wide_merge_tree.sql | 2 + ...merges_2_horizontal_compact_merge_tree.sql | 1 + ...ic_merges_2_horizontal_wide_merge_tree.sql | 1 + 
 ...c_merges_2_vertical_compact_merge_tree.sql |  1 +
 ...amic_merges_2_vertical_wide_merge_tree.sql |  1 +
 ...sted_dynamic_merges_compact_horizontal.sql |  1 +
 ...nested_dynamic_merges_compact_vertical.sql |  1 +
 ..._nested_dynamic_merges_wide_horizontal.sql |  1 +
 ...38_nested_dynamic_merges_wide_vertical.sql |  1 +
 16 files changed, 122 insertions(+), 12 deletions(-)

diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 269995a1a96..f0afa983fec 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -91,6 +91,28 @@ SELECT 1
 In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features.
 For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
 
+### Specifying limits for random settings
+
+A test can specify minimum and maximum allowed values for settings that can be randomized during a test run.
+
+For `.sh` tests, limits are written as a comment on the line after the tags, or on the second line if no tags are specified:
+
+```bash
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None)
+```
+
+For `.sql` tests, limits are written as a SQL comment on the line after the tags, or on the first line if no tags are specified:
+
+```sql
+-- Tags: no-fasttest
+-- Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None)
+SELECT 1
+```
+
+If you need to specify only one limit, you can use `None` for the other one.
+
 ### Choosing the Test Name
 
 The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
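As a quick, self-contained illustration of the documented format, here is a minimal Python sketch of the parse-and-clamp idea that the clickhouse-test patches below implement. The helper names `parse_limits` and `clamp` are illustrative, not the actual functions added by the patches, and the sketch uses the `;` separator that a later patch in this series ("Avoid regexp") settles on, because a naive split on `,` would break inside the `(min, max)` tuples.

```python
from ast import literal_eval


def parse_limits(line: str, comment_sign: str = "--") -> dict:
    """Parse a '-- Random settings limits: a=(1, 100); b=(None, 5)' comment line."""
    prefix = "Random settings limits:"
    body = line[len(comment_sign):].lstrip()
    if not body.startswith(prefix):
        return {}
    limits = {}
    for pair in body[len(prefix):].split(";"):
        name, bounds = pair.split("=")
        limits[name.strip()] = literal_eval(bounds.strip())  # e.g. (100, None)
    return limits


def clamp(random_settings: dict, limits: dict) -> None:
    """Pull each randomized value back into its [min, max] range; None means unbounded."""
    for name, value in random_settings.items():
        if name in limits:
            lo, hi = limits[name]
            if lo is not None and value < lo:
                random_settings[name] = lo
            if hi is not None and value > hi:
                random_settings[name] = hi


limits = parse_limits("-- Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None)")
settings = {"max_block_size": 128, "index_granularity": 42}
clamp(settings, limits)
print(settings)  # {'max_block_size': 1000, 'index_granularity': 100}
```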
diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a29c786e998..ea488e7c3dd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -39,6 +39,7 @@ from errno import ESRCH from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union +from ast import literal_eval as make_tuple try: import termcolor # type: ignore @@ -1068,9 +1069,25 @@ class TestCase: return description + "\n" + def apply_random_settings_limits(self, random_settings): + print("Random settings limits:", self.random_settings_limits) + for setting in random_settings: + if setting in self.random_settings_limits: + min = self.random_settings_limits[setting][0] + if min and random_settings[setting] < min: + random_settings[setting] = min + max = self.random_settings_limits[setting][1] + if max and random_settings[setting] > max: + random_settings[setting] = max + def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name self.tags: Set[str] = suite.all_tags[case] if case in suite.all_tags else set() + self.random_settings_limits = ( + suite.all_random_settings_limits[case] + if case in suite.all_random_settings_limits + else dict() + ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): self.tags.add(tag.strip()) @@ -1112,11 +1129,13 @@ class TestCase: if self.randomize_settings: self.random_settings = SettingsRandomizer.get_random_settings(args) + self.apply_random_settings_limits(self.random_settings) if self.randomize_merge_tree_settings: self.merge_tree_random_settings = ( MergeTreeSettingsRandomizer.get_random_settings(args) ) + self.apply_random_settings_limits(self.merge_tree_random_settings) self.base_url_params = ( os.environ["CLICKHOUSE_URL_PARAMS"] @@ -1900,7 +1919,9 @@ class TestSuite: return test_name @staticmethod - def read_test_tags(suite_dir: str, all_tests: List[str]) -> Dict[str, Set[str]]: + def read_test_tags_and_random_settings_limits( + suite_dir: str, all_tests: List[str] + ) -> (Dict[str, Set[str]], Dict[str, Dict[str, Tuple[int, int]]]): def get_comment_sign(filename): if filename.endswith(".sql") or filename.endswith(".sql.j2"): return "--" @@ -1925,22 +1946,48 @@ class TestSuite: tags = {tag.strip() for tag in tags} return tags + def parse_random_settings_limits_from_line( + line, comment_sign + ) -> Dict[str, Tuple[int, int]]: + if not line.startswith(comment_sign): + return {} + random_settings_limits_str = line[ + len(comment_sign) : + ].lstrip() # noqa: ignore E203 + random_settings_limits_prefix = "Random settings limits:" + if not random_settings_limits_str.startswith(random_settings_limits_prefix): + return {} + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] # noqa: ignore E203 + # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' 
+ random_settings_limits = re.findall( + "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str + ) + random_settings_limits = { + pair[0]: make_tuple(pair[1]) for pair in random_settings_limits + } + return random_settings_limits + def is_shebang(line: str) -> bool: return line.startswith("#!") def find_tag_line(file): - for line in file: + line = file.readline() + while line != "": line = line.strip() if line and not is_shebang(line): return line + line = file.readline() return "" - def load_tags_from_file(filepath): + def load_tags_and_random_settings_limits_from_file(filepath): comment_sign = get_comment_sign(filepath) need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: tag_line = find_tag_line(file) + next_line = file.readline() except UnicodeDecodeError: return [] try: @@ -1950,21 +1997,35 @@ class TestSuite: need_query_params = True except UnicodeDecodeError: pass - parsed_tags = parse_tags_from_line(tag_line, comment_sign) - if need_query_params: - parsed_tags.add("need-query-parameters") - return parsed_tags + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + random_settings_limits_line = next_line if parsed_tags else tag_line + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) + return parsed_tags, random_settings_limits all_tags = {} + all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags = load_tags_from_file(os.path.join(suite_dir, test_name)) + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) if tags: all_tags[test_name] = tags + if random_settings_limits: + all_random_settings_limits[test_name] = random_settings_limits elapsed = (datetime.now() - start_time).total_seconds() if elapsed > 1: - print(f"Tags for suite {suite_dir} read in {elapsed:.2f} seconds") - return all_tags + print( + f"Tags and random settings limits for suite {suite_dir} read in {elapsed:.2f} seconds" + ) + return all_tags, all_random_settings_limits def __init__(self, args, suite_path: str, suite_tmp_path: str, suite: str): self.args = args @@ -1994,9 +2055,16 @@ class TestSuite: self.all_tests: List[str] = self.get_tests_list( self.tests_in_suite_key_func, filter_func ) - self.all_tags: Dict[str, Set[str]] = self.read_test_tags( - self.suite_path, self.all_tests + + all_tags_and_random_settings_limits = ( + self.read_test_tags_and_random_settings_limits( + self.suite_path, self.all_tests + ) ) + self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql index ddfba4418bd..822393d3c78 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql 
b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 5aac5f7b72f..2394893dc8b 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index d2c787040e5..7c2e7c3d2be 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; @@ -31,3 +33,5 @@ optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); drop table test; + +select 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index f99bf771608..aa62435188a 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index be81596d043..bfc7bb9d206 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index f6396af42a8..233667db0a7 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index e133ac3001f..48a6a55378c 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, 
no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index d527081b763..44b298b1c35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index ebccfb77922..f42150720b3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index 104d6018e41..ee4ff6af162 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 1d5c63dcdf1..e0636f053df 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index 2bffe35c577..edfad295e9a 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index fb686091ebb..79d488ec253 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set 
allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index ed195452d56..e2a453b867a 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; From d3dc17453377368defd80cda9f4b95dda6adc9df Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:15:11 +0000 Subject: [PATCH 089/363] Remove log --- tests/clickhouse-test | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 389193836bf..5fcb9fb80f1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -682,7 +682,6 @@ class FailureReason(enum.Enum): BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" - DISTRIBUTED_CACHE = "distributed-cache" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -1071,7 +1070,6 @@ class TestCase: return description + "\n" def apply_random_settings_limits(self, random_settings): - print("Random settings limits:", self.random_settings_limits) for setting in random_settings: if setting in self.random_settings_limits: min = self.random_settings_limits[setting][0] @@ -1211,9 +1209,6 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: - return FailureReason.DISTRIBUTED_CACHE - elif ( tags and ("atomic-database" in tags) @@ -1251,11 +1246,6 @@ class TestCase: ): return FailureReason.SKIP - elif "no-flaky-check" in tags and ( - 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) - ): - return FailureReason.SKIP - elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -2295,6 +2285,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) + break except TimeoutError: break finally: @@ -3280,12 +3271,6 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) - parser.add_argument( - "--distributed-cache", - action="store_true", - default=False, - help="Run tests with enabled distributed cache", - ) parser.add_argument( "--azure-blob-storage", action="store_true", From 18a7a82458ce7ec3f12f7b6751699f119769ed55 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:16:18 +0000 Subject: [PATCH 090/363] Better formatting --- tests/clickhouse-test | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5fcb9fb80f1..bcb8a12625b 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1950,15 +1950,11 @@ class TestSuite: ) -> Dict[str, Tuple[int, int]]: if not line.startswith(comment_sign): return {} - random_settings_limits_str = line[ - len(comment_sign) : - ].lstrip() # noqa: ignore E203 + random_settings_limits_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 random_settings_limits_prefix = 
"Random settings limits:" if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} - random_settings_limits_str = random_settings_limits_str[ - len(random_settings_limits_prefix) : - ] # noqa: ignore E203 + random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' random_settings_limits = re.findall( "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str @@ -1996,25 +1992,20 @@ class TestSuite: need_query_params = True except UnicodeDecodeError: pass - parsed_tags = parse_tags_from_line(tag_line, comment_sign) - if need_query_params: - parsed_tags.add("need-query-parameters") - random_settings_limits_line = next_line if parsed_tags else tag_line - random_settings_limits = parse_random_settings_limits_from_line( - random_settings_limits_line, comment_sign - ) + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + random_settings_limits_line = next_line if parsed_tags else tag_line + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) return parsed_tags, random_settings_limits all_tags = {} all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - ( - tags, - random_settings_limits, - ) = load_tags_and_random_settings_limits_from_file( - os.path.join(suite_dir, test_name) - ) + tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2061,9 +2052,7 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 self.sequential_tests = [] self.parallel_tests = [] From 74a2976810b86086819ee8e6ee1f110ab1e70a37 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:13:03 +0000 Subject: [PATCH 091/363] Fix pylint --- tests/clickhouse-test | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index bcb8a12625b..84f33860484 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1072,12 +1072,12 @@ class TestCase: def apply_random_settings_limits(self, random_settings): for setting in random_settings: if setting in self.random_settings_limits: - min = self.random_settings_limits[setting][0] - if min and random_settings[setting] < min: - random_settings[setting] = min - max = self.random_settings_limits[setting][1] - if max and random_settings[setting] > max: - random_settings[setting] = max + min_value = self.random_settings_limits[setting][0] + if min_value and random_settings[setting] < min_value: + random_settings[setting] = min_value + max_value = self.random_settings_limits[setting][1] + if max_value and random_settings[setting] > max_value: + random_settings[setting] = max_value def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name @@ -2005,7 +2005,7 @@ class TestSuite: all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags, random_settings_limits = 
load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 + tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2052,7 +2052,7 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 self.sequential_tests = [] self.parallel_tests = [] From 5226792b1d8b4e110c63a813fb68c9dd65ea07b7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:48:06 +0000 Subject: [PATCH 092/363] Fix bad merge with master --- tests/clickhouse-test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 84f33860484..c4124982442 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1163,6 +1163,9 @@ class TestCase: elif args.cloud and ("no-replicated-database" in tags): return FailureReason.REPLICATED_DB + elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: + return FailureReason.DISTRIBUTED_CACHE + elif args.cloud and self.name in suite.cloud_skip_list: return FailureReason.NOT_SUPPORTED_IN_CLOUD @@ -1246,6 +1249,11 @@ class TestCase: ): return FailureReason.SKIP + elif "no-flaky-check" in tags and ( + 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) + ): + return FailureReason.SKIP + elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -2274,7 +2282,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) - break except TimeoutError: break finally: @@ -3260,6 +3267,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--distributed-cache", + action="store_true", + default=False, + help="Run tests with enabled distributed cache", + ) parser.add_argument( "--azure-blob-storage", action="store_true", From bb33dca38470aba044da06938cc96ca55166262d Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:49:08 +0000 Subject: [PATCH 093/363] Fix unrelated changes --- tests/clickhouse-test | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c4124982442..72136404796 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -682,6 +682,7 @@ class FailureReason(enum.Enum): BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" + DISTRIBUTED_CACHE = "distributed-cache" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -1163,9 +1164,6 @@ class TestCase: elif args.cloud and ("no-replicated-database" in tags): return FailureReason.REPLICATED_DB - elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: - return FailureReason.DISTRIBUTED_CACHE - elif args.cloud and self.name in suite.cloud_skip_list: return FailureReason.NOT_SUPPORTED_IN_CLOUD @@ -1212,6 +1210,9 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB + elif tags 
and ("no-distributed-cache" in tags) and args.distributed_cache: + return FailureReason.DISTRIBUTED_CACHE + elif ( tags and ("atomic-database" in tags) @@ -1250,7 +1251,7 @@ class TestCase: return FailureReason.SKIP elif "no-flaky-check" in tags and ( - 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) + 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) ): return FailureReason.SKIP From 71c06b40cbf65abda49579bf5ac08e46575c7d29 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 09:07:21 +0000 Subject: [PATCH 094/363] Avoid regexp --- docs/en/development/tests.md | 4 ++-- tests/clickhouse-test | 14 +++++++------- ...amic_merges_1_horizontal_compact_merge_tree.sql | 2 +- ...namic_merges_1_horizontal_compact_wide_tree.sql | 2 +- ...ynamic_merges_1_vertical_compact_merge_tree.sql | 2 +- ...7_dynamic_merges_1_vertical_wide_merge_tree.sql | 2 +- ...amic_merges_2_horizontal_compact_merge_tree.sql | 2 +- ...dynamic_merges_2_horizontal_wide_merge_tree.sql | 2 +- ...ynamic_merges_2_vertical_compact_merge_tree.sql | 2 +- ...7_dynamic_merges_2_vertical_wide_merge_tree.sql | 2 +- ...38_nested_dynamic_merges_compact_horizontal.sql | 2 +- ...3038_nested_dynamic_merges_compact_vertical.sql | 2 +- ...03038_nested_dynamic_merges_wide_horizontal.sql | 2 +- .../03038_nested_dynamic_merges_wide_vertical.sql | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index f0afa983fec..bc9f85ef323 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -100,14 +100,14 @@ For `.sh` tests limits are written as a comment on the line next to tags or on t ```bash #!/usr/bin/env bash # Tags: no-fasttest -# Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None) +# Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) ``` For `.sql` tests tags are placed as a SQL comment in the line next to tags or in the first line: ```sql -- Tags: no-fasttest --- Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None) +-- Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) SELECT 1 ``` diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 72136404796..e5378e8c7f3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1964,13 +1964,13 @@ class TestSuite: if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 - # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' - random_settings_limits = re.findall( - "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str - ) - random_settings_limits = { - pair[0]: make_tuple(pair[1]) for pair in random_settings_limits - } + # limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' 
+ random_settings_limits = {} + for setting_and_limit in random_settings_limits_str.split(';'): + setting_and_limit = setting_and_limit.split('=') + random_settings_limits[setting_and_limit[0].strip()] = make_tuple( + setting_and_limit[1] + ) return random_settings_limits def is_shebang(line: str) -> bool: diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index 7c2e7c3d2be..46f1c78b255 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index aa62435188a..bf0c6ef0374 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index bfc7bb9d206..fb82369a7a3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index 233667db0a7..c026bc04a56 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index 71c6841515a..7f1934091f2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), 
merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index 94ae1d867f5..f1f387fae9d 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index 98ae230636a..cc11c454d38 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index f8f5bd5d9e1..ffb2aca8b35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 13c1fd8b485..9ec4e4f949b 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index daa95071cdb..ed4de931841 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set 
use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index dea7e7c0971..bd3c4b58a8f 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index bf1323f2ea9..81bcda5443d 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; From 621f4bbf9e04b62628a9c053b3f39c6b8a67a52d Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:13:20 +0200 Subject: [PATCH 095/363] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index f815a21b6a1..bb062deaab0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,6 +79,7 @@ static std::initializer_list Date: Tue, 6 Aug 2024 15:01:10 +0200 Subject: [PATCH 096/363] Fix pylint --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e5378e8c7f3..dea303ecdfb 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1086,7 +1086,7 @@ class TestCase: self.random_settings_limits = ( suite.all_random_settings_limits[case] if case in suite.all_random_settings_limits - else dict() + else {} ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): From 3ebc3852f404a1ce392e9b66b8356fa9da701097 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 22 Jul 2024 15:28:29 +0000 Subject: [PATCH 097/363] Allow filtering ip addresses by ip family in DNS resolver --- programs/server/Server.cpp | 3 + src/Common/DNSResolver.cpp | 93 ++++++-- src/Common/DNSResolver.h | 9 + src/Core/ServerSettings.h | 2 + src/Core/SettingsChangesHistory.cpp | 261 +++++++++++++++++++++++ tests/integration/test_dns_cache/test.py | 67 +++++- 6 files changed, 418 insertions(+), 17 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7800ee9ff00..3126f65ef09 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1768,6 +1768,9 @@ try new_server_settings.http_connections_store_limit, }); + DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6); + + if (global_context->isServerCompletelyStarted()) 
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 4b577a251af..051e6e63091 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include "DNSPTRResolverProvider.h" @@ -139,12 +141,6 @@ DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) -{ - auto [result, _ ] = cache.getOrSet(host, [&host]() {return std::make_shared(resolveIPAddressImpl(host), std::chrono::system_clock::now());}); - return result->addresses; -} std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -198,21 +194,90 @@ struct DNSResolver::Impl std::atomic disable_cache{false}; }; +struct DNSResolver::AddressFilter +{ + struct DNSFilterSettings + { + std::atomic dns_allow_resolve_names_to_ipv4{true}; + std::atomic dns_allow_resolve_names_to_ipv6{true}; + }; -DNSResolver::DNSResolver() : impl(std::make_unique()), log(getLogger("DNSResolver")) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) + { + bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + + if (dns_resolve_ipv4 && dns_resolve_ipv6) + { + return; + } + if (!dns_resolve_ipv4 && !dns_resolve_ipv6) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_allow_resolve_names_to_ipv4 and dns_allow_resolve_names_to_ipv6 are both disabled"); + } + addresses.erase( + std::remove_if(addresses.begin(), addresses.end(), + [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }), + addresses.end() + ); + } + + void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + { + settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; + settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + } + + DNSFilterSettings settings; +}; + + +DNSResolver::DNSResolver() + : impl(std::make_unique()) + , addressFilter(std::make_unique()) + , log(getLogger("DNSResolver")) {} + + +DNSResolver::IPAddresses DNSResolver::getResolvedIPAdressessWithFiltering(const std::string & host) +{ + auto addresses = resolveIPAddressImpl(host); + addressFilter->performAddressFiltering(addresses); + + if (addresses.empty()) + { + ProfileEvents::increment(ProfileEvents::DNSError); + throw DB::NetException(ErrorCodes::DNS_ERROR, "After filtering, there are no resolved addresses for host ({}).", host); + } + return addresses; +} + +DNSResolver::IPAddresses DNSResolver::resolveIPAddressWithCache(const std::string & host) +{ + auto [result, _ ] = impl->cache_host.getOrSet(host, [&host, this]() {return std::make_shared(getResolvedIPAdressessWithFiltering(host), std::chrono::system_clock::now());}); + return result->addresses; +} Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { return pickAddress(resolveHostAll(host)); // random order -> random pick } +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool
dns_allow_resolve_names_to_ipv6_) +{ + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); +} + DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) - return resolveIPAddressImpl(host); + return getResolvedIPAdressessWithFiltering(host); addToNewHosts(host); - return resolveIPAddressWithCache(impl->cache_host, host); + return resolveIPAddressWithCache(host); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -232,7 +297,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -241,7 +306,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } std::vector DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -254,7 +319,7 @@ std::vector DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); + std::vector ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -419,8 +484,8 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); - auto new_value = resolveIPAddressImpl(host); + const auto old_value = resolveIPAddressWithCache(host); + auto new_value = getResolvedIPAdressessWithFiltering(host); const bool result = old_value != new_value; impl->cache_host.set(host, std::make_shared(std::move(new_value), std::chrono::system_clock::now())); return result; diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 1ddd9d3b991..b35f55dfcd2 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -68,6 +68,8 @@ public: /// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); + void setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6); + /// Returns a copy of cache entries std::vector> cacheEntries() const; @@ -86,6 +88,10 @@ private: struct Impl; std::unique_ptr impl; + + struct AddressFilter; + std::unique_ptr addressFilter; + LoggerPtr log; /// Updates cached value and returns true it has been changed. 
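For reference, the filter that setFilterSettings configures reduces to a small pure function over the resolved address list. Below is a minimal standalone sketch of that rule, assuming only Poco and C++20 are available; filterByFamily and the main driver are illustrative names, not part of the patch:

#include <iostream>
#include <vector>
#include <Poco/Net/IPAddress.h>

// Keep only the addresses whose family is still allowed. This mirrors the
// behaviour of AddressFilter::performAddressFiltering in the patch above.
std::vector<Poco::Net::IPAddress> filterByFamily(
    std::vector<Poco::Net::IPAddress> addresses, bool allow_ipv4, bool allow_ipv6)
{
    std::erase_if(addresses, [=](const Poco::Net::IPAddress & address)
    {
        return (address.family() == Poco::Net::IPAddress::IPv4 && !allow_ipv4)
            || (address.family() == Poco::Net::IPAddress::IPv6 && !allow_ipv6);
    });
    return addresses;
}

int main()
{
    std::vector<Poco::Net::IPAddress> addresses
        = {Poco::Net::IPAddress("127.0.0.1"), Poco::Net::IPAddress("::1")};
    for (const auto & address : filterByFamily(addresses, /*allow_ipv4=*/ true, /*allow_ipv6=*/ false))
        std::cout << address.toString() << '\n'; // prints only 127.0.0.1
}

The both-families-disabled case is intentionally left to the caller in this sketch; in the patch it throws BAD_ARGUMENTS before any filtering happens.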
@@ -94,6 +100,9 @@ private: void addToNewHosts(const String & host); void addToNewAddresses(const Poco::Net::IPAddress & address); + + IPAddresses resolveIPAddressWithCache(const std::string & host); + IPAddresses getResolvedIPAdressessWithFiltering(const std::string & host); }; } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d13e6251ca9..6c23e3b95f6 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -106,6 +106,8 @@ namespace DB M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolving names to IPv4 addresses.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolving names to IPv6 addresses.", 0) \ \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0ccbd874a3d..a76e214b1fc 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,6 +500,267 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression
level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with 
count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + 
{"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. 
It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index a6db26c8575..5e120dc42aa 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,6 +32,7 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", + ipv4_address="10.5.95.11", ) @@ -39,9 +40,6 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() - - _fill_nodes([node1, node2], "test_table_drop") - yield cluster except Exception 
as ex: @@ -59,6 +57,8 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. + _fill_nodes([node1, node2], "test_table_drop") + node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,6 +98,67 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + <clickhouse> + <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> + <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> + </clickhouse> + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + host_ipv6 = node2.ipv6_address + host_ipv4 = node2.ipv4_address + + node2.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node2.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node2.query("SYSTEM DROP DNS CACHE") + node2.query("SYSTEM DROP CONNECTIONS CACHE") + node2.query("SYSTEM RELOAD CONFIG") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node2.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node2.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node2.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node2.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node2.query("SYSTEM RELOAD CONFIG") + + node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], From 0d5cb9f75a527e90aed18860efbd5ed1f9dcd775 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 23 Jul 2024 09:25:02 +0000 Subject: [PATCH 098/363] Review fixes --- src/Common/DNSResolver.cpp | 34 ++++++++++++++--------------- src/Core/SettingsChangesHistory.cpp | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 051e6e63091..08111d7f2af 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,8 +12,7 @@ #include #include #include -#include -#include +#include "Common/MultiVersion.h" #include #include "DNSPTRResolverProvider.h" @@ -198,14 +197,17 @@ struct DNSResolver::AddressFilter { struct DNSFilterSettings { - std::atomic dns_allow_resolve_names_to_ipv4{true}; - std::atomic dns_allow_resolve_names_to_ipv6{true}; + bool dns_allow_resolve_names_to_ipv4{true}; + bool dns_allow_resolve_names_to_ipv6{true}; }; + AddressFilter() : settings(std::make_unique()) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) { - bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; - bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + const auto current_settings = settings.get(); + bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = current_settings->dns_allow_resolve_names_to_ipv6; if (dns_resolve_ipv4 && dns_resolve_ipv6) {
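This hunk (completed by the next one) is about snapshot consistency, not just style: with two independent std::atomic flags, a reader could observe the IPv4 flag from one setSettings call and the IPv6 flag from another. Publishing one immutable DNSFilterSettings object fixes that. A minimal sketch of the same pattern using only the standard library follows; SnapshotSettings is an illustrative stand-in for MultiVersion, not the actual implementation:

#include <memory>

struct DNSFilterSettings
{
    bool dns_allow_resolve_names_to_ipv4{true};
    bool dns_allow_resolve_names_to_ipv6{true};
};

// Writers publish a complete new settings object; readers take one shared_ptr
// and therefore always see both flags from the same update.
class SnapshotSettings
{
public:
    void set(DNSFilterSettings settings)
    {
        std::atomic_store(&current, std::make_shared<const DNSFilterSettings>(settings));
    }
    std::shared_ptr<const DNSFilterSettings> get() const
    {
        return std::atomic_load(&current);
    }
private:
    std::shared_ptr<const DNSFilterSettings> current = std::make_shared<const DNSFilterSettings>();
};

int main()
{
    SnapshotSettings settings;
    settings.set({/*ipv4*/ true, /*ipv6*/ false});
    auto snapshot = settings.get(); // both flags come from the same set() call
    return snapshot->dns_allow_resolve_names_to_ipv6 ? 1 : 0;
}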
@@ -215,24 +217,20 @@ struct DNSResolver::AddressFilter { throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_allow_resolve_names_to_ipv4 and dns_allow_resolve_names_to_ipv6 are both disabled"); } - addresses.erase( - std::remove_if(addresses.begin(), addresses.end(), - [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) - { - return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) - || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); - }), - addresses.end() - ); + + std::erase_if(addresses, [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }); } void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) { - settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; - settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); } - DNSFilterSettings settings; + MultiVersion settings; }; diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a76e214b1fc..01b9bca795f 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,7 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 12:58:50 +0000 Subject: [PATCH 099/363] Fix test --- src/Core/SettingsChangesHistory.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 01b9bca795f..ac427e2e03e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,9 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 16:56:29 +0000 Subject: [PATCH 100/363] Fix tidy build --- src/Common/DNSResolver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 08111d7f2af..bbee7d259f0 100644 ---
a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -225,9 +225,9 @@ struct DNSResolver::AddressFilter }); } - void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + void setSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); + settings.set(std::make_unique(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6)); } MultiVersion settings; @@ -264,9 +264,9 @@ Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) return pickAddress(resolveHostAll(host)); // random order -> random pick } -void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6); } DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) From 4143eea587b808086cceb98025906646fa78c96a Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 08:35:38 +0000 Subject: [PATCH 102/363] Add test to skip parallel --- tests/integration/parallel_skip.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..6689572aeb7 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -1,6 +1,7 @@ [ "test_dns_cache/test.py::test_dns_cache_update", "test_dns_cache/test.py::test_ip_change_drop_dns_cache", + "test_dns_cache/test.py::test_dns_resolver_filter", "test_dns_cache/test.py::test_ip_change_update_dns_cache", "test_dns_cache/test.py::test_user_access_ip_change[node0]", "test_dns_cache/test.py::test_user_access_ip_change[node1]", From 012ea3cc6d09c50f29fe4d7964aa18ee038c35b2 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 13:29:36 +0000 Subject: [PATCH 103/363] Rebase --- src/Core/SettingsChangesHistory.cpp | 259 ----------------------- tests/integration/test_dns_cache/test.py | 138 ++++++------ 2 files changed, 74 insertions(+), 323 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ac427e2e03e..0ccbd874a3d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,265 +500,6 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 5e120dc42aa..36401517429 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,7 +32,6 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", - ipv4_address="10.5.95.11", ) @@ -40,6 +39,9 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() + + _fill_nodes([node1, node2], "test_table_drop") + yield cluster except Exception as ex: @@ -57,8 +59,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. 
- _fill_nodes([node1, node2], "test_table_drop") - node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,67 +98,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") -def _render_filter_config(allow_ipv4, allow_ipv6): - config = f""" - <clickhouse> - <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> - <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> - </clickhouse> - """ - return config - - -@pytest.mark.parametrize( - "allow_ipv4, allow_ipv6", - [ - (True, False), - (False, True), - (False, False), - ], -) -def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): - host_ipv6 = node2.ipv6_address - host_ipv4 = node2.ipv4_address - - node2.set_hosts( - [ - (host_ipv6, "test_host"), - (host_ipv4, "test_host"), - ] - ) - node2.replace_config( - "/etc/clickhouse-server/config.d/dns_filter.xml", - _render_filter_config(allow_ipv4, allow_ipv6), - ) - - node2.query("SYSTEM DROP DNS CACHE") - node2.query("SYSTEM DROP CONNECTIONS CACHE") - node2.query("SYSTEM RELOAD CONFIG") - - if not allow_ipv4 and not allow_ipv6: - with pytest.raises(QueryRuntimeException): - node2.query("SELECT * FROM remote('lost_host', 'system', 'one')") - else: - node2.query("SELECT * FROM remote('test_host', system, one)") - assert ( - node2.query( - "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" - ) - == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" - ) - - node2.exec_in_container( - [ - "bash", - "-c", - "rm /etc/clickhouse-server/config.d/dns_filter.xml", - ], - privileged=True, - user="root", - ) - node2.query("SYSTEM RELOAD CONFIG") - - node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], @@ -378,3 +317,74 @@ def test_host_is_drop_from_cache_after_consecutive_failures( assert node4.wait_for_log_line( "Cached hosts dropped:.*InvalidHostThatDoesNotExist.*" ) + + +node7 = cluster.add_instance( + "node7", + main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], + with_zookeeper=True, + ipv6_address="2001:3984:3989::1:1117", + ipv4_address="10.5.95.17", +) + + +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + <clickhouse> + <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> + <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> + </clickhouse> + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + node = node7 + host_ipv6 = node.ipv6_address + host_ipv4 = node.ipv4_address + + node.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node.query("SYSTEM RELOAD CONFIG") + node.query("SYSTEM DROP DNS CACHE") + node.query("SYSTEM DROP CONNECTIONS CACHE") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node.query("SYSTEM RELOAD CONFIG") From d124de847b44344d9346c4d1b76ada03b31c58c8 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug
2024 16:06:59 +0000 Subject: [PATCH 104/363] Fix style --- tests/clickhouse-test | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index dea303ecdfb..c3b1d4d907c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1959,15 +1959,17 @@ class TestSuite: ) -> Dict[str, Tuple[int, int]]: if not line.startswith(comment_sign): return {} - random_settings_limits_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 + random_settings_limits_str = line[len(comment_sign) :].lstrip() random_settings_limits_prefix = "Random settings limits:" if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} - random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] # limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' random_settings_limits = {} - for setting_and_limit in random_settings_limits_str.split(';'): - setting_and_limit = setting_and_limit.split('=') + for setting_and_limit in random_settings_limits_str.split(";"): + setting_and_limit = setting_and_limit.split("=") random_settings_limits[setting_and_limit[0].strip()] = make_tuple( setting_and_limit[1] ) @@ -2014,7 +2016,12 @@ class TestSuite: all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2061,7 +2068,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] From 0ebe8e35511f764b61cb2428433132644f7deb96 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:38:23 +0200 Subject: [PATCH 105/363] Fix style --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e6949dd4fba..1ebc9b07748 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,7 +79,7 @@ static std::initializer_list Date: Wed, 7 Aug 2024 14:42:42 +0200 Subject: [PATCH 106/363] Fix style check --- tests/clickhouse-test | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c3b1d4d907c..5946e561949 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2068,10 +2068,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] - + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] 
for test_name in self.all_tests: From 725640613b0d1cf47515697b5856a85953b73483 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 7 Aug 2024 22:35:57 +0200 Subject: [PATCH 107/363] Add annotations --- .../0_stateless/00111_shard_external_sort_distributed.sql | 3 ++- .../0_stateless/00376_shard_group_uniq_array_of_int_array.sql | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 93efc317bfa..9e06654195d 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,4 +1,5 @@ --- Tags: distributed, long +-- Tags: distributed, long, no-flaky-check +-- ^ no-flaky-check - sometimes longer than 600s with ThreadFuzzer. SET max_memory_usage = 150000000; SET max_bytes_before_external_sort = 10000000; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 24b7f1c30a6..4453c26283c 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: long SET max_rows_to_read = '55M'; From da6378752fc562f7b8df487f86fe2257215eb96a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:07:43 +0200 Subject: [PATCH 108/363] Add annotations --- tests/queries/0_stateless/01304_direct_io_long.sh | 3 ++- tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 6ab25eebaf7..867c37667fe 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long, no-object-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build, no-flaky-check +# It can be too long with ThreadFuzzer CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 index 47940356302..7df77595347 100644 --- a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 +++ b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-flaky-check +-- It can be too long with ThreadFuzzer DROP TABLE IF EXISTS left; DROP TABLE IF EXISTS right; From 4fac40a3cb4823f0014a2c5324593b6ef8a6b6ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:41:22 +0200 Subject: [PATCH 109/363] Step back --- programs/client/Client.cpp | 10 ++++++++++ programs/local/LocalServer.cpp | 3 +++ src/Client/ClientBase.h | 3 +++ src/Client/LocalConnection.cpp | 1 + src/Client/LocalConnection.h | 2 ++ src/Client/Suggest.cpp | 4 ---- 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 0f136664de8..1d99d223ee9 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -327,6 +328,7 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; + auto & thread_status = 
MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -341,6 +343,14 @@ try initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); + { + // All that just to set DB::CurrentThread::get().getGlobalContext() + // which is required for client timezone (pushed from server) to work. + auto thread_group = std::make_shared(); + const_cast(thread_group->global_context) = global_context; + thread_status.attachToGroup(thread_group, false); + } + /// Includes delayed_interactive. if (is_interactive) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb89c6c5510..0d731ed0e14 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -29,8 +29,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -461,6 +463,7 @@ int LocalServer::main(const std::vector & /*args*/) try { UseSSL use_ssl; + thread_status.emplace(); StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 26deb1eda26..1a23b6b1363 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -296,6 +296,9 @@ protected: Settings cmd_settings; MergeTreeSettings cmd_merge_tree_settings; + /// thread status should be destructed before shared context because it relies on process list. + std::optional thread_status; + ServerConnectionPtr connection; ConnectionParameters connection_parameters; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index b0a5ef99253..072184e0a66 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -128,6 +128,7 @@ void LocalConnection::sendQuery( state->query_id = query_id; state->query = query; + state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); state->profile_queue = std::make_shared(std::numeric_limits::max()); CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 5cc3d0b30ec..b424c5b5aa3 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -61,6 +61,8 @@ struct LocalQueryState /// Time after the last check to stop the request and send the progress. 
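The thread-group attachment added in Client.cpp above exists only to populate thread-local state before the server pushes a timezone. A toy Python model of that dependency, not the real ThreadStatus API; `apply_server_timezone` and the dict-based context are invented for illustration:

```python
import threading

current_thread = threading.local()  # toy stand-in for DB::CurrentThread

def attach_to_group(global_context: dict) -> None:
    # What the new block in Client::main arranges: make the global context
    # reachable from the thread's thread-local state.
    current_thread.global_context = global_context

def apply_server_timezone(tz: str) -> None:
    # Server-pushed timezone handling needs the thread-local context set first.
    ctx = getattr(current_thread, "global_context", None)
    if ctx is None:
        raise RuntimeError("thread is not attached to a thread group")
    ctx["session_timezone"] = tz

ctx: dict = {}
attach_to_group(ctx)
apply_server_timezone("UTC")
assert ctx["session_timezone"] == "UTC"
```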
Stopwatch after_send_progress; Stopwatch after_send_profile_events; + + std::unique_ptr query_scope_holder; }; diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index c1f163939e8..0188ebc8173 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -96,10 +96,6 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { ThreadStatus thread_status; - my_context->makeQueryContext(); - auto group = ThreadGroup::createForQuery(my_context); - CurrentThread::attachToGroup(group); - for (size_t retry = 0; retry < 10; ++retry) { try From ca9bd647fbd97fd36de5e30778c824ae522e03c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:52:18 +0200 Subject: [PATCH 110/363] Simplification --- src/Client/ClientBase.cpp | 43 +++++++-------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 62a008bc88c..a305278fb4d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1877,48 +1877,19 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin profile_events.watch.restart(); { - /// Temporarily apply query settings to context. - std::optional old_settings; - SCOPE_EXIT_SAFE({ - if (old_settings) - client_context->setSettings(*old_settings); + /// Temporarily apply query settings to the context. + Settings old_settings = client_context->getSettingsCopy(); + SCOPE_EXIT_SAFE( + { + client_context->setSettings(old_settings); }); - - auto apply_query_settings = [&](const IAST & settings_ast) - { - if (!old_settings) - old_settings.emplace(client_context->getSettingsRef()); - client_context->applySettingsChanges(settings_ast.as()->changes); - client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); - }; - - const auto * insert = parsed_query->as(); - if (const auto * select = parsed_query->as(); select && select->settings()) - apply_query_settings(*select->settings()); - else if (const auto * select_with_union = parsed_query->as()) - { - const ASTs & children = select_with_union->list_of_selects->children; - if (!children.empty()) - { - // On the client it is enough to apply settings only for the - // last SELECT, since the only thing that is important to apply - // on the client is format settings. 
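The simplification below drops the per-query-type dispatch in favour of `InterpreterSetQuery::applySettingsFromQuery`, but keeps the copy/apply/restore shape around it. A Python sketch of that pattern, with a context manager standing in for `SCOPE_EXIT_SAFE` and made-up setting names:

```python
from contextlib import contextmanager

@contextmanager
def temporarily_applied(settings: dict, changes: dict):
    # Copy the whole settings object up front, apply the query-level changes,
    # and restore the copy on any exit path (what SCOPE_EXIT_SAFE guarantees).
    old = dict(settings)
    settings.update(changes)
    try:
        yield settings
    finally:
        settings.clear()
        settings.update(old)

client_settings = {"max_threads": 8}
with temporarily_applied(client_settings, {"max_threads": 1}):
    assert client_settings["max_threads"] == 1
assert client_settings == {"max_threads": 8}
```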
- const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, client_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); From d52d599af4db1c779551b6788a5505e521fe3c31 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 18:34:02 +0200 Subject: [PATCH 111/363] Annotations --- ...ter_skip_virtual_columns_with_non_deterministic_functions.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 6ef8c5a8656..6714a069246 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,4 @@ +-- Tags: long SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); From f2731841de804c30ece1c75e84c8ca8d3eb62ef8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:20:21 +0200 Subject: [PATCH 112/363] init --- src/Core/callOnTypeIndex.h | 3 + src/DataTypes/getLeastSupertype.cpp | 41 +++++ src/DataTypes/getLeastSupertype.h | 22 +++ src/Functions/FunctionsConversion.cpp | 61 +++++++- ...23_interval_data_type_comparison.reference | 99 ++++++++++++ .../03223_interval_data_type_comparison.sql | 142 ++++++++++++++++++ 6 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03223_interval_data_type_comparison.reference create mode 100644 tests/queries/0_stateless/03223_interval_data_type_comparison.sql diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index f5f67df563b..ae5afce36be 100644 --- a/src/Core/callOnTypeIndex.h +++ b/src/Core/callOnTypeIndex.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB @@ -212,6 +213,8 @@ static bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... 
ar case TypeIndex::IPv4: return f(TypePair(), std::forward(args)...); case TypeIndex::IPv6: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + default: break; } diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a71b19d6c92..0b9c744c091 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -228,6 +228,40 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type } } +DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) +{ + const auto& granularity_map = getGranularityMap(); + int min_granularity = std::get<0>(granularity_map.at(IntervalKind::Kind::Year)); + DataTypePtr smallest_type; + + bool is_higher_interval = false; // For Years, Quarters and Months + + for (const auto &type : types) + { + if (const auto * interval_type = typeid_cast(type.get())) + { + int current_granularity = std::get<0>(granularity_map.at(interval_type->getKind())); + if (current_granularity > 8) + is_higher_interval = true; + if (current_granularity < min_granularity) + { + min_granularity = current_granularity; + smallest_type = type; + } + } + } + + if (is_higher_interval && min_granularity <= 8) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + + if (smallest_type) + { + types_set.clear(); + types_set.insert(smallest_type->getTypeId()); + } + + return smallest_type; +} } template @@ -652,6 +686,13 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return numeric_type; } + /// For interval data types. + { + auto res = findSmallestIntervalSuperType(types, type_ids); + if (res) + return res; + } + /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases). 
return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 2ae1e52ca96..c584eb83011 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -1,5 +1,7 @@ #pragma once #include +#include +#include namespace DB { @@ -48,4 +50,24 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); +/// A map that enumerated all interval kinds in ascending order with a conversion value to a next interval +inline const std::unordered_map> & getGranularityMap() +{ + static std::unordered_map> granularity_map = + { + {IntervalKind::Kind::Nanosecond, {1, 1000}}, + {IntervalKind::Kind::Microsecond, {2, 1000}}, + {IntervalKind::Kind::Millisecond, {3, 1000}}, + {IntervalKind::Kind::Second, {4, 60}}, + {IntervalKind::Kind::Minute, {5, 60}}, + {IntervalKind::Kind::Hour, {6, 24}}, + {IntervalKind::Kind::Day, {7, 7}}, + {IntervalKind::Kind::Week, {8, 4}}, + {IntervalKind::Kind::Month, {9, 3}}, + {IntervalKind::Kind::Quarter, {10, 4}}, + {IntervalKind::Kind::Year, {11, 1}} + }; + return granularity_map; +} + } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..0ab1858dc97 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1573,6 +1574,55 @@ struct ConvertImpl arguments, result_type, input_rows_count, additions); } } + else if constexpr (std::is_same_v && std::is_same_v) + { + IntervalKind to = typeid_cast(result_type.get())->getKind(); + IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); + + if (from == to) + return arguments[0].column; + + const auto &map = getGranularityMap(); + Int64 conversion_factor = 1; + Int64 result_value; + + int from_position = map.at(from).first; + int to_position = map.at(to).first; // Positions of each interval according to granurality map + + if (from_position < to_position) + { + for (int i = from_position - 1; i <= to_position; ++i) + { + // Find the kind that matches this position + for (const auto &entry : map) + { + if (entry.second.first == i) + { + conversion_factor *= entry.second.second; + break; + } + } + } + result_value = arguments[0].column->getInt(0) / conversion_factor; + } + else + { + for (int i = from_position - 1; i >= to_position; --i) + { + for (const auto &entry : map) + { + if (entry.second.first == i) + { + conversion_factor *= entry.second.second; + break; + } + } + } + result_value = arguments[0].column->getInt(0) * conversion_factor; + } + + return ColumnConst::create(ColumnInt64::create(1, result_value), input_rows_count); + } else { using FromFieldType = typename FromDataType::FieldType; @@ -2181,7 +2231,7 @@ private: const DataTypePtr from_type = removeNullable(arguments[0].type); ColumnPtr result_column; - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; if (context) date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; @@ -2277,7 +2327,7 @@ private: } } else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, from_string_tag); + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, 
from_string_tag); return true; }; @@ -2334,6 +2384,11 @@ private: else done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } + + if constexpr (std::is_same_v) + { + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); + } } if (!done) @@ -5224,7 +5279,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. - factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.reference b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference new file mode 100644 index 00000000000..e98f792e4b2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference @@ -0,0 +1,99 @@ +Comparing nanoseconds +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +Comparing microseconds +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +Comparing milliseconds +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +Comparing seconds +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +Comparing minutes +1 +1 +1 +1 +0 +0 +0 +0 +Comparing hours +1 +1 +1 +0 +0 +0 +Comparing days +1 +1 +0 +0 +Comparing weeks +1 +0 +Comparing months +1 +1 +1 +0 +0 +0 +Comparing quarters +1 +1 +0 +0 +Comparing years +1 +0 diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql new file mode 100644 index 00000000000..6e4862bf2d2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -0,0 +1,142 @@ +SELECT('Comparing nanoseconds'); +SELECT toIntervalNanosecond(500) > toIntervalNanosecond(300); +SELECT toIntervalNanosecond(1000) < toIntervalNanosecond(1500); +SELECT toIntervalNanosecond(2000) = toIntervalNanosecond(2000); +SELECT toIntervalNanosecond(1000) >= toIntervalMicrosecond(1); +SELECT toIntervalNanosecond(1000001) > toIntervalMillisecond(1); +SELECT toIntervalNanosecond(2000000001) > toIntervalSecond(2); +SELECT toIntervalNanosecond(60000000000) = toIntervalMinute(1); +SELECT toIntervalNanosecond(7199999999999) < toIntervalHour(2); +SELECT toIntervalNanosecond(1) < toIntervalDay(2); +SELECT toIntervalNanosecond(5) < toIntervalWeek(1); + +SELECT toIntervalNanosecond(500) < toIntervalNanosecond(300); +SELECT toIntervalNanosecond(1000) > toIntervalNanosecond(1500); +SELECT toIntervalNanosecond(2000) != toIntervalNanosecond(2000); +SELECT toIntervalNanosecond(1000) < toIntervalMicrosecond(1); +SELECT toIntervalNanosecond(1000001) < toIntervalMillisecond(1); +SELECT toIntervalNanosecond(2000000001) < toIntervalSecond(2); +SELECT toIntervalNanosecond(60000000000) != toIntervalMinute(1); +SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); +SELECT toIntervalNanosecond(1) > toIntervalDay(2); +SELECT toIntervalNanosecond(5) > toIntervalWeek(1); + +SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing microseconds'); +SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); +SELECT toIntervalMicrosecond(1001) > toIntervalMillisecond(1); +SELECT toIntervalMicrosecond(2000000) = toIntervalSecond(2); 
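The expected results in this test follow from the per-step multipliers in the granularity map (1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4). A short Python check of two of the boundary cases, assuming kinds are numbered 0..10 in ascending order:

```python
STEP = [1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4]  # ns->us, us->ms, ..., quarter->year
POS = {"Nanosecond": 0, "Microsecond": 1, "Millisecond": 2, "Second": 3,
       "Minute": 4, "Hour": 5, "Day": 6, "Week": 7, "Month": 8, "Quarter": 9, "Year": 10}

def factor(small: str, big: str) -> int:
    # Product of per-step multipliers between two kinds, smaller kind first.
    f = 1
    for i in range(POS[small], POS[big]):
        f *= STEP[i]
    return f

assert factor("Nanosecond", "Hour") == 3_600_000_000_000
# toIntervalNanosecond(7199999999999) < toIntervalHour(2):
assert 7_199_999_999_999 < 2 * factor("Nanosecond", "Hour")
# toIntervalSecond(1209600) = toIntervalWeek(2):
assert 1_209_600 == 2 * factor("Second", "Week")
```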
+SELECT toIntervalMicrosecond(179999999) < toIntervalMinute(3); +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); +SELECT toIntervalMicrosecond(36000000000000) > toIntervalDay(2); +SELECT toIntervalMicrosecond(1209600000000) = toIntervalWeek(2); + +SELECT toIntervalMicrosecond(1) > toIntervalMicrosecond(999); +SELECT toIntervalMicrosecond(1001) < toIntervalMillisecond(1); +SELECT toIntervalMicrosecond(2000000) != toIntervalSecond(2); +SELECT toIntervalMicrosecond(179999999) > toIntervalMinute(3); +SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); +SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); +SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); + +SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing milliseconds'); +SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); +SELECT toIntervalMillisecond(2000) = toIntervalSecond(2); +SELECT toIntervalMillisecond(170000) < toIntervalMinute(3); +SELECT toIntervalMillisecond(144000001) > toIntervalHour(40); +SELECT toIntervalMillisecond(1728000000) = toIntervalDay(20); +SELECT toIntervalMillisecond(1198599999) < toIntervalWeek(2); + +SELECT toIntervalMillisecond(2000) < toIntervalMillisecond(2); +SELECT toIntervalMillisecond(2000) != toIntervalSecond(2); +SELECT toIntervalMillisecond(170000) > toIntervalMinute(3); +SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); +SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); +SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); + +SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing seconds'); +SELECT toIntervalSecond(120) > toIntervalSecond(2); +SELECT toIntervalSecond(120) = toIntervalMinute(2); +SELECT toIntervalSecond(1) < toIntervalHour(2); +SELECT toIntervalSecond(86401) >= toIntervalDay(1); +SELECT toIntervalSecond(1209600) = toIntervalWeek(2); + +SELECT toIntervalSecond(120) < toIntervalSecond(2); +SELECT toIntervalSecond(120) != toIntervalMinute(2); +SELECT toIntervalSecond(1) > toIntervalHour(2); +SELECT toIntervalSecond(86401) < toIntervalDay(1); +SELECT toIntervalSecond(1209600) != toIntervalWeek(2); + +SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing minutes'); +SELECT toIntervalMinute(1) < toIntervalMinute(59); +SELECT toIntervalMinute(1) < toIntervalHour(59); +SELECT toIntervalMinute(1440) = toIntervalDay(1); +SELECT toIntervalMinute(30241) > toIntervalWeek(3); + +SELECT toIntervalMinute(1) > toIntervalMinute(59); +SELECT toIntervalMinute(1) > toIntervalHour(59); +SELECT toIntervalMinute(1440) != toIntervalDay(1); +SELECT toIntervalMinute(30241) < toIntervalWeek(3); + +SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing hours'); +SELECT toIntervalHour(48) > toIntervalHour(2); +SELECT toIntervalHour(48) >= toIntervalDay(2); +SELECT toIntervalHour(672) = toIntervalWeek(4); + +SELECT toIntervalHour(48) < toIntervalHour(2); +SELECT toIntervalHour(48) < toIntervalDay(2); +SELECT toIntervalHour(672) != toIntervalWeek(4); + +SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing days'); +SELECT toIntervalDay(1) < toIntervalDay(23); +SELECT toIntervalDay(25) > toIntervalWeek(3); + +SELECT toIntervalDay(1) > toIntervalDay(23); +SELECT 
toIntervalDay(25) < toIntervalWeek(3); + +SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing weeks'); +SELECT toIntervalWeek(1) < toIntervalWeek(6); + +SELECT toIntervalWeek(1) > toIntervalWeek(6); + +SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing months'); +SELECT toIntervalMonth(1) < toIntervalMonth(3); +SELECT toIntervalMonth(124) > toIntervalQuarter(5); +SELECT toIntervalMonth(36) = toIntervalYear(3); + +SELECT toIntervalMonth(1) > toIntervalMonth(3); +SELECT toIntervalMonth(124) < toIntervalQuarter(5); +SELECT toIntervalMonth(36) != toIntervalYear(3); + +SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing quarters'); +SELECT toIntervalQuarter(5) > toIntervalQuarter(4); +SELECT toIntervalQuarter(20) = toIntervalYear(5); + +SELECT toIntervalQuarter(5) < toIntervalQuarter(4); +SELECT toIntervalQuarter(20) != toIntervalYear(5); + +SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing years'); +SELECT toIntervalYear(1) < toIntervalYear(3); + +SELECT toIntervalYear(1) > toIntervalYear(3); + +SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From e9659626adc29d237d23e0f3ced9c8712d472a73 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:41:15 +0200 Subject: [PATCH 113/363] fix style + add docs --- .../data-types/special-data-types/interval.md | 21 +++++++++---------- .../data-types/special-data-types/interval.md | 21 +++++++++---------- .../data-types/special-data-types/interval.md | 21 +++++++++---------- src/DataTypes/getLeastSupertype.cpp | 2 +- 4 files changed, 31 insertions(+), 34 deletions(-) diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index bedbcf0bd28..be26053580b 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -53,29 +53,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Intervals with different types can’t be combined. You can’t use intervals like `4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, the interval `1 day and an hour` interval can be expressed as `25 HOUR` or `90000 SECOND`. - -You can’t perform arithmetical operations with `Interval`-type values, but you can add intervals of different types consequently to values in `Date` or `DateTime` data types. 
For example: +Also it is possible to use multiple intervals simultaneously: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -The following query causes an exception: +And to compare values with different intevals: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. +┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## See Also diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 867a6665f4b..5064391f582 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -54,29 +54,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Нельзя объединять интервалы различных типов. Нельзя использовать интервалы вида `4 DAY 1 HOUR`. Вместо этого выражайте интервал в единицах меньших или равных минимальной единице интервала, например, интервал «1 день и 1 час» можно выразить как `25 HOUR` или `90000 SECOND`. - -Арифметические операции со значениями типов `Interval` не доступны, однако можно последовательно добавлять различные интервалы к значениям типов `Date` и `DateTime`. Например: +Также можно использовать различные типы интервалов одновременно: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -Следующий запрос приведёт к генерированию исключения: +И сравнивать значения из разными интервалами: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. 
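The timestamps in the new docs table can be reproduced with plain date arithmetic. A quick Python confirmation, using `timedelta` (which only models the fixed-length kinds, exactly the family the example uses):

```python
from datetime import datetime, timedelta

# The docs' "current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR)" example.
current_date_time = datetime(2024, 8, 8, 18, 31, 39)
assert current_date_time + (timedelta(days=4) + timedelta(hours=3)) \
    == datetime(2024, 8, 12, 21, 31, 39)
```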
+┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## Смотрите также {#smotrite-takzhe} diff --git a/docs/zh/sql-reference/data-types/special-data-types/interval.md b/docs/zh/sql-reference/data-types/special-data-types/interval.md index e05869b2df8..e16f6d5f84f 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/interval.md +++ b/docs/zh/sql-reference/data-types/special-data-types/interval.md @@ -55,29 +55,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -不同类型的间隔不能合并。 你不能使用诸如 `4 DAY 1 HOUR` 的时间间隔. 以小于或等于时间间隔最小单位的单位来指定间隔,例如,时间间隔 `1 day and an hour` 可以表示为 `25 HOUR` 或 `90000 SECOND`. - -你不能对 `Interval` 类型的值执行算术运算,但你可以向 `Date` 或 `DateTime` 数据类型的值添加不同类型的时间间隔,例如: +也可以同時使用多個間隔: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -以下查询将导致异常: +並比較不同直數的值: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. 
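The header of the result table added just below compares 179999999 microseconds with 3 minutes; the inequality is easy to confirm by hand:

```python
MICROSECONDS_PER_MINUTE = 60 * 1_000_000
assert 3 * MICROSECONDS_PER_MINUTE == 180_000_000
assert 179_999_999 < 3 * MICROSECONDS_PER_MINUTE  # hence the table shows 1
```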
+┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## 另请参阅 {#see-also} diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 0b9c744c091..674284460dc 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -252,7 +252,7 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & } if (is_higher_interval && min_granularity <= 8) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); if (smallest_type) { From 91ff9f40a2cea46d2ddf14628b16a0ddf923a3cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 21:04:10 +0200 Subject: [PATCH 114/363] Misc --- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 278817b1d48..8a6fa9b7845 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -4,6 +4,7 @@ set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; set max_rows_to_read = '30M'; + drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; From 0ad6aa09acb72a67fc88e0cd8186afd32fefd6bf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 22:51:27 +0200 Subject: [PATCH 115/363] fix style --- docs/en/sql-reference/data-types/special-data-types/interval.md | 2 +- src/Functions/FunctionsConversion.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index be26053580b..4ef1a7e6238 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -65,7 +65,7 @@ SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVA └─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -And to compare values with different intevals: +And to compare values with different intervals: ``` sql SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0ab1858dc97..1708991af74 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1606,7 +1606,7 @@ struct ConvertImpl result_value = arguments[0].column->getInt(0) / conversion_factor; } else - { + { for (int i = from_position - 1; i >= to_position; --i) { for (const auto &entry : map) From 94efbb0bf9ab62a5399d4918e7bcfd358421a879 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:26:24 +0200 Subject: [PATCH 116/363] fix build --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 1708991af74..43ebe573582 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5279,7 +5279,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. - factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); From b4c553718353eb2302f85ea4d096a92036ce832c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:49:56 +0200 Subject: [PATCH 117/363] fix errorcodes in test --- .../03223_interval_data_type_comparison.sql | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql index 6e4862bf2d2..5d01addae45 100644 --- a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -21,7 +21,7 @@ SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); SELECT toIntervalNanosecond(1) > toIntervalDay(2); SELECT toIntervalNanosecond(5) > toIntervalWeek(1); -SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError NO_COMMON_TYPE } SELECT('Comparing microseconds'); SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); @@ -40,7 +40,7 @@ SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); -SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing milliseconds'); SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); @@ -57,7 +57,7 @@ SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); -SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing seconds'); SELECT toIntervalSecond(120) > toIntervalSecond(2); @@ -72,7 +72,7 @@ SELECT toIntervalSecond(1) > toIntervalHour(2); SELECT toIntervalSecond(86401) < toIntervalDay(1); SELECT toIntervalSecond(1209600) != toIntervalWeek(2); -SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing minutes'); SELECT toIntervalMinute(1) < toIntervalMinute(59); @@ -85,7 +85,7 @@ SELECT toIntervalMinute(1) > toIntervalHour(59); SELECT toIntervalMinute(1440) != toIntervalDay(1); SELECT toIntervalMinute(30241) < toIntervalWeek(3); -SELECT toIntervalMinute(2) = 
toIntervalQuarter(120); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError NO_COMMON_TYPE } SELECT('Comparing hours'); SELECT toIntervalHour(48) > toIntervalHour(2); @@ -96,7 +96,7 @@ SELECT toIntervalHour(48) < toIntervalHour(2); SELECT toIntervalHour(48) < toIntervalDay(2); SELECT toIntervalHour(672) != toIntervalWeek(4); -SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing days'); SELECT toIntervalDay(1) < toIntervalDay(23); @@ -105,14 +105,14 @@ SELECT toIntervalDay(25) > toIntervalWeek(3); SELECT toIntervalDay(1) > toIntervalDay(23); SELECT toIntervalDay(25) < toIntervalWeek(3); -SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError NO_COMMON_TYPE } SELECT('Comparing weeks'); SELECT toIntervalWeek(1) < toIntervalWeek(6); SELECT toIntervalWeek(1) > toIntervalWeek(6); -SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError NO_COMMON_TYPE } SELECT('Comparing months'); SELECT toIntervalMonth(1) < toIntervalMonth(3); @@ -123,7 +123,7 @@ SELECT toIntervalMonth(1) > toIntervalMonth(3); SELECT toIntervalMonth(124) < toIntervalQuarter(5); SELECT toIntervalMonth(36) != toIntervalYear(3); -SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError NO_COMMON_TYPE } SELECT('Comparing quarters'); SELECT toIntervalQuarter(5) > toIntervalQuarter(4); @@ -132,11 +132,11 @@ SELECT toIntervalQuarter(20) = toIntervalYear(5); SELECT toIntervalQuarter(5) < toIntervalQuarter(4); SELECT toIntervalQuarter(20) != toIntervalYear(5); -SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError NO_COMMON_TYPE } SELECT('Comparing years'); SELECT toIntervalYear(1) < toIntervalYear(3); SELECT toIntervalYear(1) > toIntervalYear(3); -SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError NO_COMMON_TYPE } From 3357275fa8c55bcc5371b4ff9c9a5d80e51ab689 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 18:33:45 +0800 Subject: [PATCH 118/363] Fix MSAN issue caused by incorrect date format. 
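The change below bounds the exception text to the bytes actually read, instead of treating a partially filled stack buffer as a NUL-terminated string. A simplified Python sketch of the behaviour the regression test (03215_fix_datetime_implicit_conversion, added later in this series) relies on; the real parser is positional, a regex only approximates its fixed-width expectations:

```python
import re

WELL_FORMED = re.compile(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}")

def parse_or_report(s: str) -> str:
    if WELL_FORMED.fullmatch(s):
        return "ok"
    # After the fix, the message echoes only the bytes actually read,
    # never past the end of the partially filled buffer.
    return f"Cannot parse time component of DateTime {s}"

assert parse_or_report("2024-08-06 09:58:09") == "ok"
assert parse_or_report("2024-08-06 09:58:0").endswith("09:58:0")
```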
--- src/IO/ReadHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index c771fced73a..dd4aef23a25 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1432,7 +1432,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, date_broken_down_length + 1 + size)); else return false; } From 35f19522e745ef2267b4c6f99dfc5d7c1f7e78c3 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:56:14 +0200 Subject: [PATCH 119/363] fix fuzzer --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 43ebe573582..c25bc44450f 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1579,7 +1579,7 @@ struct ConvertImpl IntervalKind to = typeid_cast(result_type.get())->getKind(); IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); - if (from == to) + if (from == to || arguments[0].column->empty()) return arguments[0].column; const auto &map = getGranularityMap(); From ca4041847e4aa8acccd6ea31c0a18f2160c0dc7a Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 19:15:41 +0800 Subject: [PATCH 120/363] Add tests --- src/IO/ReadHelpers.cpp | 4 ++-- ...215_fix_datetime_implicit_conversion.reference | 1 + .../03215_fix_datetime_implicit_conversion.sql | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference create mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index dd4aef23a25..e69b4187b37 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1402,7 +1402,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", String(s, already_read_length)); else return false; } @@ -1432,7 +1432,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, date_broken_down_length + 1 + size)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, size)); else return false; } diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql new file mode 100644 index 00000000000..70a8a3432a6 --- 
/dev/null +++ b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS tab SYNC; + +CREATE TABLE tab +( + a DateTime, + pk String +) Engine = MergeTree() ORDER BY pk; + +INSERT INTO tab select cast(number, 'DateTime'), generateUUIDv4() FROM system.numbers LIMIT 1; + +SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'; +SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'; -- { serverError CANNOT_PARSE_DATETIME } +SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'; -- { serverError TYPE_MISMATCH } + +DROP TABLE IF EXISTS tab SYNC; From 6ded5e1c8b994ad2332468e605b17a74e8d5675f Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 23:50:03 +0800 Subject: [PATCH 121/363] Some fixups --- src/IO/ReadHelpers.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e69b4187b37..b484f80250d 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1399,10 +1399,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D size_t size = buf.read(s_pos, remaining_date_size); if (size != remaining_date_size) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", String(s, already_read_length)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", std::string_view(s, already_read_length + size)); else return false; } @@ -1429,10 +1427,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (size != time_broken_down_length) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, size)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", std::string_view(s, size)); else return false; } From a3d8db6e1eb27d6a8fa81bbf43c8ffb171714c0b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 9 Aug 2024 19:05:37 +0200 Subject: [PATCH 122/363] updates due to review --- .../data-types/special-data-types/interval.md | 21 +++++++------- src/DataTypes/getLeastSupertype.cpp | 13 ++++----- src/DataTypes/getLeastSupertype.h | 21 ++------------ src/Functions/FunctionsConversion.cpp | 28 +++---------------- 4 files changed, 24 insertions(+), 59 deletions(-) diff --git a/docs/zh/sql-reference/data-types/special-data-types/interval.md b/docs/zh/sql-reference/data-types/special-data-types/interval.md index e16f6d5f84f..e05869b2df8 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/interval.md +++ b/docs/zh/sql-reference/data-types/special-data-types/interval.md @@ -55,28 +55,29 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -也可以同時使用多個間隔: +不同类型的间隔不能合并。 你不能使用诸如 `4 DAY 1 HOUR` 的时间间隔. 以小于或等于时间间隔最小单位的单位来指定间隔,例如,时间间隔 `1 day and an hour` 可以表示为 `25 HOUR` 或 `90000 SECOND`. 
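The figures in the restored paragraph check out: "1 day and an hour" in the two suggested units is a one-line computation.

```python
# "1 day and an hour" from the restored docs paragraph, in the suggested units.
assert 24 + 1 == 25               # 25 HOUR
assert (24 + 1) * 3600 == 90_000  # 90000 SECOND
```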
+ +你不能对 `Interval` 类型的值执行算术运算,但你可以向 `Date` 或 `DateTime` 数据类型的值添加不同类型的时间间隔,例如: ``` sql -SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR ``` ``` text -┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ -│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ -└─────────────────────┴────────────────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ +│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ +└─────────────────────┴────────────────────────────────────────────────────────┘ ``` -並比較不同直數的值: +以下查询将导致异常: ``` sql -SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); +select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ -│ 1 │ -└─────────────────────────────────────────────────────────────┘ +Received exception from server (version 19.14.1): +Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. ``` ## 另请参阅 {#see-also} diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 674284460dc..8bcec49815f 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -230,8 +230,7 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) { - const auto& granularity_map = getGranularityMap(); - int min_granularity = std::get<0>(granularity_map.at(IntervalKind::Kind::Year)); + auto min_interval = IntervalKind::Kind::Year; DataTypePtr smallest_type; bool is_higher_interval = false; // For Years, Quarters and Months @@ -240,18 +239,18 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & { if (const auto * interval_type = typeid_cast(type.get())) { - int current_granularity = std::get<0>(granularity_map.at(interval_type->getKind())); - if (current_granularity > 8) + auto current_interval = interval_type->getKind().kind; + if (current_interval > IntervalKind::Kind::Week) is_higher_interval = true; - if (current_granularity < min_granularity) + if (current_interval < min_interval) { - min_granularity = current_granularity; + min_interval = current_interval; smallest_type = type; } } } - if (is_higher_interval && min_granularity <= 8) + if (is_higher_interval && min_interval <= IntervalKind::Kind::Week) throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); if (smallest_type) diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index c584eb83011..5ea2b6417b2 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -50,24 +50,9 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); -/// A map that enumerated all interval kinds in ascending order with a conversion value to a next interval -inline const std::unordered_map> & getGranularityMap() -{ - static std::unordered_map> granularity_map = - { - {IntervalKind::Kind::Nanosecond, {1, 1000}}, - 
{IntervalKind::Kind::Microsecond, {2, 1000}}, - {IntervalKind::Kind::Millisecond, {3, 1000}}, - {IntervalKind::Kind::Second, {4, 60}}, - {IntervalKind::Kind::Minute, {5, 60}}, - {IntervalKind::Kind::Hour, {6, 24}}, - {IntervalKind::Kind::Day, {7, 7}}, - {IntervalKind::Kind::Week, {8, 4}}, - {IntervalKind::Kind::Month, {9, 3}}, - {IntervalKind::Kind::Quarter, {10, 4}}, - {IntervalKind::Kind::Year, {11, 1}} - }; - return granularity_map; +/// A vector that shows the conversion rates to the next Interval type starting from NanoSecond +static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; + } } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index c25bc44450f..25c6bbcbfef 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1582,42 +1582,22 @@ struct ConvertImpl if (from == to || arguments[0].column->empty()) return arguments[0].column; - const auto &map = getGranularityMap(); Int64 conversion_factor = 1; Int64 result_value; - int from_position = map.at(from).first; - int to_position = map.at(to).first; // Positions of each interval according to granurality map + int from_position = static_cast(from.kind); + int to_position = static_cast(to.kind); // Positions of each interval according to granurality map if (from_position < to_position) { for (int i = from_position - 1; i <= to_position; ++i) - { - // Find the kind that matches this position - for (const auto &entry : map) - { - if (entry.second.first == i) - { - conversion_factor *= entry.second.second; - break; - } - } - } + conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) / conversion_factor; } else { for (int i = from_position - 1; i >= to_position; --i) - { - for (const auto &entry : map) - { - if (entry.second.first == i) - { - conversion_factor *= entry.second.second; - break; - } - } - } + conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) * conversion_factor; } From a25accdae3cf420beaeb8ca25a9ac32070c397b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 20:20:09 +0200 Subject: [PATCH 123/363] Fix a test --- programs/client/Client.cpp | 3 +++ src/Client/ClientBase.cpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1d99d223ee9..631914dee5c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1164,6 +1164,9 @@ void Client::processOptions(const OptionsDescription & options_description, /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) client_context = global_context; initClientContext(); + + /// Allow to pass-through unknown settings to the server. + client_context->getAccessControl().allowAllSettings(); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a00a9499237..473db8e9678 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -689,9 +689,6 @@ void ClientBase::initClientContext() client_context->setQueryKindInitial(); client_context->setQueryKind(query_kind); client_context->setQueryParameters(query_parameters); - - /// Allow to pass-through unknown settings to the server. 
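Moving `allowAllSettings()` from ClientBase into Client limits the relaxation to clickhouse-client, which forwards unknown settings to the server as-is, while other ClientBase users keep strict checking, as far as the diff shows. A toy Python model of the two modes; the class and setting names are invented for illustration, not the real AccessControl API:

```python
class AccessControl:
    """Toy model of strict vs pass-through setting checks."""
    def __init__(self) -> None:
        self.allow_all_settings = False

    def check(self, name: str, known: set) -> None:
        if name not in known and not self.allow_all_settings:
            raise KeyError(f"Unknown setting '{name}'")

KNOWN = {"max_threads", "max_memory_usage"}
local_side = AccessControl()            # clickhouse-local: stays strict
client_side = AccessControl()
client_side.allow_all_settings = True   # what Client.cpp now opts into

client_side.check("some_future_setting", KNOWN)  # forwarded to the server as-is
try:
    local_side.check("some_future_setting", KNOWN)
except KeyError:
    pass  # strict contexts still reject unknown names
```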
- client_context->getAccessControl().allowAllSettings(); } bool ClientBase::isRegularFile(int fd) From 384aedccaeece56456ad1e5ea17a8da4f56a69a4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:09:50 +0200 Subject: [PATCH 124/363] Update getLeastSupertype.h --- src/DataTypes/getLeastSupertype.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 5ea2b6417b2..8dd1685e6e9 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -54,5 +54,3 @@ DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; } - -} From c716315c3fdfd57719daf0f7f42b786afe6e68af Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Aug 2024 00:20:46 +0200 Subject: [PATCH 125/363] Annotations --- .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8db91904a6a..c8a243d9b27 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: shard, long DROP TABLE IF EXISTS group_uniq_str; CREATE TABLE group_uniq_str ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; From 375de7ff6523a23fb7b898725a53004f24d047cd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 07:15:10 +0200 Subject: [PATCH 126/363] ci: add more logs in the functional tests reports Due to settings randomization 4096 is not enough even to show all settings, like here [1]. 
[1]: https://s3.amazonaws.com/clickhouse-test-reports/68139/c852bd9dbaa317423234d4f15f21d64e59be42b5/stateless_tests_flaky_check__asan_.html Signed-off-by: Azat Khuzhin --- docker/test/util/process_functional_tests_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index aa2ea686c46..ec9e14b1430 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -116,7 +116,7 @@ def process_test_log(log_path, broken_tests): test[0], test[1], test[2], - "".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"), + "".join(test[3])[:8192].replace("\t", "\\t").replace("\n", "\\n"), ] for test in test_results ] From ece707c4436ab65fcb142f0eaae72f7eb2c3d8db Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 10 Aug 2024 19:43:29 +0200 Subject: [PATCH 127/363] Better test for Not-ready Set is passed in system.* tables - system.distribution_queue - system.replication_queue - system.rocksdb - system.databases - system.mutations - test for system.part_moves_between_shards will not be provided since it is a likely deprecated feature and the test requires some code (I've fixed it differently from #66018, but it does not make sense anymore, so I'm submitting only the test) --- ...3223_system_tables_set_not_ready.reference | 5 ++++ .../03223_system_tables_set_not_ready.sql | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/queries/0_stateless/03223_system_tables_set_not_ready.reference create mode 100644 tests/queries/0_stateless/03223_system_tables_set_not_ready.sql diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference new file mode 100644 index 00000000000..e39523ed4f5 --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference @@ -0,0 +1,5 @@ +system.distribution_queue 1 +system.rocksdb 1 +system.databases 1 +system.mutations 1 +system.replication_queue 1 diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql new file mode 100644 index 00000000000..907fa47143c --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest +-- Tag no-fasttest -- due to EmbeddedRocksDB + +drop table if exists null; +drop table if exists dist; +create table null as system.one engine=Null; +create table dist as null engine=Distributed(test_cluster_two_shards, currentDatabase(), 'null', rand()); +insert into dist settings prefer_localhost_replica=0 values (1); +select 'system.distribution_queue', count() from system.distribution_queue where exists(select 1) and database = currentDatabase(); + +drop table if exists rocksdb; +create table rocksdb (key Int) engine=EmbeddedRocksDB() primary key key; +insert into rocksdb values (1); +select 'system.rocksdb', count()>0 from system.rocksdb where exists(select 1) and database = currentDatabase(); + +select 'system.databases', count() from system.databases where exists(select 1) and database = currentDatabase(); + +drop table if exists mt; +create table mt (key Int) engine=MergeTree() order by key; +alter table mt delete where 1; +select 'system.mutations', count() from system.mutations where exists(select 1) and database = currentDatabase(); + +drop table if exists rep1; +drop 
table if exists rep2; +create table rep1 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +create table rep2 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +system stop fetches rep2; +insert into rep1 values (1); +system sync replica rep2 pull; +select 'system.replication_queue', count()>0 from system.replication_queue where exists(select 1) and database = currentDatabase(); From a41c1305887d08f43c02e354bb307f69a16b3fb0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 06:05:44 +0200 Subject: [PATCH 128/363] Update 02675_profile_events_from_query_log_and_client.sh --- .../02675_profile_events_from_query_log_and_client.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index 894b2b61563..ff534a6a2e6 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-merge-tree-settings # Tag no-fasttest: needs s3 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 1cc845726842f388c4524d55b248f210e28d979d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:57:47 +0200 Subject: [PATCH 129/363] fix reviews, fix crash in fuzzer --- src/DataTypes/getLeastSupertype.cpp | 2 +- src/DataTypes/getLeastSupertype.h | 2 +- src/Functions/FunctionsConversion.cpp | 11 +- .../03223_interval_data_type_comparison.sql | 198 +++++++++--------- 4 files changed, 106 insertions(+), 107 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 8bcec49815f..65df529e78b 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -251,7 +251,7 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & } if (is_higher_interval && min_interval <= IntervalKind::Kind::Week) - throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the number of days in a month is not fixed", types[0]->getName(), types[1]->getName()); if (smallest_type) { diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 8dd1685e6e9..55d8e8fff0d 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -51,6 +51,6 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); /// A vector that shows the conversion rates to the next Interval type starting from NanoSecond -static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; +static std::vector interval_conversions = {1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4}; } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 25c6bbcbfef..b6102cb7ecf 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1586,17 +1586,17 @@ struct ConvertImpl Int64 result_value; int from_position = static_cast(from.kind); - int to_position = 
static_cast(to.kind); // Positions of each interval according to granurality map + int to_position = static_cast(to.kind); /// Positions of each interval according to granularity map if (from_position < to_position) { - for (int i = from_position - 1; i <= to_position; ++i) + for (int i = from_position; i < to_position; ++i) conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) / conversion_factor; } else { - for (int i = from_position - 1; i >= to_position; --i) + for (int i = from_position; i > to_position; --i) conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) * conversion_factor; } @@ -2366,9 +2366,8 @@ private: } if constexpr (std::is_same_v) - { - done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); - } + if (WhichDataType(from_type).isInterval()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } if (!done) diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql index 5d01addae45..77b6e2fa3dc 100644 --- a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -1,142 +1,142 @@ SELECT('Comparing nanoseconds'); -SELECT toIntervalNanosecond(500) > toIntervalNanosecond(300); -SELECT toIntervalNanosecond(1000) < toIntervalNanosecond(1500); -SELECT toIntervalNanosecond(2000) = toIntervalNanosecond(2000); -SELECT toIntervalNanosecond(1000) >= toIntervalMicrosecond(1); -SELECT toIntervalNanosecond(1000001) > toIntervalMillisecond(1); -SELECT toIntervalNanosecond(2000000001) > toIntervalSecond(2); -SELECT toIntervalNanosecond(60000000000) = toIntervalMinute(1); -SELECT toIntervalNanosecond(7199999999999) < toIntervalHour(2); -SELECT toIntervalNanosecond(1) < toIntervalDay(2); -SELECT toIntervalNanosecond(5) < toIntervalWeek(1); +SELECT INTERVAL 500 NANOSECOND > INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND = INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND >= INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000001 NANOSECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND = INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND < INTERVAL 1 WEEK; -SELECT toIntervalNanosecond(500) < toIntervalNanosecond(300); -SELECT toIntervalNanosecond(1000) > toIntervalNanosecond(1500); -SELECT toIntervalNanosecond(2000) != toIntervalNanosecond(2000); -SELECT toIntervalNanosecond(1000) < toIntervalMicrosecond(1); -SELECT toIntervalNanosecond(1000001) < toIntervalMillisecond(1); -SELECT toIntervalNanosecond(2000000001) < toIntervalSecond(2); -SELECT toIntervalNanosecond(60000000000) != toIntervalMinute(1); -SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); -SELECT toIntervalNanosecond(1) > toIntervalDay(2); -SELECT toIntervalNanosecond(5) > toIntervalWeek(1); +SELECT INTERVAL 500 NANOSECOND < INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND > INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND != INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND < INTERVAL 1 MILLISECOND; 
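-- Editor's aside, not part of the patch: how these comparisons are resolved.
-- The common supertype is the finer of the two units; the coarser operand is
-- multiplied through the per-step rates (ns->us->ms->s->min->h->day->week are
-- 1000, 1000, 1000, 60, 60, 24, 7). E.g. 1 MILLISECOND becomes 1000000 ns, so
-- `1000001 NANOSECOND < 1 MILLISECOND` above yields 0. Units of MONTH and
-- coarser have no fixed length in days, hence the NO_COMMON_TYPE errors in the
-- cases below, e.g. `SELECT INTERVAL 2 DAY = INTERVAL 48 MONTH;`.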
+SELECT INTERVAL 2000000001 NANOSECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND != INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND > INTERVAL 1 WEEK; -SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing microseconds'); -SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); -SELECT toIntervalMicrosecond(1001) > toIntervalMillisecond(1); -SELECT toIntervalMicrosecond(2000000) = toIntervalSecond(2); -SELECT toIntervalMicrosecond(179999999) < toIntervalMinute(3); -SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); -SELECT toIntervalMicrosecond(36000000000000) > toIntervalDay(2); -SELECT toIntervalMicrosecond(1209600000000) = toIntervalWeek(2); +SELECT INTERVAL 1 MICROSECOND < INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND = INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND = INTERVAL 2 WEEK; -SELECT toIntervalMicrosecond(1) > toIntervalMicrosecond(999); -SELECT toIntervalMicrosecond(1001) < toIntervalMillisecond(1); -SELECT toIntervalMicrosecond(2000000) != toIntervalSecond(2); -SELECT toIntervalMicrosecond(179999999) > toIntervalMinute(3); -SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); -SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); -SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); +SELECT INTERVAL 1 MICROSECOND > INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND < INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND != INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND != INTERVAL 2 WEEK; -SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 1 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing milliseconds'); -SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); -SELECT toIntervalMillisecond(2000) = toIntervalSecond(2); -SELECT toIntervalMillisecond(170000) < toIntervalMinute(3); -SELECT toIntervalMillisecond(144000001) > toIntervalHour(40); -SELECT toIntervalMillisecond(1728000000) = toIntervalDay(20); -SELECT toIntervalMillisecond(1198599999) < toIntervalWeek(2); +SELECT INTERVAL 2000 MILLISECOND > INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND > INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND = INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND < INTERVAL 2 WEEK; -SELECT toIntervalMillisecond(2000) < toIntervalMillisecond(2); -SELECT toIntervalMillisecond(2000) != toIntervalSecond(2); -SELECT toIntervalMillisecond(170000) > toIntervalMinute(3); -SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); -SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); -SELECT toIntervalMillisecond(1198599999) > 
toIntervalWeek(2); +SELECT INTERVAL 2000 MILLISECOND < INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND < INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND != INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND > INTERVAL 2 WEEK; -SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 MILLISECOND < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } SELECT('Comparing seconds'); -SELECT toIntervalSecond(120) > toIntervalSecond(2); -SELECT toIntervalSecond(120) = toIntervalMinute(2); -SELECT toIntervalSecond(1) < toIntervalHour(2); -SELECT toIntervalSecond(86401) >= toIntervalDay(1); -SELECT toIntervalSecond(1209600) = toIntervalWeek(2); +SELECT INTERVAL 120 SECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND = INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND >= INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND = INTERVAL 2 WEEK; -SELECT toIntervalSecond(120) < toIntervalSecond(2); -SELECT toIntervalSecond(120) != toIntervalMinute(2); -SELECT toIntervalSecond(1) > toIntervalHour(2); -SELECT toIntervalSecond(86401) < toIntervalDay(1); -SELECT toIntervalSecond(1209600) != toIntervalWeek(2); +SELECT INTERVAL 120 SECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND != INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND < INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND != INTERVAL 2 WEEK; -SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 SECOND < INTERVAL 1 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing minutes'); -SELECT toIntervalMinute(1) < toIntervalMinute(59); -SELECT toIntervalMinute(1) < toIntervalHour(59); -SELECT toIntervalMinute(1440) = toIntervalDay(1); -SELECT toIntervalMinute(30241) > toIntervalWeek(3); +SELECT INTERVAL 1 MINUTE < INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE < INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE = INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE > INTERVAL 3 WEEK; -SELECT toIntervalMinute(1) > toIntervalMinute(59); -SELECT toIntervalMinute(1) > toIntervalHour(59); -SELECT toIntervalMinute(1440) != toIntervalDay(1); -SELECT toIntervalMinute(30241) < toIntervalWeek(3); +SELECT INTERVAL 1 MINUTE > INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE > INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE != INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE < INTERVAL 3 WEEK; -SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 MINUTE = INTERVAL 120 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing hours'); -SELECT toIntervalHour(48) > toIntervalHour(2); -SELECT toIntervalHour(48) >= toIntervalDay(2); -SELECT toIntervalHour(672) = toIntervalWeek(4); +SELECT INTERVAL 48 HOUR > INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR >= INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR = INTERVAL 4 WEEK; -SELECT toIntervalHour(48) < toIntervalHour(2); -SELECT toIntervalHour(48) < toIntervalDay(2); -SELECT toIntervalHour(672) != toIntervalWeek(4); +SELECT INTERVAL 48 HOUR < INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR < INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR != INTERVAL 4 WEEK; -SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 HOUR < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } SELECT('Comparing 
days'); -SELECT toIntervalDay(1) < toIntervalDay(23); -SELECT toIntervalDay(25) > toIntervalWeek(3); +SELECT INTERVAL 1 DAY < INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY > INTERVAL 3 WEEK; -SELECT toIntervalDay(1) > toIntervalDay(23); -SELECT toIntervalDay(25) < toIntervalWeek(3); +SELECT INTERVAL 1 DAY > INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY < INTERVAL 3 WEEK; -SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 DAY = INTERVAL 48 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing weeks'); -SELECT toIntervalWeek(1) < toIntervalWeek(6); +SELECT INTERVAL 1 WEEK < INTERVAL 6 WEEK; -SELECT toIntervalWeek(1) > toIntervalWeek(6); +SELECT INTERVAL 1 WEEK > INTERVAL 6 WEEK; -SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 124 WEEK > INTERVAL 8 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing months'); -SELECT toIntervalMonth(1) < toIntervalMonth(3); -SELECT toIntervalMonth(124) > toIntervalQuarter(5); -SELECT toIntervalMonth(36) = toIntervalYear(3); +SELECT INTERVAL 1 MONTH < INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH > INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH = INTERVAL 3 YEAR; -SELECT toIntervalMonth(1) > toIntervalMonth(3); -SELECT toIntervalMonth(124) < toIntervalQuarter(5); -SELECT toIntervalMonth(36) != toIntervalYear(3); +SELECT INTERVAL 1 MONTH > INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH < INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH != INTERVAL 3 YEAR; -SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 6 MONTH = INTERVAL 26 MICROSECOND; -- { serverError NO_COMMON_TYPE } SELECT('Comparing quarters'); -SELECT toIntervalQuarter(5) > toIntervalQuarter(4); -SELECT toIntervalQuarter(20) = toIntervalYear(5); +SELECT INTERVAL 5 QUARTER > INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER = INTERVAL 5 YEAR; -SELECT toIntervalQuarter(5) < toIntervalQuarter(4); -SELECT toIntervalQuarter(20) != toIntervalYear(5); +SELECT INTERVAL 5 QUARTER < INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER != INTERVAL 5 YEAR; -SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 QUARTER = INTERVAL 6 NANOSECOND; -- { serverError NO_COMMON_TYPE } SELECT('Comparing years'); -SELECT toIntervalYear(1) < toIntervalYear(3); +SELECT INTERVAL 1 YEAR < INTERVAL 3 YEAR; -SELECT toIntervalYear(1) > toIntervalYear(3); +SELECT INTERVAL 1 YEAR > INTERVAL 3 YEAR; -SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 YEAR = INTERVAL 8 SECOND; -- { serverError NO_COMMON_TYPE } \ No newline at end of file From 9c7d9a6a8d96b88c56aaa95b691f2b9bf79cf8d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 23:57:03 +0200 Subject: [PATCH 130/363] Annotations --- tests/queries/0_stateless/02293_ttest_large_samples.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 826bd483fe9..b4687541360 100644 --- a/tests/queries/0_stateless/02293_ttest_large_samples.sql +++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql @@ -1,3 +1,5 @@ +-- Tags: long + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5) FROM ( SELECT studentTTest(sample, variant) as result From 1767ec6b4ca0fc0e8546e705e0d0dff3ffa797cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 23:55:01 +0200 Subject: [PATCH 131/363] 
Debug test --- .../0_stateless/02490_benchmark_max_consecutive_errors.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh index f747b3156a5..df7e9386662 100755 --- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh +++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh @@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ] then echo "$RES" else + echo "$RES" cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" fi From 4c043301e6dde6b0c83394d6721e112c9c7bf4ce Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 13 Aug 2024 10:30:31 +0200 Subject: [PATCH 132/363] Avoid ignoring errors of execute_process() (set COMMAND_ERROR_IS_FATAL=ANY) This will fix with issues like this [1]: Aug 12 09:58:44 '/usr/bin/cmake' '--build' '/build/build_docker/native' '--target' 'pre_compressor' Aug 12 09:58:44 sccache: error: Server startup failed: cache storage failed to read: Unexpected (temporary) at stat Aug 12 09:58:45 ninja: build stopped: subcommand failed. Aug 12 09:58:45 -- Configuring done (77.7s) Aug 12 09:58:47 -- Generating done (1.8s) Aug 12 09:58:47 -- Build files have been written to: /build/build_docker So as you can see even if ninja fails it still wrote build files, while it should fail. [1]: https://s3.amazonaws.com/clickhouse-test-reports/64955/0af41e32a5822d25ac3760f1ebb2313557474701/builds/report.html [2]: https://s3.amazonaws.com/clickhouse-builds/PRs/64955/0af41e32a5822d25ac3760f1ebb2313557474701/binary_darwin_aarch64/build_log.log Note, COMMAND_ERROR_IS_FATAL is 3.19+, and the requirement for now is 3.20 Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 12 ++++++-- PreLoad.cmake | 10 +++++-- cmake/freebsd/default_libs.cmake | 12 ++++++-- cmake/linux/default_libs.cmake | 6 +++- cmake/tools.cmake | 6 +++- cmake/utils.cmake | 5 +++- contrib/cctz-cmake/CMakeLists.txt | 4 ++- contrib/google-protobuf-cmake/CMakeLists.txt | 12 ++++++-- contrib/grpc-cmake/CMakeLists.txt | 30 +++++++++++++------ .../completions/CMakeLists.txt | 1 + 10 files changed, 75 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b4e0484ab1..8e2302e6c52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -609,7 +609,9 @@ if (NATIVE_BUILD_TARGETS execute_process( COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -621,9 +623,13 @@ if (NATIVE_BUILD_TARGETS "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" ${PROJECT_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () diff --git a/PreLoad.cmake b/PreLoad.cmake index e0fd37b2fd6..92b221c9f63 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -51,8 +51,14 @@ if (NOT "$ENV{CFLAGS}" STREQUAL "" endif() # Default toolchain - this is needed to avoid dependency on OS files. 
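# Editor's aside, not part of the patch: the rule applied throughout this commit
# is that every probe whose output feeds the build must abort the configure step
# when it fails, instead of leaving a half-written build tree behind. A minimal
# example of the pattern, assuming CMake >= 3.19 where COMMAND_ERROR_IS_FATAL
# was introduced (the commit message above notes the project already requires 3.20):
#
#   execute_process(COMMAND uname -r
#       OUTPUT_VARIABLE KERNEL_RELEASE
#       OUTPUT_STRIP_TRAILING_WHITESPACE
#       COMMAND_ERROR_IS_FATAL ANY)   # any failure stops cmake right here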
-execute_process(COMMAND uname -s OUTPUT_VARIABLE OS) -execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH) +execute_process(COMMAND uname -s + OUTPUT_VARIABLE OS + COMMAND_ERROR_IS_FATAL ANY +) +execute_process(COMMAND uname -m + OUTPUT_VARIABLE ARCH + COMMAND_ERROR_IS_FATAL ANY +) # By default, prefer clang on Linux # But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER. diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 6bde75f8c9a..3f5b3829877 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -9,10 +9,18 @@ endif () file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/") message(STATUS "-Bprefix: ${bprefix}") -execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # --print-file-name simply prints what you passed in case of nothing was resolved, so let's try one other possible option if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a") message(FATAL_ERROR "libclang_rt.builtins had not been found") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 4a06243243e..51620bc9f33 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -5,7 +5,11 @@ set (DEFAULT_LIBS "-nodefaultlibs") # We need builtins from Clang's RT even without libcxx - for ubsan+int128. # See https://bugs.llvm.org/show_bug.cgi?id=16404 -execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process (COMMAND + ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both. 
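# Editor's aside (hypothetical refactoring, not in this patch): with the same
# keywords now repeated at every call site, the probes could share one
# fatal-by-default wrapper, e.g.:
#
#   function (run_and_capture out)
#       execute_process(COMMAND ${ARGN}
#           OUTPUT_VARIABLE result
#           OUTPUT_STRIP_TRAILING_WHITESPACE
#           COMMAND_ERROR_IS_FATAL ANY)
#       set(${out} "${result}" PARENT_SCOPE)
#   endfunction()
#
#   run_and_capture(BUILTINS_LIBRARY ${CMAKE_CXX_COMPILER} --print-libgcc-file-name --rtlib=compiler-rt)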
if (SANITIZE STREQUAL undefined) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 7aa5d4c51ce..5c7da54b779 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -5,7 +5,11 @@ if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") endif () # Print details to output -execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE +) message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") # Require minimum compiler versions diff --git a/cmake/utils.cmake b/cmake/utils.cmake index a318408098a..a99d8e050a8 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -90,7 +90,10 @@ endfunction() # Function get_cmake_properties returns list of all propreties that cmake supports function(get_cmake_properties outvar) - execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE cmake_properties) + execute_process(COMMAND cmake --help-property-list + OUTPUT_VARIABLE cmake_properties + COMMAND_ERROR_IS_FATAL ANY + ) # Convert command output into a CMake list string(REGEX REPLACE ";" "\\\\;" cmake_properties "${cmake_properties}") string(REGEX REPLACE "\n" ";" cmake_properties "${cmake_properties}") diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 7161f743de1..fadf948b053 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -37,7 +37,9 @@ message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE TIMEZONES) + OUTPUT_VARIABLE TIMEZONES + COMMAND_ERROR_IS_FATAL ANY +) file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") file(APPEND ${TIMEZONES_FILE} "#include \n") diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt index e44f737cfc3..f1a744f851f 100644 --- a/contrib/google-protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -359,7 +359,9 @@ else () execute_process( COMMAND mkdir -p ${PROTOC_BUILD_DIR} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -375,11 +377,15 @@ else () "-DABSL_ENABLE_INSTALL=0" "${protobuf_source_dir}" WORKING_DIRECTORY "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () add_executable(protoc IMPORTED GLOBAL) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 1c0bf41ff78..975774d1990 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -51,8 +51,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set(OPENSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") execute_process( - COMMAND mkdir -p ${OPENSSL_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${OPENSSL_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") @@ -89,15 +90,21 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL 
CMAKE_SYSTEM_NAME "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" "${OPENSSL_SOURCE_DIR}" WORKING_DIRECTORY "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --install "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) # It's not important on which file we depend, we just want to specify right order add_library(openssl_for_grpc STATIC IMPORTED GLOBAL) @@ -108,8 +115,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") execute_process( - COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") @@ -140,11 +148,15 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DgRPC_SSL_PROVIDER=package" "${_gRPC_SOURCE_DIR}" WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) add_executable(grpc_cpp_plugin IMPORTED GLOBAL) set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") diff --git a/programs/bash-completion/completions/CMakeLists.txt b/programs/bash-completion/completions/CMakeLists.txt index d364e07ef6e..2e911e81981 100644 --- a/programs/bash-completion/completions/CMakeLists.txt +++ b/programs/bash-completion/completions/CMakeLists.txt @@ -6,6 +6,7 @@ macro(configure_bash_completion) COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion OUTPUT_VARIABLE ${out} OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND_ERROR_IS_FATAL ANY ) endif() string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}") From 7f005a6ca48d4f193470d3a71bc1d97ff55f4a2f Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Tue, 13 Aug 2024 08:38:30 +0000 Subject: [PATCH 133/363] Fix small value DateTime64 constant folding in nested query --- src/Analyzer/ConstantNode.cpp | 10 ++++- ...222_datetime64_small_value_const.reference | 18 +++++++++ .../03222_datetime64_small_value_const.sql | 39 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.reference create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.sql diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index c65090f5b55..3d0f448da4b 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -177,9 +177,15 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. 
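  * For example (an illustrative case; compare the test added below): a folded
  * subquery constant such as (select toDateTime64(5, 3)) is printed back as a
  * date-time string along the lines of '1970-01-01 00:00:05' rather than as the
  * Decimal64 value 5, so a remote server re-parses it with the intended scale.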
*/ - if (WhichDataType(constant_value_type->getTypeId()).isDateTime64()) + if ((WhichDataType(constant_value_type->getTypeId()).isDateTime64()) || + (WhichDataType(constant_value_type->getTypeId()).isNullable() && WhichDataType((typeid_cast(constant_value_type.get()))->getNestedType()->getTypeId()).isDateTime64())) { - const auto * date_time_type = typeid_cast(constant_value_type.get()); + const DataTypeDateTime64 * date_time_type = nullptr; + if (WhichDataType(constant_value_type->getTypeId()).isNullable()) + date_time_type = typeid_cast((typeid_cast(constant_value_type.get()))->getNestedType().get()); + else + date_time_type = typeid_cast(constant_value_type.get()); + DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.reference b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference new file mode 100644 index 00000000000..ae36c08acc5 --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference @@ -0,0 +1,18 @@ +0 1970-01-01 00:00:00.000 +0 1970-01-01 00:00:05.000 +0 1970-01-01 00:45:25.456789 +0 1970-01-01 00:53:25.456789123 +0 \N +1 1970-01-01 00:00:00.000 +5 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +4 1970-01-01 00:10:05.000 +4 1970-01-01 00:10:05.000 +1 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +5 1970-01-01 00:00:00.000 +0 +0 +5 diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql new file mode 100644 index 00000000000..6999ba9662a --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -0,0 +1,39 @@ +-- Tags: shard + +select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:45:25.456789', 6)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:53:25.456789123', 9)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(null,3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); + +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)) +insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); +insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); +insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); +insert into shard_1.dt64_03222 values(5, toDateTime64(0,3)); + +--Output : 1,5 2,3,4 4 1,2,3,5 0 0 5 +select id, dt from distr_03222_dt64 where dt = (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select 
toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64('1970-01-01 00:10:00.000',3)) order by id; +select id, dt from distr_03222_dt64 where dt < (select toDateTime64(5,3)) order by id; + +select count(*) from distr_03222_dt64 where dt > (select toDateTime64('2024-07-20 00:00:00',3)); +select count(*) from distr_03222_dt64 where dt > (select now()); +select count(*) from distr_03222_dt64 where dt < (select toDateTime64('2004-07-20 00:00:00',3)); + + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; From e46c5a75ef6b5488834add56bea4cab327515bfb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 13 Aug 2024 17:59:05 +0800 Subject: [PATCH 134/363] fix building issue --- src/Functions/{FunctionOverlay.cpp => overlay.cpp} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/Functions/{FunctionOverlay.cpp => overlay.cpp} (99%) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/overlay.cpp similarity index 99% rename from src/Functions/FunctionOverlay.cpp rename to src/Functions/overlay.cpp index 61d2df88ab1..094da27a71d 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/overlay.cpp @@ -728,8 +728,8 @@ private: REGISTER_FUNCTION(Overlay) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Sensitive); } } From 0414cdbbbf32efe10a92c9dd93ba47743ceeb848 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:58:49 +0200 Subject: [PATCH 135/363] Fix unpack error --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5946e561949..5bde4686d3a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1995,7 +1995,7 @@ class TestSuite: tag_line = find_tag_line(file) next_line = file.readline() except UnicodeDecodeError: - return [] + return [], {} try: if filepath.endswith(".sql"): for line in file: From 69893aaa25e3c459f3480955534e05456e7aaa64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 13 Aug 2024 16:19:25 +0200 Subject: [PATCH 136/363] Lower memory usage --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c70cbe1fe45..874095e39dc 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -391,8 +391,8 @@ done # wait for minio to flush its batch if it has any sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" -clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' 
FORMAT JSONEachRow" # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? From 6dfed409f460311f133e30e70f839f9865d71861 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 16:09:45 +0000 Subject: [PATCH 137/363] Fix seraching for query params --- tests/clickhouse-test | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5bde4686d3a..515b519af3e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1978,13 +1978,20 @@ class TestSuite: def is_shebang(line: str) -> bool: return line.startswith("#!") - def find_tag_line(file): - line = file.readline() - while line != "": - line = line.strip() - if line and not is_shebang(line): + def find_tag_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Tags:"): + return line + return "" + + def find_random_settings_limits_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Random settings limits:"): return line - line = file.readline() return "" def load_tags_and_random_settings_limits_from_file(filepath): @@ -1992,13 +1999,16 @@ class TestSuite: need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - tag_line = find_tag_line(file) - next_line = file.readline() + lines = file.readlines() + tag_line = find_tag_line(lines, comment_sign) + random_settings_limits_line = find_random_settings_limits_line( + lines, comment_sign + ) except UnicodeDecodeError: return [], {} try: if filepath.endswith(".sql"): - for line in file: + for line in lines: if "{CLICKHOUSE_DATABASE" in line: need_query_params = True except UnicodeDecodeError: @@ -2006,7 +2016,6 @@ class TestSuite: parsed_tags = parse_tags_from_line(tag_line, comment_sign) if need_query_params: parsed_tags.add("need-query-parameters") - random_settings_limits_line = next_line if parsed_tags else tag_line random_settings_limits = parse_random_settings_limits_from_line( random_settings_limits_line, comment_sign ) @@ -2068,9 +2077,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( - all_tags_and_random_settings_limits[1] - ) + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From 0abca8b7ddbafa37da5b1196b21fb816999fd334 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 14 Aug 2024 09:57:59 +0800 Subject: [PATCH 138/363] fix doc --- .../sql-reference/functions/string-replace-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 1caa6215b6b..d086c9ee64b 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -263,8 +263,8 @@ overlay(s, replace, position[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int.md). -- `length`: Optional. 
An integer type [Int](../data-types/int.md). +- `position`: An integer type [Int](../data-types/int-uint.md). +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** @@ -300,8 +300,8 @@ overlayUTF8(s, replace, position[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int.md). -- `length`: Optional. An integer type [Int](../data-types/int.md). +- `position`: An integer type [Int](../data-types/int-uint.md). +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** From da1c98a771c5b6cd74b3e1ba00fd4e01574489e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 14 Aug 2024 04:54:33 +0200 Subject: [PATCH 139/363] Update the limits --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 874095e39dc..2e6e7bbebe5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -391,8 +391,8 @@ done # wait for minio to flush its batch if it has any sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" -clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? From f740cf4eaa71621fb518c6d5668e8356f452a979 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 14 Aug 2024 09:54:03 +0200 Subject: [PATCH 140/363] Fix data race on SampleKey --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a6ef0063069..6efd3a5c97f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -369,7 +369,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. 
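    /// Editor's aside, not part of the patch: the removed line below handed every
    /// query the one AST owned by the shared metadata snapshot; when a query later
    /// modifies its sampling expression, two concurrent queries race on the same
    /// node. The fix keeps the final-branch value and otherwise takes a private
    /// copy:
    ///     sampling_key_ast = metadata_snapshot->getSamplingKeyAST()->clone();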
- ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); + ASTPtr sampling_key_ast; if (final) { @@ -377,6 +377,12 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// We do spoil available_real_columns here, but it is not used later. available_real_columns.emplace_back(sampling_key.column_names[0], std::move(sampling_column_type)); } + else + { + sampling_key_ast = metadata_snapshot->getSamplingKeyAST()->clone(); + } + + chassert(sampling_key_ast != nullptr); if (has_lower_limit) { From 28b0aad3f9e54beed27ee384ab81312233abaa84 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:16:34 +0200 Subject: [PATCH 141/363] Fix python style --- tests/clickhouse-test | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 515b519af3e..8c2da7334d4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1975,9 +1975,6 @@ class TestSuite: ) return random_settings_limits - def is_shebang(line: str) -> bool: - return line.startswith("#!") - def find_tag_line(lines, comment_sign): for line in lines: if line.startswith(comment_sign) and line[ @@ -2077,9 +2074,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From 844cdd8937cce17060ea8b54fdfc2428d3015f44 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Aug 2024 20:38:09 +0200 Subject: [PATCH 142/363] update toInterval functions --- .../functions/type-conversion-functions.md | 400 +++++++++++++++++- 1 file changed, 380 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 1e618b8cdab..cd6fd9ab839 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -4866,30 +4866,23 @@ Result: └───────┴───────────────┴──────┴──────────────┴──────────────┴──────────────────────┘ ``` -## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) +## toIntervalYear -Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. +Returns an interval of `n` years of data type [IntervalYear](../data-types/special-data-types/interval.md). **Syntax** ``` sql -toIntervalSecond(number) -toIntervalMinute(number) -toIntervalHour(number) -toIntervalDay(number) -toIntervalWeek(number) -toIntervalMonth(number) -toIntervalQuarter(number) -toIntervalYear(number) +toIntervalYear(n) ``` **Arguments** -- `number` — Duration of interval. Positive integer number. +- `n` — Number of years. Positive integer number. [Int*](../data-types/int-uint.md). **Returned values** -- The value in `Interval` data type. +- Interval of `n` years. [IntervalYear](../data-types/special-data-types/interval.md). 
**Example** @@ -4898,19 +4891,386 @@ Query: ``` sql WITH toDate('2019-01-01') AS date, - INTERVAL 1 WEEK AS interval_week, - toIntervalWeek(1) AS interval_to_week -SELECT - date + interval_week, - date + interval_to_week; + toIntervalYear(1) AS interval_to_year +SELECT date + interval_to_year ``` Result: ```response -┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ -│ 2019-01-08 │ 2019-01-08 │ -└───────────────────────────┴──────────────────────────────┘ +┌─plus(date, interval_to_year)─┐ +│ 2020-01-01 │ +└──────────────────────────────┘ +``` + +## toIntervalQuarter + +Returns an interval of `n` quarters of data type [IntervalQuarter](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalQuarter(n) +``` + +**Arguments** + +- `n` — Number of quarters. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` quarters. [IntervalQuarter](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalQuarter(1) AS interval_to_quarter +SELECT date + interval_to_quarter +``` + +Result: + +```response +┌─plus(date, interval_to_quarter)─┐ +│ 2019-04-01 │ +└─────────────────────────────────┘ +``` + +## toIntervalMonth + +Returns an interval of `n` months of data type [IntervalMonth](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMonth(n) +``` + +**Arguments** + +- `n` — Number of m. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` months. [IntervalMonth](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalMonth(1) AS interval_to_month +SELECT date + interval_to_month +``` + +Result: + +```response +┌─plus(date, interval_to_month)─┐ +│ 2019-02-01 │ +└───────────────────────────────┘ +``` + +## toIntervalWeek + +Returns an interval of `n` weeks of data type [IntervalWeek](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalWeek(n) +``` + +**Arguments** + +- `n` — Number of weeks. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` weeks. [IntervalWeek](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalWeek(1) AS interval_to_week +SELECT date + interval_to_week +``` + +Result: + +```response +┌─plus(date, interval_to_week)─┐ +│ 2019-01-08 │ +└──────────────────────────────┘ +``` + +## toIntervalDay + +Returns an interval of `n` days of data type [IntervalDay](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalDay(n) +``` + +**Arguments** + +- `n` — Number of days. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` days. [IntervalDay](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalDay(5) AS interval_to_days +SELECT date + interval_to_days +``` + +Result: + +```response +┌─plus(date, interval_to_days)─┐ +│ 2019-01-06 │ +└──────────────────────────────┘ +``` + +## toIntervalHour + +Returns an interval of `n` hours of data type [IntervalHour](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalHour(n) +``` + +**Arguments** + +- `n` — Number of hours. Positive integer number. [Int*](../data-types/int-uint.md). 
+ +**Returned values** + +- Interval of `n` hours. [IntervalHour](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalHour(12) AS interval_to_hours +SELECT date + interval_to_hours +``` + +Result: + +```response +┌─plus(date, interval_to_hours)─┐ +│ 2019-01-01 12:00:00 │ +└───────────────────────────────┘ +``` + +## toIntervalMinute + +Returns an interval of `n` minutes of data type [IntervalMinute](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMinute(n) +``` + +**Arguments** + +- `n` — Number of minutes. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` minutes. [IntervalMinute](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalMinute(12) AS interval_to_minutes +SELECT date + interval_to_minutes +``` + +Result: + +```response +┌─plus(date, interval_to_minutes)─┐ +│ 2019-01-01 00:12:00 │ +└─────────────────────────────────┘ +``` + +## toIntervalSecond + +Returns an interval of `n` seconds of data type [IntervalSecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalSecond(n) +``` + +**Arguments** + +- `n` — Number of seconds. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` seconds. [IntervalSecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDate('2019-01-01') AS date, + toIntervalSecond(30) AS interval_to_seconds +SELECT date + interval_to_seconds +``` + +Result: + +```response +┌─plus(date, interval_to_seconds)─┐ +│ 2019-01-01 00:00:30 │ +└─────────────────────────────────┘ +``` + +## toIntervalMillisecond + +Returns an interval of `n` milliseconds of data type [IntervalMillisecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMillisecond(n) +``` + +**Arguments** + +- `n` — Number of milliseconds. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` milliseconds. [IntervalMilliseconds](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2019-01-01') AS date, + toIntervalMillisecond(30) AS interval_to_milliseconds +SELECT date + interval_to_milliseconds +``` + +Result: + +```response +┌─plus(date, interval_to_milliseconds)─┐ +│ 2019-01-01 00:00:00.030 │ +└──────────────────────────────────────┘ +``` + +## toIntervalMicrosecond + +Returns an interval of `n` microseconds of data type [IntervalMicrosecond](../data-types/special-data-types/interval.md). + +**Syntax** + +``` sql +toIntervalMicrosecond(n) +``` + +**Arguments** + +- `n` — Number of microseconds. Positive integer number. [Int*](../data-types/int-uint.md). + +**Returned values** + +- Interval of `n` microseconds. [IntervalMicrosecond](../data-types/special-data-types/interval.md). + +**Example** + +Query: + +``` sql +WITH + toDateTime('2019-01-01') AS date, + toIntervalMicrosecond(30) AS interval_to_microseconds +SELECT date + interval_to_microseconds +``` + +Result: + +```response +┌─plus(date, interval_to_microseconds)─┐ +│ 2019-01-01 00:00:00.000030 │ +└──────────────────────────────────────┘ +``` + +## toIntervalNanosecond + +Returns an interval of `n` nanoseconds of data type [IntervalNanosecond](../data-types/special-data-types/interval.md). 
+
+**Syntax**
+
+``` sql
+toIntervalNanosecond(n)
+```
+
+**Arguments**
+
+- `n` — Number of nanoseconds. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` nanoseconds. [IntervalNanosecond](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDateTime('2019-01-01') AS date,
+    toIntervalNanosecond(30) AS interval_to_nanoseconds
+SELECT date + interval_to_nanoseconds
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_nanoseconds)─┐
+│ 2019-01-01 00:00:00.000000030 │
+└─────────────────────────────────────┘
 ```

 ## parseDateTime

From 7bebc448f30a026e41563553788f463243aa18fd Mon Sep 17 00:00:00 2001
From: megao
Date: Thu, 15 Aug 2024 10:42:22 +0800
Subject: [PATCH 143/363] Fix progress value of system.view_refreshes

---
 src/Storages/System/StorageSystemViewRefreshes.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp
index 30539ed6b6a..97065f2b2fb 100644
--- a/src/Storages/System/StorageSystemViewRefreshes.cpp
+++ b/src/Storages/System/StorageSystemViewRefreshes.cpp
@@ -86,7 +86,8 @@ void StorageSystemViewRefreshes::fillData(
     res_columns[i++]->insert(refresh.exception_message);

     res_columns[i++]->insert(refresh.refresh_count);
-    res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read);
+//    res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, static_cast<Float64>(1)));
+    res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0));
     res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9);
     res_columns[i++]->insert(refresh.progress.read_rows);
     res_columns[i++]->insert(refresh.progress.read_bytes);

From c9bfff3934ba5257dbfd1d43294b27155c6aec30 Mon Sep 17 00:00:00 2001
From: megao
Date: Thu, 15 Aug 2024 10:52:17 +0800
Subject: [PATCH 144/363] Fix progress value of system.view_refreshes

---
 src/Storages/System/StorageSystemViewRefreshes.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp
index 97065f2b2fb..3941c4c39c2 100644
--- a/src/Storages/System/StorageSystemViewRefreshes.cpp
+++ b/src/Storages/System/StorageSystemViewRefreshes.cpp
@@ -86,7 +86,6 @@ void StorageSystemViewRefreshes::fillData(
     res_columns[i++]->insert(refresh.exception_message);

     res_columns[i++]->insert(refresh.refresh_count);
-//    res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, static_cast<Float64>(1)));
     res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0));
     res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9);
     res_columns[i++]->insert(refresh.progress.read_rows);

From b82c231886f2496c01b288a138663c4d430fc7b2 Mon Sep 17 00:00:00 2001
From: shiyer7474
Date: Thu, 15 Aug 2024 10:37:13 +0000
Subject: [PATCH 145/363] Code review feedback - used removeNullable()

---
 src/Analyzer/ConstantNode.cpp                      | 11 +++--------
 .../03222_datetime64_small_value_const.sql         |  2 +-
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp
index 3d0f448da4b..3a99ad08ad8 100644
--- a/src/Analyzer/ConstantNode.cpp
+++ b/src/Analyzer/ConstantNode.cpp
@@ -177,15 +177,10 @@ ASTPtr ConstantNode::toASTImpl(const
ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. */ - if ((WhichDataType(constant_value_type->getTypeId()).isDateTime64()) || - (WhichDataType(constant_value_type->getTypeId()).isNullable() && WhichDataType((typeid_cast(constant_value_type.get()))->getNestedType()->getTypeId()).isDateTime64())) + const auto & constant_value_end_type = removeNullable(constant_value_type); /// if Nullable + if (WhichDataType(constant_value_end_type->getTypeId()).isDateTime64()) { - const DataTypeDateTime64 * date_time_type = nullptr; - if (WhichDataType(constant_value_type->getTypeId()).isNullable()) - date_time_type = typeid_cast((typeid_cast(constant_value_type.get()))->getNestedType().get()); - else - date_time_type = typeid_cast(constant_value_type.get()); - + const auto * date_time_type = typeid_cast(constant_value_end_type.get()); DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index 6999ba9662a..af06a622f8d 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -17,7 +17,7 @@ create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); -insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)) +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)); insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); From 7d01c3131265d0dfae24fbf6ab91c71073573765 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:01:13 +0200 Subject: [PATCH 146/363] Delete old code of named collections --- src/Core/PostgreSQL/PoolWithFailover.cpp | 2 +- src/Core/PostgreSQL/PoolWithFailover.h | 9 +- src/Dictionaries/HTTPDictionarySource.cpp | 26 +- src/Dictionaries/MongoDBDictionarySource.cpp | 61 ++-- .../PostgreSQLDictionarySource.cpp | 131 ++++++-- .../ExternalDataSourceConfiguration.cpp | 288 ------------------ .../ExternalDataSourceConfiguration.h | 92 ------ src/Storages/NamedCollectionsHelpers.h | 2 +- src/Storages/StorageExternalDistributed.h | 2 - src/TableFunctions/TableFunctionMongoDB.cpp | 1 - src/TableFunctions/TableFunctionRedis.cpp | 1 - 11 files changed, 170 insertions(+), 445 deletions(-) delete mode 100644 src/Storages/ExternalDataSourceConfiguration.cpp delete mode 100644 src/Storages/ExternalDataSourceConfiguration.h diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 5014564dbe0..054fc3b2226 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -23,7 +23,7 @@ namespace postgres { PoolWithFailover::PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const 
ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 502a9a9b7d7..2237c752367 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -8,7 +8,6 @@ #include "ConnectionHolder.h" #include #include -#include #include @@ -20,12 +19,12 @@ namespace postgres class PoolWithFailover { - -using RemoteDescription = std::vector>; - public: + using ReplicasConfigurationByPriority = std::map>; + using RemoteDescription = std::vector>; + PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 663c63dd6c6..d6df03b39df 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -8,12 +8,12 @@ #include #include #include -#include #include #include #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" +#include #include "registerDictionaries.h" @@ -223,21 +223,23 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) String endpoint; String format; - auto named_collection = created_from_ddl - ? getURLBasedDataSourceConfiguration(config, settings_config_prefix, global_context) - : std::nullopt; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { - url = named_collection->configuration.url; - endpoint = named_collection->configuration.endpoint; - format = named_collection->configuration.format; + validateNamedCollection( + *named_collection, + /* required_keys */{}, + /* optional_keys */ValidateKeysMultiset{ + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); - credentials.setUsername(named_collection->configuration.user); - credentials.setPassword(named_collection->configuration.password); + url = named_collection->getOrDefault("url", ""); + endpoint = named_collection->getOrDefault("endpoint", ""); + format = named_collection->getOrDefault("format", ""); - header_entries.reserve(named_collection->configuration.headers.size()); - for (const auto & [key, value] : named_collection->configuration.headers) - header_entries.emplace_back(key, value); + credentials.setUsername(named_collection->getAnyOrDefault({"user", "credentials.user"}, "")); + credentials.setPassword(named_collection->getAnyOrDefault({"password", "credentials.password"}, "")); + + header_entries = getHeadersFromNamedCollection(*named_collection); } else { diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index c30a6f90e44..7bacfdab3d2 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,15 +1,12 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include #include +#include namespace DB { -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method", "options"}; - void 
registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { auto create_mongo_db_dictionary = []( @@ -22,35 +19,53 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) bool created_from_ddl) { const auto config_prefix = root_config_prefix + ".mongodb"; - ExternalDataSourceConfiguration configuration; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key); }; - auto named_collection = getExternalDataSourceConfiguration(config, config_prefix, context, has_config_key); + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, config_prefix, context) : nullptr; + + String host, username, password, database, method, options, collection; + UInt16 port; if (named_collection) { - configuration = named_collection->configuration; + validateNamedCollection( + *named_collection, + /* required_keys */{"collection"}, + /* optional_keys */ValidateKeysMultiset{ + "host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"}); + + host = named_collection->getOrDefault("host", ""); + port = static_cast(named_collection->getOrDefault("port", 0)); + username = named_collection->getOrDefault("user", ""); + password = named_collection->getOrDefault("password", ""); + database = named_collection->getAnyOrDefault({"db", "database"}, ""); + method = named_collection->getOrDefault("method", ""); + collection = named_collection->getOrDefault("collection", ""); + options = named_collection->getOrDefault("options", ""); } else { - configuration.host = config.getString(config_prefix + ".host", ""); - configuration.port = config.getUInt(config_prefix + ".port", 0); - configuration.username = config.getString(config_prefix + ".user", ""); - configuration.password = config.getString(config_prefix + ".password", ""); - configuration.database = config.getString(config_prefix + ".db", ""); + host = config.getString(config_prefix + ".host", ""); + port = config.getUInt(config_prefix + ".port", 0); + username = config.getString(config_prefix + ".user", ""); + password = config.getString(config_prefix + ".password", ""); + database = config.getString(config_prefix + ".db", ""); + method = config.getString(config_prefix + ".method", ""); + collection = config.getString(config_prefix + ".collection"); + options = config.getString(config_prefix + ".options", ""); } if (created_from_ddl) - context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); + context->getRemoteHostFilter().checkHostAndPort(host, toString(port)); - return std::make_unique(dict_struct, + return std::make_unique( + dict_struct, config.getString(config_prefix + ".uri", ""), - configuration.host, - configuration.port, - configuration.username, - configuration.password, - config.getString(config_prefix + ".method", ""), - configuration.database, - config.getString(config_prefix + ".collection"), - config.getString(config_prefix + ".options", ""), + host, + port, + username, + password, + method, + database, + collection, + options, sample_block); }; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index f62a9a009d8..fd026a97cd4 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -13,7 +13,7 @@ #include "readInvalidateQuery.h" #include #include -#include +#include #include #endif @@ -30,7 +30,7 @@ namespace ErrorCodes static const UInt64 max_block_size = 8192; -static const 
std::unordered_set dictionary_allowed_keys = { +static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; @@ -179,6 +179,19 @@ std::string PostgreSQLDictionarySource::toString() const #endif +static void validateConfigKeys( + const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) +{ + Poco::Util::AbstractConfiguration::Keys config_keys; + dict_config.keys(config_prefix, config_keys); + for (const auto & config_key : config_keys) + { + if (dictionary_allowed_keys.contains(config_key) || startsWith(config_key, "replica")) + continue; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); + } +} + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, @@ -191,38 +204,118 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { #if USE_LIBPQXX const auto settings_config_prefix = config_prefix + ".postgresql"; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; - auto configuration = getExternalDataSourceConfigurationByPriority(config, settings_config_prefix, context, has_config_key); const auto & settings = context->getSettingsRef(); + std::optional dictionary_configuration; + String database, schema, table; + postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; + + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; + if (named_collection) + { + validateNamedCollection>(*named_collection, {}, dictionary_allowed_keys); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = named_collection->getOrDefault("host", ""); + common_configuration.port = named_collection->getOrDefault("port", 0); + common_configuration.username = named_collection->getOrDefault("user", ""); + common_configuration.password = named_collection->getOrDefault("password", ""); + common_configuration.database = named_collection->getAnyOrDefault({"database", "db"}, ""); + common_configuration.schema = named_collection->getOrDefault("schema", ""); + common_configuration.table = named_collection->getOrDefault("table", ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration{ + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = named_collection->getOrDefault("query", ""), + .where = named_collection->getOrDefault("where", ""), + .invalidate_query = named_collection->getOrDefault("invalidate_query", ""), + .update_field = named_collection->getOrDefault("update_field", ""), + .update_lag = named_collection->getOrDefault("update_lag", 1), + }); + + replicas_by_priority[0].emplace_back(common_configuration); + } + else + { + validateConfigKeys(config, settings_config_prefix); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = config.getString(settings_config_prefix + ".host", ""); + common_configuration.port = config.getUInt(settings_config_prefix + ".port", 0); + common_configuration.username = config.getString(settings_config_prefix + ".user", ""); + common_configuration.password = 
config.getString(settings_config_prefix + ".password", ""); + common_configuration.database = config.getString(fmt::format("{}.database", settings_config_prefix), config.getString(fmt::format("{}.db", settings_config_prefix), "")); + common_configuration.schema = config.getString(fmt::format("{}.schema", settings_config_prefix), ""); + common_configuration.table = config.getString(fmt::format("{}.table", settings_config_prefix), ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration + { + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), + .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), + .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), + .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), + .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) + }); + + + if (config.has(settings_config_prefix + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(settings_config_prefix, config_keys); + + for (const auto & config_key : config_keys) + { + if (config_key.starts_with("replica")) + { + String replica_name = settings_config_prefix + "." + config_key; + StoragePostgreSQL::Configuration replica_configuration{common_configuration}; + + size_t priority = config.getInt(replica_name + ".priority", 0); + replica_configuration.host = config.getString(replica_name + ".host", common_configuration.host); + replica_configuration.port = config.getUInt(replica_name + ".port", common_configuration.port); + replica_configuration.username = config.getString(replica_name + ".user", common_configuration.username); + replica_configuration.password = config.getString(replica_name + ".password", common_configuration.password); + + if (replica_configuration.host.empty() || replica_configuration.port == 0 + || replica_configuration.username.empty() || replica_configuration.password.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); + } + + replicas_by_priority[priority].emplace_back(replica_configuration); + } + } + } + else + { + replicas_by_priority[0].emplace_back(common_configuration); + } + } if (created_from_ddl) { - for (const auto & replicas : configuration.replicas_configurations) - for (const auto & replica : replicas.second) + for (const auto & [_, replicas] : replicas_by_priority) + for (const auto & replica : replicas) context->getRemoteHostFilter().checkHostAndPort(replica.host, toString(replica.port)); } + auto pool = std::make_shared( - configuration.replicas_configurations, + replicas_by_priority, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, settings.postgresql_connection_pool_retries, settings.postgresql_connection_pool_auto_close_connection, settings.postgresql_connection_attempt_timeout); - PostgreSQLDictionarySource::Configuration dictionary_configuration - { - .db = configuration.database, - .schema = configuration.schema, - .table = configuration.table, - .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), - .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), - .invalidate_query = 
config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), - .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), - .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) - }; - return std::make_unique(dict_struct, dictionary_configuration, pool, sample_block); + return std::make_unique(dict_struct, dictionary_configuration.value(), pool, sample_block); #else (void)dict_struct; (void)config; diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp deleted file mode 100644 index 41979f8d91c..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ /dev/null @@ -1,288 +0,0 @@ -#include "ExternalDataSourceConfiguration.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "quota_key", "db", - "database", "table", "schema", "replica", - "update_field", "update_lag", "invalidate_query", "query", - "where", "name", "secure", "uri", "collection"}; - - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - SettingsChanges config_settings; - for (const auto & setting : settings.all()) - { - const auto & setting_name = setting.getName(); - auto setting_value = config.getString(config_prefix + '.' + setting_name, ""); - if (!setting_value.empty()) - config_settings.emplace_back(setting_name, setting_value); - } - return config_settings; -} - - -String ExternalDataSourceConfiguration::toString() const -{ - WriteBufferFromOwnString configuration_info; - configuration_info << "username: " << username << "\t"; - if (addresses.empty()) - { - configuration_info << "host: " << host << "\t"; - configuration_info << "port: " << port << "\t"; - } - else - { - for (const auto & [replica_host, replica_port] : addresses) - { - configuration_info << "host: " << replica_host << "\t"; - configuration_info << "port: " << replica_port << "\t"; - } - } - return configuration_info.str(); -} - - -void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration & conf) -{ - host = conf.host; - port = conf.port; - username = conf.username; - password = conf.password; - quota_key = conf.quota_key; - database = conf.database; - table = conf.table; - schema = conf.schema; - addresses = conf.addresses; - addresses_expr = conf.addresses_expr; -} - - -static void validateConfigKeys( - const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix, HasConfigKeyFunc has_config_key_func) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(config_prefix, config_keys); - for (const auto & config_key : config_keys) - { - if (!has_config_key_func(config_key)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); - } -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - 
ExternalDataSourceConfiguration configuration; - - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - validateConfigKeys(dict_config, collection_prefix, has_config_key); - auto config_settings = getSettingsChangesFromConfig(settings, config, collection_prefix); - auto dict_settings = getSettingsChangesFromConfig(settings, dict_config, dict_config_prefix); - /// dictionary config settings override collection settings. - config_settings.insert(config_settings.end(), dict_settings.begin(), dict_settings.end()); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.host = dict_config.getString(dict_config_prefix + ".host", config.getString(collection_prefix + ".host", "")); - configuration.port = dict_config.getInt(dict_config_prefix + ".port", config.getUInt(collection_prefix + ".port", 0)); - configuration.username = dict_config.getString(dict_config_prefix + ".user", config.getString(collection_prefix + ".user", "")); - configuration.password = dict_config.getString(dict_config_prefix + ".password", config.getString(collection_prefix + ".password", "")); - configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", config.getString(collection_prefix + ".quota_key", "")); - configuration.database = dict_config.getString(dict_config_prefix + ".db", config.getString(dict_config_prefix + ".database", - config.getString(collection_prefix + ".db", config.getString(collection_prefix + ".database", "")))); - configuration.table = dict_config.getString(dict_config_prefix + ".table", config.getString(collection_prefix + ".table", "")); - configuration.schema = dict_config.getString(dict_config_prefix + ".schema", config.getString(collection_prefix + ".schema", "")); - - if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and dictionary parameters are not added"); - } - return ExternalDataSourceInfo{.configuration = configuration, .settings_changes = config_settings}; - } - return std::nullopt; -} - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context) -{ - URLBasedDataSourceConfiguration configuration; - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.url = - dict_config.getString(dict_config_prefix + ".url", config.getString(collection_prefix + ".url", "")); - configuration.endpoint = - dict_config.getString(dict_config_prefix + ".endpoint", config.getString(collection_prefix + ".endpoint", "")); - configuration.format = - dict_config.getString(dict_config_prefix + ".format", config.getString(collection_prefix + ".format", "")); - configuration.compression_method = - 
dict_config.getString(dict_config_prefix + ".compression", config.getString(collection_prefix + ".compression_method", "")); - configuration.structure = - dict_config.getString(dict_config_prefix + ".structure", config.getString(collection_prefix + ".structure", "")); - configuration.user = - dict_config.getString(dict_config_prefix + ".credentials.user", config.getString(collection_prefix + ".credentials.user", "")); - configuration.password = - dict_config.getString(dict_config_prefix + ".credentials.password", config.getString(collection_prefix + ".credentials.password", "")); - - String headers_prefix; - const Poco::Util::AbstractConfiguration *headers_config = nullptr; - if (dict_config.has(dict_config_prefix + ".headers")) - { - headers_prefix = dict_config_prefix + ".headers"; - headers_config = &dict_config; - } - else - { - headers_prefix = collection_prefix + ".headers"; - headers_config = &config; - } - - if (headers_config) - { - Poco::Util::AbstractConfiguration::Keys header_keys; - headers_config->keys(headers_prefix, header_keys); - headers_prefix += "."; - for (const auto & header : header_keys) - { - const auto header_prefix = headers_prefix + header; - configuration.headers.emplace_back( - headers_config->getString(header_prefix + ".name"), - headers_config->getString(header_prefix + ".value")); - } - } - - return URLBasedDataSourceConfig{ .configuration = configuration }; - } - - return std::nullopt; -} - -ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration common_configuration; - - auto named_collection = getExternalDataSourceConfiguration(dict_config, dict_config_prefix, context, has_config_key); - if (named_collection) - { - common_configuration = named_collection->configuration; - } - else - { - common_configuration.host = dict_config.getString(dict_config_prefix + ".host", ""); - common_configuration.port = dict_config.getUInt(dict_config_prefix + ".port", 0); - common_configuration.username = dict_config.getString(dict_config_prefix + ".user", ""); - common_configuration.password = dict_config.getString(dict_config_prefix + ".password", ""); - common_configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", ""); - common_configuration.database = dict_config.getString(dict_config_prefix + ".db", dict_config.getString(dict_config_prefix + ".database", "")); - common_configuration.table = dict_config.getString(fmt::format("{}.table", dict_config_prefix), ""); - common_configuration.schema = dict_config.getString(fmt::format("{}.schema", dict_config_prefix), ""); - } - - ExternalDataSourcesByPriority configuration - { - .database = common_configuration.database, - .table = common_configuration.table, - .schema = common_configuration.schema, - .replicas_configurations = {} - }; - - if (dict_config.has(dict_config_prefix + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(dict_config_prefix, config_keys); - - for (const auto & config_key : config_keys) - { - if (config_key.starts_with("replica")) - { - ExternalDataSourceConfiguration replica_configuration(common_configuration); - String replica_name = dict_config_prefix + "." 
+ config_key; - validateConfigKeys(dict_config, replica_name, has_config_key); - - size_t priority = dict_config.getInt(replica_name + ".priority", 0); - replica_configuration.host = dict_config.getString(replica_name + ".host", common_configuration.host); - replica_configuration.port = dict_config.getUInt(replica_name + ".port", common_configuration.port); - replica_configuration.username = dict_config.getString(replica_name + ".user", common_configuration.username); - replica_configuration.password = dict_config.getString(replica_name + ".password", common_configuration.password); - replica_configuration.quota_key = dict_config.getString(replica_name + ".quota_key", common_configuration.quota_key); - - if (replica_configuration.host.empty() || replica_configuration.port == 0 - || replica_configuration.username.empty() || replica_configuration.password.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and no other dictionary parameters are added"); - } - - configuration.replicas_configurations[priority].emplace_back(replica_configuration); - } - } - } - else - { - configuration.replicas_configurations[0].emplace_back(common_configuration); - } - - return configuration; -} - - -void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration & conf) -{ - url = conf.url; - format = conf.format; - compression_method = conf.compression_method; - structure = conf.structure; - http_method = conf.http_method; - headers = conf.headers; -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -} diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h deleted file mode 100644 index c703c9ce999..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -#define EMPTY_SETTINGS(M, ALIAS) -DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -struct EmptySettings : public BaseSettings {}; - -struct ExternalDataSourceConfiguration -{ - String host; - UInt16 port = 0; - String username = "default"; - String password; - String quota_key; - String database; - String table; - String schema; - - std::vector> addresses; /// Failover replicas. - String addresses_expr; - - String toString() const; - - void set(const ExternalDataSourceConfiguration & conf); -}; - - -using StorageSpecificArgs = std::vector>; - -struct ExternalDataSourceInfo -{ - ExternalDataSourceConfiguration configuration; - SettingsChanges settings_changes; -}; - -using HasConfigKeyFunc = std::function; - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings = {}); - - -/// Highest priority is 0, the bigger the number in map, the less the priority. 
-using ExternalDataSourcesConfigurationByPriority = std::map>; - -struct ExternalDataSourcesByPriority -{ - String database; - String table; - String schema; - ExternalDataSourcesConfigurationByPriority replicas_configurations; -}; - -ExternalDataSourcesByPriority -getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key); - -struct URLBasedDataSourceConfiguration -{ - String url; - String endpoint; - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - - String user; - String password; - - HTTPHeaderEntries headers; - String http_method; - - void set(const URLBasedDataSourceConfiguration & conf); -}; - -struct URLBasedDataSourceConfig -{ - URLBasedDataSourceConfiguration configuration; -}; - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context); - -} diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index f444a581eb6..bf2da7235a2 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -133,7 +133,7 @@ void validateNamedCollection( { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Unexpected key {} in named collection. Required keys: {}, optional keys: {}", + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", backQuoteIfNeed(key), fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); } } diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index c4d37c3e5cc..56c7fe86f34 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -8,8 +8,6 @@ namespace DB { -struct ExternalDataSourceConfiguration; - /// Storages MySQL and PostgreSQL use ConnectionPoolWithFailover and support multiple replicas. /// This class unites multiple storages with replicas into multiple shards with replicas. /// A query to external database is passed to one replica on each shard, the result is united. 
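The dictionary sources in this series now validate their configuration against explicit required/optional key sets instead of the old free-form `has_config_key` callbacks. The following is a minimal, self-contained sketch of that validation pattern; the names here (`validateKeys`, the key sets) are illustrative stand-ins, not the actual ClickHouse `validateNamedCollection` API shown above:

```cpp
#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

/// Simplified stand-in for validateNamedCollection(): every required key must be
/// present, and every present key must be either required or optional.
/// Requires C++20 for std::set::contains.
void validateKeys(
    const std::set<std::string> & present,
    const std::set<std::string> & required,
    const std::set<std::string> & optional)
{
    for (const auto & key : required)
        if (!present.contains(key))
            throw std::runtime_error("Required key `" + key + "` is missing in named collection");

    for (const auto & key : present)
        if (!required.contains(key) && !optional.contains(key))
            throw std::runtime_error("Unexpected key `" + key + "` in named collection");
}

int main()
{
    /// Mirrors the MongoDB source above: "collection" is required, the rest are optional.
    validateKeys(
        {"host", "port", "collection"},
        {"collection"},
        {"host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"});
    std::cout << "named collection is valid" << std::endl;
}
```

The design benefit over the old callback approach is that a misspelled key fails loudly at dictionary creation instead of being silently ignored.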
diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b2cf1b4675e..94279d1bf6d 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index f87ba6d1c6d..aca751c2840 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -15,7 +15,6 @@ #include #include -#include namespace DB From f6e1eb1643888c2b8bbc179899cbc4bacaee5b78 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:31:48 +0200 Subject: [PATCH 147/363] Fix style check --- src/Dictionaries/HTTPDictionarySource.cpp | 2 +- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index d6df03b39df..bf19f912723 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -230,7 +230,7 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) *named_collection, /* required_keys */{}, /* optional_keys */ValidateKeysMultiset{ - "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); url = named_collection->getOrDefault("url", ""); endpoint = named_collection->getOrDefault("endpoint", ""); diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd026a97cd4..fd426de126d 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -24,6 +24,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } #if USE_LIBPQXX From 077f10a4ada2a561111207d8e99e22d2c8e48f40 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 18:26:48 +0200 Subject: [PATCH 148/363] Fix build --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd426de126d..8e472f85a6e 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -4,6 +4,7 @@ #include #include #include "DictionarySourceFactory.h" +#include #include "registerDictionaries.h" #if USE_LIBPQXX @@ -13,7 +14,6 @@ #include "readInvalidateQuery.h" #include #include -#include #include #endif @@ -27,14 +27,14 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -#if USE_LIBPQXX - -static const UInt64 max_block_size = 8192; - static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; +#if USE_LIBPQXX + +static const UInt64 max_block_size = 8192; + namespace { ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct, const String & schema, const String & table, const String & query, const String & where) @@ -178,8 +178,6 @@ std::string PostgreSQLDictionarySource::toString() const return 
"PostgreSQL: " + configuration.db + '.' + configuration.table + (where.empty() ? "" : ", where: " + where); } -#endif - static void validateConfigKeys( const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) { @@ -193,6 +191,8 @@ static void validateConfigKeys( } } +#endif + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, From a49ef43286e31525f8829e3307a9231be0ce417c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 15 Aug 2024 21:33:26 +0000 Subject: [PATCH 149/363] Fix 01119_session_log flakiness --- ...9_session_log.sql => 01119_session_log.sh} | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) rename tests/queries/0_stateless/{01119_session_log.sql => 01119_session_log.sh} (73%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/01119_session_log.sql b/tests/queries/0_stateless/01119_session_log.sh old mode 100644 new mode 100755 similarity index 73% rename from tests/queries/0_stateless/01119_session_log.sql rename to tests/queries/0_stateless/01119_session_log.sh index 55f6228797a..809d300fada --- a/tests/queries/0_stateless/01119_session_log.sql +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -1,5 +1,20 @@ --- Tags: no-fasttest +#!/usr/bin/env bash +# Tags: no-fasttest +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +session_log_query_prefix=" +system flush logs; +select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log +where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') +and interface in ('HTTP', 'TCP', 'TCP_Interserver') +and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) +and event_time >= now() - interval 5 minute" + +$CLICKHOUSE_CLIENT -nm -q " select * from remote('127.0.0.2', system, one, 'default', ''); select * from remote('127.0.0.2', system, one, 'default', 'wrong password'); -- { serverError AUTHENTICATION_FAILED } select * from remote('127.0.0.2', system, one, 'nonexistsnt_user_1119', ''); -- { serverError AUTHENTICATION_FAILED } @@ -16,9 +31,17 @@ select * from url('http://127.0.0.1:8123/?query=select+1&user=+++', LineAsString select * from cluster('test_cluster_interserver_secret', system, one); -system flush logs; -select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log -where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') -and interface in ('HTTP', 'TCP', 'TCP_Interserver') -and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) -and event_time >= now() - interval 5 minute order by type, user, interface; +$session_log_query_prefix and type != 'Logout' order by type, user, interface; +" + +# Wait for logout events. 
+for attempt in {1..10} +do + if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] + then + break + fi + sleep 2 +done + +$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout' order by user, interface" From 253188381759d062967c9bb3b9d0907f30eab61a Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 05:39:50 +0000 Subject: [PATCH 150/363] she sells seashells by seashore the shells that she sells are seashells im sure --- tests/queries/0_stateless/01119_session_log.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01119_session_log.sh b/tests/queries/0_stateless/01119_session_log.sh index 809d300fada..2d17b545276 100755 --- a/tests/queries/0_stateless/01119_session_log.sh +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -35,7 +35,7 @@ $session_log_query_prefix and type != 'Logout' order by type, user, interface; " # Wait for logout events. -for attempt in {1..10} +for _ in {1..10} do if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] then From a85f544205f2e782d4f6c16c0622728475db3571 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 16 Aug 2024 08:47:28 +0000 Subject: [PATCH 151/363] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index bd92465e1aa..c8edbdc5932 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,3 @@ 01624_soft_constraints -02354_vector_search_queries # Check after ConstantNode refactoring 02944_variant_as_common_type From 1b49e2492521c54b0e6240d412af847d3fa21221 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:26:31 +0200 Subject: [PATCH 152/363] Fix clang-tidy --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 8e472f85a6e..b1bab17e2e9 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -208,7 +208,6 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) const auto & settings = context->getSettingsRef(); std::optional dictionary_configuration; - String database, schema, table; postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; From 60a6e893a40761eb46655e76cb6a3fe5f177019c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 17:56:12 +0800 Subject: [PATCH 153/363] first commit --- src/Common/examples/CMakeLists.txt | 5 + src/Common/examples/utf8_upper_lower.cpp | 27 ++ src/Functions/LowerUpperImpl.h | 1 - src/Functions/LowerUpperUTF8Impl.h | 283 +++--------------- src/Functions/initcapUTF8.cpp | 3 +- src/Functions/lowerUTF8.cpp | 25 +- src/Functions/upperUTF8.cpp | 24 +- .../00170_lower_upper_utf8.reference | 4 + .../0_stateless/00170_lower_upper_utf8.sql | 11 + .../00233_position_function_family.sql | 3 + .../0_stateless/00761_lower_utf8_bug.sql | 3 + .../0_stateless/01278_random_string_utf8.sql | 3 + .../0_stateless/01431_utf8_ubsan.reference | 4 +- .../queries/0_stateless/01431_utf8_ubsan.sql | 3 + .../0_stateless/01590_countSubstrings.sql | 3 + ...71_lower_upper_utf8_row_overlaps.reference | 4 +- .../02071_lower_upper_utf8_row_overlaps.sql | 3 + ...new_functions_must_be_documented.reference | 2 - .../02514_if_with_lazy_low_cardinality.sql | 3 + .../0_stateless/02807_lower_utf8_msan.sql | 3 + tests/queries/0_stateless/03015_peder1001.sql | 3 + 21 files changed, 159 insertions(+), 261 deletions(-) create mode 100644 src/Common/examples/utf8_upper_lower.cpp diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 69580d4ad0e..8383e80d09d 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -92,3 +92,8 @@ endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) + +if (TARGET ch_contrib::icu) + clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp) + target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu) +endif () diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp new file mode 100644 index 00000000000..826e1763105 --- /dev/null +++ b/src/Common/examples/utf8_upper_lower.cpp @@ -0,0 +1,27 @@ +#include +#include + +std::string utf8_to_lower(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toLower(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +std::string utf8_to_upper(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toUpper(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +int main() +{ + std::string input = "ır"; + std::cout << "upper:" << utf8_to_upper(input) << std::endl; + return 0; +} diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index d463ef96e16..a52703d10c8 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -1,7 +1,6 @@ #pragma once #include - namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index eedabca5b22..5da085f48e5 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,15 +1,14 @@ #pragma once + +#include "config.h" + +#if USE_ICU + #include #include -#include -#include +#include +#include #include -#include - -#ifdef __SSE2__ -#include -#endif - namespace DB { @@ -19,71 +18,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -/// xor or do nothing -template -UInt8 xor_or_identity(const 
UInt8 c, const int mask) -{ - return c ^ mask; -} - -template <> -inline UInt8 xor_or_identity(const UInt8 c, const int) -{ - return c; -} - -/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array -template -inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) -{ - if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// ѐёђѓєѕіїјљњћќѝўџ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// А-П - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) - { - /// а-п - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) - { - /// Р-Я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// р-я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } -} - - -/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case. - * Note: It is assumed that after the character is converted to another case, - * the length of its multibyte sequence in UTF-8 does not change. - * Otherwise, the behavior is undefined. - */ -template +template struct LowerUpperUTF8Impl { static void vector( @@ -103,180 +38,46 @@ struct LowerUpperUTF8Impl return; } - res_data.resize_exact(data.size()); - res_offsets.assign(offsets); - array(data.data(), data.data() + data.size(), offsets, res_data.data()); + res_data.resize(data.size()); + res_offsets.resize_exact(offsets.size()); + + String output; + size_t curr_offset = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); + size_t size = offsets[i] - offsets[i - 1]; + + icu::UnicodeString input(data_start, static_cast(size), "UTF-8"); + if constexpr (upper) + input.toUpper(); + else + input.toLower(); + + output.clear(); + input.toUTF8String(output); + + /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first + /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this + /// case, the behavior is also reasonable. + const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size()); + size_t valid_size = res_end ? res_end - output.data() + 1 : 0; + + res_data.resize(curr_offset + valid_size + 1); + memcpy(&res_data[curr_offset], output.data(), valid_size); + res_data[curr_offset + valid_size] = 0; + + curr_offset += valid_size + 1; + res_offsets[i] = curr_offset; + } } static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } - - /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`. - * `src` and `dst` are incremented by corresponding sequence lengths. 
*/ - static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial) - { - if (src[0] <= ascii_upper_bound) - { - if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) - *dst++ = *src++ ^ flip_case_mask; - else - *dst++ = *src++; - } - else if (src + 1 < src_end - && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu)))) - { - cyrillic_to_case(src, dst); - } - else if (src + 1 < src_end && src[0] == 0xC2u) - { - /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF - *dst++ = *src++; - *dst++ = *src++; - } - else if (src + 2 < src_end && src[0] == 0xE2u) - { - /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } - else - { - size_t src_sequence_length = UTF8::seqLength(*src); - /// In case partial buffer was passed (due to SSE optimization) - /// we cannot convert it with current src_end, but we may have more - /// bytes to convert and eventually got correct symbol. - if (partial && src_sequence_length > static_cast(src_end - src)) - return false; - - auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); - if (src_code_point) - { - int dst_code_point = to_case(*src_code_point); - if (dst_code_point > 0) - { - size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); - assert(dst_sequence_length <= 4); - - /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. - /// As an example, this happens for ß and ẞ. - if (dst_sequence_length == src_sequence_length) - { - src += dst_sequence_length; - dst += dst_sequence_length; - return true; - } - } - } - - *dst = *src; - ++dst; - ++src; - } - - return true; - } - -private: - static constexpr auto ascii_upper_bound = '\x7f'; - static constexpr auto flip_case_mask = 'A' ^ 'a'; - - static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) - { - const auto * offset_it = offsets.begin(); - const UInt8 * begin = src; - -#ifdef __SSE2__ - static constexpr auto bytes_sse = sizeof(__m128i); - - /// If we are before this position, we can still read at least bytes_sse. 
- const auto * src_end_sse = src_end - bytes_sse + 1; - - /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) - const auto v_zero = _mm_setzero_si128(); - const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); - const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); - const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); - - while (src < src_end_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(src)); - - /// check for ASCII - const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero); - const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii); - - /// ASCII - if (mask_is_not_ascii == 0) - { - const auto is_not_case - = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); - const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); - - /// everything in correct case ASCII - if (mask_is_not_case == 0) - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); - else - { - /// ASCII in mixed case - /// keep `flip_case_mask` only where necessary, zero out elsewhere - const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); - - /// flip case by applying calculated mask - const auto cased_chars = _mm_xor_si128(chars, xor_mask); - - /// store result back to destination - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); - } - - src += bytes_sse; - dst += bytes_sse; - } - else - { - /// UTF-8 - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_from_begin >= *offset_it) - ++offset_it; - - /// Do not allow one row influence another (since row may have invalid sequence, and break the next) - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - const UInt8 * expected_end = std::min(src + bytes_sse, row_end); - - while (src < expected_end) - { - if (!toCase(src, expected_end, dst, /* partial= */ true)) - { - /// Fallback to handling byte by byte. 
- src_end_sse = src; - break; - } - } - } - } - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_it != offsets.end() && offset_from_begin >= *offset_it) - ++offset_it; -#endif - - /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) - while (src < src_end) - { - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - - while (src < row_end) - toCase(src, row_end, dst, /* partial= */ false); - ++offset_it; - } - } }; } + +#endif diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 282d846094e..004586dce26 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -1,9 +1,8 @@ #include #include -#include #include #include - +#include namespace DB { diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp index 7adb0069121..e2f7cb84730 100644 --- a/src/Functions/lowerUTF8.cpp +++ b/src/Functions/lowerUTF8.cpp @@ -1,9 +1,10 @@ -#include +#include "config.h" + +#if USE_ICU + +#include #include #include -#include -#include - namespace DB { @@ -15,13 +16,25 @@ struct NameLowerUTF8 static constexpr auto name = "lowerUTF8"; }; -using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>; +using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>; } REGISTER_FUNCTION(LowerUTF8) { - factory.registerFunction(); + FunctionDocumentation::Description description + = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)"; + FunctionDocumentation::Syntax syntax = "lowerUTF8(input)"; + FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; + FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; + FunctionDocumentation::Examples examples = { + {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"}, + }; + FunctionDocumentation::Categories categories = {"String"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } } + +#endif diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp index 659e67f0ef3..ef26430331f 100644 --- a/src/Functions/upperUTF8.cpp +++ b/src/Functions/upperUTF8.cpp @@ -1,8 +1,10 @@ +#include "config.h" + +#if USE_ICU + +#include #include #include -#include -#include - namespace DB { @@ -14,13 +16,25 @@ struct NameUpperUTF8 static constexpr auto name = "upperUTF8"; }; -using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>; +using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>; } REGISTER_FUNCTION(UpperUTF8) { - factory.registerFunction(); + FunctionDocumentation::Description description + = R"(Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text.
If this assumption is violated, no exception is thrown and the result is undefined.)"; + FunctionDocumentation::Syntax syntax = "upperUTF8(input)"; + FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; + FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; + FunctionDocumentation::Examples examples = { + {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"}, + }; + FunctionDocumentation::Categories categories = {"String"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); } } + +#endif diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference index f202cb75513..3c644f22b9b 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -22,3 +22,7 @@ 1 1 1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql index 4caba2033ff..85b6c5c6095 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; @@ -27,3 +30,11 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n; select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; + +-- Turkish language +select upperUTF8('ır') = 'IR'; +select lowerUTF8('ır') = 'ır'; + +-- German language +select upper('öäüß') = 'öäüß'; +select lower('ÖÄÜẞ') = 'ÖÄÜẞ'; diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql index dd7394bc39a..d6668cb7ba4 100644 --- a/tests/queries/0_stateless/00233_position_function_family.sql +++ b/tests/queries/0_stateless/00233_position_function_family.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SET send_logs_level = 'fatal'; select 1 = position('', ''); diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql index de20b894331..a0ab55edc15 100644 --- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql +++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql @@ -1 +1,4 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0'); diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql index da2dc48c3e1..290d6a0c759 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.sql +++ b/tests/queries/0_stateless/01278_random_string_utf8.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT lengthUTF8(randomStringUTF8(100)); 
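-- lengthUTF8 counts code points rather than bytes, so the query above is expected to return exactly 100 regardless of the encoded byte length.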
SELECT toTypeName(randomStringUTF8(10)); diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference index c98c950d535..dc785e57851 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.reference +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -1,2 +1,2 @@ -FF -FF +EFBFBD +EFBFBD diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql index d6a299225b1..3a28e023805 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.sql +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT hex(lowerUTF8('\xFF')); SELECT hex(upperUTF8('\xFF')); diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index b38cbb7d188..5ec4f412d7f 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + -- -- countSubstrings -- diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference index a3bac432482..deabef61a88 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference @@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo -- MONOGRAM FOR YANG with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_ select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV; -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -- NOTE: regression test for introduced bug -- https://github.com/ClickHouse/ClickHouse/issues/42756 diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql index 8ca0a3f5f75..d175e0659d0 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + drop table if exists utf8_overlap; create table utf8_overlap (str String) engine=Memory(); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c39f1fb1ce9..0980e25b70f 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -416,7 +416,6 @@ logTrace lowCardinalityIndices lowCardinalityKeys lower -lowerUTF8 makeDate makeDate32 makeDateTime @@ -897,7 +896,6 @@ tupleToNameValuePairs unbin unhex upper -upperUTF8 uptime validateNestedArraySizes version diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql index 80e3c0a9ece..b169cfd0ab9 100644 --- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + create table if not exists t 
(`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql index e9eb18bf615..95f224577f7 100644 --- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql +++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1; SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2); diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql index 810503207f2..df8e4db1536 100644 --- a/tests/queries/0_stateless/03015_peder1001.sql +++ b/tests/queries/0_stateless/03015_peder1001.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + DROP TABLE IF EXISTS test_data; CREATE TABLE test_data From 4600b270dafec20b276ab83eb557270c24cb4169 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 17:58:54 +0800 Subject: [PATCH 154/363] remove icu contrib --- .gitmodules | 3 --- contrib/icu | 1 - 2 files changed, 4 deletions(-) delete mode 160000 contrib/icu diff --git a/.gitmodules b/.gitmodules index 7fdfb1103c5..164da311930 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,9 +106,6 @@ [submodule "contrib/icudata"] path = contrib/icudata url = https://github.com/ClickHouse/icudata -[submodule "contrib/icu"] - path = contrib/icu - url = https://github.com/unicode-org/icu [submodule "contrib/flatbuffers"] path = contrib/flatbuffers url = https://github.com/ClickHouse/flatbuffers diff --git a/contrib/icu b/contrib/icu deleted file mode 160000 index 7750081bda4..00000000000 --- a/contrib/icu +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 From 3ee741bd5e33d16b2f5711a8f2b06fca1a64b7bc Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 18:04:15 +0800 Subject: [PATCH 155/363] add submodule contrib/icu from clickhouse --- .gitmodules | 4 ++++ contrib/icu | 1 + 2 files changed, 5 insertions(+) create mode 160000 contrib/icu diff --git a/.gitmodules b/.gitmodules index 164da311930..a8cc6a07caf 100644 --- a/.gitmodules +++ b/.gitmodules @@ -372,3 +372,7 @@ [submodule "contrib/numactl"] path = contrib/numactl url = https://github.com/ClickHouse/numactl.git +[submodule "contrib/icu"] + path = contrib/icu + url = https://github.com/ClickHouse/icu + branch = ClickHouse/release-75-1 diff --git a/contrib/icu b/contrib/icu new file mode 160000 index 00000000000..4216173eeeb --- /dev/null +++ b/contrib/icu @@ -0,0 +1 @@ +Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273 From 5ff4d990e189dfee42eb57f567a5ff6313cfa8d8 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 11:11:11 +0200 Subject: [PATCH 156/363] CI: Auto Releases in prod --- .github/workflows/auto_releases.yml | 76 ++++++++++------------ .github/workflows/create_release.yml | 2 + tests/ci/auto_release.py | 9 ++++ tests/ci/ci_utils.py | 24 ++++++--- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/.github/workflows/auto_releases.yml b/.github/workflows/auto_releases.yml index c159907187c..28483ea136f 100644 ---
a/.github/workflows/auto_releases.yml +++ b/.github/workflows/auto_releases.yml @@ -19,13 +19,11 @@ on: jobs: AutoReleaseInfo: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, release-maker] outputs: data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }} dry_run: ${{ steps.info.outputs.DRY_RUN }} steps: - - name: Debug Info - uses: ./.github/actions/debug - name: Set envs run: | cat >> "$GITHUB_ENV" << 'EOF' @@ -36,6 +34,10 @@ jobs: echo "DRY_RUN=true" >> "$GITHUB_ENV" - name: Check out repository code uses: ClickHouse/checkout@v1 + with: + fetch-depth: 0 # full history needed + - name: Debug Info + uses: ./.github/actions/debug - name: Prepare Info id: info run: | @@ -46,12 +48,7 @@ jobs: echo "::endgroup::" { echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_ENV" - { - echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_OUTPUT" echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" @@ -62,48 +59,29 @@ jobs: - name: Clean up uses: ./.github/actions/clean - Release_0: + Releases: needs: AutoReleaseInfo - name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }} - if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }} + strategy: + matrix: + release_params: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases }} + max-parallel: 1 + name: Release ${{ matrix.release_params.release_branch }} uses: ./.github/workflows/create_release.yml with: - ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} + ref: ${{ matrix.release_params.commit_sha }} type: patch - dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }} -# -# Release_1: -# needs: [AutoReleaseInfo, Release_0] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }} -# uses: ./.github/workflows/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} -# -# Release_2: -# needs: [AutoReleaseInfo, Release_1] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }} -# uses: ./.github/workflow/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} -# -# Release_3: -# needs: [AutoReleaseInfo, Release_2] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }} -# uses: ./.github/workflow/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} + dry-run: ${{ fromJson(needs.AutoReleaseInfo.outputs.dry_run) }} + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} -# - name: Post Slack Message -# if: ${{ !cancelled() }} -# run: | -# cd "$GITHUB_WORKSPACE/tests/ci" -# python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} + PostSlackMessage: + needs: [AutoReleaseInfo] + runs-on: [self-hosted, release-maker] + if: ${{ !cancelled() }} + steps: + - name: 
Check out repository code + uses: ClickHouse/checkout@v1 + - name: Post + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 1553d689227..1fb6cb60e96 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -47,6 +47,8 @@ concurrency: required: false default: false type: boolean + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: jobs: CreateRelease: diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 3cc88634004..58cfc833afe 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -1,4 +1,5 @@ import argparse +import copy import dataclasses import json import os @@ -46,6 +47,7 @@ def parse_args(): MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5 AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json" +AUTORELEASE_MATRIX_PARAMS = "/tmp/autorelease_params.json" @dataclasses.dataclass @@ -74,6 +76,12 @@ class AutoReleaseInfo: with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f: print(json.dumps(dataclasses.asdict(self), indent=2), file=f) + # Dump a file for the GH Actions matrix: same as the file above, but with not-ready release branches dropped + params = copy.deepcopy(self) + params.releases = [release for release in params.releases if release.ready] + with open(AUTORELEASE_MATRIX_PARAMS, "w", encoding="utf-8") as f: + print(json.dumps(dataclasses.asdict(params), indent=2), file=f) + @staticmethod def from_file() -> "AutoReleaseInfo": with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -136,6 +144,7 @@ def _prepare(token): commit_ci_status = CI.GH.get_commit_status_by_name( token=token, commit_sha=commit, + # handle old name for old releases status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), ) commit_sha = commit diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index b8778e0cc50..97ab10f1b58 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -102,21 +102,29 @@ class GH: assert len(commit_sha) == 40 assert Utils.is_hex(commit_sha) assert not Utils.is_hex(token) - url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" + + url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses" headers = { "Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json", } - response = requests.get(url, headers=headers, timeout=5) if isinstance(status_name, str): status_name = (status_name,) - if response.status_code == 200: - assert "next" not in response.links, "Response truncated" - statuses = response.json() - for status in statuses: - if status["context"] in status_name: - return status["state"] # type: ignore + + while url: + response = requests.get(url, headers=headers, timeout=5) + if response.status_code == 200: + statuses = response.json() + for status in statuses: + if status["context"] in status_name: + return status["state"] + + # Check if there is a next page + url = response.links.get("next", {}).get("url") + else: + break + return "" @staticmethod From f17655f13fcadb9babbc859e45e9f38cb32ad9e3 Mon Sep 17 00:00:00 2001 From: Dani Pozo Date: Thu, 1 Aug 2024 11:01:27 +0200 Subject: [PATCH 157/363] Load filesystem cache metadata asynchronously --- src/Common/StatusFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 35 +++--- .../Cached/CachedObjectStorage.cpp | 5 +- src/Interpreters/Cache/FileCache.cpp | 71
+++++++++--- src/Interpreters/Cache/FileCache.h | 10 ++ src/Interpreters/Cache/FileCacheSettings.cpp | 3 + src/Interpreters/Cache/FileCacheSettings.h | 1 + .../InterpreterDescribeCacheQuery.cpp | 2 + src/Interpreters/TemporaryDataOnDisk.cpp | 4 +- src/Interpreters/tests/gtest_filecache.cpp | 11 ++ .../System/StorageSystemFilesystemCache.cpp | 3 + .../StorageSystemFilesystemCacheSettings.cpp | 2 + tests/config/config.d/storage_conf.xml | 2 + tests/config/config.d/storage_conf_02944.xml | 1 + .../integration/test_filesystem_cache/test.py | 106 +++++++++++++++--- .../02344_describe_cache.reference | 2 +- .../0_stateless/02344_describe_cache.sh | 2 +- ...8_filesystem_cache_as_collection.reference | 4 +- .../02908_filesystem_cache_as_collection.sql | 4 +- ...ge_cache_setting_without_restart.reference | 14 +-- ...lly_change_filesystem_cache_size.reference | 10 +- 21 files changed, 227 insertions(+), 67 deletions(-) diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp index 80464f38082..0bbb7ff411d 100644 --- a/src/Common/StatusFile.cpp +++ b/src/Common/StatusFile.cpp @@ -51,7 +51,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_) std::string contents; { ReadBufferFromFile in(path, 1024); - LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ {}); + LimitReadBuffer limit_in(in, 1024, /* throw_exception */ false, /* exact_limit */ {}); readStringUntilEOF(contents, limit_in); } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index bb9761a3905..c96f5f0c931 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -80,20 +80,27 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c if (with_file_cache) { - auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); - buf = std::make_unique( - object_path, - cache_key, - settings.remote_fs_cache, - FileCache::getCommonUser(), - [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); }, - settings, - query_id, - object.bytes_size, - /* allow_seeks */false, - /* use_external_buffer */true, - /* read_until_position */std::nullopt, - cache_log); + if (settings.remote_fs_cache->isInitialized()) + { + auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path); + buf = std::make_unique( + object_path, + cache_key, + settings.remote_fs_cache, + FileCache::getCommonUser(), + [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); }, + settings, + query_id, + object.bytes_size, + /* allow_seeks */false, + /* use_external_buffer */true, + /* read_until_position */std::nullopt, + cache_log); + } + else + { + settings.remote_fs_cache->throwInitExceptionIfNeeded(); + } } /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp index fb817005399..ab0d357119c 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp @@ -99,7 +99,7 @@ std::unique_ptr CachedObjectStorage::writeObject( /// N /// Need to remove even if cache_on_write == false. 
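/// Otherwise a stale cache entry for the old object at this path could still be served after the overwrite.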
removeCacheIfExists(object.remote_path); - if (cache_on_write) + if (cache_on_write && cache->isInitialized()) { auto key = getCacheKey(object.remote_path); return std::make_unique( @@ -122,7 +122,8 @@ void CachedObjectStorage::removeCacheIfExists(const std::string & path_key_for_c return; /// Add try catch? - cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); + if (cache->isInitialized()) + cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); } void CachedObjectStorage::removeObject(const StoredObject & object) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index e3925163362..4c35c0f7f4c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -11,11 +11,15 @@ #include #include #include +#include +#include #include #include #include +#include #include +#include namespace fs = std::filesystem; @@ -88,6 +92,7 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , bypass_cache_threshold(settings.enable_bypass_cache_with_threshold ? settings.bypass_cache_threshold : 0) , boundary_alignment(settings.boundary_alignment) , load_metadata_threads(settings.load_metadata_threads) + , load_metadata_asynchronously(settings.load_metadata_asynchronously) , write_cache_per_user_directory(settings.write_cache_per_user_id_directory) , keep_current_size_to_max_ratio(1 - settings.keep_free_space_size_ratio) , keep_current_elements_to_max_ratio(1 - settings.keep_free_space_elements_ratio) @@ -136,7 +141,17 @@ const FileCache::UserInfo & FileCache::getInternalUser() bool FileCache::isInitialized() const { - return is_initialized.load(std::memory_order_seq_cst); + return is_initialized; +} + +void FileCache::throwInitExceptionIfNeeded() +{ + if (load_metadata_asynchronously) + return; + + std::lock_guard lock(init_mutex); + if (init_exception) + std::rethrow_exception(init_exception); } const String & FileCache::getBasePath() const @@ -170,6 +185,35 @@ void FileCache::assertInitialized() const } void FileCache::initialize() +{ + // Prevent initialize() from running twice. This may be caused by two cache disks being created with the same path (see integration/test_filesystem_cache). + callOnce(initialize_called, [&] { + bool need_to_load_metadata = fs::exists(getBasePath()); + try + { + if (!need_to_load_metadata) + fs::create_directories(getBasePath()); + status_file = make_unique(fs::path(getBasePath()) / "status", StatusFile::write_full_info); + } + catch (...) + { + init_exception = std::current_exception(); + tryLogCurrentException(__PRETTY_FUNCTION__); + throw; + } + + if (load_metadata_asynchronously) + { + load_metadata_main_thread = ThreadFromGlobalPool([this, need_to_load_metadata] { initializeImpl(need_to_load_metadata); }); + } + else + { + initializeImpl(need_to_load_metadata); + } + }); +} + +void FileCache::initializeImpl(bool load_metadata) { std::lock_guard lock(init_mutex); @@ -178,16 +222,10 @@ void FileCache::initialize() try { - if (fs::exists(getBasePath())) - { + if (load_metadata) loadMetadata(); - } - else - { - fs::create_directories(getBasePath()); - } - status_file = make_unique(fs::path(getBasePath()) / "status", StatusFile::write_full_info); + metadata.startup(); } catch (...) 
{ @@ -196,8 +234,6 @@ void FileCache::initialize() throw; } - metadata.startup(); - if (keep_current_size_to_max_ratio != 1 || keep_current_elements_to_max_ratio != 1) { keep_up_free_space_ratio_task = Context::getGlobalContextInstance()->getSchedulePool().createTask(log->name(), [this] { freeSpaceRatioKeepingThreadFunc(); }); @@ -205,6 +241,7 @@ void FileCache::initialize() } is_initialized = true; + LOG_TEST(log, "Initialized cache from {}", metadata.getBaseDirectory()); } CachePriorityGuard::Lock FileCache::lockCache() const @@ -1185,7 +1222,6 @@ void FileCache::loadMetadataImpl() std::vector loading_threads; std::exception_ptr first_exception; std::mutex set_exception_mutex; - std::atomic stop_loading = false; LOG_INFO(log, "Loading filesystem cache with {} threads from {}", load_metadata_threads, metadata.getBaseDirectory()); @@ -1195,7 +1231,7 @@ void FileCache::loadMetadataImpl() { loading_threads.emplace_back([&] { - while (!stop_loading) + while (!stop_loading_metadata) { try { @@ -1212,7 +1248,7 @@ void FileCache::loadMetadataImpl() if (!first_exception) first_exception = std::current_exception(); } - stop_loading = true; + stop_loading_metadata = true; return; } } @@ -1225,7 +1261,7 @@ void FileCache::loadMetadataImpl() if (!first_exception) first_exception = std::current_exception(); } - stop_loading = true; + stop_loading_metadata = true; break; } } @@ -1412,6 +1448,11 @@ FileCache::~FileCache() void FileCache::deactivateBackgroundOperations() { shutdown.store(true); + + stop_loading_metadata = true; + if (load_metadata_main_thread.joinable()) + load_metadata_main_thread.join(); + metadata.shutdown(); if (keep_up_free_space_ratio_task) keep_up_free_space_ratio_task->deactivate(); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 07be802a940..579472eb824 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -8,6 +8,7 @@ #include +#include #include #include #include @@ -82,6 +83,9 @@ public: bool isInitialized() const; + /// Throws if `!load_metadata_asynchronously` and there is an exception in `init_exception` + void throwInitExceptionIfNeeded(); + const String & getBasePath() const; static Key createKeyForPath(const String & path); @@ -198,6 +202,9 @@ private: const size_t bypass_cache_threshold; const size_t boundary_alignment; size_t load_metadata_threads; + const bool load_metadata_asynchronously; + std::atomic stop_loading_metadata = false; + ThreadFromGlobalPool load_metadata_main_thread; const bool write_cache_per_user_directory; BackgroundSchedulePool::TaskHolder keep_up_free_space_ratio_task; @@ -209,6 +216,7 @@ private: std::exception_ptr init_exception; std::atomic is_initialized = false; + OnceFlag initialize_called; mutable std::mutex init_mutex; std::unique_ptr status_file; std::atomic shutdown = false; @@ -246,6 +254,8 @@ private: */ FileCacheQueryLimitPtr query_limit; + void initializeImpl(bool load_metadata); + void assertInitialized() const; void assertCacheCorrectness(); diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index c68ff3183c6..e162d6b7551 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -65,6 +65,9 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin if (has("load_metadata_threads")) load_metadata_threads = get_uint("load_metadata_threads"); + if (has("load_metadata_asynchronously")) + load_metadata_asynchronously = 
get_uint("load_metadata_asynchronously"); + if (boundary_alignment > max_file_segment_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `boundary_alignment` cannot exceed `max_file_segment_size`"); diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 93ded202947..72a2b6c3369 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -32,6 +32,7 @@ struct FileCacheSettings size_t background_download_queue_size_limit = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT; size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS; + bool load_metadata_asynchronously = false; bool write_cache_per_user_id_directory = false; diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index c7e863bf260..c7464dc6b77 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -20,6 +20,7 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "max_size"}, ColumnWithTypeAndName{std::make_shared(), "max_elements"}, ColumnWithTypeAndName{std::make_shared(), "max_file_segment_size"}, + ColumnWithTypeAndName{std::make_shared(), "is_initialized"}, ColumnWithTypeAndName{std::make_shared(), "boundary_alignment"}, ColumnWithTypeAndName{std::make_shared>(), "cache_on_write_operations"}, ColumnWithTypeAndName{std::make_shared>(), "cache_hits_threshold"}, @@ -50,6 +51,7 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(settings.boundary_alignment); res_columns[i++]->insert(settings.cache_on_write_operations); res_columns[i++]->insert(settings.cache_hits_threshold); diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 7f0fb8cd6ca..3259d7b67d6 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -65,7 +65,7 @@ TemporaryDataOnDisk::TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_, Cu std::unique_ptr TemporaryDataOnDisk::createRawStream(size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); return std::make_unique(std::move(holder)); @@ -81,7 +81,7 @@ std::unique_ptr TemporaryDataOnDisk::createRawStream(si TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 36acc319f4e..5e2d3ee8219 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,7 @@ #include #include +using namespace std::chrono_literals; namespace fs = std::filesystem; using namespace DB; @@ -358,9 +360,11 @@ TEST_F(FileCacheTest, LRUPolicy) settings.max_size = 30; settings.max_elements = 5; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; const size_t file_size = INT_MAX; // the value doesn't really matter because boundary_alignment == 1. 
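// (With boundary_alignment == 1 the requested range is never expanded toward the file boundaries, so the nominal size should never be consulted.)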
+ const auto user = FileCache::getCommonUser(); { std::cerr << "Step 1\n"; @@ -815,6 +819,7 @@ TEST_F(FileCacheTest, writeBuffer) settings.max_elements = 5; settings.max_file_segment_size = 5; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; FileCache cache("6", settings); cache.initialize(); @@ -946,6 +951,7 @@ TEST_F(FileCacheTest, temporaryData) settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; DB::FileCache file_cache("7", settings); file_cache.initialize(); @@ -1073,6 +1079,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) settings.max_size = 30; settings.max_elements = 10; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; ReadSettings read_settings; read_settings.enable_filesystem_cache = true; @@ -1092,6 +1099,7 @@ TEST_F(FileCacheTest, CachedReadBuffer) auto cache = std::make_shared("8", settings); cache->initialize(); + auto key = cache->createKeyForPath(file_path); const auto user = FileCache::getCommonUser(); @@ -1132,6 +1140,7 @@ TEST_F(FileCacheTest, TemporaryDataReadBufferSize) settings.max_size = 10_KiB; settings.max_file_segment_size = 1_KiB; settings.base_path = cache_base_path; + settings.load_metadata_asynchronously = false; DB::FileCache file_cache("cache", settings); file_cache.initialize(); @@ -1195,6 +1204,7 @@ TEST_F(FileCacheTest, SLRUPolicy) settings.max_size = 40; settings.max_elements = 6; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; settings.cache_policy = "SLRU"; settings.slru_size_ratio = 0.5; @@ -1307,6 +1317,7 @@ TEST_F(FileCacheTest, SLRUPolicy) settings2.boundary_alignment = 1; settings2.cache_policy = "SLRU"; settings2.slru_size_ratio = 0.5; + settings2.load_metadata_asynchronously = false; auto cache = std::make_shared("slru_2", settings2); cache->initialize(); diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp index cfb388bc232..0e972d8411b 100644 --- a/src/Storages/System/StorageSystemFilesystemCache.cpp +++ b/src/Storages/System/StorageSystemFilesystemCache.cpp @@ -47,6 +47,9 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex for (const auto & [cache_name, cache_data] : caches) { const auto & cache = cache_data->cache; + if (!cache->isInitialized()) + continue; + cache->iterate([&](const FileSegment::Info & file_segment) { size_t i = 0; diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp index 8915032baf7..c6bba6b8598 100644 --- a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp @@ -21,6 +21,7 @@ ColumnsDescription StorageSystemFilesystemCacheSettings::getColumnsDescription() {"path", std::make_shared(), "Cache directory"}, {"max_size", std::make_shared(), "Cache size limit by the number of bytes"}, {"max_elements", std::make_shared(), "Cache size limit by the number of elements"}, + {"is_initialized", std::make_shared(), "Whether the cache is initialized and ready to be used"}, {"current_size", std::make_shared(), "Current cache size by the number of bytes"}, {"current_elements", std::make_shared(), "Current cache size by the number of elements"}, {"max_file_segment_size", std::make_shared(), "Maximum allowed file segment size"}, @@ -56,6 +57,7 @@ void
StorageSystemFilesystemCacheSettings::fillData( res_columns[i++]->insert(settings.base_path); res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(cache->getUsedCacheSize()); res_columns[i++]->insert(cache->getFileSegmentsNum()); res_columns[i++]->insert(settings.max_file_segment_size); diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index e106e3a0e6b..091071f0637 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -27,6 +27,7 @@ 0.3 0.15 0.15 + 0 cache @@ -37,6 +38,7 @@ 100 0 0 + 0 diff --git a/tests/config/config.d/storage_conf_02944.xml b/tests/config/config.d/storage_conf_02944.xml index 5f45640a923..08d78900229 100644 --- a/tests/config/config.d/storage_conf_02944.xml +++ b/tests/config/config.d/storage_conf_02944.xml @@ -19,6 +19,7 @@ 10 100 0 + 0 diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index 17a8dd8b6e1..aee8bd25c2e 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -1,6 +1,7 @@ import logging import time import os +import random import pytest from helpers.cluster import ClickHouseCluster @@ -30,14 +31,6 @@ def cluster(): "config.d/storage_conf_2.xml", ], ) - cluster.add_instance( - "node_no_filesystem_caches_path", - main_configs=[ - "config.d/storage_conf.xml", - "config.d/remove_filesystem_caches_path.xml", - ], - stay_alive=True, - ) cluster.add_instance( "node_force_read_through_cache_on_merge", main_configs=[ @@ -59,6 +52,51 @@ def cluster(): cluster.shutdown() +@pytest.fixture(scope="function") +def non_shared_cluster(): + """ + For tests that cannot run in parallel against the same node/cluster (see test_custom_cached_disk, which relies on + changing server settings at runtime) + """ + try: + # Randomize the cluster name + cluster = ClickHouseCluster(f"{__file__}_non_shared_{random.randint(0, 10**7)}") + cluster.add_instance( + "node_no_filesystem_caches_path", + main_configs=[ + "config.d/storage_conf.xml", + "config.d/remove_filesystem_caches_path.xml", + ], + stay_alive=True, + ) + + logging.info("Starting test-exclusive cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def wait_for_cache_initialized(node, cache_path, max_attempts=50): + initialized = False + attempts = 0 + while not initialized: + query_result = node.query( + "SELECT path FROM system.filesystem_cache_settings WHERE is_initialized" + ) + initialized = cache_path in query_result + + if initialized: + break + + time.sleep(0.1) + attempts += 1 + if attempts >= max_attempts: + raise RuntimeError("Stopped waiting for cache to be initialized") + + @pytest.mark.parametrize("node_name", ["node"]) def test_parallel_cache_loading_on_startup(cluster, node_name): node = cluster.instances[node_name] @@ -71,14 +109,21 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - path = 'paralel_loading_test', + name = 'parallel_loading_test', + path = 'parallel_loading_test', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', max_size = '1Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "parallel_loading_test") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test
SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; SELECT * FROM test FORMAT Null; @@ -103,6 +148,7 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ) node.restart_clickhouse() + wait_for_cache_initialized(node, "parallel_loading_test") # < because of additional files loaded into cache on server startup. assert cache_count <= int(node.query("SELECT count() FROM system.filesystem_cache")) @@ -131,7 +177,7 @@ def test_caches_with_the_same_configuration(cluster, node_name): node = cluster.instances[node_name] cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["test", "test2"]: node.query( f""" @@ -142,14 +188,20 @@ def test_caches_with_the_same_configuration(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - name = {table}, + name = '{table}', path = '{cache_path}', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', cache_on_write_operations=1, max_size = '1Mi'); + """ + ) + wait_for_cache_initialized(node, cache_path) + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -195,9 +247,8 @@ def test_caches_with_the_same_configuration(cluster, node_name): @pytest.mark.parametrize("node_name", ["node_caches_with_same_path"]) def test_caches_with_the_same_configuration_2(cluster, node_name): node = cluster.instances[node_name] - cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["cache1", "cache2"]: node.query( f""" @@ -207,7 +258,13 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): Engine=MergeTree() ORDER BY value SETTINGS disk = '{table}'; + """ + ) + wait_for_cache_initialized(node, "cache1") + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -227,8 +284,8 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): ) -def test_custom_cached_disk(cluster): - node = cluster.instances["node_no_filesystem_caches_path"] +def test_custom_cached_disk(non_shared_cluster): + node = non_shared_cluster.instances["node_no_filesystem_caches_path"] assert "Cannot create cached custom disk without" in node.query_and_get_error( f""" @@ -377,6 +434,7 @@ def test_force_filesystem_cache_on_merges(cluster): ORDER BY value SETTINGS disk = disk( type = cache, + name = 'force_cache_on_merges', path = 'force_cache_on_merges', disk = 'hdd_blob', max_file_segment_size = '1Ki', @@ -385,7 +443,13 @@ def test_force_filesystem_cache_on_merges(cluster): max_size = '10Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "force_cache_on_merges") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; @@ -441,7 +505,13 @@ SETTINGS disk = disk(type = cache, path = "test_system_sync_filesystem_cache", delayed_cleanup_interval_ms = 10000000, disk = hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_system_sync_filesystem_cache") + + node.query( + """ INSERT INTO test SELECT 1, 'test'; """ ) @@ -525,7 +595,13 @@ SETTINGS disk = disk(type = cache, keep_free_space_elements_ratio = {elements_ratio}, disk = 
hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_keep_up_size_ratio") + + node.query( + """ INSERT INTO test SELECT randomString(200); """ ) diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 6895606eb2b..13429b14866 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 +102400 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 diff --git a/tests/queries/0_stateless/02344_describe_cache.sh b/tests/queries/0_stateless/02344_describe_cache.sh index d91661db9bc..c5373b4d7e3 100755 --- a/tests/queries/0_stateless/02344_describe_cache.sh +++ b/tests/queries/0_stateless/02344_describe_cache.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk'); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk', load_metadata_asynchronously = 0); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference index d4191af1594..41a60204eab 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference @@ -1,2 +1,2 @@ -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql index c7216833bc9..127baa8304e 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql @@ -3,8 +3,8 @@ CREATE NAMED COLLECTION IF NOT EXISTS cache_collection_sql AS path = 'collection_sql', max_size = '1Mi'; DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME'; CREATE TABLE test2 (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', cache_name='cache_collection'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', 
cache_name='cache_collection', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME_2'; diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference index 17a25d82824..0f64d0393b2 100644 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference @@ -1,7 +1,7 @@ -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 298cc908178..c6bbcdc20c2 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +10 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 2 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 From 
dfd17cc2d71555de9c42ad6085c35bb3f1372dd1 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 13:23:57 +0000 Subject: [PATCH 158/363] Check for invalid regexp in JSON SKIP REGEXP section --- src/DataTypes/DataTypeObject.cpp | 11 +++++++++++ .../0_stateless/03227_json_invalid_regexp.reference | 0 .../queries/0_stateless/03227_json_invalid_regexp.sql | 4 ++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/03227_json_invalid_regexp.reference create mode 100644 tests/queries/0_stateless/03227_json_invalid_regexp.sql diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index d6395155397..11fffd8769b 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -49,6 +49,17 @@ DataTypeObject::DataTypeObject( , max_dynamic_paths(max_dynamic_paths_) , max_dynamic_types(max_dynamic_types_) { + /// Check if regular expressions are valid. + for (const auto & regexp_str : path_regexps_to_skip) + { + re2::RE2::Options options; + /// Don't log errors to stderr. + options.set_log_errors(false); + auto regexp = re2::RE2(regexp_str, options); + if (!regexp.error().empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid regexp '{}': {}", regexp_str, regexp.error()); + } + for (const auto & [typed_path, type] : typed_paths) { for (const auto & path_to_skip : paths_to_skip) diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.reference b/tests/queries/0_stateless/03227_json_invalid_regexp.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.sql b/tests/queries/0_stateless/03227_json_invalid_regexp.sql new file mode 100644 index 00000000000..734dea1aac6 --- /dev/null +++ b/tests/queries/0_stateless/03227_json_invalid_regexp.sql @@ -0,0 +1,4 @@ +set allow_experimental_json_type = 1; +create table test (json JSON(SKIP REGEXP '[]')) engine=Memory(); -- {serverError BAD_ARGUMENTS} +create table test (json JSON(SKIP REGEXP '+')) engine=Memory(); -- {serverError BAD_ARGUMENTS}; + From 6bd65dbfa5c5d450e355dc64c110db65d2f56cbb Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Fri, 16 Aug 2024 15:07:53 +0000 Subject: [PATCH 159/363] Use HTTP/1.1 for external HTTP authentication --- src/Access/HTTPAuthClient.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Access/HTTPAuthClient.h b/src/Access/HTTPAuthClient.h index a8b56cf05a7..a1b97a729a3 100644 --- a/src/Access/HTTPAuthClient.h +++ b/src/Access/HTTPAuthClient.h @@ -82,7 +82,8 @@ public: Result authenticate(const String & user_name, const String & password) const { - Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery()}; + Poco::Net::HTTPRequest request{ + Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}; Poco::Net::HTTPBasicCredentials basic_credentials{user_name, password}; basic_credentials.authenticate(request); From 45e06de3267486296cc1452c981a78688a2193ae Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:01:43 +0200 Subject: [PATCH 160/363] Minor update in Dynamic/JSON serializations --- src/DataTypes/Serializations/SerializationObject.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 2dd25e540cc..0042aa6d89d 100644 --- 
a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -199,7 +199,7 @@ void SerializationObject::serializeBinaryBulkStatePrefix( auto object_state = std::make_shared(serialization_version); object_state->max_dynamic_paths = column_object.getMaxDynamicPaths(); /// Write max_dynamic_paths parameter. - writeBinaryLittleEndian(object_state->max_dynamic_paths, *stream); + writeVarUInt(object_state->max_dynamic_paths, *stream); /// Write all dynamic paths in sorted order. object_state->sorted_dynamic_paths.reserve(dynamic_paths.size()); for (const auto & [path, _] : dynamic_paths) @@ -354,7 +354,7 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationObject::deserializeOb readBinaryLittleEndian(serialization_version, *structure_stream); auto structure_state = std::make_shared(serialization_version); /// Read max_dynamic_paths parameter. - readBinaryLittleEndian(structure_state->max_dynamic_paths, *structure_stream); + readVarUInt(structure_state->max_dynamic_paths, *structure_stream); /// Read the sorted list of dynamic paths. size_t dynamic_paths_size; readVarUInt(dynamic_paths_size, *structure_stream); From c85d5e753899503f93a8f9ca7b67776d386d9130 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:02:51 +0200 Subject: [PATCH 161/363] Update Dynamic serialization --- src/DataTypes/Serializations/SerializationDynamic.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 6bba87c40fa..ab24779ced2 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -115,7 +115,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes(); /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types /// that is specified in the Dynamic type (we could decrease it before merge). - writeBinaryLittleEndian(dynamic_state->max_dynamic_types, *stream); + writeVarUInt(dynamic_state->max_dynamic_types, *stream); dynamic_state->variant_type = variant_info.variant_type; dynamic_state->variant_names = variant_info.variant_names; @@ -123,7 +123,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix( /// Write information about variants. size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it. - writeBinaryLittleEndian(num_variants, *stream); + writeVarUInt(num_variants, *stream); if (settings.data_types_binary_encoding) { const auto & variants = assert_cast(*dynamic_state->variant_type).getVariants(); @@ -252,11 +252,11 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD readBinaryLittleEndian(structure_version, *structure_stream); auto structure_state = std::make_shared(structure_version); /// Read max_dynamic_types parameter. - readBinaryLittleEndian(structure_state->max_dynamic_types, *structure_stream); + readVarUInt(structure_state->max_dynamic_types, *structure_stream); /// Read information about variants. DataTypes variants; size_t num_variants; - readBinaryLittleEndian(num_variants, *structure_stream); + readVarUInt(num_variants, *structure_stream); variants.reserve(num_variants + 1); /// +1 for shared variant. 
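/// The shared variant itself is never written here (see the write path above); it is accounted for separately, since every Dynamic column must carry it.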
if (settings.data_types_binary_encoding) { From 4f84c82d6d53ded0adda46aac1db1d345b5ba2eb Mon Sep 17 00:00:00 2001 From: Linh Giang <165205637+linhgiang24@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:02:44 -0600 Subject: [PATCH 162/363] Update grant.md to include POSTGRES privilege Added POSTGRES privilege under the SOURCES category as it seems to be missing. --- docs/en/sql-reference/statements/grant.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 43fa344a16d..6118f4c1d36 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -200,6 +200,7 @@ Hierarchy of privileges: - `JDBC` - `HDFS` - `S3` + - `POSTGRES` - [dictGet](#dictget) - [displaySecretsInShowAndSelect](#displaysecretsinshowandselect) - [NAMED COLLECTION ADMIN](#named-collection-admin) @@ -476,6 +477,7 @@ Allows using external data sources. Applies to [table engines](../../engines/tab - `JDBC`. Level: `GLOBAL` - `HDFS`. Level: `GLOBAL` - `S3`. Level: `GLOBAL` + - `POSTGRES`. Level: `GLOBAL` The `SOURCES` privilege enables use of all the sources. Also you can grant a privilege for each source individually. To use sources, you need additional privileges. From a6d5047bb09640bcf99bef84f655602d7dfb3361 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 16 Aug 2024 21:29:46 +0100 Subject: [PATCH 163/363] impl --- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 4 ++++ .../MergeTree/MergeTreeReadPoolBase.cpp | 6 +++++ .../MergeTreeReadPoolParallelReplicas.cpp | 5 +++++ ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 5 +++++ .../ParallelReplicasReadingCoordinator.cpp | 4 ++++ ...icas_read_task_size_overflow_bug.reference | 0 ...l_replicas_read_task_size_overflow_bug.sql | 22 +++++++++++++++++++ 7 files changed, 46 insertions(+) create mode 100644 tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference create mode 100644 tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index a9b77fb6c03..7081eb716f5 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -395,6 +395,10 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ part_stat.prefetch_step_marks = std::max(part_stat.prefetch_step_marks, per_part_infos[i]->min_marks_per_task); + if (part_stat.prefetch_step_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + LOG_DEBUG( log, "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 6d2560bc9c7..9d3c38822e1 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,6 +13,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolBase::MergeTreeReadPoolBase( @@ -85,6 +86,11 @@ static size_t calculateMinMarksPerTask( min_marks_per_task = heuristic_min_marks; } } + + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely 
because of weird interference of settings)"); + LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 33eaf5a49bd..d23072771f2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( @@ -38,6 +39,10 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + extension.all_callback( InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica)); } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 6b5cf978423..42ffc4304b2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrder( @@ -37,6 +38,10 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + for (const auto & part : parts_ranges) request.push_back({part.data_part->info, MarkRanges{}}); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f46b4de10b7..ee47fe3549a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1004,6 +1004,10 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request) { + if (request.min_number_of_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + ProfileEventTimeIncrement watch(ProfileEvents::ParallelReplicasHandleRequestMicroseconds); std::lock_guard lock(mutex); diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql new file mode 100644 index 00000000000..984c7fe0db7 --- /dev/null 
+++ b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 1; + +SYSTEM STOP MERGES test__fuzz_22; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); + +SET allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', + merge_tree_min_rows_for_concurrent_read = 9223372036854775806, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 9223372036854775806; + + SELECT v + FROM test__fuzz_22 +ORDER BY v + LIMIT 10, 10 +SETTINGS max_threads = 4 + FORMAT Null; -- { serverError BAD_ARGUMENTS } + +DROP TABLE test__fuzz_22 SYNC; From cc7d22a7b83440cfbf7d37086ece7fac222f24de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:08:16 +0200 Subject: [PATCH 164/363] Proper parsing of the PostgreSQL-style CAST operator --- src/Parsers/ExpressionElementParsers.cpp | 26 +++++++++++-------- ..._proper_parsing_of_cast_operator.reference | 4 +++ .../03227_proper_parsing_of_cast_operator.sql | 6 +++++ 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference create mode 100644 tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index dd22b80b1cb..ffa1bd93ded 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -853,9 +853,9 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected /// Parse numbers (including decimals), strings, arrays and tuples of them. 
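 /// For example, all of the following are handled by this parser:
 ///   SELECT '414243'::String           -- a string literal (also exercised by the new test in this patch)
 ///   SELECT -123.45::Float64           -- a number with a leading minus
 ///   SELECT [1, 2, 3]::Array(UInt64)   -- an array literal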
+ Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; - bool is_string_literal = pos->type == StringLiteral; if (pos->type == Minus) { @@ -866,7 +866,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || is_string_literal) + else if (pos->type == Number || pos->type == StringLiteral) { ++pos; } @@ -939,18 +939,22 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String s; size_t data_size = data_end - data_begin; - if (is_string_literal) + if (begin->type == StringLiteral) { - ReadBufferFromMemory buf(data_begin, data_size); - readQuotedStringWithSQLStyle(s, buf); - assert(buf.count() == data_size); + ASTPtr literal; + if (ParserStringLiteral().parse(begin, literal, expected)) + { + node = createFunctionCast(literal, type_ast); + return true; + } + return false; } else - s = String(data_begin, data_size); - - auto literal = std::make_shared(std::move(s)); - node = createFunctionCast(literal, type_ast); - return true; + { + auto literal = std::make_shared(String(data_begin, data_size)); + node = createFunctionCast(literal, type_ast); + return true; + } } return false; diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference new file mode 100644 index 00000000000..2127d396bb3 --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference @@ -0,0 +1,4 @@ +414243 +ABC +A +{"a": \'A\'} diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql new file mode 100644 index 00000000000..0c2e7dc582a --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql @@ -0,0 +1,6 @@ +SELECT '414243'::String; +SELECT x'414243'::String; +SELECT b'01000001'::String; +SELECT '{"a": \'\x41\'}'::String; +SELECT '{"a": \'\x4\'}'::String; -- { clientError SYNTAX_ERROR } +SELECT '{"a": \'a\x4\'}'::String; -- { clientError SYNTAX_ERROR } From aee031ad4468b870073dc46770d07cea07aa829f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:25:49 +0200 Subject: [PATCH 165/363] Slightly better --- src/Parsers/ExpressionElementParsers.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index ffa1bd93ded..726326bfc85 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -856,6 +856,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; + ASTPtr string_literal; if (pos->type == Minus) { @@ -866,10 +867,15 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || pos->type == StringLiteral) + else if (pos->type == Number) { ++pos; } + else if (pos->type == StringLiteral) + { + if (!ParserStringLiteral().parse(begin, string_literal, expected)) + return false; + } else if (isOneOf(pos->type)) { TokenType last_token = OpeningSquareBracket; @@ -939,15 +945,10 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String s; size_t data_size = data_end - data_begin; - if (begin->type == 
StringLiteral) + if (string_literal) { - ASTPtr literal; - if (ParserStringLiteral().parse(begin, literal, expected)) - { - node = createFunctionCast(literal, type_ast); - return true; - } - return false; + node = createFunctionCast(string_literal, type_ast); + return true; } else { From d952f7cff579d28a51eea428aee7460121862ce5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:50:26 +0200 Subject: [PATCH 166/363] Update test --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index acb4925ce6e..ee702300094 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_block_size 8192 --max_insert_block_size 8192 --max_insert_threads 1 --min_insert_block_size_bytes 0 --min_insert_block_size_rows 0 -q "INSERT INTO ghdata FORMAT JSONAsObject" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" From b98249ea7fda526a7a561862fcc4a721e5a4587f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:06:47 +0200 Subject: [PATCH 167/363] Use temporary tables for input and output in clickhouse-local --- programs/local/LocalServer.cpp | 2 +- tests/queries/0_stateless/01191_rename_dictionary.sql | 1 + .../02141_clickhouse_local_interactive_table.reference | 4 ++-- .../0_stateless/02141_clickhouse_local_interactive_table.sh | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 200beea7b63..a8b774562f9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -367,7 +367,7 @@ std::string LocalServer::getInitialCreateTableQuery() else table_structure = "(" + table_structure + ")"; - return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});", + return fmt::format("CREATE TEMPORARY TABLE {} {} ENGINE = File({}, {});", table_name, table_structure, data_format, table_file); } diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index c5012dabc81..be95e5a7d4b 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -27,6 +27,7 @@ RENAME DICTIONARY test_01191.t TO test_01191.dict1; -- {serverError INCORRECT_QU DROP DICTIONARY test_01191.t; -- {serverError INCORRECT_QUERY} DROP TABLE test_01191.t; +DROP DATABASE IF EXISTS dummy_db; CREATE DATABASE dummy_db ENGINE=Atomic; RENAME DICTIONARY test_01191.dict TO dummy_db.dict1; RENAME DICTIONARY dummy_db.dict1 TO test_01191.dict; diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference index 0bb8966cbe4..0e74c0a083e 100644 --- 
a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') -CREATE TABLE foo.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh index 934d87616ac..3a95e59416a 100755 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' -$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' +$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' From 8ba142559ca05295670b0a899610ab613c2d5658 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:09:39 +0200 Subject: [PATCH 168/363] Pass-through RENAME and UUID-related operations in Overlay database to underlying databases --- src/Databases/DatabasesOverlay.cpp | 47 ++++++++++++++++++++++++++++++ src/Databases/DatabasesOverlay.h | 9 ++++++ src/Interpreters/StorageID.h | 1 - 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 801356b3dd7..495733e15fd 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -14,6 +14,8 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_TABLE; } DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_) @@ -124,6 +126,39 @@ StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & tab getEngineName()); } +void DatabasesOverlay::renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) +{ + for (auto & db : databases) + { + if (db->isTableExist(name, current_context)) + { + if (DatabasesOverlay * to_overlay_database = typeid_cast(&to_database)) + { + /// Renaming from Overlay database inside itself or into another Overlay database. + /// Just use the first database in the overlay as a destination. 
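+                    /// For example, for RENAME TABLE overlay_db.t TO overlay_db.t2 the source is whichever
+                    /// member database currently holds the table, while the destination resolves to the
+                    /// first member database of the target Overlay.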
+ if (to_overlay_database->databases.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The destination Overlay database {} does not have any members", to_database.getDatabaseName()); + + db->renameTable(current_context, name, *to_overlay_database->databases[0], to_name, exchange, dictionary); + } + else + { + /// Renaming into a different type of database. E.g. from Overlay on top of Atomic database into just Atomic database. + db->renameTable(current_context, name, to_database, to_name, exchange, dictionary); + } + + return; + } + } + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(getDatabaseName()), backQuote(name)); +} + ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const { ASTPtr result = nullptr; @@ -178,6 +213,18 @@ String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const return result; } +UUID DatabasesOverlay::getUUID() const +{ + UUID result = UUIDHelpers::Nil; + for (const auto & db : databases) + { + result = db->getUUID(); + if (result != UUIDHelpers::Nil) + break; + } + return result; +} + UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const { UUID result = UUIDHelpers::Nil; diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index b0c7e7e4032..40c653e5cb5 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -35,12 +35,21 @@ public: StoragePtr detachTable(ContextPtr context, const String & table_name) override; + void renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) override; + ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override; ASTPtr getCreateDatabaseQuery() const override; String getTableDataPath(const String & table_name) const override; String getTableDataPath(const ASTCreateQuery & query) const override; + UUID getUUID() const override; UUID tryGetTableUUID(const String & table_name) const override; void drop(ContextPtr context) override; diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index f9afbc7b98d..ad55d16e284 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -27,7 +27,6 @@ class ASTQueryWithTableAndOutput; class ASTTableIdentifier; class Context; -// TODO(ilezhankin): refactor and merge |ASTTableIdentifier| struct StorageID { String database_name; From da0a8051d8c8e8c2c72145e15cdf5a96e99641d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:22:57 +0200 Subject: [PATCH 169/363] Miscellaneous changes in database engines --- src/Databases/DatabaseLazy.cpp | 2 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 10 +++++----- src/Databases/DatabaseOnDisk.h | 4 ++-- src/Databases/DatabaseOrdinary.cpp | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 3fb6d30fcb8..2ccdd8510a8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -52,7 +52,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/) { - iterateMetadataFiles(local_context, [this, &local_context](const String & file_name) + iterateMetadataFiles([this, &local_context](const String & file_name) { 
const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 41cfb751141..aeac130594f 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -12,7 +12,7 @@ class DatabaseLazyIterator; class Context; /** Lazy engine of databases. - * Works like DatabaseOrdinary, but stores in memory only the cache. + * Works like DatabaseOrdinary, but stores only recently accessed tables in memory. * Can be used only with *Log engines. */ class DatabaseLazy final : public DatabaseOnDisk diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..c80e4def94e 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -568,14 +568,14 @@ void DatabaseOnDisk::drop(ContextPtr local_context) assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty()); if (local_context->getSettingsRef().force_remove_data_recursively_on_drop) { - (void)fs::remove_all(local_context->getPath() + getDataPath()); + (void)fs::remove_all(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove_all(getMetadataPath()); } else { try { - (void)fs::remove(local_context->getPath() + getDataPath()); + (void)fs::remove(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove(getMetadataPath()); } catch (const fs::filesystem_error & e) @@ -613,7 +613,7 @@ time_t DatabaseOnDisk::getObjectMetadataModificationTime(const String & object_n } } -void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const IteratingFunction & process_metadata_file) const +void DatabaseOnDisk::iterateMetadataFiles(const IteratingFunction & process_metadata_file) const { auto process_tmp_drop_metadata_file = [&](const String & file_name) { @@ -621,7 +621,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat static const char * tmp_drop_ext = ".sql.tmp_drop"; const std::string object_name = file_name.substr(0, file_name.size() - strlen(tmp_drop_ext)); - if (fs::exists(local_context->getPath() + getDataPath() + '/' + object_name)) + if (fs::exists(std::filesystem::path(getContext()->getPath()) / data_path / object_name)) { fs::rename(getMetadataPath() + file_name, getMetadataPath() + object_name + ".sql"); LOG_WARNING(log, "Object {} was not dropped previously and will be restored", backQuote(object_name)); @@ -638,7 +638,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat std::vector> metadata_files; fs::directory_iterator dir_end; - for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it(metadata_path); dir_it != dir_end; ++dir_it) { String file_name = dir_it->path().filename(); /// For '.svn', '.gitignore' directory and similar. 
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index 12656068643..ffc95a7c128 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -64,7 +64,7 @@ public: time_t getObjectMetadataModificationTime(const String & object_name) const override; String getDataPath() const override { return data_path; } - String getTableDataPath(const String & table_name) const override { return data_path + escapeForFileName(table_name) + "/"; } + String getTableDataPath(const String & table_name) const override { return std::filesystem::path(data_path) / escapeForFileName(table_name) / ""; } String getTableDataPath(const ASTCreateQuery & query) const override { return getTableDataPath(query.getTable()); } String getMetadataPath() const override { return metadata_path; } @@ -83,7 +83,7 @@ protected: using IteratingFunction = std::function; - void iterateMetadataFiles(ContextPtr context, const IteratingFunction & process_metadata_file) const; + void iterateMetadataFiles(const IteratingFunction & process_metadata_file) const; ASTPtr getCreateTableQueryImpl( const String & table_name, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8808261654f..dd8a3f42ea8 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -55,7 +55,7 @@ static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; static constexpr const char * const CONVERT_TO_REPLICATED_FLAG_NAME = "convert_to_replicated"; DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) - : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) + : DatabaseOrdinary(name_, metadata_path_, std::filesystem::path("data") / escapeForFileName(name_) / "", "DatabaseOrdinary (" + name_ + ")", context_) { } @@ -265,7 +265,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables } }; - iterateMetadataFiles(local_context, process_metadata); + iterateMetadataFiles(process_metadata); size_t objects_in_database = metadata.parsed_tables.size() - prev_tables_count; size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries; From 07fa798ffa53216fd37dac51bceb327665fc8dda Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 16 Aug 2024 23:31:41 +0000 Subject: [PATCH 170/363] add total in system.one, test --- src/Storages/System/StorageSystemOne.cpp | 5 ++++- .../System/StorageSystemViewRefreshes.cpp | 2 +- ...3221_refreshable_matview_progress.reference | 2 ++ .../03221_refreshable_matview_progress.sql | 18 ++++++++++++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03221_refreshable_matview_progress.reference create mode 100644 tests/queries/0_stateless/03221_refreshable_matview_progress.sql diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 936d55e61a0..70377715dc3 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -41,7 +41,10 @@ Pipe StorageSystemOne::read( auto column = DataTypeUInt8().createColumnConst(1, 0u)->convertToFullColumnIfConst(); Chunk chunk({ std::move(column) }, 1); - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + auto source = std::make_shared(std::move(header), std::move(chunk)); + source->addTotalRowsApprox(1); + + return Pipe(source); } diff --git 
a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index 3941c4c39c2..30539ed6b6a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -86,7 +86,7 @@ void StorageSystemViewRefreshes::fillData( res_columns[i++]->insert(refresh.exception_message); res_columns[i++]->insert(refresh.refresh_count); - res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0)); + res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read); res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9); res_columns[i++]->insert(refresh.progress.read_rows); res_columns[i++]->insert(refresh.progress.read_bytes); diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.reference b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference new file mode 100644 index 00000000000..5ed392e61c7 --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference @@ -0,0 +1,2 @@ +0 +4 4 1 diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql new file mode 100644 index 00000000000..4794359fd2b --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -0,0 +1,18 @@ +set allow_experimental_refreshable_materialized_view=1; + +CREATE MATERIALIZED VIEW 03221_rmv +REFRESH AFTER 1 SECOND +( +x UInt64 +) +ENGINE = Memory +AS SELECT number AS x +FROM numbers(3) +UNION ALL +SELECT rand64() AS x; + +SELECT sleep(2); + +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE view = '03221_rmv'; + +DROP TABLE 03221_rmv; From 142d7b15828c9eeef145fd56d56b33d1004fb68c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 01:35:49 +0200 Subject: [PATCH 171/363] Miscellaneous changes in BaseDaemon --- src/Daemon/BaseDaemon.cpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index e7ae8ea5a1d..f75699881bd 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -16,39 +16,29 @@ #include #if defined(OS_LINUX) - #include +#include #endif #include #include #include - #include #include #include -#include #include -#include #include #include #include #include #include - #include #include #include -#include #include -#include #include -#include #include -#include #include -#include -#include #include #include #include @@ -459,13 +449,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) - String build_id_hex = SymbolIndex::instance().getBuildIDHex(); - if (build_id_hex.empty()) - build_id = ""; - else - build_id = build_id_hex; -#else - build_id = ""; + build_id = SymbolIndex::instance().getBuildIDHex(); #endif git_hash = GIT_HASH; From 83608cb7bfcbf0801994cc916ed36b91c7113307 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 01:40:07 +0200 Subject: [PATCH 172/363] Miscellaneous changes from #66999 (2) --- base/base/CMakeLists.txt | 3 +++ {src/Daemon => base/base}/GitHash.cpp.in | 0 src/Daemon/CMakeLists.txt | 3 --- 3 files changed, 3 insertions(+), 3 deletions(-) rename {src/Daemon => base/base}/GitHash.cpp.in (100%) diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 
247028b96e0..3d236f52c36 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -8,6 +8,8 @@ endif () # when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16. set (CMAKE_CXX_STANDARD 20) +configure_file(GitHash.cpp.in GitHash.generated.cpp) + set (SRCS argsToConfig.cpp cgroupsv2.cpp @@ -33,6 +35,7 @@ set (SRCS safeExit.cpp throwError.cpp Numa.cpp + GitHash.generated.cpp ) add_library (common ${SRCS}) diff --git a/src/Daemon/GitHash.cpp.in b/base/base/GitHash.cpp.in similarity index 100% rename from src/Daemon/GitHash.cpp.in rename to base/base/GitHash.cpp.in diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 35ea2122dbb..2068af2200d 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,10 +1,7 @@ -configure_file(GitHash.cpp.in GitHash.generated.cpp) - add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - GitHash.generated.cpp ) target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config) From 4b68ba23c0372331401cd327ca5849ba9d1bce8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:43:53 +0200 Subject: [PATCH 173/363] Pretty print tuples in CREATE TABLE statements --- programs/format/Format.cpp | 6 +++++- src/Client/ClientBase.cpp | 6 +++++- src/Core/ExternalTable.cpp | 4 ++-- src/Core/Settings.h | 2 +- src/DataTypes/IDataType.cpp | 1 - src/Databases/DatabaseOnDisk.cpp | 2 +- src/Functions/formatQuery.cpp | 8 +++++++- .../InterpreterShowCreateQuery.cpp | 5 ++++- .../formatWithPossiblyHidingSecrets.h | 3 ++- src/Parsers/ASTColumnDeclaration.cpp | 8 ++------ src/Parsers/ASTDataType.cpp | 19 ++++++++++++++----- src/Parsers/ASTExpressionList.cpp | 5 ++--- src/Parsers/ASTNameTypePair.cpp | 6 +----- src/Parsers/IAST.cpp | 3 ++- src/Parsers/IAST.h | 14 +++++++++----- 15 files changed, 57 insertions(+), 35 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index a434c9171e9..4af77533c53 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -264,7 +264,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (!backslash) { WriteBufferFromOwnString str_buf; - formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length); + oneline = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline, hilite); + settings.show_secrets = true; + settings.print_pretty_type_names = !oneline; + res->format(settings); if (insert_query_payload) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 187ef079eda..74357d33f1c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -331,7 +331,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting { output_stream << std::endl; WriteBufferFromOStream res_buf(output_stream, 4096); - formatAST(*res, res_buf); + IAST::FormatSettings format_settings(res_buf, /* one_line */ false); + format_settings.hilite = true; + format_settings.show_secrets = true; + format_settings.print_pretty_type_names = true; + res->format(format_settings); res_buf.finalize(); output_stream << std::endl << std::endl; } diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index c2bcf6ec651..4ff0d7092d8 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -85,7 +85,7 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg /// We use 
`formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction. /// `getColumnNameWithoutAlias` will return name of the function with `(arguments)` even if arguments is empty. if (column) - structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage()); } @@ -102,7 +102,7 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error); for (size_t i = 0; i < type_list_raw->children.size(); ++i) - structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false)); } void BaseExternalTable::initSampleBlock() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0d84ad9022a..1a71494c8cd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -882,7 +882,7 @@ class IColumn; M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 7fd8a85aeca..49e5b2d022e 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..4e1ddd8cc77 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -504,7 +504,7 @@ void DatabaseOnDisk::renameTable( } -/// It returns create table statement (even if table is detached) +/// It returns the create table statement (even if table is detached) ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, ContextPtr, bool throw_on_error) const { ASTPtr ast; diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 9591ea95254..be633bdfe37 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -43,6 +43,7 @@ public: max_query_size = settings.max_query_size; max_parser_depth = 
settings.max_parser_depth; max_parser_backtracks = settings.max_parser_backtracks; + print_pretty_type_names = settings.print_pretty_type_names; } String getName() const override { return name; } @@ -138,7 +139,11 @@ private: } } - formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); + IAST::FormatSettings settings(buf, output_formatting == OutputFormatting::SingleLine, /*hilite*/ false); + settings.show_secrets = true; + settings.print_pretty_type_names = print_pretty_type_names; + ast->format(settings); + auto formatted = buf.stringView(); const size_t res_data_new_size = res_data_size + formatted.size() + 1; @@ -165,6 +170,7 @@ private: size_t max_query_size; size_t max_parser_depth; size_t max_parser_backtracks; + bool print_pretty_type_names; }; } diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index e5549b2e539..7af6b4948f8 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,7 +97,10 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({.ctx = getContext(), .query = *create_query, .one_line = false})); + column->insert(format({ + .ctx = getContext(), + .query = *create_query, + .one_line = false})); return QueryPipeline(std::make_shared(Block{{ std::move(column), diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index ea8c295b169..14e84f1d1a4 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -25,7 +25,8 @@ inline String format(const SecretHidingFormatSettings & settings) && settings.ctx->getSettingsRef().format_display_secrets_in_show_and_select && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect); - return settings.query.formatWithPossiblyHidingSensitiveData(settings.max_length, settings.one_line, show_secrets); + return settings.query.formatWithPossiblyHidingSensitiveData( + settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names); } } diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c96499095d5..23d653012f8 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -66,17 +66,13 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo { frame.need_parens = false; - /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. + /// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query. format_settings.ostr << backQuote(name); if (type) { format_settings.ostr << ' '; - - FormatStateStacked type_frame = frame; - type_frame.indent = 0; - - type->formatImpl(format_settings, state, type_frame); + type->formatImpl(format_settings, state, frame); } if (null_modifier) diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 3c17ae8c380..21f56e5f7a2 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -40,12 +40,21 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat { settings.ostr << '(' << (settings.hilite ? 
hilite_none : ""); - for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + if (!settings.one_line && settings.print_pretty_type_names && name == "Tuple") { - if (i != 0) - settings.ostr << ", "; - - arguments->children[i]->formatImpl(settings, state, frame); + ++frame.indent; + std::string indent_str = settings.one_line ? "" : "\n" + std::string(4 * frame.indent, ' '); + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + { + if (i != 0) + settings.ostr << ','; + settings.ostr << indent_str; + arguments->children[i]->formatImpl(settings, state, frame); + } + } + else + { + arguments->formatImpl(settings, state, frame); } settings.ostr << (settings.hilite ? hilite_function : "") << ')'; diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 61ac482af82..f345b0c6a6f 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -42,7 +42,8 @@ void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = "\n" + std::string(4 * (frame.indent + 1), ' '); + ++frame.indent; + std::string indent_str = "\n" + std::string(4 * frame.indent, ' '); if (frame.expression_list_prepend_whitespace) { @@ -50,8 +51,6 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For settings.ostr << ' '; } - ++frame.indent; - for (size_t i = 0, size = children.size(); i < size; ++i) { if (i && separator) diff --git a/src/Parsers/ASTNameTypePair.cpp b/src/Parsers/ASTNameTypePair.cpp index e4066081a9b..1515700365f 100644 --- a/src/Parsers/ASTNameTypePair.cpp +++ b/src/Parsers/ASTNameTypePair.cpp @@ -23,12 +23,8 @@ ASTPtr ASTNameTypePair::clone() const void ASTNameTypePair::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - - settings.ostr << indent_str << backQuoteIfNeed(name) << ' '; + settings.ostr << backQuoteIfNeed(name) << ' '; type->formatImpl(settings, state, frame); } } - - diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 37d7f458d61..5bd2c92c60a 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -165,11 +165,12 @@ size_t IAST::checkDepthImpl(size_t max_depth) const return res; } -String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const +String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const { WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; + settings.print_pretty_type_names = print_pretty_type_names; format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index e2cf7579667..2293d50b0ec 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. 
LiteralEscapingStyle literal_escaping_style; + bool print_pretty_type_names; explicit FormatSettings( WriteBuffer & ostr_, @@ -209,7 +210,8 @@ public: bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, bool show_secrets_ = true, - LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular, + bool print_pretty_type_names_ = false) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -218,6 +220,7 @@ public: , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') , literal_escaping_style(literal_escaping_style_) + , print_pretty_type_names(print_pretty_type_names_) { } @@ -230,6 +233,7 @@ public: , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) , literal_escaping_style(other.literal_escaping_style) + , print_pretty_type_names(other.print_pretty_type_names) { } @@ -251,7 +255,7 @@ public: /// The state that is copied when each node is formatted. For example, nesting level. struct FormatStateStacked { - UInt8 indent = 0; + UInt16 indent = 0; bool need_parens = false; bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) @@ -274,7 +278,7 @@ public: /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. - String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const; + String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const; /** formatForLogging and formatForErrorMessage always hide secrets. 
This inconsistent * behaviour is due to the fact such functions are called from Client which knows nothing about @@ -283,12 +287,12 @@ public: */ String formatForLogging(size_t max_length = 0) const { - return formatWithPossiblyHidingSensitiveData(max_length, true, false); + return formatWithPossiblyHidingSensitiveData(max_length, true, false, false); } String formatForErrorMessage() const { - return formatWithPossiblyHidingSensitiveData(0, true, false); + return formatWithPossiblyHidingSensitiveData(0, true, false, false); } virtual bool hasSecretParts() const { return childrenHaveSecretParts(); } From 566e043c2c84a979ca1c05996f6e3a4303708bff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:44:36 +0200 Subject: [PATCH 174/363] Add a test --- ...print_pretty_tuples_create_query.reference | 56 +++++++++++++++++++ .../03227_print_pretty_tuples_create_query.sh | 35 ++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference create mode 100755 tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference new file mode 100644 index 00000000000..c65dc32a224 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -0,0 +1,56 @@ + +SHOW CREATE TABLE: +CREATE TABLE test.test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 +CREATE TABLE test.test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 + +clickhouse-format: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test (`x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String) ORDER BY tuple() + +formatQuery: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ORDER BY tuple() diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh new file mode 100755 index 00000000000..e5614f9f228 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-asan, no-msan, no-tsan +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo +echo "SHOW CREATE TABLE:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + DROP TABLE IF EXISTS test; + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY (); + SET print_pretty_type_names = 1; + SHOW CREATE TABLE test; + SET print_pretty_type_names = 0; + SHOW CREATE TABLE test; + DROP TABLE test; +" + +echo +echo "clickhouse-format:" +${CLICKHOUSE_FORMAT} --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" +${CLICKHOUSE_FORMAT} --oneline --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" + +echo +echo "formatQuery:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 1; + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 0; +" From 3065b386a4e033a802bbc4855d5113e814648848 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:49:45 +0200 Subject: [PATCH 175/363] Update documentation --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1a71494c8cd..d8837d26e54 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -882,7 +882,7 @@ class IColumn; M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. 
Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function, as well as in the `SHOW CREATE TABLE` query and the `formatQuery` function.", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ From 02ec4e2f92ac769f92aebdb714d0d8da1a924984 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 04:00:31 +0200 Subject: [PATCH 176/363] Fix build --- src/Parsers/ExpressionElementParsers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 726326bfc85..61b5723072e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -943,7 +943,6 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (ParserToken(DoubleColon).ignore(pos, expected) && ParserDataType().parse(pos, type_ast, expected)) { - String s; size_t data_size = data_end - data_begin; if (string_literal) { From 7a5df67b3b3b5bd7a8481562e0293150427fef90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 05:08:58 +0200 Subject: [PATCH 177/363] Fix style --- src/Interpreters/InterpreterShowCreateQuery.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 7af6b4948f8..3de6b755609 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,10 +97,12 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({ + column->insert(format( + { .ctx = getContext(), .query = *create_query, - .one_line = false})); + .one_line = false + })); return QueryPipeline(std::make_shared(Block{{ std::move(column), From 330086c621c860524b68ce7598d12c8db958101d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 03:46:33 +0000 Subject: [PATCH 178/363] update 02136_scalar_progress results according to fixed bug --- tests/queries/0_stateless/02136_scalar_progress.reference | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02136_scalar_progress.reference b/tests/queries/0_stateless/02136_scalar_progress.reference index 5378c52de89..b8957f78e6d 100644 --- a/tests/queries/0_stateless/02136_scalar_progress.reference +++ b/tests/queries/0_stateless/02136_scalar_progress.reference @@ -1,6 +1,7 @@ < X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"65505","read_bytes":"524040","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: 
{"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} -< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Progress: {"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} From 00891a2dd8f69dde0b1fe364b6891fd3629d5dbe Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 13:57:24 +0000 Subject: [PATCH 179/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 4794359fd2b..30228277bb5 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -1,3 +1,5 @@ +-- Tags: no-ordinary-database + set allow_experimental_refreshable_materialized_view=1; CREATE MATERIALIZED VIEW 03221_rmv From 9dee9ecfb4adee4cff099f13afe61bdc2c38170e Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 17 Aug 2024 10:33:35 -0600 Subject: [PATCH 180/363] Fix incorrect default value for postgresql_connection_pool_auto_close_connection in docs --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index de601fe02dc..5bf1fe197ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1381,7 +1381,7 @@ Default value: `2`. Close connection before returning connection to the pool. -Default value: true. +Default value: false. 
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size} From a42b12725ab37df74a96e85f7c644c90bd6e30f6 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 17:39:09 +0200 Subject: [PATCH 181/363] CI: Native build for package_aarch64 --- tests/ci/ci_config.py | 3 ++- tests/ci/ci_definitions.py | 1 + tests/ci/test_ci_config.py | 14 ++++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 7a19eb6f827..173c6c9c931 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -94,7 +94,8 @@ class CI: package_type="deb", static_binary_name="aarch64", additional_pkgs=True, - ) + ), + runner_type=Runners.BUILDER_ARM, ), BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48847b0d7a6..1bed9db06f2 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -57,6 +57,7 @@ class Runners(metaclass=WithIter): """ BUILDER = "builder" + BUILDER_ARM = "builder-aarch64" STYLE_CHECKER = "style-checker" STYLE_CHECKER_ARM = "style-checker-aarch64" FUNC_TESTER = "func-tester" diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 525b3bf367b..c3e55aeac06 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -35,10 +35,16 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] must have style-checker(-aarch64) runner", ) elif "binary_" in job.lower() or "package_" in job.lower(): - self.assertTrue( - CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, - f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", - ) + if job.lower() == CI.BuildNames.PACKAGE_AARCH64: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER_ARM,), + f"Job [{job}] must have [{CI.Runners.BUILDER_ARM}] runner", + ) + else: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER,), + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) elif "aarch64" in job.lower(): self.assertTrue( "aarch" in CI.JOB_CONFIGS[job].runner_type, From 7432400fd0c07b7c967f47b1536706b8f791fcb1 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 21:06:58 +0200 Subject: [PATCH 182/363] revert hacks made to prevent OOM in aarch64 --- cmake/limit_jobs.cmake | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 17d8dd42a2c..8e48fc9b9d8 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,19 +42,9 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) - if (ARCH_AARCH64) - # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") - set (PARALLEL_LINK_JOBS 1) - if (LINKER_NAME MATCHES "lld") - math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") - endif() - elseif (PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) - endif () +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND 
ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") From cb8d9a05643d3aac5f410c4eac53124224c63bc8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 17 Aug 2024 20:13:35 +0200 Subject: [PATCH 183/363] fix typo --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index cd6fd9ab839..a03394be226 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -4952,7 +4952,7 @@ toIntervalMonth(n) **Arguments** -- `n` — Number of m. Positive integer number. [Int*](../data-types/int-uint.md). +- `n` — Number of months. Positive integer number. [Int*](../data-types/int-uint.md). **Returned values** From ae389d14ee65ff5fea3543868b6b161f9fcb806e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 17 Aug 2024 20:42:00 +0200 Subject: [PATCH 184/363] Fix stylecheck --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 54352b88578..e8eab3b4453 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -333,7 +333,7 @@ namespace type = variant_type; type_indexes = {TypeIndex::Variant}; } - + /// If we have only date/datetimes types (Date/DateTime/DateTime64), convert all of them to the common type, /// otherwise, convert all Date, DateTime and DateTime64 to String. 
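    /// Illustrative sketch of the rule above (assumed examples, not an exhaustive spec):
    ///   {Date, DateTime}      -> {DateTime, DateTime}
    ///   {Date, DateTime64(9)} -> {DateTime64(9), DateTime64(9)}
    ///   {Date, Int64}         -> {String, Int64}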
void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) From c68b597eee752d5921d4469af01104a27681296a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 20:03:18 +0000 Subject: [PATCH 185/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 30228277bb5..1be276c485c 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -15,6 +15,6 @@ SELECT rand64() AS x; SELECT sleep(2); -SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE view = '03221_rmv'; +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; DROP TABLE 03221_rmv; From a36688b07387822cae18eca1fbc798a517119e02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 03:02:20 +0200 Subject: [PATCH 186/363] Fix error --- src/Parsers/ASTDataType.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 21f56e5f7a2..4211347fb74 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -54,6 +54,7 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat } else { + frame.expression_list_prepend_whitespace = false; arguments->formatImpl(settings, state, frame); } From 3c021e02b69f9237d9765d6295fd843ad2503398 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 03:02:26 +0200 Subject: [PATCH 187/363] Fix tests --- .../0_stateless/01458_named_tuple_millin.reference | 8 ++++++-- .../01504_compression_multiple_streams.reference | 8 ++++---- ...01548_create_table_compound_column_format.reference | 3 ++- .../01881_aggregate_functions_versioning.reference | 2 +- .../02117_show_create_table_system.reference | 10 ++++++++-- .../02286_tuple_numeric_identifier.reference | 2 +- .../02907_backup_restore_flatten_nested.reference | 4 ++-- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference index 954dfe36563..86561570985 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.reference +++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference @@ -1,12 +1,16 @@ CREATE TABLE default.tuple ( - `j` Tuple(a Int8, b String) + `j` Tuple( + a Int8, + b String) ) ENGINE = Memory j Tuple(\n a Int8,\n b String) CREATE TABLE default.tuple ( - `j` Tuple(a Int8, b String) + `j` Tuple( + a Int8, + b String) ) ENGINE = Memory j Tuple(\n a Int8,\n b String) diff --git a/tests/queries/0_stateless/01504_compression_multiple_streams.reference b/tests/queries/0_stateless/01504_compression_multiple_streams.reference index 4d3aba66526..14cdce72044 100644 --- a/tests/queries/0_stateless/01504_compression_multiple_streams.reference +++ b/tests/queries/0_stateless/01504_compression_multiple_streams.reference @@ -1,20 +1,20 @@ 1 1 [[1]] (1,[1]) 1 1 [[1]] (1,[1]) -CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = 
MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) -CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 3 3 [[3]] (3,[3]) 1 1 [[1]] (1,[1]) 1 1 [[1]] (1,[1]) -CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) -CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 +CREATE TABLE default.columns_with_multiple_streams_compact\n(\n `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n `field3` Tuple(\n UInt32,\n Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192 1 1 [[1]] (1,[1]) 2 2 [[2]] (2,[2]) 3 3 [[3]] (3,[3]) diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference index 21e31e8f034..c23cc57548b 100644 --- 
a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference +++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference @@ -7,6 +7,7 @@ ENGINE = TinyLog CREATE TABLE test ( `a` Int64, - `b` Tuple(a Int64) + `b` Tuple( + a Int64) ) ENGINE = TinyLog diff --git a/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference b/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference index c30c4ca7e74..e15f312c2c8 100644 --- a/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference +++ b/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference @@ -1 +1 @@ -CREATE TABLE default.test_table\n(\n `col1` DateTime,\n `col2` Int64,\n `col3` AggregateFunction(1, sumMap, Tuple(Array(UInt8), Array(UInt8)))\n)\nENGINE = AggregatingMergeTree\nORDER BY (col1, col2)\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test_table\n(\n `col1` DateTime,\n `col2` Int64,\n `col3` AggregateFunction(1, sumMap, Tuple(\n Array(UInt8),\n Array(UInt8)))\n)\nENGINE = AggregatingMergeTree\nORDER BY (col1, col2)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 32e8b2f4312..638a46a142f 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -510,9 +510,15 @@ CREATE TABLE system.parts `rows_where_ttl_info.max` Array(DateTime), `projections` Array(String), `visible` UInt8, - `creation_tid` Tuple(UInt64, UInt64, UUID), + `creation_tid` Tuple( + UInt64, + UInt64, + UUID), `removal_tid_lock` UInt64, - `removal_tid` Tuple(UInt64, UInt64, UUID), + `removal_tid` Tuple( + UInt64, + UInt64, + UUID), `creation_csn` UInt64, `removal_csn` UInt64, `has_lightweight_delete` UInt8, diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference index 21348493d1d..916cdaf83cd 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.t_tuple_numeric\n(\n `t` Tuple(`1` Tuple(`2` Int32, `3` Int32), `4` Int32)\n)\nENGINE = Memory +CREATE TABLE default.t_tuple_numeric\n(\n `t` Tuple(\n `1` Tuple(\n `2` Int32,\n `3` Int32),\n `4` Int32)\n)\nENGINE = Memory {"t":{"1":{"2":2,"3":3},"4":4}} 2 3 4 2 3 4 diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference index aa8f22f590a..0db19f0591a 100644 --- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference +++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference @@ -1,8 +1,8 @@ BACKUP_CREATED -CREATE TABLE default.test\n(\n `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.test\n(\n `test` Array(Tuple(\n foo String,\n bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 BACKUP_CREATED CREATE TABLE default.test2\n(\n `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 RESTORED -CREATE TABLE default.test\n(\n `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS 
index_granularity = 8192 +CREATE TABLE default.test\n(\n `test` Array(Tuple(\n foo String,\n bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 RESTORED CREATE TABLE default.test2\n(\n `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 From 60dd7e962a19e92cef4e3ab40d2607b3f5a59e90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 05:56:27 +0200 Subject: [PATCH 188/363] Fix tests --- programs/format/Format.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 4af77533c53..f07387bd395 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -264,10 +264,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (!backslash) { WriteBufferFromOwnString str_buf; - oneline = oneline || approx_query_length < max_line_length; - IAST::FormatSettings settings(str_buf, oneline, hilite); + bool oneline_current_query = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline_current_query, hilite); settings.show_secrets = true; - settings.print_pretty_type_names = !oneline; + settings.print_pretty_type_names = !oneline_current_query; res->format(settings); if (insert_query_payload) @@ -311,7 +311,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv) else { WriteBufferFromOwnString str_buf; - formatAST(*res, str_buf, hilite, oneline); + bool oneline_current_query = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline_current_query, hilite); + settings.show_secrets = true; + settings.print_pretty_type_names = !oneline_current_query; + res->format(settings); auto res_string = str_buf.str(); WriteBufferFromOStream res_cout(std::cout, 4096); From 1691e4c4977dbaf44bedf61cdf248e7a1997d407 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 06:14:21 +0200 Subject: [PATCH 189/363] Fix test --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index ee702300094..fbd7d897fb8 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -8,7 +8,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_block_size 8192 --max_insert_block_size 8192 --max_insert_threads 1 --min_insert_block_size_bytes 0 --min_insert_block_size_rows 0 -q "INSERT INTO ghdata FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} \ + --max_memory_usage 10G --query "INSERT INTO ghdata FORMAT JSONAsObject" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" @@ -16,7 +17,7 @@ ${CLICKHOUSE_CLIENT} -q \ "SELECT data.repo.name, count() AS stars FROM ghdata \ WHERE data.type = 'WatchEvent' GROUP BY data.repo.name ORDER BY stars DESC, data.repo.name LIMIT 5" -${CLICKHOUSE_CLIENT} --allow_experimental_analyzer=1 -q \ +${CLICKHOUSE_CLIENT} --enable_analyzer=1 -q \ "SELECT 
data.payload.commits[].author.name AS name, count() AS c FROM ghdata \ ARRAY JOIN data.payload.commits[].author.name \ GROUP BY name ORDER BY c DESC, name LIMIT 5" From 7f2c61799dc984add3056f9c77c4e64476ad917b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 06:14:59 +0200 Subject: [PATCH 190/363] Fix test --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh index ef87034ff89..2afec5ba7fe 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -13,10 +13,10 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree OR ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString" -${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string" +${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string" ${CLICKHOUSE_CLIENT} -q "SELECT \ (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2_from_string) = \ From d34d41d1e240f0df6256a827661174a30011f204 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Aug 2024 04:34:38 +0000 Subject: [PATCH 191/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 1be276c485c..de8de41fd04 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -16,5 +16,3 @@ SELECT rand64() AS x; SELECT sleep(2); SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; - -DROP TABLE 03221_rmv; From 03ab872f5c628613e0e187d4e1ad3ca9b9148bf6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 07:34:44 +0200 Subject: [PATCH 192/363] Fix error --- src/IO/S3/PocoHTTPClient.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 3b7ec4d1d9c..eb65460ce13 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -20,6 +20,7 @@ #include #include + namespace Aws::Http::Standard { class StandardHttpResponse; @@ -27,15 +28,17 @@ class StandardHttpResponse; namespace DB { - class Context; } + namespace DB::S3 { + class ClientFactory; class PocoHTTPClient; + struct PocoHTTPClientConfiguration : public 
Aws::Client::ClientConfiguration
{
    std::function per_request_configuration;
@@ -76,6 +79,7 @@ private:
     friend ClientFactory;
 };

+
 class PocoHTTPResponse : public Aws::Http::Standard::StandardHttpResponse
 {
 public:
@@ -115,6 +119,7 @@ private:
     Aws::Utils::Stream::ResponseStream body_stream;
 };

+
 class PocoHTTPClient : public Aws::Http::HttpClient
 {
 public:
@@ -170,10 +175,10 @@ protected:
     std::function error_report;
     ConnectionTimeouts timeouts;
     const RemoteHostFilter & remote_host_filter;
-    unsigned int s3_max_redirects;
+    unsigned int s3_max_redirects = 0;
     bool s3_use_adaptive_timeouts = true;
     bool enable_s3_requests_logging = false;
-    bool for_disk_s3;
+    bool for_disk_s3 = false;

     /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler
     /// (i.e. throttles GetObject, HeadObject)

From 4e91c663a62529d27ddcf6d364338f001e1706eb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 08:28:16 +0200
Subject: [PATCH 193/363] Fix error

---
 src/IO/S3/PocoHTTPClient.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp
index dc7dcdc6793..3e060e21c51 100644
--- a/src/IO/S3/PocoHTTPClient.cpp
+++ b/src/IO/S3/PocoHTTPClient.cpp
@@ -397,8 +397,11 @@ void PocoHTTPClient::makeRequestInternalImpl(

     try
     {
-        const auto proxy_configuration = per_request_configuration();
-        for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt)
+        ProxyConfiguration proxy_configuration;
+        if (per_request_configuration)
+            proxy_configuration = per_request_configuration();
+
+        for (size_t attempt = 0; attempt <= s3_max_redirects; ++attempt)
         {
             Poco::URI target_uri(uri);

@@ -516,7 +519,6 @@ void PocoHTTPClient::makeRequestInternalImpl(
                     LOG_TEST(log, "Redirecting request to new location: {}", location);

                     addMetric(request, S3MetricType::Redirects);
-
                     continue;
                 }

@@ -564,9 +566,9 @@ void PocoHTTPClient::makeRequestInternalImpl(
             }
             else
             {
-
                 if (status_code == 429 || status_code == 503)
-                { // API throttling
+                {
+                    /// API throttling
                     addMetric(request, S3MetricType::Throttling);
                 }
                 else if (status_code >= 300)

From 7071942858e44053b92b8386e68251be7718e3b5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 09:05:45 +0200
Subject: [PATCH 194/363] Miscellaneous changes from #66999

---
 programs/keeper/Keeper.cpp | 4 +++-
 src/Daemon/BaseDaemon.cpp  | 4 +---
 src/Daemon/BaseDaemon.h    | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index a447a9e50f6..ced661d9772 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -66,6 +66,8 @@
 /// A minimal file used when the keeper is run without installation
 INCBIN(keeper_resource_embedded_xml, SOURCE_DIR "/programs/keeper/keeper_embedded.xml");

+extern const char * GIT_HASH;
+
 int mainEntryClickHouseKeeper(int argc, char ** argv)
 {
     DB::Keeper app;
@@ -675,7 +677,7 @@ void Keeper::logRevision() const
         "Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}",
         VERSION_STRING,
         ClickHouseRevision::getVersionRevision(),
-        git_hash.empty() ? "" : git_hash,
+        GIT_HASH,
         build_id.empty() ? 
"" : build_id, getpid()); } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f75699881bd..c42bf7641d2 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -452,8 +452,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() build_id = SymbolIndex::instance().getBuildIDHex(); #endif - git_hash = GIT_HASH; - #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); @@ -466,7 +464,7 @@ void BaseDaemon::logRevision() const { logger().information("Starting " + std::string{VERSION_FULL} + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision()) - + ", git hash: " + (git_hash.empty() ? "" : git_hash) + + ", git hash: " + std::string(GIT_HASH) + ", build id: " + (build_id.empty() ? "" : build_id) + ")" + ", PID " + std::to_string(getpid())); } diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h index b15aa74fcf3..a6efa94a567 100644 --- a/src/Daemon/BaseDaemon.h +++ b/src/Daemon/BaseDaemon.h @@ -165,7 +165,6 @@ protected: Poco::Util::AbstractConfiguration * last_configuration = nullptr; String build_id; - String git_hash; String stored_binary_hash; bool should_setup_watchdog = false; From 2a48aaad561f52539edbede94015c35e264bd344 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 09:12:49 +0200 Subject: [PATCH 195/363] Fix build --- src/Interpreters/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index fe87eed5570..decc16a3704 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -786,7 +786,7 @@ static std::tuple executeQueryImpl( /// Verify that AST formatting is consistent: /// If you format AST, parse it back, and format it again, you get the same string. - String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true); + String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false); /// The query can become more verbose after formatting, so: size_t new_max_query_size = max_query_size > 0 ? 
(1000 + 2 * max_query_size) : 0; @@ -811,7 +811,7 @@ static std::tuple executeQueryImpl( chassert(ast2); - String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true); + String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false); if (formatted1 != formatted2) throw Exception(ErrorCodes::LOGICAL_ERROR, From 4f7e3e8374acd98496092e8f8a219af6755a2f70 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 09:38:00 +0200 Subject: [PATCH 196/363] Fix test 01017_uniqCombined_memory_usage --- .../0_stateless/01017_uniqCombined_memory_usage.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index c13a0859183..eca370d94af 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -7,7 +7,8 @@ -- sizeof(HLL) is (2^K * 6 / 8) -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 -SET use_uncompressed_cache = 0; +SET use_uncompressed_cache = 0; +SET memory_profiler_step = 1; -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; @@ -31,14 +32,14 @@ SELECT 'K=16'; SELECT 'UInt32'; SET max_memory_usage = 2000000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED } -SET max_memory_usage = 4915200; +SET max_memory_usage = 5230000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements SELECT 'UInt64'; SET max_memory_usage = 2000000; SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED } -SET max_memory_usage = 4915200; +SET max_memory_usage = 5900000; SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); SELECT 'K=18'; From ec8554c85aeae5dcc8367ce09d093c5526ef1d47 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 09:41:29 +0200 Subject: [PATCH 197/363] Fix build --- src/Common/SignalHandlers.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp index c4358da2453..6ac6cbcae29 100644 --- a/src/Common/SignalHandlers.cpp +++ b/src/Common/SignalHandlers.cpp @@ -18,13 +18,17 @@ namespace DB { + namespace ErrorCodes { extern const int CANNOT_SET_SIGNAL_HANDLER; extern const int CANNOT_SEND_SIGNAL; } + } +extern const char * GIT_HASH; + using namespace DB; @@ -334,7 +338,7 @@ void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) co size_t pos = message.find('\n'); LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}", - VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos)); + VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, message.substr(0, pos)); /// Print trace from std::terminate exception line-by-line to make it easy for grep. 
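    /// (An assumed illustration: for a message "a\nb\nc", the loop below advances pos
    /// from one '\n' to the next and calls LOG_FATAL once per segment, so a plain
    /// line-oriented grep over the log still matches every line of the message.)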
while (pos != std::string_view::npos)
@@ -368,7 +372,7 @@ try
     LOG_FATAL(log, "########## Short fault info ############");
     LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
-        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
         thread_num, sig);

     std::string signal_description = "Unknown signal";
@@ -434,13 +438,13 @@ try
     if (query_id.empty())
     {
         LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
-            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
             thread_num, signal_description, sig);
     }
     else
     {
         LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
-            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
             thread_num, query_id, query, signal_description, sig);
     }

From d6e170f77704833fa6655820d55090ba18b0b9fe Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Sat, 17 Aug 2024 18:31:11 +0800
Subject: [PATCH 198/363] Repeated fields are also compound types; ignore them

---
 src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index c6167e572df..7b5c29e321f 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -286,6 +286,9 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa
         if (!s)
             continue;

+        if(s->descr()->schema_node()->is_repeated())
+            continue;
+
         auto path = c->path_in_schema()->ToDotVector();
         if (path.size() != 1)
             continue; // compound types not supported

From 858f8b502002661584e6153d39a23edc87b49dda Mon Sep 17 00:00:00 2001
From: Chang Chen
Date: Sun, 18 Aug 2024 19:11:30 +0800
Subject: [PATCH 199/363] Add test and update code per review comments

---
 .../Formats/Impl/ParquetBlockInputFormat.cpp        |   2 +-
 .../02841_parquet_filter_pushdown_bug.reference     |   1 +
 .../02841_parquet_filter_pushdown_bug.sh.sh         |   8 ++++++++
 .../0_stateless/data_parquet/68131.parquet          | Bin 0 -> 289 bytes
 4 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference
 create mode 100755 tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh
 create mode 100644 tests/queries/0_stateless/data_parquet/68131.parquet

diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 7b5c29e321f..1f213fef731 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -286,7 +286,7 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa
         if (!s)
             continue;

-        if(s->descr()->schema_node()->is_repeated())
+        if (s->descr()->schema_node()->is_repeated())
             continue;

         auto path = c->path_in_schema()->ToDotVector();
diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference
new file mode 100644
index 
00000000000..6ed63af507a --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference @@ -0,0 +1 @@ +[1,2] diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh new file mode 100755 index 00000000000..58eb207b6e6 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select f from file('$CURDIR/data_parquet/68131.parquet', Parquet, 'f Array(Int32)')" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_parquet/68131.parquet b/tests/queries/0_stateless/data_parquet/68131.parquet new file mode 100644 index 0000000000000000000000000000000000000000..169f6152003db164c78e33cd69205caa33f906b5 GIT binary patch literal 289 zcmXAl!D_=W42Bgqgq#M0O4Q&(E)5xMp|QKBg Date: Sun, 18 Aug 2024 12:18:24 +0000 Subject: [PATCH 200/363] fix test --- .../0_stateless/03221_refreshable_matview_progress.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index de8de41fd04..ecb385c9bfa 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -3,7 +3,7 @@ set allow_experimental_refreshable_materialized_view=1; CREATE MATERIALIZED VIEW 03221_rmv -REFRESH AFTER 1 SECOND +REFRESH AFTER 10 SECOND ( x UInt64 ) @@ -16,3 +16,5 @@ SELECT rand64() AS x; SELECT sleep(2); SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; + +DROP TABLE 03221_rmv; From f5308635d193859cdb19a71040006278a21bdc51 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 18 Aug 2024 15:25:07 +0200 Subject: [PATCH 201/363] Revert "Improve compatibility of `upper/lowerUTF8` with Spark" --- .gitmodules | 7 +- contrib/icu | 2 +- src/Common/examples/CMakeLists.txt | 5 - src/Common/examples/utf8_upper_lower.cpp | 27 -- src/Functions/LowerUpperImpl.h | 1 + src/Functions/LowerUpperUTF8Impl.h | 283 +++++++++++++++--- src/Functions/initcapUTF8.cpp | 3 +- src/Functions/lowerUTF8.cpp | 25 +- src/Functions/upperUTF8.cpp | 24 +- .../00170_lower_upper_utf8.reference | 4 - .../0_stateless/00170_lower_upper_utf8.sql | 11 - .../00233_position_function_family.sql | 3 - .../0_stateless/00761_lower_utf8_bug.sql | 3 - .../0_stateless/01278_random_string_utf8.sql | 3 - .../0_stateless/01431_utf8_ubsan.reference | 4 +- .../queries/0_stateless/01431_utf8_ubsan.sql | 3 - .../0_stateless/01590_countSubstrings.sql | 3 - ...71_lower_upper_utf8_row_overlaps.reference | 4 +- .../02071_lower_upper_utf8_row_overlaps.sql | 3 - ...new_functions_must_be_documented.reference | 2 + .../02514_if_with_lazy_low_cardinality.sql | 3 - .../0_stateless/02807_lower_utf8_msan.sql | 3 - tests/queries/0_stateless/03015_peder1001.sql | 3 - 23 files changed, 265 insertions(+), 164 deletions(-) delete mode 100644 src/Common/examples/utf8_upper_lower.cpp diff --git a/.gitmodules b/.gitmodules index f18844e5eb4..cdee6a43ad8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -106,6 +106,9 @@ [submodule "contrib/icudata"] path = contrib/icudata url = https://github.com/ClickHouse/icudata +[submodule "contrib/icu"] + path 
= contrib/icu + url = https://github.com/unicode-org/icu [submodule "contrib/flatbuffers"] path = contrib/flatbuffers url = https://github.com/ClickHouse/flatbuffers @@ -366,7 +369,3 @@ [submodule "contrib/numactl"] path = contrib/numactl url = https://github.com/ClickHouse/numactl.git -[submodule "contrib/icu"] - path = contrib/icu - url = https://github.com/ClickHouse/icu - branch = ClickHouse/release-75-1 diff --git a/contrib/icu b/contrib/icu index 4216173eeeb..7750081bda4 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273 +Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625 diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 8383e80d09d..69580d4ad0e 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -92,8 +92,3 @@ endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) - -if (TARGET ch_contrib::icu) - clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp) - target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu) -endif () diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp deleted file mode 100644 index 826e1763105..00000000000 --- a/src/Common/examples/utf8_upper_lower.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include - -std::string utf8_to_lower(const std::string & input) -{ - icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); - unicodeInput.toLower(); - std::string output; - unicodeInput.toUTF8String(output); - return output; -} - -std::string utf8_to_upper(const std::string & input) -{ - icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); - unicodeInput.toUpper(); - std::string output; - unicodeInput.toUTF8String(output); - return output; -} - -int main() -{ - std::string input = "ır"; - std::cout << "upper:" << utf8_to_upper(input) << std::endl; - return 0; -} diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index a52703d10c8..d463ef96e16 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -1,6 +1,7 @@ #pragma once #include + namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 5da085f48e5..eedabca5b22 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,14 +1,15 @@ #pragma once - -#include "config.h" - -#if USE_ICU - #include #include -#include -#include +#include +#include #include +#include + +#ifdef __SSE2__ +#include +#endif + namespace DB { @@ -18,7 +19,71 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -template +/// xor or do nothing +template +UInt8 xor_or_identity(const UInt8 c, const int mask) +{ + return c ^ mask; +} + +template <> +inline UInt8 xor_or_identity(const UInt8 c, const int) +{ + return c; +} + +/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array +template +inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) +{ + if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) + { + /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// ѐёђѓєѕіїјљњћќѝўџ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD0u && 
(src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// А-П + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) + { + /// а-п + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) + { + /// Р-Я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) + { + /// р-я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); + } +} + + +/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case. + * Note: It is assumed that after the character is converted to another case, + * the length of its multibyte sequence in UTF-8 does not change. + * Otherwise, the behavior is undefined. + */ +template struct LowerUpperUTF8Impl { static void vector( @@ -38,46 +103,180 @@ struct LowerUpperUTF8Impl return; } - res_data.resize(data.size()); - res_offsets.resize_exact(offsets.size()); - - String output; - size_t curr_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); - size_t size = offsets[i] - offsets[i - 1]; - - icu::UnicodeString input(data_start, static_cast(size), "UTF-8"); - if constexpr (upper) - input.toUpper(); - else - input.toLower(); - - output.clear(); - input.toUTF8String(output); - - /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first - /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this - /// case, the behavior is also reasonable. - const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size()); - size_t valid_size = res_end ? res_end - output.data() + 1 : 0; - - res_data.resize(curr_offset + valid_size + 1); - memcpy(&res_data[curr_offset], output.data(), valid_size); - res_data[curr_offset + valid_size] = 0; - - curr_offset += valid_size + 1; - res_offsets[i] = curr_offset; - } + res_data.resize_exact(data.size()); + res_offsets.assign(offsets); + array(data.data(), data.data() + data.size(), offsets, res_data.data()); } static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } + + /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`. + * `src` and `dst` are incremented by corresponding sequence lengths. 
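+     * An illustrative example (assuming the lower-casing instantiation's parameters):
+     * for ASCII input 'A' the fast path below computes 'A' ^ flip_case_mask,
+     * i.e. 'A' ^ 0x20 == 'a', and advances both pointers by exactly one byte.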
*/ + static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial) + { + if (src[0] <= ascii_upper_bound) + { + if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) + *dst++ = *src++ ^ flip_case_mask; + else + *dst++ = *src++; + } + else if (src + 1 < src_end + && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu)))) + { + cyrillic_to_case(src, dst); + } + else if (src + 1 < src_end && src[0] == 0xC2u) + { + /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF + *dst++ = *src++; + *dst++ = *src++; + } + else if (src + 2 < src_end && src[0] == 0xE2u) + { + /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF + *dst++ = *src++; + *dst++ = *src++; + *dst++ = *src++; + } + else + { + size_t src_sequence_length = UTF8::seqLength(*src); + /// In case partial buffer was passed (due to SSE optimization) + /// we cannot convert it with current src_end, but we may have more + /// bytes to convert and eventually got correct symbol. + if (partial && src_sequence_length > static_cast(src_end - src)) + return false; + + auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); + if (src_code_point) + { + int dst_code_point = to_case(*src_code_point); + if (dst_code_point > 0) + { + size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); + assert(dst_sequence_length <= 4); + + /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. + /// As an example, this happens for ß and ẞ. + if (dst_sequence_length == src_sequence_length) + { + src += dst_sequence_length; + dst += dst_sequence_length; + return true; + } + } + } + + *dst = *src; + ++dst; + ++src; + } + + return true; + } + +private: + static constexpr auto ascii_upper_bound = '\x7f'; + static constexpr auto flip_case_mask = 'A' ^ 'a'; + + static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) + { + const auto * offset_it = offsets.begin(); + const UInt8 * begin = src; + +#ifdef __SSE2__ + static constexpr auto bytes_sse = sizeof(__m128i); + + /// If we are before this position, we can still read at least bytes_sse. 
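+    /// Worked example (illustrative): with bytes_sse == 16 and a 40-byte chunk,
+    /// src_end_sse == begin + 25, so 16-byte loads land at offsets 0 and 16 only,
+    /// and the tail past offset 31 is finished by the scalar per-row loop below.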
+ const auto * src_end_sse = src_end - bytes_sse + 1; + + /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) + const auto v_zero = _mm_setzero_si128(); + const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); + const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); + const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); + + while (src < src_end_sse) + { + const auto chars = _mm_loadu_si128(reinterpret_cast(src)); + + /// check for ASCII + const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero); + const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii); + + /// ASCII + if (mask_is_not_ascii == 0) + { + const auto is_not_case + = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); + const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); + + /// everything in correct case ASCII + if (mask_is_not_case == 0) + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); + else + { + /// ASCII in mixed case + /// keep `flip_case_mask` only where necessary, zero out elsewhere + const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); + + /// flip case by applying calculated mask + const auto cased_chars = _mm_xor_si128(chars, xor_mask); + + /// store result back to destination + _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); + } + + src += bytes_sse; + dst += bytes_sse; + } + else + { + /// UTF-8 + + /// Find the offset of the next string after src + size_t offset_from_begin = src - begin; + while (offset_from_begin >= *offset_it) + ++offset_it; + + /// Do not allow one row influence another (since row may have invalid sequence, and break the next) + const UInt8 * row_end = begin + *offset_it; + chassert(row_end >= src); + const UInt8 * expected_end = std::min(src + bytes_sse, row_end); + + while (src < expected_end) + { + if (!toCase(src, expected_end, dst, /* partial= */ true)) + { + /// Fallback to handling byte by byte. 
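+                        /// (Assumed reading: a multi-byte sequence straddles the 16-byte
+                        /// window here, so shrinking src_end_sse to src ends the SSE loop
+                        /// and the scalar per-row loop at the end finishes the conversion.)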
+ src_end_sse = src; + break; + } + } + } + } + + /// Find the offset of the next string after src + size_t offset_from_begin = src - begin; + while (offset_it != offsets.end() && offset_from_begin >= *offset_it) + ++offset_it; +#endif + + /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) + while (src < src_end) + { + const UInt8 * row_end = begin + *offset_it; + chassert(row_end >= src); + + while (src < row_end) + toCase(src, row_end, dst, /* partial= */ false); + ++offset_it; + } + } }; } - -#endif diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 004586dce26..282d846094e 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -1,8 +1,9 @@ #include #include +#include #include #include -#include + namespace DB { diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp index e2f7cb84730..7adb0069121 100644 --- a/src/Functions/lowerUTF8.cpp +++ b/src/Functions/lowerUTF8.cpp @@ -1,10 +1,9 @@ -#include "config.h" - -#if USE_ICU - -#include +#include #include #include +#include +#include + namespace DB { @@ -16,25 +15,13 @@ struct NameLowerUTF8 static constexpr auto name = "lowerUTF8"; }; -using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>; +using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>; } REGISTER_FUNCTION(LowerUTF8) { - FunctionDocumentation::Description description - = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)"; - FunctionDocumentation::Syntax syntax = "lowerUTF8(input)"; - FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; - FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; - FunctionDocumentation::Examples examples = { - {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"}, - }; - FunctionDocumentation::Categories categories = {"String"}; - - factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction(); } } - -#endif diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp index ef26430331f..659e67f0ef3 100644 --- a/src/Functions/upperUTF8.cpp +++ b/src/Functions/upperUTF8.cpp @@ -1,10 +1,8 @@ -#include "config.h" - -#if USE_ICU - -#include #include #include +#include +#include + namespace DB { @@ -16,25 +14,13 @@ struct NameUpperUTF8 static constexpr auto name = "upperUTF8"; }; -using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>; +using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>; } REGISTER_FUNCTION(UpperUTF8) { - FunctionDocumentation::Description description - = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. 
If this assumption is violated, no exception is thrown and the result is undefined.)"; - FunctionDocumentation::Syntax syntax = "upperUTF8(input)"; - FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}}; - FunctionDocumentation::ReturnedValue returned_value = "A String data type value"; - FunctionDocumentation::Examples examples = { - {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"}, - }; - FunctionDocumentation::Categories categories = {"String"}; - - factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + factory.registerFunction(); } } - -#endif diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference index 3c644f22b9b..f202cb75513 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -22,7 +22,3 @@ 1 1 1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql index 85b6c5c6095..4caba2033ff 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str; select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa'; @@ -30,11 +27,3 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n; select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n; - --- Turkish language -select upperUTF8('ır') = 'IR'; -select lowerUTF8('ır') = 'ır'; - --- German language -select upper('öäüß') = 'öäüß'; -select lower('ÖÄÜẞ') = 'ÖÄÜẞ'; diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql index d6668cb7ba4..dd7394bc39a 100644 --- a/tests/queries/0_stateless/00233_position_function_family.sql +++ b/tests/queries/0_stateless/00233_position_function_family.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SET send_logs_level = 'fatal'; select 1 = position('', ''); diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql index a0ab55edc15..de20b894331 100644 --- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql +++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql @@ -1,4 +1 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0'); diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql index 290d6a0c759..da2dc48c3e1 100644 --- a/tests/queries/0_stateless/01278_random_string_utf8.sql +++ b/tests/queries/0_stateless/01278_random_string_utf8.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT 
lengthUTF8(randomStringUTF8(100)); SELECT toTypeName(randomStringUTF8(10)); diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference index dc785e57851..c98c950d535 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.reference +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -1,2 +1,2 @@ -EFBFBD -EFBFBD +FF +FF diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql index 3a28e023805..d6a299225b1 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.sql +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -1,5 +1,2 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT hex(lowerUTF8('\xFF')); SELECT hex(upperUTF8('\xFF')); diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index 5ec4f412d7f..b38cbb7d188 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - -- -- countSubstrings -- diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference index deabef61a88..a3bac432482 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference @@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo -- MONOGRAM FOR YANG with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_ select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV; -1,"�","�" +1,"0xE2","0xE2" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -1,"�","�" +1,"0xE2","0xE2" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -- NOTE: regression test for introduced bug -- https://github.com/ClickHouse/ClickHouse/issues/42756 diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql index d175e0659d0..8ca0a3f5f75 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - drop table if exists utf8_overlap; create table utf8_overlap (str String) engine=Memory(); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 0980e25b70f..c39f1fb1ce9 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -416,6 +416,7 @@ logTrace lowCardinalityIndices lowCardinalityKeys lower +lowerUTF8 makeDate makeDate32 makeDateTime @@ -896,6 +897,7 @@ tupleToNameValuePairs unbin unhex upper +upperUTF8 uptime validateNestedArraySizes version diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql index b169cfd0ab9..80e3c0a9ece 100644 --- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - 
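[Editor's note] The 01431_utf8_ubsan reference change above (EFBFBD becomes FF) captures the behavioral difference this series accepts: the ICU-backed implementation substitutes an undecodable byte with U+FFFD, while the restored implementation copies it through untouched. A rough Python model of the two policies, purely for illustration (neither function is the server code):

```python
def lower_utf8_icu_style(data: bytes) -> bytes:
    # ICU-like policy: an undecodable byte such as 0xFF is replaced with
    # U+FFFD, which re-encodes to the bytes EF BF BD.
    return data.decode("utf-8", errors="replace").lower().encode("utf-8")

def lower_utf8_passthrough(data: bytes) -> bytes:
    # Restored policy: case-map only valid sequences and copy invalid
    # bytes through as-is, so hex(lowerUTF8('\xFF')) stays FF.
    out = bytearray()
    i = 0
    while i < len(data):
        if data[i] < 0x80:  # ASCII fast path
            out += bytes([data[i]]).lower()
            i += 1
            continue
        for width in (2, 3, 4):  # try to decode one multi-byte sequence
            try:
                out += data[i:i + width].decode("utf-8").lower().encode("utf-8")
                i += width
                break
            except UnicodeDecodeError:
                continue
        else:
            out.append(data[i])  # invalid byte: pass through unchanged
            i += 1
    return bytes(out)

assert lower_utf8_icu_style(b"\xff").hex().upper() == "EFBFBD"
assert lower_utf8_passthrough(b"\xff").hex().upper() == "FF"
```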
create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory; insert into t (`arr.key`, `arr.value`) values (['a'], ['b']); select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr; diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql index 95f224577f7..e9eb18bf615 100644 --- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql +++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql @@ -1,5 +1,2 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1; SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2); diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql index df8e4db1536..810503207f2 100644 --- a/tests/queries/0_stateless/03015_peder1001.sql +++ b/tests/queries/0_stateless/03015_peder1001.sql @@ -1,6 +1,3 @@ --- Tags: no-fasttest --- no-fasttest: upper/lowerUTF8 use ICU - DROP TABLE IF EXISTS test_data; CREATE TABLE test_data From 427016a450cad536e8cbaf4de04d07313456aa4b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 6 Aug 2024 21:39:41 +0200 Subject: [PATCH 202/363] CI: Functional tests to store artifacts on timeout --- docker/test/fasttest/run.sh | 18 +--------- docker/test/sqllogic/Dockerfile | 1 - docker/test/sqllogic/run.sh | 2 +- docker/test/sqltest/Dockerfile | 1 - docker/test/stateful/run.sh | 25 +------------- docker/test/stateless/Dockerfile | 1 - docker/test/stateless/run.sh | 21 ++---------- docker/test/stateless/utils.lib | 16 --------- tests/ci/ci.py | 30 ++++++++-------- tests/ci/ci_definitions.py | 3 +- tests/ci/functional_test_check.py | 37 +++++++++++++++----- tests/ci/report.py | 11 +++--- tests/ci/tee_popen.py | 57 +++++++++++++++++++++++++++---- 13 files changed, 107 insertions(+), 116 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 394d31addb1..9920326b11c 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -256,22 +256,6 @@ function configure rm -f "$FASTTEST_DATA/config.d/secure_ports.xml" } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." 
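[Editor's note] The `timeout_with_logging` helper being deleted here (and again in the stateful/stateless scripts and utils.lib below) wrapped commands in GNU `timeout` and keyed off exit status 124. A rough Python equivalent of that old pattern, for reference only; note that because the helper also passed `--preserve-status`, a SIGTERM-ed command normally reports 128+15=143 rather than 124, so the "is it a timeout?" branch appears unlikely to have ever fired, and the series replaces the whole pattern with an externally driven SIGTERM anyway:

```python
# Sketch of the deleted bash helper; assumes GNU timeout is on PATH.
import subprocess

def run_with_timeout_logging(cmd: str, timeout_s: int) -> int:
    proc = subprocess.run(
        ["timeout", "-s", "TERM", "--preserve-status", str(timeout_s),
         "bash", "-c", cmd],
        check=False,
    )
    if proc.returncode == 124:  # GNU timeout's own "deadline fired" status
        print(f"The command 'timeout {timeout_s} {cmd}' was killed by timeout.")
    else:
        print("No, it isn't a timeout.")
    return proc.returncode
```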
- fi - - return $exit_code -} - function run_tests { clickhouse-server --version @@ -340,7 +324,7 @@ case "$stage" in configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt" ;& "run_tests") - timeout_with_logging 35m bash -c run_tests ||: + run_tests ||: /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 1425e12cd84..6397526388e 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -35,7 +35,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ ENV TZ=Europe/Amsterdam -ENV MAX_RUN_TIME=9000 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git" diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index ccba344035e..32368980f9b 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -94,7 +94,7 @@ function run_tests() export -f run_tests -timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2 +run_tests #/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile index 71d915b0c7a..b805bb03c2b 100644 --- a/docker/test/sqltest/Dockerfile +++ b/docker/test/sqltest/Dockerfile @@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/" RUN git clone ${sqltest_repo} ENV TZ=UTC -ENV MAX_RUN_TIME=900 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY run.sh / diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 3a4f0d97993..c072eeb0fa8 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -4,9 +4,6 @@ source /setup_export_logs.sh set -e -x -MAX_RUN_TIME=${MAX_RUN_TIME:-3600} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME)) - # Choose random timezone for this test run TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" @@ -123,9 +120,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] clickhouse-client --query "DROP TABLE datasets.hits_v1" clickhouse-client --query "DROP TABLE datasets.visits_v1" - - MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) - MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) else clickhouse-client --query "CREATE DATABASE test" clickhouse-client --query "SHOW TABLES FROM test" @@ -257,24 +251,7 @@ function run_tests() export -f run_tests -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." 
- fi - - return $exit_code -} - -TIMEOUT=$((MAX_RUN_TIME - 700)) -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index d8eb072328f..b0c4914a4e8 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -65,7 +65,6 @@ ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 -ENV MAX_RUN_TIME=0 # Unrelated to vars in setup_minio.sh, but should be the same there # to have the same binaries for local running scenario diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c70cbe1fe45..ad0cd321cc5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,9 +12,6 @@ dmesg --clear # fail on errors, verbose and export all env variables set -e -x -a -MAX_RUN_TIME=${MAX_RUN_TIME:-9000} -MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME)) - USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} @@ -308,8 +305,6 @@ function run_tests() try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" - TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800)) - START_TIME=${SECONDS} set +e TEST_ARGS=( @@ -324,32 +319,22 @@ function run_tests() --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" ) - timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ + clickhouse-test "${TEST_ARGS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e - DURATION=$((SECONDS - START_TIME)) - - echo "Elapsed ${DURATION} seconds." - if [[ $DURATION -ge $TIMEOUT ]] - then - echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds." - fi } export -f run_tests - -# This should be enough to setup job and collect artifacts -TIMEOUT=$((MAX_RUN_TIME - 700)) if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. - timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \ + NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \ | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||: fi -timeout_with_logging "$TIMEOUT" bash -c run_tests ||: +run_tests ||: echo "Files in current directory" ls -la ./ diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib index cb257536c36..31cd67254b4 100644 --- a/docker/test/stateless/utils.lib +++ b/docker/test/stateless/utils.lib @@ -40,22 +40,6 @@ function fn_exists() { declare -F "$1" > /dev/null; } -function timeout_with_logging() { - local exit_code=0 - - timeout -s TERM --preserve-status "${@}" || exit_code="${?}" - - echo "Checking if it is a timeout. The code 124 will indicate a timeout." - if [[ "${exit_code}" -eq "124" ]] - then - echo "The command 'timeout ${*}' has been killed by timeout." - else - echo "No, it isn't a timeout." - fi - - return $exit_code -} - function collect_core_dumps() { find . 
-type f -maxdepth 1 -name 'core.*' | while read -r core; do diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 49b597333dc..1208d8642ad 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -50,7 +50,6 @@ from github_helper import GitHub from pr_info import PRInfo from report import ( ERROR, - FAILURE, PENDING, SUCCESS, BuildResult, @@ -62,11 +61,11 @@ from report import ( FAIL, ) from s3_helper import S3Helper -from stopwatch import Stopwatch from tee_popen import TeePopen from ci_cache import CiCache from ci_settings import CiSettings from ci_buddy import CIBuddy +from stopwatch import Stopwatch from version_helper import get_version_from_repo # pylint: disable=too-many-lines @@ -370,8 +369,8 @@ def _pre_action(s3, job_name, batch, indata, pr_info): # skip_status = SUCCESS already there GH.print_in_group("Commit Status Data", job_status) - # create pre report - jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped) + # create dummy report + jr = JobReport.create_dummy(status=skip_status, job_skipped=to_be_skipped) jr.dump() if not to_be_skipped: @@ -990,19 +989,20 @@ def _run_test(job_name: str, run_command: str) -> int: stopwatch = Stopwatch() job_log = Path(TEMP_PATH) / "job_log.txt" with TeePopen(run_command, job_log, env, timeout) as process: + print(f"Job process started, pid [{process.process.pid}]") retcode = process.wait() if retcode != 0: print(f"Run action failed for: [{job_name}] with exit code [{retcode}]") - if timeout and process.timeout_exceeded: - print(f"Timeout {timeout} exceeded, dumping the job report") - JobReport( - status=FAILURE, - description=f"Timeout {timeout} exceeded", - test_results=[TestResult.create_check_timeout_expired(timeout)], - start_time=stopwatch.start_time_str, - duration=stopwatch.duration_seconds, - additional_files=[job_log], - ).dump() + if process.timeout_exceeded: + print(f"Job timed out: [{job_name}] exit code [{retcode}]") + assert JobReport.exist(), "JobReport real or dummy must be present" + jr = JobReport.load() + if jr.dummy: + print( + f"ERROR: Run action failed with timeout and did not generate JobReport - update dummy report with execution time" + ) + jr.test_results = [TestResult.create_check_timeout_expired()] + jr.duration = stopwatch.duration_seconds print(f"Run action done for: [{job_name}]") return retcode @@ -1205,7 +1205,7 @@ def main() -> int: job_report ), "BUG. 
There must be job report either real report, or pre-report if job was killed" error_description = "" - if not job_report.pre_report: + if not job_report.dummy: # it's a real job report ch_helper = ClickHouseHelper() check_url = "" diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1bed9db06f2..1d1c39f913d 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -332,7 +332,7 @@ class JobConfig: # will be triggered for the job if omitted in CI workflow yml run_command: str = "" # job timeout, seconds - timeout: Optional[int] = None + timeout: int = 7200 # sets number of batches for a multi-batch job num_batches: int = 1 # label that enables job in CI, if set digest isn't used @@ -421,7 +421,6 @@ class CommonJobConfigs: ), run_command='functional_test_check.py "$CHECK_NAME"', runner_type=Runners.FUNC_TESTER, - timeout=9000, ) STATEFUL_TEST = JobConfig( job_name_keyword="stateful", diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index b7391eff01b..d08f98fa05f 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -5,10 +5,11 @@ import csv import logging import os import re +import signal import subprocess import sys from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Optional from build_download_helper import download_all_deb_packages from clickhouse_helper import CiLogsCredentials @@ -25,11 +26,12 @@ from report import ( TestResults, read_test_results, FAILURE, + TestResult, ) from stopwatch import Stopwatch from tee_popen import TeePopen from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell NO_CHANGES_MSG = "Nothing to run" @@ -113,10 +115,6 @@ def get_run_command( if flaky_check: envs.append("-e NUM_TRIES=50") - envs.append("-e MAX_RUN_TIME=2800") - else: - max_run_time = os.getenv("MAX_RUN_TIME", "0") - envs.append(f"-e MAX_RUN_TIME={max_run_time}") envs += [f"-e {e}" for e in additional_envs] @@ -128,7 +126,7 @@ def get_run_command( ) return ( - f"docker run --volume={builds_path}:/package_folder " + f"docker run --rm --name func-tester --volume={builds_path}:/package_folder " # For dmesg and sysctl "--privileged " f"{ci_logs_args}" @@ -198,7 +196,7 @@ def process_results( state, description = status[0][0], status[0][1] if ret_code != 0: state = ERROR - description += " (but script exited with an error)" + description = f"Job failed, exit code: {ret_code}. 
" + description try: results_path = result_directory / "test_results.tsv" @@ -240,7 +238,19 @@ def parse_args(): return parser.parse_args() +test_process = None # type: Optional[TeePopen] +timeout_expired = False + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + Shell.check(f"docker exec func-tester pkill -f clickhouse-test", verbose=True) + + def main(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -328,11 +338,13 @@ def main(): logging.info("Going to run func tests: %s", run_command) with TeePopen(run_command, run_log_path) as process: + global test_process + test_process = process retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: - logging.info("Run failed") + logging.info("Run failed, exit code %s", retcode) try: subprocess.check_call( @@ -348,6 +360,13 @@ def main(): state, description, test_results, additional_logs = process_results( retcode, result_path, server_log_path ) + if timeout_expired: + description = "Timeout expired" + state = FAILURE + test_results.insert( + 0, TestResult.create_check_timeout_expired(stopwatch.duration_seconds) + ) + else: print( "This is validate bugfix or flaky check run, but no changes test to run - skip with success" diff --git a/tests/ci/report.py b/tests/ci/report.py index 6779a6dae96..c2632719aef 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -249,6 +249,7 @@ JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" JOB_STARTED_TEST_NAME = "STARTED" JOB_FINISHED_TEST_NAME = "COMPLETED" +JOB_TIMEOUT_TEST_NAME = "Job Timeout Expired" @dataclass @@ -277,8 +278,8 @@ class TestResult: self.log_files.append(log_path) @staticmethod - def create_check_timeout_expired(timeout: float) -> "TestResult": - return TestResult("Check timeout expired", "FAIL", timeout) + def create_check_timeout_expired(duration: Optional[float] = None) -> "TestResult": + return TestResult(JOB_TIMEOUT_TEST_NAME, "FAIL", time=duration) TestResults = List[TestResult] @@ -303,7 +304,7 @@ class JobReport: # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False # indicates that report generated by CI script in order to check later if job was killed before real report is generated - pre_report: bool = False + dummy: bool = False exit_code: int = -1 @staticmethod @@ -311,7 +312,7 @@ class JobReport: return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") @classmethod - def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport": + def create_dummy(cls, status: str, job_skipped: bool) -> "JobReport": return JobReport( status=status, description="", @@ -320,7 +321,7 @@ class JobReport: duration=0.0, additional_files=[], job_skipped=job_skipped, - pre_report=True, + dummy=True, ) def update_duration(self): diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 13db50df53f..ad3e62dab9c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -2,6 +2,8 @@ import logging import os +import signal +import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -30,20 +32,35 @@ class TeePopen: self._process = None # type: Optional[Popen] self.timeout = timeout self.timeout_exceeded = False + self.terminated_by_sigterm = False + self.terminated_by_sigkill = False + self.pid = 0 def _check_timeout(self) -> None: if 
self.timeout is None: return sleep(self.timeout) + logging.warning( + "Timeout exceeded. Send SIGTERM to process %s, timeout %s", + self.process.pid, + self.timeout, + ) + self.send_signal(signal.SIGTERM) + time_wait = 0 + self.terminated_by_sigterm = True self.timeout_exceeded = True + while self.process.poll() is None and time_wait < 100: + print("wait...") + wait = 5 + sleep(wait) + time_wait += wait while self.process.poll() is None: - logging.warning( - "Killing process %s, timeout %s exceeded", - self.process.pid, - self.timeout, + logging.error( + "Process is still running. Send SIGKILL", ) - os.killpg(self.process.pid, 9) - sleep(10) + self.send_signal(signal.SIGKILL) + self.terminated_by_sigkill = True + sleep(5) def __enter__(self) -> "TeePopen": self.process = Popen( @@ -57,6 +74,9 @@ class TeePopen: bufsize=1, errors="backslashreplace", ) + sleep(1) + self.pid = self._get_child_pid() + print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -77,6 +97,22 @@ class TeePopen: self.log_file.close() + def _get_child_pid(self): + # linux only + ps_command = f"ps --ppid {self.process.pid} -o pid=" + res = "NA" + try: + result = subprocess.run( + ps_command, shell=True, capture_output=True, text=True + ) + res = result.stdout.strip() + pid = int(res) + return pid + except Exception as e: + print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") + print(f"ERROR: getting Python subprocess PID: {e}") + return self.process.pid + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -85,6 +121,15 @@ class TeePopen: return self.process.wait() + def poll(self): + return self.process.poll() + + def send_signal(self, signal_num): + if self.pid: + os.kill(self.pid, signal_num) + else: + print("ERROR: no process to send signal") + @property def process(self) -> Popen: if self._process is not None: From 8e35b082b2e3315655110bdce4238217dfe85914 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Sat, 10 Aug 2024 10:01:16 +0200 Subject: [PATCH 203/363] teepopen fix --- tests/ci/tee_popen.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index ad3e62dab9c..53b0a0f6c2c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -3,7 +3,6 @@ import logging import os import signal -import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -34,7 +33,6 @@ class TeePopen: self.timeout_exceeded = False self.terminated_by_sigterm = False self.terminated_by_sigkill = False - self.pid = 0 def _check_timeout(self) -> None: if self.timeout is None: @@ -75,8 +73,7 @@ class TeePopen: errors="backslashreplace", ) sleep(1) - self.pid = self._get_child_pid() - print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") + print(f"Subprocess started, pid [{self.process.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -97,22 +94,6 @@ class TeePopen: self.log_file.close() - def _get_child_pid(self): - # linux only - ps_command = f"ps --ppid {self.process.pid} -o pid=" - res = "NA" - try: - result = subprocess.run( - ps_command, shell=True, capture_output=True, text=True - ) - res = result.stdout.strip() - pid = int(res) - return pid - except Exception 
as e: - print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") - print(f"ERROR: getting Python subprocess PID: {e}") - return self.process.pid - def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -125,10 +106,7 @@ class TeePopen: return self.process.poll() def send_signal(self, signal_num): - if self.pid: - os.kill(self.pid, signal_num) - else: - print("ERROR: no process to send signal") + os.killpg(self.process.pid, signal_num) @property def process(self) -> Popen: From 66fa5a154a8895f481a598616df93f7cb83e42cd Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 12 Aug 2024 02:34:22 +0200 Subject: [PATCH 204/363] tune timeouts, batches --- tests/ci/ci_config.py | 7 ++++--- tests/ci/ci_definitions.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 173c6c9c931..99f4ed38475 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,6 +316,7 @@ class CI: JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], random_bucket="parrepl_with_sanitizer", + timeout=3600, ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 @@ -346,7 +347,7 @@ class CI: required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=3, release_only=True @@ -401,14 +402,14 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=6, + num_batches=3, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1d1c39f913d..13c222b10b9 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -465,6 +465,7 @@ class CommonJobConfigs: ), run_command="upgrade_check.py", runner_type=Runners.STRESS_TESTER, + timeout=3600, ) INTEGRATION_TEST = JobConfig( job_name_keyword="integration", From 1deeca40dbbbc14373e51d830b851b54b82e5efa Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 15 Aug 2024 13:11:10 +0200 Subject: [PATCH 205/363] Handling timeout in integration tests --- tests/ci/ci.py | 13 ++++++++- tests/ci/ci_config.py | 3 ++- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 40 +++++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 1208d8642ad..a9ae078b449 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1003,6 +1003,7 @@ def _run_test(job_name: str, run_command: str) -> int: ) 
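[Editor's note] A condensed model of the escalation `_check_timeout` implements above: SIGTERM first so the child can wind down and write its report, SIGKILL only if it is still alive after the grace period. This is an illustration, not the TeePopen API; like the real `send_signal`, it assumes the child leads its own process group so `os.killpg` reaches the whole tree:

```python
import os
import signal
import subprocess
import time

def terminate_with_grace(proc: subprocess.Popen, grace_s: int = 100) -> None:
    # Ask politely: the child gets a chance to flush logs and reports.
    os.killpg(proc.pid, signal.SIGTERM)
    waited = 0
    while proc.poll() is None and waited < grace_s:
        time.sleep(5)
        waited += 5
    # Still running after the grace period: escalate to SIGKILL.
    while proc.poll() is None:
        os.killpg(proc.pid, signal.SIGKILL)
        time.sleep(5)
```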
jr.test_results = [TestResult.create_check_timeout_expired()] jr.duration = stopwatch.duration_seconds + jr.additional_files += [job_log] print(f"Run action done for: [{job_name}]") return retcode @@ -1329,10 +1330,20 @@ def main() -> int: if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + check_url = "" + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or _get_ext_check_name(args.job_name), + ) post_commit_status( commit, ERROR, - "", + check_url, "Error: " + error_description, _get_ext_check_name(args.job_name), pr_info, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 99f4ed38475..b5e424c2b3f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -402,7 +402,8 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], + num_batches=3, ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 6245f0490fc..7232ca375a1 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f5dbef4f6db..d3cd3d16de1 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -9,6 +9,7 @@ import random import re import shlex import shutil +import signal import string import subprocess import sys @@ -16,11 +17,13 @@ import time import zlib # for crc32 from collections import defaultdict from itertools import chain -from typing import Any, Dict +from typing import Any, Dict, Optional from env_helper import IS_CI from integration_test_images import IMAGES from tee_popen import TeePopen +from report import JOB_TIMEOUT_TEST_NAME +from stopwatch import Stopwatch MAX_RETRY = 1 NUM_WORKERS = 5 @@ -621,6 +624,9 @@ class ClickhouseIntegrationTestsRunner: test_data_dirs = {} for i in range(num_tries): + if timeout_expired: + print("Timeout expired - break test group execution") + break logging.info("Running test group %s for the %s retry", test_group, i) clear_ip_tables_and_restart_daemons() @@ -657,6 +663,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + global runner_subprocess + runner_subprocess = proc proc.wait() extra_logs_names = [log_basename] @@ -780,6 +788,9 @@ class ClickhouseIntegrationTestsRunner: logs = [] tries_num = 1 if should_fail else FLAKY_TRIES_COUNT for i in range(tries_num): + if timeout_expired: + print("Timeout expired - break flaky check execution") + break final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( @@ -839,6 
+850,7 @@ class ClickhouseIntegrationTestsRunner: return result_state, status_text, test_result, logs def run_impl(self, repo_path, build_path): + stopwatch = Stopwatch() if self.flaky_check or self.bugfix_validate_check: return self.run_flaky_check( repo_path, build_path, should_fail=self.bugfix_validate_check @@ -921,6 +933,9 @@ class ClickhouseIntegrationTestsRunner: random.shuffle(items_to_run) for group, tests in items_to_run: + if timeout_expired: + print("Timeout expired - break tests execution") + break logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS, 0 @@ -981,6 +996,17 @@ class ClickhouseIntegrationTestsRunner: status_text = "Timeout, " + status_text result_state = "failure" + if timeout_expired: + logging.error( + "Job killed by external timeout signal - setting status to failure!" + ) + status_text = "Job timeout expired, " + status_text + result_state = "failure" + # add mock test case to make timeout visible in job report and in ci db + test_result.insert( + 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") + ) + if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" @@ -1001,6 +1027,7 @@ def write_results(results_file, status_file, results, status): def run(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") repo_path = os.environ.get("CLICKHOUSE_TESTS_REPO_PATH") @@ -1035,5 +1062,16 @@ def run(): logging.info("Result written") +timeout_expired = False +runner_subprocess = None # type:Optional[subprocess.Popen] + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + runner_subprocess.send_signal(signal.SIGTERM) + + if __name__ == "__main__": run() From dde7ee29fc594f87bb35880bede845c4d4f29423 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 10:22:12 +0200 Subject: [PATCH 206/363] sort tests in report by status --- tests/ci/ci_config.py | 8 ++++---- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 3 ++- tests/ci/report.py | 19 +++++++++++++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b5e424c2b3f..8ce0b9fde5a 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -344,7 +344,7 @@ class CI: runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=2 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 @@ -354,7 +354,7 @@ class CI: ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=4, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -403,14 +403,14 @@ class CI: ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], - num_batches=3, + num_batches=6, ), 
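[Editor's note] The runner-side half of the handshake added above, condensed into a minimal sketch (names mirror the patch, the loop body is a stand-in): an external SIGTERM flips a module-level flag and is forwarded to the running pytest subprocess, and the main loops poll the flag between groups and retries so they stop scheduling new work instead of being cut off mid-flight:

```python
import signal
import subprocess
from typing import Optional

timeout_expired = False
runner_subprocess: Optional[subprocess.Popen] = None

def handle_sigterm(signum, _frame):
    global timeout_expired
    timeout_expired = True
    if runner_subprocess is not None:
        runner_subprocess.send_signal(signal.SIGTERM)

signal.signal(signal.SIGTERM, handle_sigterm)

for group in []:  # stand-in for the real test-group loop
    if timeout_expired:
        break  # stop between groups; a mock FAIL row then records the timeout
```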
JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=3, + num_batches=6, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7232ca375a1..6245f0490fc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils, Shell +from ci_utils import Utils def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index d3cd3d16de1..c3b71b85022 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -1070,7 +1070,8 @@ def handle_sigterm(signum, _frame): print(f"WARNING: Received signal {signum}") global timeout_expired timeout_expired = True - runner_subprocess.send_signal(signal.SIGTERM) + if runner_subprocess: + runner_subprocess.send_signal(signal.SIGTERM) if __name__ == "__main__": diff --git a/tests/ci/report.py b/tests/ci/report.py index c2632719aef..a1b25b994c7 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -742,10 +742,21 @@ def create_test_html_report( has_test_time = any(tr.time is not None for tr in test_results) has_log_urls = False - # Display entires with logs at the top (they correspond to failed tests) - test_results.sort( - key=lambda result: result.raw_logs is None and result.log_files is None - ) + def sort_key(status): + if "fail" in status.lower(): + return 0 + elif "error" in status.lower(): + return 1 + elif "not" in status.lower(): + return 2 + elif "ok" in status.lower(): + return 10 + elif "success" in status.lower(): + return 9 + else: + return 5 + + test_results.sort(key=lambda result: sort_key(result.status)) for test_result in test_results: colspan = 0 From 29fd5a6c90caeafee8bc930918f7f5544a1658a2 Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Sun, 18 Aug 2024 15:10:35 +0000 Subject: [PATCH 207/363] Add explicit session_timezone to UTC --- tests/queries/0_stateless/03222_datetime64_small_value_const.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index af06a622f8d..39266ba7992 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -1,4 +1,5 @@ -- Tags: shard +set session_timezone = 'UTC'; -- don't randomize the session timezone select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; From 90330077e5595c000cec82c8a0819db339296d33 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Aug 2024 17:56:44 +0000 Subject: [PATCH 208/363] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 
ecb385c9bfa..98e1c48478d 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database +-- Tags: no-replicated-database, no-ordinary-database set allow_experimental_refreshable_materialized_view=1; From 683b84e6b66edbfbba12a3c56aad9aefd717bde2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 19:37:52 +0100 Subject: [PATCH 209/363] fix --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 4 ++++ src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index cc321cd5a4d..9927d369104 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -235,6 +235,10 @@ void MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) const auto part_idx = current_parts.back().part_idx; const auto min_marks_per_task = per_part_infos[part_idx]->min_marks_per_task; + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + /// Do not get too few rows from part. if (marks_in_part >= min_marks_per_task && need_marks < min_marks_per_task) need_marks = min_marks_per_task; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 9d3c38822e1..1cc13102794 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -87,10 +87,6 @@ static size_t calculateMinMarksPerTask( } } - if (min_marks_per_task == 0) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); - LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } From 3883627aad0e23105ec9e1f985039cd27dcf227d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 20:51:40 +0100 Subject: [PATCH 210/363] fix --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 1 + src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 9927d369104..23c314e48f5 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes { extern const int CANNOT_SCHEDULE_TASK; extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; } MergeTreeReadPool::MergeTreeReadPool( diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 1cc13102794..95a10454f9e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,7 +13,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } MergeTreeReadPoolBase::MergeTreeReadPoolBase( From a258b4fb3dcdb8fa2484132aa72e9cece618d488 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:18:31 +0200 Subject: [PATCH 211/363] Fix race condition in MergeTree restarting thread --- .../ReplicatedMergeTreeRestartingThread.cpp | 15 ++++++++++++++- 
.../ReplicatedMergeTreeRestartingThread.h | 11 ++--------- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++------ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 05fd6f6915b..d3ccda904b6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -49,6 +48,20 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); } +void ReplicatedMergeTreeRestartingThread::start(bool schedule) +{ + LOG_TRACE(log, "Starting the restating thread, schedule: {}", schedule); + if (schedule) + task->activateAndSchedule(); + else + task->activate(); +} + +void ReplicatedMergeTreeRestartingThread::wakeup() +{ + task->schedule(); +} + void ReplicatedMergeTreeRestartingThread::run() { if (need_stop) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 01071d80e8b..d719505ae5e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -24,16 +24,9 @@ class ReplicatedMergeTreeRestartingThread public: explicit ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - void start(bool schedule = true) - { - LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule); - if (schedule) - task->activateAndSchedule(); - else - task->activate(); - } + void start(bool schedule); - void wakeup() { task->schedule(); } + void wakeup(); void shutdown(bool part_of_full_shutdown); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 068ff1387b3..ff8e362aa36 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5194,17 +5194,16 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) startBeingLeader(); - /// Activate replica in a separate thread if we are not calling from attach thread - restarting_thread.start(/*schedule=*/!from_attach_thread); - if (from_attach_thread) { LOG_TRACE(log, "Trying to startup table from right now"); - /// Try activating replica in current thread. + /// Try activating replica in the current thread. restarting_thread.run(); + restarting_thread.start(false); } else { + restarting_thread.start(true); /// Wait while restarting_thread finishing initialization. /// NOTE It does not mean that replication is actually started after receiving this event. /// It only means that an attempt to startup replication was made. @@ -5225,7 +5224,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() { LOG_TEST(log, "Received event for expired session. 
Waking up restarting thread"); - restarting_thread.start(); + restarting_thread.start(true); }); startBackgroundMovesIfNeeded(); @@ -5294,7 +5293,6 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() LOG_TRACE(log, "The attach thread is shutdown"); } - restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again startup_event.set(); From 3e5d070b8fb47600979a1a7cc672edc3a7327004 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:40:42 +0200 Subject: [PATCH 212/363] Fix tests --- .../00804_test_deflate_qpl_codec_compression.reference | 2 +- .../00804_test_zstd_qat_codec_compression.reference | 2 +- .../03227_print_pretty_tuples_create_query.reference | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference index a2178f5eda7..a6e03404f2b 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 
2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference index 31a4360469f..ff70403ce7a 100644 --- a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(UInt16, UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference index c65dc32a224..afaaaaa6119 100644 --- a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -1,6 +1,6 @@ SHOW CREATE TABLE: -CREATE TABLE test.test +CREATE TABLE default.test ( `x` Tuple( a String, @@ -13,7 +13,7 @@ CREATE TABLE test.test ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192 -CREATE TABLE test.test +CREATE TABLE default.test ( `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String From 8f2c20806a7b757beecb99e52a8d5a1dcab8df07 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:45:13 +0200 Subject: [PATCH 213/363] Fix test `01079_bad_alters_zookeeper_long` --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/clickhouse-test | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 068ff1387b3..66dac0dfe31 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6342,7 +6342,7 @@ void StorageReplicatedMergeTree::alter( "Metadata on replica is not up to date with common metadata in Zookeeper. " "It means that this replica still not applied some of previous alters." " Probably too many alters executing concurrently (highly not recommended). " - "You can retry this error"); + "You can retry the query"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
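[Editor's note] An illustrative model (threading semantics only, not the C++ classes) of the ordering the restarting-thread patch above enforces: when startup happens on the attach thread, the replica-activation logic runs once inline, and the background task is activated only afterwards, so a concurrently scheduled run can no longer race with the manual first run:

```python
import threading

class RestartingTask:
    def __init__(self, fn):
        self._fn = fn

    def start(self, schedule: bool) -> None:
        # "Activate" the task; optionally kick off a first scheduled run.
        if schedule:
            threading.Thread(target=self._fn, daemon=True).start()

def startup(task: RestartingTask, run_once, from_attach_thread: bool) -> None:
    if from_attach_thread:
        run_once()         # first activation happens inline...
        task.start(False)  # ...and only then is the task activated
    else:
        task.start(True)   # schedule immediately; callers wait elsewhere
```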
if (query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a3d7e0e922d..1203ad3730a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -59,6 +59,7 @@ MESSAGES_TO_RETRY = [ "is already started to be removing by another replica right now", # This is from LSan, and it indicates its own internal problem: "Unable to get registers from thread", + "You can retry", ] MAX_RETRIES = 3 From 207ef87782eca80410ed5747d971be7ddce65e6c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:55:38 +0200 Subject: [PATCH 214/363] Fix tests --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 3 ++- .../0_stateless/01825_type_json_ghdata_insert_select.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index fbd7d897fb8..b2f20d825dd 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-s3-storage +# ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 711194e71a1..17398d9a0c1 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-s3-storage +# ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 5ed3c29d4a1e1992671af7168c0a1b01757d97bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 01:18:12 +0200 Subject: [PATCH 215/363] Update tests --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- .../queries/0_stateless/01825_type_json_ghdata_insert_select.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index b2f20d825dd..33940caec29 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-s3-storage, long # ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 17398d9a0c1..fc503b345d9 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-s3-storage, long # ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 8eb922036e5b7caa36c1b904b43fdaee8e45acaa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Aug 2024 12:45:15 +0800 Subject: [PATCH 216/363] change as request --- .../functions/string-replace-functions.md | 16 +- src/Functions/overlay.cpp | 140 +++++------ 
.../0_stateless/03205_overlay.reference | 230 +++++------------- tests/queries/0_stateless/03205_overlay.sql | 132 +++++----- .../0_stateless/03206_overlay_utf8.reference | 168 ------------- .../0_stateless/03206_overlay_utf8.sql | 60 ----- 6 files changed, 203 insertions(+), 543 deletions(-) delete mode 100644 tests/queries/0_stateless/03206_overlay_utf8.reference delete mode 100644 tests/queries/0_stateless/03206_overlay_utf8.sql diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index d086c9ee64b..408a896e607 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -251,24 +251,24 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); ## overlay -Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. **Syntax** ```sql -overlay(s, replace, position[, length]) +overlay(s, replace, offset[, length]) ``` **Parameters** - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int-uint.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). - `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** -- A [String](../data-types/string.md) data type value. If `position` is negative the position is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. +- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. **Example** @@ -286,26 +286,26 @@ Result: ## overlayUTF8 -Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. **Syntax** ```sql -overlayUTF8(s, replace, position[, length]) +overlayUTF8(s, replace, offset[, length]) ``` **Parameters** - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int-uint.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). - `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** -- A [String](../data-types/string.md) data type value. If `position` is negative the position is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. +- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. 
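[Editor's note] Since the offset/length rules described above are easy to misread, here is a small reference model of the documented byte-level `overlay` semantics (an illustration, not the server code; exact clamping at the string boundaries may differ, and `overlayUTF8` applies the same arithmetic to code points instead of bytes):

```python
def overlay(s: str, replace: str, offset: int, length: int | None = None) -> str:
    # 1-based offset; a negative offset counts from the back; length
    # defaults to len(replace) when omitted or negative.
    n = len(s)
    start = min(offset - 1, n) if offset > 0 else max(n + offset, 0)
    if length is None or length < 0:
        length = len(replace)
    return s[:start] + replace + s[start + length:]

assert overlay("ClickHouse SQL", "CORE", 12) == "ClickHouse CORE"
```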
**Example** diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 094da27a71d..73ca0acbb8e 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -17,13 +17,13 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -using namespace GatherUtils; - namespace { /// If 'is_utf8' - measure offset and length in code points instead of bytes. -/// Syntax: overlay(input, replace, offset[, length]) +/// Syntax: +/// - overlay(input, replace, offset[, length]) +/// - overlayUTF8(input, replace, offset[, length]) - measure offset and length in code points instead of bytes template class FunctionOverlay : public IFunction { @@ -37,63 +37,39 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - const size_t number_of_arguments = arguments.size(); - if (number_of_arguments < 3 || number_of_arguments > 4) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: " - "passed {}, should be 3 or 4", - getName(), - number_of_arguments); + FunctionArgumentDescriptors mandatory_args{ + {"input", static_cast(&isString), nullptr, "String"}, + {"replace", static_cast(&isString), nullptr, "String"}, + {"offset", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; - /// first argument is string - if (!isString(arguments[0])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {}, expected String", - arguments[0]->getName(), - getName()); + FunctionArgumentDescriptors optional_args{ + {"length", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; - /// second argument is string - if (!isString(arguments[1])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}, expected String", - arguments[1]->getName(), - getName()); - - if (!isNativeNumber(arguments[2])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of third argument of function {}, expected (U)Int8|16|32|64", - arguments[2]->getName(), - getName()); - - if (number_of_arguments == 4 && !isNativeNumber(arguments[3])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}, expected (U)Int8|16|32|64", - arguments[3]->getName(), - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (input_rows_count == 0) + return ColumnString::create(); + const size_t number_of_arguments = arguments.size(); - bool three_args = number_of_arguments == 3; + bool has_three_args = number_of_arguments == 3; ColumnPtr column_offset = arguments[2].column; ColumnPtr column_length; - if (!three_args) + if (!has_three_args) column_length = arguments[3].column; const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (!three_args) + if (!has_three_args) column_length_const = checkAndGetColumn(column_length.get()); bool 
offset_is_const = false; @@ -126,7 +102,7 @@ public: if (column_input_const) { StringRef input = column_input_const->getDataAt(0); - res_data.reserve(input.size * input_rows_count); + res_data.reserve((input.size + 1) * input_rows_count); } else { @@ -135,8 +111,8 @@ public: const auto * column_replace_const = checkAndGetColumn(column_replace.get()); const auto * column_replace_string = checkAndGetColumn(column_replace.get()); - bool input_is_const = column_input_const != nullptr; - bool replace_is_const = column_replace_const != nullptr; + bool input_is_const = (column_input_const != nullptr); + bool replace_is_const = (column_replace_const != nullptr); #define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ @@ -150,8 +126,9 @@ public: length, \ res_data, \ res_offsets); \ - else if (input_is_const) \ + else if (input_is_const && !replace_is_const) \ constantVector( \ + input_rows_count, \ column_input_const->getDataAt(0), \ column_replace_string->getChars(), \ column_replace_string->getOffsets(), \ @@ -161,8 +138,9 @@ public: length, \ res_data, \ res_offsets); \ - else if (replace_is_const) \ + else if (!input_is_const && replace_is_const) \ vectorConstant( \ + input_rows_count, \ column_input_string->getChars(), \ column_input_string->getOffsets(), \ column_replace_const->getDataAt(0), \ @@ -174,6 +152,7 @@ public: res_offsets); \ else \ vectorVector( \ + input_rows_count, \ column_input_string->getChars(), \ column_input_string->getOffsets(), \ column_replace_string->getChars(), \ @@ -185,7 +164,7 @@ public: res_data, \ res_offsets); - if (three_args) + if (has_three_args) { if (offset_is_const) { @@ -251,7 +230,7 @@ private: return bytes; } - template + template void constantConstant( size_t rows, const StringRef & input, @@ -263,7 +242,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { constantConstant( rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -277,12 +256,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (three_args) + else if constexpr (has_three_args) { valid_length = replace_size; } @@ -300,7 +279,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!three_args && !length_is_const) + if constexpr (!has_three_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; @@ -331,10 +310,10 @@ private: } else { - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator @@ -363,8 +342,9 @@ private: } } - template + template void vectorConstant( + size_t rows, const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, const StringRef & replace, @@ -375,27 +355,26 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { vectorConstant( - input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (three_args) + else if constexpr (has_three_args) { valid_length = replace_size; } - size_t rows = input_offsets.size(); Int64 offset = 0; // start from 1, maybe negative size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; @@ -415,7 +394,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!three_args && !length_is_const) + if constexpr (!has_three_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; @@ -449,9 +428,9 @@ private: { const auto * input_begin = &input_data[input_offset]; const auto * input_end = &input_data[input_offset + input_bytes]; - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input_bytes : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator @@ -480,8 +459,9 @@ private: } } - template + template void constantVector( + size_t rows, const StringRef & input, const ColumnString::Chars & replace_data, const ColumnString::Offsets & replace_offsets, @@ -492,10 +472,10 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { constantVector( - input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -506,13 +486,12 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - size_t rows = replace_offsets.size(); const auto * input_begin = reinterpret_cast(input.data); const auto * input_end = reinterpret_cast(input.data + input.size); Int64 offset = 0; // start from 1, maybe negative @@ -529,7 +508,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (three_args) + if constexpr (has_three_args) { valid_length = replace_size; } @@ -564,9 +543,9 @@ private: } else { - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -594,8 +573,9 @@ private: } } - template + template void vectorVector( + size_t rows, const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, const ColumnString::Chars & replace_data, @@ -607,9 +587,10 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { vectorVector( + rows, input_data, input_offsets, replace_data, @@ -625,13 +606,12 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - size_t rows = input_offsets.size(); Int64 offset = 0; // start from 1, maybe negative size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; @@ -655,7 +635,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (three_args) + if constexpr (has_three_args) { valid_length = replace_size; } @@ -693,9 +673,9 @@ private: { const auto * input_begin = &input_data[input_offset]; const auto * input_end = &input_data[input_offset + input_bytes]; - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input_bytes : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator res_data.resize(new_res_size); diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 9e79db2e131..383a26986d6 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -1,168 +1,62 @@ -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL +Negative test of overlay +Positive test 1 with various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI 
SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Positive test 2 with various combinations of const/non-const columns +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Positive test 3 with various combinations of const/non-const columns +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Positive test 4 with various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Positive test 5 with various combinations of const/non-const columns +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index b131312c934..4fd0791521d 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -1,60 +1,74 @@ -SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT 'Negative test of overlay'; +SELECT overlay('hello', 2); -- { serverError 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 456, 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 'two', 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 2, 'three'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT overlay('Spark SQL', '_', 6) from numbers(3); -SELECT overlay(materialize('Spark SQL'), '_', 6) from numbers(3); -SELECT overlay('Spark SQL', materialize('_'), 6) from numbers(3); -SELECT overlay('Spark SQL', '_', materialize(6)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('_'), 6) from numbers(3); -SELECT overlay(materialize('Spark SQL'), '_', materialize(6)) from numbers(3); -SELECT overlay('Spark SQL', materialize('_'), materialize(6)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)) from numbers(3); - -SELECT overlay('Spark SQL', 'CORE', 7) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'CORE', 7) from numbers(3); -SELECT overlay('Spark SQL', materialize('CORE'), 7) from numbers(3); -SELECT overlay('Spark SQL', 'CORE', materialize(7)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)) from numbers(3); -SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)) from numbers(3); - -SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlay('Spark SQL', 'tructured', 2, 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4) from numbers(3); -SELECT overlay('Spark SQL', materialize('tructured'), 2, 4) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', materialize(2), 4) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4) from numbers(3); -SELECT overlay('Spark SQL', 
materialize('tructured'), 2, materialize(4)) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); +SELECT 'Positive test 1 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Positive test 2 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', '_', 6), overlayUTF8('Spark SQL和CH', '_', 6); +SELECT overlay(materialize('Spark SQL'), '_', 6), overlayUTF8(materialize('Spark SQL和CH'), '_', 6); +SELECT overlay('Spark SQL', materialize('_'), 6), overlayUTF8('Spark SQL和CH', materialize('_'), 6); +SELECT overlay('Spark SQL', '_', materialize(6)), overlayUTF8('Spark SQL和CH', '_', materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)), overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)), overlayUTF8(materialize('Spark 
SQL和CH'), materialize('_'), materialize(6)); + +SELECT 'Positive test 3 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'CORE', 7), overlayUTF8('Spark SQL和CH', 'CORE', 7); +SELECT overlay(materialize('Spark SQL'), 'CORE', 7), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7); +SELECT overlay('Spark SQL', materialize('CORE'), 7), overlayUTF8('Spark SQL和CH', materialize('CORE'), 7); +SELECT overlay('Spark SQL', 'CORE', materialize(7)), overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7); +SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)); +SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)), overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)); + +SELECT 'Positive test 4 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Positive test 5 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'tructured', 2, 4), overlayUTF8('Spark SQL和CH', 'tructured', 2, 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4); +SELECT overlay('Spark SQL', materialize('tructured'), 2, 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4); +SELECT overlay('Spark SQL', 'tructured', materialize(2), 4), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4); +SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4), overlayUTF8(materialize('Spark SQL和CH'), 
materialize('tructured'), 2, 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)); +SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4); +SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)); +SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)); diff --git a/tests/queries/0_stateless/03206_overlay_utf8.reference b/tests/queries/0_stateless/03206_overlay_utf8.reference deleted file mode 100644 index 19878c97184..00000000000 --- a/tests/queries/0_stateless/03206_overlay_utf8.reference +++ /dev/null @@ -1,168 +0,0 @@ -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Structured SQL和CH -Structured 
SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH diff --git a/tests/queries/0_stateless/03206_overlay_utf8.sql b/tests/queries/0_stateless/03206_overlay_utf8.sql deleted file mode 100644 index 00b756c8b5b..00000000000 --- a/tests/queries/0_stateless/03206_overlay_utf8.sql +++ /dev/null @@ -1,60 +0,0 @@ -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', '_', 6) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', 6) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('_'), 6) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', '_', materialize(6)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), 7) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)) from numbers(3); -SELECT 
overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); From 2a8c9b8518175d81632cf7ca48c10522b737e6b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 07:59:14 +0200 Subject: [PATCH 217/363] Fix tests --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 3 ++- tests/queries/0_stateless/01825_type_json_ghdata.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh index 2afec5ba7fe..568ba2bd185 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest, long +# Tags: no-fasttest, no-s3-storage, long +# ^ no-s3-storage: it is memory-hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01825_type_json_ghdata.sh b/tests/queries/0_stateless/01825_type_json_ghdata.sh index 2686e2c8eb1..7e952de6c08 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata.sh +++ 
b/tests/queries/0_stateless/01825_type_json_ghdata.sh
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-s3-storage, long
+# ^ no-s3-storage: it is memory-hungry

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From e623ad041f4937b0e7ed22f3159acfee6c0147b3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 25 Jul 2024 16:44:17 +0200
Subject: [PATCH 218/363] Make C-z ignorance configurable (ignore_shell_suspend) in clickhouse-client

C-z is extremely useful for some users (like myself), so provide a way to configure it in the client, and avoid ignoring it in clickhouse-disks (I hope this is OK, since it is not that well-known a utility and it does not have its own configuration, while a CLI option would be useless: one would have to remember about it).

Honestly, I've never seen any interactive client that forbids C-z, so ignoring it by default looks strange to me.

Signed-off-by: Azat Khuzhin
---
 programs/client/clickhouse-client.xml | 3 +++
 programs/disks/DisksApp.cpp | 1 +
 programs/disks/DisksApp.h | 2 +-
 programs/disks/DisksClient.cpp | 1 -
 programs/disks/DisksClient.h | 3 +--
 programs/keeper-client/KeeperClient.cpp | 1 +
 src/Client/ClientBase.cpp | 1 +
 src/Client/ReplxxLineReader.cpp | 4 +++-
 src/Client/ReplxxLineReader.h | 1 +
 9 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml
index 9ce7d1cb223..6eb8976a6ef 100644
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@@ -53,6 +53,9 @@ -->
+
+
+
If
---
 src/Functions/LowerUpperUTF8Impl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index 8469bedde0c..36ee1723269 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -57,7 +57,7 @@ struct LowerUpperUTF8Impl
 input.toUTF8String(output);

 /// For valid UTF-8 input strings, ICU sometimes produces output with an extra '\0 at the end. Only the data before that
- /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this
+ /// '\0' is valid. If the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this
size_t valid_size = output.size(); if (!output.empty() && output.back() == '\0') From 2f6ad1271cfbd9aa62ad2365e70314aba4da21b9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 00:27:02 +0200 Subject: [PATCH 316/363] fix tests + exception --- src/Storages/VirtualColumnUtils.cpp | 2 +- .../test_storage_azure_blob_storage/test.py | 10 +-- tests/integration/test_storage_hdfs/test.py | 9 +-- .../03203_hive_style_partitioning.reference | 2 - .../03203_hive_style_partitioning.sh | 61 +++---------------- 5 files changed, 15 insertions(+), 69 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ca82a1ce67a..f0d276e4e56 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -162,7 +162,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & sto return; if (storage_columns.size() == 1) - throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot implement partition by all columns in a file"); + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path); auto local_type = storage_columns.get(name).type; storage_columns.remove(name); desc.addEphemeral(name, local_type, ""); diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 637dbd38262..a3172329a99 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1518,14 +1518,14 @@ def test_hive_partitioning_with_one_parameter(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} ).splitlines() == [ - "Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}".format( + "Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format( bucket="cont", max_path=path ) ] @@ -1560,7 +1560,7 @@ def test_hive_partitioning_with_all_parameters(cluster): f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(Exception, match=pattern): azure_query(node, query, settings={"use_hive_partitioning": 1}) @@ -1572,7 +1572,7 @@ def test_hive_partitioning_without_setting(cluster): table_format = "column1 String, column2 String" values_1 = f"('Elizabeth', 'Gordon')" values_2 = f"('Emilia', 'Gregor')" - path = "a/column1=Elizabeth/column2=Gordon/sample.csv" + path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv" azure_query( node, @@ -1582,7 +1582,7 @@ def test_hive_partitioning_without_setting(cluster): ) query = ( - f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, " + f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', 
container='cont', " f"blob_path='{path}', format='CSV', structure='{table_format}');" ) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ad2e7084791..ea8c4efa745 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1268,11 +1268,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): ) assert r == f"Elizabeth\n" - r = node1.query( - "SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", - settings={"use_hive_partitioning": 1}, - ) - assert r == f"Gordon\n" def test_hive_partitioning_with_all_parameters(started_cluster): @@ -1285,11 +1280,11 @@ def test_hive_partitioning_with_all_parameters(started_cluster): == f"Elizabeth\tGordon\n" ) - pattern = r"DB::Exception: Cannot implement partition by all columns in a file" + pattern = r"DB::Exception: Cannot use hive partitioning for file" with pytest.raises(QueryRuntimeException, match=pattern): node1.query( - f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", + f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", settings={"use_hive_partitioning": 1}, ) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index b5eaef7f51e..af52dcd9b88 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -35,8 +35,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 41b215578f0..4e165446c34 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -11,22 +11,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM 
file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -37,7 +25,6 @@ SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01 $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2; SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ @@ -61,21 +48,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'" $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;""" @@ -92,24 +65,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; - -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, 
_column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0; +SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; """ $CLICKHOUSE_CLIENT -n -q """ @@ -123,13 +82,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'" $CLICKHOUSE_CLIENT -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1; - -SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; -SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1; +SELECT *, column0 FROM 
s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; """ From 8a89cd31a1e7770479af6eaf1b4211ef4ece1795 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 00:29:32 +0200 Subject: [PATCH 317/363] Fix Upgrade Check: move some settings to 24.9 section --- src/Core/SettingsChangesHistory.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index fb59577b0f0..5e831c6301c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -72,11 +72,13 @@ static std::initializer_list Date: Thu, 22 Aug 2024 00:48:29 +0200 Subject: [PATCH 318/363] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index ea8c4efa745..a75c13b9ea6 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1269,7 +1269,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_with_all_parameters(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 92e153585ded4f15e1292613584ff35a55c735f3 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Wed, 21 Aug 2024 19:19:07 -0700 Subject: [PATCH 319/363] Update README.md Add latest meetups from Alexey tour. --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 17b6dcd2ac1..5e66b9da73e 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,13 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. +The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey Milovidov: + * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 +* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 +* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 +* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 +* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. 
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"

From 9d0b3e3937cca32bc8bc922876fb8e6ac53a3de9 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Thu, 22 Aug 2024 11:32:59 +0800
Subject: [PATCH 320/363] change as request

---
 .../functions/string-replace-functions.md | 145 +++++++++---------
 src/Functions/overlay.cpp | 8 +-
 ...new_functions_must_be_documented.reference | 2 -
 3 files changed, 76 insertions(+), 79 deletions(-)

diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index 55e97688b18..0cc6b0b27d5 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -8,6 +8,78 @@ sidebar_label: Replacing in Strings
 
 [General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately.
 
+## overlay
+
+Replaces part of the string `input` with another string `replace`, starting at the 1-based index `offset`.
+
+**Syntax**
+
+```sql
+overlay(input, replace, offset[, length])
+```
+
+**Parameters**
+
+- `input`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string.
+- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within `input` to be replaced. If `length` is not specified, the number of bytes removed from `input` equals the length of `replace`; otherwise `length` bytes are removed.
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value.
+
+**Example**
+
+```sql
+SELECT overlay('ClickHouse SQL', 'CORE', 12) AS res;
+```
+
+Result:
+
+```text
+┌─res─────────────┐
+│ ClickHouse CORE │
+└─────────────────┘
+```
+
+## overlayUTF8
+
+Replaces part of the string `input` with another string `replace`, starting at the 1-based index `offset`.
+
+Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+**Syntax**
+
+```sql
+overlayUTF8(input, replace, offset[, length])
+```
+
+**Parameters**
+
+- `input`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string.
+- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within `input` to be replaced. If `length` is not specified, the number of characters removed from `input` equals the length of `replace`; otherwise `length` characters are removed.
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value.
+
+**Example**
+
+```sql
+SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res;
+```
+
+Result:
+
+```text
+┌─res────────────────────────┐
+│ ClickHouse是开源OLAP数据库 │
+└────────────────────────────┘
+```
+
 ## replaceOne
 
 Replaces the first occurrence of the substring `pattern` in `haystack` by the `replacement` string.
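A note on the `overlay` functions added above: the parameter lists describe a negative `offset` as counting from the end of `input`, but none of the examples show it. A minimal sketch, with the expected output taken from the updated 03205_overlay.reference further below ('Spark SQL' is 9 bytes, so offset -3 lands on the 'S' of 'SQL'):

```sql
SELECT overlay('Spark SQL', '__', -3) AS res;
```

Result:

```text
┌─res───────┐
│ Spark __L │
└───────────┘
```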
@@ -248,76 +320,3 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); │ %Hello World 2024 │ └──────────────────────────────────────────────┘ ``` - -## overlay - -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. - - -**Syntax** - -```sql -overlay(s, replace, offset[, length]) -``` - -**Parameters** - -- `s`: A string type [String](../data-types/string.md). -- `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). - -**Returned value** - -- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. - -**Example** - -```sql -SELECT overlay('Spark SQL', 'CORE', 7) AS res; -``` - -Result: - -```text - ┌─res────────┐ - │ Spark CORE │ - └────────────┘ -``` - -## overlayUTF8 - -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. - -Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. - -**Syntax** - -```sql -overlayUTF8(s, replace, offset[, length]) -``` - -**Parameters** - -- `s`: A string type [String](../data-types/string.md). -- `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). - -**Returned value** - -- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. - -**Example** - -```sql -SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; -``` - -Result: - -```text -┌─res────────────────────────┐ -│ ClickHouse是开源OLAP数据库 │ -└────────────────────────────┘ -``` diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 20988c775a5..497ebb9c9cd 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -201,14 +201,14 @@ private: { if (offset > 0) { - if (static_cast(offset) > input_size + 1) [[unlikely]] + if (static_cast(offset) > input_size + 1) return input_size; else return offset - 1; } else { - if (input_size < -static_cast(offset)) [[unlikely]] + if (input_size < -static_cast(offset)) return 0; else return input_size + offset; @@ -704,14 +704,14 @@ REGISTER_FUNCTION(Overlay) { factory.registerFunction>( {.description = R"( -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `input` equals the length of `replace`. 
If `length` (the optional fourth argument) is specified, a different number of bytes is removed. )", .categories{"String"}}, FunctionFactory::Case::Insensitive); factory.registerFunction>( {.description = R"( -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `input` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. )", diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 6495b6619f9..c39f1fb1ce9 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -512,8 +512,6 @@ nullIf nullIn nullInIgnoreSet or -overlay -overlayUTF8 parseDateTime parseDateTime32BestEffort parseDateTime32BestEffortOrNull From 3ff9522b69ec7e51119f445152ffb9678a0f124f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 22 Aug 2024 12:49:10 +0800 Subject: [PATCH 321/363] change as request --- src/Functions/overlay.cpp | 165 +++++++++--------- .../0_stateless/03205_overlay.reference | 58 +++--- tests/queries/0_stateless/03205_overlay.sql | 11 +- 3 files changed, 115 insertions(+), 119 deletions(-) diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 497ebb9c9cd..df8b825eabe 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -53,139 +53,132 @@ public: if (input_rows_count == 0) return ColumnString::create(); - const size_t number_of_arguments = arguments.size(); - bool has_three_args = number_of_arguments == 3; + bool has_four_args = (arguments.size() == 4); - ColumnPtr column_offset = arguments[2].column; - ColumnPtr column_length; - if (!has_three_args) - column_length = arguments[3].column; + ColumnPtr col_input = arguments[0].column; + const auto * col_input_const = checkAndGetColumn(col_input.get()); + const auto * col_input_string = checkAndGetColumn(col_input.get()); + bool input_is_const = (col_input_const != nullptr); - const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); - const ColumnConst * column_length_const = nullptr; - if (!has_three_args) - column_length_const = checkAndGetColumn(column_length.get()); + ColumnPtr col_replace = arguments[1].column; + const auto * col_replace_const = checkAndGetColumn(col_replace.get()); + const auto * col_replace_string = checkAndGetColumn(col_replace.get()); + bool replace_is_const = (col_replace_const != nullptr); + ColumnPtr col_offset = arguments[2].column; + const ColumnConst * col_offset_const = checkAndGetColumn(col_offset.get()); bool offset_is_const = false; - bool length_is_const = false; Int64 offset = -1; - Int64 length = -1; - if (column_offset_const) + if (col_offset_const) { - offset = column_offset_const->getInt(0); + offset = col_offset_const->getInt(0); offset_is_const = true; } - if (column_length_const) + ColumnPtr 
col_length = has_four_args ? arguments[3].column : nullptr; + const ColumnConst * col_length_const = has_four_args ? checkAndGetColumn(col_length.get()) : nullptr; + bool length_is_const = false; + Int64 length = -1; + if (col_length_const) { - length = column_length_const->getInt(0); + length = col_length_const->getInt(0); length_is_const = true; } - auto res_col = ColumnString::create(); auto & res_data = res_col->getChars(); auto & res_offsets = res_col->getOffsets(); + res_offsets.resize_exact(input_rows_count); - - ColumnPtr column_input = arguments[0].column; - ColumnPtr column_replace = arguments[1].column; - - const auto * column_input_const = checkAndGetColumn(column_input.get()); - const auto * column_input_string = checkAndGetColumn(column_input.get()); - if (column_input_const) + if (col_input_const) { - StringRef input = column_input_const->getDataAt(0); + StringRef input = col_input_const->getDataAt(0); res_data.reserve((input.size + 1) * input_rows_count); } else { - res_data.reserve(column_input_string->getChars().size()); + res_data.reserve(col_input_string->getChars().size()); } - const auto * column_replace_const = checkAndGetColumn(column_replace.get()); - const auto * column_replace_string = checkAndGetColumn(column_replace.get()); - bool input_is_const = (column_input_const != nullptr); - bool replace_is_const = (column_replace_const != nullptr); -#define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ +#define OVERLAY_EXECUTE_CASE(HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ - constantConstant( \ + constantConstant( \ input_rows_count, \ - column_input_const->getDataAt(0), \ - column_replace_const->getDataAt(0), \ - column_offset, \ - column_length, \ + col_input_const->getDataAt(0), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else if (input_is_const && !replace_is_const) \ - constantVector( \ + constantVector( \ input_rows_count, \ - column_input_const->getDataAt(0), \ - column_replace_string->getChars(), \ - column_replace_string->getOffsets(), \ - column_offset, \ - column_length, \ + col_input_const->getDataAt(0), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else if (!input_is_const && replace_is_const) \ - vectorConstant( \ + vectorConstant( \ input_rows_count, \ - column_input_string->getChars(), \ - column_input_string->getOffsets(), \ - column_replace_const->getDataAt(0), \ - column_offset, \ - column_length, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else \ - vectorVector( \ + vectorVector( \ input_rows_count, \ - column_input_string->getChars(), \ - column_input_string->getOffsets(), \ - column_replace_string->getChars(), \ - column_replace_string->getOffsets(), \ - column_offset, \ - column_length, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); - if (has_three_args) + if (!has_four_args) { if (offset_is_const) { - OVERLAY_EXECUTE_CASE(true, true, false) + OVERLAY_EXECUTE_CASE(false, true, false) } else { - OVERLAY_EXECUTE_CASE(true, false, false) + OVERLAY_EXECUTE_CASE(false, false, false) 
} } else { if (offset_is_const && length_is_const) { - OVERLAY_EXECUTE_CASE(false, true, true) + OVERLAY_EXECUTE_CASE(true, true, true) } else if (offset_is_const && !length_is_const) { - OVERLAY_EXECUTE_CASE(false, true, false) + OVERLAY_EXECUTE_CASE(true, true, false) } else if (!offset_is_const && length_is_const) { - OVERLAY_EXECUTE_CASE(false, false, true) + OVERLAY_EXECUTE_CASE(true, false, true) } else { - OVERLAY_EXECUTE_CASE(false, false, false) + OVERLAY_EXECUTE_CASE(true, false, false) } } #undef OVERLAY_EXECUTE_CASE @@ -224,7 +217,7 @@ private: return bytes; } - template + template void constantConstant( size_t rows, const StringRef & input, @@ -236,7 +229,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { constantConstant( rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -250,12 +243,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (has_three_args) + else if constexpr (!has_four_args) { valid_length = replace_size; } @@ -273,14 +266,14 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!has_three_args && !length_is_const) + if constexpr (has_four_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -332,11 +325,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void vectorConstant( size_t rows, const ColumnString::Chars & input_data, @@ -349,7 +343,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { vectorConstant( rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -359,12 +353,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (has_three_args) + else if constexpr (!has_four_args) { valid_length = replace_size; } @@ -388,14 +382,14 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!has_three_args && !length_is_const) + if constexpr (has_four_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 
0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -449,11 +443,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void constantVector( size_t rows, const StringRef & input, @@ -466,7 +461,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { constantVector( rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -480,7 +475,7 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; @@ -502,7 +497,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (has_three_args) + if constexpr (!has_four_args) { valid_length = replace_size; } @@ -513,7 +508,7 @@ private: } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -563,11 +558,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void vectorVector( size_t rows, const ColumnString::Chars & input_data, @@ -581,7 +577,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { vectorVector( rows, @@ -600,7 +596,7 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; @@ -629,7 +625,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (has_three_args) + if constexpr (!has_four_args) { valid_length = replace_size; } @@ -640,7 +636,7 @@ private: } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 
0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -693,6 +689,7 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 67a699944e0..4be3baadaea 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -25,34 +25,34 @@ Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH -Test with different offset values --12 _park SQL _park SQL和CH --11 _park SQL S_ark SQL和CH --10 _park SQL Sp_rk SQL和CH --9 _park SQL Spa_k SQL和CH --8 S_ark SQL Spar_ SQL和CH --7 Sp_rk SQL Spark_SQL和CH --6 Spa_k SQL Spark _QL和CH --5 Spar_ SQL Spark S_L和CH --4 Spark_SQL Spark SQ_和CH --3 Spark _QL Spark SQL_CH --2 Spark S_L Spark SQL和_H --1 Spark SQ_ Spark SQL和C_ -0 Spark SQL_ Spark SQL和CH_ -1 _park SQL _park SQL和CH -2 S_ark SQL S_ark SQL和CH -3 Sp_rk SQL Sp_rk SQL和CH -4 Spa_k SQL Spa_k SQL和CH -5 Spar_ SQL Spar_ SQL和CH -6 Spark_SQL Spark_SQL和CH -7 Spark _QL Spark _QL和CH -8 Spark S_L Spark S_L和CH -9 Spark SQ_ Spark SQ_和CH -10 Spark SQL_ Spark SQL_CH -11 Spark SQL_ Spark SQL和_H -12 Spark SQL_ Spark SQL和C_ -13 Spark SQL_ Spark SQL和CH_ -Test with different length values +Test with special offset values +-12 __ark SQL 之park SQL和CH +-11 __ark SQL S之ark SQL和CH +-10 __ark SQL Sp之rk SQL和CH +-9 __ark SQL Spa之k SQL和CH +-8 S__rk SQL Spar之 SQL和CH +-7 Sp__k SQL Spark之SQL和CH +-6 Spa__ SQL Spark 之QL和CH +-5 Spar__SQL Spark S之L和CH +-4 Spark__QL Spark SQ之和CH +-3 Spark __L Spark SQL之CH +-2 Spark S__ Spark SQL和之H +-1 Spark SQ__ Spark SQL和C之 +0 Spark SQL__ Spark SQL和CH之 +1 __ark SQL 之park SQL和CH +2 S__rk SQL S之ark SQL和CH +3 Sp__k SQL Sp之rk SQL和CH +4 Spa__ SQL Spa之k SQL和CH +5 Spar__SQL Spar之 SQL和CH +6 Spark__QL Spark之SQL和CH +7 Spark __L Spark 之QL和CH +8 Spark S__ Spark S之L和CH +9 Spark SQ__ Spark SQ之和CH +10 Spark SQL__ Spark SQL之CH +11 Spark SQL__ Spark SQL和之H +12 Spark SQL__ Spark SQL和C之 +13 Spark SQL__ Spark SQL和CH之 +Test with special length values -1 Spark ANSI Spark ANSI H 0 Spark ANSI SQL Spark ANSI SQL和CH 1 Spark ANSI QL Spark ANSI QL和CH @@ -61,7 +61,7 @@ Test with different length values 4 Spark ANSI Spark ANSI CH 5 Spark ANSI Spark ANSI H 6 Spark ANSI Spark ANSI -Test with different input and replace values +Test with special input and replace values _ _ Spark SQL Spark SQL和CH ANSI ANSI diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index 4d0b5ecbe03..765b29f93ec 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -1,5 +1,4 @@ SELECT 'Negative test of overlay'; -SELECT overlay('hello', 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -35,13 +34,13 @@ SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0) SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark 
SQL和CH'), materialize('ANSI '), materialize(7), materialize(0));
 
-SELECT 'Test with different offset values';
-WITH number - 12 as offset SELECT offset, overlay('Spark SQL', '_', offset), overlayUTF8('Spark SQL和CH', '_', offset) from numbers(26);
+SELECT 'Test with special offset values';
+WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '之', offset) FROM numbers(26);
 
-SELECT 'Test with different length values';
-WITH number - 1 as length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) from numbers(8);
+SELECT 'Test with special length values';
+WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8);
 
-SELECT 'Test with different input and replace values';
+SELECT 'Test with special input and replace values';
 SELECT overlay('', '_', 6), overlayUTF8('', '_', 6);
 SELECT overlay('Spark SQL', '', 6), overlayUTF8('Spark SQL和CH', '', 6);
 SELECT overlay('', 'ANSI ', 7, 0), overlayUTF8('', 'ANSI ', 7, 0);

From be4439e3ec0a1491f4e333ac848844fd930a6e5b Mon Sep 17 00:00:00 2001
From: Alexey
Date: Thu, 22 Aug 2024 10:30:48 +0300
Subject: [PATCH 322/363] Update install.md

Added correct commands for the Russian version of the installation from deb packages

---
 docs/ru/getting-started/install.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md
index aee445da843..4a0ec258c64 100644
--- a/docs/ru/getting-started/install.md
+++ b/docs/ru/getting-started/install.md
@@ -25,10 +25,10 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
 Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu.
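This hunk swaps the deprecated apt-key flow for a dedicated keyring file; once the new commands below have been run, the imported key can be sanity-checked. A minimal sketch, assuming GnuPG 2.1.23 or newer (the check itself is not part of the documented steps):

```bash
# Lists the key(s) stored in the keyring file written by the "gpg --dearmor" step below;
# it should print the ClickHouse repository signing key if the import succeeded.
gpg --show-keys /usr/share/keyrings/clickhouse-keyring.gpg
```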
Для установки пакетов выполните: ``` bash -sudo apt-get install -y apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 +sudo apt-get install -y apt-transport-https ca-certificates curl gnupg +curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg -echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ +echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update From 95f45d2eaf39a9e8a6373c75749ec57f727be700 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 13:20:04 +0200 Subject: [PATCH 323/363] try to fix tests --- .../test_storage_azure_blob_storage/test.py | 14 +++++------ tests/integration/test_storage_hdfs/test.py | 25 +++---------------- .../03203_hive_style_partitioning.reference | 20 +++++++-------- .../03203_hive_style_partitioning.sh | 14 +++-------- 4 files changed, 23 insertions(+), 50 deletions(-) diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index a3172329a99..c1f518e45ce 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1513,14 +1513,14 @@ def test_hive_partitioning_with_one_parameter(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}')" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1533,7 +1533,7 @@ def test_hive_partitioning_with_one_parameter(cluster): query = ( f"SELECT column2 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) assert azure_query( node, query, settings={"use_hive_partitioning": 1} @@ -1551,14 +1551,14 @@ def test_hive_partitioning_with_all_parameters(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, 
" f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = r"DB::Exception: Cannot use hive partitioning for file" @@ -1577,14 +1577,14 @@ def test_hive_partitioning_without_setting(cluster): azure_query( node, f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}'," - f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", + f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}", settings={"azure_truncate_on_insert": 1}, ) query = ( f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, " f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', " - f"blob_path='{path}', format='CSV', structure='{table_format}');" + f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');" ) pattern = re.compile( r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index a75c13b9ea6..31cc8609eb4 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,35 +1259,16 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n" + hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") + assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" r = node1.query( - "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')", + "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", settings={"use_hive_partitioning": 1}, ) assert r == f"Elizabeth\n" -def test_hive_partitioning_with_all_parameters(started_cluster): - hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data( - f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n" - ) - assert ( - hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2") - == f"Elizabeth\tGordon\n" - ) - - pattern = r"DB::Exception: Cannot use hive partitioning for file" - - with pytest.raises(QueryRuntimeException, match=pattern): - node1.query( - f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');", - settings={"use_hive_partitioning": 1}, - ) - - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index af52dcd9b88..acdadc2510b 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -1,5 +1,5 @@ TESTING THE FILE HIVE PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno 
Elizabeth Guzman Elizabeth @@ -19,8 +19,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -35,12 +34,13 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 +4081 +2070 2070 b 1 -1 TESTING THE URL PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -60,10 +60,9 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth 1 TESTING THE S3 PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -83,8 +82,7 @@ Stanley Gibson Elizabeth Eugenia Greer Elizabeth Jeffery Delgado Elizabeth Clara Cross Elizabeth -Elizabeth Gordon Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -96,7 +94,7 @@ Delgado Elizabeth Cross Elizabeth OK TESTING THE S3CLUSTER PARTITIONING -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth @@ -106,7 +104,7 @@ Gibson Elizabeth Greer Elizabeth Delgado Elizabeth Cross Elizabeth -last Elizabeth + last Elizabeth Frank Elizabeth Moreno Elizabeth Guzman Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 4e165446c34..b3d196924af 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -14,7 +14,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1; SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1; @@ -29,16 +29,10 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ -$CLICKHOUSE_LOCAL -n -q """ -set use_hive_partitioning = 1; - -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; -""" 2>&1 | grep -c "INCORRECT_DATA" - $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" @@ -68,7 +62,7 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM 
s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ $CLICKHOUSE_CLIENT -n -q """ @@ -84,5 +78,5 @@ set use_hive_partitioning = 1; SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; -SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth'; +SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10; """ From 84467077b886cd48c9cd33c69c1935b3f7863dd7 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 22 Aug 2024 13:45:13 +0200 Subject: [PATCH 324/363] Fix test for role expiration in RoleCache. --- tests/integration/test_role/test.py | 81 +++++++++-------------------- 1 file changed, 26 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index b3b18dc8271..9d15f0f81db 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -418,72 +418,43 @@ def test_function_current_roles(): ) -def test_role_expiration(): - instance.query("CREATE USER ure") +@pytest.mark.parametrize("with_extra_role", [False, True]) +def test_role_expiration(with_extra_role): instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") + instance.query("CREATE USER ure DEFAULT ROLE rre") - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") + instance.query("CREATE TABLE table1 (id Int) Engine=Log") + instance.query("CREATE TABLE table2 (id Int) Engine=Log") + instance.query("INSERT INTO table1 VALUES (1)") + instance.query("INSERT INTO table2 VALUES (2)") + instance.query("GRANT SELECT ON table1 TO rre") + + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" + "SELECT * FROM table2", user="ure" ) - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test # so we wait >2 seconds until the role is expired time.sleep(5) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + if with_extra_role: + # Expiration of role "rre" from the role cache can be caused by another role being used. 
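+        # (At this point the cache entry for "rre" is already older than
+        # role_cache_expiration_time_seconds, which is set to 2 in the test configuration - see the sleep above.)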
+ instance.query("CREATE ROLE extra_role") + instance.query("CREATE USER extra_user DEFAULT ROLE extra_role") + instance.query("GRANT SELECT ON table1 TO extra_role") + assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n" - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + instance.query("GRANT SELECT ON table2 TO rre") + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" + assert instance.query("SELECT * FROM table2", user="ure") == "2\n" - instance.query("DROP USER ure") instance.query("DROP ROLE rre") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") - - -def test_two_roles_expiration(): - instance.query("CREATE USER ure") - instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") - - instance.query("CREATE ROLE rre_second") - - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") - - assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" - ) - - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test - # so we wait >2 seconds until the roles are expired - time.sleep(5) - - instance.query( - "GRANT SELECT ON tre1 TO rre_second" - ) # we expect that both rre and rre_second are gone from cache upon this operation - - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") - - assert instance.query("SELECT * from tre1", user="ure") == "0\n" - instance.query("DROP USER ure") - instance.query("DROP ROLE rre") - instance.query("DROP ROLE rre_second") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + instance.query("DROP TABLE table1") + instance.query("DROP TABLE table2") + + if with_extra_role: + instance.query("DROP ROLE extra_role") + instance.query("DROP USER extra_user") From 664e9b3db9d47e45c642ad21e3a5273ab423199a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:30:41 +0200 Subject: [PATCH 325/363] Add one more test. --- tests/integration/test_role/test.py | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 9d15f0f81db..225cab975ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,5 +1,6 @@ import time import pytest +import random from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -458,3 +459,175 @@ def test_role_expiration(with_extra_role): if with_extra_role: instance.query("DROP ROLE extra_role") instance.query("DROP USER extra_user") + + +def test_roles_cache(): + # This test takes 20 seconds. + test_time = 20 + + # Three users A, B, C. + users = ["A", "B", "C"] + instance.query("CREATE USER " + ", ".join(users)) + + # Table "tbl" has 10 columns. Each of the users has access to a different set of columns. 
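+    # Access is verified as the union of column grants over all of a user's roles,
+    # whether granted directly or inherited through another role (see check() and create_role() below).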
+ num_columns = 10 + columns = [f"x{i}" for i in range(1, num_columns + 1)] + columns_with_types = [column + " Int64" for column in columns] + columns_with_types_comma_separated = ", ".join(columns_with_types) + values = list(range(1, num_columns + 1)) + values_comma_separated = ", ".join([str(value) for value in values]) + instance.query( + f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()" + ) + instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})") + columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)]) + + # In this test we create and modify roles multiple times along with updating the following variables. + # Then we check that each of the users has access to the expected set of columns. + roles = [] + users_to_roles = dict([(user, []) for user in users]) + roles_to_columns = {} + + # Checks that each of the users can access the expected set of columns and can't access other columns. + def check(): + for user in random.sample(users, len(users)): + expected_roles = users_to_roles[user] + expected_columns = list( + set(sum([roles_to_columns[role] for role in expected_roles], [])) + ) + expected_result = sorted( + [columns_to_values[column] for column in expected_columns] + ) + query = " UNION ALL ".join( + [ + f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))" + for column in columns + ] + ) + result = instance.query(query, user=user).splitlines() + result = sorted([int(value) for value in result]) + ok = result == expected_result + if not ok: + print(f"Show grants for {user}:") + print( + instance.query( + "SHOW GRANTS FOR " + ", ".join([user] + expected_roles) + ) + ) + print(f"Expected result: {expected_result}") + print(f"Got unexpected result: {result}") + assert ok + + # Grants one of our roles a permission to access one of the columns. + def grant_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = [ + column for column in columns if column not in columns_used_in_roles + ] + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + role = random.choice(roles) + instance.query(f"GRANT SELECT({column}) ON tbl TO {role}") + roles_to_columns[role].append(column) + return True + + # Revokes a permission to access one of the granted column from all our roles. + def revoke_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = list(set(columns_used_in_roles)) + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + roles_str = ", ".join(roles) + instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}") + for role in roles_to_columns: + if column in roles_to_columns[role]: + roles_to_columns[role].remove(column) + return True + + # Creates a role and grants it to one of the users. 
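+    # The grant topology below is fixed: R1 -> A, R2 -> B, R3 -> R2 (so B also inherits R3), and R3 -> C.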
+ def create_role(): + for role in ["R1", "R2", "R3"]: + if role not in roles: + instance.query(f"CREATE ROLE {role}") + roles.append(role) + if role not in roles_to_columns: + roles_to_columns[role] = [] + if "R1" not in users_to_roles["A"]: + instance.query("GRANT R1 TO A") + users_to_roles["A"].append("R1") + elif "R2" not in users_to_roles["B"]: + instance.query("GRANT R2 TO B") + users_to_roles["B"].append("R2") + elif "R3" not in users_to_roles["B"]: + instance.query("GRANT R3 TO R2") + users_to_roles["B"].append("R3") + elif "R3" not in users_to_roles["C"]: + instance.query("GRANT R3 TO C") + users_to_roles["C"].append("R3") + else: + return False + return True + + # Drops one of our roles. + def drop_role(): + if not roles: + return False + role = random.choice(roles) + instance.query(f"DROP ROLE {role}") + roles.remove(role) + for u in users_to_roles: + if role in users_to_roles[u]: + users_to_roles[u].remove(role) + del roles_to_columns[role] + if (role == "R2") and ("R3" in users_to_roles["B"]): + users_to_roles["B"].remove("R3") + return True + + # Modifies some grants or roles randomly. + def modify(): + while True: + rnd = random.random() + if rnd < 0.4: + if grant_column(): + break + elif rnd < 0.5: + if revoke_column(): + break + elif rnd < 0.9: + if create_role(): + break + else: + if drop_role(): + break + + def maybe_modify(): + if random.random() < 0.9: + modify() + modify() + + # Sleeping is necessary in this test because the role cache in ClickHouse has expiration timeout. + def maybe_sleep(): + if random.random() < 0.1: + # "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration. + # We need a sleep longer than that in this test sometimes. + seconds = random.random() * 5 + print(f"Sleeping {seconds} seconds") + time.sleep(seconds) + + # Main part of the test. + start_time = time.time() + end_time = start_time + test_time + + while time.time() < end_time: + check() + maybe_sleep() + maybe_modify() + maybe_sleep() + + check() + + instance.query("DROP USER " + ", ".join(users)) + instance.query("DROP ROLE " + ", ".join(roles)) + instance.query("DROP TABLE tbl") From 7ef5c366e873c4fd99f257eefbb3a350848e308c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 4 Aug 2024 13:33:50 +0200 Subject: [PATCH 326/363] Fix expiration in RoleCache. 
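The role-fetching callback passed to collectRoles() captured the old
`subscriptions_on_roles` instead of the freshly built
`new_subscriptions_on_roles`, so subscriptions created while collecting the
enabled roles were attached to the stale container rather than the one being
assembled, which could let cached roles expire without being refreshed.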
---
 src/Access/RoleCache.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp
index 2d94df2eea5..cc1f1520b67 100644
--- a/src/Access/RoleCache.cpp
+++ b/src/Access/RoleCache.cpp
@@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO
     SubscriptionsOnRoles new_subscriptions_on_roles;
     new_subscriptions_on_roles.reserve(subscriptions_on_roles.size());
 
-    auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); };
+    auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); };
 
     for (const auto & current_role : enabled_roles.params.current_roles)
         collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false);

From 54dd3afd49df9c92cd3621a5cec4c7464c341a71 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 22 Aug 2024 14:52:17 +0200
Subject: [PATCH 327/363] Turn off fault injection for insert in 01396_inactive_replica_cleanup_nodes_zookeeper

---
 .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
index bff85b3e29f..9ea15071856 100755
--- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
+++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh
@@ -23,11 +23,10 @@ $CLICKHOUSE_CLIENT -n --query "
 DETACH TABLE r2;
 "
 
-$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})"
+# insert_keeper_fault_injection_probability=0 -- can slow down inserts a lot (produce a lot of parts)
+$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query 
"INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# Now wait for cleanup thread for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From b3f084459f60b1e31c32736573af0810dee99230 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:53:53 +0200 Subject: [PATCH 329/363] fix black --- tests/integration/test_storage_hdfs/test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 31cc8609eb4..b18940b7290 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1259,8 +1259,13 @@ def test_respect_object_existence_on_partitioned_write(started_cluster): def test_hive_partitioning_with_one_parameter(started_cluster): hdfs_api = started_cluster.hdfs_api - hdfs_api.write_data(f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n") - assert hdfs_api.read_data(f"/column0=Elizabeth/file_1") == f"column0,column1\nElizabeth,Gordon\n" + hdfs_api.write_data( + f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n" + ) + assert ( + hdfs_api.read_data(f"/column0=Elizabeth/file_1") + == f"column0,column1\nElizabeth,Gordon\n" + ) r = node1.query( "SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')", @@ -1269,6 +1274,7 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" + def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From 8d14d8523098a42cd778ef50a9b066508da8919c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:24:33 +0200 Subject: [PATCH 330/363] fix black --- tests/integration/test_storage_hdfs/test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index b18940b7290..7a92e8adb0d 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1274,7 +1274,6 @@ def test_hive_partitioning_with_one_parameter(started_cluster): assert r == f"Elizabeth\n" - def test_hive_partitioning_without_setting(started_cluster): hdfs_api = started_cluster.hdfs_api hdfs_api.write_data( From add4718634317304f652579a9f201c3b81c96a7d Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Thu, 22 Aug 2024 06:37:27 -0700 Subject: [PATCH 331/363] Update README.md - Meetups update Fixed one meetup location; Added more meetups --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5e66b9da73e..c9474ef0fc0 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,17 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 * [Raleigh Meetup (Deutsche 
Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9
-* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
+* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
 
+Other upcoming meetups
+* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
+* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
+* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
+* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
+* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
+* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
+
 ## Recent Recordings
 * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
 * **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now!

From 91e65feaaedd4806875aed3d4be4f07edeefdb71 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 22 Aug 2024 13:42:30 +0000
Subject: [PATCH 332/363] fix virtual columns in Merge engine

---
 src/Storages/StorageDistributed.cpp | 2 +-
 src/Storages/StorageMerge.cpp | 14 +++++++-------
 .../02890_describe_table_options.reference | 8 ++++++++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp
index c4668159759..0b80858800b 100644
--- a/src/Storages/StorageDistributed.cpp
+++ b/src/Storages/StorageDistributed.cpp
@@ -290,7 +290,7 @@ VirtualColumnsDescription StorageDistributed::createVirtuals()
 
     desc.addEphemeral("_shard_num", std::make_shared<DataTypeUInt32>(), "Deprecated. Use function shardNum instead");
 
-    /// Add virtual columns from table of storage Merges.
+    /// Add virtual columns from table with Merge engine. 
desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e88844e2d31..0827321e296 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -642,10 +642,6 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } } - else - { - - } auto child = createPlanForTable( nested_storage_snaphsot, @@ -657,6 +653,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + child.plan.addInterpreterContext(modified_context); if (child.plan.isInitialized()) @@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo modified_query_info.table_expression = replacement_table_expression; modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All) + .withExtendedObjects() + .withSubcolumns(storage_snapshot_->storage.supportsSubcolumns()); std::unordered_map column_name_to_node; + /// Consider only non-virtual columns of storage while checking for _table and _database columns. + /// I.e. always override virtual columns with these names from underlying table (if any). if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); @@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo column_name_to_node.emplace("_database", function_node); } + get_column_options.withVirtuals(); auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 9181cb27cb0..b77ef4a0fdf 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -54,6 +54,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 1 _shard_num UInt32 Deprecated. 
Use function shardNum instead 1 +_database LowCardinality(String) The name of database which the row comes from 1 +_table LowCardinality(String) The name of table which the row comes from 1 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 index column 0 0 @@ -87,6 +89,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 0 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 0 1 _shard_num UInt32 Deprecated. Use function shardNum instead 0 1 +_database LowCardinality(String) The name of database which the row comes from 0 1 +_table LowCardinality(String) The name of table which the row comes from 0 1 arr.size0 UInt64 1 0 t.a String ZSTD(1) 1 0 t.b UInt64 ZSTD(1) 1 0 @@ -144,6 +148,8 @@ _row_exists UInt8 1 _block_number UInt64 1 _block_offset UInt64 1 _shard_num UInt32 1 +_database LowCardinality(String) 1 +_table LowCardinality(String) 1 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 0 0 @@ -177,6 +183,8 @@ _row_exists UInt8 0 1 _block_number UInt64 0 1 _block_offset UInt64 0 1 _shard_num UInt32 0 1 +_database LowCardinality(String) 0 1 +_table LowCardinality(String) 0 1 arr.size0 UInt64 1 0 t.a String 1 0 t.b UInt64 1 0 From ce33943b430a9ad512f4942083889dea4decb778 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 22 Aug 2024 15:50:59 +0200 Subject: [PATCH 333/363] Fix flaky check --- tests/docker_scripts/stateless_runner.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/docker_scripts/stateless_runner.sh b/tests/docker_scripts/stateless_runner.sh index 40a63f74a6b..d8921a04458 100755 --- a/tests/docker_scripts/stateless_runner.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -339,7 +339,7 @@ export -f run_tests if [ "$NUM_TRIES" -gt "1" ]; then # We don't run tests with Ordinary database in PRs, only in master. # So run new/changed tests with Ordinary at least once in flaky check. 
-    NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
+    NUM_TRIES=1 USE_DATABASE_ORDINARY=1 run_tests \
         | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
 fi

From a9e793532ae308767da3bc4da74d9631cd85eb90 Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Thu, 22 Aug 2024 16:34:14 +0200
Subject: [PATCH 334/363] fix shutdown for PeriodicLog

---
 src/Interpreters/PeriodicLog.cpp | 3 ++-
 src/Interpreters/PeriodicLog.h   | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp
index 9d2891e11eb..15970ca5b81 100644
--- a/src/Interpreters/PeriodicLog.cpp
+++ b/src/Interpreters/PeriodicLog.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include "Functions/DateTimeTransforms.h"
 
 namespace DB
 {
@@ -27,7 +28,7 @@ template <typename LogElement>
 void PeriodicLog<LogElement>::shutdown()
 {
     stopCollect();
-    this->stopFlushThread();
+    Base::shutdown();
 }
 
 template <typename LogElement>
diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h
index 08c3f7eb23f..ceac8088d40 100644
--- a/src/Interpreters/PeriodicLog.h
+++ b/src/Interpreters/PeriodicLog.h
@@ -17,6 +17,7 @@ template <typename LogElement>
 class PeriodicLog : public SystemLog<LogElement>
 {
     using SystemLog<LogElement>::SystemLog;
+    using Base = SystemLog<LogElement>;
 
 public:
     using TimePoint = std::chrono::system_clock::time_point;
@@ -24,12 +25,12 @@ public:
     /// Launches a background thread to collect metrics with interval
     void startCollect(size_t collect_interval_milliseconds_);
 
-    /// Stop background thread
-    void stopCollect();
-
     void shutdown() final;
 
 protected:
+    /// Stop background thread
+    void stopCollect();
+
     virtual void stepFunction(TimePoint current_time) = 0;
 
 private:

From 5340ac5fbc7fba75d6a743d345c0f79dc466df0b Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Thu, 22 Aug 2024 14:39:19 +0000
Subject: [PATCH 335/363] Update version_date.tsv and changelogs after
 v24.5.5.41-stable

---
 docs/changelogs/v24.5.5.41-stable.md | 71 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 +
 2 files changed, 73 insertions(+)
 create mode 100644 docs/changelogs/v24.5.5.41-stable.md

diff --git a/docs/changelogs/v24.5.5.41-stable.md b/docs/changelogs/v24.5.5.41-stable.md
new file mode 100644
index 00000000000..8ba160e31d7
--- /dev/null
+++ b/docs/changelogs/v24.5.5.41-stable.md
@@ -0,0 +1,71 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.5.5.41-stable (441d4a6ebe3) FIXME as compared to v24.5.4.49-stable (63b760955a0)
+
+#### Improvement
+* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. 
Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. 
[#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. 
[#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..9063d3ef971 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,6 +6,7 @@ v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +15,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 1ea0163dfe6b3278d8a5e8d86c31b3d63d7a3780 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:42:14 +0200 Subject: [PATCH 336/363] Fix issue with maps with arrays as keys --- src/Functions/FunctionsHashing.h | 4 ++-- tests/queries/0_stateless/02534_keyed_siphash.reference | 7 ++++++- tests/queries/0_stateless/02534_keyed_siphash.sql | 5 ++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 0cf4246fd66..3da0b2cd9be 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -93,9 +93,9 @@ namespace impl if (is_const) i = 0; assert(key0->size() == key1->size()); - if (offsets != nullptr) + if (offsets != nullptr && i > 0) { - const auto * const begin = offsets->begin(); + const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1); const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper != offsets->end()) i = upper - begin; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 31c0cae8981..8b147025a05 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -244,5 +244,10 @@ Test emtpy arrays and maps 0AD04BFD000000000000000000000000 4761183170873013810 0AD04BFD000000000000000000000000 +Test maps with arrays as keys 16734549324845627102 -D675BB3D687973A238AB891DD99C7047 +1D03941D808D04810D2363A6C107D622 +16734549324845627102 +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +1D03941D808D04810D2363A6C107D622 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index b499d8ef02b..ba3c4a9156d 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -351,5 +351,8 @@ SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT 'Test maps with arrays as keys'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); -SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3))); +SELECT sipHash64Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3)) FROM numbers(2); +SELECT hex(sipHash128Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3))) FROM numbers(2); From a93d1919804d1c8dc7760f20084ade9a09710a47 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 22 Aug 2024 16:43:38 +0200 Subject: [PATCH 337/363] Fix typo 
in test case --- tests/queries/0_stateless/02534_keyed_siphash.reference | 2 +- tests/queries/0_stateless/02534_keyed_siphash.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 8b147025a05..a05446a494e 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -239,7 +239,7 @@ Check bug found fuzzing Test arrays and maps 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B -Test emtpy arrays and maps +Test empty arrays and maps 4761183170873013810 0AD04BFD000000000000000000000000 4761183170873013810 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index ba3c4a9156d..7cfc82512bd 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -346,7 +346,7 @@ INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g' SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; -SELECT 'Test emtpy arrays and maps'; +SELECT 'Test empty arrays and maps'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); From 0dc18247df3a290b4fb312325ff3b2a44a3f8357 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:10:24 +0000 Subject: [PATCH 338/363] Update version_date.tsv and changelogs after v24.6.3.38-stable --- docs/changelogs/v24.6.3.38-stable.md | 83 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 + 2 files changed, 86 insertions(+) create mode 100644 docs/changelogs/v24.6.3.38-stable.md diff --git a/docs/changelogs/v24.6.3.38-stable.md b/docs/changelogs/v24.6.3.38-stable.md new file mode 100644 index 00000000000..01d7e26e31f --- /dev/null +++ b/docs/changelogs/v24.6.3.38-stable.md @@ -0,0 +1,83 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.3.38-stable (4e33c831589) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c) + +#### Improvement +* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpeced size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). 
+* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)). +* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. 
[#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distriburted` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. 
[#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). 
+* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)). +* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). +* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. 
[#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)). +* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). +* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..cc168f58862 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -3,9 +3,11 @@ v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +16,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 0b9c24f31d548c87deca3334282c14fc78a295ba Mon Sep 17 00:00:00 2001 From: Michael Stetsyuk Date: Thu, 15 Aug 2024 12:09:50 +0000 Subject: [PATCH 339/363] write metadata to disk and keeper in the same format --- src/Storages/ColumnsDescription.cpp | 30 +++++--- src/Storages/ColumnsDescription.h | 6 +- .../__init__.py | 0 .../config/enable_keeper.xml | 26 +++++++ .../config/users.xml | 8 +++ .../test.py | 71 +++++++++++++++++++ 6 files changed, 128 insertions(+), 13 deletions(-) create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml create mode 100644 tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 0d724245b49..0212bbd6fff 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const && ast_to_str(ttl) == 
ast_to_str(other.ttl); } -void ColumnDescription::writeText(WriteBuffer & buf) const +String formatASTStateAware(IAST & ast, IAST::FormatState & state) +{ + WriteBufferFromOwnString buf; + IAST::FormatSettings settings(buf, true, false); + ast.formatImpl(settings, state, IAST::FormatStateStacked()); + return buf.str(); +} + +void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const { /// NOTE: Serialization format is insane. @@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const writeChar('\t', buf); DB::writeText(DB::toString(default_desc.kind), buf); writeChar('\t', buf); - writeEscapedString(queryToString(default_desc.expression), buf); + writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf); } - if (!comment.empty()) + if (!comment.empty() && include_comment) { writeChar('\t', buf); DB::writeText("COMMENT ", buf); - writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf); + auto ast = ASTLiteral(Field(comment)); + writeEscapedString(formatASTStateAware(ast, state), buf); } if (codec) { writeChar('\t', buf); - writeEscapedString(queryToString(codec), buf); + writeEscapedString(formatASTStateAware(*codec, state), buf); } if (!settings.empty()) @@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const ASTSetQuery ast; ast.is_standalone = false; ast.changes = settings; - writeEscapedString(queryToString(ast), buf); + writeEscapedString(formatASTStateAware(ast, state), buf); DB::writeText(")", buf); } if (!statistics.empty()) { writeChar('\t', buf); - writeEscapedString(queryToString(statistics.getAST()), buf); + writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf); } if (ttl) { writeChar('\t', buf); DB::writeText("TTL ", buf); - writeEscapedString(queryToString(ttl), buf); + writeEscapedString(formatASTStateAware(*ttl, state), buf); } writeChar('\n', buf); @@ -895,16 +904,17 @@ void ColumnsDescription::resetColumnTTLs() } -String ColumnsDescription::toString() const +String ColumnsDescription::toString(bool include_comments) const { WriteBufferFromOwnString buf; + IAST::FormatState ast_format_state; writeCString("columns format version: 1\n", buf); DB::writeText(columns.size(), buf); writeCString(" columns:\n", buf); for (const ColumnDescription & column : columns) - column.writeText(buf); + column.writeText(buf, ast_format_state, include_comments); return buf.str(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index f0760160f0a..c89c26501e8 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -104,7 +104,7 @@ struct ColumnDescription bool operator==(const ColumnDescription & other) const; bool operator!=(const ColumnDescription & other) const { return !(*this == other); } - void writeText(WriteBuffer & buf) const; + void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const; void readText(ReadBuffer & buf); }; @@ -137,7 +137,7 @@ public: /// NOTE Must correspond with Nested::flatten function. void flattenNested(); /// TODO: remove, insert already flattened Nested columns. 
- bool operator==(const ColumnsDescription & other) const { return columns == other.columns; } + bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); } bool operator!=(const ColumnsDescription & other) const { return !(*this == other); } auto begin() const { return columns.begin(); } @@ -221,7 +221,7 @@ public: /// Does column has non default specified compression codec bool hasCompressionCodec(const String & column_name) const; - String toString() const; + String toString(bool include_comments = true) const; static ColumnsDescription parse(const String & str); size_t size() const diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml new file mode 100644 index 00000000000..4ca4f604ec3 --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/enable_keeper.xml @@ -0,0 +1,26 @@ + + + 2181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + + 20000 + + + + 1 + localhost + 9444 + + + + + + + localhost + 2181 + + 20000 + + \ No newline at end of file diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml new file mode 100644 index 00000000000..c5de0b6819c --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/config/users.xml @@ -0,0 +1,8 @@ + + + + default + + + + \ No newline at end of file diff --git a/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py new file mode 100644 index 00000000000..e0c15e18c23 --- /dev/null +++ b/tests/integration/test_aliases_in_default_expr_not_break_table_structure/test.py @@ -0,0 +1,71 @@ +import pytest +import random +import string + +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=[ + "config/enable_keeper.xml", + "config/users.xml", + ], + stay_alive=True, + with_minio=True, + macros={"shard": 1, "replica": 1}, +) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def randomize_table_name(table_name, random_suffix_length=10): + letters = string.ascii_letters + string.digits + return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}" + + +@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"]) +def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine): + """ + Making sure that using aliases in columns' default expressions does not lead to having different columns metadata in ZooKeeper and on disk. 
+ Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150 + """ + + data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}' + + table_name = randomize_table_name("t") + + node.query( + f""" + DROP TABLE IF EXISTS {table_name}; + CREATE TABLE {table_name} + ( + `data` String, + `col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'), + `col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key') + ) + ENGINE = {engine}('/test/{table_name}', '{{replica}}') + ORDER BY col1 + """ + ) + + node.restart_clickhouse() + + node.query( + f""" + INSERT INTO {table_name} (data) VALUES ('{data}'); + """ + ) + assert node.query(f"SELECT data FROM {table_name}").strip() == data + assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val" + assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val" + + node.query(f"DROP TABLE {table_name}") From 859d2bfe273f571458be6f007761bc8c743d589a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 17:18:06 +0200 Subject: [PATCH 340/363] move stopFlushThread to SystemLogBase --- src/Common/SystemLogBase.cpp | 19 +++++++++++++++++++ src/Common/SystemLogBase.h | 2 ++ src/Interpreters/PeriodicLog.cpp | 6 +++--- src/Interpreters/PeriodicLog.h | 2 +- src/Interpreters/SystemLog.cpp | 21 +-------------------- src/Interpreters/SystemLog.h | 7 +------ 6 files changed, 27 insertions(+), 30 deletions(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 127c8862a35..45f4eb1c5a6 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -273,6 +273,25 @@ void SystemLogBase::startup() saving_thread = std::make_unique([this] { savingThreadFunction(); }); } +template +void SystemLogBase::stopFlushThread() +{ + { + std::lock_guard lock(thread_mutex); + + if (!saving_thread || !saving_thread->joinable()) + return; + + if (is_shutdown) + return; + + is_shutdown = true; + queue->shutdown(); + } + + saving_thread->join(); +} + template void SystemLogBase::add(LogElement element) { diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index 0d7b04d5c57..0942e920a42 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -216,6 +216,8 @@ public: static consteval bool shouldTurnOffLogger() { return false; } protected: + void stopFlushThread() final; + std::shared_ptr> queue; }; } diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 15970ca5b81..1b285aad3ff 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -11,7 +11,7 @@ void PeriodicLog::startCollect(size_t collect_interval_milliseconds_ { collect_interval_milliseconds = collect_interval_milliseconds_; is_shutdown_metric_thread = false; - flush_thread = std::make_unique([this] { threadFunction(); }); + collecting_thread = std::make_unique([this] { threadFunction(); }); } template @@ -20,8 +20,8 @@ void PeriodicLog::stopCollect() bool old_val = false; if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true)) return; - if (flush_thread) - flush_thread->join(); + if (collecting_thread) + collecting_thread->join(); } template diff --git a/src/Interpreters/PeriodicLog.h b/src/Interpreters/PeriodicLog.h index ceac8088d40..8254a02434a 100644 --- a/src/Interpreters/PeriodicLog.h +++ b/src/Interpreters/PeriodicLog.h @@ -36,7 +36,7 @@ protected: private: void threadFunction(); - std::unique_ptr flush_thread; + std::unique_ptr 
collecting_thread; size_t collect_interval_milliseconds; std::atomic is_shutdown_metric_thread{false}; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 832c39bfaf8..6a3ec197c6e 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -402,32 +402,13 @@ SystemLog::SystemLog( template void SystemLog::shutdown() { - stopFlushThread(); + Base::stopFlushThread(); auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext()); if (table) table->flushAndShutdown(); } -template -void SystemLog::stopFlushThread() -{ - { - std::lock_guard lock(thread_mutex); - - if (!saving_thread || !saving_thread->joinable()) - return; - - if (is_shutdown) - return; - - is_shutdown = true; - queue->shutdown(); - } - - saving_thread->join(); -} - template void SystemLog::savingThreadFunction() diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 9e1af3578bd..31652c1af67 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -125,8 +125,6 @@ public: void shutdown() override; - void stopFlushThread() override; - /** Creates new table if it does not exist. * Renames old table if its structure is not suitable. * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created. @@ -136,10 +134,7 @@ public: protected: LoggerPtr log; - using ISystemLog::is_shutdown; - using ISystemLog::saving_thread; - using ISystemLog::thread_mutex; - using Base::queue; + using Base::queue; StoragePtr getStorage() const; From 51fbc629c6dff4653e687228b0507947516072bb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:42:17 +0000 Subject: [PATCH 341/363] Update version_date.tsv and changelogs after v24.7.3.47-stable --- docs/changelogs/v24.7.3.47-stable.md | 55 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 4 ++ 2 files changed, 59 insertions(+) create mode 100644 docs/changelogs/v24.7.3.47-stable.md diff --git a/docs/changelogs/v24.7.3.47-stable.md b/docs/changelogs/v24.7.3.47-stable.md new file mode 100644 index 00000000000..e5f23a70fe1 --- /dev/null +++ b/docs/changelogs/v24.7.3.47-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.7.3.47-stable (2e50fe27a14) FIXME as compared to v24.7.2.13-stable (6e41f601b2f) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). 
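+
+As an illustrative sketch only (not the reproducer from the PR), the `Not-ready Set` fix above ([#66018](https://github.com/ClickHouse/ClickHouse/pull/66018)) concerns filtering system tables through subqueries, i.e. queries of roughly this shape:
+
+```sql
+-- Hypothetical minimal example of the affected pattern:
+-- a system table filtered by an IN (subquery) condition.
+SELECT name
+FROM system.tables
+WHERE database IN (SELECT name FROM system.databases WHERE name = 'system');
+```
+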
+* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta ` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. 
This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)). +* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). 
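+
+For reference, a sketch of the renamed function from the `JSONMergePatch` entry above — per that entry both spellings resolve to the same implementation, with the lowercase form kept only for compatibility (illustrative values, not taken from the PR's tests):
+
+```sql
+SELECT JSONMergePatch('{"a": 1}', '{"a": 2, "b": 3}');  -- expected: {"a":2,"b":3}
+SELECT jsonMergePatch('{"a": 1}', '{"b": 2}');          -- legacy alias still accepted
+```
+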
+* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). 
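+
+A hedged sketch of the `ALTER MODIFY SQL SECURITY` guard mentioned above — the `INVOKER` modifier and the table shape are assumptions based on the entry, not copied from the PR:
+
+```sql
+CREATE TABLE t_plain (x UInt8) ENGINE = MergeTree ORDER BY x;
+-- SQL SECURITY only applies to (materialized) views, so on a plain table
+-- this should now fail with an explicit error instead of being accepted:
+ALTER TABLE t_plain MODIFY SQL SECURITY INVOKER;
+```
+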
+* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..6ef5ace4ba6 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,14 @@ v24.8.1.2684-lts 2024-08-21 +v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +17,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From 5f61e193401c5fa46db03542cb88ba4188ed00e9 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:46:47 +0200 Subject: [PATCH 342/363] small fixes --- docs/ru/getting-started/install.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 4a0ec258c64..5bce41ec07a 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -22,7 +22,7 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su ### Из deb-пакетов {#install-from-deb-packages} -Яндекс рекомендует использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: +Рекомендуется использовать официальные скомпилированные `deb`-пакеты для Debian или Ubuntu. Для установки пакетов выполните: ``` bash sudo apt-get install -y apt-transport-https ca-certificates curl gnupg @@ -55,7 +55,7 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor ::: ### Из rpm-пакетов {#from-rpm-packages} -Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. +Команда ClickHouse рекомендует использовать официальные предкомпилированные `rpm`-пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. #### Установка официального репозитория @@ -102,7 +102,7 @@ sudo yum install clickhouse-server clickhouse-client ### Из tgz-архивов {#from-tgz-archives} -Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. +Команда ClickHouse рекомендует использовать предкомпилированные бинарники из `tgz`-архивов для всех дистрибутивов, где невозможна установка `deb`- и `rpm`- пакетов. Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. 
Пример установки самой свежей версии: From 980b02bfd67defbbdf78165e8225fb754d722d7a Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:48:57 +0200 Subject: [PATCH 343/363] fix compatibility with en version --- docs/ru/getting-started/install.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 5bce41ec07a..f8a660fbec9 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -31,9 +31,17 @@ curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | s echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update +``` +#### Установка ClickHouse server и client + +```bash sudo apt-get install -y clickhouse-server clickhouse-client +``` +#### Запуск ClickHouse server + +```bash sudo service clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you've set up a password. ``` From 7c3a013d56c1dbd5b72f04f6be61f007004aaefa Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Thu, 22 Aug 2024 16:53:30 +0100 Subject: [PATCH 344/363] Update newjson.md --- docs/en/sql-reference/data-types/newjson.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/newjson.md b/docs/en/sql-reference/data-types/newjson.md index 9e43216df6c..f7fc7e1498e 100644 --- a/docs/en/sql-reference/data-types/newjson.md +++ b/docs/en/sql-reference/data-types/newjson.md @@ -70,7 +70,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json └────────────────────────────────────────────────┘ ``` -CAST from named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. +CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. 
## Reading JSON paths as subcolumns From 28fbd8a4eff4eafa7db99eb37e38376ffda11763 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:56:16 +0200 Subject: [PATCH 345/363] fix stateless tests --- .../queries/0_stateless/03203_hive_style_partitioning.reference | 2 -- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index acdadc2510b..a9d856babce 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -34,8 +34,6 @@ Cross Elizabeth Array(Int64) LowCardinality(Float64) 101 2070 -4081 -2070 2070 b 1 diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index b3d196924af..6734c5f14ad 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; -SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10; +SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "UNKNOWN_IDENTIFIER" From 9c0e1df1663dd5c56066dd615fc3cafe6408d308 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:58:15 +0200 Subject: [PATCH 346/363] Fix flaky test 00989_parallel_parts_loading --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 407e124f137..dc074241ff6 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings +-- small insert block size can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From 0bd8ebf62616ce882b0ebc46945c837a5a91ba44 Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 17:58:56 +0200 Subject: [PATCH 347/363] Update README.md adding community call. resolving recent recordings --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c9474ef0fc0..9099fd48659 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. 
-* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 20 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 ## Upcoming Events @@ -58,7 +58,7 @@ Other upcoming meetups ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now! +* **Recording available**: [**v24.8 LTS Release Call**](https://www.youtube.com/watch?v=AeLmp2jc51k) All the features of 24.8 LTS, one convenient video! Watch it now! ## Interested in joining ClickHouse and making it your full-time job? From 52cdd88eb6d7bbb5d395dd80445655ad47c83c92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 22 Aug 2024 17:59:10 +0200 Subject: [PATCH 348/363] Better comment --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index dc074241ff6..3b73e6a0e3c 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,5 +1,5 @@ -- Tags: no-random-settings, no-random-merge-tree-settings --- small insert block size can make insert terribly slow, especially with some build like msan +-- small number of insert threads can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From e7b89537bf1bb760c6082f04de4668bd1c00f33a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 22 Aug 2024 18:02:42 +0200 Subject: [PATCH 349/363] fix style --- src/Interpreters/PeriodicLog.cpp | 1 - src/Interpreters/SystemLog.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/PeriodicLog.cpp b/src/Interpreters/PeriodicLog.cpp index 1b285aad3ff..22bc14856c4 100644 --- a/src/Interpreters/PeriodicLog.cpp +++ b/src/Interpreters/PeriodicLog.cpp @@ -1,7 +1,6 @@ #include #include #include -#include "Functions/DateTimeTransforms.h" namespace DB { diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 31652c1af67..c03f9370068 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -134,7 +134,7 @@ public: protected: LoggerPtr log; - using Base::queue; + using Base::queue; StoragePtr getStorage() const; From 1692360233593e635c5a7797847bdfd8a0ffa33e Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Thu, 22 Aug 2024 18:12:38 +0200 Subject: [PATCH 350/363] Update README.md 26 and 266 are different --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9099fd48659..83a5c05c667 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to 
discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 266 +* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26 ## Upcoming Events From 4264fbc037accedecebcd8122910e4406e92cd58 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 16:16:47 +0000 Subject: [PATCH 351/363] Update version_date.tsv and changelogs after v24.8.2.3-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.8.2.3-lts.md | 12 ++++++++++++ utils/list-versions/version_date.tsv | 5 +++++ 5 files changed, 20 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.8.2.3-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index fc93cee5bbc..6ff7ea43374 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 3ceaf2a08b4..c87885d3b49 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 76db997821c..6ccf74823e2 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.8.1.2684" +ARG VERSION="24.8.2.3" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.8.2.3-lts.md b/docs/changelogs/v24.8.2.3-lts.md new file mode 100644 index 00000000000..69dfc9961a2 --- /dev/null +++ b/docs/changelogs/v24.8.2.3-lts.md @@ -0,0 +1,12 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.2.3-lts (b54f79ed323) FIXME as compared to v24.8.1.2684-lts (161c62fd295) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68670](https://github.com/ClickHouse/ClickHouse/issues/68670): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8556375d543..199c4f822f4 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,15 @@ +v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 +v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 +v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 +v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 @@ -14,6 +18,7 @@ v24.4.4.113-stable 2024-08-02 v24.4.3.25-stable 2024-06-14 v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 +v24.3.9.5-lts 2024-08-22 v24.3.8.13-lts 2024-08-20 v24.3.7.30-lts 2024-08-14 v24.3.6.48-lts 2024-08-02 From fa453c3664b18da7a6945e662b881f80fedadc5b Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 18:13:45 +0200 Subject: [PATCH 352/363] Disable SqlLogic job --- tests/ci/ci_config.py | 7 ++++--- tests/ci/ci_definitions.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 58de25f039f..0885f1d9ec2 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -498,9 +498,10 @@ class CI: JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], ), - JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], - ), + # TODO: job does not work at all, uncomment and fix + # JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties( + # required_builds=[BuildNames.PACKAGE_RELEASE], + # ), JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], ), diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1cdb3f1487e..9d95a19790f 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -204,7 +204,7 @@ class JobNames(metaclass=WithIter): PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)" PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)" - SQL_LOGIC_TEST = "Sqllogic test (release)" + # SQL_LOGIC_TEST = "Sqllogic test (release)" SQLANCER = "SQLancer (release)" SQLANCER_DEBUG = "SQLancer (debug)" From 06c46ee75bcb94fe02ac68df6a4a044145804d76 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 18:56:50 +0200 Subject: [PATCH 353/363] add one more test --- .../0_stateless/03203_hive_style_partitioning.reference | 1 + tests/queries/0_stateless/03203_hive_style_partitioning.sh | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.reference b/tests/queries/0_stateless/03203_hive_style_partitioning.reference index a9d856babce..0fbc1fb556e 100644 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.reference +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.reference @@ -37,6 +37,7 @@ Array(Int64) LowCardinality(Float64) 2070 b 1 +1 TESTING THE URL PARTITIONING last Elizabeth Frank Elizabeth diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 6734c5f14ad..8ab18f5edfe 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ 
b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -29,6 +29,12 @@ SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.c SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1; """ +$CLICKHOUSE_LOCAL -n -q """ +set use_hive_partitioning = 1; + +SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; +""" 2>&1 | grep -c "INCORRECT_DATA" + $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 0; From f89193fa416cc333f549d72bb8ba453907edc951 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 19:12:19 +0000 Subject: [PATCH 354/363] Update version_date.tsv and changelogs after v24.5.5.41-stable --- utils/list-versions/version_date.tsv | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 199c4f822f4..0e25f8d3b62 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,11 +1,9 @@ v24.8.2.3-lts 2024-08-22 v24.8.1.2684-lts 2024-08-21 -v24.7.3.47-stable 2024-08-22 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 -v24.6.3.38-stable 2024-08-22 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 v24.5.5.78-stable 2024-08-05 From 4200b3d5cbbfe065073c40f1e122c44189f3554f Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 14:02:25 +0200 Subject: [PATCH 355/363] CI: Stress test fix --- tests/clickhouse-test | 2 +- tests/docker_scripts/stress_runner.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4f9380d6f20..ad6173065fe 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3567,7 +3567,7 @@ if __name__ == "__main__": f"Cannot access the specified directory with queries ({args.queries})", file=sys.stderr, ) - sys.exit(1) + assert False, "No --queries provided" CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace diff --git a/tests/docker_scripts/stress_runner.sh b/tests/docker_scripts/stress_runner.sh index 7666398e10b..039c60c8e4e 100755 --- a/tests/docker_scripts/stress_runner.sh +++ b/tests/docker_scripts/stress_runner.sh @@ -10,8 +10,7 @@ dmesg --clear # shellcheck disable=SC1091 source /setup_export_logs.sh -ln -s /repo/tests/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /repo/tests/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib @@ -266,6 +265,7 @@ fi start_server +cd /repo/tests/ || exit 1 # clickhouse-test can find queries dir from there python3 /repo/tests/ci/stress.py --hung-check --drop-databases --output-folder /test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ || echo -e "Test script failed$FAIL script exit code: $?" 
>> /test_output/test_results.tsv From dc862b1411884a462bba8dcf86a474ccbe57e380 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 22 Aug 2024 23:40:18 +0200 Subject: [PATCH 356/363] fix test --- tests/queries/0_stateless/03203_hive_style_partitioning.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03203_hive_style_partitioning.sh b/tests/queries/0_stateless/03203_hive_style_partitioning.sh index 8ab18f5edfe..60e8a6e9faa 100755 --- a/tests/queries/0_stateless/03203_hive_style_partitioning.sh +++ b/tests/queries/0_stateless/03203_hive_style_partitioning.sh @@ -32,7 +32,7 @@ SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMI $CLICKHOUSE_LOCAL -n -q """ set use_hive_partitioning = 1; -SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; +SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10; """ 2>&1 | grep -c "INCORRECT_DATA" $CLICKHOUSE_LOCAL -n -q """ From 4c790999eb6ad74e3a8f99c072dcc12c956a63d8 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 23 Aug 2024 02:18:26 +0200 Subject: [PATCH 357/363] CI: Force package_debug build on release branches --- .github/workflows/release_branches.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 82826794ea3..ec119b6ff95 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -130,6 +130,7 @@ jobs: with: build_name: package_debug data: ${{ needs.RunConfig.outputs.data }} + force: true BuilderBinDarwin: needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() }} From f5739dfe06db8610818fafb5c3a2c33f59fd0a8d Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 23 Aug 2024 02:51:27 +0200 Subject: [PATCH 358/363] CI: Make job rerun possible if triggered manually --- tests/ci/ci.py | 7 +++++-- tests/ci/ci_utils.py | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index a9ae078b449..d201b6602f5 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -333,7 +333,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info): CI.JobNames.BUILD_CHECK, ): # we might want to rerun build report job rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name)) - if rerun_helper.is_already_finished_by_status(): + if ( + rerun_helper.is_already_finished_by_status() + and not Utils.is_job_triggered_manually() + ): print("WARNING: Rerunning job with GH status ") status = rerun_helper.get_finished_status() assert status @@ -344,7 +347,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info): skip_status = status.state # ci cache check - if not to_be_skipped and not no_cache: + if not to_be_skipped and not no_cache and not Utils.is_job_triggered_manually(): ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update() job_config = CI.get_job_config(job_name) if ci_cache.is_successful( diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index a4c0977f47c..e8d9e7dc254 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -18,6 +18,7 @@ class Envs: ) S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds") GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "") + GITHUB_ACTOR = os.getenv("GITHUB_ACTOR", "") class WithIter(type): @@ -282,3 +283,7 @@ class Utils: ): res = 
res.replace(*r) return res + + @staticmethod + def is_job_triggered_manually(): + return "robot" not in Envs.GITHUB_ACTOR From 60e4bcbbf0b1991b42bcab4b83e55be344e8a659 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Thu, 22 Aug 2024 20:45:28 -0700 Subject: [PATCH 359/363] Update README.md Update Raleigh meetup link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 83a5c05c667..546f08afd3d 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 -* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 +* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9 * [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 From e5380806653f8d391c6e88664b0096c3c51240f5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 07:09:03 +0000 Subject: [PATCH 360/363] Update version_date.tsv and changelogs after v24.5.6.45-stable --- docs/changelogs/v24.5.6.45-stable.md | 33 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 docs/changelogs/v24.5.6.45-stable.md diff --git a/docs/changelogs/v24.5.6.45-stable.md b/docs/changelogs/v24.5.6.45-stable.md new file mode 100644 index 00000000000..b329ebab27b --- /dev/null +++ b/docs/changelogs/v24.5.6.45-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.5.6.45-stable (bdca8604c29) FIXME as compared to v24.5.5.78-stable (0138248cb62) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). 
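+
+A minimal sketch of the variant-subcolumn read covered by the entry above (the table shape is assumed for illustration; the experimental setting name is as of 24.x):
+
+```sql
+SET allow_experimental_variant_type = 1;
+CREATE TABLE t_variant (v Variant(UInt64, String)) ENGINE = MergeTree ORDER BY tuple();
+INSERT INTO t_variant VALUES (1), ('Hello');
+SELECT v.UInt64, v.String FROM t_variant;  -- per-type subcolumn reads
+```
+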
+* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. 
[#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Backported in [#68756](https://github.com/ClickHouse/ClickHouse/issues/68756): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 0e25f8d3b62..57a59d7ac49 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -6,8 +6,8 @@ v24.7.1.2915-stable 2024-07-30 v24.6.3.95-stable 2024-08-06 v24.6.2.17-stable 2024-07-05 v24.6.1.4423-stable 2024-07-01 +v24.5.6.45-stable 2024-08-23 v24.5.5.78-stable 2024-08-05 -v24.5.5.41-stable 2024-08-22 v24.5.4.49-stable 2024-07-01 v24.5.3.5-stable 2024-06-13 v24.5.2.34-stable 2024-06-13 From e1a7bd9163bebf0aeab12d8dd46c729f73b068be Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 07:37:32 +0000 Subject: [PATCH 361/363] Update version_date.tsv and changelogs after v24.6.4.42-stable --- docs/changelogs/v24.6.4.42-stable.md | 33 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 3 ++- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 docs/changelogs/v24.6.4.42-stable.md diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md new file mode 100644 index 00000000000..29b6ba095af --- /dev/null +++ b/docs/changelogs/v24.6.4.42-stable.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11) + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. 
From e1a7bd9163bebf0aeab12d8dd46c729f73b068be Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 07:37:32 +0000
Subject: [PATCH 361/363] Update version_date.tsv and changelogs after v24.6.4.42-stable

---
 docs/changelogs/v24.6.4.42-stable.md | 33 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelogs/v24.6.4.42-stable.md

diff --git a/docs/changelogs/v24.6.4.42-stable.md b/docs/changelogs/v24.6.4.42-stable.md
new file mode 100644
index 00000000000..29b6ba095af
--- /dev/null
+++ b/docs/changelogs/v24.6.4.42-stable.md
@@ -0,0 +1,33 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
+* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
+* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
+* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+* Backported in [#68758](https://github.com/ClickHouse/ClickHouse/issues/68758): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 0e25f8d3b62..8ce510f110d 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -3,11 +3,12 @@ v24.8.1.2684-lts 2024-08-21
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
+v24.6.4.42-stable 2024-08-23
 v24.6.3.95-stable 2024-08-06
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01
+v24.5.6.45-stable 2024-08-23
 v24.5.5.78-stable 2024-08-05
-v24.5.5.41-stable 2024-08-22
 v24.5.4.49-stable 2024-07-01
 v24.5.3.5-stable 2024-06-13
 v24.5.2.34-stable 2024-06-13
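
The `::` parsing fix ([#67433](https://github.com/ClickHouse/ClickHouse/pull/67433)), backported in both changelogs above, concerns the archive syntax of the `file` table function. A hedged sketch with hypothetical file names — `exports::2024.csv` stands for any plain file whose name happens to contain `::`:

```sql
-- '::' after an archive name addresses a file inside the archive:
SELECT * FROM file('backups/data.zip :: events.csv', 'CSV');
-- A name that merely contains '::' but is not an archive is now parsed as a
-- single plain file name instead of being misread as archive syntax:
SELECT * FROM file('exports::2024.csv', 'CSV');
```
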
From eec720dab60ea63b033919bbc4c1f6837920a42d Mon Sep 17 00:00:00 2001
From: robot-clickhouse
Date: Fri, 23 Aug 2024 08:05:27 +0000
Subject: [PATCH 362/363] Update version_date.tsv and changelogs after v24.7.4.51-stable

---
 docs/changelogs/v24.7.4.51-stable.md | 36 ++++++++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  2 ++
 2 files changed, 38 insertions(+)
 create mode 100644 docs/changelogs/v24.7.4.51-stable.md

diff --git a/docs/changelogs/v24.7.4.51-stable.md b/docs/changelogs/v24.7.4.51-stable.md
new file mode 100644
index 00000000000..a7cf9790383
--- /dev/null
+++ b/docs/changelogs/v24.7.4.51-stable.md
@@ -0,0 +1,36 @@
+---
+sidebar_position: 1
+sidebar_label: 2024
+---
+
+# 2024 Changelog
+
+### ClickHouse release v24.7.4.51-stable (70fe2f6fa52) FIXME as compared to v24.7.3.42-stable (63730bc4293)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
+* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
+* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
+* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered and now window functions without window frame definition in sql can be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)).
+* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
+* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
+* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
+* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Try fix postgres crash when query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
+* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)).
+* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
+* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)).
+* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)).
+* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)).
+* Backported in [#68760](https://github.com/ClickHouse/ClickHouse/issues/68760): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index 57a59d7ac49..d9674ed2366 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,8 +1,10 @@
 v24.8.2.3-lts 2024-08-22
 v24.8.1.2684-lts 2024-08-21
+v24.7.4.51-stable 2024-08-23
 v24.7.3.42-stable 2024-08-08
 v24.7.2.13-stable 2024-08-01
 v24.7.1.2915-stable 2024-07-30
+v24.6.4.42-stable 2024-08-23
 v24.6.3.95-stable 2024-08-06
 v24.6.2.17-stable 2024-07-05
 v24.6.1.4423-stable 2024-07-01

From b0894bffe62722acee2fa5d832ceda9a75754bde Mon Sep 17 00:00:00 2001
From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Fri, 23 Aug 2024 12:01:17 +0200
Subject: [PATCH 363/363] change test file location

---
 .../sample.parquet | Bin
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/{column0=Elizabeth => column0=Elizabeth1}/sample.parquet (100%)

diff --git a/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet b/tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet
similarity index 100%
rename from tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet
rename to tests/queries/0_stateless/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet
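
The final commit only moves a test fixture, but the path layout shows what the data exercises: Hive-style partitioning, where `key=value` directory segments such as `column0=Elizabeth` surface as virtual columns. A sketch of the typical query shape — the `use_hive_partitioning` setting and the glob pattern are assumptions inferred from the test path, not part of this patch:

```sql
-- With Hive-style partitioning enabled, 'column0=Elizabeth' in the path
-- is exposed as a virtual column named column0 with value 'Elizabeth'.
SET use_hive_partitioning = 1;
SELECT column0, count()
FROM file('data_hive/partitioning/column0=*/**.parquet', 'Parquet')
GROUP BY column0;
```
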