From 3fb45ff1762867cbeb53e31cd4492dda5cf8dbb7 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 14 May 2024 19:17:56 +0200 Subject: [PATCH 001/127] Add setting and implementation --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Formats/SchemaInferenceUtils.cpp | 44 ++++++++++++++++++- .../0_stateless/03150_infer_type_variant.sql | 4 ++ 5 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03150_infer_type_variant.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a0de354a03..0c0614550e5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1066,6 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ + M(Bool, input_format_json_infer_variant_from_multitype_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 43ccee173f0..e027c693094 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,6 +137,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; + format_settings.json.infer_variant_from_multitype_array = settings.input_format_json_infer_variant_from_multitype_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index d5fedf99adb..6cac41bd63e 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -223,6 +223,7 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; + bool infer_variant_from_multitype_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 02c0aa6dd77..fc1a26f9b2f 100644 --- 
a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -307,6 +308,22 @@ namespace type_indexes.erase(TypeIndex::UInt64); } + /// if setting input_format_json_infer_variant_from_multitype_array is true + /// and nested types are not equal then we convert to type variant. + void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + auto variant_type = std::make_shared(data_types); + /// replace separate types with a single variant type + data_types.clear(); + type_indexes.clear(); + data_types.push_back(variant_type); + type_indexes.insert(TypeIndex::Variant); + + // push it back again + data_types.push_back(variant_type); + type_indexes.insert(TypeIndex::Variant); + } + /// If we have only Date and DateTime types, convert Date to DateTime, /// otherwise, convert all Date and DateTime to String. void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -649,6 +666,12 @@ namespace /// Check settings specific for JSON formats. + if (settings.json.infer_variant_from_multitype_array) + { + transformVariant(data_types, type_indexes); + return; + } + /// Convert numbers inferred from strings back to strings if needed. if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); @@ -677,6 +700,12 @@ namespace if constexpr (!is_json) return; + if (settings.json.infer_variant_from_multitype_array) + { + transformVariant(data_types, type_indexes); + return; + } + /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -822,7 +851,6 @@ namespace if (checkIfTypesAreEqual(nested_types_copy)) return std::make_shared(nested_types_copy.back()); - return std::make_shared(nested_types); } else @@ -1482,6 +1510,20 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) return nested_type ? 
std::make_shared(nested_type) : nullptr; } + if (which.isVariant()) + { + const auto * variant_type = assert_cast(type.get()); + DataTypes nested_types; + for (const auto & nested_type: variant_type->getVariants()) + { + /// unlike tuple or array, here we do not want to make any of the variants nullable + /// so we do not call makeNullableRecursively + nested_types.push_back(nested_type); + } + + return std::make_shared(nested_types); + } + if (which.isTuple()) { const auto * tuple_type = assert_cast(type.get()); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql new file mode 100644 index 00000000000..ac544d04f6e --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -0,0 +1,4 @@ +SET input_format_json_infer_variant_from_multitype_array=1; +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 467366af990215e11b4b0309b90b3e6d9ebca5fd Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 14 May 2024 21:55:56 +0200 Subject: [PATCH 002/127] Fix unimplemented serialization error and update reference file --- src/Formats/SchemaInferenceUtils.cpp | 13 ++++++++--- .../03150_infer_type_variant.reference | 22 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03150_infer_type_variant.reference diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index fc1a26f9b2f..d0d29892dec 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -319,7 +319,7 @@ namespace data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); - // push it back again + // make the second type variant as well data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); } @@ -669,7 +669,6 @@ namespace if (settings.json.infer_variant_from_multitype_array) { transformVariant(data_types, type_indexes); - return; } /// Convert numbers inferred from strings back to strings if needed. @@ -703,7 +702,6 @@ namespace if (settings.json.infer_variant_from_multitype_array) { transformVariant(data_types, type_indexes); - return; } /// Convert JSON tuples with same nested types to arrays. @@ -1440,6 +1438,15 @@ void transformFinalInferredJSONTypeIfNeededImpl(DataTypePtr & data_type, const F return; } + + if (const auto * variant_type = typeid_cast(data_type.get())) + { + auto nested_types = variant_type->getVariants(); + for (auto & nested_type : nested_types) + transformFinalInferredJSONTypeIfNeededImpl(nested_type, settings, json_info, remain_nothing_types); + data_type = std::make_shared(nested_types); + return; + } } void transformFinalInferredJSONTypeIfNeeded(DataTypePtr & data_type, const FormatSettings & settings, JSONInferenceInfo * json_info) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference new file mode 100644 index 00000000000..ffb4209eadb --- /dev/null +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -0,0 +1,22 @@ + ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. 
│ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ + │ │… a Int64))) │ + └──────────────────┴─────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Variant(Int64, String) │ + ├───────┼────────────────────────┤ +2. │ Hello │ Variant(Int64, String) │ + └───────┴────────────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Int64), Tuple(…│ + │ │… a Int64)) │ + ├─────────┼──────────────────────────────┤ +2. │ (42) │ Variant(Array(Int64), Tuple(…│ + │ │… a Int64)) │ + └─────────┴──────────────────────────────┘ From 4066c6bc548979703f45ba264437f5966c403d6a Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 02:13:53 +0000 Subject: [PATCH 003/127] Update setting name --- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 6 +++--- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0c0614550e5..ffc337b674f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1066,7 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_infer_variant_from_multitype_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ + M(Bool, input_format_json_infer_variant_from_multi_type_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index e027c693094..792ac08a5df 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,7 +137,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; - format_settings.json.infer_variant_from_multitype_array = settings.input_format_json_infer_variant_from_multitype_array; + format_settings.json.infer_variant_from_multi_type_array = settings.input_format_json_infer_variant_from_multi_type_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 6cac41bd63e..d2c75872326 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -223,7 +223,7 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; - bool infer_variant_from_multitype_array = false; + bool infer_variant_from_multi_type_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index d0d29892dec..f693916c584 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -308,7 +308,7 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting input_format_json_infer_variant_from_multitype_array is true + /// if setting input_format_json_infer_variant_from_multi_type_array is true /// and nested types are not equal then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { @@ -666,7 +666,7 @@ namespace /// Check settings specific for JSON formats. 
- if (settings.json.infer_variant_from_multitype_array) + if (settings.json.infer_variant_from_multi_type_array) { transformVariant(data_types, type_indexes); } @@ -699,7 +699,7 @@ namespace if constexpr (!is_json) return; - if (settings.json.infer_variant_from_multitype_array) + if (settings.json.infer_variant_from_multi_type_array) { transformVariant(data_types, type_indexes); } diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index ac544d04f6e..2ea849248f7 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multitype_array=1; +SET input_format_json_infer_variant_from_multi_type_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 2762cf86d2ba3f5c1ac86040b6ef484feb40837b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 14:19:17 +0200 Subject: [PATCH 004/127] fix test file --- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 2ea849248f7..ac544d04f6e 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multi_type_array=1; +SET input_format_json_infer_variant_from_multitype_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From 4800aa6a6cdf5a8431fcc3d6fd96672590da0fff Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 14:21:52 +0200 Subject: [PATCH 005/127] rename setting in test file --- tests/queries/0_stateless/03150_infer_type_variant.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index ac544d04f6e..2ea849248f7 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ -SET input_format_json_infer_variant_from_multitype_array=1; +SET input_format_json_infer_variant_from_multi_type_array=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); From fbf34519a5e72bb03c57ee6bc0feea1adddcb309 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 15 May 2024 16:03:23 +0200 Subject: [PATCH 006/127] Add setting to SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index ece48620618..0665d1d6ca6 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett 
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, + {"input_format_json_infer_variant_from_multi_type_array", 0, 0, "Allows inference of variant type if columns/arrays have multiple types."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From 777e0b313a48975fa51d645fb09a4f6ebfac1d1e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 18:51:29 +0200 Subject: [PATCH 007/127] Update 03150_infer_type_variant.sql Fix failing test --- tests/queries/0_stateless/03150_infer_type_variant.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 2ea849248f7..3253ddfe179 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,4 @@ SET input_format_json_infer_variant_from_multi_type_array=1; -SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}'); -SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}'); -SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}'); +SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; +SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; From dd8d5c46c4678f570b9357dbdd912bcd6f4a267e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 15 May 2024 18:55:34 +0200 Subject: [PATCH 008/127] Fix style --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index f693916c584..2cbb680af97 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -319,7 +319,7 @@ namespace data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); - // make the second type variant as well + /// make the second type variant as well data_types.push_back(variant_type); type_indexes.insert(TypeIndex::Variant); } From 04800f596c4471d10e15c40a533c539c6b549b06 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 24 May 2024 21:20:20 +0200 Subject: [PATCH 009/127] Incorporate review changes --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 60 +++++++++++++------ .../03150_infer_type_variant.reference | 29 +++++---- .../0_stateless/03150_infer_type_variant.sql | 3 +- 7 files changed, 67 insertions(+), 33 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ffc337b674f..be7564794e9 100644 --- a/src/Core/Settings.h 
+++ b/src/Core/Settings.h @@ -1066,7 +1066,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_infer_variant_from_multi_type_array, false, "Try to infer variant type rather than tuple when column/array has multiple", 0) \ + M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 0665d1d6ca6..75fcb538b2b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,7 +93,7 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, - {"input_format_json_infer_variant_from_multi_type_array", 0, 0, "Allows inference of variant type if columns/arrays have multiple types."}, + {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 792ac08a5df..2854802453e 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -137,7 +137,6 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.read_arrays_as_strings = settings.input_format_json_read_arrays_as_strings; format_settings.json.try_infer_numbers_from_strings = settings.input_format_json_try_infer_numbers_from_strings; format_settings.json.infer_incomplete_types_as_strings = settings.input_format_json_infer_incomplete_types_as_strings; - format_settings.json.infer_variant_from_multi_type_array = settings.input_format_json_infer_variant_from_multi_type_array; format_settings.json.validate_types_from_metadata = settings.input_format_json_validate_types_from_metadata; format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8; 
format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name; @@ -266,6 +265,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth; format_settings.client_protocol_version = context->getClientProtocolVersion(); format_settings.date_time_overflow_behavior = settings.date_time_overflow_behavior; + format_settings.try_infer_variant = settings.input_format_try_infer_variants; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 927a7e691d8..a2a9e75bd44 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -36,6 +36,7 @@ struct FormatSettings bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; bool is_writing_to_terminal = false; + bool try_infer_variant = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 25000; @@ -223,7 +224,6 @@ struct FormatSettings bool compact_allow_variable_number_of_columns = false; bool try_infer_objects_as_tuples = false; bool infer_incomplete_types_as_strings = true; - bool infer_variant_from_multi_type_array = false; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; } json{}; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 2cbb680af97..298127cad68 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -239,6 +239,16 @@ namespace return true; } + bool checkIfTypesContainVariant(const DataTypes & types) + { + for (size_t i = 0; i < types.size(); ++i) + { + if (isVariant(types[i])) + return true; + } + return false; + } + void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -308,20 +318,31 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting input_format_json_infer_variant_from_multi_type_array is true + /// if setting try_infer_variant is true /// and nested types are not equal then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { + auto typesAreEqual = checkIfTypesAreEqual(data_types); + auto typesContainVariant = checkIfTypesContainVariant(data_types); + if (typesAreEqual || typesContainVariant) + return; + + DataTypes new_data_types; + TypeIndexesSet new_type_indexes; + auto variant_type = std::make_shared(data_types); - /// replace separate types with a single variant type + size_t i = 0; + while (i != data_types.size()) + { + new_data_types.push_back(variant_type); + new_type_indexes.insert(TypeIndex::Variant); + i++; + } + data_types.clear(); type_indexes.clear(); - data_types.push_back(variant_type); - type_indexes.insert(TypeIndex::Variant); - - /// make the second type variant as well - data_types.push_back(variant_type); - type_indexes.insert(TypeIndex::Variant); + data_types = new_data_types; + type_indexes = new_type_indexes; } /// If we have only Date and DateTime types, convert Date to DateTime, @@ -661,16 +682,14 @@ namespace if (settings.try_infer_dates || settings.try_infer_datetimes) transformDatesAndDateTimes(data_types, type_indexes); + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + if constexpr (!is_json) return; /// Check settings specific for JSON formats. 
- if (settings.json.infer_variant_from_multi_type_array) - { - transformVariant(data_types, type_indexes); - } - /// Convert numbers inferred from strings back to strings if needed. if (settings.json.try_infer_numbers_from_strings || settings.json.read_numbers_as_strings) transformJSONNumbersBackToString(data_types, settings, type_indexes, json_info); @@ -685,6 +704,10 @@ namespace if (settings.json.try_infer_objects_as_tuples) mergeJSONPaths(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + }; auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) @@ -696,14 +719,12 @@ namespace /// If there is at least one non Nothing type, change all Nothing types to it. transformNothingComplexTypes(data_types, type_indexes); + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); + if constexpr (!is_json) return; - if (settings.json.infer_variant_from_multi_type_array) - { - transformVariant(data_types, type_indexes); - } - /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); @@ -715,6 +736,9 @@ namespace if (json_info && json_info->allow_merging_named_tuples) mergeNamedTuples(data_types, type_indexes, settings, json_info); + + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); }; transformTypesRecursively(types, transform_simple_types, transform_complex_types); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index ffb4209eadb..f3f53057845 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -2,7 +2,7 @@ ┃ arr ┃ toTypeName(arr) ┃ ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ 1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ - │ │… a Int64))) │ + │ │… a Nullable(Int64)))) │ └──────────────────┴─────────────────────────────────────┘ ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ @@ -11,12 +11,21 @@ ├───────┼────────────────────────┤ 2. │ Hello │ Variant(Int64, String) │ └───────┴────────────────────────┘ - ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,2,3] │ Variant(Array(Int64), Tuple(…│ - │ │… a Int64)) │ - ├─────────┼──────────────────────────────┤ -2. │ (42) │ Variant(Array(Int64), Tuple(…│ - │ │… a Int64)) │ - └─────────┴──────────────────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple(…│ + │ │… a Nullable(Int64))) │ + ├─────────┼────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple(…│ + │ │… a Nullable(Int64))) │ + └─────────┴────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Nullable(String) │ + ├────┼─────────────────┼──────────────┼──────────────────┤ +2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Nullable(String) │ + ├────┼─────────────────┼──────────────┼──────────────────┤ +3. 
│ 3 │ Nullable(Int64) │ 2020-01-01 │ Nullable(String) │ + └────┴─────────────────┴──────────────┴──────────────────┘ diff --git a/tests/queries/0_stateless/03150_infer_type_variant.sql b/tests/queries/0_stateless/03150_infer_type_variant.sql index 3253ddfe179..45126ccd471 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.sql +++ b/tests/queries/0_stateless/03150_infer_type_variant.sql @@ -1,4 +1,5 @@ -SET input_format_json_infer_variant_from_multi_type_array=1; +SET input_format_try_infer_variants=1; SELECT arr, toTypeName(arr) FROM format('JSONEachRow', '{"arr" : [1, "Hello", {"a" : 32}]}') FORMAT Pretty; SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : 42}, {"x" : "Hello"}') FORMAT Pretty; SELECT x, toTypeName(x) FROM format('JSONEachRow', '{"x" : [1, 2, 3]}, {"x" : {"a" : 42}}') FORMAT Pretty; +SELECT c1, toTypeName(c1), c2, toTypeName(c2) FROM format('CSV', '1,Hello World!\n2,"[1,2,3]"\n3,"2020-01-01"\n') FORMAT Pretty; \ No newline at end of file From 655262d1a1b21d85f4fbe284e0835065bcca379b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 5 Jun 2024 14:34:17 +0200 Subject: [PATCH 010/127] Fix issue with nullables --- src/Formats/SchemaInferenceUtils.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 298127cad68..43120cb7b22 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1547,11 +1547,13 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) DataTypes nested_types; for (const auto & nested_type: variant_type->getVariants()) { - /// unlike tuple or array, here we do not want to make any of the variants nullable - /// so we do not call makeNullableRecursively - nested_types.push_back(nested_type); + auto is_low_cardinality = nested_type->lowCardinality(); + auto has_sub_types = nested_type->haveSubtypes(); + if (!is_low_cardinality && has_sub_types) + nested_types.push_back(makeNullableRecursively(nested_type)); + else + nested_types.push_back(nested_type); } - return std::make_shared(nested_types); } From 418fc7f4438abd25eae4928f36ff0c3fef2395f8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 6 Jun 2024 10:02:19 +0200 Subject: [PATCH 011/127] Fix incorrect inference for other formats --- src/Formats/SchemaInferenceUtils.cpp | 29 +++++++++-- .../03150_infer_type_variant.reference | 48 +++++++++---------- 2 files changed, 49 insertions(+), 28 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 43120cb7b22..a8b5d4343f5 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -318,19 +318,40 @@ namespace type_indexes.erase(TypeIndex::UInt64); } - /// if setting try_infer_variant is true - /// and nested types are not equal then we convert to type variant. + /// if setting 'try_infer_variant' is true then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { auto typesAreEqual = checkIfTypesAreEqual(data_types); auto typesContainVariant = checkIfTypesContainVariant(data_types); - if (typesAreEqual || typesContainVariant) + if (typesAreEqual) return; DataTypes new_data_types; TypeIndexesSet new_type_indexes; + std::shared_ptr variant_type; + + /// extract the nested types of variant and make a new variant with the nested types and the other type. + /// eg. Type 1: variant, Type 2: Date -> variant. 
+ if (typesContainVariant) + { + DataTypes extracted_types; + for (size_t i=0; i(data_types[i].get())) + extracted_types = variant->getVariants(); + } + else + extracted_types.push_back(data_types[i]); + } + variant_type = std::make_shared(extracted_types); + } + else + { + variant_type = std::make_shared(data_types); + } - auto variant_type = std::make_shared(data_types); size_t i = 0; while (i != data_types.size()) { diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index f3f53057845..a5f56cb3618 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -1,9 +1,9 @@ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ arr ┃ toTypeName(arr) ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(…│ - │ │… a Nullable(Int64)))) │ - └──────────────────┴─────────────────────────────────────┘ + ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple( + a Nullable(Int64)))) │ + └──────────────────┴─────────────────────────────────────────────────────────────┘ ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ @@ -11,21 +11,21 @@ ├───────┼────────────────────────┤ 2. │ Hello │ Variant(Int64, String) │ └───────┴────────────────────────┘ - ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple(…│ - │ │… a Nullable(Int64))) │ - ├─────────┼────────────────────────────────────────┤ -2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple(…│ - │ │… a Nullable(Int64))) │ - └─────────┴────────────────────────────────────────┘ - ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ - ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ - ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ -1. │ 1 │ Nullable(Int64) │ Hello World! │ Nullable(String) │ - ├────┼─────────────────┼──────────────┼──────────────────┤ -2. │ 2 │ Nullable(Int64) │ [1,2,3] │ Nullable(String) │ - ├────┼─────────────────┼──────────────┼──────────────────┤ -3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Nullable(String) │ - └────┴─────────────────┴──────────────┴──────────────────┘ + ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ [1,2,3] │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + ├─────────┼───────────────────────────────────────────────────────────────┤ +2. │ (42) │ Variant(Array(Nullable(Int64)), Tuple( + a Nullable(Int64))) │ + └─────────┴───────────────────────────────────────────────────────────────┘ + ┏━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ c1 ┃ toTypeName(c1) ┃ c2 ┃ toTypeName(c2) ┃ + ┡━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ 1 │ Nullable(Int64) │ Hello World! │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +2. 
│ 2 │ Nullable(Int64) │ [1,2,3] │ Variant(Array(Nullable(Int64)), String) │ + ├────┼─────────────────┼──────────────┼─────────────────────────────────────────┤ +3. │ 3 │ Nullable(Int64) │ 2020-01-01 │ Variant(Array(Nullable(Int64)), String) │ + └────┴─────────────────┴──────────────┴─────────────────────────────────────────┘ From 9cf11a210f07110676b373b864ea098583d87ff6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 11 Jun 2024 11:11:06 +0200 Subject: [PATCH 012/127] Review changes --- src/Core/SettingsChangesHistory.h | 3 +- src/Formats/SchemaInferenceUtils.cpp | 73 ++++++------------- .../03150_infer_type_variant.reference | 24 +++--- 3 files changed, 35 insertions(+), 65 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 3f743ef42bf..661ecc607ba 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,6 +92,7 @@ static std::map sett {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"}, {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, + {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, }}, {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"}, {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. 
t1.y < t2.y."}, @@ -103,8 +104,6 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."}, - {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"}, {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"}, {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."}, diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index a8b5d4343f5..b7c71a95b29 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -239,16 +239,6 @@ namespace return true; } - bool checkIfTypesContainVariant(const DataTypes & types) - { - for (size_t i = 0; i < types.size(); ++i) - { - if (isVariant(types[i])) - return true; - } - return false; - } - void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) { type_indexes.clear(); @@ -321,49 +311,28 @@ namespace /// if setting 'try_infer_variant' is true then we convert to type variant. void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes) { - auto typesAreEqual = checkIfTypesAreEqual(data_types); - auto typesContainVariant = checkIfTypesContainVariant(data_types); - if (typesAreEqual) + if (checkIfTypesAreEqual(data_types)) return; - DataTypes new_data_types; - TypeIndexesSet new_type_indexes; - std::shared_ptr variant_type; - - /// extract the nested types of variant and make a new variant with the nested types and the other type. - /// eg. Type 1: variant, Type 2: Date -> variant. 
- if (typesContainVariant) + DataTypes variant_types; + for (const auto & type : data_types) { - DataTypes extracted_types; - for (size_t i=0; i(type.get())) { - if (isVariant(data_types[i])) - { - if (const auto * variant = typeid_cast(data_types[i].get())) - extracted_types = variant->getVariants(); - } - else - extracted_types.push_back(data_types[i]); + const auto & current_variants = variant_type->getVariants(); + variant_types.insert(variant_types.end(), current_variants.begin(), current_variants.end()); + } + else + { + variant_types.push_back(type); } - variant_type = std::make_shared(extracted_types); - } - else - { - variant_type = std::make_shared(data_types); } - size_t i = 0; - while (i != data_types.size()) - { - new_data_types.push_back(variant_type); - new_type_indexes.insert(TypeIndex::Variant); - i++; - } + auto variant_type = std::make_shared(variant_types); - data_types.clear(); - type_indexes.clear(); - data_types = new_data_types; - type_indexes = new_type_indexes; + for (auto & type : data_types) + type = variant_type; + type_indexes = {TypeIndex::Variant}; } /// If we have only Date and DateTime types, convert Date to DateTime, @@ -703,11 +672,12 @@ namespace if (settings.try_infer_dates || settings.try_infer_datetimes) transformDatesAndDateTimes(data_types, type_indexes); - if (settings.try_infer_variant) - transformVariant(data_types, type_indexes); - if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Check settings specific for JSON formats. @@ -740,11 +710,12 @@ namespace /// If there is at least one non Nothing type, change all Nothing types to it. transformNothingComplexTypes(data_types, type_indexes); - if (settings.try_infer_variant) - transformVariant(data_types, type_indexes); - if constexpr (!is_json) + { + if (settings.try_infer_variant) + transformVariant(data_types, type_indexes); return; + } /// Convert JSON tuples with same nested types to arrays. transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes); diff --git a/tests/queries/0_stateless/03150_infer_type_variant.reference b/tests/queries/0_stateless/03150_infer_type_variant.reference index a5f56cb3618..a43fa1e1227 100644 --- a/tests/queries/0_stateless/03150_infer_type_variant.reference +++ b/tests/queries/0_stateless/03150_infer_type_variant.reference @@ -1,16 +1,16 @@ - ┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ arr ┃ toTypeName(arr) ┃ - ┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple( + ┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ arr ┃ toTypeName(arr) ┃ + ┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ +1. │ ['1','Hello',(32)] │ Array(Variant(String, Tuple( a Nullable(Int64)))) │ - └──────────────────┴─────────────────────────────────────────────────────────────┘ - ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ x ┃ toTypeName(x) ┃ - ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩ -1. │ 42 │ Variant(Int64, String) │ - ├───────┼────────────────────────┤ -2. │ Hello │ Variant(Int64, String) │ - └───────┴────────────────────────┘ + └────────────────────┴──────────────────────────────────────────────────────┘ + ┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┓ + ┃ x ┃ toTypeName(x) ┃ + ┡━━━━━━━╇━━━━━━━━━━━━━━━━━━┩ +1. │ 42 │ Nullable(String) │ + ├───────┼──────────────────┤ +2. 
│ Hello │ Nullable(String) │ + └───────┴──────────────────┘ ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ x ┃ toTypeName(x) ┃ ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ From 548c90901020317669a10d191a0b6f8a7d0a0511 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 11 Jun 2024 12:14:36 +0200 Subject: [PATCH 013/127] Add documentation --- docs/en/operations/settings/settings-formats.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 6aae1ea62e5..8bbb469547b 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -194,6 +194,17 @@ If enabled, ClickHouse will try to infer type `DateTime64` from string fields in Enabled by default. +## input_format_try_infer_variants {#input_format_try_infer_variants} + +If enabled, ClickHouse will try to infer type [`Variant`](../../sql-reference/data-types/variant.md) in schema inference for text formats when there is more than one possible type for column/array elements. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. + ## date_time_input_format {#date_time_input_format} Allows choosing a parser of the text representation of date and time. From bad5e27bbffa9c1f6727a0416edcb135dadcc1fe Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 11 Jun 2024 13:32:34 +0200 Subject: [PATCH 014/127] Update src/Formats/SchemaInferenceUtils.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Formats/SchemaInferenceUtils.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 0ac8b32f8aa..240830013c6 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1539,9 +1539,7 @@ DataTypePtr makeNullableRecursively(DataTypePtr type) DataTypes nested_types; for (const auto & nested_type: variant_type->getVariants()) { - auto is_low_cardinality = nested_type->lowCardinality(); - auto has_sub_types = nested_type->haveSubtypes(); - if (!is_low_cardinality && has_sub_types) + if (!nested_type->lowCardinality() && nested_type->haveSubtypes()) nested_types.push_back(makeNullableRecursively(nested_type)); else nested_types.push_back(nested_type); From 384aa9feb90bbf95c5bc0e5498af4aca769c2531 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Jun 2024 13:51:20 +0200 Subject: [PATCH 015/127] Move setting to 24.7 changes --- src/Core/SettingsChangesHistory.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 1ab7dc69f60..deaeba2a7de 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,6 +86,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static const std::map settings_changes_history = { + {"24.7", {{"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, + }}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, 
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, @@ -97,7 +99,6 @@ static const std::map Date: Wed, 3 Jul 2024 09:33:57 +0200 Subject: [PATCH 016/127] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 7719fe1e837..828031f4c23 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -60,6 +60,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 18:04:38 +0800 Subject: [PATCH 017/127] stash --- src/Functions/FunctionOverlay.cpp | 481 ++++++++++++++++++++++++++++++ 1 file changed, 481 insertions(+) create mode 100644 src/Functions/FunctionOverlay.cpp diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp new file mode 100644 index 00000000000..6160335ad79 --- /dev/null +++ b/src/Functions/FunctionOverlay.cpp @@ -0,0 +1,481 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int ILLEGAL_COLUMN; +extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +extern const int ZERO_ARRAY_OR_TUPLE_INDEX; +} + +namespace +{ + +/// If 'is_utf8' - measure offset and length in code points instead of bytes. +/// Syntax: overlay(input, replace, offset[, length]) +template +class FunctionOverlay : public IFunction +{ +public: + static constexpr auto name = is_utf8 ? "OverlayUTF8" : "Overlay"; + + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + String getName() const override { return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + const size_t number_of_arguments = arguments.size(); + if (number_of_arguments < 3 || number_of_arguments > 4) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: " + "passed {}, should be 3 or 4", + getName(), + number_of_arguments); + + /// first argument is string + if (!isString(arguments[0])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected String", + arguments[0]->getName(), + getName()); + + /// second argument is string + if (!isString(arguments[1])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected String", + arguments[1]->getName(), + getName()); + + if (!isNativeNumber(arguments[2])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}, expected (U)Int8|16|32|64", + arguments[2]->getName(), + getName()); + + if (number_of_arguments == 4 && !isNativeNumber(arguments[3])) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}, expected (U)Int8|16|32|64", + arguments[3]->getName(), + 
getName()); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const size_t number_of_arguments = arguments.size(); + + ColumnPtr column_string = arguments[0].column; + ColumnPtr column_offset = arguments[1].column; + ColumnPtr column_length; + if (number_of_arguments == 3) + column_length = arguments[2].column; + + const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); + const ColumnConst * column_length_const = nullptr; + if (number_of_arguments == 3) + column_length_const = checkAndGetColumn(column_length.get()); + + Int64 offset = 0; + Int64 length = 0; + if (column_offset_const) + offset = column_offset_const->getInt(0); + if (column_length_const) + length = column_length_const->getInt(0); + + auto res_col = ColumnString::create(); + auto & res_data = res_col->getChars(); + auto & res_offsets = res_col->getOffsets(); + } + +private: +template + void constantConstant( + size_t rows, + const StringRef & input, + const StringRef & replace, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + constantConstant(input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + size_t replace_size = replace.size; + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (three_args) + { + valid_length = replace_size; + } + + size_t res_offset = 0; + size_t input_size = input.size; + for (size_t i = 0; i < rows; ++i) + { + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (!three_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorConstant( + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const StringRef & replace, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + vectorConstant(input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + size_t replace_size = replace.size; + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + else if constexpr (three_args) + { + valid_length = replace_size; + } + + size_t rows = input_offsets.size(); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (!three_args && !length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void constantVector( + const StringRef & input, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + constantVector(input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + return; + } + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + size_t rows = replace_offsets.size(); + size_t input_size = input.size; + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (three_args) + { + // length = replace_size; + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } + + template + void vectorVector( + const ColumnString::Chars & input_data, + const ColumnString::Offsets & input_offsets, + const ColumnString::Chars & replace_data, + const ColumnString::Offsets & replace_offsets, + const ColumnPtr & column_offset, + const ColumnPtr & column_length, + Int64 const_offset, + Int64 const_length, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (!three_args && length_is_const && const_length < 0) + { + vectorVector( + input_data, + input_offsets, + replace_data, + replace_offsets, + column_offset, + column_length, + const_offset, + -1, + res_data, + res_offsets); + return; + } + + + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative + if constexpr (offset_is_const) + { + offset = const_offset; + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + Int64 length = 0; // maybe negative + size_t valid_length = 0; // not negative + if constexpr (!three_args && length_is_const) + { + assert(const_length >= 0); + valid_length = const_length; + } + + size_t rows = input_offsets.size(); + size_t res_offset = 0; + for (size_t i = 0; i < rows; ++i) + { + size_t input_offset = input_offsets[i - 1]; + size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t replace_offset = replace_offsets[i - 1]; + size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + + if constexpr (!offset_is_const) + { + offset = column_offset->getInt(i); + valid_offset = offset > 0 ? (offset - 1) : (-offset); + } + + if constexpr (three_args) + { + // length = replace_size; + valid_length = replace_size; + } + else if constexpr (!length_is_const) + { + length = column_length->getInt(i); + valid_length = length >= 0 ? length : replace_size; + } + + size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + + /// add zero terminator + res_data[res_offset] = 0; + ++res_offset; + + res_offsets[i] = res_offset; + } + } +}; + +} + +REGISTER_FUNCTION(Overlay) +{ + factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction>({}, FunctionFactory::CaseSensitive); +} + +} From 81688e0efdf75a6a3923d6b95f09579d37e93e2a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 20:29:35 +0800 Subject: [PATCH 018/127] almost finish --- src/Functions/FunctionOverlay.cpp | 154 ++++++++++++++++++++++++++---- 1 file changed, 134 insertions(+), 20 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 6160335ad79..65af4d811f5 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -15,10 +15,8 @@ namespace DB namespace ErrorCodes { -extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int ZERO_ARRAY_OR_TUPLE_INDEX; } namespace @@ -86,32 +84,145 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { const size_t number_of_arguments = arguments.size(); + bool three_args = number_of_arguments == 3; - ColumnPtr column_string = arguments[0].column; - ColumnPtr column_offset = arguments[1].column; + ColumnPtr column_offset = arguments[2].column; ColumnPtr column_length; - if (number_of_arguments == 3) - column_length = arguments[2].column; + if (!three_args) + column_length = arguments[3].column; const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (number_of_arguments == 3) + if (!three_args) column_length_const = checkAndGetColumn(column_length.get()); - Int64 offset = 0; - Int64 length = 0; + bool offset_is_const = false; + bool length_is_const = false; + Int64 offset = -1; + Int64 length = -1; if (column_offset_const) + { offset = column_offset_const->getInt(0); + offset_is_const = true; + } + if (column_length_const) + { length = column_length_const->getInt(0); + length_is_const = true; + } + auto res_col = ColumnString::create(); auto & res_data = res_col->getChars(); auto & res_offsets = res_col->getOffsets(); + res_offsets.resize_exact(input_rows_count); + + ColumnPtr column_input = arguments[0].column; + ColumnPtr column_replace = arguments[1].column; + + const auto * column_input_const = checkAndGetColumn(column_input.get()); + const auto * column_input_string = checkAndGetColumn(column_input.get()); + if (column_input_const) + { + StringRef input = column_input_const->getDataAt(0); + res_data.reserve(input.size * input_rows_count); + } + else + { + res_data.reserve(column_input_string->getChars().size()); + } + + const auto * column_replace_const = checkAndGetColumn(column_replace.get()); + const auto * column_replace_string = checkAndGetColumn(column_replace.get()); + bool input_is_const = column_input_const != nullptr; + bool replace_is_const = column_replace_const != nullptr; + +#define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ + if (input_is_const && replace_is_const) \ + constantConstant( \ + input_rows_count, \ + column_input_const->getDataAt(0), \ + column_replace_const->getDataAt(0), \ + 
column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (input_is_const) \ + constantVector( \ + column_input_const->getDataAt(0), \ + column_replace_string->getChars(), \ + column_replace_string->getOffsets(), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else if (replace_is_const) \ + vectorConstant( \ + column_input_string->getChars(), \ + column_input_string->getOffsets(), \ + column_replace_const->getDataAt(0), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); \ + else \ + vectorVector( \ + column_input_string->getChars(), \ + column_input_string->getOffsets(), \ + column_replace_string->getChars(), \ + column_replace_string->getOffsets(), \ + column_offset, \ + column_length, \ + offset, \ + length, \ + res_data, \ + res_offsets); + + if (three_args) + { + if (offset_is_const) + { + OVERLAY_EXECUTE_CASE(true, true, false) + } + else + { + OVERLAY_EXECUTE_CASE(true, false, false) + } + } + else + { + if (offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, true) + } + else if (offset_is_const && !length_is_const) + { + OVERLAY_EXECUTE_CASE(false, true, false) + } + else if (!offset_is_const && length_is_const) + { + OVERLAY_EXECUTE_CASE(false, false, true) + } + else + { + OVERLAY_EXECUTE_CASE(false, false, false) + } + } +#undef OVERLAY_EXECUTE_CASE + + return res_col; } + private: -template + template void constantConstant( size_t rows, const StringRef & input, @@ -121,11 +232,12 @@ template ( + rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -194,7 +306,7 @@ template + template void vectorConstant( const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, @@ -204,11 +316,12 @@ template ( + input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -281,7 +394,7 @@ template + template void constantVector( const StringRef & input, const ColumnString::Chars & replace_data, @@ -291,11 +404,12 @@ template ( + input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -379,11 +493,11 @@ template ( + vectorVector( input_data, input_offsets, replace_data, From 9785f85ca32e3af5760a9bc24e998e7d050fc073 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 23 Jul 2024 21:08:02 +0800 Subject: [PATCH 019/127] fix style --- src/Functions/FunctionOverlay.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 65af4d811f5..7d0e2e86de2 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -1,14 +1,10 @@ #include -#include #include -#include -#include #include #include #include #include -#include - +#include namespace DB { From f4138ee6c67bbdb82269a9087b1b054f33cb35a8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 10:20:27 +0800 Subject: [PATCH 020/127] fix bugs about corner cases --- src/Functions/FunctionOverlay.cpp | 89 +++++++++++++++++-------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index 7d0e2e86de2..d3ee7e1df6d 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -218,6 +218,26 @@ public: private: + /// 
input offset is 1-based, maybe negative + /// output result is 0-based valid offset, within [0, input_size] + static size_t getValidOffset(Int64 offset, size_t input_size) + { + if (offset > 0) + { + if (static_cast(offset) > input_size + 1) [[unlikely]] + return input_size; + else + return offset - 1; + } + else + { + if (input_size < -static_cast(offset)) [[unlikely]] + return 0; + else + return input_size + offset; + } + } + template void constantConstant( size_t rows, @@ -237,13 +257,10 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative + size_t input_size = input.size; size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } + valid_offset = getValidOffset(const_offset, input_size); size_t replace_size = replace.size; Int64 length = 0; // maybe negative @@ -258,14 +275,14 @@ private: valid_length = replace_size; } + Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; - size_t input_size = input.size; for (size_t i = 0; i < rows; ++i) { if constexpr (!offset_is_const) { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (!three_args && !length_is_const) @@ -274,7 +291,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -321,14 +338,6 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative - size_t valid_offset = 0; // start from 0, not negative - if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } - size_t replace_size = replace.size; Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative @@ -343,16 +352,22 @@ private: } size_t rows = input_offsets.size(); + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; - if constexpr (!offset_is_const) + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (!three_args && !length_is_const) @@ -361,7 +376,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -409,13 +424,10 @@ private: return; } - Int64 offset = 0; // start from 1, maybe negative + size_t input_size = input.size; size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } + valid_offset = getValidOffset(const_offset, input_size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative @@ -426,7 +438,7 @@ private: } size_t rows = replace_offsets.size(); - size_t input_size = input.size; + Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -436,12 +448,11 @@ private: if constexpr (!offset_is_const) { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (three_args) { - // length = replace_size; valid_length = replace_size; } else if constexpr (!length_is_const) @@ -450,7 +461,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -507,15 +518,6 @@ private: return; } - - Int64 offset = 0; // start from 1, maybe negative - size_t valid_offset = 0; // start from 0, not negative - if constexpr (offset_is_const) - { - offset = const_offset; - valid_offset = offset > 0 ? (offset - 1) : (-offset); - } - Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) @@ -525,6 +527,8 @@ private: } size_t rows = input_offsets.size(); + Int64 offset = 0; // start from 1, maybe negative + size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -533,15 +537,18 @@ private: size_t replace_offset = replace_offsets[i - 1]; size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; - if constexpr (!offset_is_const) + if constexpr (offset_is_const) + { + valid_offset = getValidOffset(const_offset, input_size); + } + else { offset = column_offset->getInt(i); - valid_offset = offset > 0 ? (offset - 1) : (-offset); + valid_offset = getValidOffset(offset, input_size); } if constexpr (three_args) { - // length = replace_size; valid_length = replace_size; } else if constexpr (!length_is_const) @@ -550,7 +557,7 @@ private: valid_length = length >= 0 ? length : replace_size; } - size_t prefix_size = valid_offset > input_size ? input_size : valid_offset; + size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator res_data.resize(new_res_size); From fd3f0cf92b7800b171c5723541a329748a0dad1b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 14:17:58 +0800 Subject: [PATCH 021/127] support overlayUTF8 --- src/Functions/FunctionOverlay.cpp | 281 +++++++++++++----- .../0_stateless/03205_overlay.reference | 168 +++++++++++ tests/queries/0_stateless/03205_overlay.sql | 60 ++++ .../0_stateless/03206_overlay_utf8.reference | 168 +++++++++++ .../0_stateless/03206_overlay_utf8.sql | 60 ++++ 5 files changed, 665 insertions(+), 72 deletions(-) create mode 100644 tests/queries/0_stateless/03205_overlay.reference create mode 100644 tests/queries/0_stateless/03205_overlay.sql create mode 100644 tests/queries/0_stateless/03206_overlay_utf8.reference create mode 100644 tests/queries/0_stateless/03206_overlay_utf8.sql diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/FunctionOverlay.cpp index d3ee7e1df6d..61d2df88ab1 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/FunctionOverlay.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include namespace DB { @@ -15,6 +17,8 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +using namespace GatherUtils; + namespace { @@ -24,7 +28,7 @@ template class FunctionOverlay : public IFunction { public: - static constexpr auto name = is_utf8 ? "OverlayUTF8" : "Overlay"; + static constexpr auto name = is_utf8 ? "overlayUTF8" : "overlay"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } @@ -238,6 +242,15 @@ private: } } + /// get character count of a slice [data, data+bytes) + static size_t getSliceSize(const UInt8 * data, size_t bytes) + { + if constexpr (is_utf8) + return UTF8::countCodePoints(data, bytes); + else + return bytes; + } + template void constantConstant( size_t rows, @@ -257,13 +270,12 @@ private: return; } - size_t input_size = input.size; + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) valid_offset = getValidOffset(const_offset, input_size); - size_t replace_size = replace.size; - Int64 length = 0; // maybe negative + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) { @@ -276,6 +288,9 @@ private: } Int64 offset = 0; // start from 1, maybe negative + Int64 length = 0; // maybe negative + const UInt8 * input_begin = reinterpret_cast(input.data); + const UInt8 * input_end = reinterpret_cast(input.data + input.size); size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { @@ -293,28 +308,57 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input.size : prefix_end - input_begin; + + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -338,7 +382,7 @@ private: return; } - size_t replace_size = replace.size; + size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative if constexpr (!three_args && length_is_const) @@ -358,7 +402,8 @@ private: for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; - size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); if constexpr (offset_is_const) { @@ -378,29 +423,59 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 
0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15( - &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + + size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], replace.data, replace.size); + res_offset += replace.size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. 
+ if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -424,7 +499,7 @@ private: return; } - size_t input_size = input.size; + size_t input_size = getSliceSize(reinterpret_cast(input.data), input.size); size_t valid_offset = 0; // start from 0, not negative if constexpr (offset_is_const) valid_offset = getValidOffset(const_offset, input_size); @@ -438,12 +513,15 @@ private: } size_t rows = replace_offsets.size(); + const auto * input_begin = reinterpret_cast(input.data); + const auto * input_end = reinterpret_cast(input.data + input.size); Int64 offset = 0; // start from 1, maybe negative size_t res_offset = 0; for (size_t i = 0; i < rows; ++i) { size_t replace_offset = replace_offsets[i - 1]; - size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); if constexpr (!offset_is_const) { @@ -463,28 +541,55 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data, prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. + if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input.data + prefix_size + valid_length, suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } @@ -533,9 +638,12 @@ private: for (size_t i = 0; i < rows; ++i) { size_t input_offset = input_offsets[i - 1]; - size_t input_size = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_bytes = input_offsets[i] - input_offsets[i - 1] - 1; + size_t input_size = getSliceSize(&input_data[input_offset], input_bytes); + size_t replace_offset = replace_offsets[i - 1]; - size_t replace_size = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_bytes = replace_offsets[i] - replace_offsets[i - 1] - 1; + size_t replace_size = getSliceSize(&replace_data[replace_offset], replace_bytes); if constexpr (offset_is_const) { @@ -559,29 +667,58 @@ private: size_t prefix_size = valid_offset; size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; - size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator - res_data.resize(new_res_size); - /// copy prefix before replaced region - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); - res_offset += prefix_size; - - /// copy replace - memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); - res_offset += replace_size; - - /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. - if (suffix_size) + if constexpr (!is_utf8) { - memcpySmallAllowReadWriteOverflow15( - &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); - res_offset += suffix_size; + size_t new_res_size = res_data.size() + prefix_size + replace_size + suffix_size + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &input_data[input_offset], prefix_size); + res_offset += prefix_size; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_size); + res_offset += replace_size; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_size is zero. 
+ if (suffix_size) + { + memcpySmallAllowReadWriteOverflow15( + &res_data[res_offset], &input_data[input_offset + prefix_size + valid_length], suffix_size); + res_offset += suffix_size; + } + } + else + { + const auto * input_begin = &input_data[input_offset]; + const auto * input_end = &input_data[input_offset + input_bytes]; + const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + size_t prefix_bytes = prefix_end > input_end ? input_bytes : prefix_end - input_begin; + const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + size_t suffix_bytes = input_end - suffix_begin; + size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator + res_data.resize(new_res_size); + + /// copy prefix before replaced region + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], input_begin, prefix_bytes); + res_offset += prefix_bytes; + + /// copy replace + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &replace_data[replace_offset], replace_bytes); + res_offset += replace_bytes; + + /// copy suffix after replaced region. It is not necessary to copy if suffix_bytes is zero. + if (suffix_bytes) + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], suffix_begin, suffix_bytes); + res_offset += suffix_bytes; + } } /// add zero terminator res_data[res_offset] = 0; ++res_offset; - res_offsets[i] = res_offset; } } diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference new file mode 100644 index 00000000000..9e79db2e131 --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -0,0 +1,168 @@ +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark_SQL +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark CORE +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI 
SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Spark ANSI SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL +Structured SQL diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql new file mode 100644 index 00000000000..b131312c934 --- /dev/null +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -0,0 +1,60 @@ +SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlay('Spark SQL', '_', 6) from numbers(3); +SELECT overlay(materialize('Spark SQL'), '_', 6) from numbers(3); +SELECT overlay('Spark SQL', materialize('_'), 6) from numbers(3); +SELECT overlay('Spark SQL', '_', materialize(6)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6) from numbers(3); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)) from numbers(3); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)) from numbers(3); + +SELECT overlay('Spark SQL', 'CORE', 7) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'CORE', 7) from numbers(3); +SELECT overlay('Spark SQL', materialize('CORE'), 7) from numbers(3); +SELECT overlay('Spark SQL', 'CORE', materialize(7)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)) from numbers(3); +SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)) from numbers(3); + +SELECT overlay('Spark SQL', 'ANSI ', 7, 0) 
from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlay('Spark SQL', 'tructured', 2, 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), 2, 4) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', materialize(2), 4) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4) from numbers(3); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4) from numbers(3); +SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)) from numbers(3); +SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)) from numbers(3); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); diff --git a/tests/queries/0_stateless/03206_overlay_utf8.reference b/tests/queries/0_stateless/03206_overlay_utf8.reference new file mode 100644 index 00000000000..19878c97184 --- /dev/null +++ b/tests/queries/0_stateless/03206_overlay_utf8.reference @@ -0,0 +1,168 @@ +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark_SQL和CH +Spark CORECH +Spark CORECH +Spark CORECH 
+Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark CORECH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Spark ANSI SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH +Structured SQL和CH diff --git a/tests/queries/0_stateless/03206_overlay_utf8.sql b/tests/queries/0_stateless/03206_overlay_utf8.sql new file mode 100644 index 00000000000..00b756c8b5b --- /dev/null +++ b/tests/queries/0_stateless/03206_overlay_utf8.sql @@ -0,0 +1,60 @@ +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', '_', 6) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', 6) from numbers(3); 
+SELECT overlayUTF8('Spark SQL和CH', materialize('_'), 6) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', '_', materialize(6)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), 7) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); + +SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)) from numbers(3); +SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)) from numbers(3); +SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); From c09c22b17575396e38fb45cb385dcc8a49f9a183 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 Jul 2024 14:45:47 +0800 Subject: [PATCH 022/127] 
finish doc
---
 .../functions/string-replace-functions.md | 72 +++++++++++++++++++
 ...new_functions_must_be_documented.reference | 2 +
 2 files changed, 74 insertions(+)

diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index 8793ebdd1a3..4e1f89fd974 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -223,3 +223,75 @@ SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res;
 │ Munchener Strase │
 └──────────────────┘
 ```
+
+## overlay
+
+Replaces part of the string `s` with the string `replace`, starting at the 1-based position `position` and covering `length` bytes. If `length` is omitted or negative, it defaults to the length of `replace`.
+
+**Syntax**
+
+```sql
+overlay(s, replace, position[, length])
+```
+
+**Parameters**
+
+- `s`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `position`: An integer type [Int](../data-types/int.md).
+- `length`: Optional. An integer type [Int](../data-types/int.md).
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value. If `position` is negative, the position is counted from the end of the string. `length` specifies the length of the snippet within the input that is replaced.
+
+**Example**
+
+```sql
+SELECT overlay('Spark SQL', 'CORE', 7) AS res;
+```
+
+Result:
+
+```text
+ ┌─res────────┐
+ │ Spark CORE │
+ └────────────┘
+```
+
+## overlayUTF8
+
+Replaces part of the string `s` with the string `replace`, starting at the 1-based position `position` and covering `length` UTF-8 characters. If `length` is omitted or negative, it defaults to the length of `replace`.
+
+Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+**Syntax**
+
+```sql
+overlayUTF8(s, replace, position[, length])
+```
+
+**Parameters**
+
+- `s`: A string type [String](../data-types/string.md).
+- `replace`: A string type [String](../data-types/string.md).
+- `position`: An integer type [Int](../data-types/int.md).
+- `length`: Optional. An integer type [Int](../data-types/int.md).
+
+**Returned value**
+
+- A [String](../data-types/string.md) data type value. If `position` is negative, the position is counted from the end of the string. `length` specifies the length of the snippet within the input that is replaced.
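For a quick cross-check of the semantics described above, the queries below are condensed from the new 03205_overlay / 03206_overlay_utf8 tests added further down in this patch series; the expected results shown as comments are taken from their reference files (for the UTF-8 variant, `position` and `length` count characters, not bytes):

```sql
-- Omitted length defaults to the length of the replacement ('_' is one character).
SELECT overlayUTF8('Spark SQL和CH', '_', 6);              -- Spark_SQL和CH
-- A replacement of the same length substitutes characters 7..10 ('SQL和').
SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7);           -- Spark CORECH
-- Zero length inserts the replacement without removing anything.
SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0);       -- Spark ANSI SQL和CH
-- An explicit length of 4 replaces 'park' starting at position 2.
SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4);   -- Structured SQL和CH
```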
+ +**Example** + +```sql +SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; +``` + +Result: + +```text +┌─res────────────────────────┐ +│ ClickHouse是开源OLAP数据库 │ +└────────────────────────────┘ +``` diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index a152066a460..ba9d3fb7a83 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -512,6 +512,8 @@ nullIf nullIn nullInIgnoreSet or +overlay +overlayUTF8 parseDateTime parseDateTime32BestEffort parseDateTime32BestEffortOrNull From c837541a7783f14780a7d2535dd6fa2cbf5effd5 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 25 Jul 2024 10:11:53 +0800 Subject: [PATCH 023/127] fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 943caf918d6..fa26cc0ff1f 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2166,6 +2166,7 @@ outfile overcommit overcommitted overfitting +overlayUTF overparallelization packetpool packetsize From 3db505a1327fc5bf96c93f2a510436402be13f3b Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 27 Jul 2024 12:53:38 +0200 Subject: [PATCH 024/127] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 1dda9e72084..dc3bf984cc6 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -67,6 +67,7 @@ static std::initializer_list Date: Mon, 29 Jul 2024 13:03:21 +0200 Subject: [PATCH 025/127] Move setting to 24.8 version --- src/Core/SettingsChangesHistory.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index dc3bf984cc6..41319ac7645 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -57,6 +57,8 @@ String ClickHouseVersion::toString() const /// Note: please check if the key already exists to prevent duplicate entries. 
static std::initializer_list> settings_changes_history_initializer = {
+    {"24.8", {{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
+    }},
     {"24.7", {{"output_format_parquet_write_page_index", false, true, "Add a possibility to write page index into parquet files."}, {"output_format_binary_encode_types_in_binary_format", false, false, "Added new setting to allow to write type names in binary format in RowBinaryWithNamesAndTypes output format"}, {"input_format_binary_decode_types_in_binary_format", false, false, "Added new setting to allow to read type names in binary format in RowBinaryWithNamesAndTypes input format"},
@@ -66,7 +68,6 @@ static std::initializer_list Date: Mon, 5 Aug 2024 21:10:31 +0000
Subject: [PATCH 026/127] Allow to specify min and max for random settings in the test

---
 docs/en/development/tests.md | 22 +++++
 tests/clickhouse-test | 92 ++++++++++++++++---
 ...mic_read_subcolumns_compact_merge_tree.sql | 1 +
 ...ynamic_read_subcolumns_wide_merge_tree.sql | 1 +
 ...merges_1_horizontal_compact_merge_tree.sql | 4 +
 ..._merges_1_horizontal_compact_wide_tree.sql | 2 +
 ...c_merges_1_vertical_compact_merge_tree.sql | 2 +
 ...amic_merges_1_vertical_wide_merge_tree.sql | 2 +
 ...merges_2_horizontal_compact_merge_tree.sql | 1 +
 ...ic_merges_2_horizontal_wide_merge_tree.sql | 1 +
 ...c_merges_2_vertical_compact_merge_tree.sql | 1 +
 ...amic_merges_2_vertical_wide_merge_tree.sql | 1 +
 ...sted_dynamic_merges_compact_horizontal.sql | 1 +
 ...nested_dynamic_merges_compact_vertical.sql | 1 +
 ..._nested_dynamic_merges_wide_horizontal.sql | 1 +
 ...38_nested_dynamic_merges_wide_vertical.sql | 1 +
 16 files changed, 122 insertions(+), 12 deletions(-)

diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md
index 269995a1a96..f0afa983fec 100644
--- a/docs/en/development/tests.md
+++ b/docs/en/development/tests.md
@@ -91,6 +91,28 @@ SELECT 1
 In addition to the above settings, you can use `USE_*` flags from `system.build_options` to define usage of particular ClickHouse features. For example, if your test uses a MySQL table, you should add a tag `use-mysql`.
+### Specifying limits for random settings
+
+A test can specify minimum and maximum allowed values for settings that can be randomized during a test run.
+
+For `.sh` tests limits are written as a comment on the line next to tags or on the second line if no tags are specified:
+
+```bash
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None)
+```
+
+For `.sql` tests limits are written as a SQL comment in the line next to tags or in the first line if no tags are specified:
+
+```sql
+-- Tags: no-fasttest
+-- Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None)
+SELECT 1
+```
+
+If you need to specify only one limit, you can use `None` for the other one.
+
 ### Choosing the Test Name
 The name of the test starts with a five-digit prefix followed by a descriptive name, such as `00422_hash_function_constexpr.sql`. To choose the prefix, find the largest prefix already present in the directory, and increment it by one. In the meantime, some other tests might be added with the same numeric prefix, but this is OK and does not lead to any problems, you don't have to change it later.
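As a concrete example of the documented syntax, the stateless tests touched later in this commit combine the tags line with the new limits comment; for instance, 03036_dynamic_read_subcolumns_compact_merge_tree.sql (shown in the diff below) now begins like this:

```sql
-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan
-- Random settings limits: index_granularity=(100, None)

set allow_experimental_variant_type = 1;
set use_variant_as_common_type = 1;
```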
diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a29c786e998..ea488e7c3dd 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -39,6 +39,7 @@ from errno import ESRCH from subprocess import PIPE, Popen from time import sleep, time from typing import Dict, List, Optional, Set, Tuple, Union +from ast import literal_eval as make_tuple try: import termcolor # type: ignore @@ -1068,9 +1069,25 @@ class TestCase: return description + "\n" + def apply_random_settings_limits(self, random_settings): + print("Random settings limits:", self.random_settings_limits) + for setting in random_settings: + if setting in self.random_settings_limits: + min = self.random_settings_limits[setting][0] + if min and random_settings[setting] < min: + random_settings[setting] = min + max = self.random_settings_limits[setting][1] + if max and random_settings[setting] > max: + random_settings[setting] = max + def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name self.tags: Set[str] = suite.all_tags[case] if case in suite.all_tags else set() + self.random_settings_limits = ( + suite.all_random_settings_limits[case] + if case in suite.all_random_settings_limits + else dict() + ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): self.tags.add(tag.strip()) @@ -1112,11 +1129,13 @@ class TestCase: if self.randomize_settings: self.random_settings = SettingsRandomizer.get_random_settings(args) + self.apply_random_settings_limits(self.random_settings) if self.randomize_merge_tree_settings: self.merge_tree_random_settings = ( MergeTreeSettingsRandomizer.get_random_settings(args) ) + self.apply_random_settings_limits(self.merge_tree_random_settings) self.base_url_params = ( os.environ["CLICKHOUSE_URL_PARAMS"] @@ -1900,7 +1919,9 @@ class TestSuite: return test_name @staticmethod - def read_test_tags(suite_dir: str, all_tests: List[str]) -> Dict[str, Set[str]]: + def read_test_tags_and_random_settings_limits( + suite_dir: str, all_tests: List[str] + ) -> (Dict[str, Set[str]], Dict[str, Dict[str, Tuple[int, int]]]): def get_comment_sign(filename): if filename.endswith(".sql") or filename.endswith(".sql.j2"): return "--" @@ -1925,22 +1946,48 @@ class TestSuite: tags = {tag.strip() for tag in tags} return tags + def parse_random_settings_limits_from_line( + line, comment_sign + ) -> Dict[str, Tuple[int, int]]: + if not line.startswith(comment_sign): + return {} + random_settings_limits_str = line[ + len(comment_sign) : + ].lstrip() # noqa: ignore E203 + random_settings_limits_prefix = "Random settings limits:" + if not random_settings_limits_str.startswith(random_settings_limits_prefix): + return {} + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] # noqa: ignore E203 + # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' 
+ random_settings_limits = re.findall( + "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str + ) + random_settings_limits = { + pair[0]: make_tuple(pair[1]) for pair in random_settings_limits + } + return random_settings_limits + def is_shebang(line: str) -> bool: return line.startswith("#!") def find_tag_line(file): - for line in file: + line = file.readline() + while line != "": line = line.strip() if line and not is_shebang(line): return line + line = file.readline() return "" - def load_tags_from_file(filepath): + def load_tags_and_random_settings_limits_from_file(filepath): comment_sign = get_comment_sign(filepath) need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: tag_line = find_tag_line(file) + next_line = file.readline() except UnicodeDecodeError: return [] try: @@ -1950,21 +1997,35 @@ class TestSuite: need_query_params = True except UnicodeDecodeError: pass - parsed_tags = parse_tags_from_line(tag_line, comment_sign) - if need_query_params: - parsed_tags.add("need-query-parameters") - return parsed_tags + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + random_settings_limits_line = next_line if parsed_tags else tag_line + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) + return parsed_tags, random_settings_limits all_tags = {} + all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags = load_tags_from_file(os.path.join(suite_dir, test_name)) + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) if tags: all_tags[test_name] = tags + if random_settings_limits: + all_random_settings_limits[test_name] = random_settings_limits elapsed = (datetime.now() - start_time).total_seconds() if elapsed > 1: - print(f"Tags for suite {suite_dir} read in {elapsed:.2f} seconds") - return all_tags + print( + f"Tags and random settings limits for suite {suite_dir} read in {elapsed:.2f} seconds" + ) + return all_tags, all_random_settings_limits def __init__(self, args, suite_path: str, suite_tmp_path: str, suite: str): self.args = args @@ -1994,9 +2055,16 @@ class TestSuite: self.all_tests: List[str] = self.get_tests_list( self.tests_in_suite_key_func, filter_func ) - self.all_tags: Dict[str, Set[str]] = self.read_test_tags( - self.suite_path, self.all_tests + + all_tags_and_random_settings_limits = ( + self.read_test_tags_and_random_settings_limits( + self.suite_path, self.all_tests + ) ) + self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql index ddfba4418bd..822393d3c78 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql 
b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 5aac5f7b72f..2394893dc8b 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index d2c787040e5..7c2e7c3d2be 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; @@ -31,3 +33,5 @@ optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); drop table test; + +select 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index f99bf771608..aa62435188a 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index be81596d043..bfc7bb9d206 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index f6396af42a8..233667db0a7 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,4 +1,6 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) + set allow_experimental_dynamic_type=1; drop table if exists test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index e133ac3001f..48a6a55378c 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, 
no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index d527081b763..44b298b1c35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index ebccfb77922..f42150720b3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index 104d6018e41..ee4ff6af162 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,4 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 1d5c63dcdf1..e0636f053df 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index 2bffe35c577..edfad295e9a 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index fb686091ebb..79d488ec253 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set 
allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index ed195452d56..e2a453b867a 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,4 +1,5 @@ -- Tags: long +-- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; From d3dc17453377368defd80cda9f4b95dda6adc9df Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:15:11 +0000 Subject: [PATCH 027/127] Remove log --- tests/clickhouse-test | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 389193836bf..5fcb9fb80f1 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -682,7 +682,6 @@ class FailureReason(enum.Enum): BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" - DISTRIBUTED_CACHE = "distributed-cache" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -1071,7 +1070,6 @@ class TestCase: return description + "\n" def apply_random_settings_limits(self, random_settings): - print("Random settings limits:", self.random_settings_limits) for setting in random_settings: if setting in self.random_settings_limits: min = self.random_settings_limits[setting][0] @@ -1211,9 +1209,6 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB - elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: - return FailureReason.DISTRIBUTED_CACHE - elif ( tags and ("atomic-database" in tags) @@ -1251,11 +1246,6 @@ class TestCase: ): return FailureReason.SKIP - elif "no-flaky-check" in tags and ( - 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) - ): - return FailureReason.SKIP - elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -2295,6 +2285,7 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) + break except TimeoutError: break finally: @@ -3280,12 +3271,6 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) - parser.add_argument( - "--distributed-cache", - action="store_true", - default=False, - help="Run tests with enabled distributed cache", - ) parser.add_argument( "--azure-blob-storage", action="store_true", From 18a7a82458ce7ec3f12f7b6751699f119769ed55 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 5 Aug 2024 21:16:18 +0000 Subject: [PATCH 028/127] Better formatting --- tests/clickhouse-test | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5fcb9fb80f1..bcb8a12625b 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1950,15 +1950,11 @@ class TestSuite: ) -> Dict[str, Tuple[int, int]]: if not line.startswith(comment_sign): return {} - random_settings_limits_str = line[ - len(comment_sign) : - ].lstrip() # noqa: ignore E203 + random_settings_limits_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 random_settings_limits_prefix = 
"Random settings limits:" if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} - random_settings_limits_str = random_settings_limits_str[ - len(random_settings_limits_prefix) : - ] # noqa: ignore E203 + random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' random_settings_limits = re.findall( "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str @@ -1996,25 +1992,20 @@ class TestSuite: need_query_params = True except UnicodeDecodeError: pass - parsed_tags = parse_tags_from_line(tag_line, comment_sign) - if need_query_params: - parsed_tags.add("need-query-parameters") - random_settings_limits_line = next_line if parsed_tags else tag_line - random_settings_limits = parse_random_settings_limits_from_line( - random_settings_limits_line, comment_sign - ) + parsed_tags = parse_tags_from_line(tag_line, comment_sign) + if need_query_params: + parsed_tags.add("need-query-parameters") + random_settings_limits_line = next_line if parsed_tags else tag_line + random_settings_limits = parse_random_settings_limits_from_line( + random_settings_limits_line, comment_sign + ) return parsed_tags, random_settings_limits all_tags = {} all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - ( - tags, - random_settings_limits, - ) = load_tags_and_random_settings_limits_from_file( - os.path.join(suite_dir, test_name) - ) + tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2061,9 +2052,7 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 self.sequential_tests = [] self.parallel_tests = [] From 74a2976810b86086819ee8e6ee1f110ab1e70a37 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:13:03 +0000 Subject: [PATCH 029/127] Fix pylint --- tests/clickhouse-test | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index bcb8a12625b..84f33860484 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1072,12 +1072,12 @@ class TestCase: def apply_random_settings_limits(self, random_settings): for setting in random_settings: if setting in self.random_settings_limits: - min = self.random_settings_limits[setting][0] - if min and random_settings[setting] < min: - random_settings[setting] = min - max = self.random_settings_limits[setting][1] - if max and random_settings[setting] > max: - random_settings[setting] = max + min_value = self.random_settings_limits[setting][0] + if min_value and random_settings[setting] < min_value: + random_settings[setting] = min_value + max_value = self.random_settings_limits[setting][1] + if max_value and random_settings[setting] > max_value: + random_settings[setting] = max_value def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name @@ -2005,7 +2005,7 @@ class TestSuite: all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags, random_settings_limits = 
load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 + tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2052,7 +2052,7 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 self.sequential_tests = [] self.parallel_tests = [] From 5226792b1d8b4e110c63a813fb68c9dd65ea07b7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:48:06 +0000 Subject: [PATCH 030/127] Fix bad merge with master --- tests/clickhouse-test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 84f33860484..c4124982442 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1163,6 +1163,9 @@ class TestCase: elif args.cloud and ("no-replicated-database" in tags): return FailureReason.REPLICATED_DB + elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: + return FailureReason.DISTRIBUTED_CACHE + elif args.cloud and self.name in suite.cloud_skip_list: return FailureReason.NOT_SUPPORTED_IN_CLOUD @@ -1246,6 +1249,11 @@ class TestCase: ): return FailureReason.SKIP + elif "no-flaky-check" in tags and ( + 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) + ): + return FailureReason.SKIP + elif tags: for build_flag in args.build_flags: if "no-" + build_flag in tags: @@ -2274,7 +2282,6 @@ def run_tests_array(all_tests_with_params: Tuple[List[str], int, TestSuite, bool args, test_suite, client_options, server_logs_level ) test_result = test_case.process_result(test_result, MESSAGES) - break except TimeoutError: break finally: @@ -3260,6 +3267,12 @@ def parse_args(): default=False, help="Run tests over s3 storage", ) + parser.add_argument( + "--distributed-cache", + action="store_true", + default=False, + help="Run tests with enabled distributed cache", + ) parser.add_argument( "--azure-blob-storage", action="store_true", From bb33dca38470aba044da06938cc96ca55166262d Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 08:49:08 +0000 Subject: [PATCH 031/127] Fix unrelated changes --- tests/clickhouse-test | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c4124982442..72136404796 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -682,6 +682,7 @@ class FailureReason(enum.Enum): BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" SHARED_MERGE_TREE = "no-shared-merge-tree" + DISTRIBUTED_CACHE = "distributed-cache" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -1163,9 +1164,6 @@ class TestCase: elif args.cloud and ("no-replicated-database" in tags): return FailureReason.REPLICATED_DB - elif tags and ("no-distributed-cache" in tags) and args.distributed_cache: - return FailureReason.DISTRIBUTED_CACHE - elif args.cloud and self.name in suite.cloud_skip_list: return FailureReason.NOT_SUPPORTED_IN_CLOUD @@ -1212,6 +1210,9 @@ class TestCase: elif tags and ("no-replicated-database" in tags) and args.replicated_database: return FailureReason.REPLICATED_DB + elif tags 
and ("no-distributed-cache" in tags) and args.distributed_cache: + return FailureReason.DISTRIBUTED_CACHE + elif ( tags and ("atomic-database" in tags) @@ -1250,7 +1251,7 @@ class TestCase: return FailureReason.SKIP elif "no-flaky-check" in tags and ( - 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) + 1 == int(os.environ.get("IS_FLAKY_CHECK", 0)) ): return FailureReason.SKIP From 71c06b40cbf65abda49579bf5ac08e46575c7d29 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 09:07:21 +0000 Subject: [PATCH 032/127] Avoid regexp --- docs/en/development/tests.md | 4 ++-- tests/clickhouse-test | 14 +++++++------- ...amic_merges_1_horizontal_compact_merge_tree.sql | 2 +- ...namic_merges_1_horizontal_compact_wide_tree.sql | 2 +- ...ynamic_merges_1_vertical_compact_merge_tree.sql | 2 +- ...7_dynamic_merges_1_vertical_wide_merge_tree.sql | 2 +- ...amic_merges_2_horizontal_compact_merge_tree.sql | 2 +- ...dynamic_merges_2_horizontal_wide_merge_tree.sql | 2 +- ...ynamic_merges_2_vertical_compact_merge_tree.sql | 2 +- ...7_dynamic_merges_2_vertical_wide_merge_tree.sql | 2 +- ...38_nested_dynamic_merges_compact_horizontal.sql | 2 +- ...3038_nested_dynamic_merges_compact_vertical.sql | 2 +- ...03038_nested_dynamic_merges_wide_horizontal.sql | 2 +- .../03038_nested_dynamic_merges_wide_vertical.sql | 2 +- 14 files changed, 21 insertions(+), 21 deletions(-) diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index f0afa983fec..bc9f85ef323 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -100,14 +100,14 @@ For `.sh` tests limits are written as a comment on the line next to tags or on t ```bash #!/usr/bin/env bash # Tags: no-fasttest -# Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None) +# Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) ``` For `.sql` tests tags are placed as a SQL comment in the line next to tags or in the first line: ```sql -- Tags: no-fasttest --- Random settings limits: max_block_size=(1000, 10000), index_granularity=(100, None) +-- Random settings limits: max_block_size=(1000, 10000); index_granularity=(100, None) SELECT 1 ``` diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 72136404796..e5378e8c7f3 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1964,13 +1964,13 @@ class TestSuite: if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 - # limits are specified in a form 'setting1=(min, max), setting2=(min,max), ...' - random_settings_limits = re.findall( - "([^=, ]+) *= *(\([^=]+\))", random_settings_limits_str - ) - random_settings_limits = { - pair[0]: make_tuple(pair[1]) for pair in random_settings_limits - } + # limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' 
+ random_settings_limits = {} + for setting_and_limit in random_settings_limits_str.split(';'): + setting_and_limit = setting_and_limit.split('=') + random_settings_limits[setting_and_limit[0].strip()] = make_tuple( + setting_and_limit[1] + ) return random_settings_limits def is_shebang(line: str) -> bool: diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index 7c2e7c3d2be..46f1c78b255 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index aa62435188a..bf0c6ef0374 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index bfc7bb9d206..fb82369a7a3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index 233667db0a7..c026bc04a56 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type=1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index 71c6841515a..7f1934091f2 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), 
merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index 94ae1d867f5..f1f387fae9d 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index 98ae230636a..cc11c454d38 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index f8f5bd5d9e1..ffb2aca8b35 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql index 13c1fd8b485..9ec4e4f949b 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql index daa95071cdb..ed4de931841 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set 
use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql index dea7e7c0971..bd3c4b58a8f 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql index bf1323f2ea9..81bcda5443d 100644 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -1,5 +1,5 @@ -- Tags: long, no-tsan, no-msan, no-ubsan, no-asan --- Random settings limits: index_granularity=(100, None), merge_max_block_size=(100, None) +-- Random settings limits: index_granularity=(100, None); merge_max_block_size=(100, None) set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; From 621f4bbf9e04b62628a9c053b3f39c6b8a67a52d Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 6 Aug 2024 14:13:20 +0200 Subject: [PATCH 033/127] Update SettingsChangesHistory.cpp --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index f815a21b6a1..bb062deaab0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,6 +79,7 @@ static std::initializer_list Date: Tue, 6 Aug 2024 15:01:10 +0200 Subject: [PATCH 034/127] Fix pylint --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index e5378e8c7f3..dea303ecdfb 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1086,7 +1086,7 @@ class TestCase: self.random_settings_limits = ( suite.all_random_settings_limits[case] if case in suite.all_random_settings_limits - else dict() + else {} ) for tag in os.getenv("GLOBAL_TAGS", "").split(","): From d124de847b44344d9346c4d1b76ada03b31c58c8 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 6 Aug 2024 16:06:59 +0000 Subject: [PATCH 035/127] Fix style --- tests/clickhouse-test | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index dea303ecdfb..c3b1d4d907c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1959,15 +1959,17 @@ class TestSuite: ) -> Dict[str, Tuple[int, int]]: if not line.startswith(comment_sign): return {} - random_settings_limits_str = line[len(comment_sign) :].lstrip() # noqa: ignore E203 + random_settings_limits_str = line[len(comment_sign) :].lstrip() random_settings_limits_prefix = "Random settings limits:" if not random_settings_limits_str.startswith(random_settings_limits_prefix): return {} - random_settings_limits_str = random_settings_limits_str[len(random_settings_limits_prefix) :] # noqa: ignore E203 + random_settings_limits_str = random_settings_limits_str[ + len(random_settings_limits_prefix) : + ] # 
limits are specified in a form 'setting1=(min, max); setting2=(min,max); ...' random_settings_limits = {} - for setting_and_limit in random_settings_limits_str.split(';'): - setting_and_limit = setting_and_limit.split('=') + for setting_and_limit in random_settings_limits_str.split(";"): + setting_and_limit = setting_and_limit.split("=") random_settings_limits[setting_and_limit[0].strip()] = make_tuple( setting_and_limit[1] ) @@ -2014,7 +2016,12 @@ class TestSuite: all_random_settings_limits = {} start_time = datetime.now() for test_name in all_tests: - tags, random_settings_limits = load_tags_and_random_settings_limits_from_file(os.path.join(suite_dir, test_name)) # noqa: ignore E203 + ( + tags, + random_settings_limits, + ) = load_tags_and_random_settings_limits_from_file( + os.path.join(suite_dir, test_name) + ) # noqa: ignore E203 if tags: all_tags[test_name] = tags if random_settings_limits: @@ -2061,7 +2068,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = all_tags_and_random_settings_limits[1] # noqa: ignore E203 + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] From 0ebe8e35511f764b61cb2428433132644f7deb96 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 6 Aug 2024 18:38:23 +0200 Subject: [PATCH 036/127] Fix style --- src/Core/SettingsChangesHistory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e6949dd4fba..1ebc9b07748 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -79,7 +79,7 @@ static std::initializer_list Date: Wed, 7 Aug 2024 14:42:42 +0200 Subject: [PATCH 037/127] Fix style check --- tests/clickhouse-test | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c3b1d4d907c..5946e561949 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2068,10 +2068,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] - + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From f2731841de804c30ece1c75e84c8ca8d3eb62ef8 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:20:21 +0200 Subject: [PATCH 038/127] init --- src/Core/callOnTypeIndex.h | 3 + src/DataTypes/getLeastSupertype.cpp | 41 +++++ src/DataTypes/getLeastSupertype.h | 22 +++ src/Functions/FunctionsConversion.cpp | 61 +++++++- ...23_interval_data_type_comparison.reference | 99 ++++++++++++ .../03223_interval_data_type_comparison.sql | 142 ++++++++++++++++++ 6 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03223_interval_data_type_comparison.reference create mode 100644 tests/queries/0_stateless/03223_interval_data_type_comparison.sql diff --git a/src/Core/callOnTypeIndex.h b/src/Core/callOnTypeIndex.h index f5f67df563b..ae5afce36be 100644 --- a/src/Core/callOnTypeIndex.h +++ 
b/src/Core/callOnTypeIndex.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB @@ -212,6 +213,8 @@ static bool callOnIndexAndDataType(TypeIndex number, F && f, ExtraArgs && ... ar case TypeIndex::IPv4: return f(TypePair(), std::forward(args)...); case TypeIndex::IPv6: return f(TypePair(), std::forward(args)...); + case TypeIndex::Interval: return f(TypePair(), std::forward(args)...); + default: break; } diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index a71b19d6c92..0b9c744c091 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -228,6 +228,40 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type } } +DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) +{ + const auto& granularity_map = getGranularityMap(); + int min_granularity = std::get<0>(granularity_map.at(IntervalKind::Kind::Year)); + DataTypePtr smallest_type; + + bool is_higher_interval = false; // For Years, Quarters and Months + + for (const auto &type : types) + { + if (const auto * interval_type = typeid_cast(type.get())) + { + int current_granularity = std::get<0>(granularity_map.at(interval_type->getKind())); + if (current_granularity > 8) + is_higher_interval = true; + if (current_granularity < min_granularity) + { + min_granularity = current_granularity; + smallest_type = type; + } + } + } + + if (is_higher_interval && min_granularity <= 8) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + + if (smallest_type) + { + types_set.clear(); + types_set.insert(smallest_type->getTypeId()); + } + + return smallest_type; +} } template @@ -652,6 +686,13 @@ DataTypePtr getLeastSupertype(const DataTypes & types) return numeric_type; } + /// For interval data types. + { + auto res = findSmallestIntervalSuperType(types, type_ids); + if (res) + return res; + } + /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases). 
return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 2ae1e52ca96..c584eb83011 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -1,5 +1,7 @@ #pragma once #include +#include +#include namespace DB { @@ -48,4 +50,24 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); +/// A map that enumerated all interval kinds in ascending order with a conversion value to a next interval +inline const std::unordered_map> & getGranularityMap() +{ + static std::unordered_map> granularity_map = + { + {IntervalKind::Kind::Nanosecond, {1, 1000}}, + {IntervalKind::Kind::Microsecond, {2, 1000}}, + {IntervalKind::Kind::Millisecond, {3, 1000}}, + {IntervalKind::Kind::Second, {4, 60}}, + {IntervalKind::Kind::Minute, {5, 60}}, + {IntervalKind::Kind::Hour, {6, 24}}, + {IntervalKind::Kind::Day, {7, 7}}, + {IntervalKind::Kind::Week, {8, 4}}, + {IntervalKind::Kind::Month, {9, 3}}, + {IntervalKind::Kind::Quarter, {10, 4}}, + {IntervalKind::Kind::Year, {11, 1}} + }; + return granularity_map; +} + } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 675283d011e..0ab1858dc97 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -1573,6 +1574,55 @@ struct ConvertImpl arguments, result_type, input_rows_count, additions); } } + else if constexpr (std::is_same_v && std::is_same_v) + { + IntervalKind to = typeid_cast(result_type.get())->getKind(); + IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); + + if (from == to) + return arguments[0].column; + + const auto &map = getGranularityMap(); + Int64 conversion_factor = 1; + Int64 result_value; + + int from_position = map.at(from).first; + int to_position = map.at(to).first; // Positions of each interval according to granurality map + + if (from_position < to_position) + { + for (int i = from_position - 1; i <= to_position; ++i) + { + // Find the kind that matches this position + for (const auto &entry : map) + { + if (entry.second.first == i) + { + conversion_factor *= entry.second.second; + break; + } + } + } + result_value = arguments[0].column->getInt(0) / conversion_factor; + } + else + { + for (int i = from_position - 1; i >= to_position; --i) + { + for (const auto &entry : map) + { + if (entry.second.first == i) + { + conversion_factor *= entry.second.second; + break; + } + } + } + result_value = arguments[0].column->getInt(0) * conversion_factor; + } + + return ColumnConst::create(ColumnInt64::create(1, result_value), input_rows_count); + } else { using FromFieldType = typename FromDataType::FieldType; @@ -2181,7 +2231,7 @@ private: const DataTypePtr from_type = removeNullable(arguments[0].type); ColumnPtr result_column; - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; if (context) date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; @@ -2277,7 +2327,7 @@ private: } } else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, from_string_tag); + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, 
from_string_tag); return true; }; @@ -2334,6 +2384,11 @@ private: else done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } + + if constexpr (std::is_same_v) + { + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); + } } if (!done) @@ -5224,7 +5279,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. - factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.reference b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference new file mode 100644 index 00000000000..e98f792e4b2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.reference @@ -0,0 +1,99 @@ +Comparing nanoseconds +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +Comparing microseconds +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +0 +Comparing milliseconds +1 +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +0 +Comparing seconds +1 +1 +1 +1 +1 +0 +0 +0 +0 +0 +Comparing minutes +1 +1 +1 +1 +0 +0 +0 +0 +Comparing hours +1 +1 +1 +0 +0 +0 +Comparing days +1 +1 +0 +0 +Comparing weeks +1 +0 +Comparing months +1 +1 +1 +0 +0 +0 +Comparing quarters +1 +1 +0 +0 +Comparing years +1 +0 diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql new file mode 100644 index 00000000000..6e4862bf2d2 --- /dev/null +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -0,0 +1,142 @@ +SELECT('Comparing nanoseconds'); +SELECT toIntervalNanosecond(500) > toIntervalNanosecond(300); +SELECT toIntervalNanosecond(1000) < toIntervalNanosecond(1500); +SELECT toIntervalNanosecond(2000) = toIntervalNanosecond(2000); +SELECT toIntervalNanosecond(1000) >= toIntervalMicrosecond(1); +SELECT toIntervalNanosecond(1000001) > toIntervalMillisecond(1); +SELECT toIntervalNanosecond(2000000001) > toIntervalSecond(2); +SELECT toIntervalNanosecond(60000000000) = toIntervalMinute(1); +SELECT toIntervalNanosecond(7199999999999) < toIntervalHour(2); +SELECT toIntervalNanosecond(1) < toIntervalDay(2); +SELECT toIntervalNanosecond(5) < toIntervalWeek(1); + +SELECT toIntervalNanosecond(500) < toIntervalNanosecond(300); +SELECT toIntervalNanosecond(1000) > toIntervalNanosecond(1500); +SELECT toIntervalNanosecond(2000) != toIntervalNanosecond(2000); +SELECT toIntervalNanosecond(1000) < toIntervalMicrosecond(1); +SELECT toIntervalNanosecond(1000001) < toIntervalMillisecond(1); +SELECT toIntervalNanosecond(2000000001) < toIntervalSecond(2); +SELECT toIntervalNanosecond(60000000000) != toIntervalMinute(1); +SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); +SELECT toIntervalNanosecond(1) > toIntervalDay(2); +SELECT toIntervalNanosecond(5) > toIntervalWeek(1); + +SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing microseconds'); +SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); +SELECT toIntervalMicrosecond(1001) > toIntervalMillisecond(1); +SELECT toIntervalMicrosecond(2000000) = toIntervalSecond(2); 
+SELECT toIntervalMicrosecond(179999999) < toIntervalMinute(3); +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); +SELECT toIntervalMicrosecond(36000000000000) > toIntervalDay(2); +SELECT toIntervalMicrosecond(1209600000000) = toIntervalWeek(2); + +SELECT toIntervalMicrosecond(1) > toIntervalMicrosecond(999); +SELECT toIntervalMicrosecond(1001) < toIntervalMillisecond(1); +SELECT toIntervalMicrosecond(2000000) != toIntervalSecond(2); +SELECT toIntervalMicrosecond(179999999) > toIntervalMinute(3); +SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); +SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); +SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); + +SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing milliseconds'); +SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); +SELECT toIntervalMillisecond(2000) = toIntervalSecond(2); +SELECT toIntervalMillisecond(170000) < toIntervalMinute(3); +SELECT toIntervalMillisecond(144000001) > toIntervalHour(40); +SELECT toIntervalMillisecond(1728000000) = toIntervalDay(20); +SELECT toIntervalMillisecond(1198599999) < toIntervalWeek(2); + +SELECT toIntervalMillisecond(2000) < toIntervalMillisecond(2); +SELECT toIntervalMillisecond(2000) != toIntervalSecond(2); +SELECT toIntervalMillisecond(170000) > toIntervalMinute(3); +SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); +SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); +SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); + +SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing seconds'); +SELECT toIntervalSecond(120) > toIntervalSecond(2); +SELECT toIntervalSecond(120) = toIntervalMinute(2); +SELECT toIntervalSecond(1) < toIntervalHour(2); +SELECT toIntervalSecond(86401) >= toIntervalDay(1); +SELECT toIntervalSecond(1209600) = toIntervalWeek(2); + +SELECT toIntervalSecond(120) < toIntervalSecond(2); +SELECT toIntervalSecond(120) != toIntervalMinute(2); +SELECT toIntervalSecond(1) > toIntervalHour(2); +SELECT toIntervalSecond(86401) < toIntervalDay(1); +SELECT toIntervalSecond(1209600) != toIntervalWeek(2); + +SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing minutes'); +SELECT toIntervalMinute(1) < toIntervalMinute(59); +SELECT toIntervalMinute(1) < toIntervalHour(59); +SELECT toIntervalMinute(1440) = toIntervalDay(1); +SELECT toIntervalMinute(30241) > toIntervalWeek(3); + +SELECT toIntervalMinute(1) > toIntervalMinute(59); +SELECT toIntervalMinute(1) > toIntervalHour(59); +SELECT toIntervalMinute(1440) != toIntervalDay(1); +SELECT toIntervalMinute(30241) < toIntervalWeek(3); + +SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing hours'); +SELECT toIntervalHour(48) > toIntervalHour(2); +SELECT toIntervalHour(48) >= toIntervalDay(2); +SELECT toIntervalHour(672) = toIntervalWeek(4); + +SELECT toIntervalHour(48) < toIntervalHour(2); +SELECT toIntervalHour(48) < toIntervalDay(2); +SELECT toIntervalHour(672) != toIntervalWeek(4); + +SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing days'); +SELECT toIntervalDay(1) < toIntervalDay(23); +SELECT toIntervalDay(25) > toIntervalWeek(3); + +SELECT toIntervalDay(1) > toIntervalDay(23); +SELECT 
toIntervalDay(25) < toIntervalWeek(3); + +SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing weeks'); +SELECT toIntervalWeek(1) < toIntervalWeek(6); + +SELECT toIntervalWeek(1) > toIntervalWeek(6); + +SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing months'); +SELECT toIntervalMonth(1) < toIntervalMonth(3); +SELECT toIntervalMonth(124) > toIntervalQuarter(5); +SELECT toIntervalMonth(36) = toIntervalYear(3); + +SELECT toIntervalMonth(1) > toIntervalMonth(3); +SELECT toIntervalMonth(124) < toIntervalQuarter(5); +SELECT toIntervalMonth(36) != toIntervalYear(3); + +SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing quarters'); +SELECT toIntervalQuarter(5) > toIntervalQuarter(4); +SELECT toIntervalQuarter(20) = toIntervalYear(5); + +SELECT toIntervalQuarter(5) < toIntervalQuarter(4); +SELECT toIntervalQuarter(20) != toIntervalYear(5); + +SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT('Comparing years'); +SELECT toIntervalYear(1) < toIntervalYear(3); + +SELECT toIntervalYear(1) > toIntervalYear(3); + +SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } From e9659626adc29d237d23e0f3ced9c8712d472a73 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 20:41:15 +0200 Subject: [PATCH 039/127] fix style + add docs --- .../data-types/special-data-types/interval.md | 21 +++++++++---------- .../data-types/special-data-types/interval.md | 21 +++++++++---------- .../data-types/special-data-types/interval.md | 21 +++++++++---------- src/DataTypes/getLeastSupertype.cpp | 2 +- 4 files changed, 31 insertions(+), 34 deletions(-) diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index bedbcf0bd28..be26053580b 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -53,29 +53,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Intervals with different types can’t be combined. You can’t use intervals like `4 DAY 1 HOUR`. Specify intervals in units that are smaller or equal to the smallest unit of the interval, for example, the interval `1 day and an hour` interval can be expressed as `25 HOUR` or `90000 SECOND`. - -You can’t perform arithmetical operations with `Interval`-type values, but you can add intervals of different types consequently to values in `Date` or `DateTime` data types. 
For example: +Also it is possible to use multiple intervals simultaneously: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -The following query causes an exception: +And to compare values with different intevals: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. +┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## See Also diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md index 867a6665f4b..5064391f582 100644 --- a/docs/ru/sql-reference/data-types/special-data-types/interval.md +++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md @@ -54,29 +54,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -Нельзя объединять интервалы различных типов. Нельзя использовать интервалы вида `4 DAY 1 HOUR`. Вместо этого выражайте интервал в единицах меньших или равных минимальной единице интервала, например, интервал «1 день и 1 час» можно выразить как `25 HOUR` или `90000 SECOND`. - -Арифметические операции со значениями типов `Interval` не доступны, однако можно последовательно добавлять различные интервалы к значениям типов `Date` и `DateTime`. Например: +Также можно использовать различные типы интервалов одновременно: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -Следующий запрос приведёт к генерированию исключения: +И сравнивать значения из разными интервалами: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. 
+┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## Смотрите также {#smotrite-takzhe} diff --git a/docs/zh/sql-reference/data-types/special-data-types/interval.md b/docs/zh/sql-reference/data-types/special-data-types/interval.md index e05869b2df8..e16f6d5f84f 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/interval.md +++ b/docs/zh/sql-reference/data-types/special-data-types/interval.md @@ -55,29 +55,28 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -不同类型的间隔不能合并。 你不能使用诸如 `4 DAY 1 HOUR` 的时间间隔. 以小于或等于时间间隔最小单位的单位来指定间隔,例如,时间间隔 `1 day and an hour` 可以表示为 `25 HOUR` 或 `90000 SECOND`. - -你不能对 `Interval` 类型的值执行算术运算,但你可以向 `Date` 或 `DateTime` 数据类型的值添加不同类型的时间间隔,例如: +也可以同時使用多個間隔: ``` sql -SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR +SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ -│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ -└─────────────────────┴────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ +│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ +└─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -以下查询将导致异常: +並比較不同直數的值: ``` sql -select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); ``` ``` text -Received exception from server (version 19.14.1): -Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. 
+┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ +│ 1 │ +└─────────────────────────────────────────────────────────────┘ ``` ## 另请参阅 {#see-also} diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 0b9c744c091..674284460dc 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -252,7 +252,7 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & } if (is_higher_interval && min_granularity <= 8) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); if (smallest_type) { From 0ad6aa09acb72a67fc88e0cd8186afd32fefd6bf Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 22:51:27 +0200 Subject: [PATCH 040/127] fix style --- docs/en/sql-reference/data-types/special-data-types/interval.md | 2 +- src/Functions/FunctionsConversion.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index be26053580b..4ef1a7e6238 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -65,7 +65,7 @@ SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVA └─────────────────────┴────────────────────────────────────────────────────────────────────┘ ``` -And to compare values with different intevals: +And to compare values with different intervals: ``` sql SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0ab1858dc97..1708991af74 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1606,7 +1606,7 @@ struct ConvertImpl result_value = arguments[0].column->getInt(0) / conversion_factor; } else - { + { for (int i = from_position - 1; i >= to_position; --i) { for (const auto &entry : map) From 94efbb0bf9ab62a5399d4918e7bcfd358421a879 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:26:24 +0200 Subject: [PATCH 041/127] fix build --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 1708991af74..43ebe573582 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5279,7 +5279,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. 
- factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::Case::Insensitive); factory.registerFunction(); factory.registerFunction(); From b4c553718353eb2302f85ea4d096a92036ce832c Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 8 Aug 2024 23:49:56 +0200 Subject: [PATCH 042/127] fix errorcodes in test --- .../03223_interval_data_type_comparison.sql | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql index 6e4862bf2d2..5d01addae45 100644 --- a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -21,7 +21,7 @@ SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); SELECT toIntervalNanosecond(1) > toIntervalDay(2); SELECT toIntervalNanosecond(5) > toIntervalWeek(1); -SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError NO_COMMON_TYPE } SELECT('Comparing microseconds'); SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); @@ -40,7 +40,7 @@ SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); -SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing milliseconds'); SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); @@ -57,7 +57,7 @@ SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); -SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing seconds'); SELECT toIntervalSecond(120) > toIntervalSecond(2); @@ -72,7 +72,7 @@ SELECT toIntervalSecond(1) > toIntervalHour(2); SELECT toIntervalSecond(86401) < toIntervalDay(1); SELECT toIntervalSecond(1209600) != toIntervalWeek(2); -SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing minutes'); SELECT toIntervalMinute(1) < toIntervalMinute(59); @@ -85,7 +85,7 @@ SELECT toIntervalMinute(1) > toIntervalHour(59); SELECT toIntervalMinute(1440) != toIntervalDay(1); SELECT toIntervalMinute(30241) < toIntervalWeek(3); -SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError NO_COMMON_TYPE } SELECT('Comparing hours'); SELECT toIntervalHour(48) > toIntervalHour(2); @@ -96,7 +96,7 @@ SELECT toIntervalHour(48) < toIntervalHour(2); SELECT toIntervalHour(48) < toIntervalDay(2); SELECT toIntervalHour(672) != toIntervalWeek(4); -SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError 
ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } SELECT('Comparing days'); SELECT toIntervalDay(1) < toIntervalDay(23); @@ -105,14 +105,14 @@ SELECT toIntervalDay(25) > toIntervalWeek(3); SELECT toIntervalDay(1) > toIntervalDay(23); SELECT toIntervalDay(25) < toIntervalWeek(3); -SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError NO_COMMON_TYPE } SELECT('Comparing weeks'); SELECT toIntervalWeek(1) < toIntervalWeek(6); SELECT toIntervalWeek(1) > toIntervalWeek(6); -SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError NO_COMMON_TYPE } SELECT('Comparing months'); SELECT toIntervalMonth(1) < toIntervalMonth(3); @@ -123,7 +123,7 @@ SELECT toIntervalMonth(1) > toIntervalMonth(3); SELECT toIntervalMonth(124) < toIntervalQuarter(5); SELECT toIntervalMonth(36) != toIntervalYear(3); -SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError NO_COMMON_TYPE } SELECT('Comparing quarters'); SELECT toIntervalQuarter(5) > toIntervalQuarter(4); @@ -132,11 +132,11 @@ SELECT toIntervalQuarter(20) = toIntervalYear(5); SELECT toIntervalQuarter(5) < toIntervalQuarter(4); SELECT toIntervalQuarter(20) != toIntervalYear(5); -SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError NO_COMMON_TYPE } SELECT('Comparing years'); SELECT toIntervalYear(1) < toIntervalYear(3); SELECT toIntervalYear(1) > toIntervalYear(3); -SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError NO_COMMON_TYPE } From 3357275fa8c55bcc5371b4ff9c9a5d80e51ab689 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 18:33:45 +0800 Subject: [PATCH 043/127] Fix MSAN issue caused by incorrect date format. 
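
The change below only touches how the unparsed input is rendered into the exception text: the message is built from an explicit number of bytes instead of treating the on-stack scratch buffer as a null-terminated C-string. A rough standalone sketch of the idea (the helper, buffer size, and input value here are invented for illustration and are not the actual ClickHouse code):

```cpp
#include <cstring>
#include <iostream>
#include <string>
#include <string_view>

// Build an error message from a scratch buffer of which only the first
// `bytes_read` bytes are guaranteed to be initialized. Slicing with an
// explicit length never depends on a terminating null byte.
std::string makeParseError(const char * buf, size_t bytes_read)
{
    return "Cannot parse time component of DateTime " + std::string(std::string_view(buf, bytes_read));
}

int main()
{
    char buf[26];                    // fixed-size buffer, mostly uninitialized
    const char * input = "09:58:0";  // truncated time component
    size_t bytes_read = std::strlen(input);
    std::memcpy(buf, input, bytes_read);

    std::cout << makeParseError(buf, bytes_read) << '\n';
    return 0;
}
```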
--- src/IO/ReadHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index c771fced73a..dd4aef23a25 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1432,7 +1432,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, date_broken_down_length + 1 + size)); else return false; } From 35f19522e745ef2267b4c6f99dfc5d7c1f7e78c3 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:56:14 +0200 Subject: [PATCH 044/127] fix fuzzer --- src/Functions/FunctionsConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 43ebe573582..c25bc44450f 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1579,7 +1579,7 @@ struct ConvertImpl IntervalKind to = typeid_cast(result_type.get())->getKind(); IntervalKind from = typeid_cast(arguments[0].type.get())->getKind(); - if (from == to) + if (from == to || arguments[0].column->empty()) return arguments[0].column; const auto &map = getGranularityMap(); From ca4041847e4aa8acccd6ea31c0a18f2160c0dc7a Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 19:15:41 +0800 Subject: [PATCH 045/127] Add tests --- src/IO/ReadHelpers.cpp | 4 ++-- ...215_fix_datetime_implicit_conversion.reference | 1 + .../03215_fix_datetime_implicit_conversion.sql | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference create mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index dd4aef23a25..e69b4187b37 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1402,7 +1402,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", s); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", String(s, already_read_length)); else return false; } @@ -1432,7 +1432,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, date_broken_down_length + 1 + size)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, size)); else return false; } diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql new file mode 100644 index 00000000000..70a8a3432a6 --- 
/dev/null +++ b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS tab SYNC; + +CREATE TABLE tab +( + a DateTime, + pk String +) Engine = MergeTree() ORDER BY pk; + +INSERT INTO tab select cast(number, 'DateTime'), generateUUIDv4() FROM system.numbers LIMIT 1; + +SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'; +SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'; -- { serverError CANNOT_PARSE_DATETIME } +SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'; -- { serverError TYPE_MISMATCH } + +DROP TABLE IF EXISTS tab SYNC; From 6ded5e1c8b994ad2332468e605b17a74e8d5675f Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Fri, 9 Aug 2024 23:50:03 +0800 Subject: [PATCH 046/127] Some fixups --- src/IO/ReadHelpers.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e69b4187b37..b484f80250d 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1399,10 +1399,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D size_t size = buf.read(s_pos, remaining_date_size); if (size != remaining_date_size) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", String(s, already_read_length)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", std::string_view(s, already_read_length + size)); else return false; } @@ -1429,10 +1427,8 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (size != time_broken_down_length) { - s_pos[size] = 0; - if constexpr (throw_exception) - throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", String(s, size)); + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", std::string_view(s, size)); else return false; } From a3d8db6e1eb27d6a8fa81bbf43c8ffb171714c0b Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 9 Aug 2024 19:05:37 +0200 Subject: [PATCH 047/127] updates due to review --- .../data-types/special-data-types/interval.md | 21 +++++++------- src/DataTypes/getLeastSupertype.cpp | 13 ++++----- src/DataTypes/getLeastSupertype.h | 21 ++------------ src/Functions/FunctionsConversion.cpp | 28 +++---------------- 4 files changed, 24 insertions(+), 59 deletions(-) diff --git a/docs/zh/sql-reference/data-types/special-data-types/interval.md b/docs/zh/sql-reference/data-types/special-data-types/interval.md index e16f6d5f84f..e05869b2df8 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/interval.md +++ b/docs/zh/sql-reference/data-types/special-data-types/interval.md @@ -55,28 +55,29 @@ SELECT now() as current_date_time, current_date_time + INTERVAL 4 DAY └─────────────────────┴───────────────────────────────┘ ``` -也可以同時使用多個間隔: +不同类型的间隔不能合并。 你不能使用诸如 `4 DAY 1 HOUR` 的时间间隔. 以小于或等于时间间隔最小单位的单位来指定间隔,例如,时间间隔 `1 day and an hour` 可以表示为 `25 HOUR` 或 `90000 SECOND`. 
+ +你不能对 `Interval` 类型的值执行算术运算,但你可以向 `Date` 或 `DateTime` 数据类型的值添加不同类型的时间间隔,例如: ``` sql -SELECT now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) +SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR ``` ``` text -┌───current_date_time─┬─plus(current_date_time, plus(toIntervalDay(4), toIntervalHour(3)))─┐ -│ 2024-08-08 18:31:39 │ 2024-08-12 21:31:39 │ -└─────────────────────┴────────────────────────────────────────────────────────────────────┘ +┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ +│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ +└─────────────────────┴────────────────────────────────────────────────────────┘ ``` -並比較不同直數的值: +以下查询将导致异常: ``` sql -SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); +select now() AS current_date_time, current_date_time + (INTERVAL 4 DAY + INTERVAL 3 HOUR) ``` ``` text -┌─less(toIntervalMicrosecond(179999999), toIntervalMinute(3))─┐ -│ 1 │ -└─────────────────────────────────────────────────────────────┘ +Received exception from server (version 19.14.1): +Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argument types for function plus: if one argument is Interval, then another must be Date or DateTime.. ``` ## 另请参阅 {#see-also} diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 674284460dc..8bcec49815f 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -230,8 +230,7 @@ void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & type DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet &types_set) { - const auto& granularity_map = getGranularityMap(); - int min_granularity = std::get<0>(granularity_map.at(IntervalKind::Kind::Year)); + auto min_interval = IntervalKind::Kind::Year; DataTypePtr smallest_type; bool is_higher_interval = false; // For Years, Quarters and Months @@ -240,18 +239,18 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & { if (const auto * interval_type = typeid_cast(type.get())) { - int current_granularity = std::get<0>(granularity_map.at(interval_type->getKind())); - if (current_granularity > 8) + auto current_interval = interval_type->getKind().kind; + if (current_interval > IntervalKind::Kind::Week) is_higher_interval = true; - if (current_granularity < min_granularity) + if (current_interval < min_interval) { - min_granularity = current_granularity; + min_interval = current_interval; smallest_type = type; } } } - if (is_higher_interval && min_granularity <= 8) + if (is_higher_interval && min_interval <= IntervalKind::Kind::Week) throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); if (smallest_type) diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index c584eb83011..5ea2b6417b2 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -50,24 +50,9 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); -/// A map that enumerated all interval kinds in ascending order with a conversion value to a next interval -inline const std::unordered_map> & getGranularityMap() -{ - static std::unordered_map> granularity_map = - { - {IntervalKind::Kind::Nanosecond, {1, 1000}}, - 
{IntervalKind::Kind::Microsecond, {2, 1000}}, - {IntervalKind::Kind::Millisecond, {3, 1000}}, - {IntervalKind::Kind::Second, {4, 60}}, - {IntervalKind::Kind::Minute, {5, 60}}, - {IntervalKind::Kind::Hour, {6, 24}}, - {IntervalKind::Kind::Day, {7, 7}}, - {IntervalKind::Kind::Week, {8, 4}}, - {IntervalKind::Kind::Month, {9, 3}}, - {IntervalKind::Kind::Quarter, {10, 4}}, - {IntervalKind::Kind::Year, {11, 1}} - }; - return granularity_map; +/// A vector that shows the conversion rates to the next Interval type starting from NanoSecond +static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; + } } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index c25bc44450f..25c6bbcbfef 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1582,42 +1582,22 @@ struct ConvertImpl if (from == to || arguments[0].column->empty()) return arguments[0].column; - const auto &map = getGranularityMap(); Int64 conversion_factor = 1; Int64 result_value; - int from_position = map.at(from).first; - int to_position = map.at(to).first; // Positions of each interval according to granurality map + int from_position = static_cast(from.kind); + int to_position = static_cast(to.kind); // Positions of each interval according to granurality map if (from_position < to_position) { for (int i = from_position - 1; i <= to_position; ++i) - { - // Find the kind that matches this position - for (const auto &entry : map) - { - if (entry.second.first == i) - { - conversion_factor *= entry.second.second; - break; - } - } - } + conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) / conversion_factor; } else { for (int i = from_position - 1; i >= to_position; --i) - { - for (const auto &entry : map) - { - if (entry.second.first == i) - { - conversion_factor *= entry.second.second; - break; - } - } - } + conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) * conversion_factor; } From 384aedccaeece56456ad1e5ea17a8da4f56a69a4 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Sat, 10 Aug 2024 00:09:50 +0200 Subject: [PATCH 048/127] Update getLeastSupertype.h --- src/DataTypes/getLeastSupertype.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 5ea2b6417b2..8dd1685e6e9 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -54,5 +54,3 @@ DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; } - -} From 1cc845726842f388c4524d55b248f210e28d979d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:57:47 +0200 Subject: [PATCH 049/127] fix reviews, fix crash in fuzzer --- src/DataTypes/getLeastSupertype.cpp | 2 +- src/DataTypes/getLeastSupertype.h | 2 +- src/Functions/FunctionsConversion.cpp | 11 +- .../03223_interval_data_type_comparison.sql | 198 +++++++++--------- 4 files changed, 106 insertions(+), 107 deletions(-) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 8bcec49815f..65df529e78b 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -251,7 +251,7 @@ DataTypePtr findSmallestIntervalSuperType(const DataTypes &types, TypeIndexSet & } if 
(is_higher_interval && min_interval <= IntervalKind::Kind::Week) - throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the amount of days in month is not determined", types[0]->getName(), types[1]->getName()); + throw Exception(ErrorCodes::NO_COMMON_TYPE, "Cannot compare intervals {} and {} because the number of days in a month is not fixed", types[0]->getName(), types[1]->getName()); if (smallest_type) { diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h index 8dd1685e6e9..55d8e8fff0d 100644 --- a/src/DataTypes/getLeastSupertype.h +++ b/src/DataTypes/getLeastSupertype.h @@ -51,6 +51,6 @@ DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types); DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types); /// A vector that shows the conversion rates to the next Interval type starting from NanoSecond -static std::vector interval_conversions = {1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4, 1}; +static std::vector interval_conversions = {1, 1000, 1000, 1000, 60, 60, 24, 7, 4, 3, 4}; } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 25c6bbcbfef..b6102cb7ecf 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1586,17 +1586,17 @@ struct ConvertImpl Int64 result_value; int from_position = static_cast(from.kind); - int to_position = static_cast(to.kind); // Positions of each interval according to granurality map + int to_position = static_cast(to.kind); /// Positions of each interval according to granularity map if (from_position < to_position) { - for (int i = from_position - 1; i <= to_position; ++i) + for (int i = from_position; i < to_position; ++i) conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) / conversion_factor; } else { - for (int i = from_position - 1; i >= to_position; --i) + for (int i = from_position; i > to_position; --i) conversion_factor *= interval_conversions[i]; result_value = arguments[0].column->getInt(0) * conversion_factor; } @@ -2366,9 +2366,8 @@ private: } if constexpr (std::is_same_v) - { - done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); - } + if (WhichDataType(from_type).isInterval()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, BehaviourOnErrorFromString::ConvertDefaultBehaviorTag); } if (!done) diff --git a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql index 5d01addae45..77b6e2fa3dc 100644 --- a/tests/queries/0_stateless/03223_interval_data_type_comparison.sql +++ b/tests/queries/0_stateless/03223_interval_data_type_comparison.sql @@ -1,142 +1,142 @@ SELECT('Comparing nanoseconds'); -SELECT toIntervalNanosecond(500) > toIntervalNanosecond(300); -SELECT toIntervalNanosecond(1000) < toIntervalNanosecond(1500); -SELECT toIntervalNanosecond(2000) = toIntervalNanosecond(2000); -SELECT toIntervalNanosecond(1000) >= toIntervalMicrosecond(1); -SELECT toIntervalNanosecond(1000001) > toIntervalMillisecond(1); -SELECT toIntervalNanosecond(2000000001) > toIntervalSecond(2); -SELECT toIntervalNanosecond(60000000000) = toIntervalMinute(1); -SELECT toIntervalNanosecond(7199999999999) < toIntervalHour(2); -SELECT toIntervalNanosecond(1) < toIntervalDay(2); -SELECT toIntervalNanosecond(5) < toIntervalWeek(1); +SELECT INTERVAL 500 NANOSECOND > INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < 
INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND = INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND >= INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000001 NANOSECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND = INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND < INTERVAL 1 WEEK; -SELECT toIntervalNanosecond(500) < toIntervalNanosecond(300); -SELECT toIntervalNanosecond(1000) > toIntervalNanosecond(1500); -SELECT toIntervalNanosecond(2000) != toIntervalNanosecond(2000); -SELECT toIntervalNanosecond(1000) < toIntervalMicrosecond(1); -SELECT toIntervalNanosecond(1000001) < toIntervalMillisecond(1); -SELECT toIntervalNanosecond(2000000001) < toIntervalSecond(2); -SELECT toIntervalNanosecond(60000000000) != toIntervalMinute(1); -SELECT toIntervalNanosecond(7199999999999) > toIntervalHour(2); -SELECT toIntervalNanosecond(1) > toIntervalDay(2); -SELECT toIntervalNanosecond(5) > toIntervalWeek(1); +SELECT INTERVAL 500 NANOSECOND < INTERVAL 300 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND > INTERVAL 1500 NANOSECOND; +SELECT INTERVAL 2000 NANOSECOND != INTERVAL 2000 NANOSECOND; +SELECT INTERVAL 1000 NANOSECOND < INTERVAL 1 MICROSECOND; +SELECT INTERVAL 1000001 NANOSECOND < INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000001 NANOSECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 60000000000 NANOSECOND != INTERVAL 1 MINUTE; +SELECT INTERVAL 7199999999999 NANOSECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 1 NANOSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 5 NANOSECOND > INTERVAL 1 WEEK; -SELECT toIntervalNanosecond(1) < toIntervalMonth(2); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 1 NANOSECOND < INTERVAL 2 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing microseconds'); -SELECT toIntervalMicrosecond(1) < toIntervalMicrosecond(999); -SELECT toIntervalMicrosecond(1001) > toIntervalMillisecond(1); -SELECT toIntervalMicrosecond(2000000) = toIntervalSecond(2); -SELECT toIntervalMicrosecond(179999999) < toIntervalMinute(3); -SELECT toIntervalMicrosecond(3600000000) = toIntervalHour(1); -SELECT toIntervalMicrosecond(36000000000000) > toIntervalDay(2); -SELECT toIntervalMicrosecond(1209600000000) = toIntervalWeek(2); +SELECT INTERVAL 1 MICROSECOND < INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND > INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND = INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 MICROSECOND > INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND = INTERVAL 2 WEEK; -SELECT toIntervalMicrosecond(1) > toIntervalMicrosecond(999); -SELECT toIntervalMicrosecond(1001) < toIntervalMillisecond(1); -SELECT toIntervalMicrosecond(2000000) != toIntervalSecond(2); -SELECT toIntervalMicrosecond(179999999) > toIntervalMinute(3); -SELECT toIntervalMicrosecond(3600000000) != toIntervalHour(1); -SELECT toIntervalMicrosecond(36000000000000) < toIntervalDay(2); -SELECT toIntervalMicrosecond(1209600000000) != toIntervalWeek(2); +SELECT INTERVAL 1 MICROSECOND > INTERVAL 999 MICROSECOND; +SELECT INTERVAL 1001 MICROSECOND < INTERVAL 1 MILLISECOND; +SELECT INTERVAL 2000000 MICROSECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 179999999 MICROSECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 3600000000 MICROSECOND != INTERVAL 1 HOUR; +SELECT INTERVAL 36000000000000 
MICROSECOND < INTERVAL 2 DAY; +SELECT INTERVAL 1209600000000 MICROSECOND != INTERVAL 2 WEEK; -SELECT toIntervalMicrosecond(36000000000000) < toIntervalQuarter(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 MICROSECOND < INTERVAL 1 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing milliseconds'); -SELECT toIntervalMillisecond(2000) > toIntervalMillisecond(2); -SELECT toIntervalMillisecond(2000) = toIntervalSecond(2); -SELECT toIntervalMillisecond(170000) < toIntervalMinute(3); -SELECT toIntervalMillisecond(144000001) > toIntervalHour(40); -SELECT toIntervalMillisecond(1728000000) = toIntervalDay(20); -SELECT toIntervalMillisecond(1198599999) < toIntervalWeek(2); +SELECT INTERVAL 2000 MILLISECOND > INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND = INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND < INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND > INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND = INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND < INTERVAL 2 WEEK; -SELECT toIntervalMillisecond(2000) < toIntervalMillisecond(2); -SELECT toIntervalMillisecond(2000) != toIntervalSecond(2); -SELECT toIntervalMillisecond(170000) > toIntervalMinute(3); -SELECT toIntervalMillisecond(144000001) < toIntervalHour(40); -SELECT toIntervalMillisecond(1728000000) != toIntervalDay(20); -SELECT toIntervalMillisecond(1198599999) > toIntervalWeek(2); +SELECT INTERVAL 2000 MILLISECOND < INTERVAL 2 MILLISECOND; +SELECT INTERVAL 2000 MILLISECOND != INTERVAL 2 SECOND; +SELECT INTERVAL 170000 MILLISECOND > INTERVAL 3 MINUTE; +SELECT INTERVAL 144000001 MILLISECOND < INTERVAL 40 HOUR; +SELECT INTERVAL 1728000000 MILLISECOND != INTERVAL 20 DAY; +SELECT INTERVAL 1198599999 MILLISECOND > INTERVAL 2 WEEK; -SELECT toIntervalMillisecond(36000000000000) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 MILLISECOND < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } SELECT('Comparing seconds'); -SELECT toIntervalSecond(120) > toIntervalSecond(2); -SELECT toIntervalSecond(120) = toIntervalMinute(2); -SELECT toIntervalSecond(1) < toIntervalHour(2); -SELECT toIntervalSecond(86401) >= toIntervalDay(1); -SELECT toIntervalSecond(1209600) = toIntervalWeek(2); +SELECT INTERVAL 120 SECOND > INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND = INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND < INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND >= INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND = INTERVAL 2 WEEK; -SELECT toIntervalSecond(120) < toIntervalSecond(2); -SELECT toIntervalSecond(120) != toIntervalMinute(2); -SELECT toIntervalSecond(1) > toIntervalHour(2); -SELECT toIntervalSecond(86401) < toIntervalDay(1); -SELECT toIntervalSecond(1209600) != toIntervalWeek(2); +SELECT INTERVAL 120 SECOND < INTERVAL 2 SECOND; +SELECT INTERVAL 120 SECOND != INTERVAL 2 MINUTE; +SELECT INTERVAL 1 SECOND > INTERVAL 2 HOUR; +SELECT INTERVAL 86401 SECOND < INTERVAL 1 DAY; +SELECT INTERVAL 1209600 SECOND != INTERVAL 2 WEEK; -SELECT toIntervalSecond(36000000000000) < toIntervalMonth(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 36000000000000 SECOND < INTERVAL 1 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing minutes'); -SELECT toIntervalMinute(1) < toIntervalMinute(59); -SELECT toIntervalMinute(1) < toIntervalHour(59); -SELECT toIntervalMinute(1440) = toIntervalDay(1); -SELECT toIntervalMinute(30241) > toIntervalWeek(3); +SELECT INTERVAL 1 MINUTE < INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE < INTERVAL 59 HOUR; +SELECT 
INTERVAL 1440 MINUTE = INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE > INTERVAL 3 WEEK; -SELECT toIntervalMinute(1) > toIntervalMinute(59); -SELECT toIntervalMinute(1) > toIntervalHour(59); -SELECT toIntervalMinute(1440) != toIntervalDay(1); -SELECT toIntervalMinute(30241) < toIntervalWeek(3); +SELECT INTERVAL 1 MINUTE > INTERVAL 59 MINUTE; +SELECT INTERVAL 1 MINUTE > INTERVAL 59 HOUR; +SELECT INTERVAL 1440 MINUTE != INTERVAL 1 DAY; +SELECT INTERVAL 30241 MINUTE < INTERVAL 3 WEEK; -SELECT toIntervalMinute(2) = toIntervalQuarter(120); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 MINUTE = INTERVAL 120 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing hours'); -SELECT toIntervalHour(48) > toIntervalHour(2); -SELECT toIntervalHour(48) >= toIntervalDay(2); -SELECT toIntervalHour(672) = toIntervalWeek(4); +SELECT INTERVAL 48 HOUR > INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR >= INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR = INTERVAL 4 WEEK; -SELECT toIntervalHour(48) < toIntervalHour(2); -SELECT toIntervalHour(48) < toIntervalDay(2); -SELECT toIntervalHour(672) != toIntervalWeek(4); +SELECT INTERVAL 48 HOUR < INTERVAL 2 HOUR; +SELECT INTERVAL 48 HOUR < INTERVAL 2 DAY; +SELECT INTERVAL 672 HOUR != INTERVAL 4 WEEK; -SELECT toIntervalHour(2) < toIntervalYear(1); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 HOUR < INTERVAL 1 YEAR; -- { serverError NO_COMMON_TYPE } SELECT('Comparing days'); -SELECT toIntervalDay(1) < toIntervalDay(23); -SELECT toIntervalDay(25) > toIntervalWeek(3); +SELECT INTERVAL 1 DAY < INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY > INTERVAL 3 WEEK; -SELECT toIntervalDay(1) > toIntervalDay(23); -SELECT toIntervalDay(25) < toIntervalWeek(3); +SELECT INTERVAL 1 DAY > INTERVAL 23 DAY; +SELECT INTERVAL 25 DAY < INTERVAL 3 WEEK; -SELECT toIntervalDay(2) = toIntervalMonth(48); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 DAY = INTERVAL 48 MONTH; -- { serverError NO_COMMON_TYPE } SELECT('Comparing weeks'); -SELECT toIntervalWeek(1) < toIntervalWeek(6); +SELECT INTERVAL 1 WEEK < INTERVAL 6 WEEK; -SELECT toIntervalWeek(1) > toIntervalWeek(6); +SELECT INTERVAL 1 WEEK > INTERVAL 6 WEEK; -SELECT toIntervalWeek(124) > toIntervalQuarter(8); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 124 WEEK > INTERVAL 8 QUARTER; -- { serverError NO_COMMON_TYPE } SELECT('Comparing months'); -SELECT toIntervalMonth(1) < toIntervalMonth(3); -SELECT toIntervalMonth(124) > toIntervalQuarter(5); -SELECT toIntervalMonth(36) = toIntervalYear(3); +SELECT INTERVAL 1 MONTH < INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH > INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH = INTERVAL 3 YEAR; -SELECT toIntervalMonth(1) > toIntervalMonth(3); -SELECT toIntervalMonth(124) < toIntervalQuarter(5); -SELECT toIntervalMonth(36) != toIntervalYear(3); +SELECT INTERVAL 1 MONTH > INTERVAL 3 MONTH; +SELECT INTERVAL 124 MONTH < INTERVAL 5 QUARTER; +SELECT INTERVAL 36 MONTH != INTERVAL 3 YEAR; -SELECT toIntervalMonth(6) = toIntervalMicrosecond(26); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 6 MONTH = INTERVAL 26 MICROSECOND; -- { serverError NO_COMMON_TYPE } SELECT('Comparing quarters'); -SELECT toIntervalQuarter(5) > toIntervalQuarter(4); -SELECT toIntervalQuarter(20) = toIntervalYear(5); +SELECT INTERVAL 5 QUARTER > INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER = INTERVAL 5 YEAR; -SELECT toIntervalQuarter(5) < toIntervalQuarter(4); -SELECT toIntervalQuarter(20) != toIntervalYear(5); +SELECT INTERVAL 5 QUARTER < INTERVAL 4 QUARTER; +SELECT INTERVAL 20 QUARTER != INTERVAL 5 YEAR; -SELECT 
toIntervalQuarter(2) = toIntervalNanosecond(6); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 QUARTER = INTERVAL 6 NANOSECOND; -- { serverError NO_COMMON_TYPE } SELECT('Comparing years'); -SELECT toIntervalYear(1) < toIntervalYear(3); +SELECT INTERVAL 1 YEAR < INTERVAL 3 YEAR; -SELECT toIntervalYear(1) > toIntervalYear(3); +SELECT INTERVAL 1 YEAR > INTERVAL 3 YEAR; -SELECT toIntervalYear(2) = toIntervalSecond(8); -- { serverError NO_COMMON_TYPE } +SELECT INTERVAL 2 YEAR = INTERVAL 8 SECOND; -- { serverError NO_COMMON_TYPE } \ No newline at end of file From 7f005a6ca48d4f193470d3a71bc1d97ff55f4a2f Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Tue, 13 Aug 2024 08:38:30 +0000 Subject: [PATCH 050/127] Fix small value DateTime64 constant folding in nested query --- src/Analyzer/ConstantNode.cpp | 10 ++++- ...222_datetime64_small_value_const.reference | 18 +++++++++ .../03222_datetime64_small_value_const.sql | 39 +++++++++++++++++++ 3 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.reference create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.sql diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index c65090f5b55..3d0f448da4b 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -177,9 +177,15 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. */ - if (WhichDataType(constant_value_type->getTypeId()).isDateTime64()) + if ((WhichDataType(constant_value_type->getTypeId()).isDateTime64()) || + (WhichDataType(constant_value_type->getTypeId()).isNullable() && WhichDataType((typeid_cast(constant_value_type.get()))->getNestedType()->getTypeId()).isDateTime64())) { - const auto * date_time_type = typeid_cast(constant_value_type.get()); + const DataTypeDateTime64 * date_time_type = nullptr; + if (WhichDataType(constant_value_type->getTypeId()).isNullable()) + date_time_type = typeid_cast((typeid_cast(constant_value_type.get()))->getNestedType().get()); + else + date_time_type = typeid_cast(constant_value_type.get()); + DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.reference b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference new file mode 100644 index 00000000000..ae36c08acc5 --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference @@ -0,0 +1,18 @@ +0 1970-01-01 00:00:00.000 +0 1970-01-01 00:00:05.000 +0 1970-01-01 00:45:25.456789 +0 1970-01-01 00:53:25.456789123 +0 \N +1 1970-01-01 00:00:00.000 +5 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +4 1970-01-01 00:10:05.000 +4 1970-01-01 00:10:05.000 +1 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +5 1970-01-01 00:00:00.000 +0 +0 +5 diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql new file mode 100644 index 00000000000..6999ba9662a --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -0,0 +1,39 @@ +-- Tags: shard + +select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) 
settings prefer_localhost_replica=0; +select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:45:25.456789', 6)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:53:25.456789123', 9)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(null,3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); + +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)) +insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); +insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); +insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); +insert into shard_1.dt64_03222 values(5, toDateTime64(0,3)); + +--Output : 1,5 2,3,4 4 1,2,3,5 0 0 5 +select id, dt from distr_03222_dt64 where dt = (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64('1970-01-01 00:10:00.000',3)) order by id; +select id, dt from distr_03222_dt64 where dt < (select toDateTime64(5,3)) order by id; + +select count(*) from distr_03222_dt64 where dt > (select toDateTime64('2024-07-20 00:00:00',3)); +select count(*) from distr_03222_dt64 where dt > (select now()); +select count(*) from distr_03222_dt64 where dt < (select toDateTime64('2004-07-20 00:00:00',3)); + + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; From e46c5a75ef6b5488834add56bea4cab327515bfb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 13 Aug 2024 17:59:05 +0800 Subject: [PATCH 051/127] fix building issue --- src/Functions/{FunctionOverlay.cpp => overlay.cpp} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename src/Functions/{FunctionOverlay.cpp => overlay.cpp} (99%) diff --git a/src/Functions/FunctionOverlay.cpp b/src/Functions/overlay.cpp similarity index 99% rename from src/Functions/FunctionOverlay.cpp rename to src/Functions/overlay.cpp index 61d2df88ab1..094da27a71d 100644 --- a/src/Functions/FunctionOverlay.cpp +++ b/src/Functions/overlay.cpp @@ -728,8 +728,8 @@ private: REGISTER_FUNCTION(Overlay) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>({}, FunctionFactory::CaseSensitive); + factory.registerFunction>({}, FunctionFactory::Case::Insensitive); + factory.registerFunction>({}, FunctionFactory::Case::Sensitive); } } From 0414cdbbbf32efe10a92c9dd93ba47743ceeb848 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 13 Aug 2024 15:58:49 +0200 Subject: [PATCH 052/127] Fix unpack error --- tests/clickhouse-test | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5946e561949..5bde4686d3a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1995,7 +1995,7 @@ class TestSuite: tag_line = find_tag_line(file) next_line = file.readline() except UnicodeDecodeError: - return [] + return [], {} try: if filepath.endswith(".sql"): for line in file: From 6dfed409f460311f133e30e70f839f9865d71861 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 13 Aug 2024 16:09:45 +0000 Subject: [PATCH 053/127] Fix seraching for query params --- tests/clickhouse-test | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 5bde4686d3a..515b519af3e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1978,13 +1978,20 @@ class TestSuite: def is_shebang(line: str) -> bool: return line.startswith("#!") - def find_tag_line(file): - line = file.readline() - while line != "": - line = line.strip() - if line and not is_shebang(line): + def find_tag_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Tags:"): + return line + return "" + + def find_random_settings_limits_line(lines, comment_sign): + for line in lines: + if line.startswith(comment_sign) and line[ + len(comment_sign) : + ].lstrip().startswith("Random settings limits:"): return line - line = file.readline() return "" def load_tags_and_random_settings_limits_from_file(filepath): @@ -1992,13 +1999,16 @@ class TestSuite: need_query_params = False with open(filepath, "r", encoding="utf-8") as file: try: - tag_line = find_tag_line(file) - next_line = file.readline() + lines = file.readlines() + tag_line = find_tag_line(lines, comment_sign) + random_settings_limits_line = find_random_settings_limits_line( + lines, comment_sign + ) except UnicodeDecodeError: return [], {} try: if filepath.endswith(".sql"): - for line in file: + for line in lines: if "{CLICKHOUSE_DATABASE" in line: need_query_params = True except UnicodeDecodeError: @@ -2006,7 +2016,6 @@ class TestSuite: parsed_tags = parse_tags_from_line(tag_line, comment_sign) if need_query_params: parsed_tags.add("need-query-parameters") - random_settings_limits_line = next_line if parsed_tags else tag_line random_settings_limits = parse_random_settings_limits_from_line( random_settings_limits_line, comment_sign ) @@ -2068,9 +2077,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( - all_tags_and_random_settings_limits[1] - ) + self.all_random_settings_limits: Dict[ + str, Dict[str, (int, int)] + ] = all_tags_and_random_settings_limits[1] self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From 0abca8b7ddbafa37da5b1196b21fb816999fd334 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 14 Aug 2024 09:57:59 +0800 Subject: [PATCH 054/127] fix doc --- .../sql-reference/functions/string-replace-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 1caa6215b6b..d086c9ee64b 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -263,8 +263,8 @@ overlay(s, replace, 
position[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int.md). -- `length`: Optional. An integer type [Int](../data-types/int.md). +- `position`: An integer type [Int](../data-types/int-uint.md). +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** @@ -300,8 +300,8 @@ overlayUTF8(s, replace, position[, length]) - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int.md). -- `length`: Optional. An integer type [Int](../data-types/int.md). +- `position`: An integer type [Int](../data-types/int-uint.md). +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** From f740cf4eaa71621fb518c6d5668e8356f452a979 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 14 Aug 2024 09:54:03 +0200 Subject: [PATCH 055/127] Fix data race on SampleKey --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index a6ef0063069..6efd3a5c97f 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -369,7 +369,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. - ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); + ASTPtr sampling_key_ast; if (final) { @@ -377,6 +377,12 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( /// We do spoil available_real_columns here, but it is not used later. 
available_real_columns.emplace_back(sampling_key.column_names[0], std::move(sampling_column_type)); } + else + { + sampling_key_ast = metadata_snapshot->getSamplingKeyAST()->clone(); + } + + chassert(sampling_key_ast != nullptr); if (has_lower_limit) { From 28b0aad3f9e54beed27ee384ab81312233abaa84 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Aug 2024 15:16:34 +0200 Subject: [PATCH 056/127] Fix python style --- tests/clickhouse-test | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 515b519af3e..8c2da7334d4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1975,9 +1975,6 @@ class TestSuite: ) return random_settings_limits - def is_shebang(line: str) -> bool: - return line.startswith("#!") - def find_tag_line(lines, comment_sign): for line in lines: if line.startswith(comment_sign) and line[ @@ -2077,9 +2074,9 @@ class TestSuite: ) ) self.all_tags: Dict[str, Set[str]] = all_tags_and_random_settings_limits[0] - self.all_random_settings_limits: Dict[ - str, Dict[str, (int, int)] - ] = all_tags_and_random_settings_limits[1] + self.all_random_settings_limits: Dict[str, Dict[str, (int, int)]] = ( + all_tags_and_random_settings_limits[1] + ) self.sequential_tests = [] self.parallel_tests = [] for test_name in self.all_tests: From b82c231886f2496c01b288a138663c4d430fc7b2 Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Thu, 15 Aug 2024 10:37:13 +0000 Subject: [PATCH 057/127] Code review feedback - used removeNullable() --- src/Analyzer/ConstantNode.cpp | 11 +++-------- .../03222_datetime64_small_value_const.sql | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 3d0f448da4b..3a99ad08ad8 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -177,15 +177,10 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. * For this reason, we use a string literal representing a date instead of a Decimal64 literal. 
*/ - if ((WhichDataType(constant_value_type->getTypeId()).isDateTime64()) || - (WhichDataType(constant_value_type->getTypeId()).isNullable() && WhichDataType((typeid_cast(constant_value_type.get()))->getNestedType()->getTypeId()).isDateTime64())) + const auto & constant_value_end_type = removeNullable(constant_value_type); /// if Nullable + if (WhichDataType(constant_value_end_type->getTypeId()).isDateTime64()) { - const DataTypeDateTime64 * date_time_type = nullptr; - if (WhichDataType(constant_value_type->getTypeId()).isNullable()) - date_time_type = typeid_cast((typeid_cast(constant_value_type.get()))->getNestedType().get()); - else - date_time_type = typeid_cast(constant_value_type.get()); - + const auto * date_time_type = typeid_cast(constant_value_end_type.get()); DecimalField decimal_value; if (constant_value_literal.tryGet>(decimal_value)) { diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index 6999ba9662a..af06a622f8d 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -17,7 +17,7 @@ create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); -insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)) +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)); insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); From 5ff4d990e189dfee42eb57f567a5ff6313cfa8d8 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 11:11:11 +0200 Subject: [PATCH 058/127] CI: Auto Releases in prod --- .github/workflows/auto_releases.yml | 76 ++++++++++------------------ .github/workflows/create_release.yml | 2 + tests/ci/auto_release.py | 9 ++++ tests/ci/ci_utils.py | 24 ++++++--- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/.github/workflows/auto_releases.yml b/.github/workflows/auto_releases.yml index c159907187c..28483ea136f 100644 --- a/.github/workflows/auto_releases.yml +++ b/.github/workflows/auto_releases.yml @@ -19,13 +19,11 @@ on: jobs: AutoReleaseInfo: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, release-maker] outputs: data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }} dry_run: ${{ steps.info.outputs.DRY_RUN }} steps: - - name: Debug Info - uses: ./.github/actions/debug - name: Set envs run: | cat >> "$GITHUB_ENV" << 'EOF' @@ -36,6 +34,10 @@ jobs: echo "DRY_RUN=true" >> "$GITHUB_ENV" - name: Check out repository code uses: ClickHouse/checkout@v1 + with: + fetch-depth: 0 # full history needed + - name: Debug Info + uses: ./.github/actions/debug - name: Prepare Info id: info run: | @@ -46,12 +48,7 @@ jobs: echo "::endgroup::" { echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_ENV" - { - echo 'AUTO_RELEASE_PARAMS<> "$GITHUB_OUTPUT" echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" @@ -62,48 +59,29 @@ jobs: - name: Clean up uses: ./.github/actions/clean - Release_0: + Releases: needs: AutoReleaseInfo - name: Release ${{ 
fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }} - if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }} + strategy: + matrix: + release_params: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases }} + max-parallel: 1 + name: Release ${{ matrix.release_params.release_branch }} uses: ./.github/workflows/create_release.yml with: - ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} + ref: ${{ matrix.release_params.commit_sha }} type: patch - dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }} -# -# Release_1: -# needs: [AutoReleaseInfo, Release_0] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }} -# uses: ./.github/workflows/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} -# -# Release_2: -# needs: [AutoReleaseInfo, Release_1] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }} -# uses: ./.github/workflow/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} -# -# Release_3: -# needs: [AutoReleaseInfo, Release_2] -# name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }} -# if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }} -# uses: ./.github/workflow/create_release.yml -# with: -# ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }} -# type: patch -# dry-run: ${{ env.DRY_RUN }} + dry-run: ${{ fromJson(needs.AutoReleaseInfo.outputs.dry_run) }} + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} -# - name: Post Slack Message -# if: ${{ !cancelled() }} -# run: | -# cd "$GITHUB_WORKSPACE/tests/ci" -# python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} + PostSlackMessage: + needs: [AutoReleaseInfo] + runs-on: [self-hosted, release-maker] + if: ${{ !cancelled() }} + steps: + - name: Check out repository code + uses: ClickHouse/checkout@v1 + - name: Post + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 1553d689227..1fb6cb60e96 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -47,6 +47,8 @@ concurrency: required: false default: false type: boolean + secrets: + ROBOT_CLICKHOUSE_COMMIT_TOKEN: jobs: CreateRelease: diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 3cc88634004..58cfc833afe 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -1,4 +1,5 @@ import argparse +import copy import dataclasses import json import os @@ -46,6 +47,7 @@ def parse_args(): MAX_NUMBER_OF_COMMITS_TO_CONSIDER_FOR_RELEASE = 5 AUTORELEASE_INFO_FILE = "/tmp/autorelease_info.json" +AUTORELEASE_MATRIX_PARAMS = "/tmp/autorelease_params.json" 
@dataclasses.dataclass @@ -74,6 +76,12 @@ class AutoReleaseInfo: with open(AUTORELEASE_INFO_FILE, "w", encoding="utf-8") as f: print(json.dumps(dataclasses.asdict(self), indent=2), file=f) + # dump file for GH action matrix that is similar to the file above but with dropped not ready release branches + params = copy.deepcopy(self) + params.releases = [release for release in params.releases if release.ready] + with open(AUTORELEASE_MATRIX_PARAMS, "w", encoding="utf-8") as f: + print(json.dumps(params, indent=2), file=f) + @staticmethod def from_file() -> "AutoReleaseInfo": with open(AUTORELEASE_INFO_FILE, "r", encoding="utf-8") as json_file: @@ -136,6 +144,7 @@ def _prepare(token): commit_ci_status = CI.GH.get_commit_status_by_name( token=token, commit_sha=commit, + # handle old name for old releases status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), ) commit_sha = commit diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py index b8778e0cc50..97ab10f1b58 100644 --- a/tests/ci/ci_utils.py +++ b/tests/ci/ci_utils.py @@ -102,21 +102,29 @@ class GH: assert len(commit_sha) == 40 assert Utils.is_hex(commit_sha) assert not Utils.is_hex(token) - url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses?per_page={200}" + + url = f"https://api.github.com/repos/{Envs.GITHUB_REPOSITORY}/commits/{commit_sha}/statuses" headers = { "Authorization": f"token {token}", "Accept": "application/vnd.github.v3+json", } - response = requests.get(url, headers=headers, timeout=5) if isinstance(status_name, str): status_name = (status_name,) - if response.status_code == 200: - assert "next" not in response.links, "Response truncated" - statuses = response.json() - for status in statuses: - if status["context"] in status_name: - return status["state"] # type: ignore + + while url: + response = requests.get(url, headers=headers, timeout=5) + if response.status_code == 200: + statuses = response.json() + for status in statuses: + if status["context"] in status_name: + return status["state"] + + # Check if there is a next page + url = response.links.get("next", {}).get("url") + else: + break + return "" @staticmethod From dfd17cc2d71555de9c42ad6085c35bb3f1372dd1 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 16 Aug 2024 13:23:57 +0000 Subject: [PATCH 059/127] Check for invalid regexp in JSON SKIP REGEXP section --- src/DataTypes/DataTypeObject.cpp | 11 +++++++++++ .../0_stateless/03227_json_invalid_regexp.reference | 0 .../queries/0_stateless/03227_json_invalid_regexp.sql | 4 ++++ 3 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/03227_json_invalid_regexp.reference create mode 100644 tests/queries/0_stateless/03227_json_invalid_regexp.sql diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index d6395155397..11fffd8769b 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -49,6 +49,17 @@ DataTypeObject::DataTypeObject( , max_dynamic_paths(max_dynamic_paths_) , max_dynamic_types(max_dynamic_types_) { + /// Check if regular expressions are valid. + for (const auto & regexp_str : path_regexps_to_skip) + { + re2::RE2::Options options; + /// Don't log errors to stderr. 
+ options.set_log_errors(false); + auto regexp = re2::RE2(regexp_str, options); + if (!regexp.error().empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid regexp '{}': {}", regexp_str, regexp.error()); + } + for (const auto & [typed_path, type] : typed_paths) { for (const auto & path_to_skip : paths_to_skip) diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.reference b/tests/queries/0_stateless/03227_json_invalid_regexp.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.sql b/tests/queries/0_stateless/03227_json_invalid_regexp.sql new file mode 100644 index 00000000000..734dea1aac6 --- /dev/null +++ b/tests/queries/0_stateless/03227_json_invalid_regexp.sql @@ -0,0 +1,4 @@ +set allow_experimental_json_type = 1; +create table test (json JSON(SKIP REGEXP '[]')) engine=Memory(); -- {serverError BAD_ARGUMENTS} +create table test (json JSON(SKIP REGEXP '+')) engine=Memory(); -- {serverError BAD_ARGUMENTS}; + From ae389d14ee65ff5fea3543868b6b161f9fcb806e Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Sat, 17 Aug 2024 20:42:00 +0200 Subject: [PATCH 060/127] Fix stylecheck --- src/Formats/SchemaInferenceUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 54352b88578..e8eab3b4453 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -333,7 +333,7 @@ namespace type = variant_type; type_indexes = {TypeIndex::Variant}; } - + /// If we have only date/datetimes types (Date/DateTime/DateTime64), convert all of them to the common type, /// otherwise, convert all Date, DateTime and DateTime64 to String. void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) From d6e170f77704833fa6655820d55090ba18b0b9fe Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Sat, 17 Aug 2024 18:31:11 +0800 Subject: [PATCH 061/127] repeat field is also compound types, ignore it. 
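Row-group min/max pruning in getHyperrectangleForRowGroup() only handles plain leaf columns, and a repeated (list) field is a compound type just like the multi-element paths that are already skipped, so it has to be ignored as well. A query of roughly this shape (an illustrative sketch only — the file name and schema are placeholders; the real reproducer is added as a test in the next commit) reads such a column with Parquet filter push-down and could previously trip over the repeated column's statistics:

    SELECT f FROM file('repeated_field.parquet', Parquet, 'f Array(Int32)');

The fix simply extends the existing compound-type skip to repeated schema nodes.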
--- src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index c6167e572df..7b5c29e321f 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -286,6 +286,9 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa if (!s) continue; + if(s->descr()->schema_node()->is_repeated()) + continue; + auto path = c->path_in_schema()->ToDotVector(); if (path.size() != 1) continue; // compound types not supported From 858f8b502002661584e6153d39a23edc87b49dda Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Sun, 18 Aug 2024 19:11:30 +0800 Subject: [PATCH 062/127] add test and update codes per commit --- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- .../02841_parquet_filter_pushdown_bug.reference | 1 + .../02841_parquet_filter_pushdown_bug.sh.sh | 8 ++++++++ .../0_stateless/data_parquet/68131.parquet | Bin 0 -> 289 bytes 4 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference create mode 100755 tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh create mode 100644 tests/queries/0_stateless/data_parquet/68131.parquet diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 7b5c29e321f..1f213fef731 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -286,7 +286,7 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa if (!s) continue; - if(s->descr()->schema_node()->is_repeated()) + if (s->descr()->schema_node()->is_repeated()) continue; auto path = c->path_in_schema()->ToDotVector(); diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference new file mode 100644 index 00000000000..6ed63af507a --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.reference @@ -0,0 +1 @@ +[1,2] diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh new file mode 100755 index 00000000000..58eb207b6e6 --- /dev/null +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select f from file('$CURDIR/data_parquet/68131.parquet', Parquet, 'f Array(Int32)')" \ No newline at end of file diff --git a/tests/queries/0_stateless/data_parquet/68131.parquet b/tests/queries/0_stateless/data_parquet/68131.parquet new file mode 100644 index 0000000000000000000000000000000000000000..169f6152003db164c78e33cd69205caa33f906b5 GIT binary patch literal 289 zcmXAl!D_=W42Bgqgq#M0O4Q&(E)5xMp|QKBg Date: Sun, 18 Aug 2024 15:10:35 +0000 Subject: [PATCH 063/127] Add explicit session_timezone to UTC --- tests/queries/0_stateless/03222_datetime64_small_value_const.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index af06a622f8d..39266ba7992 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -1,4 +1,5 @@ -- Tags: shard +set session_timezone = 'UTC'; -- don't randomize the session timezone select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; From 8eb922036e5b7caa36c1b904b43fdaee8e45acaa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Aug 2024 12:45:15 +0800 Subject: [PATCH 064/127] change as request --- .../functions/string-replace-functions.md | 16 +- src/Functions/overlay.cpp | 140 +++++------ .../0_stateless/03205_overlay.reference | 230 +++++------------- tests/queries/0_stateless/03205_overlay.sql | 132 +++++----- .../0_stateless/03206_overlay_utf8.reference | 168 ------------- .../0_stateless/03206_overlay_utf8.sql | 60 ----- 6 files changed, 203 insertions(+), 543 deletions(-) delete mode 100644 tests/queries/0_stateless/03206_overlay_utf8.reference delete mode 100644 tests/queries/0_stateless/03206_overlay_utf8.sql diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index d086c9ee64b..408a896e607 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -251,24 +251,24 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); ## overlay -Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. **Syntax** ```sql -overlay(s, replace, position[, length]) +overlay(s, replace, offset[, length]) ``` **Parameters** - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int-uint.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). - `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** -- A [String](../data-types/string.md) data type value. If `position` is negative the position is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. +- A [String](../data-types/string.md) data type value. 
If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. **Example** @@ -286,26 +286,26 @@ Result: ## overlayUTF8 -Replace the string `s` with the string `replace` starting from the 1-based `position` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. **Syntax** ```sql -overlayUTF8(s, replace, position[, length]) +overlayUTF8(s, replace, offset[, length]) ``` **Parameters** - `s`: A string type [String](../data-types/string.md). - `replace`: A string type [String](../data-types/string.md). -- `position`: An integer type [Int](../data-types/int-uint.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). - `length`: Optional. An integer type [Int](../data-types/int-uint.md). **Returned value** -- A [String](../data-types/string.md) data type value. If `position` is negative the position is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. +- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced. **Example** diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 094da27a71d..73ca0acbb8e 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -17,13 +17,13 @@ extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -using namespace GatherUtils; - namespace { /// If 'is_utf8' - measure offset and length in code points instead of bytes. 
-/// Syntax: overlay(input, replace, offset[, length]) +/// Syntax: +/// - overlay(input, replace, offset[, length]) +/// - overlayUTF8(input, replace, offset[, length]) - measure offset and length in code points instead of bytes template class FunctionOverlay : public IFunction { @@ -37,63 +37,39 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - const size_t number_of_arguments = arguments.size(); - if (number_of_arguments < 3 || number_of_arguments > 4) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: " - "passed {}, should be 3 or 4", - getName(), - number_of_arguments); + FunctionArgumentDescriptors mandatory_args{ + {"input", static_cast(&isString), nullptr, "String"}, + {"replace", static_cast(&isString), nullptr, "String"}, + {"offset", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; - /// first argument is string - if (!isString(arguments[0])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {}, expected String", - arguments[0]->getName(), - getName()); + FunctionArgumentDescriptors optional_args{ + {"length", static_cast(&isNativeInteger), nullptr, "(U)Int8/16/32/64"}, + }; - /// second argument is string - if (!isString(arguments[1])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}, expected String", - arguments[1]->getName(), - getName()); - - if (!isNativeNumber(arguments[2])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of third argument of function {}, expected (U)Int8|16|32|64", - arguments[2]->getName(), - getName()); - - if (number_of_arguments == 4 && !isNativeNumber(arguments[3])) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of second argument of function {}, expected (U)Int8|16|32|64", - arguments[3]->getName(), - getName()); + validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (input_rows_count == 0) + return ColumnString::create(); + const size_t number_of_arguments = arguments.size(); - bool three_args = number_of_arguments == 3; + bool has_three_args = number_of_arguments == 3; ColumnPtr column_offset = arguments[2].column; ColumnPtr column_length; - if (!three_args) + if (!has_three_args) column_length = arguments[3].column; const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); const ColumnConst * column_length_const = nullptr; - if (!three_args) + if (!has_three_args) column_length_const = checkAndGetColumn(column_length.get()); bool offset_is_const = false; @@ -126,7 +102,7 @@ public: if (column_input_const) { StringRef input = column_input_const->getDataAt(0); - res_data.reserve(input.size * input_rows_count); + res_data.reserve((input.size + 1) * input_rows_count); } else { @@ -135,8 +111,8 @@ public: const auto * column_replace_const = checkAndGetColumn(column_replace.get()); const auto * column_replace_string = 
checkAndGetColumn(column_replace.get()); - bool input_is_const = column_input_const != nullptr; - bool replace_is_const = column_replace_const != nullptr; + bool input_is_const = (column_input_const != nullptr); + bool replace_is_const = (column_replace_const != nullptr); #define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ @@ -150,8 +126,9 @@ public: length, \ res_data, \ res_offsets); \ - else if (input_is_const) \ + else if (input_is_const && !replace_is_const) \ constantVector( \ + input_rows_count, \ column_input_const->getDataAt(0), \ column_replace_string->getChars(), \ column_replace_string->getOffsets(), \ @@ -161,8 +138,9 @@ public: length, \ res_data, \ res_offsets); \ - else if (replace_is_const) \ + else if (!input_is_const && replace_is_const) \ vectorConstant( \ + input_rows_count, \ column_input_string->getChars(), \ column_input_string->getOffsets(), \ column_replace_const->getDataAt(0), \ @@ -174,6 +152,7 @@ public: res_offsets); \ else \ vectorVector( \ + input_rows_count, \ column_input_string->getChars(), \ column_input_string->getOffsets(), \ column_replace_string->getChars(), \ @@ -185,7 +164,7 @@ public: res_data, \ res_offsets); - if (three_args) + if (has_three_args) { if (offset_is_const) { @@ -251,7 +230,7 @@ private: return bytes; } - template + template void constantConstant( size_t rows, const StringRef & input, @@ -263,7 +242,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { constantConstant( rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -277,12 +256,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (three_args) + else if constexpr (has_three_args) { valid_length = replace_size; } @@ -300,7 +279,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!three_args && !length_is_const) + if constexpr (!has_three_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; @@ -331,10 +310,10 @@ private: } else { - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator @@ -363,8 +342,9 @@ private: } } - template + template void vectorConstant( + size_t rows, const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, const StringRef & replace, @@ -375,27 +355,26 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { vectorConstant( - input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); + rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (three_args) + else if constexpr (has_three_args) { valid_length = replace_size; } - size_t rows = input_offsets.size(); Int64 offset = 0; // start from 1, maybe negative size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; @@ -415,7 +394,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!three_args && !length_is_const) + if constexpr (!has_three_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; @@ -449,9 +428,9 @@ private: { const auto * input_begin = &input_data[input_offset]; const auto * input_end = &input_data[input_offset + input_bytes]; - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input_bytes : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace.size + suffix_bytes + 1; /// +1 for zero terminator @@ -480,8 +459,9 @@ private: } } - template + template void constantVector( + size_t rows, const StringRef & input, const ColumnString::Chars & replace_data, const ColumnString::Offsets & replace_offsets, @@ -492,10 +472,10 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { constantVector( - input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); + rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); return; } @@ -506,13 +486,12 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - size_t rows = replace_offsets.size(); const auto * input_begin = reinterpret_cast(input.data); const auto * input_end = reinterpret_cast(input.data + input.size); Int64 offset = 0; // start from 1, maybe negative @@ -529,7 +508,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (three_args) + if constexpr (has_three_args) { valid_length = replace_size; } @@ -564,9 +543,9 @@ private: } else { - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input.size : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator res_data.resize(new_res_size); @@ -594,8 +573,9 @@ private: } } - template + template void vectorVector( + size_t rows, const ColumnString::Chars & input_data, const ColumnString::Offsets & input_offsets, const ColumnString::Chars & replace_data, @@ -607,9 +587,10 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!three_args && length_is_const && const_length < 0) + if (!has_three_args && length_is_const && const_length < 0) { vectorVector( + rows, input_data, input_offsets, replace_data, @@ -625,13 +606,12 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!three_args && length_is_const) + if constexpr (!has_three_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - size_t rows = input_offsets.size(); Int64 offset = 0; // start from 1, maybe negative size_t valid_offset = 0; // start from 0, not negative size_t res_offset = 0; @@ -655,7 +635,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (three_args) + if constexpr (has_three_args) { valid_length = replace_size; } @@ -693,9 +673,9 @@ private: { const auto * input_begin = &input_data[input_offset]; const auto * input_end = &input_data[input_offset + input_bytes]; - const auto * prefix_end = UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); + const auto * prefix_end = GatherUtils::UTF8StringSource::skipCodePointsForward(input_begin, prefix_size, input_end); size_t prefix_bytes = prefix_end > input_end ? 
input_bytes : prefix_end - input_begin; - const auto * suffix_begin = UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); + const auto * suffix_begin = GatherUtils::UTF8StringSource::skipCodePointsBackward(input_end, suffix_size, input_begin); size_t suffix_bytes = input_end - suffix_begin; size_t new_res_size = res_data.size() + prefix_bytes + replace_bytes + suffix_bytes + 1; /// +1 for zero terminator res_data.resize(new_res_size); diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 9e79db2e131..383a26986d6 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -1,168 +1,62 @@ -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark_SQL -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark CORE -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Spark ANSI SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL -Structured SQL +Negative test of overlay +Positive test 1 with various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI 
SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Positive test 2 with various combinations of const/non-const columns +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Spark_SQL Spark_SQL和CH +Positive test 3 with various combinations of const/non-const columns +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Spark CORE Spark CORECH +Positive test 4 with various combinations of const/non-const columns +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Positive test 5 with various combinations of const/non-const columns +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH +Structured SQL Structured SQL和CH diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index b131312c934..4fd0791521d 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -1,60 +1,74 @@ -SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); +SELECT 'Negative test of overlay'; +SELECT overlay('hello', 2); -- { serverError 
NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 456, 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 'two', 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT overlay('hello', 'world', 2, 'three'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT overlay('Spark SQL', '_', 6) from numbers(3); -SELECT overlay(materialize('Spark SQL'), '_', 6) from numbers(3); -SELECT overlay('Spark SQL', materialize('_'), 6) from numbers(3); -SELECT overlay('Spark SQL', '_', materialize(6)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('_'), 6) from numbers(3); -SELECT overlay(materialize('Spark SQL'), '_', materialize(6)) from numbers(3); -SELECT overlay('Spark SQL', materialize('_'), materialize(6)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)) from numbers(3); - -SELECT overlay('Spark SQL', 'CORE', 7) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'CORE', 7) from numbers(3); -SELECT overlay('Spark SQL', materialize('CORE'), 7) from numbers(3); -SELECT overlay('Spark SQL', 'CORE', materialize(7)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)) from numbers(3); -SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)) from numbers(3); - -SELECT overlay('Spark SQL', 'ANSI ', 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlay('Spark SQL', 'tructured', 2, 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4) from numbers(3); -SELECT overlay('Spark SQL', materialize('tructured'), 2, 4) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', materialize(2), 4) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4) from numbers(3); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4) from numbers(3); -SELECT overlay('Spark SQL', 
materialize('tructured'), 2, materialize(4)) from numbers(3); -SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)) from numbers(3); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); +SELECT 'Positive test 1 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Positive test 2 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', '_', 6), overlayUTF8('Spark SQL和CH', '_', 6); +SELECT overlay(materialize('Spark SQL'), '_', 6), overlayUTF8(materialize('Spark SQL和CH'), '_', 6); +SELECT overlay('Spark SQL', materialize('_'), 6), overlayUTF8('Spark SQL和CH', materialize('_'), 6); +SELECT overlay('Spark SQL', '_', materialize(6)), overlayUTF8('Spark SQL和CH', '_', materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)), overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)), overlayUTF8(materialize('Spark 
SQL和CH'), materialize('_'), materialize(6)); + +SELECT 'Positive test 3 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'CORE', 7), overlayUTF8('Spark SQL和CH', 'CORE', 7); +SELECT overlay(materialize('Spark SQL'), 'CORE', 7), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7); +SELECT overlay('Spark SQL', materialize('CORE'), 7), overlayUTF8('Spark SQL和CH', materialize('CORE'), 7); +SELECT overlay('Spark SQL', 'CORE', materialize(7)), overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7); +SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)); +SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)), overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)); +SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)); + +SELECT 'Positive test 4 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); +SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); +SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); +SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); +SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); +SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); +SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); + +SELECT 'Positive test 5 with various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', 'tructured', 2, 4), overlayUTF8('Spark SQL和CH', 'tructured', 2, 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4); +SELECT overlay('Spark SQL', materialize('tructured'), 2, 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4); +SELECT overlay('Spark SQL', 'tructured', materialize(2), 4), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4); +SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4), overlayUTF8(materialize('Spark SQL和CH'), 
materialize('tructured'), 2, 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4); +SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)); +SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4); +SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)); +SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)); +SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)); diff --git a/tests/queries/0_stateless/03206_overlay_utf8.reference b/tests/queries/0_stateless/03206_overlay_utf8.reference deleted file mode 100644 index 19878c97184..00000000000 --- a/tests/queries/0_stateless/03206_overlay_utf8.reference +++ /dev/null @@ -1,168 +0,0 @@ -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark_SQL和CH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark CORECH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Spark ANSI SQL和CH -Structured SQL和CH -Structured 
SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH -Structured SQL和CH diff --git a/tests/queries/0_stateless/03206_overlay_utf8.sql b/tests/queries/0_stateless/03206_overlay_utf8.sql deleted file mode 100644 index 00b756c8b5b..00000000000 --- a/tests/queries/0_stateless/03206_overlay_utf8.sql +++ /dev/null @@ -1,60 +0,0 @@ -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', '_', 6) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', 6) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('_'), 6) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', '_', materialize(6)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'CORE', 7) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('CORE'), 7) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)) from numbers(3); -SELECT 
overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)) from numbers(3); - -SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)) from numbers(3); -SELECT overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)) from numbers(3); -SELECT overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)) from numbers(3); From 29a0161cdfeeed739578af279092e7694dd7fbda Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Aug 2024 18:25:01 +0800 Subject: [PATCH 065/127] change as request --- .../functions/string-replace-functions.md | 5 ++-- src/Functions/overlay.cpp | 24 ++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 408a896e607..55e97688b18 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -251,7 +251,8 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); ## overlay -Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` bytes. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. 
If `length` (the optional fourth argument) is specified, a different number of bytes is removed. + **Syntax** @@ -286,7 +287,7 @@ Result: ## overlayUTF8 -Replace the string `s` with the string `replace` starting from the 1-based `offset` for `length` UTF-8 characters. If `length` is omitted or negative, then it defaults to the length of `replace`. +Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 73ca0acbb8e..20988c775a5 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -11,12 +11,6 @@ namespace DB { -namespace ErrorCodes -{ -extern const int ILLEGAL_TYPE_OF_ARGUMENT; -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { @@ -708,8 +702,20 @@ private: REGISTER_FUNCTION(Overlay) { - factory.registerFunction>({}, FunctionFactory::Case::Insensitive); - factory.registerFunction>({}, FunctionFactory::Case::Sensitive); -} + factory.registerFunction>( + {.description = R"( +Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +)", + .categories{"String"}}, + FunctionFactory::Case::Insensitive); + factory.registerFunction>( + {.description = R"( +Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. 
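A minimal illustrative sketch of the documented behaviour, using expected outputs that already appear in the 03205_overlay test data in this patch series (not new examples):

SELECT overlay('Spark SQL', '_', 6);              -- Spark_SQL (length defaults to the replacement's length, so 1 byte is removed)
SELECT overlay('Spark SQL', 'ANSI ', 7, 0);       -- Spark ANSI SQL (length 0 removes nothing; 'ANSI ' is inserted at offset 7)
SELECT overlay('Spark SQL', 'tructured', 2, 4);   -- Structured SQL (4 bytes removed starting at offset 2)
SELECT overlayUTF8('Spark SQL和CH', '_', 6);      -- Spark_SQL和CH (offset and length count UTF-8 characters rather than bytes)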
+)", + .categories{"String"}}, + FunctionFactory::Case::Sensitive); +} } From 8653c547c9e5543b46cead484eb446725ec737d6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 19 Aug 2024 18:50:04 +0800 Subject: [PATCH 066/127] change as request --- .../0_stateless/03205_overlay.reference | 88 ++++++++++--------- tests/queries/0_stateless/03205_overlay.sql | 72 +++++---------- 2 files changed, 70 insertions(+), 90 deletions(-) diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 383a26986d6..67a699944e0 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -1,22 +1,5 @@ Negative test of overlay -Positive test 1 with various combinations of const/non-const columns -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Spark ANSI SQL Spark ANSI SQL和CH -Positive test 2 with various combinations of const/non-const columns +Test with 3 arguments and various combinations of const/non-const columns Spark_SQL Spark_SQL和CH Spark_SQL Spark_SQL和CH Spark_SQL Spark_SQL和CH @@ -25,16 +8,7 @@ Spark_SQL Spark_SQL和CH Spark_SQL Spark_SQL和CH Spark_SQL Spark_SQL和CH Spark_SQL Spark_SQL和CH -Positive test 3 with various combinations of const/non-const columns -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Spark CORE Spark CORECH -Positive test 4 with various combinations of const/non-const columns +Test with 4 arguments and various combinations of const/non-const columns Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH @@ -47,16 +21,48 @@ Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH -Positive test 5 with various combinations of const/non-const columns -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH -Structured SQL Structured SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Spark ANSI SQL Spark ANSI SQL和CH +Test with different offset values +-12 _park SQL _park SQL和CH +-11 _park SQL S_ark SQL和CH +-10 _park SQL Sp_rk SQL和CH +-9 _park SQL Spa_k SQL和CH +-8 S_ark SQL Spar_ SQL和CH +-7 Sp_rk SQL Spark_SQL和CH +-6 Spa_k SQL Spark _QL和CH +-5 Spar_ SQL Spark S_L和CH +-4 Spark_SQL Spark SQ_和CH +-3 Spark _QL Spark SQL_CH +-2 Spark S_L Spark SQL和_H +-1 Spark SQ_ Spark SQL和C_ +0 Spark SQL_ Spark SQL和CH_ +1 _park SQL _park SQL和CH +2 S_ark SQL S_ark SQL和CH +3 Sp_rk SQL Sp_rk SQL和CH +4 Spa_k SQL Spa_k SQL和CH +5 Spar_ SQL Spar_ SQL和CH +6 Spark_SQL Spark_SQL和CH +7 Spark _QL Spark _QL和CH +8 Spark 
S_L Spark S_L和CH +9 Spark SQ_ Spark SQ_和CH +10 Spark SQL_ Spark SQL_CH +11 Spark SQL_ Spark SQL和_H +12 Spark SQL_ Spark SQL和C_ +13 Spark SQL_ Spark SQL和CH_ +Test with different length values +-1 Spark ANSI Spark ANSI H +0 Spark ANSI SQL Spark ANSI SQL和CH +1 Spark ANSI QL Spark ANSI QL和CH +2 Spark ANSI L Spark ANSI L和CH +3 Spark ANSI Spark ANSI 和CH +4 Spark ANSI Spark ANSI CH +5 Spark ANSI Spark ANSI H +6 Spark ANSI Spark ANSI +Test with different input and replace values +_ _ +Spark SQL Spark SQL和CH +ANSI ANSI +Spark SQL Spark SQL和CH diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index 4fd0791521d..4d0b5ecbe03 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -7,7 +7,17 @@ SELECT overlay('hello', 456, 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT overlay('hello', 'world', 'two', 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT overlay('hello', 'world', 2, 'three'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT 'Positive test 1 with various combinations of const/non-const columns'; +SELECT 'Test with 3 arguments and various combinations of const/non-const columns'; +SELECT overlay('Spark SQL', '_', 6), overlayUTF8('Spark SQL和CH', '_', 6); +SELECT overlay(materialize('Spark SQL'), '_', 6), overlayUTF8(materialize('Spark SQL和CH'), '_', 6); +SELECT overlay('Spark SQL', materialize('_'), 6), overlayUTF8('Spark SQL和CH', materialize('_'), 6); +SELECT overlay('Spark SQL', '_', materialize(6)), overlayUTF8('Spark SQL和CH', '_', materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), 6), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6); +SELECT overlay(materialize('Spark SQL'), '_', materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)); +SELECT overlay('Spark SQL', materialize('_'), materialize(6)), overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)); +SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)); + +SELECT 'Test with 4 arguments and various combinations of const/non-const columns'; SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); @@ -24,51 +34,15 @@ SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, materialize(0) SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), materialize(0)); SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); - -SELECT 'Positive test 2 with various combinations of const/non-const columns'; -SELECT overlay('Spark SQL', '_', 6), overlayUTF8('Spark SQL和CH', '_', 6); -SELECT overlay(materialize('Spark SQL'), '_', 6), overlayUTF8(materialize('Spark SQL和CH'), '_', 6); -SELECT overlay('Spark SQL', materialize('_'), 6), overlayUTF8('Spark SQL和CH', materialize('_'), 6); -SELECT overlay('Spark SQL', '_', 
materialize(6)), overlayUTF8('Spark SQL和CH', '_', materialize(6)); -SELECT overlay(materialize('Spark SQL'), materialize('_'), 6), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), 6); -SELECT overlay(materialize('Spark SQL'), '_', materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), '_', materialize(6)); -SELECT overlay('Spark SQL', materialize('_'), materialize(6)), overlayUTF8('Spark SQL和CH', materialize('_'), materialize(6)); -SELECT overlay(materialize('Spark SQL'), materialize('_'), materialize(6)), overlayUTF8(materialize('Spark SQL和CH'), materialize('_'), materialize(6)); - -SELECT 'Positive test 3 with various combinations of const/non-const columns'; -SELECT overlay('Spark SQL', 'CORE', 7), overlayUTF8('Spark SQL和CH', 'CORE', 7); -SELECT overlay(materialize('Spark SQL'), 'CORE', 7), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', 7); -SELECT overlay('Spark SQL', materialize('CORE'), 7), overlayUTF8('Spark SQL和CH', materialize('CORE'), 7); -SELECT overlay('Spark SQL', 'CORE', materialize(7)), overlayUTF8('Spark SQL和CH', 'CORE', materialize(7)); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), 7), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), 7); -SELECT overlay(materialize('Spark SQL'), 'CORE', materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), 'CORE', materialize(7)); -SELECT overlay('Spark SQL', materialize('CORE'), materialize(7)), overlayUTF8('Spark SQL和CH', materialize('CORE'), materialize(7)); -SELECT overlay(materialize('Spark SQL'), materialize('CORE'), materialize(7)), overlayUTF8(materialize('Spark SQL和CH'), materialize('CORE'), materialize(7)); - -SELECT 'Positive test 4 with various combinations of const/non-const columns'; -SELECT overlay('Spark SQL', 'ANSI ', 7, 0), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, 0); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, 0); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), 0), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), 0); -SELECT overlay('Spark SQL', 'ANSI ', 7, materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, materialize(0)); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), 7, 0), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), 7, 0); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), 0), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', materialize(7), 0); -SELECT overlay(materialize('Spark SQL'), 'ANSI ', 7, materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), 'ANSI ', 7, materialize(0)); -SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), 0), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), 0); -SELECT overlay('Spark SQL', materialize('ANSI '), 7, materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), 7, materialize(0)); -SELECT overlay('Spark SQL', 'ANSI ', materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', 'ANSI ', materialize(7), materialize(0)); -SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); - -SELECT 'Positive test 5 with various combinations of const/non-const columns'; -SELECT overlay('Spark SQL', 'tructured', 2, 4), overlayUTF8('Spark SQL和CH', 'tructured', 2, 4); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, 4), 
overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, 4); -SELECT overlay('Spark SQL', materialize('tructured'), 2, 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, 4); -SELECT overlay('Spark SQL', 'tructured', materialize(2), 4), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), 4); -SELECT overlay('Spark SQL', 'tructured', 2, materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', 2, materialize(4)); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), 2, 4), overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), 2, 4); -SELECT overlay(materialize('Spark SQL'), 'tructured', materialize(2), 4), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', materialize(2), 4); -SELECT overlay(materialize('Spark SQL'), 'tructured', 2, materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), 'tructured', 2, materialize(4)); -SELECT overlay('Spark SQL', materialize('tructured'), materialize(2), 4), overlayUTF8('Spark SQL和CH', materialize('tructured'), materialize(2), 4); -SELECT overlay('Spark SQL', materialize('tructured'), 2, materialize(4)), overlayUTF8('Spark SQL和CH', materialize('tructured'), 2, materialize(4)); -SELECT overlay('Spark SQL', 'tructured', materialize(2), materialize(4)), overlayUTF8('Spark SQL和CH', 'tructured', materialize(2), materialize(4)); -SELECT overlay(materialize('Spark SQL'), materialize('tructured'), materialize(2), materialize(4)), overlayUTF8(materialize('Spark SQL和CH'), materialize('tructured'), materialize(2), materialize(4)); + +SELECT 'Test with different offset values'; +WITH number - 12 as offset SELECT offset, overlay('Spark SQL', '_', offset), overlayUTF8('Spark SQL和CH', '_', offset) from numbers(26); + +SELECT 'Test with different length values'; +WITH number - 1 as length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) from numbers(8); + +SELECT 'Test with different input and replace values'; +SELECT overlay('', '_', 6), overlayUTF8('', '_', 6); +SELECT overlay('Spark SQL', '', 6), overlayUTF8('Spark SQL和CH', '', 6); +SELECT overlay('', 'ANSI ', 7, 0), overlayUTF8('', 'ANSI ', 7, 0); +SELECT overlay('Spark SQL', '', 7, 0), overlayUTF8('Spark SQL和CH', '', 7, 0); From c37fc005d39f880b6ab12b01e80954f162817790 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 19 Aug 2024 13:28:23 +0200 Subject: [PATCH 067/127] init --- src/Functions/FunctionSQLJSON.h | 48 ++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 83ed874c47b..66e0b4c2efd 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -123,7 +123,7 @@ public: class Executor { public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const ContextPtr & context) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const bool & function_json_value_return_type_allow_complex) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -191,7 +191,7 @@ public: { /// Instead of creating a new generator for each row, we can reuse the same one. 
generator_json_path.reinitialize(); - added_to_column = impl.insertResultToColumn(*to, document, generator_json_path, context); + added_to_column = impl.insertResultToColumn(*to, document, generator_json_path, function_json_value_return_type_allow_complex); } if (!added_to_column) { @@ -204,13 +204,25 @@ public: }; template typename Impl> -class FunctionSQLJSON : public IFunction, WithConstContext +class FunctionSQLJSON : public IFunction { public: static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } - explicit FunctionSQLJSON(ContextPtr context_) : WithConstContext(context_) { } + explicit FunctionSQLJSON(ContextPtr context_) + { + max_parser_depth = context_->getSettingsRef().max_parser_depth; + max_parser_backtracks = context_->getSettingsRef().max_parser_backtracks; + allow_simdjson = context_->getSettingsRef().allow_simdjson; + function_json_value_return_type_allow_complex = context_->getSettingsRef().function_json_value_return_type_allow_complex; + function_json_value_return_type_allow_nullable = context_->getSettingsRef().function_json_value_return_type_allow_nullable; + } static constexpr auto name = Name::name; + bool max_parser_depth; + bool max_parser_backtracks; + bool allow_simdjson; + bool function_json_value_return_type_allow_complex; + bool function_json_value_return_type_allow_nullable; String getName() const override { return Name::name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -221,7 +233,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { return Impl>::getReturnType( - Name::name, arguments, getContext()); + Name::name, arguments, function_json_value_return_type_allow_nullable); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -231,18 +243,18 @@ public: /// 2. Create ASTPtr /// 3. Parser(Tokens, ASTPtr) -> complete AST /// 4. 
Execute functions: call getNextItem on generator and handle each item - unsigned parse_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); - unsigned parse_backtracks = static_cast(getContext()->getSettingsRef().max_parser_backtracks); + unsigned parse_depth = static_cast(max_parser_depth); + unsigned parse_backtracks = static_cast(max_parser_backtracks); #if USE_SIMDJSON - if (getContext()->getSettingsRef().allow_simdjson) + if (allow_simdjson) return FunctionSQLJSONHelpers::Executor< Name, Impl>, - SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); + SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, parse_backtracks, function_json_value_return_type_allow_complex); #endif return FunctionSQLJSONHelpers:: Executor>, DummyJSONParser>::run( - arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); + arguments, result_type, input_rows_count, parse_depth, parse_backtracks, function_json_value_return_type_allow_complex); } }; @@ -267,11 +279,11 @@ class JSONExistsImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr &) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool &) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const ContextPtr &) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool &) { Element current_element = root; VisitorStatus status; @@ -305,9 +317,9 @@ class JSONValueImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr & context) + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool & function_json_value_return_type_allow_nullable) { - if (context->getSettingsRef().function_json_value_return_type_allow_nullable) + if (function_json_value_return_type_allow_nullable) { DataTypePtr string_type = std::make_shared(); return std::make_shared(string_type); @@ -320,7 +332,7 @@ public: static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const ContextPtr & context) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool & function_json_value_return_type_allow_complex) { Element current_element = root; VisitorStatus status; @@ -329,7 +341,7 @@ public: { if (status == VisitorStatus::Ok) { - if (context->getSettingsRef().function_json_value_return_type_allow_complex) + if (function_json_value_return_type_allow_complex) { break; } @@ -383,11 +395,11 @@ class JSONQueryImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const ContextPtr &) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool &) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const 
ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const ContextPtr &) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool &) { ColumnString & col_str = assert_cast(dest); From 6ee9cadc9550bf30c0c2e4b1176f803817c45bc0 Mon Sep 17 00:00:00 2001 From: shiyer7474 Date: Mon, 19 Aug 2024 12:17:48 +0000 Subject: [PATCH 068/127] Set experimental_analyzer and no_parallel --- .../0_stateless/03222_datetime64_small_value_const.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql index 39266ba7992..a64ebd38ccf 100644 --- a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -1,5 +1,6 @@ -- Tags: shard set session_timezone = 'UTC'; -- don't randomize the session timezone +SET allow_experimental_analyzer = 1; select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; select *, (select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; @@ -38,3 +39,6 @@ select count(*) from distr_03222_dt64 where dt < (select toDateTime64('2004-07-2 drop table if exists shard_0.dt64_03222; drop table if exists shard_1.dt64_03222; drop table if exists distr_03222_dt64; + +drop database shard_0; +drop database shard_1; From 0d444e6708f04cf4a581f34246e57745552ab001 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:39:24 +0200 Subject: [PATCH 069/127] review fixes --- src/Functions/FunctionSQLJSON.h | 35 +++++++++++++++++---------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 66e0b4c2efd..1c2a8d527f8 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -123,7 +123,7 @@ public: class Executor { public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const bool & function_json_value_return_type_allow_complex) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const bool function_json_value_return_type_allow_complex) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -209,20 +209,15 @@ class FunctionSQLJSON : public IFunction public: static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } explicit FunctionSQLJSON(ContextPtr context_) + : max_parser_depth(context_->getSettingsRef().max_parser_depth), + max_parser_backtracks(context_->getSettingsRef().max_parser_backtracks), + allow_simdjson(context_->getSettingsRef().allow_simdjson), + function_json_value_return_type_allow_complex(context_->getSettingsRef().function_json_value_return_type_allow_complex), + function_json_value_return_type_allow_nullable(context_->getSettingsRef().function_json_value_return_type_allow_nullable) { - max_parser_depth = context_->getSettingsRef().max_parser_depth; - max_parser_backtracks = 
context_->getSettingsRef().max_parser_backtracks; - allow_simdjson = context_->getSettingsRef().allow_simdjson; - function_json_value_return_type_allow_complex = context_->getSettingsRef().function_json_value_return_type_allow_complex; - function_json_value_return_type_allow_nullable = context_->getSettingsRef().function_json_value_return_type_allow_nullable; } static constexpr auto name = Name::name; - bool max_parser_depth; - bool max_parser_backtracks; - bool allow_simdjson; - bool function_json_value_return_type_allow_complex; - bool function_json_value_return_type_allow_nullable; String getName() const override { return Name::name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -256,6 +251,12 @@ public: Executor>, DummyJSONParser>::run( arguments, result_type, input_rows_count, parse_depth, parse_backtracks, function_json_value_return_type_allow_complex); } +private: + const size_t max_parser_depth; + const size_t max_parser_backtracks; + const bool allow_simdjson; + const bool function_json_value_return_type_allow_complex; + const bool function_json_value_return_type_allow_nullable; }; struct NameJSONExists @@ -279,11 +280,11 @@ class JSONExistsImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool &) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool &) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool) { Element current_element = root; VisitorStatus status; @@ -317,7 +318,7 @@ class JSONValueImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool & function_json_value_return_type_allow_nullable) + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool function_json_value_return_type_allow_nullable) { if (function_json_value_return_type_allow_nullable) { @@ -332,7 +333,7 @@ public: static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool & function_json_value_return_type_allow_complex) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool function_json_value_return_type_allow_complex) { Element current_element = root; VisitorStatus status; @@ -395,11 +396,11 @@ class JSONQueryImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool &) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool &) + static bool 
insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool) { ColumnString & col_str = assert_cast(dest); From 7e2804fad5e66b28074eb28fa1f6332c741cdad2 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:48:27 +0200 Subject: [PATCH 070/127] remove const (review) --- src/Functions/FunctionSQLJSON.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 1c2a8d527f8..4721f858f5c 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -123,7 +123,7 @@ public: class Executor { public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const bool function_json_value_return_type_allow_complex) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, bool function_json_value_return_type_allow_complex) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -280,11 +280,11 @@ class JSONExistsImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, bool) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, bool) { Element current_element = root; VisitorStatus status; @@ -318,7 +318,7 @@ class JSONValueImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool function_json_value_return_type_allow_nullable) + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, bool function_json_value_return_type_allow_nullable) { if (function_json_value_return_type_allow_nullable) { @@ -333,7 +333,7 @@ public: static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, const bool function_json_value_return_type_allow_complex) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, bool function_json_value_return_type_allow_complex) { Element current_element = root; VisitorStatus status; @@ -396,11 +396,11 @@ class JSONQueryImpl public: using Element = typename JSONParser::Element; - static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, const bool) { return std::make_shared(); } + static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &, bool) { return std::make_shared(); } static size_t getNumberOfIndexArguments(const ColumnsWithTypeAndName & arguments) { return arguments.size() - 1; } - static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & 
generator_json_path, const bool) + static bool insertResultToColumn(IColumn & dest, const Element & root, GeneratorJSONPath & generator_json_path, bool) { ColumnString & col_str = assert_cast(dest); From 2b75221e1daf2935f40acbb0e593953073f981f7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 18:00:28 +0200 Subject: [PATCH 071/127] Reduce max_dynamic_paths in 01825_new_type_json_ghdata.sh to consume less memory --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index 33940caec29..f165223fb98 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -7,7 +7,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON(max_dynamic_paths=100)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} \ --max_memory_usage 10G --query "INSERT INTO ghdata FORMAT JSONAsObject" From f9de579b41ac8b70c9638d658b37c77809e25324 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 18:01:44 +0200 Subject: [PATCH 072/127] Reduce max_dynamic_paths in another ghdata test --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh index 568ba2bd185..3f5fc91f8fc 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -10,9 +10,9 @@ ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_string" ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata_2_from_string" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON(max_dynamic_paths=100)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 +${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON(max_dynamic_paths=100)) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 cat 
$CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2 FORMAT JSONAsObject" cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString" From 2f5fb44695fdb1f66a927f1402512dbd065b8c41 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 19:35:44 +0200 Subject: [PATCH 073/127] Better check and error code --- src/DataTypes/DataTypeObject.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 11fffd8769b..9846ae876ca 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -56,8 +56,8 @@ DataTypeObject::DataTypeObject( /// Don't log errors to stderr. options.set_log_errors(false); auto regexp = re2::RE2(regexp_str, options); - if (!regexp.error().empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid regexp '{}': {}", regexp_str, regexp.error()); + if (!regexp.ok()) + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Invalid regexp '{}': {}", regexp_str, regexp.error()); } for (const auto & [typed_path, type] : typed_paths) From f49fe765328c3637ab781a986a0f6c463fc0ecde Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 19:36:00 +0200 Subject: [PATCH 074/127] Add missing error code --- src/DataTypes/DataTypeObject.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DataTypes/DataTypeObject.cpp b/src/DataTypes/DataTypeObject.cpp index 9846ae876ca..0d410d6f24c 100644 --- a/src/DataTypes/DataTypeObject.cpp +++ b/src/DataTypes/DataTypeObject.cpp @@ -33,6 +33,7 @@ namespace ErrorCodes { extern const int UNEXPECTED_AST_STRUCTURE; extern const int BAD_ARGUMENTS; + extern const int CANNOT_COMPILE_REGEXP; } DataTypeObject::DataTypeObject( From 51c5b91b0a58f6577adba0c1e5717f649104c17b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 19:36:20 +0200 Subject: [PATCH 075/127] Update error code in tests --- tests/queries/0_stateless/03227_json_invalid_regexp.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03227_json_invalid_regexp.sql b/tests/queries/0_stateless/03227_json_invalid_regexp.sql index 734dea1aac6..d98e2ade29d 100644 --- a/tests/queries/0_stateless/03227_json_invalid_regexp.sql +++ b/tests/queries/0_stateless/03227_json_invalid_regexp.sql @@ -1,4 +1,4 @@ set allow_experimental_json_type = 1; -create table test (json JSON(SKIP REGEXP '[]')) engine=Memory(); -- {serverError BAD_ARGUMENTS} -create table test (json JSON(SKIP REGEXP '+')) engine=Memory(); -- {serverError BAD_ARGUMENTS}; +create table test (json JSON(SKIP REGEXP '[]')) engine=Memory(); -- {serverError CANNOT_COMPILE_REGEXP} +create table test (json JSON(SKIP REGEXP '+')) engine=Memory(); -- {serverError CANNOT_COMPILE_REGEXP}; From 2d998a9f6a12499dcc82186ff9f0c5edaf173c6c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 19 Aug 2024 20:19:11 +0200 Subject: [PATCH 076/127] Fix test --- .../03037_dynamic_merges_1_horizontal_compact_merge_tree.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index 46f1c78b255..5e7d1ee9826 100644 --- 
a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -33,5 +33,3 @@ optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); drop table test; - -select 1; From bc9cac605a58b4dce38bf06da889639f5bd62438 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 19 Aug 2024 22:00:35 +0000 Subject: [PATCH 077/127] Fix enumerating dynamic subcolumns --- src/DataTypes/IDataType.cpp | 1 + src/DataTypes/Serializations/ISerialization.h | 1 + .../Serializations/SerializationDynamic.cpp | 2 +- .../Serializations/SerializationObject.cpp | 2 +- ...dynamic_subcolumns_enumerate_streams.reference | 15 +++++++++++++++ ...03227_dynamic_subcolumns_enumerate_streams.sql | 9 +++++++++ 6 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference create mode 100644 tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 7fd8a85aeca..d14461a4ff1 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -150,6 +150,7 @@ std::unique_ptr IDataType::getSubcolumnData( ISerialization::EnumerateStreamsSettings settings; settings.position_independent_encoding = false; + settings.enumerate_dynamic_streams = false; data.serialization->enumerateStreams(settings, callback_with_data, data); if (!res && data.type->hasDynamicSubcolumnsData()) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 480d5a4f7c4..d19cb0dd365 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -241,6 +241,7 @@ public: { SubstreamPath path; bool position_independent_encoding = true; + bool enumerate_dynamic_streams = true; }; virtual void enumerateStreams( diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp index 6bba87c40fa..10a80aa8a0d 100644 --- a/src/DataTypes/Serializations/SerializationDynamic.cpp +++ b/src/DataTypes/Serializations/SerializationDynamic.cpp @@ -64,7 +64,7 @@ void SerializationDynamic::enumerateStreams( const auto * deserialize_state = data.deserialize_state ? checkAndGetState(data.deserialize_state) : nullptr; /// If column is nullptr and we don't have deserialize state yet, nothing to enumerate as we don't have any variants. - if (!column_dynamic && !deserialize_state) + if (!settings.enumerate_dynamic_streams || (!column_dynamic && !deserialize_state)) return; const auto & variant_type = column_dynamic ? column_dynamic->getVariantInfo().variant_type : checkAndGetState(deserialize_state->structure_state)->variant_type; diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 2dd25e540cc..45121b942e6 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -130,7 +130,7 @@ void SerializationObject::enumerateStreams(EnumerateStreamsSettings & settings, } /// If column or deserialization state was provided, iterate over dynamic paths, - if (column_object || structure_state) + if (settings.enumerate_dynamic_streams && (column_object || structure_state)) { /// Enumerate dynamic paths in sorted order for consistency. 
const auto * dynamic_paths = column_object ? &column_object->getDynamicPaths() : nullptr; diff --git a/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference new file mode 100644 index 00000000000..b9d711bf9f0 --- /dev/null +++ b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.reference @@ -0,0 +1,15 @@ +\N +\N +\N +\N +\N +str_0 +str_1 +str_2 +str_3 +str_4 +\N +\N +\N +\N +\N diff --git a/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql new file mode 100644 index 00000000000..e451521faca --- /dev/null +++ b/tests/queries/0_stateless/03227_dynamic_subcolumns_enumerate_streams.sql @@ -0,0 +1,9 @@ +set allow_experimental_json_type=1; +drop table if exists test; +create table test (json JSON) engine=Memory; +insert into test select toJSONString(map('a', 'str_' || number)) from numbers(5); +select json.a.String from test; +select json.a.:String from test; +select json.a.UInt64 from test; +drop table test; + From 246b050e300da89c408b32eba70b5e1326e28764 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 20 Aug 2024 00:38:50 +0000 Subject: [PATCH 078/127] Fix filename --- ...er_pushdown_bug.sh.sh => 02841_parquet_filter_pushdown_bug.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{02841_parquet_filter_pushdown_bug.sh.sh => 02841_parquet_filter_pushdown_bug.sh} (100%) diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh b/tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh similarity index 100% rename from tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh.sh rename to tests/queries/0_stateless/02841_parquet_filter_pushdown_bug.sh From 9b55180184c0f38cf88bf5902687603cf903ee40 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 9 Aug 2024 14:22:33 +0000 Subject: [PATCH 079/127] CI: Move out scripts from dockers --- docker/images.json | 7 +---- docker/test/fasttest/Dockerfile | 3 -- docker/test/stateful/Dockerfile | 4 --- docker/test/stateful/setup_minio.sh | 1 - docker/test/stateless/Dockerfile | 12 -------- docker/test/stress/Dockerfile | 3 -- docker/test/upgrade/Dockerfile | 29 ------------------- docker/test/util/Dockerfile | 2 -- tests/ci/.mypy.ini | 3 +- tests/ci/ci_config.py | 5 +++- tests/ci/ci_definitions.py | 7 +++-- tests/ci/docker_images_check.py | 2 +- tests/ci/fast_test_check.py | 7 ++--- tests/ci/functional_test_check.py | 20 ++++++------- tests/ci/stress_check.py | 17 +++++++---- tests/ci/upgrade_check.py | 2 +- .../docker_scripts}/attach_gdb.lib | 2 +- .../docker_scripts}/create.sql | 0 .../docker_scripts/fasttest_runner.sh | 2 +- .../process_functional_tests_result.py | 24 +++++++-------- .../docker_scripts}/setup_hdfs_minicluster.sh | 2 +- .../docker_scripts}/setup_minio.sh | 2 +- .../docker_scripts/stateful_runner.sh | 13 +++++---- .../docker_scripts/stateless_runner.sh | 22 +++++++++----- .../docker_scripts/stress_runner.sh | 21 +++++++------- .../docker_scripts}/stress_tests.lib | 2 +- .../docker_scripts/upgrade_runner.sh | 14 ++++----- .../docker_scripts}/utils.lib | 0 28 files changed, 94 insertions(+), 134 deletions(-) delete mode 120000 docker/test/stateful/setup_minio.sh delete mode 100644 docker/test/upgrade/Dockerfile rename {docker/test/stateless => tests/docker_scripts}/attach_gdb.lib (98%) rename {docker/test/stateful => 
tests/docker_scripts}/create.sql (100%) rename docker/test/fasttest/run.sh => tests/docker_scripts/fasttest_runner.sh (99%) rename {docker/test/util => tests/docker_scripts}/process_functional_tests_result.py (92%) rename {docker/test/stateless => tests/docker_scripts}/setup_hdfs_minicluster.sh (95%) rename {docker/test/stateless => tests/docker_scripts}/setup_minio.sh (98%) rename docker/test/stateful/run.sh => tests/docker_scripts/stateful_runner.sh (98%) rename docker/test/stateless/run.sh => tests/docker_scripts/stateless_runner.sh (97%) rename docker/test/stress/run.sh => tests/docker_scripts/stress_runner.sh (97%) mode change 100644 => 100755 rename {docker/test/stateless => tests/docker_scripts}/stress_tests.lib (99%) rename docker/test/upgrade/run.sh => tests/docker_scripts/upgrade_runner.sh (96%) mode change 100644 => 100755 rename {docker/test/stateless => tests/docker_scripts}/utils.lib (100%) diff --git a/docker/images.json b/docker/images.json index 716b76ee217..055394b69e6 100644 --- a/docker/images.json +++ b/docker/images.json @@ -47,8 +47,7 @@ "docker/test/stateful": { "name": "clickhouse/stateful-test", "dependent": [ - "docker/test/stress", - "docker/test/upgrade" + "docker/test/stress" ] }, "docker/test/unit": { @@ -59,10 +58,6 @@ "name": "clickhouse/stress-test", "dependent": [] }, - "docker/test/upgrade": { - "name": "clickhouse/upgrade-check", - "dependent": [] - }, "docker/test/integration/runner": { "name": "clickhouse/integration-tests-runner", "dependent": [] diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 5d311c673a4..ca93b24f66e 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -93,6 +93,3 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' ENV PULL_REQUEST_NUMBER='' ENV COPY_CLICKHOUSE_BINARY_TO_OUTPUT=0 - -COPY run.sh / -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 0daf88cad7e..9aa936cb069 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -10,7 +10,3 @@ RUN apt-get update -y \ npm \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* - -COPY create.sql / -COPY run.sh / -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateful/setup_minio.sh b/docker/test/stateful/setup_minio.sh deleted file mode 120000 index 0d539f72cb3..00000000000 --- a/docker/test/stateful/setup_minio.sh +++ /dev/null @@ -1 +0,0 @@ -../stateless/setup_minio.sh \ No newline at end of file diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index b0c4914a4e8..69f81b35a95 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -85,18 +85,6 @@ RUN curl -L --no-verbose -O 'https://archive.apache.org/dist/hadoop/common/hadoo ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 -ENV CLICKHOUSE_GRPC_CLIENT="/usr/share/clickhouse-utils/grpc-client/clickhouse-grpc-client.py" RUN npm install -g azurite@3.30.0 \ && npm install -g tslib && npm install -g node - -COPY run.sh / -COPY setup_minio.sh / -COPY setup_hdfs_minicluster.sh / -COPY attach_gdb.lib / -COPY utils.lib / - -# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests -COPY stress_tests.lib / - -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 0f81a1cd07f..ecb98a4e3ed 
100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -22,8 +22,5 @@ RUN apt-get update -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -COPY run.sh / - ENV EXPORT_S3_STORAGE_POLICIES=1 -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile deleted file mode 100644 index 78d912fd031..00000000000 --- a/docker/test/upgrade/Dockerfile +++ /dev/null @@ -1,29 +0,0 @@ -# rebuild in #33610 -# docker build -t clickhouse/upgrade-check . -ARG FROM_TAG=latest -FROM clickhouse/stateful-test:$FROM_TAG - -RUN apt-get update -y \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --yes --no-install-recommends \ - bash \ - tzdata \ - parallel \ - expect \ - python3 \ - python3-lxml \ - python3-termcolor \ - python3-requests \ - curl \ - sudo \ - openssl \ - netcat-openbsd \ - brotli \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* - -COPY run.sh / - -ENV EXPORT_S3_STORAGE_POLICIES=1 - -CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 8b949ed95db..6b9fb94a4c6 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -56,7 +56,5 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* -COPY process_functional_tests_result.py / - COPY --from=clickhouse/cctools:0d6b90a7a490 /opt/gdb /opt/gdb ENV PATH="/opt/gdb/bin:${PATH}" diff --git a/tests/ci/.mypy.ini b/tests/ci/.mypy.ini index f12d27979ce..ecb4aef87dd 100644 --- a/tests/ci/.mypy.ini +++ b/tests/ci/.mypy.ini @@ -15,4 +15,5 @@ warn_return_any = True no_implicit_reexport = True strict_equality = True extra_checks = True -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True +logging-fstring-interpolation = False \ No newline at end of file diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5453bffd9c6..58de25f039f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -535,7 +535,10 @@ class CI: JobNames.FAST_TEST: JobConfig( pr_only=True, digest=DigestConfig( - include_paths=["./tests/queries/0_stateless/"], + include_paths=[ + "./tests/queries/0_stateless/", + "./tests/docker_scripts/", + ], exclude_files=[".md"], docker=["clickhouse/fasttest"], ), diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 13c222b10b9..1cdb3f1487e 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -415,6 +415,7 @@ class CommonJobConfigs: "./tests/clickhouse-test", "./tests/config", "./tests/*.txt", + "./tests/docker_scripts/", ], exclude_files=[".md"], docker=["clickhouse/stateless-test"], @@ -431,6 +432,7 @@ class CommonJobConfigs: "./tests/clickhouse-test", "./tests/config", "./tests/*.txt", + "./tests/docker_scripts/", ], exclude_files=[".md"], docker=["clickhouse/stateful-test"], @@ -448,6 +450,7 @@ class CommonJobConfigs: "./tests/clickhouse-test", "./tests/config", "./tests/*.txt", + "./tests/docker_scripts/", ], exclude_files=[".md"], docker=["clickhouse/stress-test"], @@ -459,9 +462,9 @@ class CommonJobConfigs: UPGRADE_TEST = JobConfig( job_name_keyword="upgrade", digest=DigestConfig( - include_paths=["./tests/ci/upgrade_check.py"], + include_paths=["./tests/ci/upgrade_check.py", "./tests/docker_scripts/"], exclude_files=[".md"], - docker=["clickhouse/upgrade-check"], + docker=["clickhouse/stress-test"], ), run_command="upgrade_check.py", runner_type=Runners.STRESS_TESTER, diff --git 
a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 786a529e0a9..c8dbcd10245 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -93,7 +93,7 @@ def process_single_image( results = [] # type: TestResults for ver in versions: stopwatch = Stopwatch() - for i in range(5): + for i in range(2): success, build_log = build_and_push_one_image( image, ver, additional_cache, push, from_tag ) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index ed727dd3659..55eefcf9714 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -31,15 +31,14 @@ def get_fasttest_cmd( "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls "--network=host " # required to get access to IAM credentials f"-e FASTTEST_WORKSPACE=/fasttest-workspace -e FASTTEST_OUTPUT=/test_output " - f"-e FASTTEST_SOURCE=/ClickHouse " + f"-e FASTTEST_SOURCE=/repo " f"-e FASTTEST_CMAKE_FLAGS='-DCOMPILER_CACHE=sccache' " f"-e PULL_REQUEST_NUMBER={pr_number} -e COMMIT_SHA={commit_sha} " f"-e COPY_CLICKHOUSE_BINARY_TO_OUTPUT=1 " f"-e SCCACHE_BUCKET={S3_BUILDS_BUCKET} -e SCCACHE_S3_KEY_PREFIX=ccache/sccache " "-e stage=clone_submodules " - f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/ClickHouse " - f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " - f"--volume={output_path}:/test_output {image}" + f"--volume={workspace}:/fasttest-workspace --volume={repo_path}:/repo " + f"--volume={output_path}:/test_output {image} /repo/tests/docker_scripts/fasttest_runner.sh" ) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index d08f98fa05f..ce2ead59d1a 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -119,24 +119,24 @@ def get_run_command( envs += [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) - volume_with_broken_test = ( - f"--volume={repo_path}/tests/analyzer_tech_debt.txt:/analyzer_tech_debt.txt " - if "analyzer" not in check_name - else "" - ) + + if "stateful" in check_name.lower(): + run_script = "/repo/tests/docker_scripts/stateful_runner.sh" + elif "stateless" in check_name.lower(): + run_script = "/repo/tests/docker_scripts/stateless_runner.sh" + else: + assert False return ( f"docker run --rm --name func-tester --volume={builds_path}:/package_folder " # For dmesg and sysctl "--privileged " - f"{ci_logs_args}" - f"--volume={repo_path}/tests:/usr/share/clickhouse-test " - f"--volume={repo_path}/utils/grpc-client:/usr/share/clickhouse-utils/grpc-client " - f"{volume_with_broken_test}" + f"{ci_logs_args} " + f"--volume={repo_path}:/repo " f"--volume={result_path}:/test_output " f"--volume={server_log_path}:/var/log/clickhouse-server " "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls - f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image}" + f"--cap-add=SYS_PTRACE {env_str} {additional_options_str} {image} {run_script}" ) diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 85da601e379..f9656e60448 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -57,10 +57,16 @@ def get_run_command( additional_envs: List[str], ci_logs_args: str, image: DockerImage, + upgrade_check: bool, ) -> str: envs = [f"-e {e}" for e in additional_envs] env_str = " ".join(envs) + if upgrade_check: + run_script = "/repo/tests/docker_scripts/upgrade_runner.sh" + else: + run_script = "/repo/tests/docker_scripts/stress_runner.sh" + cmd = ( "docker run 
--cap-add=SYS_PTRACE " # For dmesg and sysctl @@ -70,8 +76,8 @@ def get_run_command( f"{ci_logs_args}" f"--volume={build_path}:/package_folder " f"--volume={result_path}:/test_output " - f"--volume={repo_tests_path}:/usr/share/clickhouse-test " - f"--volume={server_log_path}:/var/log/clickhouse-server {env_str} {image} " + f"--volume={repo_tests_path}/..:/repo " + f"--volume={server_log_path}:/var/log/clickhouse-server {env_str} {image} {run_script}" ) return cmd @@ -128,7 +134,7 @@ def process_results( return state, description, test_results, additional_files -def run_stress_test(docker_image_name: str) -> None: +def run_stress_test(upgrade_check: bool = False) -> None: logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -148,7 +154,7 @@ def run_stress_test(docker_image_name: str) -> None: pr_info = PRInfo() - docker_image = pull_image(get_docker_image(docker_image_name)) + docker_image = pull_image(get_docker_image("clickhouse/stress-test")) packages_path = temp_path / "packages" packages_path.mkdir(parents=True, exist_ok=True) @@ -177,6 +183,7 @@ def run_stress_test(docker_image_name: str) -> None: additional_envs, ci_logs_args, docker_image, + upgrade_check, ) logging.info("Going to run stress test: %s", run_command) @@ -208,4 +215,4 @@ def run_stress_test(docker_image_name: str) -> None: if __name__ == "__main__": - run_stress_test("clickhouse/stress-test") + run_stress_test() diff --git a/tests/ci/upgrade_check.py b/tests/ci/upgrade_check.py index 83b6f9e299f..8662611dffe 100644 --- a/tests/ci/upgrade_check.py +++ b/tests/ci/upgrade_check.py @@ -1,4 +1,4 @@ import stress_check if __name__ == "__main__": - stress_check.run_stress_test("clickhouse/upgrade-check") + stress_check.run_stress_test(upgrade_check=True) diff --git a/docker/test/stateless/attach_gdb.lib b/tests/docker_scripts/attach_gdb.lib similarity index 98% rename from docker/test/stateless/attach_gdb.lib rename to tests/docker_scripts/attach_gdb.lib index 2f1375a2f0f..4170a19176c 100644 --- a/docker/test/stateless/attach_gdb.lib +++ b/tests/docker_scripts/attach_gdb.lib @@ -1,7 +1,7 @@ #!/bin/bash # shellcheck source=./utils.lib -source /utils.lib +source /repo/tests/docker_scripts/utils.lib function attach_gdb_to_clickhouse() { diff --git a/docker/test/stateful/create.sql b/tests/docker_scripts/create.sql similarity index 100% rename from docker/test/stateful/create.sql rename to tests/docker_scripts/create.sql diff --git a/docker/test/fasttest/run.sh b/tests/docker_scripts/fasttest_runner.sh similarity index 99% rename from docker/test/fasttest/run.sh rename to tests/docker_scripts/fasttest_runner.sh index 9920326b11c..1eaba2c7cdf 100755 --- a/docker/test/fasttest/run.sh +++ b/tests/docker_scripts/fasttest_runner.sh @@ -325,7 +325,7 @@ case "$stage" in ;& "run_tests") run_tests ||: - /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ + /repo/tests/docker_scripts/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \ --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \ --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv" ;; diff --git a/docker/test/util/process_functional_tests_result.py b/tests/docker_scripts/process_functional_tests_result.py similarity index 92% rename from docker/test/util/process_functional_tests_result.py rename to tests/docker_scripts/process_functional_tests_result.py index ec9e14b1430..1dc3090484c 100755 
--- a/docker/test/util/process_functional_tests_result.py +++ b/tests/docker_scripts/process_functional_tests_result.py @@ -32,7 +32,7 @@ def process_test_log(log_path, broken_tests): success_finish = False test_results = [] test_end = True - with open(log_path, "r") as test_file: + with open(log_path, "r", encoding="utf-8") as test_file: for line in test_file: original_line = line line = line.strip() @@ -150,7 +150,7 @@ def process_result(result_path, broken_tests): if result_path and os.path.exists(result_path): ( - total, + _total, skipped, unknown, failed, @@ -191,11 +191,11 @@ def process_result(result_path, broken_tests): else: description = "" - description += "fail: {}, passed: {}".format(failed, success) + description += f"fail: {failed}, passed: {success}" if skipped != 0: - description += ", skipped: {}".format(skipped) + description += f", skipped: {skipped}" if unknown != 0: - description += ", unknown: {}".format(unknown) + description += f", unknown: {unknown}" else: state = "failure" description = "Output log doesn't exist" @@ -205,10 +205,10 @@ def process_result(result_path, broken_tests): def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: + with open(results_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerows(results) - with open(status_file, "w") as f: + with open(status_file, "w", encoding="utf-8") as f: out = csv.writer(f, delimiter="\t") out.writerow(status) @@ -221,15 +221,15 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") - parser.add_argument("--broken-tests", default="/analyzer_tech_debt.txt") + parser.add_argument("--broken-tests", default="/repo/tests/analyzer_tech_debt.txt") args = parser.parse_args() - broken_tests = list() + broken_tests = [] if os.path.exists(args.broken_tests): - logging.info(f"File {args.broken_tests} with broken tests found") - with open(args.broken_tests) as f: + print(f"File {args.broken_tests} with broken tests found") + with open(args.broken_tests, encoding="utf-8") as f: broken_tests = f.read().splitlines() - logging.info(f"Broken tests in the list: {len(broken_tests)}") + print(f"Broken tests in the list: {len(broken_tests)}") state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") diff --git a/docker/test/stateless/setup_hdfs_minicluster.sh b/tests/docker_scripts/setup_hdfs_minicluster.sh similarity index 95% rename from docker/test/stateless/setup_hdfs_minicluster.sh rename to tests/docker_scripts/setup_hdfs_minicluster.sh index 15a54f59096..622270ba5d5 100755 --- a/docker/test/stateless/setup_hdfs_minicluster.sh +++ b/tests/docker_scripts/setup_hdfs_minicluster.sh @@ -5,7 +5,7 @@ set -e -x -a -u ls -lha -cd hadoop-3.3.1 +cd /hadoop-3.3.1 export JAVA_HOME=/usr mkdir -p target/test/data diff --git a/docker/test/stateless/setup_minio.sh b/tests/docker_scripts/setup_minio.sh similarity index 98% rename from docker/test/stateless/setup_minio.sh rename to tests/docker_scripts/setup_minio.sh index c1508df7e82..40e93e713a1 100755 --- a/docker/test/stateless/setup_minio.sh +++ b/tests/docker_scripts/setup_minio.sh @@ -143,7 +143,7 @@ main() { fi start_minio setup_minio "$1" - upload_data "${query_dir}" "${2:-/usr/share/clickhouse-test}" + upload_data "${query_dir}" "${2:-/repo/tests/}" 
setup_aws_credentials } diff --git a/docker/test/stateful/run.sh b/tests/docker_scripts/stateful_runner.sh similarity index 98% rename from docker/test/stateful/run.sh rename to tests/docker_scripts/stateful_runner.sh index a489a4b6e14..86f6a299ad3 100755 --- a/docker/test/stateful/run.sh +++ b/tests/docker_scripts/stateful_runner.sh @@ -14,17 +14,17 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test # shellcheck disable=SC1091 -source /utils.lib +source /repo/tests/docker_scripts/utils.lib # install test configs -/usr/share/clickhouse-test/config/install.sh +/repo/tests/config/install.sh azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --silent --inMemoryPersistence & -./setup_minio.sh stateful +/repo/tests/docker_scripts/setup_minio.sh stateful ./mc admin trace clickminio > /test_output/minio.log & MC_ADMIN_PID=$! @@ -105,7 +105,7 @@ setup_logs_replication clickhouse-client --query "SHOW DATABASES" clickhouse-client --query "CREATE DATABASE datasets" -clickhouse-client --multiquery < create.sql +clickhouse-client --multiquery < /repo/tests/docker_scripts/create.sql clickhouse-client --query "SHOW TABLES FROM datasets" if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -237,6 +237,7 @@ function run_tests() --hung-check --print-time --capture-client-stacktrace + --queries "/repo/tests/queries" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" ) @@ -259,7 +260,7 @@ ls -la ./ echo "Files in root directory" ls -la / -/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv sudo clickhouse stop ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then diff --git a/docker/test/stateless/run.sh b/tests/docker_scripts/stateless_runner.sh similarity index 97% rename from docker/test/stateless/run.sh rename to tests/docker_scripts/stateless_runner.sh index 6973eebbac3..671b1f5ca71 100755 --- a/docker/test/stateless/run.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -1,10 +1,13 @@ #!/bin/bash +# fail on errors, verbose and export all env variables +set -e -x -a + # shellcheck disable=SC1091 source /setup_export_logs.sh # shellcheck source=../stateless/stress_tests.lib -source /stress_tests.lib +source /repo/tests/docker_scripts/stress_tests.lib # Avoid overlaps with previous runs dmesg --clear @@ -39,20 +42,22 @@ if [[ -z "$BUGFIX_VALIDATE_CHECK" ]]; then chc --version || exit 1 fi -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -sf /repo/tests/clickhouse-test /usr/bin/clickhouse-test + +export CLICKHOUSE_GRPC_CLIENT="/repo/utils/grpc-client/clickhouse-grpc-client.py" # shellcheck disable=SC1091 -source /attach_gdb.lib +source /repo/tests/docker_scripts/attach_gdb.lib # shellcheck disable=SC1091 -source /utils.lib +source /repo/tests/docker_scripts/utils.lib # install test configs -/usr/share/clickhouse-test/config/install.sh +/repo/tests/config/install.sh -./setup_minio.sh stateless +/repo/tests/docker_scripts/setup_minio.sh stateless -./setup_hdfs_minicluster.sh +/repo/tests/docker_scripts/setup_hdfs_minicluster.sh config_logs_export_cluster 
/etc/clickhouse-server/config.d/system_logs_export.yaml @@ -316,6 +321,7 @@ function run_tests() --print-time --no-drop-if-fail --capture-client-stacktrace + --queries "/repo/tests/queries" --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" ) @@ -341,7 +347,7 @@ ls -la ./ echo "Files in root directory" ls -la / -/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +/repo/tests/docker_scripts/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv clickhouse-client -q "system flush logs" ||: diff --git a/docker/test/stress/run.sh b/tests/docker_scripts/stress_runner.sh old mode 100644 new mode 100755 similarity index 97% rename from docker/test/stress/run.sh rename to tests/docker_scripts/stress_runner.sh index b21114e456f..7666398e10b --- a/docker/test/stress/run.sh +++ b/tests/docker_scripts/stress_runner.sh @@ -3,26 +3,25 @@ # shellcheck disable=SC2086 # shellcheck disable=SC2024 +set -x + # Avoid overlaps with previous runs dmesg --clear # shellcheck disable=SC1091 source /setup_export_logs.sh -set -x - -# we mount tests folder from repo to /usr/share -ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/clickhouse-test/ci/stress.py /usr/bin/stress +ln -s /repo/tests/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib # shellcheck source=../stateless/attach_gdb.lib -source /attach_gdb.lib +source /repo/tests/docker_scripts/attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib -source /stress_tests.lib +source /repo/tests/docker_scripts/stress_tests.lib # shellcheck disable=SC1091 -source /utils.lib +source /repo/tests/docker_scripts/utils.lib install_packages package_folder @@ -55,7 +54,7 @@ export ZOOKEEPER_FAULT_INJECTION=1 # available for dump via clickhouse-local configure -./setup_minio.sh stateless # to have a proper environment +/repo/tests/docker_scripts/setup_minio.sh stateless # to have a proper environment config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml @@ -64,7 +63,7 @@ start_server setup_logs_replication clickhouse-client --query "CREATE DATABASE datasets" -clickhouse-client --multiquery < create.sql +clickhouse-client --multiquery < /repo/tests/docker_scripts/create.sql clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test" @@ -267,7 +266,7 @@ fi start_server -stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ +python3 /repo/tests/ci/stress.py --hung-check --drop-databases --output-folder /test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ || echo -e "Test script failed$FAIL script exit code: $?" 
>> /test_output/test_results.tsv diff --git a/docker/test/stateless/stress_tests.lib b/tests/docker_scripts/stress_tests.lib similarity index 99% rename from docker/test/stateless/stress_tests.lib rename to tests/docker_scripts/stress_tests.lib index 51aa299f7a6..e2b5d983fcb 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/tests/docker_scripts/stress_tests.lib @@ -42,7 +42,7 @@ function configure() # install test configs export USE_DATABASE_ORDINARY=1 export EXPORT_S3_STORAGE_POLICIES=1 - /usr/share/clickhouse-test/config/install.sh + /repo/tests/config/install.sh # avoid too slow startup sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ diff --git a/docker/test/upgrade/run.sh b/tests/docker_scripts/upgrade_runner.sh old mode 100644 new mode 100755 similarity index 96% rename from docker/test/upgrade/run.sh rename to tests/docker_scripts/upgrade_runner.sh index e9768873f6a..ece75ebf782 --- a/docker/test/upgrade/run.sh +++ b/tests/docker_scripts/upgrade_runner.sh @@ -9,20 +9,20 @@ dmesg --clear set -x # we mount tests folder from repo to /usr/share -ln -s /usr/share/clickhouse-test/ci/stress.py /usr/bin/stress -ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test -ln -s /usr/share/clickhouse-test/ci/download_release_packages.py /usr/bin/download_release_packages -ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag +ln -s /repo/tests/ci/stress.py /usr/bin/stress +ln -s /repo/tests/clickhouse-test /usr/bin/clickhouse-test +ln -s /repo/tests/ci/download_release_packages.py /usr/bin/download_release_packages +ln -s /repo/tests/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib # shellcheck source=../stateless/attach_gdb.lib -source /attach_gdb.lib +source /repo/tests/docker_scripts/attach_gdb.lib # shellcheck source=../stateless/stress_tests.lib -source /stress_tests.lib +source /repo/tests/docker_scripts/stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & -./setup_minio.sh stateless # to have a proper environment +/repo/tests/docker_scripts/setup_minio.sh stateless # to have a proper environment echo "Get previous release tag" # shellcheck disable=SC2016 diff --git a/docker/test/stateless/utils.lib b/tests/docker_scripts/utils.lib similarity index 100% rename from docker/test/stateless/utils.lib rename to tests/docker_scripts/utils.lib From 185038beb00617da3b825fc464efd694c179f9f8 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Fri, 16 Aug 2024 16:15:27 +0000 Subject: [PATCH 080/127] Add encrypted local named collections --- .../NamedCollectionsMetadataStorage.cpp | 111 +++++++++++++++++- .../NamedCollectionsMetadataStorage.h | 2 + 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 36191b89e86..ae9b7776c94 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -14,6 +16,7 @@ #include #include #include +#include namespace fs = std::filesystem; @@ -74,9 +77,9 @@ public: }; -class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext +class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, protected WithContext { -private: +protected: std::string root_path; public: @@ -168,7 +171,7 @@ public: return fs::remove(getPath(file_name)); } -private: +protected: std::string getPath(const std::string & file_name) const { const auto file_name_as_path = fs::path(file_name); @@ -178,6 +181,7 @@ private: return fs::path(root_path) / file_name_as_path; } +private: /// Delete .tmp files. They could be left undeleted in case of /// some exception or abrupt server restart. 
void cleanup() @@ -194,6 +198,97 @@ private: } }; +class NamedCollectionsMetadataStorage::LocalStorageEncrypted : public NamedCollectionsMetadataStorage::LocalStorage +{ +public: + LocalStorageEncrypted(ContextPtr context_, const std::string & path_) + : NamedCollectionsMetadataStorage::LocalStorage(context_, path_) + { + const auto & config = getContext()->getConfigRef(); + auto key_hex = config.getRawString("named_collections_storage.key_hex", ""); + try + { + key = boost::algorithm::unhex(key_hex); + key_fingerprint = FileEncryption::calculateKeyFingerprint(key); + } + catch (const std::exception &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length"); + } + + algorithm = FileEncryption::parseAlgorithmFromString(config.getString("named_collections_storage.algorithm", "aes_128_ctr")); + } + + std::string read(const std::string & file_name) const override + { + ReadBufferFromFile in(getPath(file_name)); + Memory<> encrypted_buffer(in.getFileSize()); + + FileEncryption::Header header; + try + { + header.read(in); + } + catch (Exception & e) + { + e.addMessage("While reading the header of encrypted file " + quoteString(file_name)); + throw; + } + + size_t bytes_read = 0; + while (bytes_read < encrypted_buffer.size() && !in.eof()) + { + bytes_read += in.read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read); + } + + std::string decrypted_buffer; + decrypted_buffer.resize(bytes_read); + FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector); + encryptor.decrypt(encrypted_buffer.data(), bytes_read, decrypted_buffer.data()); + + LOG_DEBUG(getLogger("PMO"), "Read named collection {}: {}", file_name, decrypted_buffer); + return decrypted_buffer; + } + + void write(const std::string & file_name, const std::string & data, bool replace) override + { + if (!replace && fs::exists(file_name)) + { + throw Exception( + ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, + "Metadata file {} for named collection already exists", + file_name); + } + + fs::create_directories(root_path); + + auto tmp_path = getPath(file_name + ".tmp"); + + auto out = std::make_unique(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL); + FileEncryption::Header header{ + .algorithm = algorithm, + .key_fingerprint = key_fingerprint, + .init_vector = FileEncryption::InitVector::random() + }; + WriteBufferFromEncryptedFile out_encrypted(data.size(), std::move(out), key, header); + writeString(data, out_encrypted); + + out_encrypted.next(); + if (getContext()->getSettingsRef().fsync_metadata) + out_encrypted.sync(); + + LOG_DEBUG(getLogger("PMO"), "Wrote named collection {}: {} in plain text, encrypted {}", file_name, data, out_encrypted.buffer()); + + fs::rename(tmp_path, getPath(file_name)); + } + +private: + std::string key; + UInt128 key_fingerprint; + FileEncryption::Algorithm algorithm; +}; + + class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext { @@ -495,7 +590,7 @@ std::unique_ptr NamedCollectionsMetadataStorage const auto & config = context_->getConfigRef(); const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local"); - if (storage_type == "local") + if (storage_type == "local" || storage_type == "local_encrypted") { const auto path = config.getString( named_collections_storage_config_path + ".path", @@ -504,7 +599,13 @@ std::unique_ptr NamedCollectionsMetadataStorage 
LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), "Using local storage for named collections at path: {}", path); - auto local_storage = std::make_unique(context_, path); + std::unique_ptr local_storage; + + if (storage_type == "local") + local_storage = std::make_unique(context_, path); + else if (storage_type == "local_encrypted") + local_storage = std::make_unique(context_, path); + return std::unique_ptr( new NamedCollectionsMetadataStorage(std::move(local_storage), context_)); } diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index c3468fbc468..db2b48fcd23 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -35,7 +35,9 @@ public: private: class INamedCollectionsStorage; class LocalStorage; + class LocalStorageEncrypted; class ZooKeeperStorage; + class ZooKeeperEncrypted; std::shared_ptr storage; From c269e0f71ce5198dfd7cce2d024164b452c5ea82 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Mon, 19 Aug 2024 10:13:49 +0000 Subject: [PATCH 081/127] Add encrypted named collections for ZooKeeper Consolidate the code so that Local and ZooKeeper storages use the same logic which uses memory buffers to encrypt and decrypt data. --- .../NamedCollectionsMetadataStorage.cpp | 225 ++++++++++-------- .../NamedCollectionsMetadataStorage.h | 2 +- 2 files changed, 123 insertions(+), 104 deletions(-) diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index ae9b7776c94..b8269d2d55a 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -7,9 +7,10 @@ #include #include #include -#include -#include #include +#include +#include +#include #include #include #include @@ -129,6 +130,11 @@ public: ReadBufferFromFile in(getPath(file_name)); std::string data; readStringUntilEOF(data, in); + return readHook(data); + } + + virtual std::string readHook(const std::string & data) const + { return data; } @@ -145,8 +151,9 @@ public: fs::create_directories(root_path); auto tmp_path = getPath(file_name + ".tmp"); - WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL); - writeString(data, out); + auto write_data = writeHook(data); + WriteBufferFromFile out(tmp_path, write_data.size(), O_WRONLY | O_CREAT | O_EXCL); + writeString(write_data, out); out.next(); if (getContext()->getSettingsRef().fsync_metadata) @@ -156,6 +163,11 @@ public: fs::rename(tmp_path, getPath(file_name)); } + virtual std::string writeHook(const std::string & data) const + { + return data; + } + void remove(const std::string & file_name) override { if (!removeIfExists(file_name)) @@ -198,99 +210,7 @@ private: } }; -class NamedCollectionsMetadataStorage::LocalStorageEncrypted : public NamedCollectionsMetadataStorage::LocalStorage -{ -public: - LocalStorageEncrypted(ContextPtr context_, const std::string & path_) - : NamedCollectionsMetadataStorage::LocalStorage(context_, path_) - { - const auto & config = getContext()->getConfigRef(); - auto key_hex = config.getRawString("named_collections_storage.key_hex", ""); - try - { - key = boost::algorithm::unhex(key_hex); - key_fingerprint = FileEncryption::calculateKeyFingerprint(key); - } - catch (const std::exception &) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid 
characters [0-9a-fA-F] and length"); - } - - algorithm = FileEncryption::parseAlgorithmFromString(config.getString("named_collections_storage.algorithm", "aes_128_ctr")); - } - - std::string read(const std::string & file_name) const override - { - ReadBufferFromFile in(getPath(file_name)); - Memory<> encrypted_buffer(in.getFileSize()); - - FileEncryption::Header header; - try - { - header.read(in); - } - catch (Exception & e) - { - e.addMessage("While reading the header of encrypted file " + quoteString(file_name)); - throw; - } - - size_t bytes_read = 0; - while (bytes_read < encrypted_buffer.size() && !in.eof()) - { - bytes_read += in.read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read); - } - - std::string decrypted_buffer; - decrypted_buffer.resize(bytes_read); - FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector); - encryptor.decrypt(encrypted_buffer.data(), bytes_read, decrypted_buffer.data()); - - LOG_DEBUG(getLogger("PMO"), "Read named collection {}: {}", file_name, decrypted_buffer); - return decrypted_buffer; - } - - void write(const std::string & file_name, const std::string & data, bool replace) override - { - if (!replace && fs::exists(file_name)) - { - throw Exception( - ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS, - "Metadata file {} for named collection already exists", - file_name); - } - - fs::create_directories(root_path); - - auto tmp_path = getPath(file_name + ".tmp"); - - auto out = std::make_unique(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL); - FileEncryption::Header header{ - .algorithm = algorithm, - .key_fingerprint = key_fingerprint, - .init_vector = FileEncryption::InitVector::random() - }; - WriteBufferFromEncryptedFile out_encrypted(data.size(), std::move(out), key, header); - writeString(data, out_encrypted); - - out_encrypted.next(); - if (getContext()->getSettingsRef().fsync_metadata) - out_encrypted.sync(); - - LOG_DEBUG(getLogger("PMO"), "Wrote named collection {}: {} in plain text, encrypted {}", file_name, data, out_encrypted.buffer()); - - fs::rename(tmp_path, getPath(file_name)); - } - -private: - std::string key; - UInt128 key_fingerprint; - FileEncryption::Algorithm algorithm; -}; - - - -class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext +class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, protected WithContext { private: std::string root_path; @@ -370,18 +290,25 @@ public: std::string read(const std::string & file_name) const override { - return getClient()->get(getPath(file_name)); + auto data = getClient()->get(getPath(file_name)); + return readHook(data); + } + + virtual std::string readHook(const std::string & data) const + { + return data; } void write(const std::string & file_name, const std::string & data, bool replace) override { + auto write_data = writeHook(data); if (replace) { - getClient()->createOrUpdate(getPath(file_name), data, zkutil::CreateMode::Persistent); + getClient()->createOrUpdate(getPath(file_name), write_data, zkutil::CreateMode::Persistent); } else { - auto code = getClient()->tryCreate(getPath(file_name), data, zkutil::CreateMode::Persistent); + auto code = getClient()->tryCreate(getPath(file_name), write_data, zkutil::CreateMode::Persistent); if (code == Coordination::Error::ZNODEEXISTS) { @@ -393,6 +320,11 @@ public: } } + virtual std::string writeHook(const std::string & data) const + { + return data; + } + void remove(const std::string & file_name) override 
{ getClient()->remove(getPath(file_name)); @@ -429,6 +361,89 @@ private: } }; +template +class NamedCollectionsMetadataStorageEncrypted : public BaseMetadataStorage +{ +public: + NamedCollectionsMetadataStorageEncrypted(ContextPtr context_, const std::string & path_) + : BaseMetadataStorage(context_, path_) + { + const auto & config = BaseMetadataStorage::getContext()->getConfigRef(); + auto key_hex = config.getRawString("named_collections_storage.key_hex", ""); + try + { + key = boost::algorithm::unhex(key_hex); + key_fingerprint = FileEncryption::calculateKeyFingerprint(key); + } + catch (const std::exception &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read key_hex, check for valid characters [0-9a-fA-F] and length"); + } + + algorithm = FileEncryption::parseAlgorithmFromString(config.getString("named_collections_storage.algorithm", "aes_128_ctr")); + } + + std::string readHook(const std::string & data) const override + { + ReadBufferFromString in(data); + Memory<> encrypted_buffer(data.length()); + + FileEncryption::Header header; + try + { + header.read(in); + } + catch (Exception & e) + { + e.addMessage("While reading the header of encrypted data"); + throw; + } + + size_t bytes_read = 0; + while (bytes_read < encrypted_buffer.size() && !in.eof()) + { + bytes_read += in.read(encrypted_buffer.data() + bytes_read, encrypted_buffer.size() - bytes_read); + } + + std::string decrypted_buffer; + decrypted_buffer.resize(bytes_read); + FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector); + encryptor.decrypt(encrypted_buffer.data(), bytes_read, decrypted_buffer.data()); + + return decrypted_buffer; + } + + std::string writeHook(const std::string & data) const override + { + FileEncryption::Header header{ + .algorithm = algorithm, + .key_fingerprint = key_fingerprint, + .init_vector = FileEncryption::InitVector::random() + }; + + FileEncryption::Encryptor encryptor(header.algorithm, key, header.init_vector); + WriteBufferFromOwnString out; + header.write(out); + encryptor.encrypt(data.data(), data.size(), out); + return std::string(out.str()); + } + +private: + std::string key; + UInt128 key_fingerprint; + FileEncryption::Algorithm algorithm; +}; + +class NamedCollectionsMetadataStorage::LocalStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted +{ + using NamedCollectionsMetadataStorageEncrypted::NamedCollectionsMetadataStorageEncrypted; +}; + +class NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted : public NamedCollectionsMetadataStorageEncrypted +{ + using NamedCollectionsMetadataStorageEncrypted::NamedCollectionsMetadataStorageEncrypted; +}; + NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage( std::shared_ptr storage_, ContextPtr context_) @@ -600,7 +615,6 @@ std::unique_ptr NamedCollectionsMetadataStorage "Using local storage for named collections at path: {}", path); std::unique_ptr local_storage; - if (storage_type == "local") local_storage = std::make_unique(context_, path); else if (storage_type == "local_encrypted") @@ -609,10 +623,15 @@ std::unique_ptr NamedCollectionsMetadataStorage return std::unique_ptr( new NamedCollectionsMetadataStorage(std::move(local_storage), context_)); } - if (storage_type == "zookeeper" || storage_type == "keeper") + if (storage_type == "zookeeper" || storage_type == "keeper" || storage_type == "zookeeper_encrypted" || storage_type == "keeper_encrypted") { const auto path = config.getString(named_collections_storage_config_path + ".path"); - auto zk_storage = 
std::make_unique(context_, path); + + std::unique_ptr zk_storage; + if (storage_type == "zookeeper" || storage_type == "keeper") + zk_storage = std::make_unique(context_, path); + else if (storage_type == "zookeeper_encrypted" || storage_type == "keeper_encrypted") + zk_storage = std::make_unique(context_, path); LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), "Using zookeeper storage for named collections at path: {}", path); diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h index db2b48fcd23..52805e8359d 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.h @@ -37,7 +37,7 @@ private: class LocalStorage; class LocalStorageEncrypted; class ZooKeeperStorage; - class ZooKeeperEncrypted; + class ZooKeeperStorageEncrypted; std::shared_ptr storage; From 08cb7ff8d866364013286043a8810329089b5832 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 20 Aug 2024 19:02:37 +0800 Subject: [PATCH 082/127] Add integration tests and delete stateless tests --- .../__init__.py | 0 .../configs/config.d/cluster.xml | 12 ++++ .../configs/config.xml | 9 +++ .../test_incorrect_datetime_format/test.py | 57 +++++++++++++++++++ ...fix_datetime_implicit_conversion.reference | 1 - ...03215_fix_datetime_implicit_conversion.sql | 15 ----- 6 files changed, 78 insertions(+), 16 deletions(-) create mode 100644 tests/integration/test_incorrect_datetime_format/__init__.py create mode 100644 tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml create mode 100644 tests/integration/test_incorrect_datetime_format/configs/config.xml create mode 100644 tests/integration/test_incorrect_datetime_format/test.py delete mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference delete mode 100644 tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql diff --git a/tests/integration/test_incorrect_datetime_format/__init__.py b/tests/integration/test_incorrect_datetime_format/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml b/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml new file mode 100644 index 00000000000..9c7f02c190f --- /dev/null +++ b/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml @@ -0,0 +1,12 @@ + + + + + + node1 + 9000 + + + + + diff --git a/tests/integration/test_incorrect_datetime_format/configs/config.xml b/tests/integration/test_incorrect_datetime_format/configs/config.xml new file mode 100644 index 00000000000..053b5d30418 --- /dev/null +++ b/tests/integration/test_incorrect_datetime_format/configs/config.xml @@ -0,0 +1,9 @@ + + + information + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + diff --git a/tests/integration/test_incorrect_datetime_format/test.py b/tests/integration/test_incorrect_datetime_format/test.py new file mode 100644 index 00000000000..eb6501fbec1 --- /dev/null +++ b/tests/integration/test_incorrect_datetime_format/test.py @@ -0,0 +1,57 @@ +import logging +import pytest +from helpers.cluster import ClickHouseCluster + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node", + main_configs=[ + "configs/config.d/cluster.xml", + ] + ) + 
logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_incorrect_datetime_format(cluster): + """ + Test for an MSan issue which is caused by parsing incorrect datetime string + """ + + node = cluster.instances["node"] + + table_name = "test_delete_race_leftovers" + additional_settings = { + # use another disk not to interfere with other tests + "storage_policy": "one_disk", + # always remove parts in parallel + "concurrent_part_removal_threshold": 1, + } + + node.query(""" + CREATE TABLE tab + ( + a DateTime, + pk String + ) Engine = MergeTree() ORDER BY pk; + """ + ) + + res = node.query("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'").strip() + assert res == "0" + + error = node.query_and_get_error("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'").strip() + print(error) + assert "Cannot parse time component of DateTime 09:58:0" in error + + error = node.query_and_get_error("SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'").strip() + print(error) + assert "Cannot convert string '2024-08-0 09:58:09' to type DateTime" in error diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference deleted file mode 100644 index 573541ac970..00000000000 --- a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.reference +++ /dev/null @@ -1 +0,0 @@ -0 diff --git a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql b/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql deleted file mode 100644 index 70a8a3432a6..00000000000 --- a/tests/queries/0_stateless/03215_fix_datetime_implicit_conversion.sql +++ /dev/null @@ -1,15 +0,0 @@ -DROP TABLE IF EXISTS tab SYNC; - -CREATE TABLE tab -( - a DateTime, - pk String -) Engine = MergeTree() ORDER BY pk; - -INSERT INTO tab select cast(number, 'DateTime'), generateUUIDv4() FROM system.numbers LIMIT 1; - -SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'; -SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'; -- { serverError CANNOT_PARSE_DATETIME } -SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'; -- { serverError TYPE_MISMATCH } - -DROP TABLE IF EXISTS tab SYNC; From 0563e19cf336c629312bbc23f39c6dd8676238b5 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Tue, 20 Aug 2024 19:09:27 +0800 Subject: [PATCH 083/127] Little fix --- .../configs/config.d/cluster.xml | 3 +-- tests/integration/test_incorrect_datetime_format/test.py | 8 -------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml b/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml index 9c7f02c190f..a27968fb3d2 100644 --- a/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml +++ b/tests/integration/test_incorrect_datetime_format/configs/config.d/cluster.xml @@ -3,8 +3,7 @@ - node1 - 9000 + node diff --git a/tests/integration/test_incorrect_datetime_format/test.py b/tests/integration/test_incorrect_datetime_format/test.py index eb6501fbec1..c1803aa95a1 100644 --- a/tests/integration/test_incorrect_datetime_format/test.py +++ b/tests/integration/test_incorrect_datetime_format/test.py @@ -28,14 +28,6 @@ def test_incorrect_datetime_format(cluster): node = cluster.instances["node"] - table_name = "test_delete_race_leftovers" - additional_settings = { - # use another disk not to interfere 
with other tests - "storage_policy": "one_disk", - # always remove parts in parallel - "concurrent_part_removal_threshold": 1, - } - node.query(""" CREATE TABLE tab ( From 6a405b4aab1ce3f23555e2217856faecb414afca Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:14:21 +0200 Subject: [PATCH 084/127] Update 01825_new_type_json_ghdata_insert_select.sh --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh index 3f5fc91f8fc..b450e9827c2 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage, long -# ^ no-s3-storage: it is memory-hungry +# Tags: no-fasttest, no-s3-storage, long, no-asan +# ^ no-s3-storage: it is memory-hungry, no-asan: too long CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 653c978dfa30bf1807912eccf8ba9a8d1f7a4da5 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 19 Aug 2024 12:08:23 +0200 Subject: [PATCH 085/127] autorelease to check builds and functional tests --- .github/actions/clean/action.yml | 20 ++++++++++--- .github/workflows/auto_releases.yml | 34 ++++++++++++++-------- tests/ci/artifactory.py | 2 ++ tests/ci/auto_release.py | 44 +++++++++++++++++++++-------- tests/ci/ci_utils.py | 2 +- 5 files changed, 74 insertions(+), 28 deletions(-) diff --git a/.github/actions/clean/action.yml b/.github/actions/clean/action.yml index 547738b17cc..8c22523cacf 100644 --- a/.github/actions/clean/action.yml +++ b/.github/actions/clean/action.yml @@ -1,11 +1,23 @@ name: Clean runner description: Clean the runner's temp path on ending +inputs: + images: + description: clean docker images + default: false + type: boolean runs: using: "composite" steps: - - name: Clean + - name: Clean Temp shell: bash run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "${{runner.temp}}" + sudo rm -fr "${{runner.temp}}" + - name: Clean Docker Containers + shell: bash + run: | + docker rm -vf $(docker ps -aq) ||: + - name: Clean Docker Images + if: ${{ inputs.images }} + shell: bash + run: | + docker rmi -f $(docker images -aq) ||: diff --git a/.github/workflows/auto_releases.yml b/.github/workflows/auto_releases.yml index 28483ea136f..2fdf4e30a70 100644 --- a/.github/workflows/auto_releases.yml +++ b/.github/workflows/auto_releases.yml @@ -14,7 +14,7 @@ on: dry-run: description: 'Dry run' required: false - default: true + default: false type: boolean jobs: @@ -51,7 +51,11 @@ jobs: cat /tmp/autorelease_params.json echo 'EOF' } >> "$GITHUB_OUTPUT" - echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" + if [[ "${{ github.event_name }}" == "schedule" ]]; then + echo "DRY_RUN=true" >> "$GITHUB_OUTPUT" + else + echo "DRY_RUN=${{ github.event.inputs.dry-run }}" >> "$GITHUB_OUTPUT" + fi - name: Post Release Branch statuses run: | cd "$GITHUB_WORKSPACE/tests/ci" @@ -74,14 +78,22 @@ jobs: secrets: ROBOT_CLICKHOUSE_COMMIT_TOKEN: ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} - PostSlackMessage: - needs: [AutoReleaseInfo] + CleanUp: + needs: [Releases] runs-on: [self-hosted, release-maker] - if: ${{ !cancelled() }} steps: - - 
name: Check out repository code - uses: ClickHouse/checkout@v1 - - name: Post - run: | - cd "$GITHUB_WORKSPACE/tests/ci" - python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} + - uses: ./.github/actions/clean + with: + images: true + +# PostSlackMessage: +# needs: [Releases] +# runs-on: [self-hosted, release-maker] +# if: ${{ !cancelled() }} +# steps: +# - name: Check out repository code +# uses: ClickHouse/checkout@v1 +# - name: Post +# run: | +# cd "$GITHUB_WORKSPACE/tests/ci" +# python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }} diff --git a/tests/ci/artifactory.py b/tests/ci/artifactory.py index f3d7d24f717..9457fa32ad3 100644 --- a/tests/ci/artifactory.py +++ b/tests/ci/artifactory.py @@ -143,6 +143,8 @@ class DebianArtifactory: print(f" {cmd}") Shell.check(cmd, strict=True) Shell.check("sync") + time.sleep(10) + Shell.check(f"lsof +D R2MountPoint.MOUNT_POINT", verbose=True) def test_packages(self): Shell.check("docker pull ubuntu:latest", strict=True) diff --git a/tests/ci/auto_release.py b/tests/ci/auto_release.py index 58cfc833afe..89714b2fb4b 100644 --- a/tests/ci/auto_release.py +++ b/tests/ci/auto_release.py @@ -1,5 +1,4 @@ import argparse -import copy import dataclasses import json import os @@ -77,8 +76,10 @@ class AutoReleaseInfo: print(json.dumps(dataclasses.asdict(self), indent=2), file=f) # dump file for GH action matrix that is similar to the file above but with dropped not ready release branches - params = copy.deepcopy(self) - params.releases = [release for release in params.releases if release.ready] + params = dataclasses.asdict(self) + params["releases"] = [ + release for release in params["releases"] if release["ready"] + ] with open(AUTORELEASE_MATRIX_PARAMS, "w", encoding="utf-8") as f: print(json.dumps(params, indent=2), file=f) @@ -110,7 +111,6 @@ def _prepare(token): refs = list(repo.get_git_matching_refs(f"tags/v{pr.head.ref}")) assert refs - refs.sort(key=lambda ref: ref.ref) latest_release_tag_ref = refs[-1] latest_release_tag = repo.get_git_tag(latest_release_tag_ref.object.sha) @@ -118,6 +118,10 @@ def _prepare(token): f"git rev-list --first-parent {latest_release_tag.tag}..origin/{pr.head.ref}", ).split("\n") commit_num = len(commits) + if latest_release_tag.tag.endswith("new"): + print("It's a new release branch - skip auto release for it") + continue + print( f"Previous release [{latest_release_tag.tag}] was [{commit_num}] commits ago, date [{latest_release_tag.tagger.date}]" ) @@ -141,17 +145,33 @@ def _prepare(token): commits_to_branch_head += 1 continue - commit_ci_status = CI.GH.get_commit_status_by_name( - token=token, - commit_sha=commit, - # handle old name for old releases - status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), - ) + # TODO: switch to check if CI is entirely green + statuses = [ + CI.GH.get_commit_status_by_name( + token=token, + commit_sha=commit, + # handle old name for old releases + status_name=(CI.JobNames.BUILD_CHECK, "ClickHouse build check"), + ), + CI.GH.get_commit_status_by_name( + token=token, + commit_sha=commit, + # handle old name for old releases + status_name=CI.JobNames.STATELESS_TEST_RELEASE, + ), + CI.GH.get_commit_status_by_name( + token=token, + commit_sha=commit, + # handle old name for old releases + status_name=CI.JobNames.STATEFUL_TEST_RELEASE, + ), + ] commit_sha = commit - if commit_ci_status == SUCCESS: + if any(status == SUCCESS for status in statuses): + commit_ci_status = SUCCESS break - print(f"CI status [{commit_ci_status}] 
- skip")
            commits_to_branch_head += 1
        ready = False
diff --git a/tests/ci/ci_utils.py b/tests/ci/ci_utils.py
index 97ab10f1b58..a4c0977f47c 100644
--- a/tests/ci/ci_utils.py
+++ b/tests/ci/ci_utils.py
@@ -118,7 +118,7 @@ class GH:
                statuses = response.json()
                for status in statuses:
                    if status["context"] in status_name:
-                        return status["state"]
+                        return status["state"]  # type: ignore

            # Check if there is a next page
            url = response.links.get("next", {}).get("url")

From 99e8a0babc04c9bae0565977090bdbd01272de0e Mon Sep 17 00:00:00 2001
From: maxvostrikov
Date: Tue, 20 Aug 2024 14:52:54 +0200
Subject: [PATCH 086/127] materialized_view_deduplication performance comparison test

performance comparison test to check deduplication in MATERIALIZED VIEW's.
Logic is similar to, but with a bigger insert
tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql

---
 .../materialized_view_deduplication.xml | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 tests/performance/materialized_view_deduplication.xml

diff --git a/tests/performance/materialized_view_deduplication.xml b/tests/performance/materialized_view_deduplication.xml
new file mode 100644
index 00000000000..621effd23dd
--- /dev/null
+++ b/tests/performance/materialized_view_deduplication.xml
@@ -0,0 +1,34 @@
+
+
+        1
+
+
+
+        CREATE TABLE dst (`key` Int64, `value` String)
+        ENGINE = MergeTree ORDER BY tuple()
+        SETTINGS non_replicated_deduplication_window=1000;
+
+
+        CREATE TABLE mv_dst (`key` Int64, `value` String)
+        ENGINE = MergeTree ORDER BY tuple()
+        SETTINGS non_replicated_deduplication_window=1000;
+
+
+        CREATE MATERIALIZED VIEW mv_first TO mv_dst
+        AS SELECT 0 AS key, value AS value FROM dst;
+
+
+        CREATE MATERIALIZED VIEW mv_second TO mv_dst
+        AS SELECT 0 AS key, value AS value FROM dst;
+
+    INSERT INTO dst SELECT number as key, toString(number) from numbers(1000);
+
+
+    INSERT INTO dst SELECT number as key, toString(number) from numbers(1000);
+
+
+    DROP TABLE IF EXISTS dst
+    DROP TABLE IF EXISTS mv_dst
+    DROP TABLE IF EXISTS mv_first
+    DROP TABLE IF EXISTS mv_second
+

From ee9080fba714fbe9ac482b4268ed164579ebda89 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 20 Aug 2024 15:11:06 +0200
Subject: [PATCH 087/127] Revert "Fix unexpected behavior with `FORMAT` and `SETTINGS` parsing"

---
 programs/client/Client.cpp                    |  3 -
 programs/server/Server.cpp                    |  2 +-
 src/Access/AccessControl.cpp                  |  8 +--
 src/Access/AccessControl.h                    |  5 +-
 src/Access/SettingsConstraints.cpp            |  8 +--
 src/Client/ClientBase.cpp                     | 60 ++++++++++++-----
 src/Interpreters/InterpreterSetQuery.cpp      | 34 +++++-----
 src/Interpreters/InterpreterSetQuery.h        |  2 +-
 src/Parsers/ParserQueryWithOutput.cpp         | 67 +++++++------------
 ...QueryWithOutputSettingsPushDownVisitor.cpp | 56 ++++++++++++++++
 .../QueryWithOutputSettingsPushDownVisitor.h  | 39 +++++++++++
 .../00857_global_joinsavel_table_alias.sql    |  1 +
 .../01401_FORMAT_SETTINGS.reference           |  4 +-
 .../0_stateless/01401_FORMAT_SETTINGS.sh      |  2 +-
 .../03003_compatibility_setting_bad_value.sql |  3 +-
 .../03172_format_settings_clauses.reference   | 14 ----
 .../03172_format_settings_clauses.sql         | 30 ---------
 17 files changed, 197 insertions(+), 141 deletions(-)
 create mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp
 create mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.h
 delete mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.reference
 delete mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.sql

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 39edaf3497e..25c94c56aa6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1164,9 +1164,6 @@ void Client::processOptions(const OptionsDescription & options_description, /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) client_context = global_context; initClientContext(); - - /// Allow to pass-through unknown settings to the server. - client_context->getAccessControl().allowAllSettings(); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16fee378cf0..74228fae5a3 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1922,7 +1922,7 @@ try auto & access_control = global_context->getAccessControl(); try { - access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); + access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); } catch (...) { diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index d4f8c7bc859..95a467bbbe5 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -280,7 +280,7 @@ void AccessControl::shutdown() } -void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, +void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_) { if (config_.has("custom_settings_prefixes")) @@ -868,10 +868,4 @@ const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const return *external_authenticators; } - -void AccessControl::allowAllSettings() -{ - custom_settings_prefixes->registerPrefixes({""}); -} - } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 7d8ee1232d0..bfaf256ad48 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -57,7 +57,7 @@ public: void shutdown() override; /// Initializes access storage (user directories). - void setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, + void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); /// Parses access entities from a configuration loaded from users.xml. @@ -238,9 +238,6 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); - /// Allow all setting names - this can be used in clients to pass-through unknown settings to the server. 
- void allowAllSettings(); - private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index 7506e365035..a274f6b54f2 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -219,8 +219,8 @@ void SettingsConstraints::clamp(const Settings & current_settings, SettingsChang }); } -template -bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) +template +bool getNewValueToCheck(const T & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) { Field current_value; bool has_current_value = current_settings.tryGet(change.name, current_value); @@ -230,12 +230,12 @@ bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & chan return false; if (throw_on_failure) - new_value = SettingsT::castValueUtil(change.name, change.value); + new_value = T::castValueUtil(change.name, change.value); else { try { - new_value = SettingsT::castValueUtil(change.name, change.value); + new_value = T::castValueUtil(change.name, change.value); } catch (...) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index e312f2a8158..01d03006eec 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -58,7 +58,6 @@ #include #include #include -#include #include #include #include @@ -1609,14 +1608,14 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des auto metadata = storage->getInMemoryMetadataPtr(); QueryPlan plan; storage->read( - plan, - sample.getNames(), - storage->getStorageSnapshot(metadata, client_context), - query_info, - client_context, - {}, - client_context->getSettingsRef().max_block_size, - getNumberOfPhysicalCPUCores()); + plan, + sample.getNames(), + storage->getStorageSnapshot(metadata, client_context), + query_info, + client_context, + {}, + client_context->getSettingsRef().max_block_size, + getNumberOfPhysicalCPUCores()); auto builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(client_context), @@ -1893,19 +1892,48 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin profile_events.watch.restart(); { - /// Temporarily apply query settings to the context. - Settings old_settings = client_context->getSettingsCopy(); - SCOPE_EXIT_SAFE( - { - client_context->setSettings(old_settings); + /// Temporarily apply query settings to context. + std::optional old_settings; + SCOPE_EXIT_SAFE({ + if (old_settings) + client_context->setSettings(*old_settings); }); - InterpreterSetQuery::applySettingsFromQuery(parsed_query, client_context); + + auto apply_query_settings = [&](const IAST & settings_ast) + { + if (!old_settings) + old_settings.emplace(client_context->getSettingsRef()); + client_context->applySettingsChanges(settings_ast.as()->changes); + client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); + }; + + const auto * insert = parsed_query->as(); + if (const auto * select = parsed_query->as(); select && select->settings()) + apply_query_settings(*select->settings()); + else if (const auto * select_with_union = parsed_query->as()) + { + const ASTs & children = select_with_union->list_of_selects->children; + if (!children.empty()) + { + // On the client it is enough to apply settings only for the + // last SELECT, since the only thing that is important to apply + // on the client is format settings. 
+ const auto * last_select = children.back()->as(); + if (last_select && last_select->settings()) + { + apply_query_settings(*last_select->settings()); + } + } + } + else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) + apply_query_settings(*query_with_output->settings_ast); + else if (insert && insert->settings_ast) + apply_query_settings(*insert->settings_ast); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; - const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 2ae35c4313b..7e68fc5c4c1 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -9,7 +9,6 @@ #include #include - namespace DB { @@ -46,7 +45,9 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(/* ignore_setting_constraints= */ false); + { + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); + } } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -54,20 +55,10 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (!ast) return; - /// First apply the outermost settings. Then they could be overridden by deeper settings. - if (const auto * query_with_output = dynamic_cast(ast.get())) - { - if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); - - if (const auto * create_query = ast->as(); create_query && create_query->select) - applySettingsFromSelectWithUnion(create_query->select->as(), context_); - } - if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(); } else if (const auto * select_with_union_query = ast->as()) { @@ -76,15 +67,28 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } + else if (const auto * query_with_output = dynamic_cast(ast.get())) + { + if (query_with_output->settings_ast) + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + + if (const auto * create_query = ast->as()) + { + if (create_query->select) + { + applySettingsFromSelectWithUnion(create_query->select->as(), context_); + } + } + } else if (auto * insert_query = ast->as()) { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ 
false); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(); } } diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index f50105c39f4..2438762f347 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -23,7 +23,7 @@ public: /** Set setting for current context (query context). * It is used for interpretation of SETTINGS clause in SELECT query. */ - void executeForCurrentContext(bool ignore_setting_constraints); + void executeForCurrentContext(bool ignore_setting_constraints = false); bool supportsTransactions() const override { return true; } diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index ac8f7d560e0..cb0c10cd1c9 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -151,55 +152,37 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec } - /// These two sections are allowed in an arbitrary order. ParserKeyword s_format(Keyword::FORMAT); - ParserKeyword s_settings(Keyword::SETTINGS); - /** Why: let's take the following example: - * SELECT 1 UNION ALL SELECT 2 FORMAT TSV - * Each subquery can be put in parentheses and have its own settings: - * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV - * And the whole query can have settings: - * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV SETTINGS e=f - * A single query with output is parsed in the same way as the UNION ALL chain: - * SELECT 1 SETTINGS a=b FORMAT TSV SETTINGS e=f - * So while these forms have a slightly different meaning, they both exist: - * SELECT 1 SETTINGS a=b FORMAT TSV - * SELECT 1 FORMAT TSV SETTINGS e=f - * And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. - * But while this work: - * (SELECT 1) UNION ALL (SELECT 2) FORMAT TSV SETTINGS d=f - * This does not work automatically, unless we explicitly allow different orders: - * (SELECT 1) UNION ALL (SELECT 2) SETTINGS d=f FORMAT TSV - * Inevitably, we also allow this: - * SELECT 1 SETTINGS a=b SETTINGS d=f FORMAT TSV - * ^^^^^^^^^^^^^^^^^^^^^ - * Because this part is consumed into ASTSelectWithUnionQuery - * and the rest into ASTQueryWithOutput. - */ - - for (size_t i = 0; i < 2; ++i) + if (s_format.ignore(pos, expected)) { - if (!query_with_output.format && s_format.ignore(pos, expected)) - { - ParserIdentifier format_p; + ParserIdentifier format_p; - if (!format_p.parse(pos, query_with_output.format, expected)) - return false; - setIdentifierSpecial(query_with_output.format); + if (!format_p.parse(pos, query_with_output.format, expected)) + return false; + setIdentifierSpecial(query_with_output.format); - query_with_output.children.push_back(query_with_output.format); - } - else if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) + query_with_output.children.push_back(query_with_output.format); + } + + // SETTINGS key1 = value1, key2 = value2, ... 
+ ParserKeyword s_settings(Keyword::SETTINGS); + if (!query_with_output.settings_ast && s_settings.ignore(pos, expected)) + { + ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) + return false; + query_with_output.children.push_back(query_with_output.settings_ast); + + // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) + // Pass them manually, to apply in InterpreterSelectQuery::initSettings() + if (query->as()) { - // SETTINGS key1 = value1, key2 = value2, ... - ParserSetQuery parser_settings(true); - if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) - return false; - query_with_output.children.push_back(query_with_output.settings_ast); + auto settings = query_with_output.settings_ast->clone(); + assert_cast(settings.get())->print_in_format = false; + QueryWithOutputSettingsPushDownVisitor::Data data{settings}; + QueryWithOutputSettingsPushDownVisitor(data).visit(query); } - else - break; } node = std::move(query); diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp new file mode 100644 index 00000000000..8cf0d0063ae --- /dev/null +++ b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp @@ -0,0 +1,56 @@ +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) +{ + if (node->as()) + return true; + if (node->as()) + return true; + if (child->as()) + return true; + return false; +} + +void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data) +{ + if (auto * select_query = ast->as()) + visit(*select_query, ast, data); +} + +void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data) +{ + ASTPtr select_settings_ast = select_query.settings(); + if (!select_settings_ast) + { + select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone()); + return; + } + + SettingsChanges & select_settings = select_settings_ast->as().changes; + SettingsChanges & settings = data.settings_ast->as().changes; + + for (auto & setting : settings) + { + auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting) + { + return select_setting.name == setting.name; + }); + if (it == select_settings.end()) + select_settings.push_back(setting); + else + it->value = setting.value; + } +} + +} diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h new file mode 100644 index 00000000000..fde8a07b555 --- /dev/null +++ b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTSelectQuery; +struct SettingChange; +class SettingsChanges; + +/// Pushdown SETTINGS clause that goes after FORMAT to the SELECT query: +/// (since settings after FORMAT parsed separately not in the ParserSelectQuery but in ParserQueryWithOutput) +/// +/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 -> +/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1 +/// +/// Otherwise settings after FORMAT will not be applied. 
+class QueryWithOutputSettingsPushDownMatcher +{ +public: + using Visitor = InDepthNodeVisitor; + + struct Data + { + const ASTPtr & settings_ast; + }; + + static bool needChildVisit(ASTPtr & node, const ASTPtr & child); + static void visit(ASTPtr & ast, Data & data); + +private: + static void visit(ASTSelectQuery &, ASTPtr &, Data &); +}; + +using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor; + +} diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql index 092b071cb48..2044a9b8d22 100644 --- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql +++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql @@ -1,3 +1,4 @@ + DROP TABLE IF EXISTS local_table; DROP TABLE IF EXISTS other_table; diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference index a8b99666654..22405bf1866 100644 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference @@ -1,7 +1,7 @@ 1 1 1 +1 +1 2 -1 -2 2 diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh index 173cc949500..b70c28422c9 100755 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) FORMAT CSV SETTINGS max_block_size = 1' # push down append ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_compress_block_size = 1 FORMAT CSV SETTINGS max_block_size = 1' -# not overwrite on push down +# overwrite on push down (since these settings goes latest) ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 2 FORMAT CSV SETTINGS max_block_size = 1' # on push-down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 1 FORMAT CSV' diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 3a09eec7452..48e98798c51 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1 +1,2 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_GET} +select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} + diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.reference b/tests/queries/0_stateless/03172_format_settings_clauses.reference deleted file mode 100644 index 8a98b137f4b..00000000000 --- a/tests/queries/0_stateless/03172_format_settings_clauses.reference +++ /dev/null @@ -1,14 +0,0 @@ -1 -2 -1 -2 -1 -2 -1 -1 -3 -3 -3 -3 -3 -1 diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.sql b/tests/queries/0_stateless/03172_format_settings_clauses.sql deleted file mode 100644 index 0d1aa4dcfbb..00000000000 --- a/tests/queries/0_stateless/03172_format_settings_clauses.sql +++ /dev/null @@ -1,30 +0,0 @@ -SET max_block_size = 10, max_threads = 1; - --- Take the following example: -SELECT 1 UNION ALL SELECT 2 FORMAT TSV; - --- Each subquery can be put in 
parentheses and have its own settings: -(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV; - --- And the whole query can have settings: -(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV SETTINGS max_block_size = 3; - --- A single query with output is parsed in the same way as the UNION ALL chain: -SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3; - --- So while these forms have a slightly different meaning, they both exist: -SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV; -SELECT getSetting('max_block_size') FORMAT TSV SETTINGS max_block_size = 3; - --- And due to this effect, the users expect that the FORMAT and SETTINGS may go in an arbitrary order. --- But while this work: -(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) FORMAT TSV SETTINGS max_block_size = 3; - --- This does not work automatically, unless we explicitly allow different orders: -(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) SETTINGS max_block_size = 3 FORMAT TSV; - --- Inevitably, we allow this: -SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 SETTINGS max_block_size = 3 FORMAT TSV; -/*^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^*/ --- Because this part is consumed into ASTSelectWithUnionQuery --- and the rest into ASTQueryWithOutput. From 8b31388db5d88bbefee66d7bf4672945ce5ba4fe Mon Sep 17 00:00:00 2001 From: Tyler Hannan Date: Tue, 20 Aug 2024 15:33:34 +0200 Subject: [PATCH 088/127] Update README.md add Guangzho meetup, fix release call --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 2120a4d1211..bd9b8f66ad0 100644 --- a/README.md +++ b/README.md @@ -34,13 +34,13 @@ curl https://clickhouse.com/ | sh Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know. -* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 29 +* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 20 ## Upcoming Events Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* MORE COMING SOON! +* [ClickHouse Guangzho User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. 
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" From 4f799467ecaf1bf10e39f7da59aa459ab1903810 Mon Sep 17 00:00:00 2001 From: Michal Tabaszewski Date: Fri, 16 Aug 2024 22:54:09 +0200 Subject: [PATCH 089/127] Added removing query cache by tag. --- src/Common/CacheBase.h | 6 ++++++ src/Common/ICachePolicy.h | 1 + src/Common/LRUCachePolicy.h | 17 +++++++++++++++++ src/Common/SLRUCachePolicy.h | 21 +++++++++++++++++++++ src/Common/TTLCachePolicy.h | 17 +++++++++++++++++ src/Interpreters/Cache/QueryCache.cpp | 9 +++++++++ src/Interpreters/Cache/QueryCache.h | 1 + src/Interpreters/Context.cpp | 8 ++++++++ src/Interpreters/Context.h | 1 + src/Interpreters/InterpreterSystemQuery.cpp | 7 +++++-- src/Parsers/ASTSystemQuery.h | 1 + src/Parsers/CommonParsers.h | 1 + src/Parsers/ParserSystemQuery.cpp | 10 ++++++++++ 13 files changed, 98 insertions(+), 2 deletions(-) diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h index a809136f451..e0f7ffbfd5f 100644 --- a/src/Common/CacheBase.h +++ b/src/Common/CacheBase.h @@ -197,6 +197,12 @@ public: cache_policy->remove(key); } + void removeWithPredicate(std::function predicate) + { + std::lock_guard lock(mutex); + cache_policy->removeWithPredicate(predicate); + } + size_t sizeInBytes() const { std::lock_guard lock(mutex); diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 301a5c6cbbd..71543e0aed0 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -55,6 +55,7 @@ public: virtual void set(const Key & key, const MappedPtr & mapped) = 0; virtual void remove(const Key & key) = 0; + virtual void removeWithPredicate(std::function predicate) = 0; virtual void clear() = 0; virtual std::vector dump() const = 0; diff --git a/src/Common/LRUCachePolicy.h b/src/Common/LRUCachePolicy.h index f833e46a821..39bf5067aea 100644 --- a/src/Common/LRUCachePolicy.h +++ b/src/Common/LRUCachePolicy.h @@ -68,6 +68,23 @@ public: current_size_in_bytes = 0; } + void removeWithPredicate(std::function predicate) override + { + for(auto it = cells.begin(); it != cells.end();) + { + if(predicate(it->first, it->second.value)) + { + auto & cell = it->second; + current_size_in_bytes -= cell.size; + queue.erase(cell.queue_iterator); + cells.erase(it); + it = cells.erase(it); + } + else + ++it; + } + } + void remove(const Key & key) override { auto it = cells.find(key); diff --git a/src/Common/SLRUCachePolicy.h b/src/Common/SLRUCachePolicy.h index 354ec1d36d6..62c6d72a3e8 100644 --- a/src/Common/SLRUCachePolicy.h +++ b/src/Common/SLRUCachePolicy.h @@ -95,6 +95,27 @@ public: cells.erase(it); } + void removeWithPredicate(std::function predicate) override + { + for(auto it = cells.begin(); it != cells.end();) + { + if(predicate(it->first, it->second.value)) + { + auto & cell = it->second; + + current_size_in_bytes -= cell.size; + if (cell.is_protected) + current_protected_size -= cell.size; + + auto & queue = cell.is_protected ? 
protected_queue : probationary_queue; + queue.erase(cell.queue_iterator); + it = cells.erase(it); + } + else + ++it; + } + } + MappedPtr get(const Key & key) override { auto it = cells.find(key); diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h index 6401835b0d7..6caab1d6c78 100644 --- a/src/Common/TTLCachePolicy.h +++ b/src/Common/TTLCachePolicy.h @@ -133,6 +133,23 @@ public: Base::user_quotas->clear(); } + void removeWithPredicate(std::function predicate) override + { + for(auto it = cache.begin(); it != cache.end();) + { + if(predicate(it->first, it->second)) + { + size_t sz = weight_function(*it->second); + if (it->first.user_id.has_value()) + Base::user_quotas->decreaseActual(*it->first.user_id, sz); + it = cache.erase(it); + size_in_bytes -= sz; + } + else + ++it; + } + } + void remove(const Key & key) override { auto it = cache.find(key); diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index ab926037c67..9c4d48fbd44 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -619,6 +619,15 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size); } +void QueryCache::clearWithTag(const String & tag) +{ + auto removeWithTag = [tag](const Key & k, const Cache::MappedPtr & _){ + return k.tag == tag; + }; + cache.removeWithPredicate(removeWithTag); + std::lock_guard lock(mutex); +} + void QueryCache::clear() { cache.clear(); diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c7ebaf4d26a..dec43747e0a 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -212,6 +212,7 @@ public: Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota); void clear(); + void clearWithTag(const String & tag); size_t sizeInBytes() const; size_t count() const; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 3cc09370e86..ef5539f0b13 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3236,6 +3236,14 @@ void Context::clearQueryCache() const shared->query_cache->clear(); } +void Context::clearQueryCacheWithTag(const String & tag) const +{ + std::lock_guard lock(shared->mutex); + + if (shared->query_cache) + shared->query_cache->clearWithTag(tag); +} + void Context::clearCaches() const { std::lock_guard lock(shared->mutex); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3da4f124553..076bff462f9 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1069,6 +1069,7 @@ public: void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getQueryCache() const; void clearQueryCache() const; + void clearQueryCacheWithTag(const String & tag) const; /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. 
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 21c8b44b374..c71982d0934 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -369,9 +369,12 @@ BlockIO InterpreterSystemQuery::execute() system_context->clearMMappedFileCache(); break; case Type::DROP_QUERY_CACHE: - getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); - getContext()->clearQueryCache(); + { + getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); + !query.query_cache_tag.empty() ? getContext()->clearQueryCacheWithTag(query.query_cache_tag): getContext()->clearQueryCache(); break; + } + case Type::DROP_COMPILED_EXPRESSION_CACHE: #if USE_EMBEDDED_COMPILER getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 59de90b1d8e..739d71678df 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -129,6 +129,7 @@ public: String storage_policy; String volume; String disk; + String query_cache_tag; UInt64 seconds{}; String filesystem_cache_name; diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index cc2ee79cd36..a079fba5d53 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -471,6 +471,7 @@ namespace DB MR_MACROS(TABLE, "TABLE") \ MR_MACROS(TABLES, "TABLES") \ MR_MACROS(TAGS, "TAGS") \ + MR_MACROS(TAG, "TAG") \ MR_MACROS(TAGS_INNER_UUID, "TAGS INNER UUID") \ MR_MACROS(TEMPORARY_TABLE, "TEMPORARY TABLE") \ MR_MACROS(TEMPORARY, "TEMPORARY") \ diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index efabbbfa479..95d89e6fde1 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -489,6 +489,16 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } + case Type::DROP_QUERY_CACHE: + { + ParserLiteral tag_parser; + ASTPtr ast; + if (ParserKeyword{Keyword::TAG}.ignore(pos, expected) && tag_parser.parse(pos, ast, expected)) + res->query_cache_tag = ast->as()->value.safeGet(); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + break; + } case Type::SYNC_FILESYSTEM_CACHE: { ParserLiteral path_parser; From cd51535a67319d06fae7ca212123c9cb9114c91c Mon Sep 17 00:00:00 2001 From: Michal Tabaszewski Date: Sat, 17 Aug 2024 01:54:46 +0200 Subject: [PATCH 090/127] Fixed style. 
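As a side note on the mechanism introduced in the previous patch and tidied up in this and the following fixups: removing cache entries by predicate is essentially an erase-while-iterating pass over the cache map, with the byte-size accounting kept in step with each removal. The standalone sketch below uses toy types (not the actual ClickHouse cache policies, which additionally maintain LRU/SLRU queues and per-user quotas) and shows the single `it = erase(it)` step per matching entry that the later fixup commits settle on.

```cpp
#include <functional>
#include <string>
#include <unordered_map>

/// Toy sketch of predicate-based eviction, not ClickHouse code.
struct ToyCache
{
    using Key = std::string;
    struct Cell { std::string value; size_t size = 0; };

    std::unordered_map<Key, Cell> cells;
    size_t current_size_in_bytes = 0;

    void removeWithPredicate(std::function<bool(const Key &, const std::string &)> predicate)
    {
        for (auto it = cells.begin(); it != cells.end();)
        {
            if (predicate(it->first, it->second.value))
            {
                current_size_in_bytes -= it->second.size;  /// keep accounting consistent
                it = cells.erase(it);                      /// exactly one erase per match
            }
            else
                ++it;
        }
    }
};

int main()
{
    ToyCache cache;
    cache.cells = {{"q1", {"tag_a", 8}}, {"q2", {"tag_b", 16}}};
    cache.current_size_in_bytes = 24;

    /// Drop every entry whose value carries a given tag.
    cache.removeWithPredicate([](const auto &, const std::string & v) { return v == "tag_a"; });

    return (cache.cells.size() == 1 && cache.current_size_in_bytes == 16) ? 0 : 1;
}
```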
--- src/Common/LRUCachePolicy.h | 4 ++-- src/Common/SLRUCachePolicy.h | 4 ++-- src/Common/TTLCachePolicy.h | 4 ++-- src/Interpreters/Cache/QueryCache.cpp | 5 +++-- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Common/LRUCachePolicy.h b/src/Common/LRUCachePolicy.h index 39bf5067aea..9f2ea548c32 100644 --- a/src/Common/LRUCachePolicy.h +++ b/src/Common/LRUCachePolicy.h @@ -70,9 +70,9 @@ public: void removeWithPredicate(std::function predicate) override { - for(auto it = cells.begin(); it != cells.end();) + for (auto it = cells.begin(); it != cells.end();) { - if(predicate(it->first, it->second.value)) + if (predicate(it->first, it->second.value)) { auto & cell = it->second; current_size_in_bytes -= cell.size; diff --git a/src/Common/SLRUCachePolicy.h b/src/Common/SLRUCachePolicy.h index 62c6d72a3e8..e6931bbd59c 100644 --- a/src/Common/SLRUCachePolicy.h +++ b/src/Common/SLRUCachePolicy.h @@ -97,9 +97,9 @@ public: void removeWithPredicate(std::function predicate) override { - for(auto it = cells.begin(); it != cells.end();) + for (auto it = cells.begin(); it != cells.end();) { - if(predicate(it->first, it->second.value)) + if (predicate(it->first, it->second.value)) { auto & cell = it->second; diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h index 6caab1d6c78..100d753e53b 100644 --- a/src/Common/TTLCachePolicy.h +++ b/src/Common/TTLCachePolicy.h @@ -135,9 +135,9 @@ public: void removeWithPredicate(std::function predicate) override { - for(auto it = cache.begin(); it != cache.end();) + for (auto it = cache.begin(); it != cache.end();) { - if(predicate(it->first, it->second)) + if (predicate(it->first, it->second)) { size_t sz = weight_function(*it->second); if (it->first.user_id.has_value()) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 9c4d48fbd44..84aa9cd0342 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -621,10 +621,11 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis void QueryCache::clearWithTag(const String & tag) { - auto removeWithTag = [tag](const Key & k, const Cache::MappedPtr & _){ + auto remove_with_tag = [tag](const Key & k, const Cache::MappedPtr & _) + { return k.tag == tag; }; - cache.removeWithPredicate(removeWithTag); + cache.removeWithPredicate(remove_with_tag); std::lock_guard lock(mutex); } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c71982d0934..c7540084451 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,7 +370,7 @@ BlockIO InterpreterSystemQuery::execute() break; case Type::DROP_QUERY_CACHE: { - getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); + getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); !query.query_cache_tag.empty() ? 
getContext()->clearQueryCacheWithTag(query.query_cache_tag): getContext()->clearQueryCache(); break; } From ad20d24c6e0aa50ea408627389acb4c83080d3e0 Mon Sep 17 00:00:00 2001 From: Michal Tabaszewski Date: Mon, 19 Aug 2024 20:52:51 +0200 Subject: [PATCH 091/127] Fixed binary-tidy complaint --- src/Interpreters/Cache/QueryCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 84aa9cd0342..6c1ffe9cecb 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -621,7 +621,7 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis void QueryCache::clearWithTag(const String & tag) { - auto remove_with_tag = [tag](const Key & k, const Cache::MappedPtr & _) + auto remove_with_tag = [tag](const Key & k, const Cache::MappedPtr &) { return k.tag == tag; }; From eee618b3196cecb44f6314a4199d5aa95ff2ca66 Mon Sep 17 00:00:00 2001 From: Michal Tabaszewski Date: Mon, 19 Aug 2024 22:37:28 +0200 Subject: [PATCH 092/127] - From 129d1e387e1b436ac572ca5db1a50dd9ed79ddae Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Aug 2024 13:21:42 +0000 Subject: [PATCH 093/127] Some fixups --- docs/en/operations/query-cache.md | 2 ++ docs/en/sql-reference/statements/system.md | 6 ++++ src/Common/CacheBase.h | 4 +-- src/Common/ICachePolicy.h | 2 +- src/Common/LRUCachePolicy.h | 33 +++++++++---------- src/Common/SLRUCachePolicy.h | 2 +- src/Common/TTLCachePolicy.h | 26 +++++++-------- src/Interpreters/Cache/QueryCache.cpp | 19 +++++------ src/Interpreters/Cache/QueryCache.h | 3 +- src/Interpreters/Context.cpp | 12 ++----- src/Interpreters/Context.h | 3 +- src/Interpreters/InterpreterSystemQuery.cpp | 2 +- src/Parsers/ASTSystemQuery.h | 3 +- src/Parsers/CommonParsers.h | 2 +- src/Parsers/ParserSystemQuery.cpp | 20 +++++------ .../02494_query_cache_drop_cache.reference | 14 ++++++++ .../02494_query_cache_drop_cache.sql | 25 ++++++++++++-- 17 files changed, 105 insertions(+), 73 deletions(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 384938e28f6..955cec0234e 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -155,6 +155,8 @@ SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1'; SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2'; ``` +To remove only entries with tag `tag` from the query cache, you can use statement `SYSTEM DROP QUERY CACHE TAG 'tag'`. + ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation, etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting [query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 3ebcf617491..77d023b67ce 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -136,7 +136,13 @@ The compiled expression cache is enabled/disabled with the query/user/profile-le ## DROP QUERY CACHE +```sql +SYSTEM DROP QUERY CACHE; +SYSTEM DROP QUERY CACHE TAG '' +```` + Clears the [query cache](../../operations/query-cache.md). +If a tag is specified, only query cache entries with the specified tag are deleted. 
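To make concrete what `SYSTEM DROP QUERY CACHE TAG '<tag>'` asks the server to do, here is a rough sketch of a tag-scoped clear expressed as a predicate over simplified cache keys. The `Key` and `Cache` types are stand-ins for illustration only, not the real `QueryCache` classes, and an empty optional models the plain `SYSTEM DROP QUERY CACHE` statement.

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <tuple>

/// Simplified stand-in for a query cache key that carries a tag.
struct Key
{
    std::string query_text;
    std::string tag;  /// empty string == "no tag"
    bool operator<(const Key & other) const
    {
        return std::tie(query_text, tag) < std::tie(other.query_text, other.tag);
    }
};

using Cache = std::map<Key, std::string>;

/// With a tag: remove only matching entries. Without a tag: drop everything.
void clear(Cache & cache, const std::optional<std::string> & tag)
{
    if (tag)
        std::erase_if(cache, [&](const auto & kv) { return kv.first.tag == *tag; });
    else
        cache.clear();
}

int main()
{
    Cache cache = {{{"SELECT 1", "abc"}, "r1"}, {{"SELECT 2", ""}, "r2"}};

    clear(cache, "abc");                  /// ~ SYSTEM DROP QUERY CACHE TAG 'abc'
    std::cout << cache.size() << '\n';    /// 1

    clear(cache, std::nullopt);           /// ~ SYSTEM DROP QUERY CACHE
    std::cout << cache.size() << '\n';    /// 0
}
```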
## DROP FORMAT SCHEMA CACHE {#system-drop-schema-format} diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h index e0f7ffbfd5f..23e6a6fc91c 100644 --- a/src/Common/CacheBase.h +++ b/src/Common/CacheBase.h @@ -197,10 +197,10 @@ public: cache_policy->remove(key); } - void removeWithPredicate(std::function predicate) + void remove(std::function predicate) { std::lock_guard lock(mutex); - cache_policy->removeWithPredicate(predicate); + cache_policy->remove(predicate); } size_t sizeInBytes() const diff --git a/src/Common/ICachePolicy.h b/src/Common/ICachePolicy.h index 71543e0aed0..567fa35d977 100644 --- a/src/Common/ICachePolicy.h +++ b/src/Common/ICachePolicy.h @@ -55,7 +55,7 @@ public: virtual void set(const Key & key, const MappedPtr & mapped) = 0; virtual void remove(const Key & key) = 0; - virtual void removeWithPredicate(std::function predicate) = 0; + virtual void remove(std::function predicate) = 0; virtual void clear() = 0; virtual std::vector dump() const = 0; diff --git a/src/Common/LRUCachePolicy.h b/src/Common/LRUCachePolicy.h index 9f2ea548c32..cb8fdbd2b9c 100644 --- a/src/Common/LRUCachePolicy.h +++ b/src/Common/LRUCachePolicy.h @@ -68,23 +68,6 @@ public: current_size_in_bytes = 0; } - void removeWithPredicate(std::function predicate) override - { - for (auto it = cells.begin(); it != cells.end();) - { - if (predicate(it->first, it->second.value)) - { - auto & cell = it->second; - current_size_in_bytes -= cell.size; - queue.erase(cell.queue_iterator); - cells.erase(it); - it = cells.erase(it); - } - else - ++it; - } - } - void remove(const Key & key) override { auto it = cells.find(key); @@ -96,6 +79,22 @@ public: cells.erase(it); } + void remove(std::function predicate) override + { + for (auto it = cells.begin(); it != cells.end();) + { + if (predicate(it->first, it->second.value)) + { + Cell & cell = it->second; + current_size_in_bytes -= cell.size; + queue.erase(cell.queue_iterator); + it = cells.erase(it); + } + else + ++it; + } + } + MappedPtr get(const Key & key) override { auto it = cells.find(key); diff --git a/src/Common/SLRUCachePolicy.h b/src/Common/SLRUCachePolicy.h index e6931bbd59c..5321110f3e5 100644 --- a/src/Common/SLRUCachePolicy.h +++ b/src/Common/SLRUCachePolicy.h @@ -95,7 +95,7 @@ public: cells.erase(it); } - void removeWithPredicate(std::function predicate) override + void remove(std::function predicate) override { for (auto it = cells.begin(); it != cells.end();) { diff --git a/src/Common/TTLCachePolicy.h b/src/Common/TTLCachePolicy.h index 100d753e53b..6c548e5042b 100644 --- a/src/Common/TTLCachePolicy.h +++ b/src/Common/TTLCachePolicy.h @@ -133,7 +133,19 @@ public: Base::user_quotas->clear(); } - void removeWithPredicate(std::function predicate) override + void remove(const Key & key) override + { + auto it = cache.find(key); + if (it == cache.end()) + return; + size_t sz = weight_function(*it->second); + if (it->first.user_id.has_value()) + Base::user_quotas->decreaseActual(*it->first.user_id, sz); + cache.erase(it); + size_in_bytes -= sz; + } + + void remove(std::function predicate) override { for (auto it = cache.begin(); it != cache.end();) { @@ -150,18 +162,6 @@ public: } } - void remove(const Key & key) override - { - auto it = cache.find(key); - if (it == cache.end()) - return; - size_t sz = weight_function(*it->second); - if (it->first.user_id.has_value()) - Base::user_quotas->decreaseActual(*it->first.user_id, sz); - cache.erase(it); - size_in_bytes -= sz; - } - MappedPtr get(const Key & key) override { auto it = cache.find(key); diff 
--git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 6c1ffe9cecb..4312b35e18c 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -619,19 +619,18 @@ QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::millis return Writer(cache, key, max_entry_size_in_bytes, max_entry_size_in_rows, min_query_runtime, squash_partial_results, max_block_size); } -void QueryCache::clearWithTag(const String & tag) +void QueryCache::clear(const std::optional & tag) { - auto remove_with_tag = [tag](const Key & k, const Cache::MappedPtr &) + if (tag) { - return k.tag == tag; - }; - cache.removeWithPredicate(remove_with_tag); - std::lock_guard lock(mutex); -} + auto predicate = [tag](const Key & key, const Cache::MappedPtr &) { return key.tag == tag.value(); }; + cache.remove(predicate); + } + else + { + cache.clear(); + } -void QueryCache::clear() -{ - cache.clear(); std::lock_guard lock(mutex); times_executed.clear(); } diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index dec43747e0a..64407633a8d 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -211,8 +211,7 @@ public: Reader createReader(const Key & key); Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota); - void clear(); - void clearWithTag(const String & tag); + void clear(const std::optional & tag); size_t sizeInBytes() const; size_t count() const; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index ef5539f0b13..cfcaf437510 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3228,20 +3228,12 @@ QueryCachePtr Context::getQueryCache() const return shared->query_cache; } -void Context::clearQueryCache() const +void Context::clearQueryCache(const std::optional & tag) const { std::lock_guard lock(shared->mutex); if (shared->query_cache) - shared->query_cache->clear(); -} - -void Context::clearQueryCacheWithTag(const String & tag) const -{ - std::lock_guard lock(shared->mutex); - - if (shared->query_cache) - shared->query_cache->clearWithTag(tag); + shared->query_cache->clear(tag); } void Context::clearCaches() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 076bff462f9..e0c69471e60 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1068,8 +1068,7 @@ public: void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows); void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config); std::shared_ptr getQueryCache() const; - void clearQueryCache() const; - void clearQueryCacheWithTag(const String & tag) const; + void clearQueryCache(const std::optional & tag) const; /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. 
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index c7540084451..d4e2f22036c 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -371,7 +371,7 @@ BlockIO InterpreterSystemQuery::execute() case Type::DROP_QUERY_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); - !query.query_cache_tag.empty() ? getContext()->clearQueryCacheWithTag(query.query_cache_tag): getContext()->clearQueryCache(); + getContext()->clearQueryCache(query.query_cache_tag); break; } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 739d71678df..d9f5b425182 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -129,9 +129,10 @@ public: String storage_policy; String volume; String disk; - String query_cache_tag; UInt64 seconds{}; + std::optional query_cache_tag; + String filesystem_cache_name; std::string key_to_drop; std::optional offset_to_drop; diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index a079fba5d53..ab0e70eb0e5 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -470,8 +470,8 @@ namespace DB MR_MACROS(TABLE_OVERRIDE, "TABLE OVERRIDE") \ MR_MACROS(TABLE, "TABLE") \ MR_MACROS(TABLES, "TABLES") \ - MR_MACROS(TAGS, "TAGS") \ MR_MACROS(TAG, "TAG") \ + MR_MACROS(TAGS, "TAGS") \ MR_MACROS(TAGS_INNER_UUID, "TAGS INNER UUID") \ MR_MACROS(TEMPORARY_TABLE, "TEMPORARY TABLE") \ MR_MACROS(TEMPORARY, "TEMPORARY") \ diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 95d89e6fde1..af84dd10bfa 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -471,6 +471,16 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->seconds = seconds->as()->value.safeGet(); break; } + case Type::DROP_QUERY_CACHE: + { + ParserLiteral tag_parser; + ASTPtr ast; + if (ParserKeyword{Keyword::TAG}.ignore(pos, expected) && tag_parser.parse(pos, ast, expected)) + res->query_cache_tag = std::make_optional(ast->as()->value.safeGet()); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + break; + } case Type::DROP_FILESYSTEM_CACHE: { ParserLiteral path_parser; @@ -489,16 +499,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & return false; break; } - case Type::DROP_QUERY_CACHE: - { - ParserLiteral tag_parser; - ASTPtr ast; - if (ParserKeyword{Keyword::TAG}.ignore(pos, expected) && tag_parser.parse(pos, ast, expected)) - res->query_cache_tag = ast->as()->value.safeGet(); - if (!parseQueryWithOnCluster(res, pos, expected)) - return false; - break; - } case Type::SYNC_FILESYSTEM_CACHE: { ParserLiteral path_parser; diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.reference b/tests/queries/0_stateless/02494_query_cache_drop_cache.reference index 2f1465d1598..6481b5e0770 100644 --- a/tests/queries/0_stateless/02494_query_cache_drop_cache.reference +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.reference @@ -1,3 +1,17 @@ +Cache query result in query cache 1 1 +DROP entries with a certain tag, no entry will match +1 +After a full DROP, the cache is empty now +0 +Cache query result with different or no tag in query cache +1 +1 +1 +2 +4 +DROP entries with certain tags +2 +1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql index bc2e7f442fc..3d064169a4e 
100644 --- a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql @@ -4,10 +4,31 @@ -- (it's silly to use what will be tested below but we have to assume other tests cluttered the query cache) SYSTEM DROP QUERY CACHE; --- Cache query result in query cache +SELECT 'Cache query result in query cache'; SELECT 1 SETTINGS use_query_cache = true; SELECT count(*) FROM system.query_cache; --- No query results are cached after DROP +SELECT 'DROP entries with a certain tag, no entry will match'; +SYSTEM DROP QUERY CACHE TAG 'tag'; +SELECT count(*) FROM system.query_cache; + +SELECT 'After a full DROP, the cache is empty now'; SYSTEM DROP QUERY CACHE; SELECT count(*) FROM system.query_cache; + +-- More tests for DROP with tags: + +SELECT 'Cache query result with different or no tag in query cache'; +SELECT 1 SETTINGS use_query_cache = true; +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'abc'; +SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'def'; +SELECT 2 SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; + +SELECT 'DROP entries with certain tags'; +SYSTEM DROP QUERY CACHE TAG ''; +SELECT count(*) FROM system.query_cache; +SYSTEM DROP QUERY CACHE TAG 'def'; +SELECT count(*) FROM system.query_cache; +SYSTEM DROP QUERY CACHE TAG 'abc'; +SELECT count(*) FROM system.query_cache; From f5a020247486442f74861c61f162c647ebd97f8d Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 20 Aug 2024 10:35:51 +0000 Subject: [PATCH 094/127] Add integration test --- .../NamedCollectionsMetadataStorage.cpp | 4 +- .../__init__.py | 0 .../config.d/named_collections_encrypted.xml | 12 ++ ...d_collections_with_zookeeper_encrypted.xml | 31 +++++ .../configs/users.d/users.xml | 17 +++ .../test_named_collections_encrypted/test.py | 115 ++++++++++++++++++ 6 files changed, 177 insertions(+), 2 deletions(-) create mode 100644 tests/integration/test_named_collections_encrypted/__init__.py create mode 100644 tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml create mode 100644 tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml create mode 100644 tests/integration/test_named_collections_encrypted/configs/users.d/users.xml create mode 100644 tests/integration/test_named_collections_encrypted/test.py diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index b8269d2d55a..8a6f3b9f6d5 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -628,9 +628,9 @@ std::unique_ptr NamedCollectionsMetadataStorage const auto path = config.getString(named_collections_storage_config_path + ".path"); std::unique_ptr zk_storage; - if (storage_type == "zookeeper" || storage_type == "keeper") + if (!storage_type.ends_with("_encrypted")) zk_storage = std::make_unique(context_, path); - else if (storage_type == "zookeeper_encrypted" || storage_type == "keeper_encrypted") + else zk_storage = std::make_unique(context_, path); LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), diff --git a/tests/integration/test_named_collections_encrypted/__init__.py b/tests/integration/test_named_collections_encrypted/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml new file mode 100644 index 00000000000..233e23846cb --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_encrypted.xml @@ -0,0 +1,12 @@ + + + local_encrypted + bebec0cabebec0cabebec0cabebec0ca + + + + + value1 + + + diff --git a/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml new file mode 100644 index 00000000000..d1dd5c29787 --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/configs/config.d/named_collections_with_zookeeper_encrypted.xml @@ -0,0 +1,31 @@ + + + zookeeper_encrypted + bebec0cabebec0cabebec0cabebec0ca + /named_collections_path/ + 5000 + + + + + value1 + + + + + + + true + + node_with_keeper + 9000 + + + node_with_keeper_2 + 9000 + + + true + + + diff --git a/tests/integration/test_named_collections_encrypted/configs/users.d/users.xml b/tests/integration/test_named_collections_encrypted/configs/users.d/users.xml new file mode 100644 index 00000000000..7d4f0543ff1 --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/configs/users.d/users.xml @@ -0,0 +1,17 @@ + + + + 0 + + + + + + default + default + 1 + 1 + 1 + + + diff --git a/tests/integration/test_named_collections_encrypted/test.py b/tests/integration/test_named_collections_encrypted/test.py new file mode 100644 index 00000000000..adc60b08b7f --- /dev/null +++ b/tests/integration/test_named_collections_encrypted/test.py @@ -0,0 +1,115 @@ +import logging +import pytest +import os +from helpers.cluster import ClickHouseCluster + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +NAMED_COLLECTIONS_CONFIG = os.path.join( + SCRIPT_DIR, "./configs/config.d/named_collections.xml" +) + +ZK_PATH = "/named_collections_path" + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance( + "node_encrypted", + main_configs=[ + "configs/config.d/named_collections_encrypted.xml", + ], + user_configs=[ + "configs/users.d/users.xml", + ], + stay_alive=True, + ) + cluster.add_instance( + "node_with_keeper_encrypted", + main_configs=[ + "configs/config.d/named_collections_with_zookeeper_encrypted.xml", + ], + user_configs=[ + "configs/users.d/users.xml", + ], + stay_alive=True, + with_zookeeper=True, + ) + cluster.add_instance( + "node_with_keeper_2_encrypted", + main_configs=[ + "configs/config.d/named_collections_with_zookeeper_encrypted.xml", + ], + user_configs=[ + "configs/users.d/users.xml", + ], + stay_alive=True, + with_zookeeper=True, + ) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def check_encrypted_content(node, zk=None): + assert ( + "collection1\ncollection2" + == node.query("select name from system.named_collections").strip() + ) + + assert ( + "['key1','key2']" + == node.query( + "select mapKeys(collection) from system.named_collections where name = 'collection2'" + ).strip() + ) + + assert ( + "1234\tvalue2" + == node.query( + "select collection['key1'], collection['key2'] from system.named_collections where name = 'collection2'" + ).strip() + ) + + # Check that the underlying storage 
is encrypted + content = zk.get(ZK_PATH + "/collection2.sql")[0] if zk is not None else open(f"{node.path}/database/named_collections/collection2.sql", "rb").read() + + assert content[0:3] == b"ENC" # file signature (aka magic number) of the encrypted file + assert b"key1" not in content + assert b"1234" not in content + assert b"key2" not in content + assert B"value2" not in content + + +def test_local_storage_encrypted(cluster): + node = cluster.instances["node_encrypted"] + node.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'") + + check_encrypted_content(node) + node.restart_clickhouse() + check_encrypted_content(node) + + node.query("DROP NAMED COLLECTION collection2") + + +def test_zookeper_storage_encrypted(cluster): + node1 = cluster.instances["node_with_keeper_encrypted"] + node2 = cluster.instances["node_with_keeper_2_encrypted"] + zk = cluster.get_kazoo_client("zoo1") + + node1.query("CREATE NAMED COLLECTION collection2 AS key1=1234, key2='value2'") + + check_encrypted_content(node1, zk) + check_encrypted_content(node2, zk) + node1.restart_clickhouse() + node2.restart_clickhouse() + check_encrypted_content(node1, zk) + check_encrypted_content(node2, zk) + + node1.query("DROP NAMED COLLECTION collection2") From d10e65291ccea45e46aab5edcc21ee047de6d5fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 2 Jul 2024 10:32:51 +0000 Subject: [PATCH 095/127] Fix docs --- docs/en/operations/settings/settings.md | 12 +++++++++++- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 19db4be17db..3617e6a3167 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2855,7 +2855,7 @@ The minimum chunk size in bytes, which each thread will parse in parallel. ## merge_selecting_sleep_ms {#merge_selecting_sleep_ms} -Sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. +Minimum sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. Possible values: @@ -2863,6 +2863,16 @@ Possible values: Default value: `5000`. +## max_merge_selecting_sleep_ms + +Maximum sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. + +Possible values: + +- Any positive integer. + +Default value: `60000`. + ## parallel_distributed_insert_select {#parallel_distributed_insert_select} Enables parallel distributed `INSERT ... SELECT` query. 
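To illustrate how the minimum and maximum sleep settings interact, the sketch below is a rough model assumed from the setting descriptions (not the actual merge scheduler code): the retry delay grows by the related `merge_selecting_sleep_slowdown_factor` while no parts are selected, shrinks again once a merge is assigned, and is always clamped between `merge_selecting_sleep_ms` and `max_merge_selecting_sleep_ms`.

```cpp
#include <algorithm>
#include <iostream>

/// Rough model of the merge-selecting retry delay, for illustration only.
struct MergeSelectSleep
{
    double min_ms = 5000;     /// merge_selecting_sleep_ms
    double max_ms = 60000;    /// max_merge_selecting_sleep_ms
    double factor = 1.2;      /// merge_selecting_sleep_slowdown_factor
    double current_ms = 5000;

    /// Nothing to merge: back off, but never beyond max_ms.
    void onNothingToMerge() { current_ms = std::clamp(current_ms * factor, min_ms, max_ms); }

    /// A merge was assigned: speed up again, but never below min_ms.
    void onMergeAssigned() { current_ms = std::clamp(current_ms / factor, min_ms, max_ms); }
};

int main()
{
    MergeSelectSleep sleep;
    for (int i = 0; i < 20; ++i)
        sleep.onNothingToMerge();
    std::cout << sleep.current_ms << '\n';  /// 60000 (clamped at max)

    sleep.onMergeAssigned();
    std::cout << sleep.current_ms << '\n';  /// 50000
}
```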
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index de1f0f60cfc..676f776df54 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -67,7 +67,7 @@ struct Settings; M(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ - M(UInt64, merge_selecting_sleep_ms, 5000, "Maximum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + M(UInt64, merge_selecting_sleep_ms, 5000, "Minimum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ M(Float, merge_selecting_sleep_slowdown_factor, 1.2f, "The sleep time for merge selecting task is multiplied by this factor when there's nothing to merge and divided when a merge was assigned", 0) \ M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ From e547875a9cdffc53bb601c1f9d32c2a6fb6af560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Fri, 5 Jul 2024 13:27:16 +0000 Subject: [PATCH 096/127] Improve wording --- docs/en/operations/settings/settings.md | 4 ++-- src/Storages/MergeTree/MergeTreeSettings.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3617e6a3167..7bd36ccd00f 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -2855,7 +2855,7 @@ The minimum chunk size in bytes, which each thread will parse in parallel. ## merge_selecting_sleep_ms {#merge_selecting_sleep_ms} -Minimum sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. +Minimum time to wait before trying to select parts to merge again after no parts were selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. Possible values: @@ -2865,7 +2865,7 @@ Default value: `5000`. ## max_merge_selecting_sleep_ms -Maximum sleep time for merge selecting when no part is selected. A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. +Maximum time to wait before trying to select parts to merge again after no parts were selected. 
A lower setting triggers selecting tasks in `background_schedule_pool` frequently, which results in a large number of requests to ClickHouse Keeper in large-scale clusters. Possible values: diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 676f776df54..0769b60dc6b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -67,8 +67,8 @@ struct Settings; M(Bool, fsync_part_directory, false, "Do fsync for part directory after all part operations (writes, renames, etc.).", 0) \ M(UInt64, non_replicated_deduplication_window, 0, "How many last blocks of hashes should be kept on disk (0 - disabled).", 0) \ M(UInt64, max_parts_to_merge_at_once, 100, "Max amount of parts which can be merged at once (0 - disabled). Doesn't affect OPTIMIZE FINAL query.", 0) \ - M(UInt64, merge_selecting_sleep_ms, 5000, "Minimum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ - M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum sleep time for merge selecting, a lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + M(UInt64, merge_selecting_sleep_ms, 5000, "Minimum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ + M(UInt64, max_merge_selecting_sleep_ms, 60000, "Maximum time to wait before trying to select parts to merge again after no parts were selected. A lower setting will trigger selecting tasks in background_schedule_pool frequently which result in large amount of requests to zookeeper in large-scale clusters", 0) \ M(Float, merge_selecting_sleep_slowdown_factor, 1.2f, "The sleep time for merge selecting task is multiplied by this factor when there's nothing to merge and divided when a merge was assigned", 0) \ M(UInt64, merge_tree_clear_old_temporary_directories_interval_seconds, 60, "The period of executing the clear old temporary directories operation in background.", 0) \ M(UInt64, merge_tree_clear_old_parts_interval_seconds, 1, "The period of executing the clear old parts operation in background.", 0) \ From c56ae57af861a3bc7bcc1f423b22c6c3256b2b73 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 20 Aug 2024 14:06:39 +0000 Subject: [PATCH 097/127] Update documentation --- docs/en/operations/named-collections.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 340a5a8f87a..470acaa7200 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -73,13 +73,21 @@ In the above example the `password_sha256_hex` value is the hexadecimal represen ### Storage for named collections -Named collections can either be stored on local disk or in zookeeper/keeper. By default local storage is used. +Named collections can either be stored on local disk or in ZooKeeper/Keeper. By default local storage is used. 
+They can also be stored using encryption with the same algorithms used for [disk encryption](storing-data#encrypted-virtual-file-system), +where `aes_128_ctr` is used by default. -To configure named collections storage in keeper and a `type` (equal to either `keeper` or `zookeeper`) and `path` (path in keeper, where named collections will be stored) to `named_collections_storage` section in configuration file: +To configure named collections storage you need to speficy a `type`. This can be either `local` or `keeper`/`zookeeper`. For encrypted storage, +you can use `local_encrypted` or `keeper_encrypted`/`zookeeper_encrypted`. + +To use ZooKeeper/Keeper we also need to set up a `path` (path in ZooKeeper/Keeper, where named collections will be stored) to +`named_collections_storage` section in configuration file. The following example uses encryption and ZooKeeper/Keeper: ``` - zookeeper + zookeeper_encrypted + bebec0cabebec0cabebec0cabebec0ca + aes_128_ctr /named_collections_path/ 1000 @@ -315,7 +323,7 @@ The description of parameters see [postgresql](../sql-reference/table-functions/ Parameter `addresses_expr` is used in a collection instead of `host:port`. The parameter is optional, because there are other optional ones: `host`, `hostname`, `port`. The following pseudo code explains the priority: ```sql -CASE +CASE WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr'] WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432') WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432') @@ -496,7 +504,7 @@ kafka_topic_list = 'kafka_topic', kafka_group_name = 'consumer_group', kafka_format = 'JSONEachRow', kafka_max_block_size = '1048576'; - + ``` ### XML example From dd3921897195a7027ef060f166ebcb611608763c Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 20 Aug 2024 14:24:53 +0000 Subject: [PATCH 098/127] Fix style --- docs/en/operations/named-collections.md | 2 +- .../test_named_collections_encrypted/test.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 470acaa7200..1c82aeaaf2c 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -77,7 +77,7 @@ Named collections can either be stored on local disk or in ZooKeeper/Keeper. By They can also be stored using encryption with the same algorithms used for [disk encryption](storing-data#encrypted-virtual-file-system), where `aes_128_ctr` is used by default. -To configure named collections storage you need to speficy a `type`. This can be either `local` or `keeper`/`zookeeper`. For encrypted storage, +To configure named collections storage you need to specify a `type`. This can be either `local` or `keeper`/`zookeeper`. For encrypted storage, you can use `local_encrypted` or `keeper_encrypted`/`zookeeper_encrypted`. 
To use ZooKeeper/Keeper we also need to set up a `path` (path in ZooKeeper/Keeper, where named collections will be stored) to diff --git a/tests/integration/test_named_collections_encrypted/test.py b/tests/integration/test_named_collections_encrypted/test.py index adc60b08b7f..7dff32fa6c9 100644 --- a/tests/integration/test_named_collections_encrypted/test.py +++ b/tests/integration/test_named_collections_encrypted/test.py @@ -78,13 +78,21 @@ def check_encrypted_content(node, zk=None): ) # Check that the underlying storage is encrypted - content = zk.get(ZK_PATH + "/collection2.sql")[0] if zk is not None else open(f"{node.path}/database/named_collections/collection2.sql", "rb").read() + content = ( + zk.get(ZK_PATH + "/collection2.sql")[0] + if zk is not None + else open( + f"{node.path}/database/named_collections/collection2.sql", "rb" + ).read() + ) - assert content[0:3] == b"ENC" # file signature (aka magic number) of the encrypted file + assert ( + content[0:3] == b"ENC" + ) # file signature (aka magic number) of the encrypted file assert b"key1" not in content assert b"1234" not in content assert b"key2" not in content - assert B"value2" not in content + assert b"value2" not in content def test_local_storage_encrypted(cluster): From 677b28e1ac5f0962f1e43ba7589787105c5f1553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 20 Aug 2024 17:04:19 +0200 Subject: [PATCH 099/127] Fix docs --- docs/en/engines/table-engines/integrations/hdfs.md | 2 +- docs/en/engines/table-engines/integrations/s3.md | 2 +- docs/en/sql-reference/table-functions/hdfs.md | 2 +- docs/en/sql-reference/table-functions/s3.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index c9df713231a..404cec97def 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -240,7 +240,7 @@ libhdfs3 support HDFS namenode HA. ## Storage Settings {#storage-settings} - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [hdfs_create_new_file_on_insert](/docs/en/operations/settings/settings.md#hdfs_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. **See Also** diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index d664c37bd0f..48a08dfa499 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -225,7 +225,7 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) ## Storage Settings {#storage-settings} - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. 
+- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. ## S3-related Settings {#settings} diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index f96e48d914d..30d2e371c7e 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -116,7 +116,7 @@ SELECT * from HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parque ## Storage Settings {#storage-settings} - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [hdfs_create_new_file_on_insert](/docs/en/operations/settings/settings.md#hdfs_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. - [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs. diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 1bd9f38517e..181c92b92d4 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -290,7 +290,7 @@ SELECT * from s3('s3://data/path/date=*/country=*/code=*/*.parquet') where _date ## Storage Settings {#storage-settings} - [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows to truncate file before insert into it. Disabled by default. -- [s3_create_multiple_files](/docs/en/operations/settings/settings.md#s3_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. +- [s3_create_new_file_on_insert](/docs/en/operations/settings/settings.md#s3_create_new_file_on_insert) - allows to create a new file on each insert if format has suffix. Disabled by default. - [s3_skip_empty_files](/docs/en/operations/settings/settings.md#s3_skip_empty_files) - allows to skip empty files while reading. Disabled by default. 
**See Also** From eb2f50e49d45f9418d9490f8c3da1af23006e9cc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 20 Aug 2024 15:28:04 +0000 Subject: [PATCH 100/127] Do not fuzz settings for 02835_drop_user_during_session.sh --- tests/queries/0_stateless/02835_drop_user_during_session.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh index c32003a2a11..01e4f9a5c2b 100755 --- a/tests/queries/0_stateless/02835_drop_user_during_session.sh +++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-debug +# Tags: no-debug, no-random-settings, no-random-merge-tree-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 0091f16af9426acbcc696b57f813aeb1222e0687 Mon Sep 17 00:00:00 2001 From: Pablo Marcos Date: Tue, 20 Aug 2024 16:13:16 +0000 Subject: [PATCH 101/127] Fix build without SSL support --- .../NamedCollectionsMetadataStorage.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp index 8a6f3b9f6d5..e9f7816ce73 100644 --- a/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp +++ b/src/Common/NamedCollections/NamedCollectionsMetadataStorage.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int INVALID_CONFIG_PARAMETER; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int SUPPORT_IS_DISABLED; } static const std::string named_collections_storage_config_path = "named_collections_storage"; @@ -361,6 +362,8 @@ private: } }; +#if USE_SSL + template class NamedCollectionsMetadataStorageEncrypted : public BaseMetadataStorage { @@ -444,6 +447,8 @@ class NamedCollectionsMetadataStorage::ZooKeeperStorageEncrypted : public NamedC using NamedCollectionsMetadataStorageEncrypted::NamedCollectionsMetadataStorageEncrypted; }; +#endif + NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage( std::shared_ptr storage_, ContextPtr context_) @@ -618,7 +623,13 @@ std::unique_ptr NamedCollectionsMetadataStorage if (storage_type == "local") local_storage = std::make_unique(context_, path); else if (storage_type == "local_encrypted") + { +#if USE_SSL local_storage = std::make_unique(context_, path); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support"); +#endif + } return std::unique_ptr( new NamedCollectionsMetadataStorage(std::move(local_storage), context_)); @@ -631,7 +642,13 @@ std::unique_ptr NamedCollectionsMetadataStorage if (!storage_type.ends_with("_encrypted")) zk_storage = std::make_unique(context_, path); else + { +#if USE_SSL zk_storage = std::make_unique(context_, path); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Named collections encryption requires building with SSL support"); +#endif + } LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"), "Using zookeeper storage for named collections at path: {}", path); From 4d0fc70227e4b850953630508f65b06cbafc62f0 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Thu, 8 Aug 2024 10:07:05 +0200 Subject: [PATCH 102/127] Fix keyed hash issue with empty array/map input --- src/Functions/FunctionsHashing.h | 5 ++--- tests/queries/0_stateless/02534_keyed_siphash.reference | 5 +++++ 
tests/queries/0_stateless/02534_keyed_siphash.sql | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 95c54ac9528..3dd63b64346 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -96,9 +96,8 @@ namespace impl { const auto *const begin = offsets->begin(); const auto * upper = std::upper_bound(begin, offsets->end(), i); - if (upper == offsets->end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); - i = upper - begin; + if (upper != offsets->end()) + i = upper - begin; } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 3f478218ff1..22644ca1291 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -239,3 +239,8 @@ Check bug found fuzzing Check bug 2 found fuzzing 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B +Check bug 3 found fuzzing +4761183170873013810 +0AD04BFD000000000000000000000000 +4761183170873013810 +0AD04BFD000000000000000000000000 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index fb707109c83..a595a97592a 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -345,3 +345,9 @@ CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; + +SELECT 'Check bug 3 found fuzzing'; +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []::Array(UInt8)); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), []::Array(UInt8))); +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); From ef2912b0f7d225a6c390ddbc77b7a30b994fe1a8 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Fri, 9 Aug 2024 18:09:52 +0200 Subject: [PATCH 103/127] Fix old and wrong assert --- src/Functions/FunctionsHashing.h | 5 +++-- tests/queries/0_stateless/02534_keyed_siphash.reference | 2 ++ tests/queries/0_stateless/02534_keyed_siphash.sql | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 3dd63b64346..c01ad5cf7e0 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -83,8 +83,7 @@ namespace impl { assert(key0 && key1); assert(key0->size() == key1->size()); - assert(offsets == nullptr || offsets->size() == key0->size()); - if (offsets != nullptr) + if (offsets != nullptr && !offsets->empty()) return offsets->back(); return key0->size(); } @@ -92,6 +91,8 @@ namespace impl { if (is_const) i = 0; + assert(key0->size() == key1->size()); + assert(key0->size() > i); if (offsets != nullptr) { const auto *const begin = offsets->begin(); diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 22644ca1291..b65992a5ce3 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ 
b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -244,3 +244,5 @@ Check bug 3 found fuzzing 0AD04BFD000000000000000000000000 4761183170873013810 0AD04BFD000000000000000000000000 +16734549324845627102 +D675BB3D687973A238AB891DD99C7047 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index a595a97592a..4e6b32966d4 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -351,3 +351,5 @@ SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []::Array(UInt8)); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), []::Array(UInt8))); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); +SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); From 6b7a25746af94e1dbb85de65304e47e041d4b006 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Tue, 13 Aug 2024 18:05:09 +0200 Subject: [PATCH 104/127] Improve test description --- tests/queries/0_stateless/02534_keyed_siphash.reference | 4 ++-- tests/queries/0_stateless/02534_keyed_siphash.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index b65992a5ce3..31c0cae8981 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -236,10 +236,10 @@ Check asan bug 0 Check bug found fuzzing 9042C6691B1A75F0EA3314B6F55728BB -Check bug 2 found fuzzing +Test arrays and maps 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B -Check bug 3 found fuzzing +Test emtpy arrays and maps 4761183170873013810 0AD04BFD000000000000000000000000 4761183170873013810 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 4e6b32966d4..b96233200a8 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -339,14 +339,14 @@ SELECT 'Check bug found fuzzing'; SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); -SELECT 'Check bug 2 found fuzzing'; +SELECT 'Test arrays and maps'; DROP TABLE IF EXISTS sipHashKeyed_keys; CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; -SELECT 'Check bug 3 found fuzzing'; +SELECT 'Test emtpy arrays and maps'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []::Array(UInt8)); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), []::Array(UInt8))); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); From 
027f913a139b39023633dbe038e4ee58d3141950 Mon Sep 17 00:00:00 2001 From: Salvatore Mesoraca Date: Wed, 14 Aug 2024 11:17:04 +0200 Subject: [PATCH 105/127] Do not check i before it gets its final value --- src/Functions/FunctionsHashing.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index c01ad5cf7e0..8d1c41f4c5f 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -92,7 +92,6 @@ namespace impl if (is_const) i = 0; assert(key0->size() == key1->size()); - assert(key0->size() > i); if (offsets != nullptr) { const auto *const begin = offsets->begin(); @@ -102,6 +101,7 @@ namespace impl } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); + assert(key0->size() > i); return {key0data[i], key1data[i]}; } }; From 4c6f30a70df20bb53625aceb1eb5256664f99080 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 20 Aug 2024 17:46:29 +0000 Subject: [PATCH 106/127] Cosmetics --- src/Functions/FunctionsHashing.h | 50 +++++++++++-------- .../0_stateless/02534_keyed_siphash.sql | 12 ++--- .../02552_siphash128_reference.sql | 4 +- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 8d1c41f4c5f..0cf4246fd66 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -77,7 +77,7 @@ namespace impl ColumnPtr key0; ColumnPtr key1; bool is_const; - const ColumnArray::Offsets * offsets{}; + const ColumnArray::Offsets * offsets = nullptr; size_t size() const { @@ -87,6 +87,7 @@ namespace impl return offsets->back(); return key0->size(); } + SipHashKey getKey(size_t i) const { if (is_const) @@ -94,7 +95,7 @@ namespace impl assert(key0->size() == key1->size()); if (offsets != nullptr) { - const auto *const begin = offsets->begin(); + const auto * const begin = offsets->begin(); const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper != offsets->end()) i = upper - begin; @@ -108,33 +109,38 @@ namespace impl static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key) { - const ColumnTuple * tuple = nullptr; - const auto * column = key.column.get(); - bool is_const = false; - if (isColumnConst(*column)) + const auto * col_key = key.column.get(); + + bool is_const; + const ColumnTuple * col_key_tuple; + if (isColumnConst(*col_key)) { is_const = true; - tuple = checkAndGetColumnConstData(column); + col_key_tuple = checkAndGetColumnConstData(col_key); } else - tuple = checkAndGetColumn(column); - if (!tuple) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); - if (tuple->tupleSize() != 2) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); + { + is_const = false; + col_key_tuple = checkAndGetColumn(col_key); + } - SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const}; - assert(ret.key0); - if (!checkColumn(*ret.key0)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); - assert(ret.key1); - if (!checkColumn(*ret.key1)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); + if (!col_key_tuple || col_key_tuple->tupleSize() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The key must be of type Tuple(UInt64, UInt64)"); - if (ret.size() == 1) - ret.is_const = true; + SipHashKeyColumns result{.key0 = 
col_key_tuple->getColumnPtr(0), .key1 = col_key_tuple->getColumnPtr(1), .is_const = is_const}; - return ret; + assert(result.key0); + assert(result.key1); + + if (!checkColumn(*result.key0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 1st element of the key tuple is not of type UInt64"); + if (!checkColumn(*result.key1)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 2nd element of the key tuple is not of type UInt64"); + + if (result.size() == 1) + result.is_const = true; + + return result; } } diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index b96233200a8..b499d8ef02b 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -263,10 +263,10 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash64Keyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128Keyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } select hex(sipHash64()); SELECT hex(sipHash128()); @@ -347,8 +347,8 @@ SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FRO DROP TABLE sipHashKeyed_keys; SELECT 'Test emtpy arrays and maps'; -SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []::Array(UInt8)); -SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), []::Array(UInt8))); +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index f7324ed0ee4..46f292d667d 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ 
b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -200,8 +200,8 @@ select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; From 4b08ae5f650313d286d416c68d92c240677ceb0f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 20 Aug 2024 21:51:48 +0200 Subject: [PATCH 107/127] Restart CI --- tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 index 263e92be403..dd60a31f771 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 @@ -2,6 +2,7 @@ set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; set allow_experimental_dynamic_type = 1; + drop table if exists test; {% for engine in ['MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', From 9fd9f649db6bd3149cda217a200cfee3d7bc8238 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 20 Aug 2024 21:53:42 +0200 Subject: [PATCH 108/127] Add comment in IDataType.cpp --- src/DataTypes/IDataType.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index d14461a4ff1..1a274c7f993 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -150,6 +150,7 @@ std::unique_ptr IDataType::getSubcolumnData( ISerialization::EnumerateStreamsSettings settings; settings.position_independent_encoding = false; + /// Don't enumerate dynamic subcolumns, they 
are handled separately. settings.enumerate_dynamic_streams = false; data.serialization->enumerateStreams(settings, callback_with_data, data); From 5d280053c2512332bc0c171e57dbc36cb3e0b675 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 20 Aug 2024 21:55:10 +0200 Subject: [PATCH 109/127] Add comment in ISerialization.h --- src/DataTypes/Serializations/ISerialization.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index d19cb0dd365..33575a07177 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -241,6 +241,9 @@ public: { SubstreamPath path; bool position_independent_encoding = true; + /// If set to false, don't enumerate dynamic subcolumns + /// (such as dynamic types in Dynamic column or dynamic paths in JSON column). + /// It may be needed when dynamic subcolumns are processed separately. bool enumerate_dynamic_streams = true; }; From 7fe98ac30224d033110faa90b312f4badc25ca32 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 20 Aug 2024 22:20:26 +0000 Subject: [PATCH 110/127] Fix stress_tests.lib potentially mistaking its own search string for a crash message --- tests/docker_scripts/stress_tests.lib | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/docker_scripts/stress_tests.lib b/tests/docker_scripts/stress_tests.lib index e2b5d983fcb..4f3e6eeb2f4 100644 --- a/tests/docker_scripts/stress_tests.lib +++ b/tests/docker_scripts/stress_tests.lib @@ -273,7 +273,7 @@ function check_logs_for_critical_errors() [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt # Crash - rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ + rg -Fa "###################""#####################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ || echo -e "Not crashed$OK" >> /test_output/test_results.tsv @@ -285,7 +285,7 @@ function check_logs_for_critical_errors() # Remove file fatal_messages.txt if it's empty [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - rg -Faz "########################################" /test_output/* > /dev/null \ + rg -Faz "####################""####################" /test_output/* > /dev/null \ && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv function get_gdb_log_context() From 3c7e2389d1207582a81ac28f66bd28ed9329c489 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 21 Aug 2024 09:14:50 +0800 Subject: [PATCH 111/127] Fix code style --- .../test_incorrect_datetime_format/test.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_incorrect_datetime_format/test.py b/tests/integration/test_incorrect_datetime_format/test.py index c1803aa95a1..56c061f3830 100644 --- a/tests/integration/test_incorrect_datetime_format/test.py +++ b/tests/integration/test_incorrect_datetime_format/test.py @@ -2,6 +2,7 @@ import logging import pytest from helpers.cluster import ClickHouseCluster + @pytest.fixture(scope="module") def cluster(): try: @@ -10,7 +11,7 @@ def cluster(): "node", main_configs=[ "configs/config.d/cluster.xml", - ] + ], ) logging.info("Starting cluster...") cluster.start() @@ -28,7 +29,8 @@ def 
test_incorrect_datetime_format(cluster): node = cluster.instances["node"] - node.query(""" + node.query( + """ CREATE TABLE tab ( a DateTime, @@ -40,10 +42,12 @@ def test_incorrect_datetime_format(cluster): res = node.query("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'").strip() assert res == "0" - error = node.query_and_get_error("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'").strip() - print(error) + error = node.query_and_get_error( + "SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:0'" + ).strip() assert "Cannot parse time component of DateTime 09:58:0" in error - error = node.query_and_get_error("SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'").strip() - print(error) + error = node.query_and_get_error( + "SELECT count(*) FROM tab WHERE a = '2024-08-0 09:58:09'" + ).strip() assert "Cannot convert string '2024-08-0 09:58:09' to type DateTime" in error From 471320dc6f8781e43e5d507086a72a6552caf531 Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 21 Aug 2024 14:15:44 +0800 Subject: [PATCH 112/127] Fix flaky test error --- .../test_incorrect_datetime_format/test.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_incorrect_datetime_format/test.py b/tests/integration/test_incorrect_datetime_format/test.py index 56c061f3830..3cdc6781534 100644 --- a/tests/integration/test_incorrect_datetime_format/test.py +++ b/tests/integration/test_incorrect_datetime_format/test.py @@ -17,6 +17,17 @@ def cluster(): cluster.start() logging.info("Cluster started") + node = cluster.instances["node"] + node.query( + """ + CREATE TABLE tab + ( + a DateTime, + pk String + ) Engine = MergeTree() ORDER BY pk; + """ + ) + yield cluster finally: cluster.shutdown() @@ -29,16 +40,6 @@ def test_incorrect_datetime_format(cluster): node = cluster.instances["node"] - node.query( - """ - CREATE TABLE tab - ( - a DateTime, - pk String - ) Engine = MergeTree() ORDER BY pk; - """ - ) - res = node.query("SELECT count(*) FROM tab WHERE a = '2024-08-06 09:58:09'").strip() assert res == "0" From 47a245cb65e6550769f6a3621a8902b43bfd9160 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 21 Aug 2024 12:11:47 +0200 Subject: [PATCH 113/127] Remove wrong release version --- docs/changelogs/v23.8.16.16-lts.md | 38 ---------------------------- utils/list-versions/version_date.tsv | 1 - 2 files changed, 39 deletions(-) delete mode 100644 docs/changelogs/v23.8.16.16-lts.md diff --git a/docs/changelogs/v23.8.16.16-lts.md b/docs/changelogs/v23.8.16.16-lts.md deleted file mode 100644 index 9532db4fb0a..00000000000 --- a/docs/changelogs/v23.8.16.16-lts.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -sidebar_position: 1 -sidebar_label: 2024 ---- - -# 2024 Changelog - -### ClickHouse release v23.8.16.16-lts (b80cac57ead) FIXME as compared to v23.8.15.35-lts (060ff8e813a) - -#### Improvement -* Backported in [#66962](https://github.com/ClickHouse/ClickHouse/issues/66962): Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Backported in [#65461](https://github.com/ClickHouse/ClickHouse/issues/65461): Reload certificate chain during certificate reload. [#61671](https://github.com/ClickHouse/ClickHouse/pull/61671) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)). 
-* Backported in [#65880](https://github.com/ClickHouse/ClickHouse/issues/65880): Always start Keeper with sufficient amount of threads in global thread pool. [#64444](https://github.com/ClickHouse/ClickHouse/pull/64444) ([Duc Canh Le](https://github.com/canhld94)). -* Backported in [#65912](https://github.com/ClickHouse/ClickHouse/issues/65912): Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). - -#### Bug Fix (user-visible misbehavior in an official stable release) -* Backported in [#65351](https://github.com/ClickHouse/ClickHouse/issues/65351): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)). -* Backported in [#66037](https://github.com/ClickHouse/ClickHouse/issues/66037): Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). -* Backported in [#65281](https://github.com/ClickHouse/ClickHouse/issues/65281): Fix crash with UniqInjectiveFunctionsEliminationPass and uniqCombined. [#65188](https://github.com/ClickHouse/ClickHouse/pull/65188) ([Raúl Marín](https://github.com/Algunenano)). -* Backported in [#65368](https://github.com/ClickHouse/ClickHouse/issues/65368): Fix a bug in ClickHouse Keeper that causes digest mismatch during closing session. [#65198](https://github.com/ClickHouse/ClickHouse/pull/65198) ([Aleksei Filatov](https://github.com/aalexfvk)). -* Backported in [#65782](https://github.com/ClickHouse/ClickHouse/issues/65782): Fixed bug in MergeJoin. Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). -* Backported in [#65743](https://github.com/ClickHouse/ClickHouse/issues/65743): Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). -* Backported in [#65926](https://github.com/ClickHouse/ClickHouse/issues/65926): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). -* Backported in [#65822](https://github.com/ClickHouse/ClickHouse/issues/65822): Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). -* Backported in [#66322](https://github.com/ClickHouse/ClickHouse/issues/66322): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Backported in [#66449](https://github.com/ClickHouse/ClickHouse/issues/66449): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Backported in [#66717](https://github.com/ClickHouse/ClickHouse/issues/66717): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). -* Backported in [#67320](https://github.com/ClickHouse/ClickHouse/issues/67320): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). - -#### NOT FOR CHANGELOG / INSIGNIFICANT - -* Backported in [#65080](https://github.com/ClickHouse/ClickHouse/issues/65080): Follow up to [#56541](https://github.com/ClickHouse/ClickHouse/issues/56541). [#57141](https://github.com/ClickHouse/ClickHouse/pull/57141) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Backported in [#64997](https://github.com/ClickHouse/ClickHouse/issues/64997): Fix crash with DISTINCT and window functions. [#64767](https://github.com/ClickHouse/ClickHouse/pull/64767) ([Igor Nikonov](https://github.com/devcrafter)). -* Backported in [#65913](https://github.com/ClickHouse/ClickHouse/issues/65913): Fix bug with session closing in Keeper. [#65735](https://github.com/ClickHouse/ClickHouse/pull/65735) ([Antonio Andelic](https://github.com/antonio2368)). -* Backported in [#66853](https://github.com/ClickHouse/ClickHouse/issues/66853): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)). -* Backported in [#67072](https://github.com/ClickHouse/ClickHouse/issues/67072): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). -* Update version after release. [#67691](https://github.com/ClickHouse/ClickHouse/pull/67691) ([robot-clickhouse](https://github.com/robot-clickhouse)). - diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 776a53ec01c..95ef8c0de90 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -56,7 +56,6 @@ v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 v23.8.16.40-lts 2024-08-02 -v23.8.16.16-lts 2024-08-20 v23.8.15.35-lts 2024-06-14 v23.8.14.6-lts 2024-05-02 v23.8.13.25-lts 2024-04-26 From 8bf103e82f62b1484801fd95f7f73a42a33f7fef Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Wed, 21 Aug 2024 12:49:33 +0200 Subject: [PATCH 114/127] squash! materialized_view_deduplication performance comparison test performance comparison test to check deduplication in MATERIALIZED VIEW's. 
Logic is similar to tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql, but with a bigger insert. --- tests/performance/materialized_view_deduplication.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/performance/materialized_view_deduplication.xml b/tests/performance/materialized_view_deduplication.xml index 621effd23dd..e5e0e5fc6e4 100644 --- a/tests/performance/materialized_view_deduplication.xml +++ b/tests/performance/materialized_view_deduplication.xml @@ -2,7 +2,6 @@ 1 - CREATE TABLE dst (`key` Int64, `value` String) ENGINE = MergeTree ORDER BY tuple() From c8bc7a124046ed4789ffdd6ce953f7764f88df09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 21 Aug 2024 14:52:11 +0200 Subject: [PATCH 115/127] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd9b8f66ad0..17b6dcd2ac1 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. -* [ClickHouse Guangzho User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 +* [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks.
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" From ccff8cef80394934e1567fd0c669b8f67fe7b660 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 21 Aug 2024 14:27:49 +0000 Subject: [PATCH 116/127] Update version_date.tsv and changelogs after v24.8.1.2684-lts --- SECURITY.md | 16 +- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.8.1.2684-lts.md | 525 +++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 532 insertions(+), 16 deletions(-) create mode 100644 docs/changelogs/v24.8.1.2684-lts.md diff --git a/SECURITY.md b/SECURITY.md index 8930dc96f8a..93c48f1d9ba 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -14,25 +14,15 @@ The following versions of ClickHouse server are currently supported with securit | Version | Supported | |:-|:-| +| 24.8 | ✔️ | | 24.7 | ✔️ | | 24.6 | ✔️ | -| 24.5 | ✔️ | +| 24.5 | ❌ | | 24.4 | ❌ | | 24.3 | ✔️ | | 24.2 | ❌ | | 24.1 | ❌ | -| 23.12 | ❌ | -| 23.11 | ❌ | -| 23.10 | ❌ | -| 23.9 | ❌ | -| 23.8 | ✔️ | -| 23.7 | ❌ | -| 23.6 | ❌ | -| 23.5 | ❌ | -| 23.4 | ❌ | -| 23.3 | ❌ | -| 23.2 | ❌ | -| 23.1 | ❌ | +| 23.* | ❌ | | 22.* | ❌ | | 21.* | ❌ | | 20.* | ❌ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index a44664259fb..fc93cee5bbc 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.7.3.42" +ARG VERSION="24.8.1.2684" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 2565828c846..3ceaf2a08b4 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.7.3.42" +ARG VERSION="24.8.1.2684" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 5ac8a58afea..76db997821c 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.7.3.42" +ARG VERSION="24.8.1.2684" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" #docker-official-library:off diff --git a/docs/changelogs/v24.8.1.2684-lts.md b/docs/changelogs/v24.8.1.2684-lts.md new file mode 100644 index 00000000000..8171bb3d719 --- /dev/null +++ b/docs/changelogs/v24.8.1.2684-lts.md @@ -0,0 +1,525 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.8.1.2684-lts (161c62fd295) FIXME as compared to v24.8.1.1-new (3f8b27d7acc) + +#### Backward Incompatible Change +* `clickhouse-client` and `clickhouse-local` now default to multi-query mode (instead single-query mode). 
As an example, `clickhouse-client -q "SELECT 1; SELECT 2"` now works, whereas users previously had to add `--multiquery` (or `-n`). The `--multiquery/-n` switch became obsolete. INSERT queries in multi-query statements are treated specially based on their FORMAT clause: If the FORMAT is `VALUES` (the most common case), the end of the INSERT statement is represented by a trailing semicolon `;` at the end of the query. For all other FORMATs (e.g. `CSV` or `JSONEachRow`), the end of the INSERT statement is represented by two newlines `\n\n` at the end of the query. [#63898](https://github.com/ClickHouse/ClickHouse/pull/63898) ([FFish](https://github.com/wxybear)). +* In previous versions, it was possible to use an alternative syntax for `LowCardinality` data types by appending `WithDictionary` to the name of the data type. It was an initial working implementation, and it was never documented or exposed to the public. Now, it is deprecated. If you have used this syntax, you have to ALTER your tables and rename the data types to `LowCardinality`. [#66842](https://github.com/ClickHouse/ClickHouse/pull/66842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix logical errors with storage `Buffer` used with distributed destination table. It's a backward incompatible change: queries using `Buffer` with a distributed destination table may stop working if the table appears more than once in the query (e.g., in a self-join). [#67015](https://github.com/ClickHouse/ClickHouse/pull/67015) ([vdimir](https://github.com/vdimir)). +* In previous versions, calling functions for random distributions based on the Gamma function (such as Chi-Squared, Student, Fisher) with negative arguments close to zero led to a long computation or an infinite loop. In the new version, calling these functions with zero or negative arguments will produce an exception. This closes [#67297](https://github.com/ClickHouse/ClickHouse/issues/67297). [#67326](https://github.com/ClickHouse/ClickHouse/pull/67326) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The system table `text_log` is enabled by default. This is fully compatible with previous versions, but you may notice subtly increased disk usage on the local disk (this system table takes a tiny amount of disk space). [#67428](https://github.com/ClickHouse/ClickHouse/pull/67428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, `arrayWithConstant` can be slow if asked to generate very large arrays. In the new version, it is limited to 1 GB per array. This closes [#32754](https://github.com/ClickHouse/ClickHouse/issues/32754). [#67741](https://github.com/ClickHouse/ClickHouse/pull/67741) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix REPLACE modifier formatting (forbid omitting brackets). [#67774](https://github.com/ClickHouse/ClickHouse/pull/67774) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68349](https://github.com/ClickHouse/ClickHouse/issues/68349): Reimplement Dynamic type. Now when the limit of dynamic data types is reached new types are not casted to String but stored in a special data structure in binary format with binary encoded data type. Now any type ever inserted into Dynamic column can be read from it as subcolumn. [#68132](https://github.com/ClickHouse/ClickHouse/pull/68132) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature +* Add new experimental Kafka storage engine to store offsets in Keeper instead of relying on committing them to Kafka. 
[#57625](https://github.com/ClickHouse/ClickHouse/pull/57625) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add new TimeSeries table engine: - by default: ``` CREATE TABLE tbl ENGINE=TimeSeries ``` - or with explicitly specified engines for its internal tables. [#64183](https://github.com/ClickHouse/ClickHouse/pull/64183) ([Vitaly Baranov](https://github.com/vitlibar)). +* Support more join strictnesses (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both the left and right table, e.g. t1.y < t2.y (see setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)). +* Add `_etag` virtual column for S3 table engine. Fixes [#65312](https://github.com/ClickHouse/ClickHouse/issues/65312). [#65386](https://github.com/ClickHouse/ClickHouse/pull/65386) ([skyoct](https://github.com/skyoct)). +* This pull request introduces Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add function `printf` for Spark compatibility. [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)). +* Backported in [#68450](https://github.com/ClickHouse/ClickHouse/issues/68450): Implement new JSON data type. [#66444](https://github.com/ClickHouse/ClickHouse/pull/66444) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a new server setting: `disable_insertion_and_mutation`. Set it to true, and this node will deny all insertions and mutations (Alter table delete/update/drop partition), including async insertion. [#66519](https://github.com/ClickHouse/ClickHouse/pull/66519) ([Xu Jia](https://github.com/XuJia0210)). +* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table functions with the Null engine, which can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)). +* Added support for reading MULTILINESTRING geometry in WKT format using function readWKTLineString. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)). +* Add a new table function `fuzzQuery`. This function allows the modification of a given query string with random variations. Example: `SELECT query FROM fuzzQuery('SELECT 1') LIMIT 5;`. [#67655](https://github.com/ClickHouse/ClickHouse/pull/67655) ([pufit](https://github.com/pufit)). +* Support query `DROP DETACHED PARTITION ALL` to drop all detached partitions. [#67885](https://github.com/ClickHouse/ClickHouse/pull/67885) ([Duc Canh Le](https://github.com/canhld94)). +* Added a tagging (namespace) mechanism for the query cache. The same queries with different tags are considered different by the query cache. Example: `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'abc'` and `SELECT 1 SETTINGS use_query_cache = 1, query_cache_tag = 'def'` now create different query cache entries.
[#68235](https://github.com/ClickHouse/ClickHouse/pull/68235) ([sakulali](https://github.com/sakulali)). + +#### Performance Improvement +* Use adaptive read task size calculation method (adaptive meaning it depends on read column sizes) for parallel replicas. [#60377](https://github.com/ClickHouse/ClickHouse/pull/60377) ([Nikita Taranov](https://github.com/nickitat)). +* Store the `plain_rewritable` disk directory metadata in `__meta` layout, separately from the merge tree data in the object storage. Move the `plain_rewritable` disk to a flat directory structure. [#65751](https://github.com/ClickHouse/ClickHouse/pull/65751) ([Julia Kartseva](https://github.com/jkartseva)). +* Enable `compile_expressions` (JIT compiler for fragments of ordinary expressions) by default. This closes [#51264](https://github.com/ClickHouse/ClickHouse/issues/51264) and [#56386](https://github.com/ClickHouse/ClickHouse/issues/56386). [#66486](https://github.com/ClickHouse/ClickHouse/pull/66486) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve columns squashing for String/Array/Map/Variant/Dynamic types by reserving required memory in advance for all subcolumns. [#67043](https://github.com/ClickHouse/ClickHouse/pull/67043) ([Kruglov Pavel](https://github.com/Avogar)). +* Speed up system flush logs, flush logs on shutdown. [#67472](https://github.com/ClickHouse/ClickHouse/pull/67472) ([Sema Checherinda](https://github.com/CheSema)). +* Backported in [#68496](https://github.com/ClickHouse/ClickHouse/issues/68496): Improved overall performance of merges by reducing the overhead of scheduling steps of merges. [#68016](https://github.com/ClickHouse/ClickHouse/pull/68016) ([Anton Popov](https://github.com/CurtizJ)). +* Setting `optimize_functions_to_subcolumns` is enabled by default. [#68053](https://github.com/ClickHouse/ClickHouse/pull/68053) ([Anton Popov](https://github.com/CurtizJ)). + +#### Improvement +* ClickHouse server now supports new setting `max_keep_alive_requests`. For keep-alive HTTP connections to the server it works in tandem with `keep_alive_timeout` - if the idle timeout has not expired but more than `max_keep_alive_requests` requests have already been made through the given connection, it will be closed by the server. [#61793](https://github.com/ClickHouse/ClickHouse/pull/61793) ([Nikita Taranov](https://github.com/nickitat)). +* As in the new version, SOURCES are checked based on Table Engine logic, even grant table engine is disabled by default, if a source is not granted, a prompt of table engine would popup instead, which is misleading. [#65419](https://github.com/ClickHouse/ClickHouse/pull/65419) ([jsc0218](https://github.com/jsc0218)). +* Added statistics type `count_min` (count-min sketches) which provide selectivity estimations for equality predicates like `col = 'val'`. Supported data types are string, date, datetime and numeric types. [#65521](https://github.com/ClickHouse/ClickHouse/pull/65521) ([JackyWoo](https://github.com/JackyWoo)). +* Do not pass logs for keeper explicitly in the image to allow overriding. [#65564](https://github.com/ClickHouse/ClickHouse/pull/65564) ([Azat Khuzhin](https://github.com/azat)). +* Use `Atomic` database by default in `clickhouse-local`. Address items 1 and 5 from [#50647](https://github.com/ClickHouse/ClickHouse/issues/50647). Closes [#44817](https://github.com/ClickHouse/ClickHouse/issues/44817). [#65860](https://github.com/ClickHouse/ClickHouse/pull/65860) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add the `rows_before_aggregation_at_least` statistic to the query response when `rows_before_aggregation` is enabled. This statistic represents the number of rows read before aggregation. In the context of a distributed query, when using the `group by` or `max` aggregation function without a `limit`, `rows_before_aggregation_at_least` can reflect the number of rows hit by the query. [#66084](https://github.com/ClickHouse/ClickHouse/pull/66084) ([morning-color](https://github.com/morning-color)). +* Introduced the `use_same_password_for_base_backup` setting for `BACKUP` and `RESTORE` queries, allowing creation and restoration of incremental backups to/from password-protected archives. [#66214](https://github.com/ClickHouse/ClickHouse/pull/66214) ([Samuele](https://github.com/sguerrini97)). +* Ignore async_load_databases for ATTACH query (previously it was possible for ATTACH to return before the tables had been attached). [#66240](https://github.com/ClickHouse/ClickHouse/pull/66240) ([Azat Khuzhin](https://github.com/azat)). +* `[Replicated]MergeTreeSink` now properly cancels its `delayed_chunk` in the `onCancel()` method. [#66279](https://github.com/ClickHouse/ClickHouse/pull/66279) ([Sema Checherinda](https://github.com/CheSema)). +* Added logs and metrics for rejected connections (where there are not enough resources). [#66410](https://github.com/ClickHouse/ClickHouse/pull/66410) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Setting `allow_experimental_analyzer` is renamed to `enable_analyzer`. The old name is preserved in the form of an alias. [#66438](https://github.com/ClickHouse/ClickHouse/pull/66438) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support true UUID type for MongoDB engine. [#66671](https://github.com/ClickHouse/ClickHouse/pull/66671) ([Azat Khuzhin](https://github.com/azat)). +* Added a new `MergeTree` setting `deduplicate_merge_projection_mode` to control the projections during merges (for specific engines) and the `OPTIMIZE DEDUPLICATE` query. Supported options: `throw` (throw an exception in case the projection is not fully supported for the *MergeTree engine), `drop` (remove the projection during merge if it can't be merged itself consistently) and `rebuild` (rebuild the projection from scratch, which is a heavy operation). [#66672](https://github.com/ClickHouse/ClickHouse/pull/66672) ([jsc0218](https://github.com/jsc0218)). +* Add replication lag and recovery time metrics. [#66703](https://github.com/ClickHouse/ClickHouse/pull/66703) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Add S3DiskNoKeyErrors metric. [#66704](https://github.com/ClickHouse/ClickHouse/pull/66704) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Ensure the COMMENT clause works for all table engines. [#66832](https://github.com/ClickHouse/ClickHouse/pull/66832) ([Joe Lynch](https://github.com/joelynch)). +* Update the usage of error codes `BAD_ARGUMENTS` and `ILLEGAL_TYPE_OF_ARGUMENT`, using more accurate error codes when appropriate. [#66851](https://github.com/ClickHouse/ClickHouse/pull/66851) ([Yohann Jardin](https://github.com/yohannj)). +* Function `mapFromArrays` now accepts `Map(K, V)` as its first argument, for example: `SELECT mapFromArrays(map('a', 4, 'b', 4), ['aa', 'bb'])` now works and returns `{('a',4):'aa',('b',4):'bb'}`. Also, if the 1st argument is an Array, it can now also be of type `Array(Nullable(T))` or `Array(LowCardinality(Nullable(T)))` as long as the actual array values are not `NULL`.
[#67103](https://github.com/ClickHouse/ClickHouse/pull/67103) ([李扬](https://github.com/taiyang-li)). +* Read configuration for clickhouse-local from ~/.clickhouse-local. [#67135](https://github.com/ClickHouse/ClickHouse/pull/67135) ([Azat Khuzhin](https://github.com/azat)). +* Rename setting `input_format_orc_read_use_writer_time_zone` to `input_format_orc_reader_timezone` and allow the user to set the reader timezone. [#67175](https://github.com/ClickHouse/ClickHouse/pull/67175) ([kevinyhzou](https://github.com/KevinyhZou)). +* Decrease the log level of the 'Socket is not connected' error when an HTTP connection is immediately reset by the peer after connecting; closes [#34218](https://github.com/ClickHouse/ClickHouse/issues/34218). [#67177](https://github.com/ClickHouse/ClickHouse/pull/67177) ([vdimir](https://github.com/vdimir)). +* Speed up table removal for the `DROP DATABASE` query; increased the default value of `database_catalog_drop_table_concurrency` to 16. [#67228](https://github.com/ClickHouse/ClickHouse/pull/67228) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add the ability to load dashboards for system.dashboards from config (once set, they override the default dashboards preset). [#67232](https://github.com/ClickHouse/ClickHouse/pull/67232) ([Azat Khuzhin](https://github.com/azat)). +* The window functions in SQL are traditionally in snake case. ClickHouse uses camelCase, so new aliases `denseRank()` and `percentRank()` have been created. These new functions can be called exactly the same as the original `dense_rank()` and `percent_rank()` functions (a usage sketch follows below). Both snake case and camelCase syntaxes remain usable. A new test for each of the functions has been added as well. This closes [#67042](https://github.com/ClickHouse/ClickHouse/issues/67042). [#67334](https://github.com/ClickHouse/ClickHouse/pull/67334) ([Peter Nguyen](https://github.com/petern48)). +* Autodetect the configuration file format if it is not .xml, .yml or .yaml: if the file begins with `<` it is treated as XML, otherwise as YAML. Non-regular files, such as pipes (e.g. /dev/fd/X), are parsed as XML. [#67391](https://github.com/ClickHouse/ClickHouse/pull/67391) ([sakulali](https://github.com/sakulali)). +* Functions `parseDateTime` and `parseDateTimeInJodaSyntax` now treat their format parameter as optional. If it is not specified, format strings `%Y-%m-%d %H:%i:%s` and `yyyy-MM-dd HH:mm:ss` are assumed. Example: `SELECT parseDateTime('2021-01-04 23:12:34')` now returns DateTime value `2021-01-04 23:12:34` (previously, this threw an exception). [#67399](https://github.com/ClickHouse/ClickHouse/pull/67399) ([Robert Schulze](https://github.com/rschu1ze)). +* Automatically retry Keeper requests in KeeperMap if they fail because of a timeout or connection loss. [#67448](https://github.com/ClickHouse/ClickHouse/pull/67448) ([Antonio Andelic](https://github.com/antonio2368)). +* Rework usage of custom disks for tables. [#67684](https://github.com/ClickHouse/ClickHouse/pull/67684) ([Sema Checherinda](https://github.com/CheSema)). +* Various improvements in the advanced dashboard. This closes [#67697](https://github.com/ClickHouse/ClickHouse/issues/67697). This closes [#63407](https://github.com/ClickHouse/ClickHouse/issues/63407). This closes [#51129](https://github.com/ClickHouse/ClickHouse/issues/51129). This closes [#61204](https://github.com/ClickHouse/ClickHouse/issues/61204). [#67701](https://github.com/ClickHouse/ClickHouse/pull/67701) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
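A minimal sketch of the camelCase window-function aliases mentioned above; the query uses the `numbers` table function purely for illustration:

```sql
-- The camelCase aliases behave exactly like the original snake_case functions.
SELECT
    number,
    dense_rank()  OVER (ORDER BY number % 3) AS rank_snake_case,
    denseRank()   OVER (ORDER BY number % 3) AS rank_camel_case,
    percentRank() OVER (ORDER BY number % 3) AS pct_camel_case
FROM numbers(6);
```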
+* Avoid allocating too much capacity for array columns while writing ORC, plus some minor refactoring to make the code cleaner. Performance improves by 15% for array columns. [#67879](https://github.com/ClickHouse/ClickHouse/pull/67879) ([李扬](https://github.com/taiyang-li)). +* Support the OPTIMIZE query on the Join table engine to reduce the memory footprint of Join tables. [#67883](https://github.com/ClickHouse/ClickHouse/pull/67883) ([Duc Canh Le](https://github.com/canhld94)). +* Add replication lag and recovery time metrics. [#67913](https://github.com/ClickHouse/ClickHouse/pull/67913) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Add '-no-pie' to aarch64 Linux builds to allow proper introspection and symbolizing of stacktraces after a ClickHouse restart. [#67916](https://github.com/ClickHouse/ClickHouse/pull/67916) ([filimonov](https://github.com/filimonov)). +* Backported in [#68481](https://github.com/ClickHouse/ClickHouse/issues/68481): Added profile events for merges and mutations for better introspection. [#68015](https://github.com/ClickHouse/ClickHouse/pull/68015) ([Anton Popov](https://github.com/CurtizJ)). +* Fix settings/current_database in system.processes for async BACKUP/RESTORE. [#68163](https://github.com/ClickHouse/ClickHouse/pull/68163) ([Azat Khuzhin](https://github.com/azat)). +* Remove unnecessary logs for MergeTree tables that don't support replication. [#68238](https://github.com/ClickHouse/ClickHouse/pull/68238) ([Daniil Ivanik](https://github.com/divanik)). +* Backported in [#68430](https://github.com/ClickHouse/ClickHouse/issues/68430): Improve schema inference of date times. Now DateTime64 is used only when the date-time value has a fractional part; otherwise regular DateTime is used. Inference of Date/DateTime is stricter now, especially when `date_time_input_format='best_effort'`, to avoid inferring date times from strings in corner cases. [#68382](https://github.com/ClickHouse/ClickHouse/pull/68382) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)). +* Fixed reading of subcolumns after the `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)). +* Fix boolean literals in queries sent to an external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)). +* Fix formatting of a query with an aliased JOIN ON expression, e.g. `... JOIN t2 ON (x = y) AS e ORDER BY x` should be formatted as `... JOIN t2 ON ((x = y) AS e) ORDER BY x`. [#66312](https://github.com/ClickHouse/ClickHouse/pull/66312) ([vdimir](https://github.com/vdimir)). +* Fix cluster() for inter-server secret (preserve the initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)). +* Fix a possible runtime error while converting an Array field with nulls to Array(Variant). [#66727](https://github.com/ClickHouse/ClickHouse/pull/66727) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix an occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)). +* Fix creating a KeeperMap table after an incomplete drop.
[#66865](https://github.com/ClickHouse/ClickHouse/pull/66865) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a broken-part error while restoring to an `s3_plain_rewritable` disk. [#66881](https://github.com/ClickHouse/ClickHouse/pull/66881) ([Vitaly Baranov](https://github.com/vitlibar)). +* In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)). +* Fix invalid format detection in schema inference that could lead to the logical error `Format {} doesn't support schema inference`. [#66899](https://github.com/ClickHouse/ClickHouse/pull/66899) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)). +* Forbid `CREATE AS SELECT` even when `database_replicated_allow_heavy_create` is set. It was unconditionally forbidden in 23.12 and accidentally allowed under the setting in unreleased 24.7. [#66980](https://github.com/ClickHouse/ClickHouse/pull/66980) ([vdimir](https://github.com/vdimir)). +* Reading from `numbers` could wrongly throw an exception when the `max_rows_to_read` limit was set. This closes [#66992](https://github.com/ClickHouse/ClickHouse/issues/66992). [#66996](https://github.com/ClickHouse/ClickHouse/pull/66996) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add proper type conversion to the lagInFrame and leadInFrame window functions; fixes an MSan test. [#67091](https://github.com/ClickHouse/ClickHouse/pull/67091) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)). +* TRUNCATE DATABASE used to stop replication as if it were a DROP DATABASE query; it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use a separate client context in `clickhouse-local`. [#67133](https://github.com/ClickHouse/ClickHouse/pull/67133) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix the error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Correct behavior of `ORDER BY all` with disabled `enable_order_by_all` and parallel replicas (distributed queries as well). [#67153](https://github.com/ClickHouse/ClickHouse/pull/67153) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix wrong usage of `input_format_max_bytes_to_read_for_schema_inference` in the schema cache. [#67157](https://github.com/ClickHouse/ClickHouse/pull/67157) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a memory leak in count distinct when an exception is thrown during GROUP BY on a single nullable key. [#67171](https://github.com/ClickHouse/ClickHouse/pull/67171) ([Jet He](https://github.com/compasses)). +* This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907.
[#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a crash in DistributedAsyncInsert when the connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix the error `Conversion from AggregateFunction(name, Type) to AggregateFunction(name, Nullable(Type)) is not supported`. The bug was caused by the `optimize_rewrite_aggregate_function_with_if` optimization. Fixes [#67112](https://github.com/ClickHouse/ClickHouse/issues/67112). [#67229](https://github.com/ClickHouse/ClickHouse/pull/67229) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a hung query when using an empty tuple as the left-hand side of the IN function. [#67295](https://github.com/ClickHouse/ClickHouse/pull/67295) ([Duc Canh Le](https://github.com/canhld94)). +* Fix a crash of `uniq` and `uniqTheta` with a `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)). +* It was possible to create very deeply nested JSON data that triggered a stack overflow while skipping unknown fields. This closes [#67292](https://github.com/ClickHouse/ClickHouse/issues/67292). [#67324](https://github.com/ClickHouse/ClickHouse/pull/67324) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix attaching a ReplicatedMergeTree table after an exception during startup. [#67360](https://github.com/ClickHouse/ClickHouse/pull/67360) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a segfault caused by incorrectly detaching from the thread group in `Aggregator`. [#67385](https://github.com/ClickHouse/ClickHouse/pull/67385) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix one more case when a non-deterministic function is specified in the PK. [#67395](https://github.com/ClickHouse/ClickHouse/pull/67395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed the `bloom_filter` index breaking queries with mildly weird conditions like `(k=2)=(k=2)` or `has([1,2,3], k)`. [#67423](https://github.com/ClickHouse/ClickHouse/pull/67423) ([Michael Kolupaev](https://github.com/al13n321)). +* Correctly parse a file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix waiting for tasks in ~WriteBufferFromS3 in case the WriteBuffer was cancelled. [#67459](https://github.com/ClickHouse/ClickHouse/pull/67459) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Protect temporary part directories from removal during RESTORE. [#67491](https://github.com/ClickHouse/ClickHouse/pull/67491) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid traversal of unresolved table function arguments in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)). +* Fix `Logical error: Expected the argument №N of type T to have X rows, but it has 0`. The error could happen in a remote query with a constant expression in `GROUP BY` (with the new analyzer). [#67536](https://github.com/ClickHouse/ClickHouse/pull/67536) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix join on tuple with NULLs: some queries with the new analyzer and `NULL` inside the tuple in the `JOIN ON` section returned incorrect results. [#67538](https://github.com/ClickHouse/ClickHouse/pull/67538) ([vdimir](https://github.com/vdimir)). +* Fix a redundant reschedule of FileCache::freeSpaceRatioKeepingThreadFunc() in case of a full non-evictable cache. [#67540](https://github.com/ClickHouse/ClickHouse/pull/67540) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix inserting into stream-like engines (Kafka, RabbitMQ, NATS) through the HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix for function `toStartOfWeek` which returned the wrong result with a small `DateTime64` value. [#67558](https://github.com/ClickHouse/ClickHouse/pull/67558) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix creation of a view with a recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix `Logical error: 'file_offset_of_buffer_end <= read_until_position'` in the filesystem cache. Closes [#57508](https://github.com/ClickHouse/ClickHouse/issues/57508). [#67623](https://github.com/ClickHouse/ClickHouse/pull/67623) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixes [#62282](https://github.com/ClickHouse/ClickHouse/issues/62282). Removed the call to `convertFieldToString()` and added data type-specific serialization code. Parameterized view substitution was broken for multiple data types when the parameter value was a function or expression returning a data type instance. [#67654](https://github.com/ClickHouse/ClickHouse/pull/67654) ([Shankar](https://github.com/shiyer7474)). +* Fix a crash on `percent_rank`. `percent_rank`'s default frame type is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is considered, and window functions without a window frame definition in SQL can now be put into different `WindowTransfomer`s properly. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)). +* Fix reloading SQL UDFs with UNION. Previously, restarting the server could make the UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix a possible logical error "Unexpected return type from if" with the experimental Variant type and the enabled setting `use_variant_as_common_type` in the function `if` with Tuples and Maps. [#67687](https://github.com/ClickHouse/ClickHouse/pull/67687) ([Kruglov Pavel](https://github.com/Avogar)). +* Due to a bug in the Linux kernel, a query could hang in `TimerDescriptor::drain`. This closes [#37686](https://github.com/ClickHouse/ClickHouse/issues/37686). [#67702](https://github.com/ClickHouse/ClickHouse/pull/67702) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix completion of the `RESTORE ON CLUSTER` command. [#67720](https://github.com/ClickHouse/ClickHouse/pull/67720) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix a dictionary hang in case of CANNOT_SCHEDULE_TASK while loading. [#67751](https://github.com/ClickHouse/ClickHouse/pull/67751) ([Azat Khuzhin](https://github.com/azat)). +* Fix a potential stack overflow in the `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility (a short usage sketch follows at the end of this section).
Improved diagnostics of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Queries like `SELECT count() FROM t WHERE cast(c = 1 or c = 9999 AS Bool) SETTINGS use_skip_indexes=1` with bloom filter indexes on `c` now work correctly. [#67781](https://github.com/ClickHouse/ClickHouse/pull/67781) ([jsc0218](https://github.com/jsc0218)). +* Fix a wrong aggregation result in some queries with aggregation without keys and with a filter; closes [#67419](https://github.com/ClickHouse/ClickHouse/issues/67419). [#67804](https://github.com/ClickHouse/ClickHouse/pull/67804) ([vdimir](https://github.com/vdimir)). +* Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix DateTime64 parsing after constant folding in distributed queries; closes [#66773](https://github.com/ClickHouse/ClickHouse/issues/66773). [#67920](https://github.com/ClickHouse/ClickHouse/pull/67920) ([vdimir](https://github.com/vdimir)). +* Fix a wrong `count()` result when there is a non-deterministic function in the predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)). +* Now ClickHouse doesn't consider a part broken if a projection doesn't exist on disk but exists in `checksums.txt`. [#68003](https://github.com/ClickHouse/ClickHouse/pull/68003) ([alesapin](https://github.com/alesapin)). +* Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation didn't affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)). +* Attempt to fix `Block structure mismatch in AggregatingStep stream: different types` for the aggregate projection optimization. [#68107](https://github.com/ClickHouse/ClickHouse/pull/68107) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#68343](https://github.com/ClickHouse/ClickHouse/issues/68343): Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#68400](https://github.com/ClickHouse/ClickHouse/issues/68400): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
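An illustrative sketch of the renamed `JSONMergePatch` function from the bug-fix entry above; the input values are made up, and the expected result simply follows JSON merge-patch semantics:

```sql
-- New (correct) name:
SELECT JSONMergePatch('{"a": 1, "b": 2}', '{"b": 3, "c": 4}') AS merged;
-- expected: {"a":1,"b":3,"c":4}

-- The previous lowercase name is kept as a compatibility alias:
SELECT jsonMergePatch('{"x": 1}', '{"y": 2}') AS merged_compat;
```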
+ +#### Build/Testing/Packaging Improvement +* Improved `test_storage_s3` tests: increased `s3_max_single_read_retries` for reads from the "unstable" s3 source and allowed all tests to run multiple times in a row. [#66896](https://github.com/ClickHouse/ClickHouse/pull/66896) ([Ilya Yatsishin](https://github.com/qoega)). +* The integration tests flaky check will now run each test case multiple times to find more issues in tests and make them more reliable. It uses the `pytest-repeat` library to run a test case multiple times in the same environment. It is important to clean up tables and other entities at the end of a test case for it to pass. Repeat works much faster than several pytest runs as it starts the necessary containers only once. [#66986](https://github.com/ClickHouse/ClickHouse/pull/66986) ([Ilya Yatsishin](https://github.com/qoega)). +* Allow using CLion with ClickHouse. In previous versions, CLion froze for a minute on every keypress. This closes [#66994](https://github.com/ClickHouse/ClickHouse/issues/66994). [#66995](https://github.com/ClickHouse/ClickHouse/pull/66995) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Getauxval: avoid a crash under sanitizer re-exec due to high ASLR entropy. [#67081](https://github.com/ClickHouse/ClickHouse/pull/67081) ([Raúl Marín](https://github.com/Algunenano)). +* Some parts of the client code are extracted to a single file and the highest possible optimization level is applied to them even for debug builds. This closes: [#65745](https://github.com/ClickHouse/ClickHouse/issues/65745). [#67215](https://github.com/ClickHouse/ClickHouse/pull/67215) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). + +#### NO CL CATEGORY + +* Backported in [#68416](https://github.com/ClickHouse/ClickHouse/issues/68416):. [#68386](https://github.com/ClickHouse/ClickHouse/pull/68386) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Fix for 992 and friends"'. [#66993](https://github.com/ClickHouse/ClickHouse/pull/66993) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Revert "Fix for 992 and friends""'. [#67029](https://github.com/ClickHouse/ClickHouse/pull/67029) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "FuzzQuery table function"'. [#67040](https://github.com/ClickHouse/ClickHouse/pull/67040) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Enable `compile_expressions` by default."'. [#67299](https://github.com/ClickHouse/ClickHouse/pull/67299) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Slightly better calculation of primary index"'. [#67392](https://github.com/ClickHouse/ClickHouse/pull/67392) ([alesapin](https://github.com/alesapin)). +* NO CL ENTRY: 'Revert "Add settings to replace external engines to Null during create"'. [#67507](https://github.com/ClickHouse/ClickHouse/pull/67507) ([Raúl Marín](https://github.com/Algunenano)). +* NO CL ENTRY: 'Revert "Revert "Add settings to replace external engines to Null during create""'. [#67511](https://github.com/ClickHouse/ClickHouse/pull/67511) ([Ilya Yatsishin](https://github.com/qoega)). +* NO CL ENTRY: 'Revert "Add replication lag and recovery time metrics"'. [#67731](https://github.com/ClickHouse/ClickHouse/pull/67731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Revert "Slightly better calculation of primary index""'.
[#67846](https://github.com/ClickHouse/ClickHouse/pull/67846) ([Anton Popov](https://github.com/CurtizJ)). +* NO CL ENTRY: 'Revert "CI: Strict job timeout 1.5h for tests, 2h for builds"'. [#67986](https://github.com/ClickHouse/ClickHouse/pull/67986) ([Max K.](https://github.com/maxknv)). +* NO CL ENTRY: 'Revert "Bump rocksdb from v8.10 to v9.4 + enable jemalloc and liburing"'. [#68014](https://github.com/ClickHouse/ClickHouse/pull/68014) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* NO CL ENTRY: 'Revert "Use `Atomic` database by default in `clickhouse-local`"'. [#68023](https://github.com/ClickHouse/ClickHouse/pull/68023) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Refactor tests for (experimental) statistics"'. [#68156](https://github.com/ClickHouse/ClickHouse/pull/68156) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* CI: enable libfuzzer (fixing build and docker). [#61908](https://github.com/ClickHouse/ClickHouse/pull/61908) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Initial implementation of vector similarity index. [#63675](https://github.com/ClickHouse/ClickHouse/pull/63675) ([Robert Schulze](https://github.com/rschu1ze)). +* Update zlib-ng from 2.0.2 to 2.1.7. [#64489](https://github.com/ClickHouse/ClickHouse/pull/64489) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix 02444_async_broken_outdated_part_loading flakiness. [#64956](https://github.com/ClickHouse/ClickHouse/pull/64956) ([Azat Khuzhin](https://github.com/azat)). +* attach_gdb.lib: print more information before all stacks. [#65253](https://github.com/ClickHouse/ClickHouse/pull/65253) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix some perf tests. [#65320](https://github.com/ClickHouse/ClickHouse/pull/65320) ([Nikita Taranov](https://github.com/nickitat)). +* Remove ActionsDAGPtr whenever it is possible. [#65414](https://github.com/ClickHouse/ClickHouse/pull/65414) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Scheduler queue throughput introspection, Fix CPU indication in client. [#65654](https://github.com/ClickHouse/ClickHouse/pull/65654) ([Sergei Trifonov](https://github.com/serxa)). +* Increase timeout in 02122_join_group_by_timeout for tsan build. [#65976](https://github.com/ClickHouse/ClickHouse/pull/65976) ([vdimir](https://github.com/vdimir)). +* Remove default values for certificateFile/privateKeyFile/dhParamsFile in keeper config (to avoid annoying errors in logs). [#65978](https://github.com/ClickHouse/ClickHouse/pull/65978) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v24.3.5.46-lts. [#66054](https://github.com/ClickHouse/ClickHouse/pull/66054) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flaky `02814_currentDatabase_for_table_functions`. [#66111](https://github.com/ClickHouse/ClickHouse/pull/66111) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix possible data-race StorageKafka with statistics_interval_ms>0. [#66311](https://github.com/ClickHouse/ClickHouse/pull/66311) ([Azat Khuzhin](https://github.com/azat)). +* Avoid unneeded calculation in SeriesPeriodDetect. [#66320](https://github.com/ClickHouse/ClickHouse/pull/66320) ([Ruihang Xia](https://github.com/waynexia)). +* It aims to complete [#58630](https://github.com/ClickHouse/ClickHouse/issues/58630). 
This is made possible by [#60463](https://github.com/ClickHouse/ClickHouse/issues/60463), [#61459](https://github.com/ClickHouse/ClickHouse/issues/61459) and [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082). [#66443](https://github.com/ClickHouse/ClickHouse/pull/66443) ([Amos Bird](https://github.com/amosbird)). +* Allow run query instantly in play. [#66457](https://github.com/ClickHouse/ClickHouse/pull/66457) ([Aleksandr Musorin](https://github.com/AVMusorin)). +* Bump ICU from v70 to v75. [#66474](https://github.com/ClickHouse/ClickHouse/pull/66474) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump RocksDB from v6.29.5 to v7.10.2. [#66475](https://github.com/ClickHouse/ClickHouse/pull/66475) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump RocksDB from v7.10.2 to v8.9.1. [#66479](https://github.com/ClickHouse/ClickHouse/pull/66479) ([Robert Schulze](https://github.com/rschu1ze)). +* I believe the error code for this function should not be "NOT_ALLOWED" since it simply is an invalid query and "BAD_QUERY_PARAMETER" is a more reasonable error code for this. [#66491](https://github.com/ClickHouse/ClickHouse/pull/66491) ([Ali](https://github.com/xogoodnow)). +* Update gdb to 15.1 (by compiling from sources). [#66494](https://github.com/ClickHouse/ClickHouse/pull/66494) ([Azat Khuzhin](https://github.com/azat)). +* Ensure that llvm-symbolizer is used for symbolizing sanitizer reports. [#66495](https://github.com/ClickHouse/ClickHouse/pull/66495) ([Azat Khuzhin](https://github.com/azat)). +* Remove unused local variables. [#66503](https://github.com/ClickHouse/ClickHouse/pull/66503) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* This will solve a lot of problems with inconsistent formatting. And it opens the path for [#65753](https://github.com/ClickHouse/ClickHouse/issues/65753). This closes [#66807](https://github.com/ClickHouse/ClickHouse/issues/66807). This closes [#61611](https://github.com/ClickHouse/ClickHouse/issues/61611). This closes [#61711](https://github.com/ClickHouse/ClickHouse/issues/61711). This closes [#67445](https://github.com/ClickHouse/ClickHouse/issues/67445). [#66506](https://github.com/ClickHouse/ClickHouse/pull/66506) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename Context::getSettings() to Context::getSettingsCopy(). [#66528](https://github.com/ClickHouse/ClickHouse/pull/66528) ([Raúl Marín](https://github.com/Algunenano)). +* Uninteresting change: introducing `ClientApplicationBase`. [#66549](https://github.com/ClickHouse/ClickHouse/pull/66549) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Respond to parallel replicas protocol requests with priority on initiator. [#66618](https://github.com/ClickHouse/ClickHouse/pull/66618) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix bad code: it was catching exceptions. [#66628](https://github.com/ClickHouse/ClickHouse/pull/66628) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Dump all memory stats in CgroupsMemoryUsageObserver on hitting the limit. [#66732](https://github.com/ClickHouse/ClickHouse/pull/66732) ([Nikita Taranov](https://github.com/nickitat)). +* Save writer thread id in shared mutex for debugging. [#66745](https://github.com/ClickHouse/ClickHouse/pull/66745) ([Alexander Gololobov](https://github.com/davenger)). +* Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)). 
+* Looks like it runs too many mutations sometimes and fails to process them within the timeout. So if a query waits for mutations - the test fails. [#66785](https://github.com/ClickHouse/ClickHouse/pull/66785) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Better diagnostics in functional tests. [#66790](https://github.com/ClickHouse/ClickHouse/pull/66790) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `01042_system_reload_dictionary_reloads_completely`. [#66811](https://github.com/ClickHouse/ClickHouse/pull/66811) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Something is strange with the test about refreshable materialized views. [#66816](https://github.com/ClickHouse/ClickHouse/pull/66816) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Randomize `trace_profile_events`. [#66821](https://github.com/ClickHouse/ClickHouse/pull/66821) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Stop ignoring SIGSEGV in GDB. [#66847](https://github.com/ClickHouse/ClickHouse/pull/66847) ([Antonio Andelic](https://github.com/antonio2368)). +* Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)). +* When executing queries with parallel replicas that involve only a subset of nodes within a shard, the current behavior is that if all participating replicas are unavailable, the query completes without any errors but returns no results. Referencing issue [#65467](https://github.com/ClickHouse/ClickHouse/issues/65467), this pull request addresses the issue where only a portion of the nodes in a shard are participating in the execution. [#66880](https://github.com/ClickHouse/ClickHouse/pull/66880) ([zoomxi](https://github.com/zoomxi)). +* Speed up stateful tests setup. [#66886](https://github.com/ClickHouse/ClickHouse/pull/66886) ([Raúl Marín](https://github.com/Algunenano)). +* Functions [h-r]*: Iterate over input_rows_count where appropriate. [#66897](https://github.com/ClickHouse/ClickHouse/pull/66897) ([Robert Schulze](https://github.com/rschu1ze)). +* Stateless tests: Change status of failed tests in case of server crash and add no-parallel to high-load tests. [#66901](https://github.com/ClickHouse/ClickHouse/pull/66901) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix performance test about the generateRandom table function, supposedly. [#66906](https://github.com/ClickHouse/ClickHouse/pull/66906) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad tests `share_big_sets`, CC @davenger. [#66908](https://github.com/ClickHouse/ClickHouse/pull/66908) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up mutations for non-replicated MergeTree a bit. [#66909](https://github.com/ClickHouse/ClickHouse/pull/66909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up mutations for non-replicated MergeTree significantly. [#66911](https://github.com/ClickHouse/ClickHouse/pull/66911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix views over distributed tables with Analyzer. [#66912](https://github.com/ClickHouse/ClickHouse/pull/66912) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [CI fest] Try to fix `test_broken_projections/test.py::test_broken_ignored_replicated`. [#66915](https://github.com/ClickHouse/ClickHouse/pull/66915) ([Andrey Zvonov](https://github.com/zvonand)). 
+* Decrease rate limit in `01923_network_receive_time_metric_insert`. [#66924](https://github.com/ClickHouse/ClickHouse/pull/66924) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Grouparrayintersect: fix serialization bug. [#66928](https://github.com/ClickHouse/ClickHouse/pull/66928) ([Raúl Marín](https://github.com/Algunenano)). +* Update version after release branch. [#66929](https://github.com/ClickHouse/ClickHouse/pull/66929) ([Raúl Marín](https://github.com/Algunenano)). +* Un-flake test_runtime_configurable_cache_size. [#66934](https://github.com/ClickHouse/ClickHouse/pull/66934) ([Robert Schulze](https://github.com/rschu1ze)). +* fix unit tests ResolvePoolTest with timeouts. [#66953](https://github.com/ClickHouse/ClickHouse/pull/66953) ([Sema Checherinda](https://github.com/CheSema)). +* Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)). +* CI: Fixes docker server build for release branches. [#66955](https://github.com/ClickHouse/ClickHouse/pull/66955) ([Max K.](https://github.com/maxknv)). +* Addressing issue [#64936](https://github.com/ClickHouse/ClickHouse/issues/64936). [#66973](https://github.com/ClickHouse/ClickHouse/pull/66973) ([alesapin](https://github.com/alesapin)). +* Add initial 24.7 changelog. [#66976](https://github.com/ClickHouse/ClickHouse/pull/66976) ([Raúl Marín](https://github.com/Algunenano)). +* Apply libunwind fix. [#66977](https://github.com/ClickHouse/ClickHouse/pull/66977) ([Michael Kolupaev](https://github.com/al13n321)). +* CI: Add logs for debugging. [#66979](https://github.com/ClickHouse/ClickHouse/pull/66979) ([Max K.](https://github.com/maxknv)). +* [CI Fest] Split dynamic tests and rewrite them from sh to sql to avoid timeouts. [#66981](https://github.com/ClickHouse/ClickHouse/pull/66981) ([Kruglov Pavel](https://github.com/Avogar)). +* Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)). +* [CI Fest] Fix use-of-uninitialized-value in JSONExtract* numeric functions. [#66984](https://github.com/ClickHouse/ClickHouse/pull/66984) ([Kruglov Pavel](https://github.com/Avogar)). +* It should fix SQLancer checks, but for some reason we stopped invalidating cache for docker builds and fix was not published to our CI for a while. [#66987](https://github.com/ClickHouse/ClickHouse/pull/66987) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixes [#66941](https://github.com/ClickHouse/ClickHouse/issues/66941). [#66991](https://github.com/ClickHouse/ClickHouse/pull/66991) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Remove the support for Kerberized HDFS. [#66998](https://github.com/ClickHouse/ClickHouse/pull/66998) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Fix for workflow results parsing. [#67000](https://github.com/ClickHouse/ClickHouse/pull/67000) ([Max K.](https://github.com/maxknv)). +* Fix flaky `01454_storagememory_data_race_challenge`. [#67003](https://github.com/ClickHouse/ClickHouse/pull/67003) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Jepsen Workflow fix for skipped builds and observability. [#67004](https://github.com/ClickHouse/ClickHouse/pull/67004) ([Max K.](https://github.com/maxknv)). +* bugfix AttachedTable counting not symmetry, and adding some test logs…. [#67007](https://github.com/ClickHouse/ClickHouse/pull/67007) ([Xu Jia](https://github.com/XuJia0210)). 
+* CI: Automerge when required and non-required checks completed. [#67008](https://github.com/ClickHouse/ClickHouse/pull/67008) ([Max K.](https://github.com/maxknv)). +* Fix test `very_long_arrays`. [#67009](https://github.com/ClickHouse/ClickHouse/pull/67009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try to fix exception logging in destructors of static objects. [#67016](https://github.com/ClickHouse/ClickHouse/pull/67016) ([Antonio Andelic](https://github.com/antonio2368)). +* [Green CI] Fix test test_storage_azure_blob_storage. [#67019](https://github.com/ClickHouse/ClickHouse/pull/67019) ([Daniil Ivanik](https://github.com/divanik)). +* Integration tests: fix flaky tests `test_backup_restore_on_cluster/test_concurrency.py` & `test_manipulate_statistics/test.py`. [#67027](https://github.com/ClickHouse/ClickHouse/pull/67027) ([Nikita Fomichev](https://github.com/fm4v)). +* [Green CI] Fix test test_storage_s3_queue/test.py::test_max_set_age. [#67035](https://github.com/ClickHouse/ClickHouse/pull/67035) ([Pablo Marcos](https://github.com/pamarcos)). +* Test for alter select with parallel replicas. [#67041](https://github.com/ClickHouse/ClickHouse/pull/67041) ([Igor Nikonov](https://github.com/devcrafter)). +* Split query into multiple queries to consume less memory at once + use less data. Fixes [#67034](https://github.com/ClickHouse/ClickHouse/issues/67034). [#67044](https://github.com/ClickHouse/ClickHouse/pull/67044) ([alesapin](https://github.com/alesapin)). +* Disable setting `optimize_functions_to_subcolumns`. [#67046](https://github.com/ClickHouse/ClickHouse/pull/67046) ([Anton Popov](https://github.com/CurtizJ)). +* Increase max allocation size for sanitizers. [#67049](https://github.com/ClickHouse/ClickHouse/pull/67049) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* rewrite 01171 test. [#67054](https://github.com/ClickHouse/ClickHouse/pull/67054) ([Sema Checherinda](https://github.com/CheSema)). +* Add `**` to `hdfs` docs, add test for `**` in `hdfs`. [#67064](https://github.com/ClickHouse/ClickHouse/pull/67064) ([Andrey Zvonov](https://github.com/zvonand)). 
+* Very sad failure: ``` 2024.07.24 13:28:45.517777 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} executeQuery: (from 172.16.11.1:55890) OPTIMIZE TABLE replicated_mt FINAL (stage: Complete) 2024.07.24 13:28:45.525945 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (ReplicatedMergeTreeQueue): Waiting for 4 entries to be processed: queue-0000000004, queue-0000000002, queue-0000000001, queue-0000000000 2024.07.24 13:29:15.528024 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e) (MergerMutator): Selected 3 parts from all_0_0_0 to all_2_2_0 2024.07.24 13:29:15.530736 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Created log entry /clickhouse/tables/replicated_mt/log/log-0000000004 for merge all_0_2_1 2024.07.24 13:29:15.530873 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for node1 to process log entry 2024.07.24 13:29:15.530919 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for node1 to pull log-0000000004 to queue 2024.07.24 13:29:15.534286 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Looking for node corresponding to log-0000000004 in node1 queue 2024.07.24 13:29:15.534793 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} default.replicated_mt (6581a6fb-8458-466d-8350-89951eb1ac8e): Waiting for queue-0000000005 to disappear from node1 queue 2024.07.24 13:29:15.585533 [ 10 ] {08745bf9-4bc1-4946-b9a8-c03d82ec55dc} TCPHandler: Processed in 30.067804125 sec. ```. [#67067](https://github.com/ClickHouse/ClickHouse/pull/67067) ([alesapin](https://github.com/alesapin)). +* Fix flaky `test_seekable_formats_url` and `test_seekable_formats` S3 storage tests. [#67070](https://github.com/ClickHouse/ClickHouse/pull/67070) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* CI: Docker server build fix for new release workflow. [#67075](https://github.com/ClickHouse/ClickHouse/pull/67075) ([Max K.](https://github.com/maxknv)). +* Fix 2680 flasky. [#67078](https://github.com/ClickHouse/ClickHouse/pull/67078) ([jsc0218](https://github.com/jsc0218)). +* [CI Fest] Fix flaky 02447_drop_replica test. [#67085](https://github.com/ClickHouse/ClickHouse/pull/67085) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixes [#67030](https://github.com/ClickHouse/ClickHouse/issues/67030). [#67086](https://github.com/ClickHouse/ClickHouse/pull/67086) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase timeout for curl in tests. [#67089](https://github.com/ClickHouse/ClickHouse/pull/67089) ([Anton Popov](https://github.com/CurtizJ)). +* Try calculating available memory if ClickHouse is bound to subset of NUMA nodes. [#67098](https://github.com/ClickHouse/ClickHouse/pull/67098) ([Antonio Andelic](https://github.com/antonio2368)). +* A more precise way of tracking flushing time in 01246_buffer_flush. [#67099](https://github.com/ClickHouse/ClickHouse/pull/67099) ([Azat Khuzhin](https://github.com/azat)). +* Do not fail CheckReadyForMerge on failed Tests_2 (non-required jobs) Do not skip CiBuddy report step on failures. [#67101](https://github.com/ClickHouse/ClickHouse/pull/67101) ([Max K.](https://github.com/maxknv)). 
+* Tststs_1 - for all required checks Tststs_2 - for all non-required checks (normal mode) Tststs_2_ww - for all non-required checks (woolenwolfdog mode). [#67104](https://github.com/ClickHouse/ClickHouse/pull/67104) ([Max K.](https://github.com/maxknv)). +* Functions [s-t]*: Iterate over input_rows_count where appropriate. [#67105](https://github.com/ClickHouse/ClickHouse/pull/67105) ([Robert Schulze](https://github.com/rschu1ze)). +* Reintroduce 02805_distributed_queries_timeouts. [#67106](https://github.com/ClickHouse/ClickHouse/pull/67106) ([Azat Khuzhin](https://github.com/azat)). +* Added some tests in relation with [#54881](https://github.com/ClickHouse/ClickHouse/issues/54881). [#67110](https://github.com/ClickHouse/ClickHouse/pull/67110) ([max-vostrikov](https://github.com/max-vostrikov)). +* Reintroduce 03002_part_log_rmt_fetch_*_error tests without flakiness and less time. [#67113](https://github.com/ClickHouse/ClickHouse/pull/67113) ([Azat Khuzhin](https://github.com/azat)). +* Improve tag matching in backport scripts. [#67118](https://github.com/ClickHouse/ClickHouse/pull/67118) ([Raúl Marín](https://github.com/Algunenano)). +* Fixes [#67111](https://github.com/ClickHouse/ClickHouse/issues/67111). [#67121](https://github.com/ClickHouse/ClickHouse/pull/67121) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Increase lock_acquire_timeout_for_background_operations setting in dynamic merges tests. [#67126](https://github.com/ClickHouse/ClickHouse/pull/67126) ([Kruglov Pavel](https://github.com/Avogar)). +* Attempt to fix flakiness of some window view tests. [#67130](https://github.com/ClickHouse/ClickHouse/pull/67130) ([Robert Schulze](https://github.com/rschu1ze)). +* Update assert in cache. [#67138](https://github.com/ClickHouse/ClickHouse/pull/67138) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test `00673_subquery_prepared_set_performance`. [#67141](https://github.com/ClickHouse/ClickHouse/pull/67141) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixes [#67047](https://github.com/ClickHouse/ClickHouse/issues/67047). [#67142](https://github.com/ClickHouse/ClickHouse/pull/67142) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Lower max allocation size in query fuzzer. [#67145](https://github.com/ClickHouse/ClickHouse/pull/67145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixes [#66966](https://github.com/ClickHouse/ClickHouse/issues/66966). [#67147](https://github.com/ClickHouse/ClickHouse/pull/67147) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try fix `02481_async_insert_race_long` flakiness. [#67148](https://github.com/ClickHouse/ClickHouse/pull/67148) ([Julia Kartseva](https://github.com/jkartseva)). +* Rename (unreleased) bad setting. [#67149](https://github.com/ClickHouse/ClickHouse/pull/67149) ([Raúl Marín](https://github.com/Algunenano)). +* Uncomment accidentally commented out code in QueryProfiler. [#67152](https://github.com/ClickHouse/ClickHouse/pull/67152) ([Michael Kolupaev](https://github.com/al13n321)). +* Try to fix 2572. [#67158](https://github.com/ClickHouse/ClickHouse/pull/67158) ([jsc0218](https://github.com/jsc0218)). +* Fix benign data race in ZooKeeper. [#67164](https://github.com/ClickHouse/ClickHouse/pull/67164) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove too long unit test. [#67168](https://github.com/ClickHouse/ClickHouse/pull/67168) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `00705_drop_create_merge_tree`. 
[#67170](https://github.com/ClickHouse/ClickHouse/pull/67170) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix stacktrace cache. [#67173](https://github.com/ClickHouse/ClickHouse/pull/67173) ([Antonio Andelic](https://github.com/antonio2368)). +* Fixes [#67151](https://github.com/ClickHouse/ClickHouse/issues/67151). [#67174](https://github.com/ClickHouse/ClickHouse/pull/67174) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Make 02908_many_requests_to_system_replicas less stressful. [#67176](https://github.com/ClickHouse/ClickHouse/pull/67176) ([Alexander Gololobov](https://github.com/davenger)). +* Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)). +* Do not count AttachedTable for tables in information schema databases. [#67187](https://github.com/ClickHouse/ClickHouse/pull/67187) ([Sergei Trifonov](https://github.com/serxa)). +* Verbose output for 03203_client_benchmark_options. [#67188](https://github.com/ClickHouse/ClickHouse/pull/67188) ([vdimir](https://github.com/vdimir)). +* Split test 02967_parallel_replicas_join_algo_and_analyzer. [#67211](https://github.com/ClickHouse/ClickHouse/pull/67211) ([Nikita Taranov](https://github.com/nickitat)). +* Fix flaky `test_pkill_query_log` (tsan). [#67223](https://github.com/ClickHouse/ClickHouse/pull/67223) ([Sergei Trifonov](https://github.com/serxa)). +* Remove integration test `test_broken_projections_in_backups_1`. [#67231](https://github.com/ClickHouse/ClickHouse/pull/67231) ([Vitaly Baranov](https://github.com/vitlibar)). +* Debug logging for [#67002](https://github.com/ClickHouse/ClickHouse/issues/67002). [#67233](https://github.com/ClickHouse/ClickHouse/pull/67233) ([Nikita Taranov](https://github.com/nickitat)). +* Fix oss-fuzz build. [#67235](https://github.com/ClickHouse/ClickHouse/pull/67235) ([Nikita Taranov](https://github.com/nickitat)). +* Fix flaky 00180_no_seek_avoiding_when_reading_from_cache. [#67236](https://github.com/ClickHouse/ClickHouse/pull/67236) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* English. [#67258](https://github.com/ClickHouse/ClickHouse/pull/67258) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove strange code. [#67260](https://github.com/ClickHouse/ClickHouse/pull/67260) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix MSan report in DatabaseReplicated. [#67262](https://github.com/ClickHouse/ClickHouse/pull/67262) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02310_clickhouse_local_INSERT_progress_profile_events`. [#67264](https://github.com/ClickHouse/ClickHouse/pull/67264) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove test `02982_aggregation_states_destruction`. [#67266](https://github.com/ClickHouse/ClickHouse/pull/67266) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix OOM in test runs. [#67268](https://github.com/ClickHouse/ClickHouse/pull/67268) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove harmful stuff from tests. [#67275](https://github.com/ClickHouse/ClickHouse/pull/67275) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `03201_variant_null_map_subcolumn`. [#67276](https://github.com/ClickHouse/ClickHouse/pull/67276) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Split `01651_lc_insert_tiny_log`. 
[#67279](https://github.com/ClickHouse/ClickHouse/pull/67279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Debug test `02490_benchmark_max_consecutive_errors`. [#67281](https://github.com/ClickHouse/ClickHouse/pull/67281) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `02833_concurrrent_sessions`. [#67282](https://github.com/ClickHouse/ClickHouse/pull/67282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a separate test for exception handling. [#67283](https://github.com/ClickHouse/ClickHouse/pull/67283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Debug test `01600_parts_states_metrics_long`. [#67284](https://github.com/ClickHouse/ClickHouse/pull/67284) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Faster test `02231_buffer_aggregate_states_leak`. [#67285](https://github.com/ClickHouse/ClickHouse/pull/67285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix curiosities in `TimerDescriptor`. [#67287](https://github.com/ClickHouse/ClickHouse/pull/67287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add retries to test `02911_backup_restore_keeper_map`. [#67290](https://github.com/ClickHouse/ClickHouse/pull/67290) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Functions: Iterate over input_rows_count where appropriate. [#67294](https://github.com/ClickHouse/ClickHouse/pull/67294) ([Robert Schulze](https://github.com/rschu1ze)). +* Add documentation for `compile_expressions`. [#67300](https://github.com/ClickHouse/ClickHouse/pull/67300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Wrap log lines in the CI report for functional tests. [#67301](https://github.com/ClickHouse/ClickHouse/pull/67301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `02050_client_profile_events`. [#67309](https://github.com/ClickHouse/ClickHouse/pull/67309) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* I do not want to think about this code. [#67312](https://github.com/ClickHouse/ClickHouse/pull/67312) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `00940_max_parts_in_total`. [#67313](https://github.com/ClickHouse/ClickHouse/pull/67313) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Tests for Kafka cannot run in parallel. [#67315](https://github.com/ClickHouse/ClickHouse/pull/67315) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#66285](https://github.com/ClickHouse/ClickHouse/issues/66285). [#67325](https://github.com/ClickHouse/ClickHouse/pull/67325) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow-up for [#67301](https://github.com/ClickHouse/ClickHouse/issues/67301). [#67327](https://github.com/ClickHouse/ClickHouse/pull/67327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#61659](https://github.com/ClickHouse/ClickHouse/issues/61659). [#67332](https://github.com/ClickHouse/ClickHouse/pull/67332) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix integration test `test_backup_restore_on_cluster/test_disallow_concurrency`. [#67336](https://github.com/ClickHouse/ClickHouse/pull/67336) ([Vitaly Baranov](https://github.com/vitlibar)). +* Faster and less flaky 01246_buffer_flush (by using HTTP over clickhouse-client). [#67340](https://github.com/ClickHouse/ClickHouse/pull/67340) ([Azat Khuzhin](https://github.com/azat)). +* Fix: data race in TCPHandler on socket timeouts settings. 
[#67341](https://github.com/ClickHouse/ClickHouse/pull/67341) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* CI: Print stdout, stderr for docker pull command. [#67343](https://github.com/ClickHouse/ClickHouse/pull/67343) ([Max K.](https://github.com/maxknv)). +* Followup [#67290](https://github.com/ClickHouse/ClickHouse/issues/67290). [#67348](https://github.com/ClickHouse/ClickHouse/pull/67348) ([vdimir](https://github.com/vdimir)). +* Skip parallel for `test_storage_kerberized_kafka`. [#67349](https://github.com/ClickHouse/ClickHouse/pull/67349) ([Andrey Zvonov](https://github.com/zvonand)). +* Don't use PeekableReadBuffer in JSONAsObject format. [#67354](https://github.com/ClickHouse/ClickHouse/pull/67354) ([Kruglov Pavel](https://github.com/Avogar)). +* This closes: [#57316](https://github.com/ClickHouse/ClickHouse/issues/57316). [#67355](https://github.com/ClickHouse/ClickHouse/pull/67355) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove duplicated tests. [#67357](https://github.com/ClickHouse/ClickHouse/pull/67357) ([Kruglov Pavel](https://github.com/Avogar)). +* Release branch was not detected properly and job which must run on release branch could be reused from feature branches. PR Fixes detection of release branches. [#67358](https://github.com/ClickHouse/ClickHouse/pull/67358) ([Max K.](https://github.com/maxknv)). +* Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)). +* Add no-distributed-cache tag in tests. [#67361](https://github.com/ClickHouse/ClickHouse/pull/67361) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Trying to fix test_cache_evicted_by_temporary_data and print debug info. [#67362](https://github.com/ClickHouse/ClickHouse/pull/67362) ([vdimir](https://github.com/vdimir)). +* Try to fix: ALL_CONNECTION_TRIES_FAILED with parallel replicas. [#67389](https://github.com/ClickHouse/ClickHouse/pull/67389) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix bad test `01036_no_superfluous_dict_reload_on_create_database`. [#67390](https://github.com/ClickHouse/ClickHouse/pull/67390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adding `SelectedPartsTotal` and `SelectedMarksTotal` as new ProfileEvents. [#67393](https://github.com/ClickHouse/ClickHouse/pull/67393) ([Jordi Villar](https://github.com/jrdi)). +* Print debug info in `test_storage_s3_queue/test.py::test_shards_distributed`. [#67394](https://github.com/ClickHouse/ClickHouse/pull/67394) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Mute degraded perf test. [#67396](https://github.com/ClickHouse/ClickHouse/pull/67396) ([Nikita Taranov](https://github.com/nickitat)). +* Debug TimerDescriptor. [#67397](https://github.com/ClickHouse/ClickHouse/pull/67397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove has_single_port property from plan stream. [#67398](https://github.com/ClickHouse/ClickHouse/pull/67398) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix typo. [#67400](https://github.com/ClickHouse/ClickHouse/pull/67400) ([Halersson Paris](https://github.com/halersson)). +* CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)). +* Disable 02932_refreshable_materialized_views. 
[#67404](https://github.com/ClickHouse/ClickHouse/pull/67404) ([Michael Kolupaev](https://github.com/al13n321)). +* Follow-up to [#67294](https://github.com/ClickHouse/ClickHouse/issues/67294). [#67405](https://github.com/ClickHouse/ClickHouse/pull/67405) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix DWARF range list parsing in stack symbolizer. [#67417](https://github.com/ClickHouse/ClickHouse/pull/67417) ([Michael Kolupaev](https://github.com/al13n321)). +* Make Dwarf::findAddress() fallback slow path less slow. [#67418](https://github.com/ClickHouse/ClickHouse/pull/67418) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix inconsistent formatting of CODEC and STATISTICS. [#67421](https://github.com/ClickHouse/ClickHouse/pull/67421) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduced complexity of the test 02832_alter_max_sessions_for_user. [#67425](https://github.com/ClickHouse/ClickHouse/pull/67425) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Remove obsolete `--multiquery` parameter from tests. [#67435](https://github.com/ClickHouse/ClickHouse/pull/67435) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix public backports. [#67439](https://github.com/ClickHouse/ClickHouse/pull/67439) ([Raúl Marín](https://github.com/Algunenano)). +* Bump Azure from v1.12 to v1.13. [#67446](https://github.com/ClickHouse/ClickHouse/pull/67446) ([Robert Schulze](https://github.com/rschu1ze)). +* 24.7 add missing documentation and testing. [#67454](https://github.com/ClickHouse/ClickHouse/pull/67454) ([Nikita Fomichev](https://github.com/fm4v)). +* Use correct order of fields in `StorageURLSource`. [#67455](https://github.com/ClickHouse/ClickHouse/pull/67455) ([Antonio Andelic](https://github.com/antonio2368)). +* run 01171 test in parallel. [#67470](https://github.com/ClickHouse/ClickHouse/pull/67470) ([Sema Checherinda](https://github.com/CheSema)). +* [Green CI] Fix WriteBuffer destructor when finalize has failed for MergeTreeDeduplicationLog::shutdown. [#67474](https://github.com/ClickHouse/ClickHouse/pull/67474) ([Alexey Katsman](https://github.com/alexkats)). +* Reduce 02473_multistep_prewhere run time. [#67475](https://github.com/ClickHouse/ClickHouse/pull/67475) ([Alexander Gololobov](https://github.com/davenger)). +* Update version_date.tsv and changelogs after v24.7.1.2915-stable. [#67483](https://github.com/ClickHouse/ClickHouse/pull/67483) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Revert [#61750](https://github.com/ClickHouse/ClickHouse/issues/61750) "Improve JSONEachRow reading by ignoring the keys case". [#67484](https://github.com/ClickHouse/ClickHouse/pull/67484) ([Michael Kolupaev](https://github.com/al13n321)). +* Disable parallel run for `01923_network_receive_time_metric_insert.sh`. [#67492](https://github.com/ClickHouse/ClickHouse/pull/67492) ([Julia Kartseva](https://github.com/jkartseva)). +* Fix test `test_backup_restore_on_cluster/test.py::test_mutation`. [#67494](https://github.com/ClickHouse/ClickHouse/pull/67494) ([Vitaly Baranov](https://github.com/vitlibar)). +* [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)). +* [Green CI] Test `test_storage_azure_blob_storage/test.py` is flaky. [#67512](https://github.com/ClickHouse/ClickHouse/pull/67512) ([Daniil Ivanik](https://github.com/divanik)). +* Prepare Release workflow for production. 
[#67523](https://github.com/ClickHouse/ClickHouse/pull/67523) ([Max K.](https://github.com/maxknv)). +* Fix upgrade check. [#67524](https://github.com/ClickHouse/ClickHouse/pull/67524) ([Raúl Marín](https://github.com/Algunenano)). +* [Green CI] test 03164_s3_settings_for_queries_and_merges is flaky. [#67535](https://github.com/ClickHouse/ClickHouse/pull/67535) ([Daniil Ivanik](https://github.com/divanik)). +* Log message and increased concurrency for table removal. [#67537](https://github.com/ClickHouse/ClickHouse/pull/67537) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix AsyncLoader destruction race. [#67553](https://github.com/ClickHouse/ClickHouse/pull/67553) ([Sergei Trifonov](https://github.com/serxa)). +* Add an assert into TimerDescriptor. [#67555](https://github.com/ClickHouse/ClickHouse/pull/67555) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Re-enable ICU on s390/x. [#67557](https://github.com/ClickHouse/ClickHouse/pull/67557) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v24.4.4.107-stable. [#67559](https://github.com/ClickHouse/ClickHouse/pull/67559) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Sometimes we fail with timeout in stateless tests and the reason for that seems to be in `stop_logs_replication` step. Add a check for timeout here. [#67560](https://github.com/ClickHouse/ClickHouse/pull/67560) ([Nikolay Degterinsky](https://github.com/evillique)). +* Miscellaneous. [#67564](https://github.com/ClickHouse/ClickHouse/pull/67564) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* enable parallel_view_processing in perf tests. [#67565](https://github.com/ClickHouse/ClickHouse/pull/67565) ([Sema Checherinda](https://github.com/CheSema)). +* Fix flaky `test_system_kafka_consumers_rebalance`. [#67566](https://github.com/ClickHouse/ClickHouse/pull/67566) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Update version_date.tsv and changelogs after v24.7.2.13-stable. [#67586](https://github.com/ClickHouse/ClickHouse/pull/67586) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix 01811_storage_buffer_flush_parameters flakiness. [#67589](https://github.com/ClickHouse/ClickHouse/pull/67589) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)). +* Update minio in integration tests. [#67595](https://github.com/ClickHouse/ClickHouse/pull/67595) ([Antonio Andelic](https://github.com/antonio2368)). +* added tests for page index in parquet files. [#67596](https://github.com/ClickHouse/ClickHouse/pull/67596) ([max-vostrikov](https://github.com/max-vostrikov)). +* Update check_rabbitmq_is_available. [#67597](https://github.com/ClickHouse/ClickHouse/pull/67597) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)). +* Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)). +* Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)). +* Update CHANGELOG.md. 
[#67607](https://github.com/ClickHouse/ClickHouse/pull/67607) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some `no-parallel` tags from tests. [#67610](https://github.com/ClickHouse/ClickHouse/pull/67610) ([Raúl Marín](https://github.com/Algunenano)). +* Update README.md. [#67613](https://github.com/ClickHouse/ClickHouse/pull/67613) ([Tyler Hannan](https://github.com/tylerhannan)). +* Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)). +* Don't run ASAN unit tests under gdb. [#67622](https://github.com/ClickHouse/ClickHouse/pull/67622) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash in KeyCondition::cloneASTWithInversionPushDown() caused by type change. [#67641](https://github.com/ClickHouse/ClickHouse/pull/67641) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix race condition between ProcessList and Settings. [#67645](https://github.com/ClickHouse/ClickHouse/pull/67645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `02481_async_insert_race_long.sh` flakiness fixes. [#67650](https://github.com/ClickHouse/ClickHouse/pull/67650) ([Julia Kartseva](https://github.com/jkartseva)). +* Fixes [#67651](https://github.com/ClickHouse/ClickHouse/issues/67651). [#67653](https://github.com/ClickHouse/ClickHouse/pull/67653) ([pufit](https://github.com/pufit)). +* Fix flaky `test_replicated_table_attach`. [#67658](https://github.com/ClickHouse/ClickHouse/pull/67658) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v24.4.4.113-stable. [#67659](https://github.com/ClickHouse/ClickHouse/pull/67659) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Even better healthcheck for ldap. [#67667](https://github.com/ClickHouse/ClickHouse/pull/67667) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix 03203_client_benchmark_options. [#67671](https://github.com/ClickHouse/ClickHouse/pull/67671) ([vdimir](https://github.com/vdimir)). +* Integration tests: fix ports clashing problem. [#67672](https://github.com/ClickHouse/ClickHouse/pull/67672) ([Nikita Fomichev](https://github.com/fm4v)). +* Remove some `no-parallel` tags from tests (Part 2). [#67673](https://github.com/ClickHouse/ClickHouse/pull/67673) ([Raúl Marín](https://github.com/Algunenano)). +* Use FunctionArgumentDescriptors for bitSlice. [#67674](https://github.com/ClickHouse/ClickHouse/pull/67674) ([Lennard Eijsackers](https://github.com/Blokje5)). +* Update version_date.tsv and changelog after v24.3.6.48-lts. [#67677](https://github.com/ClickHouse/ClickHouse/pull/67677) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Avoid ddl queue timeout in 02313_filesystem_cache_seeks. [#67680](https://github.com/ClickHouse/ClickHouse/pull/67680) ([Nikita Taranov](https://github.com/nickitat)). +* Fix bad log message in sort description. [#67690](https://github.com/ClickHouse/ClickHouse/pull/67690) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelog after v23.8.16.40-lts. [#67692](https://github.com/ClickHouse/ClickHouse/pull/67692) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix check names in test reports and the CI Logs database. [#67696](https://github.com/ClickHouse/ClickHouse/pull/67696) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Minor refactoring in ci_utils. 
[#67706](https://github.com/ClickHouse/ClickHouse/pull/67706) ([Max K.](https://github.com/maxknv)). +* Fix 01042_system_reload_dictionary_reloads_completely flakiness. [#67719](https://github.com/ClickHouse/ClickHouse/pull/67719) ([Azat Khuzhin](https://github.com/azat)). +* Fix test `00002_log_and_exception_messages_formatting`. [#67723](https://github.com/ClickHouse/ClickHouse/pull/67723) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02789_reading_from_s3_with_connection_pool`. [#67726](https://github.com/ClickHouse/ClickHouse/pull/67726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix strange code in HostResolvePool. [#67727](https://github.com/ClickHouse/ClickHouse/pull/67727) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a typo. [#67729](https://github.com/ClickHouse/ClickHouse/pull/67729) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Smart handling of processes leftovers in tests. [#67737](https://github.com/ClickHouse/ClickHouse/pull/67737) ([Azat Khuzhin](https://github.com/azat)). +* Fix test retries. [#67738](https://github.com/ClickHouse/ClickHouse/pull/67738) ([Azat Khuzhin](https://github.com/azat)). +* Fill only selected columns from system.clusters. [#67739](https://github.com/ClickHouse/ClickHouse/pull/67739) ([Azat Khuzhin](https://github.com/azat)). +* Bump NuRaft (to properly catch thread exceptions). [#67740](https://github.com/ClickHouse/ClickHouse/pull/67740) ([Azat Khuzhin](https://github.com/azat)). +* Try to fix RabbitMQ test failures. [#67743](https://github.com/ClickHouse/ClickHouse/pull/67743) ([Azat Khuzhin](https://github.com/azat)). +* Stateless tests: attempt to fix timeouts of `02473_multistep_prewhere* 00411_long_accurate_number_comparison*`. [#67746](https://github.com/ClickHouse/ClickHouse/pull/67746) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix test_ttl_move::test_alter_with_merge_work flakiness. [#67747](https://github.com/ClickHouse/ClickHouse/pull/67747) ([Azat Khuzhin](https://github.com/azat)). +* ci: better stateless runner (correctly collect artifacts and also some basic errors capturing). [#67752](https://github.com/ClickHouse/ClickHouse/pull/67752) ([Azat Khuzhin](https://github.com/azat)). +* Introduce `no-flaky-check` tag. [#67755](https://github.com/ClickHouse/ClickHouse/pull/67755) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Whitespaces. [#67771](https://github.com/ClickHouse/ClickHouse/pull/67771) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Print original query for AST formatting check on CI. [#67776](https://github.com/ClickHouse/ClickHouse/pull/67776) ([Azat Khuzhin](https://github.com/azat)). +* Fix test `02833_concurrent_sessions`, Fix test `02835_drop_user_during_session`. [#67779](https://github.com/ClickHouse/ClickHouse/pull/67779) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix diagnostics in the test script. [#67780](https://github.com/ClickHouse/ClickHouse/pull/67780) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02231_bloom_filter_sizing`. [#67784](https://github.com/ClickHouse/ClickHouse/pull/67784) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed session_log related tests race condition on logout. [#67785](https://github.com/ClickHouse/ClickHouse/pull/67785) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* ci/stateless: fix artifacts post-processing and detect if something failed there. 
[#67791](https://github.com/ClickHouse/ClickHouse/pull/67791) ([Azat Khuzhin](https://github.com/azat)). +* Integration tests: fix flaky `test_dictionaries_update_and_reload::test_reload_after_fail_by_timer`. [#67793](https://github.com/ClickHouse/ClickHouse/pull/67793) ([Nikita Fomichev](https://github.com/fm4v)). +* Fix possible CANNOT_READ_ALL_DATA during server startup in performance tests. [#67795](https://github.com/ClickHouse/ClickHouse/pull/67795) ([Azat Khuzhin](https://github.com/azat)). +* Reduce table size in 03037_dynamic_merges_2* tests. [#67797](https://github.com/ClickHouse/ClickHouse/pull/67797) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable 03038_nested_dynamic_merges* under sanitizers because it's too slow. [#67798](https://github.com/ClickHouse/ClickHouse/pull/67798) ([Kruglov Pavel](https://github.com/Avogar)). +* Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Update comment. [#67801](https://github.com/ClickHouse/ClickHouse/pull/67801) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bad test `03032_redundant_equals`. [#67822](https://github.com/ClickHouse/ClickHouse/pull/67822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update tags for a few tests. [#67829](https://github.com/ClickHouse/ClickHouse/pull/67829) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add debug logging for window view tests. [#67841](https://github.com/ClickHouse/ClickHouse/pull/67841) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Closes [#67621](https://github.com/ClickHouse/ClickHouse/issues/67621). [#67843](https://github.com/ClickHouse/ClickHouse/pull/67843) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix query cache randomization in stress tests. [#67855](https://github.com/ClickHouse/ClickHouse/pull/67855) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v24.5.5.78-stable. [#67863](https://github.com/ClickHouse/ClickHouse/pull/67863) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Un-flake 02524_fuzz_and_fuss_2. [#67867](https://github.com/ClickHouse/ClickHouse/pull/67867) ([Robert Schulze](https://github.com/rschu1ze)). +* Misc fixes. [#67869](https://github.com/ClickHouse/ClickHouse/pull/67869) ([Alexey Katsman](https://github.com/alexkats)). +* Fixes [#67444](https://github.com/ClickHouse/ClickHouse/issues/67444). [#67873](https://github.com/ClickHouse/ClickHouse/pull/67873) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* no-msan 00314_sample_factor_virtual_column. [#67874](https://github.com/ClickHouse/ClickHouse/pull/67874) ([Michael Kolupaev](https://github.com/al13n321)). +* Revert "Revert "Add a test for [#47892](https://github.com/ClickHouse/ClickHouse/issues/47892)"". [#67877](https://github.com/ClickHouse/ClickHouse/pull/67877) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Lazily create invalid files in S3. [#67882](https://github.com/ClickHouse/ClickHouse/pull/67882) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not try to create azure container if not needed. [#67896](https://github.com/ClickHouse/ClickHouse/pull/67896) ([Anton Popov](https://github.com/CurtizJ)). +* CI: Fix for setting Mergeable Check from sync. 
[#67898](https://github.com/ClickHouse/ClickHouse/pull/67898) ([Max K.](https://github.com/maxknv)). +* Bump rocksdb from v8.10 to v9.4 + enable jemalloc and liburing. [#67904](https://github.com/ClickHouse/ClickHouse/pull/67904) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v24.6.3.95-stable. [#67910](https://github.com/ClickHouse/ClickHouse/pull/67910) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove some no-parallel tags from tests (Part 3). [#67914](https://github.com/ClickHouse/ClickHouse/pull/67914) ([Raúl Marín](https://github.com/Algunenano)). +* Follow up [#67235](https://github.com/ClickHouse/ClickHouse/issues/67235). [#67917](https://github.com/ClickHouse/ClickHouse/pull/67917) ([Nikita Taranov](https://github.com/nickitat)). +* CI: Changelog: Critical Bug Fix to Bug Fix. [#67919](https://github.com/ClickHouse/ClickHouse/pull/67919) ([Max K.](https://github.com/maxknv)). +* CI: Multi-channel CiBuddy. [#67923](https://github.com/ClickHouse/ClickHouse/pull/67923) ([Max K.](https://github.com/maxknv)). +* more logs to debug logical error from async inserts. [#67928](https://github.com/ClickHouse/ClickHouse/pull/67928) ([Han Fei](https://github.com/hanfei1991)). +* Fix stress test error with TDigest statistics. [#67930](https://github.com/ClickHouse/ClickHouse/pull/67930) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove some no-parallel tags from tests (Part 4). [#67932](https://github.com/ClickHouse/ClickHouse/pull/67932) ([Raúl Marín](https://github.com/Algunenano)). +* Upgrade QPL to v1.6.0. [#67933](https://github.com/ClickHouse/ClickHouse/pull/67933) ([Maria Zhukova](https://github.com/mzhukova)). +* CI: Strict job timeout 1.5h for tests, 2h for builds. [#67934](https://github.com/ClickHouse/ClickHouse/pull/67934) ([Max K.](https://github.com/maxknv)). +* Remove slow tests from fasttest check. [#67941](https://github.com/ClickHouse/ClickHouse/pull/67941) ([Raúl Marín](https://github.com/Algunenano)). +* Fix memory corruption in usearch. [#67942](https://github.com/ClickHouse/ClickHouse/pull/67942) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#68547](https://github.com/ClickHouse/ClickHouse/issues/68547): Timeout handling for functional and integration tests, store artifacts and report if timed out - sets 2h default timeout for all jobs. [#67944](https://github.com/ClickHouse/ClickHouse/pull/67944) ([Max K.](https://github.com/maxknv)). +* Unflake 02099_tsv_raw_format.sh. [#67947](https://github.com/ClickHouse/ClickHouse/pull/67947) ([Robert Schulze](https://github.com/rschu1ze)). +* This closes: [#67866](https://github.com/ClickHouse/ClickHouse/issues/67866). [#67950](https://github.com/ClickHouse/ClickHouse/pull/67950) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Change log level of an insignificant message in clickhouse-local. [#67952](https://github.com/ClickHouse/ClickHouse/pull/67952) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)). +* Fix flaky `test_storage_s3_queue/test.py::test_multiple_tables_streaming_sync_distributed`. [#67959](https://github.com/ClickHouse/ClickHouse/pull/67959) ([Julia Kartseva](https://github.com/jkartseva)). +* tests: fix 03002_part_log_rmt_fetch_merge_error flakiness. 
[#67960](https://github.com/ClickHouse/ClickHouse/pull/67960) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout of 02310_clickhouse_local_INSERT_progress_profile_events. [#67961](https://github.com/ClickHouse/ClickHouse/pull/67961) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove obsolete `--multiquery` parameter (follow-up to [#63898](https://github.com/ClickHouse/ClickHouse/issues/63898)), pt. III. [#67964](https://github.com/ClickHouse/ClickHouse/pull/67964) ([Robert Schulze](https://github.com/rschu1ze)). +* Update minio in stateless tests. [#67975](https://github.com/ClickHouse/ClickHouse/pull/67975) ([Antonio Andelic](https://github.com/antonio2368)). +* CI: Integration tests uncover some logging. [#67978](https://github.com/ClickHouse/ClickHouse/pull/67978) ([Max K.](https://github.com/maxknv)). +* Fix 03130_convert_outer_join_to_inner_join. [#67980](https://github.com/ClickHouse/ClickHouse/pull/67980) ([vdimir](https://github.com/vdimir)). +* Collect minio audit logs in stateless tests. [#67998](https://github.com/ClickHouse/ClickHouse/pull/67998) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove some no-parallel tags from tests (Part 5). [#68002](https://github.com/ClickHouse/ClickHouse/pull/68002) ([Raúl Marín](https://github.com/Algunenano)). +* Minor fixes in tables.md. [#68004](https://github.com/ClickHouse/ClickHouse/pull/68004) ([Ilya Yatsishin](https://github.com/qoega)). +* Follow up for [#67843](https://github.com/ClickHouse/ClickHouse/issues/67843). [#68007](https://github.com/ClickHouse/ClickHouse/pull/68007) ([Ilya Yatsishin](https://github.com/qoega)). +* Remove unused CLI option. [#68008](https://github.com/ClickHouse/ClickHouse/pull/68008) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02845_threads_count_in_distributed_queries`. [#68011](https://github.com/ClickHouse/ClickHouse/pull/68011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CI: Pass job timeout into tests config. [#68013](https://github.com/ClickHouse/ClickHouse/pull/68013) ([Nikita Fomichev](https://github.com/fm4v)). +* Add a test for [#57420](https://github.com/ClickHouse/ClickHouse/issues/57420). [#68017](https://github.com/ClickHouse/ClickHouse/pull/68017) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert "Revert "Bump rocksdb from v8.10 to v9.4 + enable jemalloc and liburing"". [#68021](https://github.com/ClickHouse/ClickHouse/pull/68021) ([Robert Schulze](https://github.com/rschu1ze)). +* CI: Fix for filtering jobs in PRs. [#68022](https://github.com/ClickHouse/ClickHouse/pull/68022) ([Max K.](https://github.com/maxknv)). +* Docs: Update 3rd party library guide. [#68027](https://github.com/ClickHouse/ClickHouse/pull/68027) ([Robert Schulze](https://github.com/rschu1ze)). +* Refactor tests for (experimental) statistics. [#68034](https://github.com/ClickHouse/ClickHouse/pull/68034) ([Robert Schulze](https://github.com/rschu1ze)). +* Split `00284_external_aggregation.sql`. [#68037](https://github.com/ClickHouse/ClickHouse/pull/68037) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelog after v24.7.3.42-stable. [#68045](https://github.com/ClickHouse/ClickHouse/pull/68045) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update test_drop_is_lock_free/test.py. [#68051](https://github.com/ClickHouse/ClickHouse/pull/68051) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixes [#67865](https://github.com/ClickHouse/ClickHouse/issues/67865). 
[#68054](https://github.com/ClickHouse/ClickHouse/pull/68054) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Disable randomization of `trace_profile_events` in clickhouse-test. [#68058](https://github.com/ClickHouse/ClickHouse/pull/68058) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor CMake cleanup. [#68069](https://github.com/ClickHouse/ClickHouse/pull/68069) ([Robert Schulze](https://github.com/rschu1ze)). +* If the test cluster is overloaded, sometimes simple query execution [can take more time](https://pastila.nl/?00224e71/f017cd6675b52ccc205c81aa62a47de5#8dB4+C4MOdOi3NLV1dc0Fg==) than `Buffer`'s max time to flush. This PR doubles the timeout and allows to skip the check in case of significant latency. [#68072](https://github.com/ClickHouse/ClickHouse/pull/68072) ([pufit](https://github.com/pufit)). +* Fix flaky `02675_profile_events_from_query_log_and_client`. [#68097](https://github.com/ClickHouse/ClickHouse/pull/68097) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix race in `WithRetries`. [#68106](https://github.com/ClickHouse/ClickHouse/pull/68106) ([Antonio Andelic](https://github.com/antonio2368)). +* Add empty cell to reports when time is missing. [#68112](https://github.com/ClickHouse/ClickHouse/pull/68112) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix test `00900_long_parquet_load`. [#68130](https://github.com/ClickHouse/ClickHouse/pull/68130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests: fix 01246_buffer_flush flakiness due to slow trace_log flush. [#68134](https://github.com/ClickHouse/ClickHouse/pull/68134) ([Azat Khuzhin](https://github.com/azat)). +* Only use Field::safeGet - Field::get prone to type punning. [#68135](https://github.com/ClickHouse/ClickHouse/pull/68135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* tests: attempt to fix 01600_parts_states_metrics_long (by forbid parallel run). [#68136](https://github.com/ClickHouse/ClickHouse/pull/68136) ([Azat Khuzhin](https://github.com/azat)). +* Fix01710 Timeout. [#68138](https://github.com/ClickHouse/ClickHouse/pull/68138) ([jsc0218](https://github.com/jsc0218)). +* Remove the extra cell from reports when it is not necessary. [#68145](https://github.com/ClickHouse/ClickHouse/pull/68145) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Remove "Processing configuration file" message from clickhouse-local. [#68157](https://github.com/ClickHouse/ClickHouse/pull/68157) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 02122_join_group_by_timeout flakiness. [#68160](https://github.com/ClickHouse/ClickHouse/pull/68160) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_cluster_all_replicas`. [#68178](https://github.com/ClickHouse/ClickHouse/pull/68178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix leftovers. [#68181](https://github.com/ClickHouse/ClickHouse/pull/68181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01172_transaction_counters`. [#68182](https://github.com/ClickHouse/ClickHouse/pull/68182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactor tests for (experimental) statistics. [#68186](https://github.com/ClickHouse/ClickHouse/pull/68186) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove Log engine from Kafka integration tests. [#68200](https://github.com/ClickHouse/ClickHouse/pull/68200) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+* [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)). +* Replace segfault in Replicated database with logical error. [#68250](https://github.com/ClickHouse/ClickHouse/pull/68250) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#68423](https://github.com/ClickHouse/ClickHouse/issues/68423): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#68374](https://github.com/ClickHouse/ClickHouse/issues/68374): Rename: S3DiskNoKeyErrors -> DiskS3NoSuchKeyErrors. [#68361](https://github.com/ClickHouse/ClickHouse/pull/68361) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Backported in [#68637](https://github.com/ClickHouse/ClickHouse/issues/68637): Check for invalid regexp in JSON SKIP REGEXP section. [#68451](https://github.com/ClickHouse/ClickHouse/pull/68451) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68485](https://github.com/ClickHouse/ClickHouse/issues/68485): Better inference of date times 2. [#68452](https://github.com/ClickHouse/ClickHouse/pull/68452) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68540](https://github.com/ClickHouse/ClickHouse/issues/68540): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)). +* Backported in [#68518](https://github.com/ClickHouse/ClickHouse/issues/68518): Minor update in Dynamic/JSON serializations. [#68459](https://github.com/ClickHouse/ClickHouse/pull/68459) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#68558](https://github.com/ClickHouse/ClickHouse/issues/68558): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)). +* Backported in [#68576](https://github.com/ClickHouse/ClickHouse/issues/68576): CI: Tidy build timeout from 2h to 3h. [#68567](https://github.com/ClickHouse/ClickHouse/pull/68567) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 95ef8c0de90..8556375d543 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.8.1.2684-lts 2024-08-21 v24.7.3.42-stable 2024-08-08 v24.7.2.13-stable 2024-08-01 v24.7.1.2915-stable 2024-07-30 From b5dadd778b3db0d28e9f1650503ef3f5de42895e Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 21 Aug 2024 17:02:25 +0200 Subject: [PATCH 117/127] temp-commit --- .../02293_http_header_full_summary_without_progress.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh index a08928a773c..b888d977129 100755 --- a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -20,6 +20,7 @@ then echo "Read rows in summary is not zero" else echo "Read rows in summary is zero!" 
+ echo "${CURL_OUTPUT}" fi # Check that the response code is correct too From 0c2ad11af27bb4a914475113652345fed1b8d6b1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:49:51 +0200 Subject: [PATCH 118/127] Fix flaky check when all tests are skipped --- tests/docker_scripts/stateless_runner.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/docker_scripts/stateless_runner.sh b/tests/docker_scripts/stateless_runner.sh index 671b1f5ca71..40a63f74a6b 100755 --- a/tests/docker_scripts/stateless_runner.sh +++ b/tests/docker_scripts/stateless_runner.sh @@ -80,6 +80,9 @@ fi export IS_FLAKY_CHECK=0 +# Export NUM_TRIES so python scripts will see its value as env variable +export NUM_TRIES + # For flaky check we also enable thread fuzzer if [ "$NUM_TRIES" -gt "1" ]; then export IS_FLAKY_CHECK=1 From f74b580ddaaf0b339b3370b603c901a22b5d0594 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 21 Aug 2024 19:52:02 +0200 Subject: [PATCH 119/127] More sanity checks --- ...02293_http_header_full_summary_without_progress.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh index b888d977129..29d59a8e45b 100755 --- a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -6,8 +6,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh +# Sanity check to ensure that the server is up and running +for i in {1..10}; do + echo 'SELECT 1' | ${CLICKHOUSE_CURL_COMMAND} -s "${CLICKHOUSE_URL}" --data-binary @- > /dev/null + if [ $? -eq 0 ]; then + break + fi + sleep 1 +done + CURL_OUTPUT=$(echo 'SELECT 1 + sleepEachRow(0.00002) FROM numbers(100000)' | \ - ${CLICKHOUSE_CURL_COMMAND} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) + ${CLICKHOUSE_CURL_COMMAND} --max-time 3 -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=0&max_execution_time=1" --data-binary @- 2>&1) READ_ROWS=$(echo "${CURL_OUTPUT}" | \ grep 'X-ClickHouse-Summary' | \ From 09dfec99c756067170ac36fe13e4e8948d92427f Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 21 Aug 2024 20:30:13 +0200 Subject: [PATCH 120/127] Lint --- .../02293_http_header_full_summary_without_progress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh index 29d59a8e45b..0d8a568fef0 100755 --- a/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh +++ b/tests/queries/0_stateless/02293_http_header_full_summary_without_progress.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Sanity check to ensure that the server is up and running -for i in {1..10}; do +for _ in {1..10}; do echo 'SELECT 1' | ${CLICKHOUSE_CURL_COMMAND} -s "${CLICKHOUSE_URL}" --data-binary @- > /dev/null if [ $? 
-eq 0 ]; then break From 725f1791f48376905da317b9891cb2860c8c8a33 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 21 Aug 2024 21:34:25 +0200 Subject: [PATCH 121/127] CI: Binary tidy timeout 3h -> 4h --- tests/ci/ci_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 58de25f039f..887742840a2 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -163,7 +163,7 @@ class CI: tidy=True, comment="clang-tidy is used for static analysis", ), - timeout=10800, + timeout=14400, ), BuildNames.BINARY_DARWIN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( From d350f7bc1ab9e9454f7d84a1cc27176f3790be9b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Aug 2024 20:20:32 +0000 Subject: [PATCH 122/127] Improve accuracy of ICU-correcting code by 12.6% --- src/Functions/LowerUpperUTF8Impl.h | 11 ++++++----- .../0_stateless/00170_lower_upper_utf8.reference | 1 + tests/queries/0_stateless/00170_lower_upper_utf8.sql | 3 +++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 5da085f48e5..8469bedde0c 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -6,7 +6,6 @@ #include #include -#include #include #include @@ -43,7 +42,7 @@ struct LowerUpperUTF8Impl String output; size_t curr_offset = 0; - for (size_t i = 0; i < offsets.size(); ++i) + for (size_t i = 0; i < input_rows_count; ++i) { const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); size_t size = offsets[i] - offsets[i - 1]; @@ -57,13 +56,15 @@ struct LowerUpperUTF8Impl output.clear(); input.toUTF8String(output); - /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first + /// For valid UTF-8 input strings, ICU sometimes produces output with an extra '\0 at the end. Only the data before that /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this /// case, the behavior is also reasonable. - const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size()); - size_t valid_size = res_end ? 
res_end - output.data() + 1 : 0; + size_t valid_size = output.size(); + if (!output.empty() && output.back() == '\0') + --valid_size; res_data.resize(curr_offset + valid_size + 1); + memcpy(&res_data[curr_offset], output.data(), valid_size); res_data[curr_offset + valid_size] = 0; diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference index 3c644f22b9b..b1cb9ad5b57 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference @@ -26,3 +26,4 @@ 1 1 1 +2 diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql index 85b6c5c6095..7c7bbac0df3 100644 --- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql +++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql @@ -38,3 +38,6 @@ select lowerUTF8('ır') = 'ır'; -- German language select upper('öäüß') = 'öäüß'; select lower('ÖÄÜẞ') = 'ÖÄÜẞ'; + +-- Bug 68680 +SELECT lengthUTF8(lowerUTF8('Ä\0')); From a1f312495d4a2d18052f96aee3831fa7de2588f6 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 21 Aug 2024 22:36:00 +0200 Subject: [PATCH 123/127] CI: Integration tests timeout to 3h --- tests/ci/ci_config.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 58de25f039f..fdf863797ef 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -400,7 +400,10 @@ class CI: required_builds=[BuildNames.PACKAGE_DEBUG], pr_only=True ), JobNames.INTEGRATION_TEST_ASAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], + release_only=True, + num_batches=4, + timeout=10800, ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], From dbee1542b6541e6aa1634706a5ce1a681fc717c2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 21 Aug 2024 23:54:19 +0200 Subject: [PATCH 124/127] It --> If --- src/Functions/LowerUpperUTF8Impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 8469bedde0c..36ee1723269 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -57,7 +57,7 @@ struct LowerUpperUTF8Impl input.toUTF8String(output); /// For valid UTF-8 input strings, ICU sometimes produces output with an extra '\0 at the end. Only the data before that - /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this + /// '\0' is valid. If the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this /// case, the behavior is also reasonable. size_t valid_size = output.size(); if (!output.empty() && output.back() == '\0') From 92e153585ded4f15e1292613584ff35a55c735f3 Mon Sep 17 00:00:00 2001 From: Tanya Bragin Date: Wed, 21 Aug 2024 19:19:07 -0700 Subject: [PATCH 125/127] Update README.md Add latest meetups from Alexey tour. --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 17b6dcd2ac1..5e66b9da73e 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,13 @@ Every month we get together with the community (users, contributors, customers, Keep an eye out for upcoming meetups and events around the world. 
Somewhere else you want us to be? Please feel free to reach out to tyler `` clickhouse `` com. You can also peruse [ClickHouse Events](https://clickhouse.com/company/news-events) for a list of all upcoming trainings, meetups, speaking engagements, etc. +The following upcoming meetups are featuring the creator of ClickHouse & CTO, Alexey Milovidov: + * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25 +* [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5 +* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9 +* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10 +* [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12 ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" From 9d0b3e3937cca32bc8bc922876fb8e6ac53a3de9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 22 Aug 2024 11:32:59 +0800 Subject: [PATCH 126/127] change as request --- .../functions/string-replace-functions.md | 145 +++++++++--------- src/Functions/overlay.cpp | 8 +- ...new_functions_must_be_documented.reference | 2 - 3 files changed, 76 insertions(+), 79 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 55e97688b18..0cc6b0b27d5 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -8,6 +8,78 @@ sidebar_label: Replacing in Strings [General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately. +## overlay + +Replace part of the string `input` with another string `replace`, starting at the 1-based index `offset`. + +**Syntax** + +```sql +overlay(input, replace, offset[, length]) +``` + +**Parameters** + +- `input`: A string type [String](../data-types/string.md). +- `replace`: A string type [String](../data-types/string.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string. +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within `input` to be replaced. If `length` is not specified, the number of bytes removed from `input` equals the length of `replace`; otherwise `length` bytes are removed. + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +```sql +SELECT overlay('ClickHouse SQL', 'CORE', 12) AS res; +``` + +Result: + +```text +┌─res─────────────┐ +│ ClickHouse CORE │ +└─────────────────┘ +``` + +## overlayUTF8 + +Replace part of the string `input` with another string `replace`, starting at the 1-based index `offset`. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+ +**Syntax** + +```sql +overlayUTF8(input, replace, offset[, length]) +``` + +**Parameters** + +- `input`: A string type [String](../data-types/string.md). +- `replace`: A string type [String](../data-types/string.md). +- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string. +- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within `input` to be replaced. If `length` is not specified, the number of characters removed from `input` equals the length of `replace`; otherwise `length` characters are removed. + +**Returned value** + +- A [String](../data-types/string.md) data type value. + +**Example** + +```sql +SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; +``` + +Result: + +```text +┌─res────────────────────────┐ +│ ClickHouse是开源OLAP数据库 │ +└────────────────────────────┘ +``` + ## replaceOne Replaces the first occurrence of the substring `pattern` in `haystack` by the `replacement` string. @@ -248,76 +320,3 @@ select printf('%%%s %s %d', 'Hello', 'World', 2024); │ %Hello World 2024 │ └──────────────────────────────────────────────┘ ``` - -## overlay - -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. - - -**Syntax** - -```sql -overlay(s, replace, offset[, length]) -``` - -**Parameters** - -- `s`: A string type [String](../data-types/string.md). -- `replace`: A string type [String](../data-types/string.md). -- `offset`: An integer type [Int](../data-types/int-uint.md). -- `length`: Optional. An integer type [Int](../data-types/int-uint.md). - -**Returned value** - -- A [String](../data-types/string.md) data type value. If `offset` is negative the offset is counted starting from the back. `length` specifies the length of the snippet within input to be replaced.
- -**Example** - -```sql -SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res; -``` - -Result: - -```text -┌─res────────────────────────┐ -│ ClickHouse是开源OLAP数据库 │ -└────────────────────────────┘ -``` diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 20988c775a5..497ebb9c9cd 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -201,14 +201,14 @@ private: { if (offset > 0) { - if (static_cast(offset) > input_size + 1) [[unlikely]] + if (static_cast(offset) > input_size + 1) return input_size; else return offset - 1; } else { - if (input_size < -static_cast(offset)) [[unlikely]] + if (input_size < -static_cast(offset)) return 0; else return input_size + offset; @@ -704,14 +704,14 @@ REGISTER_FUNCTION(Overlay) { factory.registerFunction>( {.description = R"( -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `input` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. )", .categories{"String"}}, FunctionFactory::Case::Insensitive); factory.registerFunction>( {.description = R"( -Replace a part of a string `s` with another string `replace`, starting at 1-based index `offset`. By default, the number of bytes removed from `s` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of bytes is removed. +Replace a part of a string `input` with another string `replace`, starting at 1-based index `offset`. By default, the number of characters removed from `input` equals the length of `replace`. If `length` (the optional fourth argument) is specified, a different number of characters is removed. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. 
)", diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 6495b6619f9..c39f1fb1ce9 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -512,8 +512,6 @@ nullIf nullIn nullInIgnoreSet or -overlay -overlayUTF8 parseDateTime parseDateTime32BestEffort parseDateTime32BestEffortOrNull From 3ff9522b69ec7e51119f445152ffb9678a0f124f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Thu, 22 Aug 2024 12:49:10 +0800 Subject: [PATCH 127/127] change as request --- src/Functions/overlay.cpp | 165 +++++++++--------- .../0_stateless/03205_overlay.reference | 58 +++--- tests/queries/0_stateless/03205_overlay.sql | 11 +- 3 files changed, 115 insertions(+), 119 deletions(-) diff --git a/src/Functions/overlay.cpp b/src/Functions/overlay.cpp index 497ebb9c9cd..df8b825eabe 100644 --- a/src/Functions/overlay.cpp +++ b/src/Functions/overlay.cpp @@ -53,139 +53,132 @@ public: if (input_rows_count == 0) return ColumnString::create(); - const size_t number_of_arguments = arguments.size(); - bool has_three_args = number_of_arguments == 3; + bool has_four_args = (arguments.size() == 4); - ColumnPtr column_offset = arguments[2].column; - ColumnPtr column_length; - if (!has_three_args) - column_length = arguments[3].column; + ColumnPtr col_input = arguments[0].column; + const auto * col_input_const = checkAndGetColumn(col_input.get()); + const auto * col_input_string = checkAndGetColumn(col_input.get()); + bool input_is_const = (col_input_const != nullptr); - const ColumnConst * column_offset_const = checkAndGetColumn(column_offset.get()); - const ColumnConst * column_length_const = nullptr; - if (!has_three_args) - column_length_const = checkAndGetColumn(column_length.get()); + ColumnPtr col_replace = arguments[1].column; + const auto * col_replace_const = checkAndGetColumn(col_replace.get()); + const auto * col_replace_string = checkAndGetColumn(col_replace.get()); + bool replace_is_const = (col_replace_const != nullptr); + ColumnPtr col_offset = arguments[2].column; + const ColumnConst * col_offset_const = checkAndGetColumn(col_offset.get()); bool offset_is_const = false; - bool length_is_const = false; Int64 offset = -1; - Int64 length = -1; - if (column_offset_const) + if (col_offset_const) { - offset = column_offset_const->getInt(0); + offset = col_offset_const->getInt(0); offset_is_const = true; } - if (column_length_const) + ColumnPtr col_length = has_four_args ? arguments[3].column : nullptr; + const ColumnConst * col_length_const = has_four_args ? 
checkAndGetColumn(col_length.get()) : nullptr; + bool length_is_const = false; + Int64 length = -1; + if (col_length_const) { - length = column_length_const->getInt(0); + length = col_length_const->getInt(0); length_is_const = true; } - auto res_col = ColumnString::create(); auto & res_data = res_col->getChars(); auto & res_offsets = res_col->getOffsets(); + res_offsets.resize_exact(input_rows_count); - - ColumnPtr column_input = arguments[0].column; - ColumnPtr column_replace = arguments[1].column; - - const auto * column_input_const = checkAndGetColumn(column_input.get()); - const auto * column_input_string = checkAndGetColumn(column_input.get()); - if (column_input_const) + if (col_input_const) { - StringRef input = column_input_const->getDataAt(0); + StringRef input = col_input_const->getDataAt(0); res_data.reserve((input.size + 1) * input_rows_count); } else { - res_data.reserve(column_input_string->getChars().size()); + res_data.reserve(col_input_string->getChars().size()); } - const auto * column_replace_const = checkAndGetColumn(column_replace.get()); - const auto * column_replace_string = checkAndGetColumn(column_replace.get()); - bool input_is_const = (column_input_const != nullptr); - bool replace_is_const = (column_replace_const != nullptr); -#define OVERLAY_EXECUTE_CASE(THREE_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ +#define OVERLAY_EXECUTE_CASE(HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \ if (input_is_const && replace_is_const) \ - constantConstant( \ + constantConstant( \ input_rows_count, \ - column_input_const->getDataAt(0), \ - column_replace_const->getDataAt(0), \ - column_offset, \ - column_length, \ + col_input_const->getDataAt(0), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else if (input_is_const && !replace_is_const) \ - constantVector( \ + constantVector( \ input_rows_count, \ - column_input_const->getDataAt(0), \ - column_replace_string->getChars(), \ - column_replace_string->getOffsets(), \ - column_offset, \ - column_length, \ + col_input_const->getDataAt(0), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else if (!input_is_const && replace_is_const) \ - vectorConstant( \ + vectorConstant( \ input_rows_count, \ - column_input_string->getChars(), \ - column_input_string->getOffsets(), \ - column_replace_const->getDataAt(0), \ - column_offset, \ - column_length, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_const->getDataAt(0), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); \ else \ - vectorVector( \ + vectorVector( \ input_rows_count, \ - column_input_string->getChars(), \ - column_input_string->getOffsets(), \ - column_replace_string->getChars(), \ - column_replace_string->getOffsets(), \ - column_offset, \ - column_length, \ + col_input_string->getChars(), \ + col_input_string->getOffsets(), \ + col_replace_string->getChars(), \ + col_replace_string->getOffsets(), \ + col_offset, \ + col_length, \ offset, \ length, \ res_data, \ res_offsets); - if (has_three_args) + if (!has_four_args) { if (offset_is_const) { - OVERLAY_EXECUTE_CASE(true, true, false) + OVERLAY_EXECUTE_CASE(false, true, false) } else { - OVERLAY_EXECUTE_CASE(true, false, false) + OVERLAY_EXECUTE_CASE(false, false, false) } } else { if (offset_is_const && length_is_const) { - OVERLAY_EXECUTE_CASE(false, true, true) + 
OVERLAY_EXECUTE_CASE(true, true, true) } else if (offset_is_const && !length_is_const) { - OVERLAY_EXECUTE_CASE(false, true, false) + OVERLAY_EXECUTE_CASE(true, true, false) } else if (!offset_is_const && length_is_const) { - OVERLAY_EXECUTE_CASE(false, false, true) + OVERLAY_EXECUTE_CASE(true, false, true) } else { - OVERLAY_EXECUTE_CASE(false, false, false) + OVERLAY_EXECUTE_CASE(true, false, false) } } #undef OVERLAY_EXECUTE_CASE @@ -224,7 +217,7 @@ private: return bytes; } - template + template void constantConstant( size_t rows, const StringRef & input, @@ -236,7 +229,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { constantConstant( rows, input, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -250,12 +243,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (has_three_args) + else if constexpr (!has_four_args) { valid_length = replace_size; } @@ -273,14 +266,14 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!has_three_args && !length_is_const) + if constexpr (has_four_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -332,11 +325,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void vectorConstant( size_t rows, const ColumnString::Chars & input_data, @@ -349,7 +343,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { vectorConstant( rows, input_data, input_offsets, replace, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -359,12 +353,12 @@ private: size_t replace_size = getSliceSize(reinterpret_cast(replace.data), replace.size); Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; } - else if constexpr (has_three_args) + else if constexpr (!has_four_args) { valid_length = replace_size; } @@ -388,14 +382,14 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (!has_three_args && !length_is_const) + if constexpr (has_four_args && !length_is_const) { length = column_length->getInt(i); valid_length = length >= 0 ? length : replace_size; } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 
0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -449,11 +443,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void constantVector( size_t rows, const StringRef & input, @@ -466,7 +461,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { constantVector( rows, input, replace_data, replace_offsets, column_offset, column_length, const_offset, -1, res_data, res_offsets); @@ -480,7 +475,7 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; @@ -502,7 +497,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (has_three_args) + if constexpr (!has_four_args) { valid_length = replace_size; } @@ -513,7 +508,7 @@ private: } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -563,11 +558,12 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } - template + template void vectorVector( size_t rows, const ColumnString::Chars & input_data, @@ -581,7 +577,7 @@ private: ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) const { - if (!has_three_args && length_is_const && const_length < 0) + if (has_four_args && length_is_const && const_length < 0) { vectorVector( rows, @@ -600,7 +596,7 @@ private: Int64 length = 0; // maybe negative size_t valid_length = 0; // not negative - if constexpr (!has_three_args && length_is_const) + if constexpr (has_four_args && length_is_const) { assert(const_length >= 0); valid_length = const_length; @@ -629,7 +625,7 @@ private: valid_offset = getValidOffset(offset, input_size); } - if constexpr (has_three_args) + if constexpr (!has_four_args) { valid_length = replace_size; } @@ -640,7 +636,7 @@ private: } size_t prefix_size = valid_offset; - size_t suffix_size = prefix_size + valid_length > input_size ? 0 : input_size - prefix_size - valid_length; + size_t suffix_size = (prefix_size + valid_length > input_size) ? 
0 : (input_size - prefix_size - valid_length); if constexpr (!is_utf8) { @@ -693,6 +689,7 @@ private: /// add zero terminator res_data[res_offset] = 0; ++res_offset; + res_offsets[i] = res_offset; } } diff --git a/tests/queries/0_stateless/03205_overlay.reference b/tests/queries/0_stateless/03205_overlay.reference index 67a699944e0..4be3baadaea 100644 --- a/tests/queries/0_stateless/03205_overlay.reference +++ b/tests/queries/0_stateless/03205_overlay.reference @@ -25,34 +25,34 @@ Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH Spark ANSI SQL Spark ANSI SQL和CH -Test with different offset values --12 _park SQL _park SQL和CH --11 _park SQL S_ark SQL和CH --10 _park SQL Sp_rk SQL和CH --9 _park SQL Spa_k SQL和CH --8 S_ark SQL Spar_ SQL和CH --7 Sp_rk SQL Spark_SQL和CH --6 Spa_k SQL Spark _QL和CH --5 Spar_ SQL Spark S_L和CH --4 Spark_SQL Spark SQ_和CH --3 Spark _QL Spark SQL_CH --2 Spark S_L Spark SQL和_H --1 Spark SQ_ Spark SQL和C_ -0 Spark SQL_ Spark SQL和CH_ -1 _park SQL _park SQL和CH -2 S_ark SQL S_ark SQL和CH -3 Sp_rk SQL Sp_rk SQL和CH -4 Spa_k SQL Spa_k SQL和CH -5 Spar_ SQL Spar_ SQL和CH -6 Spark_SQL Spark_SQL和CH -7 Spark _QL Spark _QL和CH -8 Spark S_L Spark S_L和CH -9 Spark SQ_ Spark SQ_和CH -10 Spark SQL_ Spark SQL_CH -11 Spark SQL_ Spark SQL和_H -12 Spark SQL_ Spark SQL和C_ -13 Spark SQL_ Spark SQL和CH_ -Test with different length values +Test with special offset values +-12 __ark SQL 之park SQL和CH +-11 __ark SQL S之ark SQL和CH +-10 __ark SQL Sp之rk SQL和CH +-9 __ark SQL Spa之k SQL和CH +-8 S__rk SQL Spar之 SQL和CH +-7 Sp__k SQL Spark之SQL和CH +-6 Spa__ SQL Spark 之QL和CH +-5 Spar__SQL Spark S之L和CH +-4 Spark__QL Spark SQ之和CH +-3 Spark __L Spark SQL之CH +-2 Spark S__ Spark SQL和之H +-1 Spark SQ__ Spark SQL和C之 +0 Spark SQL__ Spark SQL和CH之 +1 __ark SQL 之park SQL和CH +2 S__rk SQL S之ark SQL和CH +3 Sp__k SQL Sp之rk SQL和CH +4 Spa__ SQL Spa之k SQL和CH +5 Spar__SQL Spar之 SQL和CH +6 Spark__QL Spark之SQL和CH +7 Spark __L Spark 之QL和CH +8 Spark S__ Spark S之L和CH +9 Spark SQ__ Spark SQ之和CH +10 Spark SQL__ Spark SQL之CH +11 Spark SQL__ Spark SQL和之H +12 Spark SQL__ Spark SQL和C之 +13 Spark SQL__ Spark SQL和CH之 +Test with special length values -1 Spark ANSI Spark ANSI H 0 Spark ANSI SQL Spark ANSI SQL和CH 1 Spark ANSI QL Spark ANSI QL和CH @@ -61,7 +61,7 @@ Test with different length values 4 Spark ANSI Spark ANSI CH 5 Spark ANSI Spark ANSI H 6 Spark ANSI Spark ANSI -Test with different input and replace values +Test with special input and replace values _ _ Spark SQL Spark SQL和CH ANSI ANSI diff --git a/tests/queries/0_stateless/03205_overlay.sql b/tests/queries/0_stateless/03205_overlay.sql index 4d0b5ecbe03..765b29f93ec 100644 --- a/tests/queries/0_stateless/03205_overlay.sql +++ b/tests/queries/0_stateless/03205_overlay.sql @@ -1,5 +1,4 @@ SELECT 'Negative test of overlay'; -SELECT overlay('hello', 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay('hello', 'world'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay('hello', 'world', 2, 3, 'extra'); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } SELECT overlay(123, 'world', 2, 3); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } @@ -35,13 +34,13 @@ SELECT overlay(materialize('Spark SQL'), 'ANSI ', materialize(7), materialize(0) SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8('Spark SQL和CH', materialize('ANSI '), materialize(7), materialize(0)); SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark 
SQL和CH'), materialize('ANSI '), materialize(7), materialize(0)); -SELECT 'Test with different offset values'; -WITH number - 12 as offset SELECT offset, overlay('Spark SQL', '_', offset), overlayUTF8('Spark SQL和CH', '_', offset) from numbers(26); +SELECT 'Test with special offset values'; +WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '之', offset) FROM numbers(26); -SELECT 'Test with different length values'; -WITH number - 1 as length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) from numbers(8); +SELECT 'Test with special length values'; +WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8); -SELECT 'Test with different input and replace values'; +SELECT 'Test with special input and replace values'; SELECT overlay('', '_', 6), overlayUTF8('', '_', 6); SELECT overlay('Spark SQL', '', 6), overlayUTF8('Spark SQL和CH', '', 6); SELECT overlay('', 'ANSI ', 7, 0), overlayUTF8('', 'ANSI ', 7, 0);
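For readers skimming the refactor above, the queries below are a minimal usage sketch of overlay(input, replace, offset[, length]) and its overlayUTF8 counterpart. The expected results are copied from the 03205_overlay.reference expectations in this patch rather than re-derived, and the byte-versus-code-point distinction is the usual convention for the *UTF8 string functions rather than something stated explicitly here.

SELECT overlay('Spark SQL', 'ANSI ', 7, 0);          -- Spark ANSI SQL      (length 0 inserts at offset 7 without removing anything)
SELECT overlay('Spark SQL', 'ANSI ', 7, 2);          -- Spark ANSI L        (replaces 2 characters starting at offset 7)
SELECT overlay('Spark SQL', '__', -3);               -- Spark __L           (a negative offset counts back from the end of the input)
SELECT overlayUTF8('Spark SQL和CH', 'ANSI ', 7, 0);  -- Spark ANSI SQL和CH  (offsets and lengths count code points, not bytes)

As the guard at the top of each helper in overlay.cpp shows, a negative length falls back to replacing exactly as many characters as the replacement string contains, i.e. it behaves like the three-argument form: each helper first redirects the constant negative-length case back into itself with a length of -1, and then resolves negative lengths to the replacement size at run time (valid_length = length >= 0 ? length : replace_size).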