From 373f00c127d84cd70147bd63962f94b85d1a60b6 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 12 Nov 2024 11:25:14 +0800 Subject: [PATCH] fix review & ci --- .../functions/date-time-functions.md | 6 +- .../functions/type-conversion-functions.md | 2 + src/Functions/parseDateTime.cpp | 145 ++++++++++-------- ..._parse_datetime64_in_joda_syntax.reference | 12 +- .../03252_parse_datetime64_in_joda_syntax.sql | 12 +- .../aspell-ignore/en/aspell-dict.txt | 11 +- 6 files changed, 105 insertions(+), 83 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 2357b5b2fdd..9643e4aba8a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -4489,9 +4489,9 @@ Using replacement fields, you can define a pattern for the resulting string. | k | clockhour of day (1~24) | number | 24 | | m | minute of hour | number | 30 | | s | second of minute | number | 55 | -| S | fraction of second (not supported yet) | number | 978 | -| z | time zone (short name not supported yet) | text | Pacific Standard Time; PST | -| Z | time zone offset/id (not supported yet) | zone | -0800; -08:00; America/Los_Angeles | +| S | fraction of second | number | 978 | +| z | time zone | text | Eastern Standard Time; EST | +| Z | time zone offset | zone | -0800; -0812 | | ' | escape for text | delimiter | | | '' | single quote | literal | ' | diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index a92d7055fd5..02f796e8336 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -6889,9 +6889,11 @@ parseDateTime64(str, scale, [format[, timezone]]) Returns [DateTime64](../data-types/datetime64.md) type values parsed from input string according to a MySQL style format string. ## parseDateTime64OrZero + Same as for [parseDateTime64](#parsedatetime64) except that it returns zero date when it encounters a date format that cannot be processed. ## parseDateTime64OrNull + Same as for [parseDateTime64](#parsedatetime64) except that it returns `NULL` when it encounters a date format that cannot be processed. ## parseDateTime64InJodaSyntax diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 9fea8a4f130..41e2a8ee21e 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -38,6 +38,7 @@ namespace ErrorCodes extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; extern const int CANNOT_PARSE_DATETIME; extern const int NOT_ENOUGH_SPACE; + extern const int LOGICAL_ERROR; } namespace @@ -193,7 +194,7 @@ namespace Int32 minute = 0; /// range [0, 59] Int32 second = 0; /// range [0, 59] Int32 microsecond = 0; /// range [0, 999999] - UInt32 scale = 0; /// The scale of DateTime64, range [0, 6]. + UInt32 scale = 0; /// Scale of DateTime64. When parse syntax is `Joda`, it range [0, 6]; When parse syntax is `MySQL`, it should be 6 bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime bool hour_starts_at_1 = false; /// Whether the hour is clockhour @@ -458,6 +459,18 @@ namespace return {}; } + [[nodiscard]] + VoidOrError setScale(UInt8 scale_, ParseSyntax parse_syntax_) + { + if (parse_syntax_ == ParseSyntax::MySQL && scale_ != 6) + RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for scale must be 6 for MySQL parse syntax", std::to_string(scale_)) + else if (parse_syntax_ == ParseSyntax::Joda && scale_ > 6) + RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for scale must be in the range [0, 6] for joda syntax", std::to_string(scale_)) + + scale = scale_; + return {}; + } + /// For debug [[maybe_unused]] String toString() const { @@ -579,7 +592,8 @@ namespace } }; - /// _FUNC_(str[scale, format, timezone]) + /// _FUNC_(str[, format, timezone]) /// for ReturnType::DateTime + /// _FUNC_(str, scale[, format, timezone]). /// for ReturnType::DateTime64 template class FunctionParseDateTimeImpl : public IFunction { @@ -608,13 +622,13 @@ namespace DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors mandatory_args; - if constexpr (return_type == ReturnType::DateTime64) + if constexpr (return_type == ReturnType::DateTime) + mandatory_args = {{"time", static_cast(&isString), nullptr, "String"}}; + else mandatory_args = { {"time", static_cast(&isString), nullptr, "String"}, {"scale", static_cast(&isUInt8), &isColumnConst, "UInt8"} }; - else - mandatory_args = {{"time", static_cast(&isString), nullptr, "String"}}; FunctionArgumentDescriptors optional_args{ {"format", static_cast(&isString), nullptr, "String"}, @@ -624,22 +638,21 @@ namespace String time_zone_name = getTimeZone(arguments).getTimeZone(); DataTypePtr data_type; - if constexpr (return_type == ReturnType::DateTime64) + if constexpr (return_type == ReturnType::DateTime) + data_type = std::make_shared(time_zone_name); + else { UInt8 scale = 0; - if (isUInt8(arguments[1].type)) - { - const auto * col_scale = checkAndGetColumnConst(arguments[1].column.get()); - if (col_scale) - scale = col_scale->getValue(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The scale argument is not Const(UInt8) type."); - } + const auto * col_scale = checkAndGetColumnConst(arguments[1].column.get()); + if (col_scale) + scale = col_scale->getValue(); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "The scale argument is not Const(UInt8) type."); + if (parse_syntax == ParseSyntax::MySQL && scale != 6) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The scale argument's value {} of MySQL parse syntax is not 6.", std::to_string(scale)); if (scale > maxScaleOfDateTime64) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "The scale argument's value {} exceed the max scale value {}.", std::to_string(scale), std::to_string(maxScaleOfDateTime64)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The scale argument's value {} exceed the max scale value {}.", std::to_string(scale), std::to_string(maxScaleOfDateTime64)); String format = getFormat(arguments, scale); std::vector instructions = parseFormat(format); @@ -647,28 +660,26 @@ namespace { /// Check scale by counting how may 'S' characters exists in the format string. const String & fragment = instruction.getFragment(); - UInt32 s_cnt = 0; + UInt32 s_count = 0; for (char ch : fragment) { if (ch != 'S') { - s_cnt = 0; + s_count = 0; break; } else - s_cnt++; + s_count++; } - /// If the number of 'S' character in format string not euqals the scale, then throw an exception to report error. - if (s_cnt != 0 && s_cnt != scale) + /// If the number of 'S' characters in format string not euqals the scale, then throw an exception to report error. + if (s_count != 0 && s_count != scale) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "The scale of input format string {} not equals the given scale value {}.", + "The number of 'S' characters in the input format string {} does not equal the given scale value {}.", format, std::to_string(scale)); } data_type = std::make_shared(scale, time_zone_name); } - else - data_type = std::make_shared(time_zone_name); if (error_handling == ErrorHandling::Null) return std::make_shared(data_type); @@ -679,23 +690,24 @@ namespace { DataTypePtr non_null_result_type; if constexpr (error_handling == ErrorHandling::Null) + /// Remove Nullable wrapper. It will be added back later. non_null_result_type = removeNullable(result_type); else non_null_result_type = result_type; - if constexpr (return_type == ReturnType::DateTime64) + if constexpr (return_type == ReturnType::DateTime) + { + MutableColumnPtr col_res = ColumnDateTime::create(input_rows_count); + ColumnDateTime * col_datetime = assert_cast(col_res.get()); + return executeImpl2(arguments, result_type, input_rows_count, col_res, col_datetime->getData()); + } + else { const auto * datatime64_type = checkAndGetDataType(non_null_result_type.get()); MutableColumnPtr col_res = ColumnDateTime64::create(input_rows_count, datatime64_type->getScale()); ColumnDateTime64 * col_datetime64 = assert_cast(col_res.get()); return executeImpl2(arguments, result_type, input_rows_count, col_res, col_datetime64->getData()); } - else - { - MutableColumnPtr col_res = ColumnDateTime::create(input_rows_count); - ColumnDateTime * col_datetime = assert_cast(col_res.get()); - return executeImpl2(arguments, result_type, input_rows_count, col_res, col_datetime->getData()); - } } template @@ -732,41 +744,52 @@ namespace for (size_t i = 0; i < input_rows_count; ++i) { datetime.reset(); - if constexpr (return_type == ReturnType::DateTime64) - datetime.scale = scale; StringRef str_ref = col_str->getDataAt(i); Pos cur = str_ref.data; Pos end = str_ref.data + str_ref.size; bool error = false; + auto handleError = [&](const auto & result) -> void + { + if constexpr (error_handling == ErrorHandling::Zero) + { + res_data[i] = 0; + error = true; + } + else if constexpr (error_handling == ErrorHandling::Null) + { + res_data[i] = 0; + col_null_map->getData()[i] = 1; + error = true; + } + else + { + static_assert(error_handling == ErrorHandling::Exception); + const ErrorCodeAndMessage & err = result.error(); + throw Exception(err.error_code, "{}", err.error_message); + } + }; + + if constexpr (return_type == ReturnType::DateTime64) + { + if (auto result = datetime.setScale(static_cast(scale), parse_syntax); !result.has_value()) + { + handleError(result); + continue; + } + } + for (const auto & instruction : instructions) { - if (auto result = instruction.perform(cur, end, datetime); result.has_value()) + if (auto result = instruction.perform(cur, end, datetime) ; result.has_value()) { cur = *result; } else { - if constexpr (error_handling == ErrorHandling::Zero) - { - res_data[i] = 0; - error = true; - break; - } - else if constexpr (error_handling == ErrorHandling::Null) - { - res_data[i] = 0; - col_null_map->getData()[i] = 1; - error = true; - break; - } - else - { - static_assert(error_handling == ErrorHandling::Exception); - const ErrorCodeAndMessage & err = result.error(); - throw Exception(err.error_code, "{}", err.error_message); - } + handleError(result); + break; } } @@ -789,10 +812,10 @@ namespace { if (result = datetime.buildDateTime(time_zone); result.has_value()) { - if constexpr (return_type == ReturnType::DateTime64) - res_data[i] = static_cast(*result) * multiplier + datetime.microsecond; - else + if constexpr (return_type == ReturnType::DateTime) res_data[i] = static_cast(*result); + else + res_data[i] = static_cast(*result) * multiplier + datetime.microsecond; } } @@ -2241,11 +2264,9 @@ namespace String getFormat(const ColumnsWithTypeAndName & arguments, UInt32 scale) const { - size_t format_arg_index = 1; - if constexpr (return_type == ReturnType::DateTime64) - format_arg_index = 2; + size_t index_of_format_string_arg = (return_type == ReturnType::DateTime) ? 1 : 2; - if (arguments.size() <= format_arg_index) + if (arguments.size() <= index_of_format_string_arg) { String format; if constexpr (parse_syntax == ParseSyntax::MySQL) @@ -2263,12 +2284,12 @@ namespace } else { - const auto * col_format = checkAndGetColumnConst(arguments[format_arg_index].column.get()); + const auto * col_format = checkAndGetColumnConst(arguments[index_of_format_string_arg].column.get()); if (!col_format) throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second ('format') argument of function {}. Must be constant string.", - arguments[format_arg_index].column->getName(), + arguments[index_of_format_string_arg].column->getName(), getName()); return col_format->getValue(); } diff --git a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference index 99252ce55ca..2c5f7191c60 100644 --- a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference +++ b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference @@ -1,21 +1,21 @@ 2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123 2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123 2024-10-10 02:42:10.123456 -2024-10-10 01:30:10.123456 -2024-10-10 01:30:10.123456 +2024-10-09 23:30:10.123456 +2024-10-09 23:30:10.123456 1970-01-01 08:00:00.000 2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123 2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123 2024-10-10 02:42:10.123456 1970-01-01 08:00:00.000000 -2024-10-10 01:30:10.123456 -2024-10-10 01:30:10.123456 +2024-10-09 23:30:10.123456 +2024-10-09 23:30:10.123456 1970-01-01 08:00:00.000000 \N 2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123 2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123 2024-10-10 02:42:10.123456 \N -2024-10-10 01:30:10.123456 -2024-10-10 01:30:10.123456 +2024-10-09 23:30:10.123456 +2024-10-09 23:30:10.123456 \N diff --git a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql index bcb0fb5a362..cb9d802dab0 100644 --- a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql +++ b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql @@ -8,8 +8,8 @@ select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH: select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:mm:ss.SSSS'); -- { serverError BAD_ARGUMENTS } select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0812', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-08123', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ'); -- {serverError CANNOT_PARSE_DATETIME} -select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz'); +select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); +select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz'); -- incorrect timezone offset and timezone select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-8000', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); -- { serverError CANNOT_PARSE_DATETIME } select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456ABCD', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS } @@ -22,8 +22,8 @@ select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 3, 'yyyy-MM- select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:mm:ss.SSSS'); -- { serverError BAD_ARGUMENTS } select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-0812', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-08123', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ'); -select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz'); +select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); +select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz'); -- incorrect timezone offset and timezone select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-8000', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456ABCD', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS } @@ -36,8 +36,8 @@ select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 3, 'yyyy-MM- select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:mm:ss.SSSS'); -- { serverError BAD_ARGUMENTS } select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-0812', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-08123', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ'); -select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz'); +select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); +select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz'); -- incorrect timezone offset and timezone select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-8000', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456ABCD', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS } \ No newline at end of file diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a08143467cd..21fe37d0526 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2984 +personal_ws-1.1 en 3154 AArch ACLs ALTERs @@ -186,7 +186,6 @@ ComplexKeyCache ComplexKeyDirect ComplexKeyHashed Composable -composable ConcurrencyControlAcquired ConcurrencyControlSoftLimit Config @@ -405,12 +404,12 @@ ITION Identifiant IdentifierQuotingRule IdentifierQuotingStyle -Incrementing -IndexesAreNeighbors -InfluxDB InJodaSyntax InJodaSyntaxOrNull InJodaSyntaxOrZero +Incrementing +IndexesAreNeighbors +InfluxDB Instana IntN Integrations @@ -1099,7 +1098,6 @@ URLHash URLHierarchy URLPathHierarchy USearch -USearch UTCTimestamp UUIDNumToString UUIDStringToNum @@ -1995,6 +1993,7 @@ jbod jdbc jemalloc jeprof +joda joinGet joinGetOrNull json