fix review & ci

This commit is contained in:
kevinyhzou 2024-11-12 11:25:14 +08:00
parent 0768e0b265
commit 373f00c127
6 changed files with 105 additions and 83 deletions

View File

@ -4489,9 +4489,9 @@ Using replacement fields, you can define a pattern for the resulting string.
| k | clockhour of day (1~24) | number | 24 |
| m | minute of hour | number | 30 |
| s | second of minute | number | 55 |
| S | fraction of second (not supported yet) | number | 978 |
| z | time zone (short name not supported yet) | text | Pacific Standard Time; PST |
| Z | time zone offset/id (not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
| S | fraction of second | number | 978 |
| z | time zone | text | Eastern Standard Time; EST |
| Z | time zone offset | zone | -0800; -0812 |
| ' | escape for text | delimiter | |
| '' | single quote | literal | ' |

View File

@ -6889,9 +6889,11 @@ parseDateTime64(str, scale, [format[, timezone]])
Returns [DateTime64](../data-types/datetime64.md) type values parsed from input string according to a MySQL style format string.
## parseDateTime64OrZero
Same as for [parseDateTime64](#parsedatetime64) except that it returns zero date when it encounters a date format that cannot be processed.
## parseDateTime64OrNull
Same as for [parseDateTime64](#parsedatetime64) except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTime64InJodaSyntax

View File

@ -38,6 +38,7 @@ namespace ErrorCodes
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
extern const int CANNOT_PARSE_DATETIME;
extern const int NOT_ENOUGH_SPACE;
extern const int LOGICAL_ERROR;
}
namespace
@ -193,7 +194,7 @@ namespace
Int32 minute = 0; /// range [0, 59]
Int32 second = 0; /// range [0, 59]
Int32 microsecond = 0; /// range [0, 999999]
UInt32 scale = 0; /// The scale of DateTime64, range [0, 6].
UInt32 scale = 0; /// Scale of DateTime64. When parse syntax is `Joda`, it range [0, 6]; When parse syntax is `MySQL`, it should be 6
bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime
bool hour_starts_at_1 = false; /// Whether the hour is clockhour
@ -458,6 +459,18 @@ namespace
return {};
}
[[nodiscard]]
VoidOrError setScale(UInt8 scale_, ParseSyntax parse_syntax_)
{
if (parse_syntax_ == ParseSyntax::MySQL && scale_ != 6)
RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for scale must be 6 for MySQL parse syntax", std::to_string(scale_))
else if (parse_syntax_ == ParseSyntax::Joda && scale_ > 6)
RETURN_ERROR(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for scale must be in the range [0, 6] for joda syntax", std::to_string(scale_))
scale = scale_;
return {};
}
/// For debug
[[maybe_unused]] String toString() const
{
@ -579,7 +592,8 @@ namespace
}
};
/// _FUNC_(str[scale, format, timezone])
/// _FUNC_(str[, format, timezone]) /// for ReturnType::DateTime
/// _FUNC_(str, scale[, format, timezone]). /// for ReturnType::DateTime64
template <typename Name, ParseSyntax parse_syntax, ReturnType return_type, ErrorHandling error_handling>
class FunctionParseDateTimeImpl : public IFunction
{
@ -608,13 +622,13 @@ namespace
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args;
if constexpr (return_type == ReturnType::DateTime64)
if constexpr (return_type == ReturnType::DateTime)
mandatory_args = {{"time", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}};
else
mandatory_args = {
{"time", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
{"scale", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt8), &isColumnConst, "UInt8"}
};
else
mandatory_args = {{"time", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"}};
FunctionArgumentDescriptors optional_args{
{"format", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
@ -624,22 +638,21 @@ namespace
String time_zone_name = getTimeZone(arguments).getTimeZone();
DataTypePtr data_type;
if constexpr (return_type == ReturnType::DateTime64)
if constexpr (return_type == ReturnType::DateTime)
data_type = std::make_shared<DataTypeDateTime>(time_zone_name);
else
{
UInt8 scale = 0;
if (isUInt8(arguments[1].type))
{
const auto * col_scale = checkAndGetColumnConst<ColumnUInt8>(arguments[1].column.get());
if (col_scale)
scale = col_scale->getValue<UInt8>();
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The scale argument is not Const(UInt8) type.");
}
const auto * col_scale = checkAndGetColumnConst<ColumnUInt8>(arguments[1].column.get());
if (col_scale)
scale = col_scale->getValue<UInt8>();
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "The scale argument is not Const(UInt8) type.");
if (parse_syntax == ParseSyntax::MySQL && scale != 6)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The scale argument's value {} of MySQL parse syntax is not 6.", std::to_string(scale));
if (scale > maxScaleOfDateTime64)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The scale argument's value {} exceed the max scale value {}.", std::to_string(scale), std::to_string(maxScaleOfDateTime64));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The scale argument's value {} exceed the max scale value {}.", std::to_string(scale), std::to_string(maxScaleOfDateTime64));
String format = getFormat(arguments, scale);
std::vector<Instruction> instructions = parseFormat(format);
@ -647,28 +660,26 @@ namespace
{
/// Check scale by counting how may 'S' characters exists in the format string.
const String & fragment = instruction.getFragment();
UInt32 s_cnt = 0;
UInt32 s_count = 0;
for (char ch : fragment)
{
if (ch != 'S')
{
s_cnt = 0;
s_count = 0;
break;
}
else
s_cnt++;
s_count++;
}
/// If the number of 'S' character in format string not euqals the scale, then throw an exception to report error.
if (s_cnt != 0 && s_cnt != scale)
/// If the number of 'S' characters in format string not euqals the scale, then throw an exception to report error.
if (s_count != 0 && s_count != scale)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The scale of input format string {} not equals the given scale value {}.",
"The number of 'S' characters in the input format string {} does not equal the given scale value {}.",
format,
std::to_string(scale));
}
data_type = std::make_shared<DataTypeDateTime64>(scale, time_zone_name);
}
else
data_type = std::make_shared<DataTypeDateTime>(time_zone_name);
if (error_handling == ErrorHandling::Null)
return std::make_shared<DataTypeNullable>(data_type);
@ -679,23 +690,24 @@ namespace
{
DataTypePtr non_null_result_type;
if constexpr (error_handling == ErrorHandling::Null)
/// Remove Nullable wrapper. It will be added back later.
non_null_result_type = removeNullable(result_type);
else
non_null_result_type = result_type;
if constexpr (return_type == ReturnType::DateTime64)
if constexpr (return_type == ReturnType::DateTime)
{
MutableColumnPtr col_res = ColumnDateTime::create(input_rows_count);
ColumnDateTime * col_datetime = assert_cast<ColumnDateTime *>(col_res.get());
return executeImpl2<DataTypeDateTime::FieldType>(arguments, result_type, input_rows_count, col_res, col_datetime->getData());
}
else
{
const auto * datatime64_type = checkAndGetDataType<DataTypeDateTime64>(non_null_result_type.get());
MutableColumnPtr col_res = ColumnDateTime64::create(input_rows_count, datatime64_type->getScale());
ColumnDateTime64 * col_datetime64 = assert_cast<ColumnDateTime64 *>(col_res.get());
return executeImpl2<DataTypeDateTime64::FieldType>(arguments, result_type, input_rows_count, col_res, col_datetime64->getData());
}
else
{
MutableColumnPtr col_res = ColumnDateTime::create(input_rows_count);
ColumnDateTime * col_datetime = assert_cast<ColumnDateTime *>(col_res.get());
return executeImpl2<DataTypeDateTime::FieldType>(arguments, result_type, input_rows_count, col_res, col_datetime->getData());
}
}
template<typename T>
@ -732,41 +744,52 @@ namespace
for (size_t i = 0; i < input_rows_count; ++i)
{
datetime.reset();
if constexpr (return_type == ReturnType::DateTime64)
datetime.scale = scale;
StringRef str_ref = col_str->getDataAt(i);
Pos cur = str_ref.data;
Pos end = str_ref.data + str_ref.size;
bool error = false;
auto handleError = [&](const auto & result) -> void
{
if constexpr (error_handling == ErrorHandling::Zero)
{
res_data[i] = 0;
error = true;
}
else if constexpr (error_handling == ErrorHandling::Null)
{
res_data[i] = 0;
col_null_map->getData()[i] = 1;
error = true;
}
else
{
static_assert(error_handling == ErrorHandling::Exception);
const ErrorCodeAndMessage & err = result.error();
throw Exception(err.error_code, "{}", err.error_message);
}
};
if constexpr (return_type == ReturnType::DateTime64)
{
if (auto result = datetime.setScale(static_cast<UInt8>(scale), parse_syntax); !result.has_value())
{
handleError(result);
continue;
}
}
for (const auto & instruction : instructions)
{
if (auto result = instruction.perform(cur, end, datetime); result.has_value())
if (auto result = instruction.perform(cur, end, datetime) ; result.has_value())
{
cur = *result;
}
else
{
if constexpr (error_handling == ErrorHandling::Zero)
{
res_data[i] = 0;
error = true;
break;
}
else if constexpr (error_handling == ErrorHandling::Null)
{
res_data[i] = 0;
col_null_map->getData()[i] = 1;
error = true;
break;
}
else
{
static_assert(error_handling == ErrorHandling::Exception);
const ErrorCodeAndMessage & err = result.error();
throw Exception(err.error_code, "{}", err.error_message);
}
handleError(result);
break;
}
}
@ -789,10 +812,10 @@ namespace
{
if (result = datetime.buildDateTime(time_zone); result.has_value())
{
if constexpr (return_type == ReturnType::DateTime64)
res_data[i] = static_cast<Int64>(*result) * multiplier + datetime.microsecond;
else
if constexpr (return_type == ReturnType::DateTime)
res_data[i] = static_cast<UInt32>(*result);
else
res_data[i] = static_cast<Int64>(*result) * multiplier + datetime.microsecond;
}
}
@ -2241,11 +2264,9 @@ namespace
String getFormat(const ColumnsWithTypeAndName & arguments, UInt32 scale) const
{
size_t format_arg_index = 1;
if constexpr (return_type == ReturnType::DateTime64)
format_arg_index = 2;
size_t index_of_format_string_arg = (return_type == ReturnType::DateTime) ? 1 : 2;
if (arguments.size() <= format_arg_index)
if (arguments.size() <= index_of_format_string_arg)
{
String format;
if constexpr (parse_syntax == ParseSyntax::MySQL)
@ -2263,12 +2284,12 @@ namespace
}
else
{
const auto * col_format = checkAndGetColumnConst<ColumnString>(arguments[format_arg_index].column.get());
const auto * col_format = checkAndGetColumnConst<ColumnString>(arguments[index_of_format_string_arg].column.get());
if (!col_format)
throw Exception(
ErrorCodes::ILLEGAL_COLUMN,
"Illegal column {} of second ('format') argument of function {}. Must be constant string.",
arguments[format_arg_index].column->getName(),
arguments[index_of_format_string_arg].column->getName(),
getName());
return col_format->getValue<String>();
}

View File

@ -1,21 +1,21 @@
2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123
2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123
2024-10-10 02:42:10.123456
2024-10-10 01:30:10.123456
2024-10-10 01:30:10.123456
2024-10-09 23:30:10.123456
2024-10-09 23:30:10.123456
1970-01-01 08:00:00.000
2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123
2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123
2024-10-10 02:42:10.123456
1970-01-01 08:00:00.000000
2024-10-10 01:30:10.123456
2024-10-10 01:30:10.123456
2024-10-09 23:30:10.123456
2024-10-09 23:30:10.123456
1970-01-01 08:00:00.000000
\N
2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123
2024-10-09 10:30:10.123 2024-10-09 10:30:10.000123
2024-10-10 02:42:10.123456
\N
2024-10-10 01:30:10.123456
2024-10-10 01:30:10.123456
2024-10-09 23:30:10.123456
2024-10-09 23:30:10.123456
\N

View File

@ -8,8 +8,8 @@ select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:mm:ss.SSSS'); -- { serverError BAD_ARGUMENTS }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0812', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-08123', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ'); -- {serverError CANNOT_PARSE_DATETIME}
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz');
-- incorrect timezone offset and timezone
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-8000', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); -- { serverError CANNOT_PARSE_DATETIME }
select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456ABCD', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS }
@ -22,8 +22,8 @@ select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 3, 'yyyy-MM-
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:mm:ss.SSSS'); -- { serverError BAD_ARGUMENTS }
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-0812', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-08123', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz');
-- incorrect timezone offset and timezone
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-8000', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456ABCD', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS }
@ -36,8 +36,8 @@ select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 3, 'yyyy-MM-
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 3, 'yyyy-MM-dd HH:mm:ss.SSSS'); -- { serverError BAD_ARGUMENTS }
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-0812', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-08123', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZZZ');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456EST', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSzzz');
-- incorrect timezone offset and timezone
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-8000', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSZ');
select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456ABCD', 6, 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); -- { serverError BAD_ARGUMENTS }

View File

@ -1,4 +1,4 @@
personal_ws-1.1 en 2984
personal_ws-1.1 en 3154
AArch
ACLs
ALTERs
@ -186,7 +186,6 @@ ComplexKeyCache
ComplexKeyDirect
ComplexKeyHashed
Composable
composable
ConcurrencyControlAcquired
ConcurrencyControlSoftLimit
Config
@ -405,12 +404,12 @@ ITION
Identifiant
IdentifierQuotingRule
IdentifierQuotingStyle
Incrementing
IndexesAreNeighbors
InfluxDB
InJodaSyntax
InJodaSyntaxOrNull
InJodaSyntaxOrZero
Incrementing
IndexesAreNeighbors
InfluxDB
Instana
IntN
Integrations
@ -1099,7 +1098,6 @@ URLHash
URLHierarchy
URLPathHierarchy
USearch
USearch
UTCTimestamp
UUIDNumToString
UUIDStringToNum
@ -1995,6 +1993,7 @@ jbod
jdbc
jemalloc
jeprof
joda
joinGet
joinGetOrNull
json