Merge pull request #3922 from yandex/parse-date-time-best-effort-more-formats

Added more formats to "parseDateTimeBestEffort" function
This commit is contained in:
alexey-milovidov 2018-12-24 23:05:08 +03:00 committed by GitHub
commit ddc6cccb55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 93 additions and 16 deletions

View File

@ -86,6 +86,25 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date
bool is_pm = false;
/// Recognizes an English month name by its first three characters, ignoring case.
/// On a match, stores the month number (1 for "Jan" ... 12 for "Dec") into the captured `month`
/// and returns true. Otherwise leaves `month` untouched and returns false.
/// Only three characters of `alpha` are compared, so longer spellings that start with a known
/// prefix are accepted by this helper as well.
auto read_alpha_month = [&month] (const auto & alpha)
{
    /// Prefixes are listed in calendar order: the position in the table (counted from one) is the month number.
    const char * const month_prefixes[] =
    {
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
    };

    int month_number = 0;
    for (const char * prefix : month_prefixes)
    {
        ++month_number;
        if (0 == strncasecmp(alpha, prefix, 3))
        {
            month = month_number;
            return true;
        }
    }

    return false;
};
while (!in.eof())
{
char digits[14];
@ -205,6 +224,10 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date
/// hh - only if already have day of month
/// DD/MM/YYYY
/// DD/MM/YY
/// DD.MM.YYYY
/// DD.MM.YY
/// DD-MM-YYYY
/// DD-MM-YY
/// DD
UInt8 hour_or_day_of_month = 0;
@ -244,7 +267,7 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date
return on_error("Cannot read DateTime: unexpected number of decimal digits after hour and minute: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
}
}
else if (checkChar('/', in))
else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in))
{
if (day_of_month)
return on_error("Cannot read DateTime: day of month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
@ -260,10 +283,23 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date
readDecimalNumber<2>(month, digits);
else if (num_digits == 1)
readDecimalNumber<1>(month, digits);
else if (num_digits == 0)
{
/// Month in alphabetical form
char alpha[9]; /// The longest month name: September
size_t num_alpha = readAlpha(alpha, sizeof(alpha), in);
if (num_alpha < 3)
return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME);
if (!read_alpha_month(alpha))
return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME);
}
else
return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
if (checkChar('/', in))
if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in))
{
if (year)
return on_error("Cannot read DateTime: year component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
@ -401,19 +437,9 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date
{
bool has_day_of_week = false;
if (0 == strncasecmp(alpha, "Jan", 3)) month = 1;
else if (0 == strncasecmp(alpha, "Feb", 3)) month = 2;
else if (0 == strncasecmp(alpha, "Mar", 3)) month = 3;
else if (0 == strncasecmp(alpha, "Apr", 3)) month = 4;
else if (0 == strncasecmp(alpha, "May", 3)) month = 5;
else if (0 == strncasecmp(alpha, "Jun", 3)) month = 6;
else if (0 == strncasecmp(alpha, "Jul", 3)) month = 7;
else if (0 == strncasecmp(alpha, "Aug", 3)) month = 8;
else if (0 == strncasecmp(alpha, "Sep", 3)) month = 9;
else if (0 == strncasecmp(alpha, "Oct", 3)) month = 10;
else if (0 == strncasecmp(alpha, "Nov", 3)) month = 11;
else if (0 == strncasecmp(alpha, "Dec", 3)) month = 12;
if (read_alpha_month(alpha))
{
}
else if (0 == strncasecmp(alpha, "UTC", 3)) has_time_zone_offset = true;
else if (0 == strncasecmp(alpha, "GMT", 3)) has_time_zone_offset = true;
else if (0 == strncasecmp(alpha, "MSK", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 3; }

View File

@ -34,7 +34,7 @@ class ReadBuffer;
* YYYYMM - 6 digits is a year, month if year was not already read
* hhmmss - 6 digits is a time if year was already read
*
* .nnnnnnn - any number of digits after point is fractional part of second, if it is not YYYY.MM.DD
* .nnnnnnn - any number of digits after point is fractional part of second, if it is not YYYY.MM.DD or DD.MM.YYYY
*
* T - means that time will follow
*

View File

@ -0,0 +1,22 @@
s a b
24.12.2018 2018-12-24 00:00:00 2018-12-24 00:00:00
24-12-2018 2018-12-24 00:00:00 2018-12-24 00:00:00
24.12.18 2018-12-24 00:00:00 2018-12-24 00:00:00
24-12-18 2018-12-24 00:00:00 2018-12-24 00:00:00
24-Dec-18 2018-12-24 00:00:00 2018-12-24 00:00:00
24/DEC/18 2018-12-24 00:00:00 2018-12-24 00:00:00
24/DEC/2018 2018-12-24 00:00:00 2018-12-24 00:00:00
01-OCT-2015 2015-10-01 00:00:00 2015-10-01 00:00:00
24.12.2018 2018-12-24 00:00:00 2018-12-24 00:00:00
24-12-2018 2018-12-24 00:00:00 2018-12-24 00:00:00
24.12.18 2018-12-24 00:00:00 2018-12-24 00:00:00
24-12-18 2018-12-24 00:00:00 2018-12-24 00:00:00
24-Dec-18 2018-12-24 00:00:00 2018-12-24 00:00:00
24/DEC/18 2018-12-24 00:00:00 2018-12-24 00:00:00
24/DEC/2018 2018-12-24 00:00:00 2018-12-24 00:00:00
01-OCT-2015 2015-10-01 00:00:00 2015-10-01 00:00:00
24.12.18 010203 2018-12-24 01:02:03 2018-12-24 01:02:03
24.12.18 01:02:03 2018-12-24 01:02:03 2018-12-24 01:02:03
24.DEC.18T01:02:03.000+0300 2018-12-23 22:02:03 2018-12-23 22:02:03
01-September-2018 11:22 2018-09-01 11:22:00 2018-09-01 11:22:00

View File

@ -0,0 +1,29 @@
-- Feeds every sample string to both parseDateTimeBestEffortOrNull (column a) and
-- parseDateTimeBestEffortOrZero (column b), passing 'UTC' as the second argument,
-- and prints the input next to both results in PrettySpaceNoEscapes format.
SELECT
s,
parseDateTimeBestEffortOrNull(s, 'UTC') AS a,
parseDateTimeBestEffortOrZero(s, 'UTC') AS b
FROM
(
-- arrayJoin expands the array literal so that each sample string becomes its own row.
SELECT arrayJoin([
-- Numeric day, month and year separated by '.' or '-', with 4-digit and 2-digit years.
'24.12.2018',
'24-12-2018',
'24.12.18',
'24-12-18',
-- Month written alphabetically (mixed and upper case), separated by '-' or '/'.
'24-Dec-18',
'24/DEC/18',
'24/DEC/2018',
'01-OCT-2015',
-- The same eight inputs as above, listed a second time.
'24.12.2018',
'24-12-2018',
'24.12.18',
'24-12-18',
'24-Dec-18',
'24/DEC/18',
'24/DEC/2018',
'01-OCT-2015',
-- Date followed by a time part: 6 digits after a space, then hh:mm:ss after a space.
'24.12.18 010203',
'24.12.18 01:02:03',
-- Alphabetic month with '.' separators, 'T' before the time, fractional seconds and a +0300 suffix.
'24.DEC.18T01:02:03.000+0300',
-- Full month name followed by hh:mm.
'01-September-2018 11:22'
]) AS s)
FORMAT PrettySpaceNoEscapes;