From 66a0c1691db8f830402289dbae01108fd5b4e046 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Dec 2018 21:28:41 +0300 Subject: [PATCH 1/3] Added more formats to "parseDateTimeBestEffort" function [#CLICKHOUSE-2] --- dbms/src/IO/parseDateTimeBestEffort.cpp | 56 ++++++++++++++----- dbms/src/IO/parseDateTimeBestEffort.h | 2 +- ...parse_date_time_best_effort_more.reference | 21 +++++++ ...00813_parse_date_time_best_effort_more.sql | 28 ++++++++++ 4 files changed, 91 insertions(+), 16 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference create mode 100644 dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql diff --git a/dbms/src/IO/parseDateTimeBestEffort.cpp b/dbms/src/IO/parseDateTimeBestEffort.cpp index ff9b8d067a9..4aea4d621c2 100644 --- a/dbms/src/IO/parseDateTimeBestEffort.cpp +++ b/dbms/src/IO/parseDateTimeBestEffort.cpp @@ -86,6 +86,25 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date bool is_pm = false; + auto read_alpha_month = [&month] (const auto & alpha) + { + if (0 == strncasecmp(alpha, "Jan", 3)) month = 1; + else if (0 == strncasecmp(alpha, "Feb", 3)) month = 2; + else if (0 == strncasecmp(alpha, "Mar", 3)) month = 3; + else if (0 == strncasecmp(alpha, "Apr", 3)) month = 4; + else if (0 == strncasecmp(alpha, "May", 3)) month = 5; + else if (0 == strncasecmp(alpha, "Jun", 3)) month = 6; + else if (0 == strncasecmp(alpha, "Jul", 3)) month = 7; + else if (0 == strncasecmp(alpha, "Aug", 3)) month = 8; + else if (0 == strncasecmp(alpha, "Sep", 3)) month = 9; + else if (0 == strncasecmp(alpha, "Oct", 3)) month = 10; + else if (0 == strncasecmp(alpha, "Nov", 3)) month = 11; + else if (0 == strncasecmp(alpha, "Dec", 3)) month = 12; + else + return false; + return true; + }; + while (!in.eof()) { char digits[14]; @@ -205,6 +224,10 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date /// hh - only if already have day of month /// DD/MM/YYYY /// DD/MM/YY + /// DD.MM.YYYY + /// DD.MM.YY + /// DD-MM-YYYY + /// DD-MM-YY /// DD UInt8 hour_or_day_of_month = 0; @@ -244,7 +267,7 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date return on_error("Cannot read DateTime: unexpected number of decimal digits after hour and minute: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } } - else if (checkChar('/', in)) + else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (day_of_month) return on_error("Cannot read DateTime: day of month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); @@ -260,10 +283,23 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date readDecimalNumber<2>(month, digits); else if (num_digits == 1) readDecimalNumber<1>(month, digits); + else if (num_digits == 0) + { + /// Month in alphabetical form + + char alpha[9]; /// The longest month name: September + size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); + + if (num_alpha < 3) + return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); + + if (!read_alpha_month(alpha)) + return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); + } else return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); - if (checkChar('/', in)) + if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (year) return on_error("Cannot read DateTime: year component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); @@ -401,19 +437,9 @@ ReturnType parseDateTimeBestEffortImpl(time_t & res, ReadBuffer & in, const Date { bool has_day_of_week = false; - if (0 == strncasecmp(alpha, "Jan", 3)) month = 1; - else if (0 == strncasecmp(alpha, "Feb", 3)) month = 2; - else if (0 == strncasecmp(alpha, "Mar", 3)) month = 3; - else if (0 == strncasecmp(alpha, "Apr", 3)) month = 4; - else if (0 == strncasecmp(alpha, "May", 3)) month = 5; - else if (0 == strncasecmp(alpha, "Jun", 3)) month = 6; - else if (0 == strncasecmp(alpha, "Jul", 3)) month = 7; - else if (0 == strncasecmp(alpha, "Aug", 3)) month = 8; - else if (0 == strncasecmp(alpha, "Sep", 3)) month = 9; - else if (0 == strncasecmp(alpha, "Oct", 3)) month = 10; - else if (0 == strncasecmp(alpha, "Nov", 3)) month = 11; - else if (0 == strncasecmp(alpha, "Dec", 3)) month = 12; - + if (read_alpha_month(alpha)) + { + } else if (0 == strncasecmp(alpha, "UTC", 3)) has_time_zone_offset = true; else if (0 == strncasecmp(alpha, "GMT", 3)) has_time_zone_offset = true; else if (0 == strncasecmp(alpha, "MSK", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 3; } diff --git a/dbms/src/IO/parseDateTimeBestEffort.h b/dbms/src/IO/parseDateTimeBestEffort.h index 65c822ff331..dc416a750c5 100644 --- a/dbms/src/IO/parseDateTimeBestEffort.h +++ b/dbms/src/IO/parseDateTimeBestEffort.h @@ -34,7 +34,7 @@ class ReadBuffer; * YYYYMM - 6 digits is a year, month if year was not already read * hhmmss - 6 digits is a time if year was already read * - * .nnnnnnn - any number of digits after point is fractional part of second, if it is not YYYY.MM.DD + * .nnnnnnn - any number of digits after point is fractional part of second, if it is not YYYY.MM.DD or DD.MM.YYYY * * T - means that time will follow * diff --git a/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference new file mode 100644 index 00000000000..3ec7da1c47e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference @@ -0,0 +1,21 @@ +s a b + +24.12.2018 2018-12-24 00:00:00 2018-12-24 00:00:00 +24-12-2018 2018-12-24 00:00:00 2018-12-24 00:00:00 +24.12.18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24-12-18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24-Dec-18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24/DEC/18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24/DEC/2018 2018-12-24 00:00:00 2018-12-24 00:00:00 +01-OCT-2015 2015-10-01 00:00:00 2015-10-01 00:00:00 +24.12.2018 2018-12-24 00:00:00 2018-12-24 00:00:00 +24-12-2018 2018-12-24 00:00:00 2018-12-24 00:00:00 +24.12.18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24-12-18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24-Dec-18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24/DEC/18 2018-12-24 00:00:00 2018-12-24 00:00:00 +24/DEC/2018 2018-12-24 00:00:00 2018-12-24 00:00:00 +01-OCT-2015 2015-10-01 00:00:00 2015-10-01 00:00:00 +24.12.18 010203 2018-12-24 01:02:03 2018-12-24 01:02:03 +24.12.18 01:02:03 2018-12-24 01:02:03 2018-12-24 01:02:03 +24.DEC.18T01:02:03.000+0300 2018-12-23 22:02:03 2018-12-23 22:02:03 diff --git a/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql new file mode 100644 index 00000000000..4a24d73f55a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql @@ -0,0 +1,28 @@ +SELECT + s, + parseDateTimeBestEffortOrNull(s, 'UTC') AS a, + parseDateTimeBestEffortOrZero(s, 'UTC') AS b +FROM +( + SELECT arrayJoin([ +'24.12.2018', +'24-12-2018', +'24.12.18', +'24-12-18', +'24-Dec-18', +'24/DEC/18', +'24/DEC/2018', +'01-OCT-2015', +'24.12.2018', +'24-12-2018', +'24.12.18', +'24-12-18', +'24-Dec-18', +'24/DEC/18', +'24/DEC/2018', +'01-OCT-2015', +'24.12.18 010203', +'24.12.18 01:02:03', +'24.DEC.18T01:02:03.000+0300' +]) AS s) +FORMAT PrettySpaceNoEscapes; From 41e3bf55180dc735dd1b7ad202501ae109bcad45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Dec 2018 21:31:43 +0300 Subject: [PATCH 2/3] Added more formats to "parseDateTimeBestEffort" function [#CLICKHOUSE-2] --- .../00813_parse_date_time_best_effort_more.reference | 1 + .../0_stateless/00813_parse_date_time_best_effort_more.sql | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference index 3ec7da1c47e..921400838d0 100644 --- a/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference +++ b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.reference @@ -19,3 +19,4 @@ s a b 24.12.18 010203 2018-12-24 01:02:03 2018-12-24 01:02:03 24.12.18 01:02:03 2018-12-24 01:02:03 2018-12-24 01:02:03 24.DEC.18T01:02:03.000+0300 2018-12-23 22:02:03 2018-12-23 22:02:03 +01-September-2018 11:22 2018-09-01 11:22:00 2018-09-01 11:22:00 diff --git a/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql index 4a24d73f55a..1e3b24e60c0 100644 --- a/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql +++ b/dbms/tests/queries/0_stateless/00813_parse_date_time_best_effort_more.sql @@ -23,6 +23,7 @@ FROM '01-OCT-2015', '24.12.18 010203', '24.12.18 01:02:03', -'24.DEC.18T01:02:03.000+0300' +'24.DEC.18T01:02:03.000+0300', +'01-September-2018 11:22' ]) AS s) FORMAT PrettySpaceNoEscapes; From 37075ba1a3b689477927092eabb54ebdc2a5253e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 25 Dec 2018 11:43:07 +0300 Subject: [PATCH 3/3] add lost else --- docs/tools/mdx_clickhouse.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index ae57d1309e6..a2d71b0ea58 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -47,6 +47,9 @@ class ClickHousePreprocessor(markdown.util.Processor): for line in lines: if '' not in line: yield line + else: + for line in lines: + yield line class ClickHouseMarkdown(markdown.extensions.Extension):