From 2ea18b308550fae32fa37a2d6c2e59421d156f4a Mon Sep 17 00:00:00 2001 From: HaiBo Li Date: Wed, 2 Mar 2022 15:06:38 +0800 Subject: [PATCH] Fix the bug that the toDatetime function overflows (#32898) * Fixed overflow bug of the function toDatetime/toDate/toDate32/parseDateTimeBestEffort --- src/Common/DateLUTImpl.h | 8 ++- src/Common/tests/gtest_DateLUTImpl.cpp | 6 ++- src/Functions/FunctionsConversion.h | 50 +++++++++++++++---- src/IO/ReadHelpers.h | 13 ++++- .../integration/test_timezone_config/test.py | 22 ++++++++ .../01186_conversion_to_nullable.reference | 2 +- .../01921_datatype_date32.reference | 4 +- 7 files changed, 89 insertions(+), 16 deletions(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index c178dc58854..5ca37448e36 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -1001,8 +1001,12 @@ public: inline LUTIndex makeLUTIndex(Int16 year, UInt8 month, UInt8 day_of_month) const { - if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) + if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) return LUTIndex(0); + + if (unlikely(year > DATE_LUT_MAX_YEAR)) + return LUTIndex(DATE_LUT_SIZE - 1); + auto year_lut_index = (year - DATE_LUT_MIN_YEAR) * 12 + month - 1; UInt32 index = years_months_lut[year_lut_index].toUnderType() + day_of_month - 1; /// When date is out of range, default value is DATE_LUT_SIZE - 1 (2283-11-11) @@ -1012,7 +1016,7 @@ public: /// Create DayNum from year, month, day of month. inline ExtendedDayNum makeDayNum(Int16 year, UInt8 month, UInt8 day_of_month, Int32 default_error_day_num = 0) const { - if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) + if (unlikely(year < DATE_LUT_MIN_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) return ExtendedDayNum(default_error_day_num); return toDayNum(makeLUTIndex(year, month, day_of_month)); diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 1220c50b409..fd4294fb6ef 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -79,8 +79,12 @@ FailuresCount countFailures(const ::testing::TestResult & test_result) TEST(DateLUTTest, makeDayNumTest) { const DateLUTImpl & lut = DateLUT::instance("UTC"); - EXPECT_EQ(0, lut.makeDayNum(2500, 12, 25)); EXPECT_EQ(0, lut.makeDayNum(1924, 12, 31)); + EXPECT_EQ(-1, lut.makeDayNum(1924, 12, 31, -1)); + EXPECT_EQ(-16436, lut.makeDayNum(1925, 1, 1)); + EXPECT_EQ(0, lut.makeDayNum(1970, 1, 1)); + EXPECT_EQ(114635, lut.makeDayNum(2283, 11, 11)); + EXPECT_EQ(114635, lut.makeDayNum(2500, 12, 25)); } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 909803d7cd7..f75d67032f2 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -906,6 +906,41 @@ struct ConvertImplGenericToString } }; +/** Conversion of time_t to UInt16, Int32, UInt32 + */ +template +void convertFromTime(typename DataType::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFF)) + x = 0xFFFF; + else + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFFFFFF)) + x = 0xFFFFFFFF; + else + x = time; +} /** Conversion of strings to numbers, dates, datetimes: through parsing. */ @@ -931,18 +966,16 @@ inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer x = tmp; } + // NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. template <> inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { time_t time = 0; readDateTimeText(time, rb, *time_zone); - if (time < 0) - time = 0; - x = time; + convertFromTime(x, time); } - template <> inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -951,7 +984,6 @@ inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb x = tmp.toUnderType(); } - template bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -1178,7 +1210,7 @@ struct ConvertThroughParsing { time_t res; parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i], res); } } else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) @@ -1193,7 +1225,7 @@ struct ConvertThroughParsing { time_t res; parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i], res); } } else @@ -1232,14 +1264,14 @@ struct ConvertThroughParsing { time_t res; parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i],res); } } else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) { time_t res; parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i],res); } else { diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 5d580f6b130..09aec3c3785 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -686,6 +686,16 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) return readDateTextFallback(date, buf); } +inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) +{ + if (unlikely(from < 0)) + date = 0; + else if (unlikely(from > 0xFFFF)) + date = 0xFFFF; + else + date = from; +} + template inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) { @@ -698,7 +708,8 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) else if (!readDateTextImpl(local_date, buf)) return false; - date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); + ExtendedDayNum ret = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); + convertToDayNum(date,ret); return ReturnType(true); } diff --git a/tests/integration/test_timezone_config/test.py b/tests/integration/test_timezone_config/test.py index ac12eddc709..af7e3548e6a 100644 --- a/tests/integration/test_timezone_config/test.py +++ b/tests/integration/test_timezone_config/test.py @@ -17,3 +17,25 @@ def start_cluster(): def test_check_timezone_config(start_cluster): assert node.query("SELECT toDateTime(1111111111)") == "2005-03-17 17:58:31\n" + +def test_overflow_toDate(start_cluster): + assert node.query("SELECT toDate('2999-12-31','UTC')") == "2149-06-06\n" + assert node.query("SELECT toDate('2021-12-21','UTC')") == "2021-12-21\n" + assert node.query("SELECT toDate('1000-12-31','UTC')") == "1970-01-01\n" + +def test_overflow_toDate32(start_cluster): + assert node.query("SELECT toDate32('2999-12-31','UTC')") == "2283-11-11\n" + assert node.query("SELECT toDate32('2021-12-21','UTC')") == "2021-12-21\n" + assert node.query("SELECT toDate32('1000-12-31','UTC')") == "1925-01-01\n" + +def test_overflow_toDateTime(start_cluster): + assert node.query("SELECT toDateTime('2999-12-31 00:00:00','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT toDateTime('2106-02-07 06:28:15','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT toDateTime('1970-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + assert node.query("SELECT toDateTime('1000-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + +def test_overflow_parseDateTimeBestEffort(start_cluster): + assert node.query("SELECT parseDateTimeBestEffort('2999-12-31 00:00:00','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT parseDateTimeBestEffort('2106-02-07 06:28:15','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT parseDateTimeBestEffort('1970-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + assert node.query("SELECT parseDateTimeBestEffort('1000-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" diff --git a/tests/queries/0_stateless/01186_conversion_to_nullable.reference b/tests/queries/0_stateless/01186_conversion_to_nullable.reference index 7a690240eb5..dc77029ec3b 100644 --- a/tests/queries/0_stateless/01186_conversion_to_nullable.reference +++ b/tests/queries/0_stateless/01186_conversion_to_nullable.reference @@ -12,7 +12,7 @@ \N 1970-01-01 \N -1970-01-01 +2149-06-06 2020-12-24 01:02:03 \N 1970-01-01 03:00:00 diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference b/tests/queries/0_stateless/01921_datatype_date32.reference index 2114f6f6b1e..8beaefbeb38 100644 --- a/tests/queries/0_stateless/01921_datatype_date32.reference +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -221,13 +221,13 @@ 1925-04-01 1925-04-01 2283-03-31 -1925-01-01 +2283-11-11 2021-09-22 -------addYears--------- 1926-01-01 1926-01-01 2283-11-11 -1925-01-01 +2283-11-11 2022-06-22 -------subtractSeconds--------- 1925-01-01 00:00:00.000