From 9938e1f3f680f5c346ca38cdfc06ccadc9b5b510 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 8 Aug 2020 01:38:42 +0800 Subject: [PATCH 1/5] Better toDate/toDateTime function. --- src/Functions/FunctionsConversion.h | 122 ++++++++++++++++-- .../01440_to_date_monotonicity.reference | 2 + .../01440_to_date_monotonicity.sql | 9 ++ 3 files changed, 124 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/01440_to_date_monotonicity.reference create mode 100644 tests/queries/0_stateless/01440_to_date_monotonicity.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index c2a7f3f3cd2..5c3960fdef5 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -196,12 +196,38 @@ struct ToDateTransform32Or64 static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { - return (from < 0xFFFF) ? from : time_zone.toDayNum(from); + return (from < 0xFFFF) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } }; -/** Special case of converting (U)Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to Date. - * If number is less than 65536, then it is treated as DayNum, and if greater or equals, then as unix timestamp. +template +struct ToDateTransform32Or64Signed +{ + static constexpr auto name = "toDate"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + if (from < 0) return 0; + return (from < 0xFFFF) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); + } +}; + +template +struct ToDateTransform8Or16Signed +{ + static constexpr auto name = "toDate"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) return 0; + return from; + } +}; + +/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, + * Float32, Float64) to Date. If the number is negative, saturate it to unix epoch time. If the + * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, + * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. * It's a bit illogical, as we actually have two functions in one. * But allows to support frequent case, * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. @@ -211,14 +237,73 @@ template struct ConvertImpl : DateTimeTransformImpl> {}; template struct ConvertImpl : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; + + +template +struct ToDateTimeTransform64 +{ + static constexpr auto name = "toDateTime"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + return std::min(time_t(from), time_t(0xFFFFFFFF)); + } +}; + +template +struct ToDateTimeTransformSigned +{ + static constexpr auto name = "toDateTime"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) return 0; + return from; + } +}; + +template +struct ToDateTimeTransform64Signed +{ + static constexpr auto name = "toDateTime"; + + static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) return 0; + return std::min(time_t(from), time_t(0xFFFFFFFF)); + } +}; + +/** Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, + * Float64) to DateTime. If the number is negative, saturate it to unix epoch time. If the number + * exceeds UInt32, saturate to MAX_UINT32. + */ +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; +template struct ConvertImpl + : DateTimeTransformImpl> {}; /** Conversion of Date or DateTime to DateTime64: add zero sub-second part. @@ -1412,6 +1497,25 @@ struct ToNumberMonotonicity } }; +struct ToDateMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + auto which = WhichDataType(type); + if (which.isDateOrDateTime() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16()) + return {true, true, true}; + else if ( + (which.isUInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || (which.isInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || (which.isFloat() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + return {}; + else + return {true, true, true}; + } +}; + /** The monotonicity for the `toString` function is mainly determined for test purposes. * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. */ @@ -1478,8 +1582,8 @@ using FunctionToInt32 = FunctionConvert>; using FunctionToFloat32 = FunctionConvert>; using FunctionToFloat64 = FunctionConvert>; -using FunctionToDate = FunctionConvert>; -using FunctionToDateTime = FunctionConvert>; +using FunctionToDate = FunctionConvert; +using FunctionToDateTime = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; using FunctionToUUID = FunctionConvert>; using FunctionToString = FunctionConvert; diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.reference b/tests/queries/0_stateless/01440_to_date_monotonicity.reference new file mode 100644 index 00000000000..529601fb398 --- /dev/null +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.reference @@ -0,0 +1,2 @@ +0 +1970-01-01 2106-02-07 1970-04-11 1970-01-01 2106-02-07 diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.sql b/tests/queries/0_stateless/01440_to_date_monotonicity.sql new file mode 100644 index 00000000000..0355d1fec30 --- /dev/null +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS tdm; +CREATE TABLE tdm (x DateTime) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0; +INSERT INTO tdm VALUES (now()); +SELECT count(x) FROM tdm WHERE toDate(x) < today() SETTINGS max_rows_to_read = 1; + +SELECT toDate(-1), toDate(10000000000000), toDate(100), toDate(65536), toDate(65535); +SELECT toDateTime(-1), toDateTime(10000000000000), toDateTime(1000); + +DROP TABLE tdm; From 65b3863a9e525e4eb57d951d3be6f365e40df5c4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 7 Aug 2020 22:52:21 +0300 Subject: [PATCH 2/5] Update FunctionsConversion.h --- src/Functions/FunctionsConversion.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 5c3960fdef5..b67d023726d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -196,7 +196,9 @@ struct ToDateTransform32Or64 static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { - return (from < 0xFFFF) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); + return (from < 0xFFFF) + ? from + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } }; @@ -207,8 +209,12 @@ struct ToDateTransform32Or64Signed static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) { - if (from < 0) return 0; - return (from < 0xFFFF) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); + /// The function should be monotonic (better for query optimizations), so we saturate instead of overflow. + if (from < 0) + return 0; + return (from < 0xFFFF) + ? from + : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF))); } }; @@ -219,7 +225,8 @@ struct ToDateTransform8Or16Signed static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) { - if (from < 0) return 0; + if (from < 0) + return 0; return from; } }; From cbe9878e9e322fd2e50839904cefe3cac8612072 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 7 Aug 2020 22:53:52 +0300 Subject: [PATCH 3/5] Update FunctionsConversion.h --- src/Functions/FunctionsConversion.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index b67d023726d..7b1b8388866 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -276,7 +276,8 @@ struct ToDateTimeTransformSigned static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) { - if (from < 0) return 0; + if (from < 0) + return 0; return from; } }; @@ -288,7 +289,8 @@ struct ToDateTimeTransform64Signed static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) { - if (from < 0) return 0; + if (from < 0) + return 0; return std::min(time_t(from), time_t(0xFFFFFFFF)); } }; From d9de96716a4b22dd79d6780e4b36e527e8d46ced Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 8 Aug 2020 14:30:50 +0800 Subject: [PATCH 4/5] string to date is not monotonic --- src/Functions/FunctionsConversion.h | 18 ++++++++++++++++-- .../01440_to_date_monotonicity.reference | 1 + 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 7b1b8388866..88dd3621ecf 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1518,13 +1518,27 @@ struct ToDateMonotonicity else if ( (which.isUInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) || (which.isInt() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || (which.isFloat() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + || (which.isFloat() && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || !type.isValueRepresentedByNumber()) return {}; else return {true, true, true}; } }; +struct ToDateTimeMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &) + { + if (type.isValueRepresentedByNumber()) + return {true, true, true}; + else + return {}; + } +}; + /** The monotonicity for the `toString` function is mainly determined for test purposes. * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. */ @@ -1592,7 +1606,7 @@ using FunctionToInt64 = FunctionConvert>; using FunctionToFloat64 = FunctionConvert>; using FunctionToDate = FunctionConvert; -using FunctionToDateTime = FunctionConvert; +using FunctionToDateTime = FunctionConvert; using FunctionToDateTime64 = FunctionConvert; using FunctionToUUID = FunctionConvert>; using FunctionToString = FunctionConvert; diff --git a/tests/queries/0_stateless/01440_to_date_monotonicity.reference b/tests/queries/0_stateless/01440_to_date_monotonicity.reference index 529601fb398..30b841d4487 100644 --- a/tests/queries/0_stateless/01440_to_date_monotonicity.reference +++ b/tests/queries/0_stateless/01440_to_date_monotonicity.reference @@ -1,2 +1,3 @@ 0 1970-01-01 2106-02-07 1970-04-11 1970-01-01 2106-02-07 +1970-01-01 03:00:00 2106-02-07 09:28:15 1970-01-01 03:16:40 From ba204fe3b293aff28113d27c21533bd3bc605682 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 9 Aug 2020 00:24:43 +0800 Subject: [PATCH 5/5] fix test and add alias to FROM_UNIXTIME --- src/Functions/formatDateTime.cpp | 1 + tests/queries/0_stateless/01411_from_unixtime.reference | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 58e441bd42b..a8abfe7c562 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -718,6 +718,7 @@ void registerFunctionFormatDateTime(FunctionFactory & factory) { factory.registerFunction(); factory.registerFunction(); + factory.registerAlias("fromUnixTimestamp", "FROM_UNIXTIME"); } } diff --git a/tests/queries/0_stateless/01411_from_unixtime.reference b/tests/queries/0_stateless/01411_from_unixtime.reference index 8541fa3f1a0..1bc7519e668 100644 --- a/tests/queries/0_stateless/01411_from_unixtime.reference +++ b/tests/queries/0_stateless/01411_from_unixtime.reference @@ -1,6 +1,6 @@ 1970-01-01 00:02:03 1973-11-29 21:33:09 -2038-07-12 01:15:36 +2106-02-07 06:28:15 19 11 1970-01-15