Better toDate/toDateTime function.

This commit is contained in:
Amos Bird 2020-08-08 01:38:42 +08:00
parent 1ee0fa7d57
commit 9938e1f3f6
No known key found for this signature in database
GPG Key ID: 80D430DCBECFEDB4
3 changed files with 124 additions and 9 deletions

View File

@ -196,12 +196,38 @@ struct ToDateTransform32Or64
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{ {
return (from < 0xFFFF) ? from : time_zone.toDayNum(from); return (from < 0xFFFF) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF)));
} }
}; };
/** Special case of converting (U)Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to Date. template <typename FromType, typename ToType>
* If number is less than 65536, then it is treated as DayNum, and if greater or equals, then as unix timestamp. struct ToDateTransform32Or64Signed
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
if (from < 0) return 0;
return (from < 0xFFFF) ? from : time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF)));
}
};
template <typename FromType, typename ToType>
struct ToDateTransform8Or16Signed
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0) return 0;
return from;
}
};
/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience,
* Float32, Float64) to Date. If the number is negative, saturate it to unix epoch time. If the
* number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536,
* then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum.
* It's a bit illogical, as we actually have two functions in one. * It's a bit illogical, as we actually have two functions in one.
* But allows to support frequent case, * But allows to support frequent case,
* when user write toDate(UInt32), expecting conversion of unix timestamp to Date. * when user write toDate(UInt32), expecting conversion of unix timestamp to Date.
@ -211,14 +237,73 @@ template <typename Name> struct ConvertImpl<DataTypeUInt32, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeUInt32, DataTypeDate, ToDateTransform32Or64<UInt32, UInt16>> {}; : DateTimeTransformImpl<DataTypeUInt32, DataTypeDate, ToDateTransform32Or64<UInt32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDate, Name> template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeUInt64, DataTypeDate, ToDateTransform32Or64<UInt64, UInt16>> {}; : DateTimeTransformImpl<DataTypeUInt64, DataTypeDate, ToDateTransform32Or64<UInt64, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt8, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt8, DataTypeDate, ToDateTransform8Or16Signed<Int8, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt16, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt16, DataTypeDate, ToDateTransform8Or16Signed<Int16, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDate, Name> template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt32, DataTypeDate, ToDateTransform32Or64<Int32, UInt16>> {}; : DateTimeTransformImpl<DataTypeInt32, DataTypeDate, ToDateTransform32Or64Signed<Int32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDate, Name> template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt64, DataTypeDate, ToDateTransform32Or64<Int64, UInt16>> {}; : DateTimeTransformImpl<DataTypeInt64, DataTypeDate, ToDateTransform32Or64Signed<Int64, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDate, Name> template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeFloat32, DataTypeDate, ToDateTransform32Or64<Float32, UInt16>> {}; : DateTimeTransformImpl<DataTypeFloat32, DataTypeDate, ToDateTransform32Or64Signed<Float32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDate, Name> template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDate, ToDateTransform32Or64<Float64, UInt16>> {}; : DateTimeTransformImpl<DataTypeFloat64, DataTypeDate, ToDateTransform32Or64Signed<Float64, UInt16>> {};
template <typename FromType, typename ToType>
struct ToDateTimeTransform64
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
return std::min(time_t(from), time_t(0xFFFFFFFF));
}
};
template <typename FromType, typename ToType>
struct ToDateTimeTransformSigned
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0) return 0;
return from;
}
};
template <typename FromType, typename ToType>
struct ToDateTimeTransform64Signed
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0) return 0;
return std::min(time_t(from), time_t(0xFFFFFFFF));
}
};
/** Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32,
* Float64) to DateTime. If the number is negative, saturate it to unix epoch time. If the number
* exceeds UInt32, saturate to MAX_UINT32.
*/
template <typename Name> struct ConvertImpl<DataTypeInt8, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt8, DataTypeDateTime, ToDateTimeTransformSigned<Int8, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeInt16, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt16, DataTypeDateTime, ToDateTimeTransformSigned<Int16, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt32, DataTypeDateTime, ToDateTimeTransformSigned<Int32, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt64, DataTypeDateTime, ToDateTimeTransform64Signed<Int64, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeUInt64, DataTypeDateTime, ToDateTimeTransform64<UInt64, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeFloat32, DataTypeDateTime, ToDateTimeTransform64Signed<Float32, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime, ToDateTimeTransform64Signed<Float64, UInt32>> {};
/** Conversion of Date or DateTime to DateTime64: add zero sub-second part. /** Conversion of Date or DateTime to DateTime64: add zero sub-second part.
@ -1412,6 +1497,25 @@ struct ToNumberMonotonicity
} }
}; };
struct ToDateMonotonicity
{
static bool has() { return true; }
static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right)
{
auto which = WhichDataType(type);
if (which.isDateOrDateTime() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16())
return {true, true, true};
else if (
(which.isUInt() && ((left.isNull() || left.get<UInt64>() < 0xFFFF) && (right.isNull() || right.get<UInt64>() >= 0xFFFF)))
|| (which.isInt() && ((left.isNull() || left.get<Int64>() < 0xFFFF) && (right.isNull() || right.get<Int64>() >= 0xFFFF)))
|| (which.isFloat() && ((left.isNull() || left.get<Float64>() < 0xFFFF) && (right.isNull() || right.get<Float64>() >= 0xFFFF))))
return {};
else
return {true, true, true};
}
};
/** The monotonicity for the `toString` function is mainly determined for test purposes. /** The monotonicity for the `toString` function is mainly determined for test purposes.
* It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`.
*/ */
@ -1478,8 +1582,8 @@ using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32, ToNumberMono
using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>; using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>;
using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>; using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>; using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToNumberMonotonicity<UInt16>>; using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToDateMonotonicity>;
using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToNumberMonotonicity<UInt32>>; using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, PositiveMonotonicity>;
using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, UnknownMonotonicity>; using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, UnknownMonotonicity>;
using FunctionToUUID = FunctionConvert<DataTypeUUID, NameToUUID, ToNumberMonotonicity<UInt128>>; using FunctionToUUID = FunctionConvert<DataTypeUUID, NameToUUID, ToNumberMonotonicity<UInt128>>;
using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>; using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;

View File

@ -0,0 +1,2 @@
0
1970-01-01 2106-02-07 1970-04-11 1970-01-01 2106-02-07

View File

@ -0,0 +1,9 @@
DROP TABLE IF EXISTS tdm;
CREATE TABLE tdm (x DateTime) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0;
INSERT INTO tdm VALUES (now());
SELECT count(x) FROM tdm WHERE toDate(x) < today() SETTINGS max_rows_to_read = 1;
SELECT toDate(-1), toDate(10000000000000), toDate(100), toDate(65536), toDate(65535);
SELECT toDateTime(-1), toDateTime(10000000000000), toDateTime(1000);
DROP TABLE tdm;