Merge pull request #13497 from amosbird/tdm

Better toDate/toDateTime functions.
This commit is contained in:
alexey-milovidov 2020-08-09 02:14:58 +03:00 committed by GitHub
commit be38b0fe75
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 150 additions and 10 deletions

View File

@ -196,12 +196,45 @@ struct ToDateTransform32Or64
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
return (from < 0xFFFF) ? from : time_zone.toDayNum(from);
return (from < 0xFFFF)
? from
: time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF)));
}
};
/** Special case of converting (U)Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to Date.
* If number is less than 65536, then it is treated as DayNum, and if greater or equals, then as unix timestamp.
template <typename FromType, typename ToType>
struct ToDateTransform32Or64Signed
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone)
{
/// The function should be monotonic (better for query optimizations), so we saturate instead of overflow.
if (from < 0)
return 0;
return (from < 0xFFFF)
? from
: time_zone.toDayNum(std::min(time_t(from), time_t(0xFFFFFFFF)));
}
};
template <typename FromType, typename ToType>
struct ToDateTransform8Or16Signed
{
static constexpr auto name = "toDate";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0)
return 0;
return from;
}
};
/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience,
* Float32, Float64) to Date. If the number is negative, saturate it to unix epoch time. If the
* number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536,
* then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum.
* It's a bit illogical, as we actually have two functions in one.
* But allows to support frequent case,
* when user write toDate(UInt32), expecting conversion of unix timestamp to Date.
@ -211,14 +244,75 @@ template <typename Name> struct ConvertImpl<DataTypeUInt32, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeUInt32, DataTypeDate, ToDateTransform32Or64<UInt32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeUInt64, DataTypeDate, ToDateTransform32Or64<UInt64, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt8, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt8, DataTypeDate, ToDateTransform8Or16Signed<Int8, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt16, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt16, DataTypeDate, ToDateTransform8Or16Signed<Int16, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt32, DataTypeDate, ToDateTransform32Or64<Int32, UInt16>> {};
: DateTimeTransformImpl<DataTypeInt32, DataTypeDate, ToDateTransform32Or64Signed<Int32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeInt64, DataTypeDate, ToDateTransform32Or64<Int64, UInt16>> {};
: DateTimeTransformImpl<DataTypeInt64, DataTypeDate, ToDateTransform32Or64Signed<Int64, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeFloat32, DataTypeDate, ToDateTransform32Or64<Float32, UInt16>> {};
: DateTimeTransformImpl<DataTypeFloat32, DataTypeDate, ToDateTransform32Or64Signed<Float32, UInt16>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDate, Name>
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDate, ToDateTransform32Or64<Float64, UInt16>> {};
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDate, ToDateTransform32Or64Signed<Float64, UInt16>> {};
template <typename FromType, typename ToType>
struct ToDateTimeTransform64
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
return std::min(time_t(from), time_t(0xFFFFFFFF));
}
};
template <typename FromType, typename ToType>
struct ToDateTimeTransformSigned
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0)
return 0;
return from;
}
};
template <typename FromType, typename ToType>
struct ToDateTimeTransform64Signed
{
static constexpr auto name = "toDateTime";
static inline NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &)
{
if (from < 0)
return 0;
return std::min(time_t(from), time_t(0xFFFFFFFF));
}
};
/** Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32,
* Float64) to DateTime. If the number is negative, saturate it to unix epoch time. If the number
* exceeds UInt32, saturate to MAX_UINT32.
*/
template <typename Name> struct ConvertImpl<DataTypeInt8, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt8, DataTypeDateTime, ToDateTimeTransformSigned<Int8, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeInt16, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt16, DataTypeDateTime, ToDateTimeTransformSigned<Int16, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeInt32, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt32, DataTypeDateTime, ToDateTimeTransformSigned<Int32, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeInt64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeInt64, DataTypeDateTime, ToDateTimeTransform64Signed<Int64, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeUInt64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeUInt64, DataTypeDateTime, ToDateTimeTransform64<UInt64, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat32, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeFloat32, DataTypeDateTime, ToDateTimeTransform64Signed<Float32, UInt32>> {};
template <typename Name> struct ConvertImpl<DataTypeFloat64, DataTypeDateTime, Name>
: DateTimeTransformImpl<DataTypeFloat64, DataTypeDateTime, ToDateTimeTransform64Signed<Float64, UInt32>> {};
/** Conversion of Date or DateTime to DateTime64: add zero sub-second part.
@ -1412,6 +1506,39 @@ struct ToNumberMonotonicity
}
};
struct ToDateMonotonicity
{
static bool has() { return true; }
static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right)
{
auto which = WhichDataType(type);
if (which.isDateOrDateTime() || which.isInt8() || which.isInt16() || which.isUInt8() || which.isUInt16())
return {true, true, true};
else if (
(which.isUInt() && ((left.isNull() || left.get<UInt64>() < 0xFFFF) && (right.isNull() || right.get<UInt64>() >= 0xFFFF)))
|| (which.isInt() && ((left.isNull() || left.get<Int64>() < 0xFFFF) && (right.isNull() || right.get<Int64>() >= 0xFFFF)))
|| (which.isFloat() && ((left.isNull() || left.get<Float64>() < 0xFFFF) && (right.isNull() || right.get<Float64>() >= 0xFFFF)))
|| !type.isValueRepresentedByNumber())
return {};
else
return {true, true, true};
}
};
struct ToDateTimeMonotonicity
{
static bool has() { return true; }
static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &)
{
if (type.isValueRepresentedByNumber())
return {true, true, true};
else
return {};
}
};
/** The monotonicity for the `toString` function is mainly determined for test purposes.
* It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`.
*/
@ -1478,8 +1605,8 @@ using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32, ToNumberMono
using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>;
using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToNumberMonotonicity<UInt16>>;
using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToNumberMonotonicity<UInt32>>;
using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToDateMonotonicity>;
using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToDateTimeMonotonicity>;
using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, UnknownMonotonicity>;
using FunctionToUUID = FunctionConvert<DataTypeUUID, NameToUUID, ToNumberMonotonicity<UInt128>>;
using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;

View File

@ -718,6 +718,7 @@ void registerFunctionFormatDateTime(FunctionFactory & factory)
{
factory.registerFunction<FunctionFormatDateTime>();
factory.registerFunction<FunctionFROM_UNIXTIME>();
factory.registerAlias("fromUnixTimestamp", "FROM_UNIXTIME");
}
}

View File

@ -1,6 +1,6 @@
1970-01-01 00:02:03
1973-11-29 21:33:09
2038-07-12 01:15:36
2106-02-07 06:28:15
19
11
1970-01-15

View File

@ -0,0 +1,3 @@
0
1970-01-01 2106-02-07 1970-04-11 1970-01-01 2106-02-07
1970-01-01 03:00:00 2106-02-07 09:28:15 1970-01-01 03:16:40

View File

@ -0,0 +1,9 @@
DROP TABLE IF EXISTS tdm;
CREATE TABLE tdm (x DateTime) ENGINE = MergeTree ORDER BY x SETTINGS write_final_mark = 0;
INSERT INTO tdm VALUES (now());
SELECT count(x) FROM tdm WHERE toDate(x) < today() SETTINGS max_rows_to_read = 1;
SELECT toDate(-1), toDate(10000000000000), toDate(100), toDate(65536), toDate(65535);
SELECT toDateTime(-1), toDateTime(10000000000000), toDateTime(1000);
DROP TABLE tdm;