ClickHouse/tests/queries/0_stateless/00921_datetime64_compatibility_long.python

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

236 lines
7.3 KiB
Plaintext
Raw Normal View History

2020-10-02 16:54:07 +00:00
#!/usr/bin/env python3
2019-09-07 14:22:22 +00:00
# encoding: utf-8
import re
import itertools
2019-10-04 13:49:34 +00:00
import sys
import argparse
2019-09-07 14:22:22 +00:00
# Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime.
#
# One SQL expression per line; `N` is the placeholder for the value under test
# (it is bound by the WITH prologue built in execute_functions_for_types).
# Empty lines and lines starting with `#` are dropped further down, after the
# extra operator expressions have been appended.
FUNCTIONS = """
toTimeZone(N, 'UTC')
toYear(N, 'Asia/Istanbul')
toQuarter(N, 'Asia/Istanbul')
toMonth(N, 'Asia/Istanbul')
toDayOfYear(N, 'Asia/Istanbul')
toDayOfMonth(N, 'Asia/Istanbul')
toDayOfWeek(N, 0, 'Asia/Istanbul')
toHour(N, 'Asia/Istanbul')
toMinute(N, 'Asia/Istanbul')
toSecond(N, 'Asia/Istanbul')
toUnixTimestamp(N)
toStartOfYear(N, 'Asia/Istanbul')
toStartOfISOYear(N, 'Asia/Istanbul')
toStartOfQuarter(N, 'Asia/Istanbul')
toStartOfMonth(N, 'Asia/Istanbul')
toMonday(N, 'Asia/Istanbul')
toStartOfWeek(N, 'Asia/Istanbul')
toStartOfDay(N, 'Asia/Istanbul')
toStartOfHour(N, 'Asia/Istanbul')
toStartOfMinute(N, 'Asia/Istanbul')
toStartOfFiveMinutes(N, 'Asia/Istanbul')
toStartOfTenMinutes(N, 'Asia/Istanbul')
toStartOfFifteenMinutes(N, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 1 year, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 1 month, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 1 day, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 15 minute, 'Asia/Istanbul')
date_trunc('year', N, 'Asia/Istanbul')
date_trunc('month', N, 'Asia/Istanbul')
date_trunc('day', N, 'Asia/Istanbul')
date_trunc('minute', N, 'Asia/Istanbul')
toTime(N, 'Asia/Istanbul')
toRelativeYearNum(N, 'Asia/Istanbul')
toRelativeQuarterNum(N, 'Asia/Istanbul')
toRelativeMonthNum(N, 'Asia/Istanbul')
toRelativeWeekNum(N, 'Asia/Istanbul')
toRelativeDayNum(N, 'Asia/Istanbul')
toRelativeHourNum(N, 'Asia/Istanbul')
toRelativeMinuteNum(N, 'Asia/Istanbul')
toRelativeSecondNum(N, 'Asia/Istanbul')
toISOYear(N, 'Asia/Istanbul')
toISOWeek(N, 'Asia/Istanbul')
toWeek(N, 'Asia/Istanbul')
toYearWeek(N, 'Asia/Istanbul')
timeSlot(N, 'Asia/Istanbul')
toYYYYMM(N, 'Asia/Istanbul')
toYYYYMMDD(N, 'Asia/Istanbul')
toYYYYMMDDhhmmss(N, 'Asia/Istanbul')
addYears(N, 1, 'Asia/Istanbul')
addMonths(N, 1, 'Asia/Istanbul')
addWeeks(N, 1, 'Asia/Istanbul')
addDays(N, 1, 'Asia/Istanbul')
addHours(N, 1, 'Asia/Istanbul')
addMinutes(N, 1, 'Asia/Istanbul')
addSeconds(N, 1, 'Asia/Istanbul')
addQuarters(N, 1, 'Asia/Istanbul')
subtractYears(N, 1, 'Asia/Istanbul')
subtractMonths(N, 1, 'Asia/Istanbul')
subtractWeeks(N, 1, 'Asia/Istanbul')
subtractDays(N, 1, 'Asia/Istanbul')
subtractHours(N, 1, 'Asia/Istanbul')
subtractMinutes(N, 1, 'Asia/Istanbul')
subtractSeconds(N, 1, 'Asia/Istanbul')
subtractQuarters(N, 1, 'Asia/Istanbul')
CAST(N as DateTime('Europe/Minsk'))
CAST(N as Date)
CAST(N as UInt64)
CAST(N as DateTime64(0, 'Europe/Minsk'))
CAST(N as DateTime64(3, 'Europe/Minsk'))
CAST(N as DateTime64(6, 'Europe/Minsk'))
CAST(N as DateTime64(9, 'Europe/Minsk'))
# Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer.
# CAST(N as DateTime64(12, 'Asia/Istanbul'))
# DateTime64(18) will always fail due to zero precision, but it is Ok to test here:
# CAST(N as DateTime64(18, 'Asia/Istanbul'))
formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Asia/Istanbul')
""".splitlines()
Extended range of DateTime64 to years 1925 - 2238. The year 1925 is a starting point because most of the timezones switched to saner (mostly 15-minute-based) offsets somewhere during 1924 or before, and that significantly simplifies the implementation. 2238 is to simplify arithmetic for sanitizing LUT index access; there are fewer than 0x1ffff days from 1925. * Extended DateLUTImpl internal LUT to 0x1ffff items, some of which represent negative (pre-1970) time values. As a collateral benefit, Date now correctly supports dates up to 2149 (instead of 2106). * Added a new strong typedef ExtendedDayNum, which represents dates pre-1970 and post-2149. * Functions that used to return DayNum now return ExtendedDayNum. * Refactored DateLUTImpl to untie DayNum from the dual role of being a value and an index (due to negative time). Index is now a different type, LUTIndex, with explicit conversion functions from DayNum, time_t, and ExtendedDayNum. * Updated DateLUTImpl to properly support values close to epoch start (1970-01-01 00:00), including negative ones. * Reduced resolution of DateLUTImpl::Values::time_at_offset_change to a multiple of 15 minutes to allow storing 64 bits of time_t in DateLUTImpl::Value while keeping the same size. * Minor performance updates to DateLUTImpl when building month LUT by skipping non-start-of-month days. * Fixed extractTimeZoneFromFunctionArguments to work correctly with DateTime64. * New unit tests and stateless integration tests for both DateTime and DateTime64.
2020-04-17 13:26:44 +00:00
# Expanded later to cartesian product of all arguments, using format string.
#
# Each entry is a pair (templates, parameters):
#   * templates  - list of str.format templates for SQL expressions;
#   * parameters - dict mapping a placeholder name used in the templates to the
#                  list of values to substitute for it.
extra_ops = [
    # With same type:
    (
        ["N {op} N"],
        {
            "op": [
                "- ",  # does not work, but should it?
                "+ ",  # does not work, but should it?
                "!=",
                "==",  # equality and inequality supposed to take sub-second part in account
                "< ",
                "<=",
                "> ",
                ">=",
            ]
        },
    ),
    # With other DateTime types:
    (
        ["N {op} {arg}", "{arg} {op} N"],
        {
            "op": [
                "-",  # does not work, but should it?
                "!=",
                "==",
                # these are naturally expected to work, but they don't:
                "< ",
                "<=",
                "> ",
                ">=",
            ],
            "arg": ["DT", "D", "DT64"],
        },
    ),
    # With arithmetic types
    (
        ["N {op} {arg}", "{arg} {op} N"],
        {
            "op": ["+ ", "- ", "==", "!=", "< ", "<=", "> ", ">="],
            "arg": [
                "toUInt8(1)",
                "toInt8(-1)",
                "toUInt16(1)",
                "toInt16(-1)",
                "toUInt32(1)",
                "toInt32(-1)",
                "toUInt64(1)",
                "toInt64(-1)",
            ],
        },
    ),
]
# Expand extra_ops here: for every combination of parameter values (cartesian
# product, in the order the parameters are listed) each template of the entry
# is formatted and appended to FUNCTIONS.
for funcs, args in extra_ops:
    args_keys = list(args.keys())
    for args_vals in itertools.product(*args.values()):
        # The substitution mapping is the same for all templates of this
        # combination, so build it once.
        substitutions = dict(zip(args_keys, args_vals))
        for func in funcs:
            result_func = func.format(**substitutions)
            FUNCTIONS.append(result_func)
# filter out empty lines and commented out lines
COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#")
FUNCTIONS = [f for f in FUNCTIONS if f and COMMENTED_OUT_LINE_RE.match(f) is None]

# Aliases of the values that can be bound to `N`; each one is defined in the
# WITH prologue emitted by execute_functions_for_types.
TYPES = ["D", "DT", "DT64"]
def escape_string(s):
    """Return `s` escaped so it can be embedded in a single-quoted SQL string.

    On Python 3 the text is encoded with the ``unicode_escape`` codec (which
    backslash-escapes backslashes, control characters and non-ASCII
    characters) and single quotes are then backslash-escaped explicitly.
    On Python 2 the ``string-escape`` codec is used instead.
    """
    if sys.version_info[0] > 2:
        return s.encode("unicode_escape").decode("utf-8").replace("'", "\\'")
    else:
        return s.encode("string-escape").decode("utf-8")
def execute_functions_for_types(functions, types):
    """Print the SQL script that evaluates every function for every type.

    For each expression in `functions` the following statements are written
    to stdout:

    * a ``SELECT 'SELECT <expr>'`` header, with the expression escaped by
      `escape_string`;
    * one query per entry of `types`, in which the entry is aliased to ``N``
      and the query selects the type name and the value of the expression
      in CSV format;
    * a separator line of dashes.

    :param functions: iterable of SQL expressions that refer to ``N``.
    :param types: iterable of expressions to bind to ``N`` (the prologue
        defines the aliases ``DT64``, ``DT`` and ``D``).
    """
    # NOTE(review): an earlier comment here said to use string.Template to allow
    # lines with missing keys, like type, e.g.
    # SELECT CAST(toDateTime64(1234567890), 'DateTime64')
    # -- the code uses str.format; `func` is only ever passed as a value.
    for func in functions:
        print("""SELECT 'SELECT {func}';""".format(func=escape_string(func)))
        for dt in types:
            prologue = (
                "WITH "
                "toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, "
                "toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, "
                "toDate('2019-09-16') as D, {X} as N"
            ).format(X=dt)
            print(
                """{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(
                    prologue=prologue, func=func
                )
            )
        # NOTE(review): nesting reconstructed -- the separator is emitted once
        # per function, after the queries for all types.
        print("""SELECT '------------------------------------------';""")
def main():
    """Parse the command line and print the generated SQL test script.

    Options:

    * ``--functions_re``: keep only the functions the regular expression
      matches anywhere (``search``);
    * ``--types_re``: keep only the types the regular expression matches
      completely (the pattern is wrapped in ``^(...)$``);
    * ``--list_functions``: print the selected functions, one per line,
      instead of generating the script.

    :return: -1 when a filter leaves no functions or no types, 0 after
        listing the functions, None after generating the script.
    """

    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument(
            "--functions_re",
            type=re.compile,
            help="RE to enable functions",
            default=None,
        )
        parser.add_argument(
            "--types_re",
            type=lambda s: re.compile("^(" + s + ")$"),
            help="RE to enable types, supported types: " + ",".join(TYPES),
            default=None,
        )
        parser.add_argument(
            "--list_functions",
            action="store_true",
            help="List all functions to be tested and exit",
        )
        return parser.parse_args()

    args = parse_args()

    functions = FUNCTIONS
    types = TYPES

    if args.functions_re:
        functions = [f for f in functions if args.functions_re.search(f)]
        if not functions:
            print("functions list is empty")
            return -1

    if args.types_re:
        types = [t for t in types if args.types_re.match(t)]
        if not types:
            print("types list is empty")
            return -1

    if args.list_functions:
        print("\n".join(functions))
        return 0

    execute_functions_for_types(functions, types)
if __name__ == "__main__":
    # sys.exit rather than the `exit` helper: the latter is injected by the
    # `site` module and is not guaranteed to exist (e.g. under `python -S`).
    sys.exit(main())