ClickHouse/tests/queries/0_stateless/00921_datetime64_compatibility_long.python

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

228 lines
7.2 KiB
Plaintext
Raw Normal View History

2020-10-02 16:54:07 +00:00
#!/usr/bin/env python3
2019-09-07 14:22:22 +00:00
# encoding: utf-8
import re
import itertools
2019-10-04 13:49:34 +00:00
import sys
import argparse
2019-09-07 14:22:22 +00:00
# SQL expressions used to verify that DateTime64 is accepted as an argument by
# functions taking DateTime. One expression per line; `N` stands for the value
# under test. Empty lines and lines starting with '#' are kept in this raw list
# and are expected to be filtered out before the expressions are used.
FUNCTIONS = """
toTimeZone(N, 'UTC')
toYear(N, 'Asia/Istanbul')
toQuarter(N, 'Asia/Istanbul')
toMonth(N, 'Asia/Istanbul')
toDayOfYear(N, 'Asia/Istanbul')
toDayOfMonth(N, 'Asia/Istanbul')
toDayOfWeek(N, 'Asia/Istanbul')
toHour(N, 'Asia/Istanbul')
toMinute(N, 'Asia/Istanbul')
toSecond(N, 'Asia/Istanbul')
toUnixTimestamp(N)
toStartOfYear(N, 'Asia/Istanbul')
toStartOfISOYear(N, 'Asia/Istanbul')
toStartOfQuarter(N, 'Asia/Istanbul')
toStartOfMonth(N, 'Asia/Istanbul')
toMonday(N, 'Asia/Istanbul')
toStartOfWeek(N, 'Asia/Istanbul')
toStartOfDay(N, 'Asia/Istanbul')
toStartOfHour(N, 'Asia/Istanbul')
toStartOfMinute(N, 'Asia/Istanbul')
toStartOfFiveMinutes(N, 'Asia/Istanbul')
toStartOfTenMinutes(N, 'Asia/Istanbul')
toStartOfFifteenMinutes(N, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 1 year, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 1 month, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 1 day, 'Asia/Istanbul')
toStartOfInterval(N, INTERVAL 15 minute, 'Asia/Istanbul')
date_trunc('year', N, 'Asia/Istanbul')
date_trunc('month', N, 'Asia/Istanbul')
date_trunc('day', N, 'Asia/Istanbul')
date_trunc('minute', N, 'Asia/Istanbul')
toTime(N, 'Asia/Istanbul')
toRelativeYearNum(N, 'Asia/Istanbul')
toRelativeQuarterNum(N, 'Asia/Istanbul')
toRelativeMonthNum(N, 'Asia/Istanbul')
toRelativeWeekNum(N, 'Asia/Istanbul')
toRelativeDayNum(N, 'Asia/Istanbul')
toRelativeHourNum(N, 'Asia/Istanbul')
toRelativeMinuteNum(N, 'Asia/Istanbul')
toRelativeSecondNum(N, 'Asia/Istanbul')
toISOYear(N, 'Asia/Istanbul')
toISOWeek(N, 'Asia/Istanbul')
toWeek(N, 'Asia/Istanbul')
toYearWeek(N, 'Asia/Istanbul')
timeSlot(N, 'Asia/Istanbul')
toYYYYMM(N, 'Asia/Istanbul')
toYYYYMMDD(N, 'Asia/Istanbul')
toYYYYMMDDhhmmss(N, 'Asia/Istanbul')
addYears(N, 1, 'Asia/Istanbul')
addMonths(N, 1, 'Asia/Istanbul')
addWeeks(N, 1, 'Asia/Istanbul')
addDays(N, 1, 'Asia/Istanbul')
addHours(N, 1, 'Asia/Istanbul')
addMinutes(N, 1, 'Asia/Istanbul')
addSeconds(N, 1, 'Asia/Istanbul')
addQuarters(N, 1, 'Asia/Istanbul')
subtractYears(N, 1, 'Asia/Istanbul')
subtractMonths(N, 1, 'Asia/Istanbul')
subtractWeeks(N, 1, 'Asia/Istanbul')
subtractDays(N, 1, 'Asia/Istanbul')
subtractHours(N, 1, 'Asia/Istanbul')
subtractMinutes(N, 1, 'Asia/Istanbul')
subtractSeconds(N, 1, 'Asia/Istanbul')
subtractQuarters(N, 1, 'Asia/Istanbul')
CAST(N as DateTime('Europe/Minsk'))
CAST(N as Date)
CAST(N as UInt64)
CAST(N as DateTime64(0, 'Europe/Minsk'))
CAST(N as DateTime64(3, 'Europe/Minsk'))
CAST(N as DateTime64(6, 'Europe/Minsk'))
CAST(N as DateTime64(9, 'Europe/Minsk'))
# Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer.
# CAST(N as DateTime64(12, 'Asia/Istanbul'))
# DateTime64(18) will always fail due to zero precision, but it is Ok to test here:
# CAST(N as DateTime64(18, 'Asia/Istanbul'))
formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Asia/Istanbul')
""".splitlines()
Extended range of DateTime64 to years 1925 - 2238 The Year 1925 is a starting point because most of the timezones switched to saner (mostly 15-minutes based) offsets somewhere during 1924 or before. And that significantly simplifies implementation. 2238 is to simplify the arithmetic for sanitizing LUT index access; there are fewer than 0x1ffff days from 1925. * Extended DateLUTImpl internal LUT to 0x1ffff items, some of which represent negative (pre-1970) time values. As a collateral benefit, Date now correctly supports dates up to 2149 (instead of 2106). * Added a new strong typedef ExtendedDayNum, which represents dates pre-1970 and post-2149. * Functions that used to return DayNum now return ExtendedDayNum. * Refactored DateLUTImpl to untie DayNum from the dual role of being a value and an index (due to negative time). Index is now a different type LUTIndex with explicit conversion functions from DayNum, time_t, and ExtendedDayNum. * Updated DateLUTImpl to properly support values close to epoch start (1970-01-01 00:00), including negative ones. * Reduced resolution of DateLUTImpl::Values::time_at_offset_change to a multiple of 15 minutes to allow storing 64-bits of time_t in DateLUTImpl::Value while keeping same size. * Minor performance updates to DateLUTImpl when building month LUT by skipping non-start-of-month days. * Fixed extractTimeZoneFromFunctionArguments to work correctly with DateTime64. * New unit-tests and stateless integration tests for both DateTime and DateTime64.
2020-04-17 13:26:44 +00:00
# Operator expressions to test in addition to the plain function list.
# Each entry is a pair (templates, placeholders):
#   * templates    - str.format templates of SQL expressions;
#   * placeholders - maps a placeholder name to the list of values to try.
# Expanded later to cartesian product of all arguments, using format string.
extra_ops = [
    # With same type:
    (
        ['N {op} N'],
        {
            'op':
            [
                '- ',  # does not work, but should it?
                '+ ',  # does not work, but should it?
                '!=', '==',  # equality and inequality supposed to take sub-second part in account
                '< ',
                '<=',
                '> ',
                '>='
            ]
        }
    ),
    # With other DateTime types:
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        {
            'op':
            [
                '-',  # does not work, but should it?
                '!=', '==',
                # these are naturally expected to work, but they don't:
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg': ['DT', 'D', 'DT64'],
        }
    ),
    # With arithmetic types
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        {
            'op':
            [
                '+ ',
                '- ',
                '==',
                '!=',
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg':
            [
                'toUInt8(1)',
                'toInt8(-1)',
                'toUInt16(1)',
                'toInt16(-1)',
                'toUInt32(1)',
                'toInt32(-1)',
                'toUInt64(1)',
                'toInt64(-1)'
            ],
        },
    ),
]
# Expand extra_ops here: for every combination of placeholder values (cartesian
# product, in the order the placeholders are declared) fill in each template
# and append the resulting expression to FUNCTIONS.
for funcs, args in extra_ops:
    args_keys = list(args.keys())
    for args_vals in itertools.product(*args.values()):
        # Pair each placeholder name with the value chosen for this combination.
        substitution = dict(zip(args_keys, args_vals))
        for func in funcs:
            FUNCTIONS.append(func.format(**substitution))
2019-09-07 14:22:22 +00:00
2019-10-22 07:45:00 +00:00
# filter out empty lines and commented out lines
# (a line counts as commented out when its first non-blank character is '#')
COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#")
FUNCTIONS = [f for f in FUNCTIONS if f and COMMENTED_OUT_LINE_RE.match(f) is None]
# Aliases of the values each expression is evaluated against; every one of them
# is substituted for `N` in turn.
TYPES = ['D', 'DT', 'DT64']
2019-10-22 07:45:00 +00:00
2019-10-04 13:49:34 +00:00
def escape_string(s):
    """Escape *s* so that it can be placed inside a single-quoted string literal.

    Backslashes, control characters and non-ASCII characters are turned into
    backslash escape sequences, and single quotes are prefixed with a
    backslash.

    Args:
        s: text to escape.

    Returns:
        The escaped text.
    """
    if sys.version_info[0] > 2:
        # 'unicode_escape' does not escape quotes, so single quotes are
        # escaped explicitly after the codec round-trip.
        return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'")

    # Python 2 fallback: the 'string-escape' codec is only available there.
    return s.encode('string-escape').decode('utf-8')
def execute_functions_for_types(functions, types):
    """Print the SQL script that evaluates every expression for every type.

    For each expression in *functions* the following statements are printed
    to stdout:

    1. a SELECT that echoes the (escaped) expression text as a header;
    2. one query per entry of *types*, in which that entry is aliased to `N`
       and both the result type name and the result value are selected in
       CSV format;
    3. a SELECT that emits a separator line.

    Args:
        functions: iterable of SQL expressions that reference `N`.
        types: iterable of SQL expressions to bind to `N`; the prologue
            defines the aliases DT64, DT and D that can be used here.

    Returns:
        None.
    """
    # NOTE(review): an earlier comment here suggested using string.Template to
    # tolerate missing keys; the code uses str.format with every key supplied.
    # The expression text is passed as a format *value*, so braces inside it
    # are not interpreted.
    prologue_template = (
        "WITH "
        "toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, "
        "toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, "
        "toDate('2019-09-16') as D, {X} as N"
    )
    # The prologue depends only on the type, so build it once per type instead
    # of once per (function, type) pair.
    prologues = [prologue_template.format(X=dt) for dt in types]

    for func in functions:
        print("""SELECT 'SELECT {func}';""".format(func=escape_string(func)))
        for prologue in prologues:
            print("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func))
        print("""SELECT '------------------------------------------';""")
def main():
    """Parse the command line and print the generated SQL script.

    Command-line options:
        --functions_re   keep only expressions in which the regular expression
                         is found anywhere (re.search);
        --types_re       keep only types whose whole name matches the regular
                         expression (the pattern is wrapped in '^(' ... ')$');
        --list_functions print the selected expressions, one per line, and
                         exit without generating SQL.

    Returns:
        0 on success, -1 when a filter leaves no functions or no types.
    """
    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None)
        parser.add_argument('--types_re',
                            type=lambda s: re.compile('^(' + s + ')$'),
                            help="RE to enable types, supported types: " + ",".join(TYPES), default=None)
        parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit")
        return parser.parse_args()

    args = parse_args()

    functions = FUNCTIONS
    types = TYPES

    if args.functions_re:
        functions = [f for f in functions if args.functions_re.search(f)]
        if not functions:
            print("functions list is empty")
            return -1

    if args.types_re:
        types = [t for t in types if args.types_re.match(t)]
        if not types:
            print("types list is empty")
            return -1

    if args.list_functions:
        print("\n".join(functions))
        return 0

    execute_functions_for_types(functions, types)
    return 0
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == '__main__':
    # sys.exit is used rather than the bare `exit` helper, which is installed
    # by the `site` module and is not guaranteed to exist (e.g. `python -S`).
    sys.exit(main())