ClickHouse/tests/queries/0_stateless/00921_datetime64_compatibility_long.python

228 lines
7.2 KiB
Plaintext
Raw Normal View History

2020-10-02 16:54:07 +00:00
#!/usr/bin/env python3
2019-09-07 14:22:22 +00:00
# encoding: utf-8
import re
import itertools
2019-10-04 13:49:34 +00:00
import sys
import argparse
2019-09-07 14:22:22 +00:00
# Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime.
#
# One SQL expression per line; `N` is the alias the expression is evaluated
# against.  The literal starts with a newline, so the first list element is an
# empty string; empty lines and lines starting with `#` are removed by the
# filtering step further down in this file, so both may be used freely here.
FUNCTIONS = """
toTimeZone(N, 'UTC')
toYear(N, 'Europe/Moscow')
toQuarter(N, 'Europe/Moscow')
toMonth(N, 'Europe/Moscow')
toDayOfYear(N, 'Europe/Moscow')
toDayOfMonth(N, 'Europe/Moscow')
toDayOfWeek(N, 'Europe/Moscow')
toHour(N, 'Europe/Moscow')
toMinute(N, 'Europe/Moscow')
toSecond(N, 'Europe/Moscow')
toUnixTimestamp(N)
toStartOfYear(N, 'Europe/Moscow')
toStartOfISOYear(N, 'Europe/Moscow')
toStartOfQuarter(N, 'Europe/Moscow')
toStartOfMonth(N, 'Europe/Moscow')
toMonday(N, 'Europe/Moscow')
toStartOfWeek(N, 'Europe/Moscow')
toStartOfDay(N, 'Europe/Moscow')
toStartOfHour(N, 'Europe/Moscow')
toStartOfMinute(N, 'Europe/Moscow')
toStartOfFiveMinute(N, 'Europe/Moscow')
toStartOfTenMinutes(N, 'Europe/Moscow')
toStartOfFifteenMinutes(N, 'Europe/Moscow')
toStartOfInterval(N, INTERVAL 1 year, 'Europe/Moscow')
toStartOfInterval(N, INTERVAL 1 month, 'Europe/Moscow')
toStartOfInterval(N, INTERVAL 1 day, 'Europe/Moscow')
toStartOfInterval(N, INTERVAL 15 minute, 'Europe/Moscow')
date_trunc('year', N, 'Europe/Moscow')
date_trunc('month', N, 'Europe/Moscow')
date_trunc('day', N, 'Europe/Moscow')
date_trunc('minute', N, 'Europe/Moscow')
toTime(N, 'Europe/Moscow')
toRelativeYearNum(N, 'Europe/Moscow')
toRelativeQuarterNum(N, 'Europe/Moscow')
toRelativeMonthNum(N, 'Europe/Moscow')
toRelativeWeekNum(N, 'Europe/Moscow')
toRelativeDayNum(N, 'Europe/Moscow')
toRelativeHourNum(N, 'Europe/Moscow')
toRelativeMinuteNum(N, 'Europe/Moscow')
toRelativeSecondNum(N, 'Europe/Moscow')
toISOYear(N, 'Europe/Moscow')
toISOWeek(N, 'Europe/Moscow')
toWeek(N, 'Europe/Moscow')
toYearWeek(N, 'Europe/Moscow')
timeSlot(N, 'Europe/Moscow')
toYYYYMM(N, 'Europe/Moscow')
toYYYYMMDD(N, 'Europe/Moscow')
toYYYYMMDDhhmmss(N, 'Europe/Moscow')
addYears(N, 1, 'Europe/Moscow')
addMonths(N, 1, 'Europe/Moscow')
addWeeks(N, 1, 'Europe/Moscow')
addDays(N, 1, 'Europe/Moscow')
addHours(N, 1, 'Europe/Moscow')
addMinutes(N, 1, 'Europe/Moscow')
addSeconds(N, 1, 'Europe/Moscow')
addQuarters(N, 1, 'Europe/Moscow')
subtractYears(N, 1, 'Europe/Moscow')
subtractMonths(N, 1, 'Europe/Moscow')
subtractWeeks(N, 1, 'Europe/Moscow')
subtractDays(N, 1, 'Europe/Moscow')
subtractHours(N, 1, 'Europe/Moscow')
subtractMinutes(N, 1, 'Europe/Moscow')
subtractSeconds(N, 1, 'Europe/Moscow')
subtractQuarters(N, 1, 'Europe/Moscow')
CAST(N as DateTime('Europe/Minsk'))
CAST(N as Date)
CAST(N as UInt64)
CAST(N as DateTime64(0, 'Europe/Minsk'))
CAST(N as DateTime64(3, 'Europe/Minsk'))
CAST(N as DateTime64(6, 'Europe/Minsk'))
CAST(N as DateTime64(9, 'Europe/Minsk'))
# Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer.
# CAST(N as DateTime64(12, 'Europe/Moscow'))
# DateTime64(18) will always fail due to zero precision, but it is Ok to test here:
# CAST(N as DateTime64(18, 'Europe/Moscow'))
formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', 'Europe/Moscow')
""".splitlines()
# Extended range of DateTime64 to years 1925 - 2238
#
# The year 1925 is the starting point because most of the timezones switched to saner (mostly 15-minute based) offsets somewhere during 1924 or before, and that significantly simplifies the implementation. 2238 is chosen to simplify the arithmetic for sanitizing LUT index access; there are fewer than 0x1ffff days from 1925.
#
# * Extended DateLUTImpl internal LUT to 0x1ffff items, some of which represent negative (pre-1970) time values. As a collateral benefit, Date now correctly supports dates up to 2149 (instead of 2106).
# * Added a new strong typedef ExtendedDayNum, which represents dates pre-1970 and post-2149.
# * Functions that used to return DayNum now return ExtendedDayNum.
# * Refactored DateLUTImpl to untie DayNum from the dual role of being a value and an index (due to negative time). Index is now a different type, LUTIndex, with explicit conversion functions from DayNum, time_t, and ExtendedDayNum.
# * Updated DateLUTImpl to properly support values close to epoch start (1970-01-01 00:00), including negative ones.
# * Reduced resolution of DateLUTImpl::Values::time_at_offset_change to a multiple of 15 minutes to allow storing 64 bits of time_t in DateLUTImpl::Value while keeping the same size.
# * Minor performance updates to DateLUTImpl when building the month LUT by skipping non-start-of-month days.
# * Fixed extractTimeZoneFromFunctionArguments to work correctly with DateTime64.
# * New unit tests and stateless integration tests for both DateTime and DateTime64.
2020-04-17 13:26:44 +00:00
# Expanded later to cartesian product of all arguments, using format string.
#
# Each entry is a pair (templates, substitutions):
#   * templates     - list of str.format templates, e.g. 'N {op} {arg}';
#   * substitutions - dict mapping a placeholder name to the list of values
#                     to try for it.
# Trailing spaces inside operator strings are part of the emitted SQL text and
# are kept as-is.
extra_ops = [
    # With same type:
    (
        ['N {op} N'],
        {
            'op':
            [
                '- ',  # does not work, but should it?
                '+ ',  # does not work, but should it?
                '!=', '==',  # equality and inequality supposed to take sub-second part in account
                '< ',
                '<=',
                '> ',
                '>='
            ]
        }
    ),
    # With other DateTime types:
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        {
            'op':
            [
                '-',  # does not work, but should it?
                '!=', '==',
                # these are naturally expected to work, but they don't:
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg': ['DT', 'D', 'DT64'],
        }
    ),
    # With arithmetic types
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        {
            'op':
            [
                '+ ',
                '- ',
                '==',
                '!=',
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg':
            [
                'toUInt8(1)',
                'toInt8(-1)',
                'toUInt16(1)',
                'toInt16(-1)',
                'toUInt32(1)',
                'toInt32(-1)',
                'toUInt64(1)',
                'toInt64(-1)'
            ],
        },
    ),
]
# Expand extra_ops here: for every combination of substitution values, render
# each template of the entry and append the result to FUNCTIONS.  The order is
# combination-major, template-minor, which fixes the order of emitted queries.
for funcs, args in extra_ops:
    args_keys = list(args)
    for args_vals in itertools.product(*args.values()):
        # Placeholder name -> value chosen for this combination.
        substitutions = dict(zip(args_keys, args_vals))
        for func in funcs:
            FUNCTIONS.append(func.format(**substitutions))
2019-09-07 14:22:22 +00:00
2019-10-22 07:45:00 +00:00
# filter out empty lines and commented out lines
COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#")
FUNCTIONS = [f for f in FUNCTIONS if f and COMMENTED_OUT_LINE_RE.match(f) is None]

# Names of the typed values each function is tested with; every name is
# substituted for `N` in the generated query.
TYPES = ['D', 'DT', 'DT64']
2019-10-22 07:45:00 +00:00
2019-10-04 13:49:34 +00:00
def escape_string(s):
    """Return `s` escaped for embedding inside a single-quoted SQL string.

    On Python 3 the text goes through the `unicode_escape` codec (which escapes
    backslashes and non-printable / non-ASCII characters) and single quotes are
    then backslash-escaped explicitly.  On Python 2 the `string-escape` codec
    is used instead.
    """
    if sys.version_info[0] > 2:
        escaped = s.encode('unicode_escape').decode('utf-8')
        return escaped.replace("'", "\\'")
    return s.encode('string-escape').decode('utf-8')
def execute_functions_for_types(functions, types):
    """Print the SQL script that exercises every function with every type.

    For each entry of `functions` this prints, in order:
      1. a `SELECT '...'` statement echoing the (escaped) function text;
      2. one query per entry of `types`, which binds the aliases DT64, DT and D
         to fixed values, aliases the chosen one as `N`, and selects the result
         type name and the result of the function in CSV format;
      3. a `SELECT` of a dashed separator line.

    :param functions: iterable of SQL expression strings that refer to `N`.
    :param types: iterable of alias names (e.g. 'D', 'DT', 'DT64') to bind to `N`.
    """
    # NOTE: the original remark here suggested string.Template to allow lines
    # with missing keys, like type, e.g.
    # SELECT CAST(toDateTime64(1234567890), 'DateTime64');
    # the code below uses str.format, with the function text passed as a value.
    prologue_template = (
        "WITH "
        "toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, "
        "toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, "
        "toDate('2019-09-16') as D, {X} as N"
    )
    for func in functions:
        print("""SELECT 'SELECT {func}';""".format(func=escape_string(func)))
        for dt in types:
            prologue = prologue_template.format(X=dt)
            print("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func))
        print("""SELECT '------------------------------------------';""")
def main():
    """Parse the command line and print the generated SQL script.

    Options:
      --functions_re   keep only functions whose text the regex finds (search);
      --types_re       keep only types the regex matches in full;
      --list_functions print the selected functions, one per line, and stop.

    :return: -1 when a filter leaves nothing to test, 0 after listing
             functions, None after generating the script.
    """
    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None)
        parser.add_argument('--types_re',
                            type=lambda s: re.compile('^(' + s + ')$'),
                            help="RE to enable types, supported types: " + ",".join(TYPES), default=None)
        parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit")
        return parser.parse_args()

    args = parse_args()

    functions = FUNCTIONS
    types = TYPES

    if args.functions_re:
        functions = [f for f in functions if args.functions_re.search(f)]
        if not functions:
            # stdout carries the generated SQL, so diagnostics go to stderr.
            sys.stderr.write("functions list is empty\n")
            return -1

    if args.types_re:
        types = [t for t in types if args.types_re.match(t)]
        if not types:
            sys.stderr.write("types list is empty\n")
            return -1

    if args.list_functions:
        print("\n".join(functions))
        return 0

    execute_functions_for_types(functions, types)
if __name__ == '__main__':
    # Use sys.exit: the bare `exit` builtin is installed by the `site` module
    # for interactive sessions and is not guaranteed to exist in every run.
    sys.exit(main())