ClickHouse/tests/queries/0_stateless/00921_datetime64_compatibility_long.python

228 lines
6.1 KiB
Plaintext
Raw Normal View History

2020-10-02 16:54:07 +00:00
#!/usr/bin/env python3
2019-09-07 14:22:22 +00:00
# encoding: utf-8
import re
import itertools
2019-10-04 13:49:34 +00:00
import sys
import argparse
2019-09-07 14:22:22 +00:00
# SQL expression templates used to verify that DateTime64 is accepted as an
# argument by functions taking DateTime. `N` is a placeholder alias that the
# generated query binds to a value of each tested type.
# The raw list still contains a leading empty line and the '#'-commented
# entries below; those are meant to be filtered out before the list is used.
FUNCTIONS = """
toTimeZone(N, 'UTC')
toYear(N)
toQuarter(N)
toMonth(N)
toDayOfYear(N)
toDayOfMonth(N)
toDayOfWeek(N)
toHour(N)
toMinute(N)
toSecond(N)
toUnixTimestamp(N)
toStartOfYear(N)
toStartOfISOYear(N)
toStartOfQuarter(N)
toStartOfMonth(N)
toMonday(N)
toStartOfWeek(N)
toStartOfDay(N)
toStartOfHour(N)
toStartOfMinute(N)
toStartOfFiveMinute(N)
toStartOfTenMinutes(N)
toStartOfFifteenMinutes(N)
toStartOfInterval(N, INTERVAL 1 year)
toStartOfInterval(N, INTERVAL 1 month)
toStartOfInterval(N, INTERVAL 1 day)
toStartOfInterval(N, INTERVAL 15 minute)
date_trunc('year', N)
date_trunc('month', N)
date_trunc('day', N)
date_trunc('minute', N)
toTime(N)
toRelativeYearNum(N)
toRelativeQuarterNum(N)
toRelativeMonthNum(N)
toRelativeWeekNum(N)
toRelativeDayNum(N)
toRelativeHourNum(N)
toRelativeMinuteNum(N)
toRelativeSecondNum(N)
toISOYear(N)
toISOWeek(N)
toWeek(N)
toYearWeek(N)
timeSlot(N)
toYYYYMM(N)
toYYYYMMDD(N)
toYYYYMMDDhhmmss(N)
addYears(N, 1)
addMonths(N, 1)
addWeeks(N, 1)
addDays(N, 1)
addHours(N, 1)
addMinutes(N, 1)
addSeconds(N, 1)
addQuarters(N, 1)
subtractYears(N, 1)
subtractMonths(N, 1)
subtractWeeks(N, 1)
subtractDays(N, 1)
subtractHours(N, 1)
subtractMinutes(N, 1)
subtractSeconds(N, 1)
subtractQuarters(N, 1)
CAST(N as DateTime('Europe/Minsk'))
CAST(N as Date)
CAST(N as UInt64)
CAST(N as DateTime64(0, 'Europe/Minsk'))
CAST(N as DateTime64(3, 'Europe/Minsk'))
CAST(N as DateTime64(6, 'Europe/Minsk'))
CAST(N as DateTime64(9, 'Europe/Minsk'))
# Casting our test values to DateTime(12) will cause an overflow and hence will fail the test under UB sanitizer.
# CAST(N as DateTime64(12))
# DateTime64(18) will always fail due to zero precision, but it is Ok to test here:
# CAST(N as DateTime64(18))
formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%')
""".splitlines()
Extended range of DateTime64 to years 1925 - 2238 The Year 1925 is a starting point because most of the timezones switched to saner (mostly 15-minutes based) offsets somewhere during 1924 or before. And that significantly simplifies implementation. 2238 is to simplify arithmetics for sanitizing LUT index access; there are less than 0x1ffff days from 1925. * Extended DateLUTImpl internal LUT to 0x1ffff items, some of which represent negative (pre-1970) time values. As a collateral benefit, Date now correctly supports dates up to 2149 (instead of 2106). * Added a new strong typedef ExtendedDayNum, which represents dates pre-1970 and post 2149. * Functions that used to return DayNum now return ExtendedDayNum. * Refactored DateLUTImpl to untie DayNum from the dual role of being a value and an index (due to negative time). Index is now a different type LUTIndex with explicit conversion functions from DatNum, time_t, and ExtendedDayNum. * Updated DateLUTImpl to properly support values close to epoch start (1970-01-01 00:00), including negative ones. * Reduced resolution of DateLUTImpl::Values::time_at_offset_change to multiple of 15-minutes to allow storing 64-bits of time_t in DateLUTImpl::Value while keeping same size. * Minor performance updates to DateLUTImpl when building month LUT by skipping non-start-of-month days. * Fixed extractTimeZoneFromFunctionArguments to work correctly with DateTime64. * New unit-tests and stateless integration tests for both DateTime and DateTime64.
2020-04-17 13:26:44 +00:00
# Operator-expression templates. Each entry is a pair:
#   (list of format-string templates, dict mapping placeholder -> candidate values).
# Entries are expanded later to the cartesian product of all placeholder
# values, using the templates as format strings. The order of dict keys and
# list items determines the order of the generated expressions.
extra_ops = [
    # With same type:
    (
        ['N {op} N'],
        {
            'op':
            [
                '- ',  # does not work, but should it?
                '+ ',  # does not work, but should it?
                '!=', '==',  # equality and inequality supposed to take sub-second part in account
                '< ',
                '<=',
                '> ',
                '>='
            ]
        }
    ),
    # With other DateTime types:
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        {
            'op':
            [
                '-',  # does not work, but should it?
                '!=', '==',
                # these are naturally expected to work, but they don't:
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg': ['DT', 'D', 'DT64'],
        }
    ),
    # With arithmetic types
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        {
            'op':
            [
                '+ ',
                '- ',
                '==',
                '!=',
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg':
            [
                'toUInt8(1)',
                'toInt8(-1)',
                'toUInt16(1)',
                'toInt16(-1)',
                'toUInt32(1)',
                'toInt32(-1)',
                'toUInt64(1)',
                'toInt64(-1)'
            ],
        },
    ),
]
# Expand extra_ops here: for every combination of placeholder values
# (cartesian product, in dict order) format each template of the entry and
# append the resulting expression to FUNCTIONS.
for funcs, args in extra_ops:
    args_keys = list(args)
    for args_vals in itertools.product(*args.values()):
        for func in funcs:
            # Pair each placeholder name with the value chosen for this combination.
            result_func = func.format(**dict(zip(args_keys, args_vals)))
            FUNCTIONS.append(result_func)
2019-09-07 14:22:22 +00:00
2019-10-22 07:45:00 +00:00
# Filter out empty lines and commented-out lines (optional leading whitespace
# followed by '#') so that only real expressions remain in FUNCTIONS.
COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#")
FUNCTIONS = [f for f in FUNCTIONS if f and COMMENTED_OUT_LINE_RE.match(f) is None]
# Aliases of the test values that N is bound to in the generated queries.
TYPES = ['D', 'DT', 'DT64']
2019-10-22 07:45:00 +00:00
2019-10-04 13:49:34 +00:00
def escape_string(s):
    """Return ``s`` escaped for embedding in a single-quoted SQL string literal.

    Backslashes and non-printable / non-ASCII characters are turned into
    backslash escape sequences and single quotes are backslash-escaped.

    Args:
        s: text to escape.

    Returns:
        The escaped text.
    """
    if sys.version_info[0] > 2:
        # unicode_escape leaves single quotes untouched, so they are escaped
        # explicitly afterwards.
        return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'")
    # Python 2 fallback. NOTE(review): no explicit quote replacement here;
    # presumably the 'string-escape' codec already escapes single quotes.
    return s.encode('string-escape').decode('utf-8')
def execute_functions_for_types(functions, types):
    """Print the SQL test script for every (expression, type) combination.

    For each expression in ``functions`` this prints:
      1. a query that echoes the expression text (escaped via escape_string),
      2. one query per entry of ``types`` that binds DT64, DT and D to fixed
         test values, aliases the chosen one as ``N`` and selects the result
         type name and the value of the expression in CSV format,
      3. a separator query.

    Args:
        functions: iterable of SQL expression strings that refer to ``N``.
        types: iterable of alias names ('D', 'DT', 'DT64') to bind ``N`` to.

    Returns:
        None; all output goes to stdout.
    """
    # The WITH clause does not depend on the expression, only on the alias
    # substituted for {X}, so the template is built once.
    prologue_template = (
        "WITH "
        "toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, "
        "toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, "
        "toDate('2019-09-16') as D, {X} as N"
    )

    # NOTE(review): an earlier comment suggested using string.Template here to
    # tolerate templates with missing keys; the code uses str.format, with the
    # expression passed as a value (braces inside it are not interpreted).
    for func in functions:
        print("SELECT 'SELECT {func}';".format(func=escape_string(func)))
        for dt in types:
            prologue = prologue_template.format(X=dt)
            print("{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;".format(prologue=prologue, func=func))
        print("SELECT '------------------------------------------';")
def main():
    """Parse command-line options and print the generated SQL test script.

    Options:
        --functions_re   regular expression; only expressions it is found in
                         (``search``) are kept.
        --types_re       regular expression; only types it matches entirely
                         (it is wrapped in ``^(...)$``) are kept.
        --list_functions print the selected expressions, one per line, and exit.

    Returns:
        0 after listing functions, -1 if a filter leaves nothing to test, and
        None after the script has been printed.
    """
    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None)
        parser.add_argument('--types_re',
                            type=lambda s: re.compile('^(' + s + ')$'),
                            help="RE to enable types, supported types: " + ",".join(TYPES), default=None)
        parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit")
        return parser.parse_args()

    args = parse_args()

    functions = FUNCTIONS
    types = TYPES

    if args.functions_re:
        functions = [f for f in functions if args.functions_re.search(f)]
        if not functions:
            print("functions list is empty")
            return -1

    if args.types_re:
        types = [t for t in types if args.types_re.match(t)]
        if not types:
            print("types list is empty")
            return -1

    if args.list_functions:
        print("\n".join(functions))
        return 0

    execute_functions_for_types(functions, types)
if __name__ == '__main__':
    # Use sys.exit rather than the `exit` helper, which is injected by the
    # `site` module for interactive use and is not guaranteed to exist.
    sys.exit(main())