Merge pull request #44860 from bigo-sg/improve_week_day

update function DAYOFWEEK and add new function WEEKDAY for mysql/spark compatiability
This commit is contained in:
Robert Schulze 2023-01-11 13:58:45 +01:00 committed by GitHub
commit 9bb1e31369
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 153 additions and 90 deletions

View File

@ -207,9 +207,16 @@ Converts a date or date with time to a UInt8 number containing the number of the
Aliases: `DAYOFMONTH`, `DAY`.
## toDayOfWeek
## toDayOfWeek(date\[,mode\])
Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).
Converts a date or date with time to a UInt8 number containing the number of the day of the week. The two-argument form of toDayOfWeek() enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or from 1-7. If the mode argument is ommited, the default mode is 0.
| Mode | First day of week | Range |
|------|-------------------|------------------------------------------------|
| 0 | Monday | 1-7, Monday = 1, Tuesday = 2, ..., Sunday = 7 |
| 1 | Monday | 0-6, Monday = 0, Tuesday = 1, ..., Sunday = 6 |
| 2 | Sunday | 0-6, Sunday = 0, Monday = 1, ..., Saturday = 6 |
| 3 | Sunday | 1-7, Sunday = 1, Monday = 2, ..., Saturday = 7 |
Alias: `DAYOFWEEK`.

View File

@ -39,6 +39,15 @@ enum class WeekModeFlag : UInt8
};
using YearWeek = std::pair<UInt16, UInt8>;
/// Modes for toDayOfWeek() function.
enum class WeekDayMode
{
WeekStartsMonday1 = 0,
WeekStartsMonday0 = 1,
WeekStartsSunday0 = 2,
WeekStartsSunday1 = 3
};
/** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on.
* First time was implemented for OLAPServer, that needed to do billions of such transformations.
*/
@ -619,9 +628,25 @@ public:
template <typename DateOrTime>
inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
/// 1-based, starts on Monday
template <typename DateOrTime>
inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
template <typename DateOrTime>
inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
{
WeekDayMode mode = check_week_day_mode(week_day_mode);
UInt8 res = toDayOfWeek(v);
bool start_from_sunday = (mode == WeekDayMode::WeekStartsSunday0 || mode == WeekDayMode::WeekStartsSunday1);
bool zero_based = (mode == WeekDayMode::WeekStartsMonday0 || mode == WeekDayMode::WeekStartsSunday0);
if (start_from_sunday)
res = res % 7 + 1;
if (zero_based)
--res;
return res;
}
template <typename DateOrTime>
inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
@ -844,6 +869,13 @@ public:
return week_format;
}
/// Check and change mode to effective.
inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
{
return static_cast<WeekDayMode>(mode & 3);
}
/** Calculate weekday from d.
* Returns 0 for monday, 1 for tuesday...
*/

View File

@ -786,21 +786,21 @@ struct ToDayOfWeekImpl
{
static constexpr auto name = "toDayOfWeek";
static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone)
static inline UInt8 execute(Int64 t, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(t);
return time_zone.toDayOfWeek(t, week_day_mode);
}
static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone)
static inline UInt8 execute(UInt32 t, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(t);
return time_zone.toDayOfWeek(t, week_day_mode);
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
static inline UInt8 execute(Int32 d, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(ExtendedDayNum(d));
return time_zone.toDayOfWeek(ExtendedDayNum(d), week_day_mode);
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
static inline UInt8 execute(UInt16 d, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(DayNum(d));
return time_zone.toDayOfWeek(DayNum(d), week_day_mode);
}
using FactorTransform = ToMondayImpl;

View File

@ -225,8 +225,8 @@ public:
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
{
auto x_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
auto y_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
auto x_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x);
auto y_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y);
if ((x_day_of_week > y_day_of_week)
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)

View File

@ -276,7 +276,7 @@ private:
{
static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone)
{
const auto day = ToDayOfWeekImpl::execute(source, timezone);
const auto day = ToDayOfWeekImpl::execute(source, 0, timezone);
static constexpr std::string_view day_names[] =
{
"Monday",

View File

@ -344,13 +344,13 @@ private:
static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
*dest = '0' + ToDayOfWeekImpl::execute(source, timezone);
*dest = '0' + ToDayOfWeekImpl::execute(source, 0, timezone);
return 1;
}
static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto day = ToDayOfWeekImpl::execute(source, timezone);
auto day = ToDayOfWeekImpl::execute(source, 0, timezone);
*dest = '0' + (day == 7 ? 0 : day);
return 1;
}
@ -499,13 +499,13 @@ private:
static size_t jodaDayOfWeek1Based(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto week_day = ToDayOfWeekImpl::execute(source, timezone);
auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone);
return writeNumberWithPadding(dest, week_day, min_represent_digits);
}
static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto week_day = ToDayOfWeekImpl::execute(source, timezone);
auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone);
if (week_day == 7)
week_day = 0;

View File

@ -1,13 +1,12 @@
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionDateOrDateTimeToSomething.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionCustomWeekToSomething.h>
namespace DB
{
using FunctionToDayOfWeek = FunctionDateOrDateTimeToSomething<DataTypeUInt8, ToDayOfWeekImpl>;
using FunctionToDayOfWeek = FunctionCustomWeekToSomething<DataTypeUInt8, ToDayOfWeekImpl>;
REGISTER_FUNCTION(ToDayOfWeek)
{

View File

@ -120,11 +120,11 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn');
/* toDayOfWeek */
SELECT 'toDayOfWeek';
SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul');
SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris');
SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London');
SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo');
SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Istanbul');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/Paris');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/London');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Tokyo');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Pacific/Pitcairn');
/* toHour */

View File

@ -7,14 +7,14 @@ import sys
import argparse
# Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime.
FUNCTIONS="""
FUNCTIONS = """
toTimeZone(N, 'UTC')
toYear(N, 'Asia/Istanbul')
toQuarter(N, 'Asia/Istanbul')
toMonth(N, 'Asia/Istanbul')
toDayOfYear(N, 'Asia/Istanbul')
toDayOfMonth(N, 'Asia/Istanbul')
toDayOfWeek(N, 'Asia/Istanbul')
toDayOfWeek(N, 0, 'Asia/Istanbul')
toHour(N, 'Asia/Istanbul')
toMinute(N, 'Asia/Istanbul')
toSecond(N, 'Asia/Istanbul')
@ -90,68 +90,51 @@ formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%',
extra_ops = [
# With same type:
(
['N {op} N'],
["N {op} N"],
{
'op':
[
'- ', # does not work, but should it?
'+ ', # does not work, but should it?
'!=', '==', # equality and inequality supposed to take sub-second part in account
'< ',
'<=',
'> ',
'>='
"op": [
"- ", # does not work, but should it?
"+ ", # does not work, but should it?
"!=",
"==", # equality and inequality supposed to take sub-second part in account
"< ",
"<=",
"> ",
">=",
]
}
},
),
# With other DateTime types:
(
[
'N {op} {arg}',
'{arg} {op} N'
],
["N {op} {arg}", "{arg} {op} N"],
{
'op':
[
'-', # does not work, but should it?
'!=', '==',
"op": [
"-", # does not work, but should it?
"!=",
"==",
# these are naturally expected to work, but they don't:
'< ',
'<=',
'> ',
'>='
"< ",
"<=",
"> ",
">=",
],
'arg': ['DT', 'D', 'DT64'],
}
"arg": ["DT", "D", "DT64"],
},
),
# With arithmetic types
(
[
'N {op} {arg}',
'{arg} {op} N'
],
["N {op} {arg}", "{arg} {op} N"],
{
'op':
[
'+ ',
'- ',
'==',
'!=',
'< ',
'<=',
'> ',
'>='
],
'arg':
[
'toUInt8(1)',
'toInt8(-1)',
'toUInt16(1)',
'toInt16(-1)',
'toUInt32(1)',
'toInt32(-1)',
'toUInt64(1)',
'toInt64(-1)'
"op": ["+ ", "- ", "==", "!=", "< ", "<=", "> ", ">="],
"arg": [
"toUInt8(1)",
"toInt8(-1)",
"toUInt16(1)",
"toInt16(-1)",
"toUInt32(1)",
"toInt32(-1)",
"toUInt64(1)",
"toInt64(-1)",
],
},
),
@ -167,14 +150,17 @@ for funcs, args in extra_ops:
# filter out empty lines and commented out lines
COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#")
FUNCTIONS = list([f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None])
TYPES = ['D', 'DT', 'DT64']
FUNCTIONS = list(
[f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None]
)
TYPES = ["D", "DT", "DT64"]
def escape_string(s):
if sys.version_info[0] > 2:
return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'")
return s.encode("unicode_escape").decode("utf-8").replace("'", "\\'")
else:
return s.encode('string-escape').decode('utf-8')
return s.encode("string-escape").decode("utf-8")
def execute_functions_for_types(functions, types):
@ -186,18 +172,39 @@ def execute_functions_for_types(functions, types):
WITH \
toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, \
toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, \
toDate('2019-09-16') as D, {X} as N".format(X=dt)
print(("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func)))
toDate('2019-09-16') as D, {X} as N".format(
X=dt
)
print(
(
"""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(
prologue=prologue, func=func
)
)
)
print("""SELECT '------------------------------------------';""")
def main():
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None)
parser.add_argument('--types_re',
type=lambda s: re.compile('^(' + s + ')$'),
help="RE to enable types, supported types: " + ",".join(TYPES), default=None)
parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit")
parser.add_argument(
"--functions_re",
type=re.compile,
help="RE to enable functions",
default=None,
)
parser.add_argument(
"--types_re",
type=lambda s: re.compile("^(" + s + ")$"),
help="RE to enable types, supported types: " + ",".join(TYPES),
default=None,
)
parser.add_argument(
"--list_functions",
action="store_true",
help="List all functions to be tested and exit",
)
return parser.parse_args()
args = parse_args()
@ -223,5 +230,6 @@ def main():
execute_functions_for_types(functions, types)
if __name__ == '__main__':
if __name__ == "__main__":
exit(main())

View File

@ -28,7 +28,7 @@ SELECT toDayOfMonth(N, \'Asia/Istanbul\')
"UInt8",16
"UInt8",16
------------------------------------------
SELECT toDayOfWeek(N, \'Asia/Istanbul\')
SELECT toDayOfWeek(N, 0, \'Asia/Istanbul\')
"UInt8",1
"UInt8",1
"UInt8",1

View File

@ -0,0 +1,7 @@
1 7
1 7
0 6
1 0
2 1
1 7
0 6

View File

@ -0,0 +1,10 @@
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon), toDayOfWeek(date_sun);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 0), toDayOfWeek(date_sun, 0);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 1), toDayOfWeek(date_sun, 1);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 2), toDayOfWeek(date_sun, 2);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 3), toDayOfWeek(date_sun, 3);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 4), toDayOfWeek(date_sun, 4);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 5), toDayOfWeek(date_sun, 5);
select toDayOfWeek(today(), -1); -- { serverError 43 }