Fixed year 2038 problem. Added support for timezones with fractional offset from UTC. [#CLICKHOUSE-2099] [#CLICKHOUSE-3392].

This commit is contained in:
Alexey Milovidov 2017-10-29 03:51:40 +03:00
parent f02b63fc39
commit 4236bcbfa2
12 changed files with 280 additions and 60 deletions

View File

@ -188,7 +188,7 @@ struct ToStartOfMinuteImpl
{
static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfMinuteInaccurate(t);
return time_zone.toStartOfMinute(t);
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
@ -202,7 +202,7 @@ struct ToStartOfFiveMinuteImpl
{
static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfFiveMinuteInaccurate(t);
return time_zone.toStartOfFiveMinute(t);
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
@ -216,8 +216,9 @@ struct ToStartOfHourImpl
{
static inline UInt32 execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toStartOfHourInaccurate(t);
return time_zone.toStartOfHour(t);
}
static inline UInt32 execute(UInt16 d, const DateLUTImpl & time_zone)
{
throw Exception("Illegal type Date of argument for function toStartOfHour", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -288,6 +289,7 @@ struct ToHourImpl
{
return time_zone.toHour(t);
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
throw Exception("Illegal type Date of argument for function toHour", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -300,7 +302,7 @@ struct ToMinuteImpl
{
static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toMinuteInaccurate(t);
return time_zone.toMinute(t);
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{
@ -314,7 +316,7 @@ struct ToSecondImpl
{
static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toSecondInaccurate(t);
return time_zone.toSecond(t);
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
{

View File

@ -563,8 +563,8 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI
s[9] += values.day_of_month % 10;
UInt8 hour = date_lut.toHour(datetime);
UInt8 minute = date_lut.toMinuteInaccurate(datetime);
UInt8 second = date_lut.toSecondInaccurate(datetime);
UInt8 minute = date_lut.toMinute(datetime);
UInt8 second = date_lut.toSecond(datetime);
s[11] += hour / 10;
s[12] += hour % 10;

View File

@ -0,0 +1,96 @@
<test>
<name>Functions for working with date and time</name>
<type>once</type>
<stop_conditions>
<any_of>
<average_speed_not_changing_for_ms>10000</average_speed_not_changing_for_ms>
<total_time_ms>1000</total_time_ms>
</any_of>
</stop_conditions>
<metrics>
<max_rows_per_second />
<max_bytes_per_second />
<avg_rows_per_second />
<avg_bytes_per_second />
</metrics>
<substitutions>
<substitution>
<name>datetime_transform</name>
<values>
<value>toSecond</value>
<value>toMinute</value>
<value>toHour</value>
<value>toDayOfWeek</value>
<value>toDayOfMonth</value>
<value>toMonth</value>
<value>toQuarter</value>
<value>toYear</value>
<value>toStartOfMinute</value>
<value>toStartOfFiveMinute</value>
<value>toStartOfHour</value>
<value>toStartOfDay</value>
<value>toDate</value>
<value>toMonday</value>
<value>toStartOfMonth</value>
<value>toStartOfQuarter</value>
<value>toYear</value>
<value>toTime</value>
<value>toRelativeYearNum</value>
<value>toRelativeMonthNum</value>
<value>toRelativeWeekNum</value>
<value>toRelativeDayNum</value>
<value>toRelativeHourNum</value>
<value>toRelativeMinuteNum</value>
<value>toRelativeSecondNum</value>
<value>toYYYYMM</value>
<value>toYYYYMMDD</value>
<value>toYYYYMMDDhhmmss</value>
</values>
</substitution>
<substitution>
<name>date_transform</name>
<values>
<value>toDayOfWeek</value>
<value>toDayOfMonth</value>
<value>toMonth</value>
<value>toQuarter</value>
<value>toYear</value>
<value>toStartOfDay</value>
<value>toDate</value>
<value>toMonday</value>
<value>toStartOfMonth</value>
<value>toStartOfQuarter</value>
<value>toYear</value>
<value>toRelativeYearNum</value>
<value>toRelativeMonthNum</value>
<value>toRelativeWeekNum</value>
<value>toRelativeDayNum</value>
<value>toYYYYMM</value>
<value>toYYYYMMDD</value>
<value>toYYYYMMDDhhmmss</value>
</values>
</substitution>
<substitution>
<name>time_zone</name>
<values>
<value>UTC</value>
<value>Europe/Moscow</value>
<value>Asia/Kolkata</value>
</values>
</substitution>
</substitutions>
<query>SELECT count() FROM system.numbers WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}'))</query>
<query>SELECT count() FROM system.numbers WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t))</query>
</test>

View File

@ -0,0 +1,41 @@
<test>
<name>set_lookup_random</name>
<type>once</type>
<stop_conditions>
<all_of>
<total_time_ms>10000</total_time_ms>
</all_of>
<any_of>
<average_speed_not_changing_for_ms>5000</average_speed_not_changing_for_ms>
<total_time_ms>20000</total_time_ms>
</any_of>
</stop_conditions>
<main_metric>
<avg_rows_per_second/>
</main_metric>
<substitutions>
<substitution>
<name>table</name>
<values>
<value>system.numbers</value>
<value>system.numbers_mt</value>
</values>
</substitution>
<substitution>
<name>size</name>
<values>
<value>1</value>
<value>16</value>
<value>1024</value>
<value>16384</value>
<value>1048576</value>
<value>10000000</value>
</values>
</substitution>
</substitutions>
<query>SELECT count() FROM {table} WHERE rand64() IN (SELECT number FROM system.numbers LIMIT {size})</query>
</test>

View File

@ -0,0 +1,25 @@
<test>
<name>set_lookup_hits</name>
<type>once</type>
<stop_conditions>
<all_of>
<total_time_ms>10000</total_time_ms>
</all_of>
<any_of>
<average_speed_not_changing_for_ms>5000</average_speed_not_changing_for_ms>
<total_time_ms>20000</total_time_ms>
</any_of>
</stop_conditions>
<main_metric>
<avg_rows_per_second/>
</main_metric>
<query>SELECT count() FROM hits_100m_single WHERE UserID IN (SELECT UserID FROM hits_100m_single WHERE AdvEngineID != 0)</query>
<query>SELECT count() FROM hits_100m_single WHERE UserID IN (SELECT UserID FROM hits_100m_single)</query>
<query>SELECT count() FROM hits_100m_single WHERE SearchPhrase IN (SELECT SearchPhrase FROM hits_100m_single)</query>
<query>SELECT count() FROM hits_100m_single WHERE URL IN (SELECT URL FROM hits_100m_single WHERE AdvEngineID != 0)</query>
<query>SELECT count() FROM hits_100m_single WHERE URL IN (SELECT URL FROM hits_100m_single WHERE SearchEngineID != 0)</query>
<query>SELECT count() FROM hits_100m_single WHERE RegionID IN (SELECT RegionID FROM hits_100m_single)</query>
</test>

View File

@ -0,0 +1,3 @@
1990-10-19 00:00:00
1990-10-19 00:00:00
1990-10-19 00:00:00

View File

@ -0,0 +1,5 @@
#!/usr/bin/env bash
TZ=Europe/Moscow clickhouse-local --query="SELECT toDateTime('1990-10-19 00:00:00')" 2>/dev/null
TZ=Asia/Colombo clickhouse-local --query="SELECT toDateTime('1990-10-19 00:00:00')" 2>/dev/null
TZ=Asia/Kathmandu clickhouse-local --query="SELECT toDateTime('1990-10-19 00:00:00')" 2>/dev/null

View File

@ -0,0 +1,12 @@
2 2017-10-28 02:00:00
2 2017-10-28 02:00:00
2 2017-10-28 02:00:00
2 2017-10-28 02:00:00
2 2017-10-28 02:00:00
2 2017-10-28 02:00:00
3 2017-10-28 03:00:00
3 2017-10-28 03:00:00
3 2017-10-28 03:00:00
3 2017-10-28 03:00:00
3 2017-10-28 03:00:00
3 2017-10-28 03:00:00

View File

@ -0,0 +1 @@
WITH toDateTime(1509138000) + number * 300 AS t SELECT toHour(t, 'Asia/Kolkata') AS h, toString(toStartOfHour(t, 'Asia/Kolkata'), 'Asia/Kolkata') AS h_start FROM system.numbers LIMIT 12;

View File

@ -11,11 +11,13 @@
#include <ctime>
#define DATE_LUT_MIN 0
#define DATE_LUT_MAX (0x7FFFFFFF - 86400)
#define DATE_LUT_MAX_DAY_NUM (0x7FFFFFFF / 86400)
#define DATE_LUT_MAX (0xFFFFFFFF - 86400)
#define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFF / 86400)
/// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check.
#define DATE_LUT_SIZE 0x10000
#define DATE_LUT_MIN_YEAR 1970
#define DATE_LUT_MAX_YEAR 2037 /// Last supported year
#define DATE_LUT_YEARS 68 /// Number of years in lookup table
#define DATE_LUT_MAX_YEAR 2105 /// Last supported year
#define DATE_LUT_YEARS 136 /// Number of years in lookup table
STRONG_TYPEDEF(UInt16, DayNum_t);
@ -33,9 +35,9 @@ public:
struct Values
{
/// Least significat 32 bits from time_t at beginning of the day.
/// Signedness is important to support 1970-01-01 MSK, where time_t == -10800.
/// Change to time_t; change constants above; and recompile the sources if you need to support time after 2038 year.
Int32 date;
/// If the unix timestamp of beginning of the day is negative (example: 1970-01-01 MSK, where time_t == -10800), then value is zero.
/// Change to time_t; change constants above; and recompile the sources if you need to support time after 2105 year.
UInt32 date;
/// Properties of the day.
UInt16 year;
@ -51,15 +53,16 @@ public:
private:
/// Lookup table is indexed by DayNum.
/// Day nums are the same in all time zones. 1970-01-01 is 0 and so on.
/// Table is relatively large (~30 000 elements), so better not to place object on stack.
/// Table is relatively large, so better not to place object on stack.
/// In comparison to std::vector, plain array is cheaper by one indirection.
Values lut[DATE_LUT_MAX_DAY_NUM + 1];
Values lut[DATE_LUT_SIZE];
/// Year number after DATE_LUT_MIN_YEAR -> day num for start of year.
DayNum_t years_lut[DATE_LUT_YEARS];
/// UTC offset at beginning of the Unix epoch.
/// UTC offset at beginning of the Unix epoch. The same as unix timestamp of 1970-01-01 00:00:00 local time.
time_t offset_at_start_of_epoch;
bool offset_is_whole_number_of_hours_everytime;
/// Time zone name.
std::string time_zone;
@ -92,15 +95,10 @@ private:
return lut[findIndex(t)];
}
static inline DayNum_t fixDay(DayNum_t day)
{
return day > DATE_LUT_MAX_DAY_NUM ? static_cast<DayNum_t>(0) : day;
}
public:
const std::string & getTimeZone() const { return time_zone; }
/// всё ниже thread-safe; корректность входных данных не проверяется
/// All functions below are thread-safe; arguments are not checked.
inline time_t toDate(time_t t) const { return find(t).date; }
inline unsigned toMonth(time_t t) const { return find(t).month; }
@ -175,7 +173,7 @@ public:
inline DayNum_t toFirstDayNumOfMonth(DayNum_t d) const
{
return DayNum_t(d - (lut[fixDay(d)].day_of_month - 1));
return DayNum_t(d - (lut[d].day_of_month - 1));
}
inline DayNum_t toFirstDayNumOfMonth(time_t t) const
@ -202,7 +200,7 @@ public:
inline DayNum_t toFirstDayNumOfQuarter(DayNum_t d) const
{
size_t index = fixDay(d);
size_t index = d;
switch (lut[index].month % 3)
{
case 0:
@ -238,7 +236,7 @@ public:
inline DayNum_t toFirstDayNumOfYear(DayNum_t d) const
{
return years_lut[lut[fixDay(d)].year - DATE_LUT_MIN_YEAR];
return years_lut[lut[d].year - DATE_LUT_MIN_YEAR];
}
inline time_t toFirstDayNumOfYear(time_t t) const
@ -275,7 +273,7 @@ public:
/** Округление до даты; затем сдвиг на указанное количество дней.
* Замечание: результат сдвига должен находиться в пределах LUT.
*/
inline time_t toDateAndShift(time_t t, int days = 1) const
inline time_t toDateAndShift(time_t t, int days) const
{
return lut[findIndex(t) + days].date;
}
@ -283,6 +281,10 @@ public:
inline time_t toTime(time_t t) const
{
size_t index = findIndex(t);
if (unlikely(index == 0))
return t - offset_at_start_of_epoch;
time_t res = t - lut[index].date;
if (res >= lut[index].time_at_offset_change)
@ -294,6 +296,10 @@ public:
inline unsigned toHour(time_t t) const
{
size_t index = findIndex(t);
if (unlikely(index == 0))
return (t - offset_at_start_of_epoch) / 3600;
time_t res = t - lut[index].date;
if (res >= lut[index].time_at_offset_change)
@ -302,42 +308,59 @@ public:
return res / 3600;
}
inline unsigned toMinute(time_t t) const { return ((t - find(t).date) % 3600) / 60; }
inline unsigned toSecond(time_t t) const { return (t - find(t).date) % 60; }
/** Only for time zones with/when offset from UTC is multiple of five minutes.
* This is true for all time zones: right now, all time zones have an offset that is multiple of 15 minutes.
*
* "By 1929, most major countries had adopted hourly time zones. Nepal was the last
* country to adopt a standard offset, shifting slightly to UTC+5:45 in 1986."
* - https://en.wikipedia.org/wiki/Time_zone#Offsets_from_UTC
*
* Also please note, that unix timestamp doesn't count "leap seconds":
* each minute, with added or subtracted leap second, spans exactly 60 unix timestamps.
*/
inline unsigned toStartOfMinute(time_t t) const
inline unsigned toSecond(time_t t) const { return t % 60; }
inline unsigned toMinute(time_t t) const
{
if (offset_is_whole_number_of_hours_everytime)
return (t / 60) % 60;
time_t date = find(t).date;
return date + (t - date) / 60 * 60;
return (t - date) / 60 % 60;
}
inline unsigned toStartOfHour(time_t t) const
inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; }
inline time_t toStartOfFiveMinute(time_t t) const { return t / 300 * 300; }
inline time_t toStartOfHour(time_t t) const
{
if (offset_is_whole_number_of_hours_everytime)
return t / 3600 * 3600;
time_t date = find(t).date;
/// Still can return wrong values for time at 1970-01-01 if the UTC offset was non-whole number of hours.
return date + (t - date) / 3600 * 3600;
}
/** Только для часовых поясов, отличающихся от UTC на значение, кратное часу и без перевода стрелок не значение не кратное часу */
inline unsigned toMinuteInaccurate(time_t t) const { return (t / 60) % 60; }
inline unsigned toSecondInaccurate(time_t t) const { return t % 60; }
inline unsigned toStartOfMinuteInaccurate(time_t t) const { return t / 60 * 60; }
inline unsigned toStartOfFiveMinuteInaccurate(time_t t) const { return t / 300 * 300; }
inline unsigned toStartOfHourInaccurate(time_t t) const { return t / 3600 * 3600; }
/// Номер дня в пределах UNIX эпохи (и немного больше) - позволяет хранить дату в двух байтах
/** Number of calendar day since the beginning of UNIX epoch (1970-01-01 is zero)
* We use just two bytes for it. It covers the range up to 2105 and slightly more.
*
* This is "calendar" day, it itself is independent of time zone
* (conversion from/to unix timestamp will depend on time zone,
* because the same calendar day starts/ends at different timestamps in different time zones)
*/
inline DayNum_t toDayNum(time_t t) const { return static_cast<DayNum_t>(findIndex(t)); }
inline time_t fromDayNum(DayNum_t d) const { return lut[fixDay(d)].date; }
inline time_t fromDayNum(DayNum_t d) const { return lut[d].date; }
inline time_t toDate(DayNum_t d) const { return lut[fixDay(d)].date; }
inline unsigned toMonth(DayNum_t d) const { return lut[fixDay(d)].month; }
inline unsigned toYear(DayNum_t d) const { return lut[fixDay(d)].year; }
inline unsigned toDayOfWeek(DayNum_t d) const { return lut[fixDay(d)].day_of_week; }
inline unsigned toDayOfMonth(DayNum_t d) const { return lut[fixDay(d)].day_of_month; }
inline time_t toDate(DayNum_t d) const { return lut[d].date; }
inline unsigned toMonth(DayNum_t d) const { return lut[d].month; }
inline unsigned toYear(DayNum_t d) const { return lut[d].year; }
inline unsigned toDayOfWeek(DayNum_t d) const { return lut[d].day_of_week; }
inline unsigned toDayOfMonth(DayNum_t d) const { return lut[d].day_of_month; }
inline const Values & getValues(DayNum_t d) const { return lut[fixDay(d)]; }
inline const Values & getValues(DayNum_t d) const { return lut[d]; }
inline const Values & getValues(time_t t) const { return lut[findIndex(t)]; }
/// получает DayNum_t из года, месяца, дня
@ -375,7 +398,7 @@ public:
inline UInt32 toNumYYYYMM(DayNum_t d) const
{
const Values & values = lut[fixDay(d)];
const Values & values = lut[d];
return values.year * 100 + values.month;
}
@ -387,7 +410,7 @@ public:
inline UInt32 toNumYYYYMMDD(DayNum_t d) const
{
const Values & values = lut[fixDay(d)];
const Values & values = lut[d];
return values.year * 10000 + values.month * 100 + values.day_of_month;
}
@ -406,8 +429,8 @@ public:
{
const Values & values = find(t);
return
toSecondInaccurate(t)
+ toMinuteInaccurate(t) * 100
toSecond(t)
+ toMinute(t) * 100
+ toHour(t) * 10000
+ UInt64(values.day_of_month) * 1000000
+ UInt64(values.month) * 100000000
@ -425,7 +448,6 @@ public:
num % 100);
}
inline std::string timeToString(time_t t) const
{
const Values & values = find(t);
@ -442,8 +464,8 @@ public:
s[9] += values.day_of_month % 10;
auto hour = toHour(t);
auto minute = toMinuteInaccurate(t);
auto second = toSecondInaccurate(t);
auto minute = toMinute(t);
auto second = toSecond(t);
s[11] += hour / 10;
s[12] += hour % 10;
@ -475,7 +497,7 @@ public:
inline std::string dateToString(DayNum_t d) const
{
const Values & values = lut[fixDay(d)];
const Values & values = lut[d];
std::string s {"0000-00-00"};
@ -490,4 +512,6 @@ public:
return s;
}
inline bool isOffsetWholeNumberOfHoursEveryTime() const { return offset_is_whole_number_of_hours_everytime; }
};

View File

@ -48,8 +48,8 @@ private:
m_month = values.month;
m_day = values.day_of_month;
m_hour = date_lut.toHour(time);
m_minute = date_lut.toMinuteInaccurate(time);
m_second = date_lut.toSecondInaccurate(time);
m_minute = date_lut.toMinute(time);
m_second = date_lut.toSecond(time);
}
void init(const char * s, size_t length)

View File

@ -45,6 +45,7 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
cctz::time_zone::absolute_lookup start_of_epoch_lookup = cctz_time_zone.lookup(std::chrono::system_clock::from_time_t(start_of_day));
offset_at_start_of_epoch = start_of_epoch_lookup.offset;
offset_is_whole_number_of_hours_everytime = true;
cctz::civil_day date{1970, 1, 1};
@ -64,6 +65,9 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
values.time_at_offset_change = 0;
values.amount_of_offset_change = 0;
if (start_of_day % 3600)
offset_is_whole_number_of_hours_everytime = false;
/// If UTC offset was changed in previous day.
if (i != 0)
{
@ -102,7 +106,14 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
}
while (start_of_day <= DATE_LUT_MAX && i <= DATE_LUT_MAX_DAY_NUM);
/// Заполняем lookup таблицу для годов
/// Fill excessive part of lookup table. This is needed only to simplify handling of overflow cases.
while (i < DATE_LUT_SIZE)
{
lut[i] = lut[0];
++i;
}
/// Fill lookup table for years.
::memset(years_lut, 0, DATE_LUT_YEARS * sizeof(years_lut[0]));
for (size_t day = 0; day < i && lut[day].year <= DATE_LUT_MAX_YEAR; ++day)
{