/// Source: ClickHouse/src/Functions/GregorianDate.h

#pragma once
2021-10-02 07:13:14 +00:00
#include <base/extended_types.h>
#include <Common/Exception.h>
#include <Core/Types.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <cstdint>
2021-05-06 15:45:58 +00:00
namespace DB
{
/// Error codes used by the exceptions thrown in this file. They are only declared here.
namespace ErrorCodes
{
/// Thrown by gd::readDigit when the stream ends or a non-digit character is read.
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
/// Thrown by GregorianDate(ReadBuffer &) when the parsed month / day do not form a valid date.
extern const int CANNOT_PARSE_DATE;
/// Thrown by GregorianDate::write when the year is outside 0..9999.
extern const int CANNOT_FORMAT_DATETIME;
/// Thrown by OrdinalDate and MonthDay when they are given out-of-range components.
extern const int LOGICAL_ERROR;
}
/** Proleptic Gregorian calendar date. YearT is an integral type
* which should be at least 32 bits wide, and should preferably
* be signed.
*/
template <typename YearT = int32_t>
class GregorianDate
{
public:
/** Construct from date in text form 'YYYY-MM-DD' by reading from
* ReadBuffer.
*/
explicit GregorianDate(ReadBuffer & in);
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably signed.
*/
2022-10-19 01:26:22 +00:00
explicit GregorianDate(is_integer auto modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral type
* which should be at least 32 bits wide, and should preferably
* signed.
*/
2021-09-10 21:28:43 +00:00
template <is_integer T>
2020-12-09 15:30:44 +00:00
T toModifiedJulianDay() const;
/** Write the date in text form 'YYYY-MM-DD' to a buffer.
*/
void write(WriteBuffer & buf) const;
/** Convert to a string in text form 'YYYY-MM-DD'.
*/
std::string toString() const;
YearT year() const noexcept
{
return year_;
}
uint8_t month() const noexcept
{
return month_;
}
uint8_t day_of_month() const noexcept /// NOLINT
{
2020-12-09 15:30:44 +00:00
return day_of_month_;
}
private:
YearT year_; /// NOLINT
uint8_t month_; /// NOLINT
uint8_t day_of_month_; /// NOLINT
};
/** ISO 8601 Ordinal Date. YearT is an integral type which should
* be at least 32 bits wide, and should preferably signed.
*/
template <typename YearT = int32_t>
class OrdinalDate
{
public:
2020-12-09 15:30:44 +00:00
OrdinalDate(YearT year, uint16_t day_of_year);
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably signed.
*/
2022-10-19 01:26:22 +00:00
explicit OrdinalDate(is_integer auto modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral
* type which should be at least 32 bits wide, and should
* preferably be signed.
*/
2021-09-10 21:28:43 +00:00
template <is_integer T>
2020-12-09 15:30:44 +00:00
T toModifiedJulianDay() const noexcept;
YearT year() const noexcept
{
return year_;
}
uint16_t dayOfYear() const noexcept
{
2020-12-09 15:30:44 +00:00
return day_of_year_;
}
private:
YearT year_; /// NOLINT
uint16_t day_of_year_; /// NOLINT
};
/** A month and a day of month, without a year. Can be converted to and
  * from a day of year once it is known whether the year is a leap year.
  */
class MonthDay
{
public:
    /** Construct from month and day. */
    MonthDay(uint8_t month, uint8_t day_of_month);

    /** Construct from day of year in Gregorian or Julian
      * calendars to month and day.
      */
    MonthDay(bool is_leap_year, uint16_t day_of_year);

    /** Convert month and day in Gregorian or Julian calendars to
      * day of year.
      */
    uint16_t dayOfYear(bool is_leap_year) const;

    /// Month component.
    uint8_t month() const noexcept
    {
        return month_;
    }

    /// Day-of-month component.
    uint8_t day_of_month() const noexcept /// NOLINT
    {
        return day_of_month_;
    }

private:
    uint8_t month_; /// NOLINT
    uint8_t day_of_month_; /// NOLINT
};
}
/* Implementation */
namespace gd
{
using namespace DB;
/** Whether `year` is a leap year under the Gregorian rule: divisible
  * by 4, except years divisible by 100 that are not divisible by 400.
  * The rule is applied as-is to zero and negative years.
  */
template <typename YearT>
static inline constexpr bool is_leap_year(YearT year)
{
    return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
}
/** Number of days in the given month (1 = January ... 12 = December).
  * `is_leap_year` only affects February. Any other month value calls
  * std::terminate(), so callers must validate the month first.
  */
static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
{
    switch (month)
    {
        case 2:
            return is_leap_year ? 29 : 28;
        case 4:
        case 6:
        case 9:
        case 11:
            return 30;
        case 1:
        case 3:
        case 5:
        case 7:
        case 8:
        case 10:
        case 12:
            return 31;
        default:
            std::terminate();
    }
}
/** Integer division rounded toward negative infinity (floor division),
  * unlike the built-in `/` which rounds toward zero.
  * `y` is converted to the type of `x` before dividing.
  */
template <typename I, typename J>
static inline constexpr I div(I x, J y)
{
    const auto y_cast = static_cast<I>(y);

    /// When the signs differ the true quotient is negative. Moving x one step
    /// toward zero before the truncating division and then subtracting 1
    /// yields the floor, including when x is an exact multiple of y.
    if (x > 0 && y_cast < 0)
        return ((x - 1) / y_cast) - 1;
    if (x < 0 && y_cast > 0)
        return ((x + 1) / y_cast) - 1;

    return x / y_cast;
}
/** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
  * The result is zero or has the sign of `y`.
  * `y` is converted to the type of `x` first.
  */
template <typename I, typename J>
static inline constexpr I mod(I x, J y)
{
    const auto y_cast = static_cast<I>(y);
    const auto r = x % y_cast;

    /// The built-in % takes the sign of x; shift a non-zero remainder by y
    /// when the operands have opposite signs.
    if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
        return r == 0 ? static_cast<I>(0) : r + y_cast;

    return r;
}
/** Smaller of two values whose types may differ. The second operand
  * is converted to the type of the first, which is also the result type.
  * When the operands compare equal, the converted second operand is returned.
  */
template <typename I, typename J>
static inline constexpr I min(I x, J y)
{
    const I rhs = static_cast<I>(y);
    if (x < rhs)
        return x;
    return rhs;
}
/** Read one character from `in` and return its numeric value (0..9).
  * Throws CANNOT_PARSE_INPUT_ASSERTION_FAILED if the read fails or the
  * character is not a decimal digit.
  */
static inline char readDigit(ReadBuffer & in)
{
    char c;
    if (!in.read(c))
        throw Exception(
            "Cannot parse input: expected a digit at the end of stream",
            ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);

    if (c < '0' || c > '9')
        throw Exception(
            "Cannot read input: expected a digit but got something else",
            ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED);

    /// The value is known to be in 0..9 here, so the narrowing is lossless.
    return static_cast<char>(c - '0');
}
}
namespace DB
{
/** Parse a date in the text form 'YYYY-MM-DD' from `in`: exactly four
  * year digits, '-', two month digits, '-', two day digits, with the
  * separators checked by assertChar and the end of input by assertEOF.
  * Throws CANNOT_PARSE_DATE if the month is not in 1..12 or the day
  * does not exist in that month of that year.
  */
template <typename YearT>
GregorianDate<YearT>::GregorianDate(ReadBuffer & in)
{
    /// Digits are consumed one call per statement. Writing this as a single
    /// `readDigit(in) * 1000 + readDigit(in) * 100 + ...` expression would leave
    /// the order in which the operands of `+` are evaluated unspecified, so the
    /// digits could be taken from the stream in the wrong order.
    const auto read_number = [&in](int num_digits)
    {
        int value = 0;
        for (int i = 0; i < num_digits; ++i)
            value = value * 10 + gd::readDigit(in);
        return value;
    };

    year_ = static_cast<YearT>(read_number(4));
    assertChar('-', in);
    month_ = static_cast<uint8_t>(read_number(2));
    assertChar('-', in);
    day_of_month_ = static_cast<uint8_t>(read_number(2));
    assertEOF(in);

    /// The month range is tested first, so monthLength is never called with an invalid month.
    if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
        throw Exception("Invalid date: " + toString(), ErrorCodes::CANNOT_PARSE_DATE);
}
template <typename YearT>
2022-10-19 01:26:22 +00:00
GregorianDate<YearT>::GregorianDate(is_integer auto modified_julian_day)
{
2022-10-19 01:26:22 +00:00
const OrdinalDate<YearT> ord(modified_julian_day);
2020-12-09 15:30:44 +00:00
const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
2020-12-09 15:30:44 +00:00
day_of_month_ = md.day_of_month();
}
template <typename YearT>
2021-09-10 21:28:43 +00:00
template <is_integer T>
2020-12-09 15:30:44 +00:00
T GregorianDate<YearT>::toModifiedJulianDay() const
{
2020-12-09 15:30:44 +00:00
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
const OrdinalDate<YearT> ord(year_, day_of_year);
return ord.template toModifiedJulianDay<T>();
}
/** Write the date to `buf` in the text form 'YYYY-MM-DD', zero-padded.
  * Throws CANNOT_FORMAT_DATETIME if the year is outside 0..9999, since
  * such a year does not fit into four digits.
  */
template <typename YearT>
void GregorianDate<YearT>::write(WriteBuffer & buf) const
{
    if (year_ < 0 || year_ > 9999)
    {
        throw Exception(
            "Impossible to stringify: year too big or small: " + DB::toString(year_),
            ErrorCodes::CANNOT_FORMAT_DATETIME);
    }

    /// Emit the leading digit, then keep only the remainder for the next position.
    auto y = year_;
    writeChar('0' + y / 1000, buf);
    y %= 1000;
    writeChar('0' + y / 100, buf);
    y %= 100;
    writeChar('0' + y / 10, buf);
    y %= 10;
    writeChar('0' + y, buf);

    writeChar('-', buf);

    auto m = month_;
    writeChar('0' + m / 10, buf);
    m %= 10;
    writeChar('0' + m, buf);

    writeChar('-', buf);

    auto d = day_of_month_;
    writeChar('0' + d / 10, buf);
    d %= 10;
    writeChar('0' + d, buf);
}
/** Return the text produced by write(), i.e. the date in the form
  * 'YYYY-MM-DD'. Any exception thrown by write() propagates.
  */
template <typename YearT>
std::string GregorianDate<YearT>::toString() const
{
    WriteBufferFromOwnString out;
    write(out);
    return out.str();
}
template <typename YearT>
2020-12-09 15:30:44 +00:00
OrdinalDate<YearT>::OrdinalDate(YearT year, uint16_t day_of_year)
: year_(year)
2020-12-09 15:30:44 +00:00
, day_of_year_(day_of_year)
{
2020-12-09 15:30:44 +00:00
if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
{
throw Exception(
2020-12-09 15:30:44 +00:00
"Invalid ordinal date: " + toString(year) + "-" + toString(day_of_year),
ErrorCodes::LOGICAL_ERROR);
}
}
/** Construct from Modified Julian Day.
  *
  * Supported day numbers are -678941 to 2973483, which represent
  * 0000-01-01 and 9999-12-31 respectively. Values outside that range
  * are clamped to the nearest bound.
  */
template <typename YearT>
template <is_integer TDay>
OrdinalDate<YearT>::OrdinalDate(TDay modified_julian_day)
{
    /// The clamps are compiled in only when TDay can represent an out-of-range value.
    if constexpr (is_signed_v<TDay> && std::numeric_limits<TDay>::lowest() < -678941)
        if (modified_julian_day < -678941)
            modified_julian_day = -678941;

    /// 2973483 is 9999-12-31. The bound used here before, 2973119, is 364 days
    /// earlier (9999-01-01), so every later day of year 9999 was collapsed onto it.
    if constexpr (std::numeric_limits<TDay>::max() > 2973483)
        if (modified_julian_day > 2973483)
            modified_julian_day = 2973483;

    /// Zero-based day count from 0001-01-01, which is Modified Julian Day -678575.
    const auto a = modified_julian_day + 678575;

    /// 146097 days = 400 years, 36524 days = 100 years without the 400-year leap day,
    /// 1461 days = 4 years, 365 days = 1 year.
    const auto quad_cent = gd::div(a, 146097);
    const auto b = gd::mod(a, 146097);
    /// Capped at 3: the last day of a 400-year cycle belongs to its fourth century.
    const auto cent = gd::min(gd::div(b, 36524), 3);
    const auto c = b - cent * 36524;
    const auto quad = gd::div(c, 1461);
    const auto d = gd::mod(c, 1461);
    /// Capped at 3: the last day of a 4-year cycle is day 366 of its fourth year.
    const auto y = gd::min(gd::div(d, 365), 3);

    day_of_year_ = d - y * 365 + 1;
    year_ = quad_cent * 400 + cent * 100 + quad * 4 + y + 1;
}
template <typename YearT>
2021-09-10 21:28:43 +00:00
template <is_integer T>
2020-12-09 15:30:44 +00:00
T OrdinalDate<YearT>::toModifiedJulianDay() const noexcept
{
const auto y = year_ - 1;
2020-12-09 15:30:44 +00:00
return day_of_year_
2020-12-08 18:28:18 +00:00
+ 365 * y
+ gd::div(y, 4)
- gd::div(y, 100)
+ gd::div(y, 400)
- 678576;
}
2020-12-09 15:30:44 +00:00
inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
: month_(month)
2020-12-09 15:30:44 +00:00
, day_of_month_(day_of_month)
{
2020-12-08 18:28:18 +00:00
if (month < 1 || month > 12)
throw Exception(
"Invalid month: " + DB::toString(month),
ErrorCodes::LOGICAL_ERROR);
2020-12-09 15:30:44 +00:00
/* We can't validate day_of_month here, because we don't know if
* it's a leap year. */
}
2020-12-09 15:30:44 +00:00
inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
{
2020-12-09 15:30:44 +00:00
if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
throw Exception(
std::string("Invalid day of year: ") +
2020-12-09 15:30:44 +00:00
(is_leap_year ? "leap, " : "non-leap, ") + DB::toString(day_of_year),
ErrorCodes::LOGICAL_ERROR);
month_ = 1;
2020-12-09 15:30:44 +00:00
uint16_t d = day_of_year;
2020-12-08 18:28:18 +00:00
while (true)
{
2020-12-09 15:30:44 +00:00
const auto len = gd::monthLength(is_leap_year, month_);
2020-12-08 18:28:18 +00:00
if (d <= len)
break;
2020-12-08 18:28:18 +00:00
month_++;
d -= len;
}
2020-12-09 15:30:44 +00:00
day_of_month_ = d;
}
2020-12-09 15:30:44 +00:00
inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const
{
2020-12-09 15:30:44 +00:00
if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
{
throw Exception(
std::string("Invalid day of month: ") +
2020-12-09 15:30:44 +00:00
(is_leap_year ? "leap, " : "non-leap, ") + DB::toString(month_) +
"-" + DB::toString(day_of_month_),
ErrorCodes::LOGICAL_ERROR);
}
2020-12-09 15:30:44 +00:00
const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
return (367 * month_ - 362) / 12 + k + day_of_month_;
}
}