ClickHouse/src/Functions/parseDateTime.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1737 lines
70 KiB
C++
Raw Normal View History

#include <Columns/ColumnString.h>
#include <Columns/ColumnsDateTime.h>
2023-02-22 10:05:53 +00:00
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
2023-02-22 10:05:53 +00:00
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsConversion.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
#include <IO/WriteHelpers.h>
2023-02-24 09:07:27 +00:00
#include <base/types.h>
2023-02-22 10:05:53 +00:00
namespace DB
{
namespace
{
2023-02-27 13:41:38 +00:00
/// Cursor type used by every parsing routine in this file: a pointer into the input string.
using Pos = const char *;
/// Lower-case three-letter weekday abbreviations; index 0 is Sunday.
constexpr std::string_view weekdaysShort[] = {"sun", "mon", "tue", "wed", "thu", "fri", "sat"};
/// Lower-case full weekday names, in the same order as weekdaysShort (index 0 is Sunday).
constexpr std::string_view weekdaysFull[] = {"sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"};
/// Lower-case three-letter month abbreviations; index 0 is January.
constexpr std::string_view monthsShort[] = {"jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"};
/// Maps the lower-case three-letter weekday prefix to
/// {remaining letters of the full weekday name, weekday number with Monday = 1 ... Sunday = 7}.
const std::unordered_map<String, std::pair<String, Int32>> dayOfWeekMap{
{"mon", {"day", 1}},
{"tue", {"sday", 2}},
{"wed", {"nesday", 3}},
{"thu", {"rsday", 4}},
{"fri", {"day", 5}},
{"sat", {"urday", 6}},
{"sun", {"day", 7}},
};
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Maps the lower-case three-letter month prefix to
/// {remaining letters of the full month name, month number with January = 1 ... December = 12}.
/// "may" has an empty remainder because the short and the full name coincide.
const std::unordered_map<String, std::pair<String, Int32>> monthMap{
{"jan", {"uary", 1}},
{"feb", {"ruary", 2}},
{"mar", {"rch", 3}},
{"apr", {"il", 4}},
{"may", {"", 5}},
{"jun", {"e", 6}},
{"jul", {"y", 7}},
{"aug", {"ust", 8}},
{"sep", {"tember", 9}},
{"oct", {"ober", 10}},
{"nov", {"ember", 11}},
{"dec", {"ember", 12}},
};
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Number of days in each month, indexed by month number (1..12); index 0 is a placeholder.
constexpr Int32 leapDays[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
constexpr Int32 normalDays[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
/// Number of days elapsed in the year after the first N months; the element at index (month - 1)
/// is the number of days preceding the first day of `month`.
constexpr Int32 cumulativeLeapDays[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366};
constexpr Int32 cumulativeDays[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365};
/// Days from 1970-01-01 to January 1st of year (1970 + index).
/// Holds 137 entries, i.e. one for every year in [minYear, maxYear].
constexpr Int32 cumulativeYearDays[]
= {0, 365, 730, 1096, 1461, 1826, 2191, 2557, 2922, 3287, 3652, 4018, 4383, 4748, 5113, 5479, 5844, 6209,
6574, 6940, 7305, 7670, 8035, 8401, 8766, 9131, 9496, 9862, 10227, 10592, 10957, 11323, 11688, 12053, 12418, 12784,
13149, 13514, 13879, 14245, 14610, 14975, 15340, 15706, 16071, 16436, 16801, 17167, 17532, 17897, 18262, 18628, 18993, 19358,
19723, 20089, 20454, 20819, 21184, 21550, 21915, 22280, 22645, 23011, 23376, 23741, 24106, 24472, 24837, 25202, 25567, 25933,
26298, 26663, 27028, 27394, 27759, 28124, 28489, 28855, 29220, 29585, 29950, 30316, 30681, 31046, 31411, 31777, 32142, 32507,
32872, 33238, 33603, 33968, 34333, 34699, 35064, 35429, 35794, 36160, 36525, 36890, 37255, 37621, 37986, 38351, 38716, 39082,
39447, 39812, 40177, 40543, 40908, 41273, 41638, 42004, 42369, 42734, 43099, 43465, 43830, 44195, 44560, 44926, 45291, 45656,
46021, 46387, 46752, 47117, 47482, 47847, 48212, 48577, 48942, 49308, 49673};
/// Inclusive range of years accepted by the validity checks in this file.
constexpr Int32 minYear = 1970;
constexpr Int32 maxYear = 2106;
/// Measures a quoted literal inside a Joda format string
/// (https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html).
///
/// Scans [cur, end) and returns how many characters precede the closing single quote.
/// A doubled single quote ('') is an escaped quote: it does not close the literal and
/// contributes two characters to the count. Returns -1 when no closing quote exists.
///
/// Examples:
///   Format string: "'aaaa'", unescaped literal: "aaaa";
///   Format string: "'aa''aa'", unescaped literal: "aa'aa";
///   Format string: "'aaa''aa" is invalid because the closing single quote is missing.
[[maybe_unused]] Int64 numLiteralChars(const char * cur, const char * end)
{
    Int64 consumed = 0;
    for (const char * p = cur; p < end;)
    {
        if (*p != '\'')
        {
            /// Ordinary literal character.
            ++consumed;
            ++p;
            continue;
        }

        const bool escaped_quote = (p + 1 < end) && p[1] == '\'';
        if (!escaped_quote)
            return consumed; /// A lone quote terminates the literal.

        /// Two consecutive quotes: skip both and keep scanning.
        consumed += 2;
        p += 2;
    }

    /// Ran off the end without meeting a terminating quote.
    return -1;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
struct Date
{
Int32 year = 1970;
Int32 month = 1;
Int32 day = 1;
bool is_ad = true; // AD -> true, BC -> false.
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
Int32 week = 1; // Week of year based on ISO week date, e.g: 27
Int32 day_of_week = 1; // Day of week, Monday:1, Tuesday:2, ..., Sunday:7
bool week_date_format = false;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
Int32 day_of_year = 1;
bool day_of_year_format = false;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
bool century_format = false;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
bool is_year_of_era = false; // Year of era cannot be zero or negative.
bool has_year = false; // Whether year was explicitly specified.
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
Int32 hour = 0;
Int32 minute = 0;
Int32 second = 0;
// Int32 microsecond = 0;
bool is_am = true; // AM -> true, PM -> false
std::optional<Int64> time_zone_offset;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
bool is_clock_hour = false; // Whether most recent hour specifier is clockhour
bool is_hour_of_half_day = false; // Whether most recent hour specifier is of half day.
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
std::vector<Int32> day_of_month_values;
std::vector<Int32> day_of_year_values;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// For debug
[[maybe_unused]] String toString() const
{
String res;
res += "year:" + std::to_string(year);
res += ",";
res += "month:" + std::to_string(month);
res += ",";
res += "day:" + std::to_string(day);
res += ",";
res += "hour:" + std::to_string(hour);
res += ",";
res += "minute:" + std::to_string(minute);
res += ",";
res += "second:" + std::to_string(second);
return res;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
static bool isLeapYear(Int32 year_) { return year_ % 4 == 0 && (year_ % 100 != 0 || year_ % 400 == 0); }
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
static bool isDateValid(Int32 year_, Int32 month_, Int32 day_)
{
if (month_ < 1 || month_ > 12)
return false;
2023-02-27 13:41:38 +00:00
if (year_ < minYear || year_ > maxYear)
return false;
2023-02-24 10:27:21 +00:00
2023-02-27 13:41:38 +00:00
bool leap = isLeapYear(year_);
if (day_ < 1)
return false;
2023-02-27 13:41:38 +00:00
if (leap && day_ > leapDays[month_])
return false;
2023-02-27 13:41:38 +00:00
if (!leap && day_ > normalDays[month_])
return false;
return true;
}
2023-02-27 13:41:38 +00:00
static bool isDayOfYearValid(Int32 year_, Int32 day_of_year_)
{
if (year_ < minYear || year_ > maxYear)
return false;
2023-02-27 13:41:38 +00:00
if (day_of_year_ < 1 || day_of_year_ > 365 + (isLeapYear(year_) ? 1 : 0))
return false;
2023-02-27 13:41:38 +00:00
return true;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
static bool isWeekDateValid(Int32 week_year_, Int32 week_of_year_, Int32 day_of_week_)
{
if (day_of_week_ < 1 || day_of_week_ > 7)
return false;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
if (week_of_year_ < 1 || week_of_year_ > 52)
return false;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
if (week_year_ < minYear || week_year_ > maxYear)
return false;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
return true;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
static Int32 extractISODayOfTheWeek(Int32 days_since_epoch)
{
if (days_since_epoch < 0)
{
// negative date: start off at 4 and cycle downwards
return (7 - ((-int64_t(days_since_epoch) + 3) % 7));
}
else
{
// positive date: start off at 4 and cycle upwards
return ((int64_t(days_since_epoch) + 3) % 7) + 1;
}
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
static Int32 daysSinceEpochFromWeekDate(int32_t week_year_, int32_t week_of_year_, int32_t day_of_week_)
{
if (!isWeekDateValid(week_year_, week_of_year_, day_of_week_))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid week date");
2023-02-27 13:41:38 +00:00
Int32 days_since_epoch_of_jan_fourth = daysSinceEpochFromDate(week_year_, 1, 4);
Int32 first_day_of_week_year = extractISODayOfTheWeek(days_since_epoch_of_jan_fourth);
return days_since_epoch_of_jan_fourth - (first_day_of_week_year - 1) + 7 * (week_of_year_ - 1) + day_of_week_ - 1;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
{
2023-02-27 13:41:38 +00:00
if (!isDayOfYearValid(year_, day_of_year_))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year");
Int32 res = daysSinceEpochFromDate(year_, 1, 1);
res += day_of_year_ - 1;
return res;
}
2023-02-27 13:41:38 +00:00
static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
{
2023-02-27 13:41:38 +00:00
if (!isDateValid(year_, month_, day_))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid date");
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
Int32 res = cumulativeYearDays[year_ - 1970];
res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1];
res += day_ - 1;
return res;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
Int64 checkAndGetDateTime(const DateLUTImpl & time_zone)
{
/// Era is BC and year of era is provided
if (is_year_of_era && !is_ad)
year = -1 * (year - 1);
2023-02-27 13:41:38 +00:00
if (is_hour_of_half_day && !is_am)
hour += 12;
2023-02-27 13:41:38 +00:00
/// Ensure all day of year values are valid for ending year value
for (const auto d : day_of_month_values)
{
if (!isDateValid(year, month, d))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month.");
}
2023-02-27 13:41:38 +00:00
// Ensure all day of year values are valid for ending year value
for (const auto d : day_of_year_values)
{
if (!isDayOfYearValid(year, d))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year.");
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
// Convert the parsed date/time into a timestamp.
Int32 days_since_epoch;
if (week_date_format)
days_since_epoch = daysSinceEpochFromWeekDate(year, week, day_of_week);
else if (day_of_year_format)
days_since_epoch = daysSinceEpochFromDayOfYear(year, day_of_year);
else
2023-02-28 06:08:03 +00:00
{
2023-02-27 13:41:38 +00:00
days_since_epoch = daysSinceEpochFromDate(year, month, day);
2023-02-28 06:08:03 +00:00
std::cout << "year:" << year << "month:" << month << "day:" << day << std::endl;
}
std::cout << "days_since_epoch:" << days_since_epoch << std::endl;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
Int64 seconds_since_epoch = days_since_epoch * 86400 + hour * 3600 + minute * 60 + second;
2023-02-28 06:08:03 +00:00
std::cout << "seconds_since_epoch:" << seconds_since_epoch << std::endl;
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Time zone is not specified, use local time zone
if (!time_zone_offset)
*time_zone_offset = time_zone.getOffsetAtStartOfEpoch();
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
// std::cout << "timezonename:" << time_zone.getTimeZone() << std::endl;
// std::cout << "time_zone_offset:" << *time_zone_offset << time_zone.getOffsetAtStartOfEpoch() << std::endl;
// std::cout << "before timestamp:" << seconds_since_epoch << std::endl;
/// Time zone is specified in format string.
2023-02-28 06:08:03 +00:00
if (seconds_since_epoch >= *time_zone_offset)
seconds_since_epoch -= *time_zone_offset;
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Seconds since epoch is negative");
std::cout << "after adjustment:" << seconds_since_epoch << std::endl;
2023-02-27 13:41:38 +00:00
return seconds_since_epoch;
}
2023-02-27 13:41:38 +00:00
};
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Compile-time options for FunctionParseDateTimeImpl.
struct ParseDateTimeTraits
{
    /// Dialect of the format string.
    enum class ParseSyntax
    {
        MySQL,
        Joda
    };
};
2023-02-22 10:05:53 +00:00
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// _FUNC_(str[, format, timezone])
template <typename Name, /*ParseDateTimeTraits::SupportInteger support_integer, */ ParseDateTimeTraits::ParseSyntax parse_syntax>
class FunctionParseDateTimeImpl : public IFunction
{
public:
static constexpr auto name = Name::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionParseDateTimeImpl>(); }
2023-02-27 13:41:38 +00:00
String getName() const override { return name; }
2023-02-27 13:41:38 +00:00
bool useDefaultImplementationForConstants() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Validates the argument list (1 to 3 arguments, all of type String) and returns DateTime
/// carrying the time zone name resolved by getTimeZone().
/// Throws NUMBER_OF_ARGUMENTS_DOESNT_MATCH or ILLEGAL_TYPE_OF_ARGUMENT otherwise.
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
    if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3)
        throw Exception(
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
            "Number of arguments for function {} doesn't match: passed {}, should be 1, 2 or 3",
            getName(),
            arguments.size());

    if (!isString(arguments[0].type))
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of first argument of function {}. Should be string",
            arguments[0].type->getName(),
            getName());

    /// Report the type of the argument that is actually wrong, not the type of the first one.
    if (arguments.size() > 1 && !isString(arguments[1].type))
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of second argument of function {}. Should be string",
            arguments[1].type->getName(),
            getName());

    if (arguments.size() > 2 && !isString(arguments[2].type))
        throw Exception(
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
            "Illegal type {} of third argument of function {}. Should be string",
            arguments[2].type->getName(),
            getName());

    String time_zone_name = getTimeZone(arguments).second;
    return std::make_shared<DataTypeDateTime>(time_zone_name);
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses every row of the first (string) argument with the instruction list compiled from
/// the format and returns a DateTime column. A row fails with an exception when an
/// instruction cannot be applied or when input is left over after the last instruction.
ColumnPtr
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
{
    const auto * col_str = checkAndGetColumn<ColumnString>(arguments[0].column.get());
    if (!col_str)
        throw Exception(
            ErrorCodes::ILLEGAL_COLUMN,
            "Illegal column {} of first ('str') argument of function {}. Must be string.",
            arguments[0].column->getName(),
            getName());

    String format = getFormat(arguments);
    const auto * time_zone = getTimeZone(arguments).first;

    /// The format is compiled once and the instructions are replayed for every row.
    std::vector<Action> instructions;
    parseFormat(format, instructions);

    auto col_res = ColumnDateTime::create();
    col_res->reserve(input_rows_count);
    auto & data_res = col_res->getData();
    for (size_t i = 0; i < input_rows_count; ++i)
    {
        StringRef str_ref = col_str->getDataAt(i);
        Pos cur = str_ref.data;
        Pos end = str_ref.data + str_ref.size;

        Date date;
        /// No per-instruction tracing here: this is the per-row hot path.
        for (const auto & instruction : instructions)
            cur = instruction.perform(cur, end, date);

        // Ensure all input was consumed.
        if (cur < end)
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "Invalid format input {} is malformed at {}",
                str_ref.toView(),
                std::string_view(cur, end - cur));

        Int64 time = date.checkAndGetDateTime(*time_zone);
        data_res.push_back(static_cast<UInt32>(time));
    }

    return col_res;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
private:
class Action
2023-02-22 10:05:53 +00:00
{
2023-02-27 13:41:38 +00:00
private:
    /// Callable type of a parsing step: a plain function pointer for the MySQL syntax,
    /// std::function for the other syntax.
    using Func = std::conditional_t<
        parse_syntax == ParseDateTimeTraits::ParseSyntax::MySQL,
        Pos (*)(Pos, Pos, Date &),
        std::function<Pos(Pos, Pos, Date &)>>;

    /// Parsing step; empty when the action is a literal.
    Func func{};
    /// Name of the parsing step, used by toString().
    std::string func_name;

    /// Text that must appear verbatim in the input; used when `func` is empty.
    std::string literal;
2023-02-22 12:42:59 +00:00
2023-02-27 13:41:38 +00:00
public:
explicit Action(Func && func_, const char * func_name_) : func(std::move(func_)), func_name(func_name_) { }
2023-02-22 12:42:59 +00:00
2023-02-27 13:41:38 +00:00
explicit Action(const String & literal_) : literal(literal_) { }
explicit Action(String && literal_) : literal(std::move(literal_)) { }
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// For debug: "func:<name>" for a parsing function, "literal:<text>" for a literal action.
[[maybe_unused]] String toString() const
{
    return func ? "func:" + func_name : "literal:" + literal;
}
2023-02-22 12:42:59 +00:00
2023-02-27 13:41:38 +00:00
/// Applies this action to the input [cur, end) and returns the position after the consumed part.
/// A function action delegates to the stored function; a literal action requires the input to
/// start with the literal text and throws otherwise.
Pos perform(Pos cur, Pos end, Date & date) const
{
    if (func)
        return func(cur, end, date);

    const size_t literal_size = literal.size();
    ensureSpace(cur, end, literal_size, "requires size >= " + std::to_string(literal_size));

    const std::string_view actual(cur, literal_size);
    if (actual != literal)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "Expect literal {} but {} provided", literal, std::string_view(cur, literal.size()));

    return cur + literal_size;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Reads exactly two characters as a two-digit decimal number into `res` and returns the
/// position after them. Throws if fewer than two characters are available.
/// The characters are not checked to be digits.
template <typename T>
static Pos readNumber2(Pos cur, Pos end, T & res)
{
    ensureSpace(cur, end, 2, "readNumber2 requires size >= 2");

    const auto tens = cur[0] - '0';
    const auto ones = cur[1] - '0';
    res = tens * 10;
    res += ones;
    return cur + 2;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Reads exactly three characters as a three-digit decimal number into `res`:
/// two digits through readNumber2, then one more. Throws if the input is too short.
template <typename T>
static Pos readNumber3(Pos cur, Pos end, T & res)
{
    Pos third = readNumber2(cur, end, res);

    ensureSpace(third, end, 1, "readNumber3 requires size >= 3");
    const auto last_digit = *third - '0';
    res = res * 10 + last_digit;
    return third + 1;
}
2023-02-27 13:41:38 +00:00
/// Reads exactly four characters as a four-digit decimal number into `res`,
/// as two consecutive two-digit groups. Throws if the input is too short.
template <typename T>
static Pos readNumber4(Pos cur, Pos end, T & res)
{
    Pos after_high = readNumber2(cur, end, res);

    T low_pair{};
    Pos after_low = readNumber2(after_high, end, low_pair);

    res = res * 100 + low_pair;
    return after_low;
}
2023-02-27 13:41:38 +00:00
/// Throws unless at least `len` characters are available in [cur, end).
/// `msg` is appended to the error text.
static ALWAYS_INLINE void ensureSpace(Pos cur, Pos end, size_t len, const String & msg)
{
    const bool fits = cur <= end && cur + len <= end;
    if (!fits)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to parse because {}", msg);
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Requires the next input character to be `ch` and returns the position after it.
/// Throws if the input is exhausted or the character differs.
static ALWAYS_INLINE Pos assertChar(Pos cur, Pos end, char ch)
{
    ensureSpace(cur, end, 1, "assertChar requires size >= 1");

    if (*cur != ch)
        /// String(count, char): the count comes first. With the arguments swapped the message
        /// would contain `ch` copies of '\x01' instead of the character itself.
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expect char {}, but {} provided", String(1, ch), String(1, *cur));

    ++cur;
    return cur;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a three-letter weekday abbreviation (case-insensitive), stores the weekday in
/// `date.day_of_week` and switches the date to week-date mode. The year defaults to 2000
/// when none was given so far. Throws on short input or an unknown abbreviation.
static Pos mysqlDayOfWeekTextShort(Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 3, "Parsing DayOfWeekTextShort requires size >= 3");

    String str(cur, 3);
    Poco::toLowerInPlace(str);
    Int32 i = 0;
    for (; i < 7; ++i)
        if (str == weekdaysShort[i])
            break;

    if (i == 7)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to parse because unknown short week text");

    /// weekdaysShort starts at Sunday, whereas Date::day_of_week counts Monday:1 ... Sunday:7.
    date.day_of_week = (i == 0) ? 7 : i;
    date.week_date_format = true;
    date.day_of_year_format = false;
    if (!date.has_year)
    {
        date.has_year = true;
        date.year = 2000;
    }
    cur += 3;
    return cur;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a three-letter month abbreviation (case-insensitive) into `date.month` (1..12).
/// Throws on short input or an unknown abbreviation.
static Pos mysqlMonthOfYearTextShort(Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 3, "Parsing MonthOfYearTextShort requires size >= 3");

    String abbreviation(cur, 3);
    Poco::toLowerInPlace(abbreviation);

    /// Linear search; the index of the match is the zero-based month.
    Int32 month_index = 0;
    while (month_index < 12 && abbreviation != monthsShort[month_index])
        ++month_index;

    if (month_index == 12)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to parse because unknown short month text");

    date.month = month_index + 1;
    return cur + 3;
}
2023-02-22 10:05:53 +00:00
2023-02-28 06:08:03 +00:00
/// Parses a two-digit month into `date.month`; throws unless it is within [1, 12].
static Pos mysqlMonth(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.month);

    const bool in_range = date.month >= 1 && date.month <= 12;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for month must be in the range [1, 12]", date.month);

    return next;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a two-digit century and stores `century * 100` as the year, marking the year as given.
static Pos mysqlCentury(Pos cur, Pos end, Date & date)
{
    Int32 century;
    Pos next = readNumber2(cur, end, century);

    date.century_format = true;
    date.year = 100 * century;
    date.has_year = true;
    return next;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a two-digit day of month, records it for later validation and selects
/// calendar-date mode. The year defaults to 2000 when none was given so far.
static Pos mysqlDayOfMonth(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.day);
    date.day_of_month_values.push_back(date.day);

    /// A day of month overrides week-date and day-of-year modes.
    date.week_date_format = false;
    date.day_of_year_format = false;

    if (date.has_year)
        return next;

    date.has_year = true;
    date.year = 2000;
    return next;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses an American-style date "MM/DD/YY" into month, day and year and selects
/// calendar-date mode. The two-digit year is placed in the 2000s, the same way mysqlYear2
/// does. Throws on short input or a missing '/' separator.
static Pos mysqlAmericanDate(Pos cur, Pos end, Date & date)
{
    cur = readNumber2(cur, end, date.month);
    cur = assertChar(cur, end, '/');

    cur = readNumber2(cur, end, date.day);
    cur = assertChar(cur, end, '/');

    /// The year is the last component: nothing follows it. A bare two-digit year
    /// is below minYear and could never pass date validation.
    cur = readNumber2(cur, end, date.year);
    date.year += 2000;

    date.week_date_format = false;
    date.day_of_year_format = false;
    date.century_format = false;
    date.is_year_of_era = false;
    date.has_year = true;
    return cur;
}
2023-02-22 10:05:53 +00:00
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a two-character day of month whose first character may be a space instead of
/// a digit, and selects calendar-date mode. The year defaults to 2000 when none was given.
static Pos mysqlDayOfMonthSpacePadded(Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 2, "mysqlDayOfMonthSpacePadded requires size >= 2");

    /// A leading space counts as a zero tens digit.
    const Int32 tens = (cur[0] == ' ') ? 0 : (cur[0] - '0');
    date.day = 10 * tens + (cur[1] - '0');
    cur += 2;

    date.week_date_format = false;
    date.day_of_year_format = false;
    if (!date.has_year)
    {
        date.has_year = true;
        date.year = 2000;
    }
    return cur;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses "YYYY-MM-DD" into year, month and day and selects calendar-date mode.
/// Throws on short input or a missing '-' separator.
static Pos mysqlISO8601Date(Pos cur, Pos end, Date & date)
{
    Pos p = readNumber4(cur, end, date.year);
    p = assertChar(p, end, '-');
    p = readNumber2(p, end, date.month);
    p = assertChar(p, end, '-');
    p = readNumber2(p, end, date.day);

    /// A full explicit date resets every alternative date representation.
    date.week_date_format = false;
    date.day_of_year_format = false;
    date.century_format = false;
    date.is_year_of_era = false;
    date.has_year = true;
    return p;
}
2023-02-27 13:41:38 +00:00
/// Parses a two-digit year and maps it to 2000 + value.
static Pos mysqlISO8601Year2(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.year);
    date.year = date.year + 2000;

    date.has_year = true;
    date.is_year_of_era = false;
    date.century_format = false;
    return next;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a four-digit year into `date.year` and marks the year as explicitly given.
static Pos mysqlISO8601Year4(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber4(cur, end, date.year);

    date.has_year = true;
    date.is_year_of_era = false;
    date.century_format = false;
    return next;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a three-digit day of year, records it for later validation and selects
/// day-of-year mode. The year defaults to 2000 when none was given so far.
static Pos mysqlDayOfYear(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber3(cur, end, date.day_of_year);
    date.day_of_year_values.push_back(date.day_of_year);

    date.day_of_year_format = true;
    date.week_date_format = false;

    if (date.has_year)
        return next;

    date.has_year = true;
    date.year = 2000;
    return next;
}
2023-02-27 13:41:38 +00:00
/// Parses a one-character weekday number into `date.day_of_week` and selects week-date mode.
/// The year defaults to 2000 when none was given so far. The value is not range-checked here.
/// Returns the position after the consumed character; throws if the input is exhausted.
static Pos mysqlDayOfWeek(Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 1, "mysqlDayOfWeek requires size >= 1");

    date.day_of_week = *cur - '0';
    /// Consume the digit; otherwise the next instruction would re-read the same character.
    ++cur;

    date.week_date_format = true;
    date.day_of_year_format = false;
    if (!date.has_year)
    {
        date.has_year = true;
        date.year = 2000;
    }
    return cur;
}
2023-02-27 13:41:38 +00:00
/// Parses a two-digit week of year into `date.week` and selects week-date mode.
/// The year defaults to 2000 only when no year was given so far.
static Pos mysqlISO8601Week(Pos cur, Pos end, Date & date)
{
    cur = readNumber2(cur, end, date.week);
    date.week_date_format = true;
    date.day_of_year_format = false;
    /// Same defaulting rule as the other mysql* parsers: an explicitly parsed year is kept.
    if (!date.has_year)
    {
        date.has_year = true;
        date.year = 2000;
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a weekday number through mysqlDayOfWeek and rewrites the value 0 to 7.
static Pos mysqlDayOfWeek0To6(Pos cur, Pos end, Date & date)
{
    Pos next = mysqlDayOfWeek(cur, end, date);

    const bool is_zero = (date.day_of_week == 0);
    if (is_zero)
        date.day_of_week = 7;

    return next;
}
2023-02-27 13:41:38 +00:00
/// Parses a full weekday name (case-insensitive), stores the weekday in `date.day_of_week`
/// (Monday:1 ... Sunday:7) and selects week-date mode. The year defaults to 2000 when none
/// was given so far. Throws on short input or an unknown name.
static Pos mysqlDayOfWeekTextLong(Pos cur, Pos end, Date & date)
{
    /// The first three letters identify the weekday ...
    ensureSpace(cur, end, 3, "mysqlDayOfWeekTextLong requires size >= 3");
    String prefix(cur, 3);
    Poco::toLowerInPlace(prefix);

    const auto it = dayOfWeekMap.find(prefix);
    if (it == dayOfWeekMap.end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to parse because unknown full day of week text {}", prefix);
    cur += 3;

    /// ... and the remaining letters must complete the full name.
    const String & expected_rest = it->second.first;
    ensureSpace(cur, end, expected_rest.size(), "mysqlDayOfWeekTextLong requires size >= " + std::to_string(3 + expected_rest.size()));
    String rest(cur, expected_rest.size());
    Poco::toLowerInPlace(rest);
    if (rest != expected_rest)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to parse because unknown full day of week text {}", prefix + rest);
    cur += expected_rest.size();

    date.day_of_week = it->second.second;
    date.week_date_format = true;
    date.day_of_year_format = false;
    if (!date.has_year)
    {
        date.has_year = true;
        date.year = 2000;
    }
    return cur;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a two-digit year and maps it to 2000 + value.
static Pos mysqlYear2(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.year);
    date.year = date.year + 2000;

    date.has_year = true;
    date.is_year_of_era = false;
    date.century_format = false;
    return next;
}
/// Parses a four-digit year into `date.year` and marks the year as explicitly given.
static Pos mysqlYear4(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber4(cur, end, date.year);

    date.has_year = true;
    date.is_year_of_era = false;
    date.century_format = false;
    return next;
}
2023-02-22 12:42:59 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a time zone offset of the form "<sign>HHMM" and stores it, in seconds, in
/// `date.time_zone_offset`. A leading '-' makes the offset negative; any other first
/// character is consumed as a positive sign. Throws on short input.
static Pos mysqlTimezoneOffset(Pos cur, Pos end, Date & date)
{
    /// TODO figure out what timezone_id mean
    ensureSpace(cur, end, 1, "Parse mysqlTimezoneOffset failed");
    Int32 sign = 1;
    if (*cur == '-')
        sign = -1;
    ++cur;

    Int32 hour;
    cur = readNumber2(cur, end, hour);

    Int32 minute;
    cur = readNumber2(cur, end, minute);

    /// Assign to the optional itself: dereferencing an empty optional is undefined behaviour
    /// and would leave it disengaged, so the parsed offset would later look unset.
    date.time_zone_offset = sign * (hour * 3600 + minute * 60);
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-28 06:08:03 +00:00
/// Parses a two-digit minute into `date.minute`; throws unless it is within [0, 59].
static Pos mysqlMinute(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.minute);

    const bool in_range = date.minute >= 0 && date.minute <= 59;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for minute must be in the range [0, 59]", date.minute);

    return next;
}
2023-02-27 13:41:38 +00:00
/// Parses the two-letter marker "AM" or "PM" (case-insensitive) into `date.is_am`.
/// Throws on short input or any other text.
static Pos mysqlAMPM(Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 2, "mysqlAMPM requires size >= 2");

    std::string text(cur, 2);
    Poco::toUpperInPlace(text);
    /// Date::is_am: AM -> true, PM -> false.
    if (text == "AM")
        date.is_am = true;
    else if (text == "PM")
        date.is_am = false;
    else
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Text should be AM or PM, but {} provided", text);

    cur += 2;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a 12-hour time "HH:MM AM" / "HH:MM PM": hour, ':', minute, a single space and
/// the AM/PM marker. Marks the hour as an hour of half day.
static Pos mysqlHHMM12(Pos cur, Pos end, Date & date)
{
    Pos p = readNumber2(cur, end, date.hour);
    date.is_hour_of_half_day = true;
    date.is_clock_hour = false;

    p = assertChar(p, end, ':');
    p = readNumber2(p, end, date.minute);
    p = assertChar(p, end, ' ');
    return mysqlAMPM(p, end, date);
}
2023-02-22 12:42:59 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a 24-hour time "HH:MM": hour, ':' and minute.
static Pos mysqlHHMM24(Pos cur, Pos end, Date & date)
{
    Pos p = readNumber2(cur, end, date.hour);
    date.is_hour_of_half_day = false;
    date.is_clock_hour = false;

    p = assertChar(p, end, ':');
    return readNumber2(p, end, date.minute);
}
2023-02-28 06:08:03 +00:00
/// Parses a two-digit second into `date.second`; throws unless it is within [0, 59].
static Pos mysqlSecond(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.second);

    const bool in_range = date.second >= 0 && date.second <= 59;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for second must be in the range [0,59]", date.second);

    return next;
}
2023-02-27 13:41:38 +00:00
/// Parses "HH:MM:SS" into hour, minute and second of a 24-hour clock.
/// Throws on short input or a missing ':' separator.
static Pos mysqlISO8601Time(Pos cur, Pos end, Date & date)
{
    Pos p = readNumber2(cur, end, date.hour);
    p = assertChar(p, end, ':');
    p = readNumber2(p, end, date.minute);
    p = assertChar(p, end, ':');
    p = readNumber2(p, end, date.second);

    date.is_hour_of_half_day = false;
    date.is_clock_hour = false;
    return p;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a two-digit hour of a 12-hour clock into `date.hour`; throws unless it is
/// within [1, 12]. Marks the hour as an hour of half day.
static Pos mysqlHour12(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.hour);

    const bool in_range = date.hour >= 1 && date.hour <= 12;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for mysql hour12 must be in the range [1,12]", date.hour);

    date.is_clock_hour = false;
    date.is_hour_of_half_day = true;
    return next;
}
2023-02-27 13:41:38 +00:00
/// Parses a two-digit hour of a 24-hour clock into `date.hour`; throws unless it is
/// within [0, 23].
static Pos mysqlHour24(Pos cur, Pos end, Date & date)
{
    Pos next = readNumber2(cur, end, date.hour);

    const bool in_range = date.hour >= 0 && date.hour <= 23;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for mysql hour24 must be in the range [0,23]", date.hour);

    date.is_clock_hour = false;
    date.is_hour_of_half_day = false;
    return next;
}
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Reads a decimal number of variable length from [cur, end) into `number`.
///
/// - allow_negative / allow_plus_sign: whether a leading '-' / '+' is accepted and consumed.
/// - max_digits_consume: upper bound on the number of digits read.
/// - is_year with repetitions == 2: when exactly two digits were read they are treated as an
///   abbreviated year, [00, 69] -> [2000, 2069] and [70, 99] -> [1970, 1999]; any other
///   digit count is taken as the number itself.
///
/// Returns the position after the last consumed digit. Throws when no digit could be read.
static Pos readNumberWithVariableLength(
    Pos cur,
    Pos end,
    bool allow_negative,
    bool allow_plus_sign,
    bool is_year,
    int repetitions,
    int max_digits_consume,
    Int32 & number)
{
    /// Optional sign; a '-' is only recognised when negatives are allowed, and a '+'
    /// only when it is allowed and no '-' was consumed.
    bool negative = false;
    if (cur < end && allow_negative && *cur == '-')
    {
        negative = true;
        ++cur;
    }
    else if (cur < end && allow_plus_sign && *cur == '+')
    {
        ++cur;
    }

    number = 0;
    const Pos start = cur;
    const Pos limit = start + max_digits_consume;
    for (; cur < end && cur < limit && *cur >= '0' && *cur <= '9'; ++cur)
        number = number * 10 + (*cur - '0');

    /// Two-digit year abbreviation: applies only when exactly two digits were consumed.
    const bool two_digit_year = is_year && repetitions == 2 && (cur - start) == 2;
    if (two_digit_year)
        number += (number >= 70) ? 1900 : 2000;

    /// Need to have read at least one digit.
    if (cur <= start)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "read number from {} failed", String(cur, end - cur));

    return negative ? (number *= -1, cur) : cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses the two-letter era marker: "AD"/"ad" sets `date.is_ad`, "BC"/"bc" clears it.
/// Mixed-case spellings are rejected. The repetition count is not used.
static Pos jodaEra(int, Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 2, "jodaEra requires size >= 2");

    const std::string_view era(cur, 2);
    const bool is_ad_text = (era == "AD" || era == "ad");
    const bool is_bc_text = (era == "BC" || era == "bc");

    if (!is_ad_text && !is_bc_text)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown era {}", std::string(cur, 2));

    date.is_ad = is_ad_text;
    return cur + 2;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the century of era and stores it in `date` as a year (century * 100).
/// Marks the date as century-formatted and as carrying an explicit year.
/// Throws LOGICAL_ERROR when the parsed value is outside [0, 2922789].
/// Returns the position after the consumed digits.
static Pos jodaCenturyOfEra(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 century;
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, repetitions, century);

    const bool in_range = century >= 0 && century <= 2922789;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for century of era must be in the range [0, 2922789]", century);

    date.has_year = true;
    date.year = 100 * century;
    date.century_format = true;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the year of era and stores it in date.year.
/// The format flags (century_format = false, is_year_of_era = true) are written before the range
/// check, so they are updated even when the value is rejected.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 292278993].
/// Returns the position after the consumed digits.
static Pos jodaYearOfEra(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 year_of_era;
    cur = readNumberWithVariableLength(cur, end, false, false, true, repetitions, repetitions, year_of_era);

    date.is_year_of_era = true;
    date.century_format = false;

    const bool in_range = year_of_era >= 1 && year_of_era <= 292278993;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for year of era must be in the range [1, 292278993]", year_of_era);

    date.year = year_of_era;
    date.has_year = true;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the week year and stores it in date.year.
/// Switches `date` to week-date interpretation (week_date_format = true, day_of_year_format = false,
/// century_format = false) and marks the year as present.
/// Throws LOGICAL_ERROR when the parsed value is outside [-292275054, 292278993].
/// Returns the position after the consumed characters.
static Pos jodaWeekYear(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 week_year;
    cur = readNumberWithVariableLength(cur, end, true, true, true, repetitions, repetitions, week_year);

    const bool in_range = week_year >= -292275054 && week_year <= 292278993;
    if (!in_range)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "Value {} for week year must be in the range [-292275054,292278993]", week_year);

    date.has_year = true;
    date.year = week_year;
    date.century_format = false;
    date.day_of_year_format = false;
    date.week_date_format = true;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the week of week year (at most max(repetitions, 2) digits) and stores it in date.week.
/// Switches `date` to week-date interpretation; if no year has been seen yet, year 2000 is assumed.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 52].
/// Returns the position after the consumed digits.
static Pos jodaWeekOfWeekYear(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 week;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, week);

    if (week > 52 || week < 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for week of week year must be in the range [1, 52]", week);

    date.week = week;
    date.day_of_year_format = false;
    date.week_date_format = true;

    /// Fall back to the default year only when none was parsed before.
    if (!date.has_year)
    {
        date.year = 2000;
        date.has_year = true;
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads a numeric day of week in the range [1, 7] and stores it in date.day_of_week.
/// Switches `date` to week-date interpretation; if no year has been seen yet, year 2000 is assumed.
/// Throws LOGICAL_ERROR for values outside [1, 7].
/// Returns the position after the consumed digits.
static Pos jodaDayOfWeek1Based(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 weekday;
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, repetitions, weekday);

    if (weekday > 7 || weekday < 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for day of week 1-based must be in the range [1, 7]", weekday);

    date.day_of_week = weekday;
    date.day_of_year_format = false;
    date.week_date_format = true;

    /// Fall back to the default year only when none was parsed before.
    if (!date.has_year)
    {
        date.year = 2000;
        date.has_year = true;
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Parses a textual day of week, case-insensitively.
/// The first three letters select the day through dayOfWeekMap; if the remainder of the full name
/// (e.g. "day" after "mon") follows in the input, it is consumed as well, otherwise only the
/// three-letter abbreviation is consumed.
/// Stores the day number in date.day_of_week and returns the position after the consumed text.
/// Throws LOGICAL_ERROR when the three-letter prefix is not a known day.
static Pos jodaDayOfWeekText(size_t /*min_represent_digits*/, Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3");

    String prefix(cur, 3);
    Poco::toLowerInPlace(prefix);

    const auto found = dayOfWeekMap.find(prefix);
    if (found == dayOfWeekMap.end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown day of week text: {}", prefix);

    const auto & [suffix, day_number] = found->second;
    date.day_of_week = day_number;
    cur += 3;

    /// Optionally swallow the rest of the full name when it is present.
    if (cur + suffix.size() <= end)
    {
        String candidate(cur, suffix.size());
        Poco::toLowerInPlace(candidate);
        if (candidate == suffix)
            cur += suffix.size();
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads a year and stores it in date.year.
/// Clears the century and year-of-era flags and marks the year as present.
/// Throws LOGICAL_ERROR when the parsed value is outside [-292275055, 292278994].
/// Returns the position after the consumed characters.
static Pos jodaYear(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 year;
    cur = readNumberWithVariableLength(cur, end, true, true, true, repetitions, repetitions, year);

    const bool in_range = year >= -292275055 && year <= 292278994;
    if (!in_range)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for year must be in the range [-292275055,292278994]", year);

    date.year = year;
    date.has_year = true;
    date.is_year_of_era = false;
    date.century_format = false;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the day of year (at most max(repetitions, 3) digits) and stores it in `date`.
/// The value is appended to date.day_of_year_values and written to date.day_of_year, and `date` is
/// switched to day-of-year interpretation. If no year has been seen yet, year 2000 is assumed.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 366].
/// Returns the position after the consumed digits.
static Pos jodaDayOfYear(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 number;
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 3), number);
    if (number < 1 || number > 366)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for day of year must be in the range [1, 366]", number);

    date.day_of_year_values.push_back(number);
    /// Store the parsed value itself (previously `true`, i.e. 1, was stored regardless of the input).
    date.day_of_year = number;
    date.day_of_year_format = true;
    date.week_date_format = false;

    /// Supply the default year only when none was parsed before, like the other field parsers do.
    /// The condition used to be inverted, which overwrote an explicitly parsed year with 2000.
    if (!date.has_year)
    {
        date.has_year = true;
        date.year = 2000;
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads a numeric month (at most 2 digits) and stores it in date.month.
/// Switches `date` away from week-date and day-of-year interpretation; if no year has been seen
/// yet, year 2000 is assumed.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 12].
/// Returns the position after the consumed digits.
static Pos jodaMonthOfYear(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 month;
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, 2, month);

    if (month > 12 || month < 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for month of year must be in the range [1, 12]", month);

    date.month = month;
    date.day_of_year_format = false;
    date.week_date_format = false;

    /// Fall back to the default year only when none was parsed before.
    if (!date.has_year)
    {
        date.year = 2000;
        date.has_year = true;
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-28 06:08:03 +00:00
/// Parses a textual month name, case-insensitively.
/// The first three letters select the month through monthMap; if the remainder of the full name
/// (e.g. "uary" after "jan") follows in the input, it is consumed as well, otherwise only the
/// three-letter abbreviation is consumed.
/// Stores the month number in date.month and returns the position after the consumed text.
/// Throws LOGICAL_ERROR when the three-letter prefix is not a known month.
/// The first (unnamed) argument is bound by the caller but not used here.
static Pos jodaMonthOfYearText(int, Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 3, "jodaMonthOfYearText requires size >= 3");

    String prefix(cur, 3);
    Poco::toLowerInPlace(prefix);

    const auto found = monthMap.find(prefix);
    if (found == monthMap.end())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown month of year text: {}", prefix);

    const auto & [suffix, month_number] = found->second;
    date.month = month_number;
    cur += 3;

    /// Optionally swallow the rest of the full name when it is present.
    if (cur + suffix.size() <= end)
    {
        String candidate(cur, suffix.size());
        Poco::toLowerInPlace(candidate);
        if (candidate == suffix)
            cur += suffix.size();
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the day of month (at most max(repetitions, 2) digits) and stores it in date.day.
/// The value is also appended to date.day_of_month_values. Switches `date` away from week-date and
/// day-of-year interpretation; if no year has been seen yet, year 2000 is assumed.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 31].
/// Returns the position after the consumed digits.
static Pos jodaDayOfMonth(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 day;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, day);

    if (day > 31 || day < 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for day of month must be in the range [1, 31]", day);

    date.day_of_month_values.push_back(day);
    date.day = day;
    date.day_of_year_format = false;
    date.week_date_format = false;

    /// Fall back to the default year only when none was parsed before.
    if (!date.has_year)
    {
        date.year = 2000;
        date.has_year = true;
    }
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-28 06:08:03 +00:00
/// Parses the half-day marker "am" or "pm" (case-insensitive) and stores the result in date.is_am.
/// Throws LOGICAL_ERROR for any other two-character text; the error message shows the lower-cased input.
/// The first (unnamed) argument is bound by the caller but not used here.
/// Returns the position just past the two consumed characters.
static Pos jodaHalfDayOfDay(int, Pos cur, Pos end, Date & date)
{
    ensureSpace(cur, end, 2, "jodaHalfDayOfDay requires size >= 2");

    String marker(cur, 2);
    Poco::toLowerInPlace(marker);

    const bool is_am = (marker == "am");
    if (!is_am && marker != "pm")
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown half day of day: {}", marker);

    date.is_am = is_am;
    return cur + 2;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the hour of half day (at most max(repetitions, 2) digits) and stores it in date.hour.
/// Marks the hour as a half-day hour that is not a clock hour.
/// Throws LOGICAL_ERROR when the parsed value is outside [0, 11].
/// Returns the position after the consumed digits.
static Pos jodaHourOfHalfDay(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 hour;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, hour);

    if (hour < 0 || hour > 11)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for hour of half day must be in the range [0, 11]", hour);

    date.hour = hour;
    date.is_hour_of_half_day = true;
    date.is_clock_hour = false;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the clock hour of half day (at most max(repetitions, 2) digits) and stores it in date.hour.
/// The accepted range is [1, 12]; clock hour 12 is stored as hour 0, so that the stored value is a
/// zero-based hour of half day. Marks the hour as a clock hour of half day.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 12].
/// Returns the position after the consumed digits.
static Pos jodaClockHourOfHalfDay(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 number;
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), number);
    if (number > 12 || number < 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for clock hour of half day must be in the range [1, 12]", number);

    date.is_clock_hour = true;
    date.is_hour_of_half_day = true;
    /// Normalise 12 -> 0, mirroring the `% 24` normalisation done by jodaClockHourOfDay.
    /// Storing 12 verbatim would leave an out-of-range hour of half day.
    date.hour = number % 12;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the hour of day (at most max(repetitions, 2) digits) and stores it in date.hour.
/// Marks the hour as neither a clock hour nor a half-day hour.
/// Throws LOGICAL_ERROR when the parsed value is outside [0, 23].
/// Returns the position after the consumed digits.
static Pos jodaHourOfDay(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 hour;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, hour);

    if (hour < 0 || hour > 23)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for hour of day must be in the range [0, 23]", hour);

    date.hour = hour;
    date.is_hour_of_half_day = false;
    date.is_clock_hour = false;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the clock hour of day (at most max(repetitions, 2) digits) and stores it in date.hour.
/// The accepted range is [1, 24]; clock hour 24 is stored as hour 0. Marks the hour as a clock hour
/// that is not a half-day hour.
/// Throws LOGICAL_ERROR when the parsed value is outside [1, 24].
/// Returns the position after the consumed digits.
static Pos jodaClockHourOfDay(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 clock_hour;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, clock_hour);

    if (clock_hour < 1 || clock_hour > 24)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for clock hour of day must be in the range [1, 24]", clock_hour);

    /// Within [1, 24] only the value 24 needs wrapping.
    date.hour = (clock_hour == 24) ? 0 : clock_hour;
    date.is_hour_of_half_day = false;
    date.is_clock_hour = true;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the minute of hour (at most max(repetitions, 2) digits) and stores it in date.minute.
/// Throws LOGICAL_ERROR when the parsed value is outside [0, 59].
/// Returns the position after the consumed digits.
static Pos jodaMinuteOfHour(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 minute;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, minute);

    if (minute < 0 || minute > 59)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for minute of hour must be in the range [0, 59]", minute);

    date.minute = minute;
    return cur;
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Reads the second of minute (at most max(repetitions, 2) digits) and stores it in date.second.
/// Throws LOGICAL_ERROR when the parsed value is outside [0, 59].
/// Returns the position after the consumed digits.
static Pos jodaSecondOfMinute(int repetitions, Pos cur, Pos end, Date & date)
{
    Int32 second;
    const int max_digits = std::max(repetitions, 2);
    cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, max_digits, second);

    if (second < 0 || second > 59)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Value {} for second of minute must be in the range [0, 59]", second);

    date.second = second;
    return cur;
}
};
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Compiles `format` into `instructions` using the syntax selected by the `parse_syntax`
/// template parameter: Joda -> parseJodaFormat, MySQL -> parseMysqlFormat.
/// Any other syntax value results in a NOT_IMPLEMENTED exception.
ALWAYS_INLINE void parseFormat(const String & format, std::vector<Action> & instructions) const
{
    if constexpr (parse_syntax == ParseDateTimeTraits::ParseSyntax::Joda)
    {
        parseJodaFormat(format, instructions);
    }
    else if constexpr (parse_syntax == ParseDateTimeTraits::ParseSyntax::MySQL)
    {
        parseMysqlFormat(format, instructions);
    }
    else
    {
        throw Exception(
            ErrorCodes::NOT_IMPLEMENTED,
            "Unknown datetime format style {} in function {}",
            magic_enum::enum_name(parse_syntax),
            getName());
    }
}
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Translates a MySQL-style format string into a sequence of parsing instructions.
///
/// Text between specifiers is appended as a literal instruction; every "%x" specifier is mapped to
/// the corresponding Action::mysql* parser. "%t", "%n" and "%%" produce the literals "\t", "\n"
/// and "%" respectively.
///
/// Throws BAD_ARGUMENTS when '%' is the last character of the format or when the specifier is
/// unknown, and NOT_IMPLEMENTED for recognised but unsupported specifiers (%f, %Q, %U, %v, %x, %X).
ALWAYS_INLINE void parseMysqlFormat(const String & format, std::vector<Action> & instructions) const
{
#define ACTION_ARGS(func) &(func), #func

    Pos pos = format.data();
    Pos end = pos + format.size();
    while (true)
    {
        Pos percent_pos = find_first_symbols<'%'>(pos, end);
        if (percent_pos >= end)
        {
            /// No further specifier: whatever is left is a trailing literal.
            if (pos < end)
                instructions.emplace_back(String(pos, end - pos));
            break;
        }

        /// Literal text preceding the specifier.
        if (pos < percent_pos)
            instructions.emplace_back(String(pos, percent_pos - pos));

        pos = percent_pos + 1;
        if (pos >= end)
            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sign '%' is the last in format, if you need it, use '%%'");

        switch (*pos)
        {
            // Abbreviated weekday [Mon...Sun]
            case 'a':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfWeekTextShort));
                break;
            // Abbreviated month [Jan...Dec]
            case 'b':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlMonthOfYearTextShort));
                break;
            // Month as a decimal number (01-12)
            case 'c':
            case 'm':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlMonth));
                break;
            // Year, divided by 100, zero-padded
            case 'C':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlCentury));
                break;
            // Day of month, zero-padded (01-31)
            case 'd':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfMonth));
                break;
            // Short MM/DD/YY date, equivalent to %m/%d/%y
            case 'D':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlAmericanDate));
                break;
            // Day of month, space-padded ( 1-31)
            case 'e':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfMonthSpacePadded));
                break;
            // Fractional seconds
            case 'f':
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for fractional seconds");
            // Short YYYY-MM-DD date, equivalent to %Y-%m-%d, e.g. 2001-08-23
            case 'F':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlISO8601Date));
                break;
            // Last two digits of year of ISO 8601 week number (see %G)
            case 'g':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlISO8601Year2));
                break;
            // Year of ISO 8601 week number (see %V)
            case 'G':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlISO8601Year4));
                break;
            // Day of the year (001-366)
            case 'j':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfYear));
                break;
            // ISO 8601 weekday as number with Monday as 1 (1-7)
            case 'u':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfWeek));
                break;
            // ISO 8601 week number (01-53)
            case 'V':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlISO8601Week));
                break;
            // Weekday as a decimal number with Sunday as 0 (0-6)
            case 'w':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfWeek0To6));
                break;
            // Full weekday [Monday...Sunday]
            case 'W':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlDayOfWeekTextLong));
                break;
            // Two digits year
            case 'y':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlYear2));
                break;
            // Four digits year
            case 'Y':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlYear4));
                break;
            // Quarter (1-4)
            case 'Q':
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for quarter");
            // Offset from UTC timezone as +hhmm or -hhmm
            case 'z':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlTimezoneOffset));
                break;

            /// Time components.
            // Minute of hour (00-59); both %M and %i map to the minute parser here
            case 'M':
            case 'i':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlMinute));
                break;
            // AM or PM
            case 'p':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlAMPM));
                break;
            // 12-hour HH:MM time, equivalent to %h:%i %p, e.g. 2:55 PM
            case 'r':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlHHMM12));
                break;
            // 24-hour HH:MM time, equivalent to %H:%i, e.g. 14:55
            case 'R':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlHHMM24));
                break;
            // Seconds
            case 's':
            case 'S':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlSecond));
                break;
            // ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S, e.g. 14:55:02
            case 'T':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlISO8601Time));
                break;
            // Hour in 12h format (01-12)
            case 'h':
            case 'I':
            case 'l':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlHour12));
                break;
            // Hour in 24h format (00-23)
            case 'H':
            case 'k':
                instructions.emplace_back(ACTION_ARGS(Action::mysqlHour24));
                break;

            /// Escapes that stand for a literal character.
            case 't':
                instructions.emplace_back("\t");
                break;
            case 'n':
                instructions.emplace_back("\n");
                break;
            // "%%" is an escaped percent sign (it used to be emitted as "\n" by mistake)
            case '%':
                instructions.emplace_back("%");
                break;

            // Unimplemented
            case 'U':
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK (Sun-Sat)");
            case 'v':
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK (Mon-Sun)");
            case 'x':
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for YEAR for week (Mon-Sun)");
            case 'X':
                throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for YEAR for week (Sun-Sat)");
            default:
                throw Exception(
                    ErrorCodes::BAD_ARGUMENTS,
                    "Incorrect syntax '{}', symbol is not supported '{}' for function {}",
                    format,
                    *pos,
                    getName());
        }

        /// Step over the specifier character.
        ++pos;
    }
#undef ACTION_ARGS
}
void parseJodaFormat(const String & format, std::vector<Action> & instructions) const
{
#define ACTION_ARGS_WITH_BIND(func, arg) std::bind_front(&(func), (arg)), #func
// size_t reserve_size = 0;
const char * pos = format.data();
const char * end = pos + format.size();
while (pos < end)
2023-02-22 10:05:53 +00:00
{
2023-02-27 13:41:38 +00:00
const char * cur_token = pos;
// Literal case
if (*cur_token == '\'')
{
// Case 1: 2 consecutive single quote
if (pos + 1 < end && *(pos + 1) == '\'')
{
instructions.emplace_back(String(cur_token, 1));
// ++reserve_size;
pos += 2;
}
else
{
// Case 2: find closing single quote
Int64 count = numLiteralChars(cur_token + 1, end);
if (count == -1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "No closing single quote for literal");
else
{
for (Int64 i = 1; i <= count; i++)
{
instructions.emplace_back(String(cur_token + i, 1));
// ++reserve_size;
if (*(cur_token + i) == '\'')
i += 1;
}
pos += count + 2;
}
}
}
else
{
int repetitions = 1;
++pos;
while (pos < end && *cur_token == *pos)
{
++repetitions;
++pos;
}
switch (*cur_token)
{
case 'G':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaEra, repetitions));
// reserve_size += repetitions <= 3 ? 2 : 13;
break;
case 'C':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaCenturyOfEra, repetitions));
/// Year range [1900, 2299]
// reserve_size += std::max(repetitions, 2);
break;
case 'Y':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaYearOfEra, repetitions));
/// Year range [1900, 2299]
// reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4);
break;
case 'x':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaWeekYear, repetitions));
/// weekyear range [1900, 2299]
// reserve_size += std::max(repetitions, 4);
break;
case 'w':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaWeekOfWeekYear, repetitions));
/// Week of weekyear range [1, 52]
// reserve_size += std::max(repetitions, 2);
break;
case 'e':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaDayOfWeek1Based, repetitions));
/// Day of week range [1, 7]
// reserve_size += std::max(repetitions, 1);
break;
case 'E':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaDayOfWeekText, repetitions));
/// Maximum length of short name is 3, maximum length of full name is 9.
// reserve_size += repetitions <= 3 ? 3 : 9;
break;
case 'y':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaYear, repetitions));
/// Year range [1900, 2299]
// reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4);
break;
case 'D':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaDayOfYear, repetitions));
/// Day of year range [1, 366]
// reserve_size += std::max(repetitions, 3);
break;
case 'M':
if (repetitions <= 2)
{
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaMonthOfYear, repetitions));
/// Month of year range [1, 12]
// reserve_size += 2;
}
else
{
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaMonthOfYearText, repetitions));
/// Maximum length of short name is 3, maximum length of full name is 9.
// reserve_size += repetitions <= 3 ? 3 : 9;
}
break;
case 'd':
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaDayOfMonth, repetitions));
/// Day of month range [1, 3]
// reserve_size += std::max(repetitions, 3);
break;
case 'a':
/// Default half day of day is "AM"
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaHalfDayOfDay, repetitions));
// reserve_size += 2;
break;
case 'K':
/// Default hour of half day is 0
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaHourOfHalfDay, repetitions));
/// Hour of half day range [0, 11]
// reserve_size += std::max(repetitions, 2);
break;
case 'h':
/// Default clock hour of half day is 12
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaClockHourOfHalfDay, repetitions));
/// Clock hour of half day range [1, 12]
// reserve_size += std::max(repetitions, 2);
break;
case 'H':
/// Default hour of day is 0
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaHourOfDay, repetitions));
/// Hour of day range [0, 23]
// reserve_size += std::max(repetitions, 2);
break;
case 'k':
/// Default clock hour of day is 24
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaClockHourOfDay, repetitions));
/// Clock hour of day range [1, 24]
// reserve_size += std::max(repetitions, 2);
break;
case 'm':
/// Default minute of hour is 0
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaMinuteOfHour, repetitions));
/// Minute of hour range [0, 59]
// reserve_size += std::max(repetitions, 2);
break;
case 's':
/// Default second of minute is 0
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Action::jodaSecondOfMinute, repetitions));
/// Second of minute range [0, 59]
// reserve_size += std::max(repetitions, 2);
break;
case 'S':
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for fractional seconds");
break;
case 'z':
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for timezone");
break;
case 'Z':
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for timezone offset id");
default:
if (isalpha(*cur_token))
throw Exception(
ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions));
instructions.emplace_back(String(cur_token, pos - cur_token));
// reserve_size += pos - cur_token;
break;
}
}
2023-02-22 10:05:53 +00:00
}
2023-02-28 06:08:03 +00:00
#undef ACTION_ARGS_WITH_BIND
2023-02-22 10:05:53 +00:00
}
2023-02-27 13:41:38 +00:00
/// Returns the format string to parse with.
/// When a second argument is supplied it must be a constant string column, otherwise ILLEGAL_COLUMN
/// is thrown. Without a second argument the default pattern for the active syntax is returned:
/// "yyyy-MM-dd HH:mm:ss" for Joda, "%Y-%m-%d %H:%M:%S" otherwise.
ALWAYS_INLINE String getFormat(const ColumnsWithTypeAndName & arguments) const
{
    if (arguments.size() >= 2)
    {
        const auto * format_column = checkAndGetColumnConst<ColumnString>(arguments[1].column.get());
        if (format_column)
            return format_column->getValue<String>();

        throw Exception(
            ErrorCodes::ILLEGAL_COLUMN,
            "Illegal column {} of second ('format') argument of function {}. Must be constant string.",
            arguments[1].column->getName(),
            getName());
    }

    /// No explicit format: use the default of the selected syntax.
    if constexpr (parse_syntax == ParseDateTimeTraits::ParseSyntax::Joda)
        return "yyyy-MM-dd HH:mm:ss";
    else
        return "%Y-%m-%d %H:%M:%S";
}
/// Returns the time zone to interpret the parsed value in, as a pair of
/// (pointer to the DateLUT instance, time zone name).
/// Without a third argument the result is DateLUT::instance() and an empty name.
/// A supplied third argument must be a constant string column (ILLEGAL_COLUMN otherwise) and must
/// not be empty (ILLEGAL_TYPE_OF_ARGUMENT otherwise).
ALWAYS_INLINE std::pair<const DateLUTImpl *, String> getTimeZone(const ColumnsWithTypeAndName & arguments) const
{
    const bool has_time_zone_argument = arguments.size() >= 3;
    if (!has_time_zone_argument)
        return {&DateLUT::instance(), ""};

    const auto * time_zone_column = checkAndGetColumnConst<ColumnString>(arguments[2].column.get());
    if (!time_zone_column)
    {
        throw Exception(
            ErrorCodes::ILLEGAL_COLUMN,
            "Illegal column {} of third ('timezone') argument of function {}. Must be constant string.",
            arguments[2].column->getName(),
            getName());
    }

    String time_zone = time_zone_column->getValue<String>();
    if (time_zone.empty())
        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty and be a valid time zone");

    return {&DateLUT::instance(time_zone), time_zone};
}
2023-02-27 13:41:38 +00:00
};
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Name tag carrying the SQL-visible name of the MySQL-syntax parsing function.
struct NameParseDateTime
{
    static constexpr const char * name = "parseDateTime";
};
2023-02-24 09:07:27 +00:00
2023-02-27 13:41:38 +00:00
/// Name tag carrying the SQL-visible name of the Joda-syntax parsing function.
struct NameParseDateTimeInJodaSyntax
{
    static constexpr auto name = "parseDateTimeInJodaSyntax";
};
2023-02-24 09:07:27 +00:00
2023-02-22 10:05:53 +00:00
2023-02-27 13:41:38 +00:00
/// Concrete instantiations of the parsing function: one per supported format syntax.
using FunctionParseDateTime
    = FunctionParseDateTimeImpl<NameParseDateTime, ParseDateTimeTraits::ParseSyntax::MySQL>;
using FunctionParseDateTimeInJodaSyntax
    = FunctionParseDateTimeImpl<NameParseDateTimeInJodaSyntax, ParseDateTimeTraits::ParseSyntax::Joda>;
2023-02-24 09:07:27 +00:00
}
2023-02-22 10:05:53 +00:00
2023-02-24 09:07:27 +00:00
REGISTER_FUNCTION(ParseDateTime)
{
    /// MySQL-syntax parser, additionally reachable under the alias TO_UNIXTIME.
    factory.registerFunction<FunctionParseDateTime>();
    factory.registerAlias("TO_UNIXTIME", "parseDateTime");

    /// Joda-syntax parser.
    factory.registerFunction<FunctionParseDateTimeInJodaSyntax>();
}
2023-02-24 09:07:27 +00:00
2023-02-22 10:05:53 +00:00
}