#include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int CANNOT_PARSE_DATETIME; } namespace { inline size_t readDigits(char * res, size_t max_chars, ReadBuffer & in) { size_t num_chars = 0; while (!in.eof() && isNumericASCII(*in.position()) && num_chars < max_chars) { res[num_chars] = *in.position() - '0'; ++num_chars; ++in.position(); } return num_chars; } inline size_t readAlpha(char * res, size_t max_chars, ReadBuffer & in) { size_t num_chars = 0; while (!in.eof() && isAlphaASCII(*in.position()) && num_chars < max_chars) { res[num_chars] = *in.position(); ++num_chars; ++in.position(); } return num_chars; } #if defined(__PPC__) #if !__clang__ #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif #endif template inline void readDecimalNumberImpl(T & res, const char * src) { res += src[digit] * power_of_ten; if constexpr (digit > 0) readDecimalNumberImpl(res, src); } template inline void readDecimalNumber(T & res, const char * src) { readDecimalNumberImpl(res, src); } template inline void readDecimalNumber(T & res, size_t num_digits, const char * src) { #define READ_DECIMAL_NUMBER(N) do { res *= common::exp10_i32(N); readDecimalNumber(res, src); src += (N); num_digits -= (N); } while (false) while (num_digits) { switch (num_digits) { case 3: READ_DECIMAL_NUMBER(3); break; case 2: READ_DECIMAL_NUMBER(2); break; case 1: READ_DECIMAL_NUMBER(1); break; default: READ_DECIMAL_NUMBER(4); break; } } #undef READ_DECIMAL_NUMBER } struct DateTimeSubsecondPart { Int64 value; UInt8 digits; }; template ReturnType parseDateTimeBestEffortImpl( time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, DateTimeSubsecondPart * fractional) { auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]]) { if constexpr (std::is_same_v) throw Exception(message, code); else return false; }; UInt16 year = 0; UInt8 month = 0; UInt8 day_of_month = 0; UInt8 hour = 0; UInt8 minute = 0; UInt8 second = 0; bool has_time = false; bool has_time_zone_offset = false; bool time_zone_offset_negative = false; UInt8 time_zone_offset_hour = 0; UInt8 time_zone_offset_minute = 0; bool is_pm = false; auto read_alpha_month = [&month] (const auto & alpha) { if (0 == strncasecmp(alpha, "Jan", 3)) month = 1; else if (0 == strncasecmp(alpha, "Feb", 3)) month = 2; else if (0 == strncasecmp(alpha, "Mar", 3)) month = 3; else if (0 == strncasecmp(alpha, "Apr", 3)) month = 4; else if (0 == strncasecmp(alpha, "May", 3)) month = 5; else if (0 == strncasecmp(alpha, "Jun", 3)) month = 6; else if (0 == strncasecmp(alpha, "Jul", 3)) month = 7; else if (0 == strncasecmp(alpha, "Aug", 3)) month = 8; else if (0 == strncasecmp(alpha, "Sep", 3)) month = 9; else if (0 == strncasecmp(alpha, "Oct", 3)) month = 10; else if (0 == strncasecmp(alpha, "Nov", 3)) month = 11; else if (0 == strncasecmp(alpha, "Dec", 3)) month = 12; else return false; return true; }; while (!in.eof()) { char digits[std::numeric_limits::digits10]; size_t num_digits = 0; if (!year || !has_time) { num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 10 && !year && !has_time) { /// This is unix timestamp. readDecimalNumber<10>(res, digits); return ReturnType(true); } else if (num_digits == 9 && !year && !has_time) { /// This is unix timestamp. readDecimalNumber<9>(res, digits); return ReturnType(true); } else if (num_digits == 14 && !year && !has_time) { /// This is YYYYMMDDhhmmss readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); readDecimalNumber<2>(day_of_month, digits + 6); readDecimalNumber<2>(hour, digits + 8); readDecimalNumber<2>(minute, digits + 10); readDecimalNumber<2>(second, digits + 12); has_time = true; } else if (num_digits == 8 && !year) { /// This is YYYYMMDD readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); readDecimalNumber<2>(day_of_month, digits + 6); } else if (num_digits == 6) { /// This is YYYYMM if (!year && !month) { readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); } else if (!has_time) { readDecimalNumber<2>(hour, digits); readDecimalNumber<2>(minute, digits + 2); readDecimalNumber<2>(second, digits + 4); has_time = true; } else return on_error("Cannot read DateTime: ambiguous 6 digits, it can be YYYYMM or hhmmss", ErrorCodes::CANNOT_PARSE_DATETIME); } else if (num_digits == 4 && !year) { /// YYYY /// YYYY*MM /// YYYY*MM*DD /// YYYY*M /// YYYY*M*DD /// YYYY*M*D readDecimalNumber<4>(year, digits); if (!in.eof()) { char delimiter_after_year = *in.position(); if (delimiter_after_year < 0x20 || delimiter_after_year == ',' || delimiter_after_year == ';' || delimiter_after_year == '\'' || delimiter_after_year == '"') break; if (month) continue; ++in.position(); num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 2) readDecimalNumber<2>(month, digits); else if (num_digits == 1) readDecimalNumber<1>(month, digits); else if (delimiter_after_year == ' ') continue; else return on_error("Cannot read DateTime: unexpected number of decimal digits after year: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); /// Only the same delimiter. if (!day_of_month && checkChar(delimiter_after_year, in)) { num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 2) readDecimalNumber<2>(day_of_month, digits); else if (num_digits == 1) readDecimalNumber<1>(day_of_month, digits); else if (delimiter_after_year == ' ') continue; else return on_error("Cannot read DateTime: unexpected number of decimal digits after year and month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } } } else if (num_digits == 2 || num_digits == 1) { /// hh:mm:ss /// hh:mm /// hh - only if already have day of month /// DD/MM/YYYY /// DD/MM/YY /// DD.MM.YYYY /// DD.MM.YY /// DD-MM-YYYY /// DD-MM-YY /// DD UInt8 hour_or_day_of_month = 0; if (num_digits == 2) readDecimalNumber<2>(hour_or_day_of_month, digits); else if (num_digits == 1) //-V547 readDecimalNumber<1>(hour_or_day_of_month, digits); else return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR); if (checkChar(':', in)) { if (has_time) return on_error("Cannot read DateTime: time component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); hour = hour_or_day_of_month; has_time = true; num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 2) readDecimalNumber<2>(minute, digits); else if (num_digits == 1) readDecimalNumber<1>(minute, digits); else return on_error("Cannot read DateTime: unexpected number of decimal digits after hour: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); if (checkChar(':', in)) { num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 2) readDecimalNumber<2>(second, digits); else if (num_digits == 1) readDecimalNumber<1>(second, digits); else return on_error("Cannot read DateTime: unexpected number of decimal digits after hour and minute: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } } else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (day_of_month) return on_error("Cannot read DateTime: day of month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); if (month) return on_error("Cannot read DateTime: month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); day_of_month = hour_or_day_of_month; num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 2) readDecimalNumber<2>(month, digits); else if (num_digits == 1) readDecimalNumber<1>(month, digits); else if (num_digits == 0) { /// Month in alphabetical form char alpha[9]; /// The longest month name: September size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); if (num_alpha < 3) return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); if (!read_alpha_month(alpha)) return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); } else return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (year) return on_error("Cannot read DateTime: year component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 4) readDecimalNumber<4>(year, digits); else if (num_digits == 2) { readDecimalNumber<2>(year, digits); if (year >= 70) year += 1900; else year += 2000; } else return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month and month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } } else { if (day_of_month) hour = hour_or_day_of_month; else day_of_month = hour_or_day_of_month; } } else if (num_digits != 0) return on_error("Cannot read DateTime: unexpected number of decimal digits: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } if (num_digits == 0) { char c = *in.position(); /// 'T' is a separator between date and time according to ISO 8601. /// But don't skip it if we didn't read the date part yet, because 'T' is also a prefix for 'Tue' and 'Thu'. if (c == ' ' || (c == 'T' && year && !has_time)) { ++in.position(); } else if (c == 'Z') { ++in.position(); has_time_zone_offset = true; } else if (c == '.') /// We don't support comma (ISO 8601:2004) for fractional part of second to not mess up with CSV separator. { if (!has_time) return on_error("Cannot read DateTime: unexpected point symbol", ErrorCodes::CANNOT_PARSE_DATETIME); ++in.position(); num_digits = readDigits(digits, sizeof(digits), in); if (fractional) { using FractionalType = typename std::decayvalue)>::type; // Reading more decimal digits than fits into FractionalType would case an // overflow, so it is better to skip all digits from the right side that do not // fit into result type. To provide less precise value rather than bogus one. num_digits = std::min(static_cast(std::numeric_limits::digits10), num_digits); fractional->digits = num_digits; readDecimalNumber(fractional->value, num_digits, digits); } } else if (c == '+' || c == '-') { ++in.position(); has_time_zone_offset = true; if (c == '-') time_zone_offset_negative = true; num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 4) { readDecimalNumber<2>(time_zone_offset_hour, digits); readDecimalNumber<2>(time_zone_offset_minute, digits + 2); } else if (num_digits == 3) { readDecimalNumber<1>(time_zone_offset_hour, digits); readDecimalNumber<2>(time_zone_offset_minute, digits + 1); } else if (num_digits == 2) { readDecimalNumber<2>(time_zone_offset_hour, digits); } else if (num_digits == 1) { readDecimalNumber<1>(time_zone_offset_hour, digits); } else return on_error("Cannot read DateTime: unexpected number of decimal digits for time zone offset: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); if (num_digits < 3 && checkChar(':', in)) { num_digits = readDigits(digits, sizeof(digits), in); if (num_digits == 2) { readDecimalNumber<2>(time_zone_offset_minute, digits); } else if (num_digits == 1) { readDecimalNumber<1>(time_zone_offset_minute, digits); } else return on_error("Cannot read DateTime: unexpected number of decimal digits for time zone offset in minutes: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); } } else { char alpha[3]; size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); if (!num_alpha) { break; } else if (num_alpha == 1) { return on_error("Cannot read DateTime: unexpected alphabetical character", ErrorCodes::CANNOT_PARSE_DATETIME); } else if (num_alpha == 2) { if (alpha[1] == 'M' || alpha[1] == 'm') { if (alpha[0] == 'A' || alpha[0] == 'a') { } else if (alpha[0] == 'P' || alpha[0] == 'p') { is_pm = true; } else return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME); } else return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME); } else if (num_alpha == 3) { bool has_day_of_week = false; if (read_alpha_month(alpha)) { } else if (0 == strncasecmp(alpha, "UTC", 3)) has_time_zone_offset = true; // NOLINT else if (0 == strncasecmp(alpha, "GMT", 3)) has_time_zone_offset = true; else if (0 == strncasecmp(alpha, "MSK", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 3; } else if (0 == strncasecmp(alpha, "MSD", 3)) { has_time_zone_offset = true; time_zone_offset_hour = 4; } else if (0 == strncasecmp(alpha, "Mon", 3)) has_day_of_week = true; // NOLINT else if (0 == strncasecmp(alpha, "Tue", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Wed", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Thu", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Fri", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Sat", 3)) has_day_of_week = true; else if (0 == strncasecmp(alpha, "Sun", 3)) has_day_of_week = true; else return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME); while (!in.eof() && isAlphaASCII(*in.position())) ++in.position(); /// For RFC 2822 if (has_day_of_week) checkChar(',', in); } else return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR); } } } if (!year) year = 2000; if (!month) month = 1; if (!day_of_month) day_of_month = 1; if (is_pm && hour < 12) hour += 12; auto adjust_time_zone = [&] { if (time_zone_offset_hour) { if (time_zone_offset_negative) res += time_zone_offset_hour * 3600; else res -= time_zone_offset_hour * 3600; } if (time_zone_offset_minute) { if (time_zone_offset_negative) res += time_zone_offset_minute * 60; else res -= time_zone_offset_minute * 60; } }; if (has_time_zone_offset) { res = utc_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second); adjust_time_zone(); } else { res = local_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second); } return ReturnType(true); } template ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { time_t whole; DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized sine it could be missing from input string if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) return ReturnType(false); DateTime64::NativeType fractional = subsecond.value; if (scale < subsecond.digits) { fractional /= common::exp10_i64(subsecond.digits - scale); } else if (scale > subsecond.digits) { fractional *= common::exp10_i64(scale - subsecond.digits); } res = DecimalUtils::decimalFromComponents(whole, fractional, scale); return ReturnType(true); } } #if defined(__PPC__) #if !__clang__ #pragma GCC diagnostic pop #endif #endif void parseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } bool tryParseDateTimeBestEffort(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr); } void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) { return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } }