#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

namespace DB
{

/// Error codes used by this file. They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NOT_IMPLEMENTED;
    extern const int ILLEGAL_COLUMN;
    extern const int BAD_ARGUMENTS;
}

namespace
{

/// Alias for a pointer to constant characters.
using Pos = const char *;

/// Two-state switch (Yes / No).
/// NOTE(review): presumably selects whether integer arguments are accepted in addition to date/time ones - confirm at the use site.
enum class SupportInteger { Yes, No };

/// Dialect of the format string: MySQL or Joda.
enum class FormatSyntax { MySQL, Joda };

/// Type map: the primary template is empty, every explicit specialization below defines
/// an `InstructionValueType` alias (UInt32, UInt16, Int32 or Int64).
/// NOTE(review): presumably keyed by the argument data type and used to pick the value type instructions operate on - confirm.
template struct InstructionValueTypeMap {};
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt16; };
template <> struct InstructionValueTypeMap { using InstructionValueType = Int32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; };
template <> struct InstructionValueTypeMap { using InstructionValueType = Int64; };

/// Cast value from integer to string, making sure the number of digits in the result string is no less than min_digits by padding with leading '0'.
/// Values that already have at least `min_digits` digits are returned as is (the value is never truncated).
String padValue(UInt32 val, size_t min_digits)
{
    String str = std::to_string(val);

    /// Insert the missing zeros in place: this needs no temporary padding string, and returning the local
    /// by name lets the compiler elide or move it (returning the reference from insert() forced a copy).
    if (str.size() < min_digits)
        str.insert(0, min_digits - str.size(), '0');

    return str;
}

/// English day names; index 0 is Sunday.
constexpr std::string_view weekdaysFull[] = {"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"};

/// Three-letter day names, in the same order as weekdaysFull.
constexpr std::string_view weekdaysShort[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};

/// English month names; index 0 is January.
constexpr std::string_view monthsFull[]
    = {"January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"};

/// Three-letter month names, in the same order as monthsFull.
constexpr std::string_view monthsShort[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};

/** formatDateTime(time, 'format')
  * Performs formatting of time, according to provided format.
  *
  * This function is optimized with an assumption, that the resulting strings are fixed width.
  * (This assumption is fulfilled for currently supported formatting options).
  *
  * It is implemented in two steps.
  * At first step, it creates a template of zeros, literal characters, whitespaces, etc.
  * and quickly fills resulting character array (string column) with this format.
  * At second step, it walks across the resulting character array and modifies/replaces specific characters,
  * by calling some functions by pointers and shifting cursor by specified amount.
  *
  * Advantages:
  * - memcpy is mostly unrolled;
  * - low number of arithmetic ops due to pre-filled template;
  * - for some reason, function by pointer call is faster than switch/case.
  *
  * Possible further optimization options:
  * - slightly interleave first and second step for better cache locality
  *   (but it makes no sense when character array fits in L1d cache);
  * - avoid indirect function calls and inline functions with JIT compilation.
* * Performance on Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz: * * WITH formatDateTime(now() + number, '%H:%i:%S') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x); * - 97 million rows per second per core; * * WITH formatDateTime(toDateTime('2018-01-01 00:00:00') + number, '%F %T') AS x SELECT count() FROM system.numbers WHERE NOT ignore(x) * - 71 million rows per second per core; * * select count() from (select formatDateTime(t, '%m/%d/%Y %H:%i:%S') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); * - 53 million rows per second per core; * * select count() from (select formatDateTime(t, 'Hello %Y World') from (select toDateTime('2018-01-01 00:00:00')+number as t from numbers(100000000))); * - 138 million rows per second per core; * * PS. We can make this function to return FixedString. Currently it returns String. */ template class FunctionFormatDateTimeImpl : public IFunction { private: /// Time is either UInt32 for DateTime or UInt16 for Date. template static bool castType(const IDataType * type, F && f) { return castTypeToEither< DataTypeInt8, DataTypeUInt8, DataTypeInt16, DataTypeUInt16, DataTypeInt32, DataTypeUInt32, DataTypeInt64, DataTypeUInt64>(type, std::forward(f)); } template class Instruction { public: /// Joda format generally requires capturing extra variables (i.e. holding state) which is more convenient with /// std::function and std::bind. Unfortunately, std::function causes a performance degradation by 0.45x compared to raw function /// pointers. For MySQL format, we generally prefer raw function pointers. Because of the special case that not all formatters are /// fixed-width formatters (see mysqlLiteral instruction), we still need to be able to store state. For that reason, we use member /// function pointers instead of static function pointers. using FuncMysql = size_t (Instruction