#include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; } namespace { const std::unordered_map time_unit_to_float = { {"years", 365 * 24 * 3600}, {"year", 365 * 24 * 3600}, {"yr", 365 * 24 * 3600}, {"y", 365 * 24 * 3600}, {"months", 30.5 * 24 * 3600}, {"month", 30.5 * 24 * 3600}, {"mo", 30.5 * 24 * 3600}, {"weeks", 7 * 24 * 3600}, {"week", 7 * 24 * 3600}, {"w", 7 * 24 * 3600}, {"days", 24 * 3600}, {"day", 24 * 3600}, {"d", 24 * 3600}, {"hours", 3600}, {"hour", 3600}, {"hr", 3600}, {"h", 3600}, {"minutes", 60}, {"minute", 60}, {"min", 60}, {"m", 60}, {"seconds", 1}, {"second", 1}, {"sec", 1}, {"s", 1}, {"milliseconds", 1e-3}, {"millisecond", 1e-3}, {"millisec", 1e-3}, {"ms", 1e-3}, {"microseconds", 1e-6}, {"microsecond", 1e-6}, {"microsec", 1e-6}, {"μs", 1e-6}, {"us", 1e-6}, {"nanoseconds", 1e-9}, {"nanosecond", 1e-9}, {"nanosec", 1e-9}, {"ns", 1e-9}, }; /** Prints amount of seconds in form of: * "1 year 2 months 4 weeks 12 days 3 hours 1 minute 33 seconds". * ' ', ';', '-', '+', ',', ':' can be used as separator, eg. "1yr-2mo", "2m:6s" * * valid expressions: * SELECT parseTimeDelta('1 min 35 sec'); * SELECT parseTimeDelta('0m;11.23s.'); * SELECT parseTimeDelta('11hr 25min 3.1s'); * SELECT parseTimeDelta('0.00123 seconds'); * SELECT parseTimeDelta('1yr2mo'); * SELECT parseTimeDelta('11s+22min'); * SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds'); * * invalid expressions: * SELECT parseTimeDelta(); * SELECT parseTimeDelta('1yr', 1); * SELECT parseTimeDelta(1); * SELECT parseTimeDelta(' '); * SELECT parseTimeDelta('-1yr'); * SELECT parseTimeDelta('1yr-'); * SELECT parseTimeDelta('yr2mo'); * SELECT parseTimeDelta('1.yr2mo'); * SELECT parseTimeDelta('1-yr'); * SELECT parseTimeDelta('1 1yr'); * SELECT parseTimeDelta('1yyr'); * SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ;. 33 seconds'); * * The length of years and months (and even days in presence of time adjustments) are rough: * year is just 365 days, month is 30.5 days, day is 86400 seconds, similarly to what formatReadableTimeDelta is doing. */ class FunctionParseTimeDelta : public IFunction { public: static constexpr auto name = "parseTimeDelta"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } size_t getNumberOfArguments() const override { return 0; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) throw Exception( ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()); if (arguments.size() > 1) throw Exception( ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()); const IDataType & type = *arguments[0]; if (!isString(type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot format {} as time string.", type.getName()); return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_to = ColumnFloat64::create(); auto & res_data = col_to->getData(); for (size_t i = 0; i < input_rows_count; ++i) { std::string_view str{arguments[0].column->getDataAt(i)}; Int64 token_tail = 0; Int64 token_front = 0; Int64 last_pos = str.length() - 1; Float64 result = 0; /// ignore '.' and ' ' at the end of string while (last_pos >= 0 && (str[last_pos] == ' ' || str[last_pos] == '.')) --last_pos; /// no valid characters if (last_pos < 0) { throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, don't find valid characters, str: \"{}\".", getName(), String(str)); } /// last pos character must be character and not be separator or number after ignoring '.' and ' ' if (!isalpha(str[last_pos])) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, str: \"{}\".", getName(), String(str)); } /// scan spaces at the beginning scanSpaces(str, token_tail, last_pos); token_front = token_tail; while (token_tail <= last_pos) { /// scan unsigned integer if (!scanUnsignedInteger(str, token_tail, last_pos)) { throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, find number failed, str: \"{}\".", getName(), String(str)); } /// if there is a '.', then scan another integer to get a float number if (token_tail <= last_pos && str[token_tail] == '.') { token_tail++; if (!scanUnsignedInteger(str, token_tail, last_pos)) { throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, find number after '.' failed, str: \"{}\".", getName(), String(str)); } } /// convert float/integer string to float Float64 base = 0; std::string_view base_str = str.substr(token_front, token_tail - token_front); auto value = boost::convert(base_str, boost::cnv::strtol()); if (!value.has_value()) { throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, convert string to float64 failed: \"{}\".", getName(), String(base_str)); } base = value.get(); scanSpaces(str, token_tail, last_pos); token_front = token_tail; /// scan a unit if (!scanUnit(str, token_tail, last_pos)) { throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, find unit failed, str: \"{}\".", getName(), String(str)); } /// get unit number std::string_view unit = str.substr(token_front, token_tail - token_front); auto iter = time_unit_to_float.find(unit); if (iter == time_unit_to_float.end()) /// not find unit { throw Exception( ErrorCodes::BAD_ARGUMENTS, "Invalid expression for function {}, parse unit failed: \"{}\".", getName(), unit); } result += base * iter->second; /// scan separator between two tokens scanSeparator(str, token_tail, last_pos); token_front = token_tail; } res_data.emplace_back(result); } return col_to; } /// scan an unsigned integer number static bool scanUnsignedInteger(std::string_view & str, Int64 & index, Int64 last_pos) { int64_t begin_index = index; while (index <= last_pos && isdigit(str[index])) { index++; } return index != begin_index; } /// scan a unit static bool scanUnit(std::string_view & str, Int64 & index, Int64 last_pos) { int64_t begin_index = index; while (index <= last_pos && !isdigit(str[index]) && !isSeparator(str[index])) { index++; } return index != begin_index; } /// scan spaces static void scanSpaces(std::string_view & str, Int64 & index, Int64 last_pos) { while (index <= last_pos && (str[index] == ' ')) { index++; } } /// scan for characters to ignore static void scanSeparator(std::string_view & str, Int64 & index, Int64 last_pos) { /// ignore spaces scanSpaces(str, index, last_pos); /// ignore separator if (index <= last_pos && isSeparator(str[index])) { index++; } scanSpaces(str, index, last_pos); } static bool isSeparator(char symbol) { return symbol == ';' || symbol == '-' || symbol == '+' || symbol == ',' || symbol == ':' || symbol == ' '; } }; } REGISTER_FUNCTION(ParseTimeDelta) { factory.registerFunction(); } }