#include #include #include #include namespace DB { namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } namespace { /** If the string is UTF-8 encoded text, it returns the length of the text in code points. * (not in characters: the length of the text "ё" can be either 1 or 2, depending on the normalization) * (not in characters: the length of the text "" can be either 1 or 2, depending on the normalization) * Otherwise, the behavior is undefined. */ struct LengthUTF8Impl { static constexpr auto is_fixed_to_constant = false; static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) { size_t size = offsets.size(); ColumnString::Offset prev_offset = 0; for (size_t i = 0; i < size; ++i) { res[i] = UTF8::countCodePoints(&data[prev_offset], offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/) { } static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) { size_t size = data.size() / n; for (size_t i = 0; i < size; ++i) { res[i] = UTF8::countCodePoints(&data[i * n], n); } } [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to Array argument"); } [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to UUID argument"); } [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv6 argument"); } [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv4 argument"); } }; struct NameLengthUTF8 { static constexpr auto name = "lengthUTF8"; }; using FunctionLengthUTF8 = FunctionStringOrArrayToT; } REGISTER_FUNCTION(LengthUTF8) { factory.registerFunction(); /// Compatibility aliases. factory.registerAlias("CHAR_LENGTH", "lengthUTF8", FunctionFactory::CaseInsensitive); factory.registerAlias("CHARACTER_LENGTH", "lengthUTF8", FunctionFactory::CaseInsensitive); } }