#include #include #include #include namespace DB { namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; } /** If the string is UTF-8 encoded text, it returns the length of the text in code points. * (not in characters: the length of the text "ё" can be either 1 or 2, depending on the normalization) * (not in characters: the length of the text "" can be either 1 or 2, depending on the normalization) * Otherwise, the behavior is undefined. */ struct LengthUTF8Impl { static constexpr auto is_fixed_to_constant = false; static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) { size_t size = offsets.size(); ColumnString::Offset prev_offset = 0; for (size_t i = 0; i < size; ++i) { res[i] = UTF8::countCodePoints(&data[prev_offset], offsets[i] - prev_offset - 1); prev_offset = offsets[i]; } } static void vectorFixedToConstant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt64 & /*res*/) { } static void vectorFixedToVector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) { size_t size = data.size() / n; for (size_t i = 0; i < size; ++i) { res[i] = UTF8::countCodePoints(&data[i * n], n); } } [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) { throw Exception("Cannot apply function lengthUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } }; struct NameLengthUTF8 { static constexpr auto name = "lengthUTF8"; }; using FunctionLengthUTF8 = FunctionStringOrArrayToT; void registerFunctionLengthUTF8(FunctionFactory & factory) { factory.registerFunction(); /// Compatibility aliases. factory.registerFunction("CHAR_LENGTH", FunctionFactory::CaseInsensitive); factory.registerFunction("CHARACTER_LENGTH", FunctionFactory::CaseInsensitive); } }