bitCount support FixedString data type

2024-11-27 10:02:01 +00:00 · 2023-04-22 14:17:31 +00:00 · 2023-04-22 14:17:31 +00:00 · 18672c2d4b
commit 18672c2d4b
parent 0fbc9585f1
9 changed files with 93 additions and 19 deletions
--- a/src/Functions/FunctionUnaryArithmetic.h
+++ b/src/Functions/FunctionUnaryArithmetic.h
@ -130,6 +130,48 @@ struct FixedStringUnaryOperationImpl
    }
 };

+template <typename Op>
+struct FixedStringUnaryOperationReduceImpl /// Reduces a byte range to a single UInt64: the sum of Op::apply over every byte.
+{
+    MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42( /// Presumably generates vectorImpl plus the arch-suffixed variants called from vector() - confirm in the macro definition.
+        MULTITARGET_FUNCTION_HEADER(static UInt64 NO_INLINE),
+        vectorImpl,
+        MULTITARGET_FUNCTION_BODY((const UInt8 * start, const UInt8 * end) { /// NOLINT
+            UInt64 res = 0; /// Stays 0 when the range is empty (start >= end).
+            while (start < end) /// Walks the half-open range [start, end) one byte at a time.
+                res += Op::apply(*start++);
+            return res;
+        }))
+
+    static UInt64 NO_INLINE vector(const UInt8 * start, const UInt8 * end) /// Entry point: returns the reduction of [start, end) using the first supported variant below.
+    {
+#if USE_MULTITARGET_CODE
+        if (isArchSupported(TargetArch::AVX512BW)) /// Variants are tried in this order: AVX512BW, AVX512F, AVX2, SSE42.
+        {
+            return vectorImplAVX512BW(start, end);
+        }
+
+        if (isArchSupported(TargetArch::AVX512F))
+        {
+            return vectorImplAVX512F(start, end);
+        }
+
+        if (isArchSupported(TargetArch::AVX2))
+        {
+            return vectorImplAVX2(start, end);
+        }
+
+        if (isArchSupported(TargetArch::SSE42))
+        {
+            return vectorImplSSE42(start, end);
+        }
+#endif
+
+        return vectorImpl(start, end); /// Fallback when multitarget code is compiled out or none of the listed targets is supported.
+    }
+};
+
+

 template <typename FunctionName>
 struct FunctionUnaryArithmeticMonotonicity;
@ -143,6 +185,7 @@ class FunctionUnaryArithmetic : public IFunction
 {
    static constexpr bool allow_decimal = IsUnaryOperation<Op>::negate || IsUnaryOperation<Op>::abs || IsUnaryOperation<Op>::sign;
    static constexpr bool allow_fixed_string = Op<UInt8>::allow_fixed_string;
+    static constexpr bool reduce_fixed_string_for_chars = allow_fixed_string && Op<UInt8>::reduce_fixed_string_for_chars;
    static constexpr bool is_sign_function = IsUnaryOperation<Op>::sign;

    ContextPtr context;
@ -232,10 +275,19 @@ public:
            using DataType = std::decay_t<decltype(type)>;
            if constexpr (std::is_same_v<DataTypeFixedString, DataType>)
            {
-                if constexpr (!Op<DataTypeFixedString>::allow_fixed_string)
+                if constexpr (!allow_fixed_string)
                    return false;
+                /// For `bitCount`, when the argument is FixedString, its return type
+                /// should be an integer instead of FixedString; the return value is
+                /// the sum of `bitCount` applied to each char.
+                else
+                {
+                    if constexpr (reduce_fixed_string_for_chars)
+                        result = std::make_shared<DataTypeUInt64>();
+                    else
                        result = std::make_shared<DataType>(type.getN());
                }
+            }
            else if constexpr (std::is_same_v<DataTypeInterval, DataType>)
            {
                if constexpr (!IsUnaryOperation<Op>::negate)
@ -281,6 +333,25 @@ public:
                if constexpr (allow_fixed_string)
                {
                    if (const auto * col = checkAndGetColumn<ColumnFixedString>(arguments[0].column.get()))
+                    {
+                        if constexpr (reduce_fixed_string_for_chars)
+                        {
+                            auto size = col->size();
+
+                            auto col_res = ColumnUInt64::create(size);
+                            auto & vec_res = col_res->getData();
+
+                            const auto & chars = col->getChars();
+                            auto n = col->getN();
+                            for (size_t i = 0; i < size; ++i)
+                            {
+                                vec_res[i] = FixedStringUnaryOperationReduceImpl<Op<UInt8>>::vector(
+                                    chars.data() + n * i, chars.data() + n * (i + 1));
+                            }
+                            result_column = std::move(col_res);
+                            return true;
+                        }
+                        else
                        {
                            auto col_res = ColumnFixedString::create(col->getN());
                            auto & vec_res = col_res->getChars();
@ -291,6 +362,7 @@ public:
                        }
                    }
                }
+            }
            else if constexpr (IsDataTypeDecimal<DataType>)
            {
                using T0 = typename DataType::FieldType;
--- a/src/Functions/bitAnd.cpp
+++ b/src/Functions/bitAnd.cpp
@ -16,8 +16,8 @@ template <typename A, typename B>
 struct BitAndImpl
 {
    using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
-    static constexpr const bool allow_fixed_string = true;
-    static const constexpr bool allow_string_integer = false;
+    static constexpr bool allow_fixed_string = true;
+    static constexpr bool allow_string_integer = false;

    template <typename Result = ResultType>
    static inline Result apply(A a, B b)
--- a/src/Functions/bitCount.cpp
+++ b/src/Functions/bitCount.cpp
@ -13,8 +13,8 @@ template <typename A>
 struct BitCountImpl
 {
    using ResultType = UInt8;
-    static constexpr bool allow_fixed_string = false;
-    static const constexpr bool allow_string_integer = false;
+    static constexpr bool allow_fixed_string = true;
+    static constexpr bool reduce_fixed_string_for_chars = true;

    static inline ResultType apply(A a)
    {
--- a/src/Functions/bitHammingDistance.cpp
+++ b/src/Functions/bitHammingDistance.cpp
@ -8,8 +8,8 @@ template <typename A, typename B>
 struct BitHammingDistanceImpl
 {
    using ResultType = UInt8;
-    static const constexpr bool allow_fixed_string = false;
-    static const constexpr bool allow_string_integer = false;
+    static constexpr bool allow_fixed_string = true;
+    static constexpr bool allow_string_integer = false;

    template <typename Result = ResultType>
    static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
--- a/src/Functions/bitNot.cpp
+++ b/src/Functions/bitNot.cpp
@ -17,8 +17,8 @@ template <typename A>
 struct BitNotImpl
 {
    using ResultType = typename NumberTraits::ResultOfBitNot<A>::Type;
-    static const constexpr bool allow_fixed_string = true;
-    static const constexpr bool allow_string_integer = false;
+    static constexpr bool allow_fixed_string = true;
+    static constexpr bool reduce_fixed_string_for_chars = false;

    static inline ResultType apply(A a)
    {
--- a/src/Functions/bitOr.cpp
+++ b/src/Functions/bitOr.cpp
@ -15,8 +15,8 @@ template <typename A, typename B>
 struct BitOrImpl
 {
    using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
-    static constexpr const bool allow_fixed_string = true;
-    static const constexpr bool allow_string_integer = false;
+    static constexpr bool allow_fixed_string = true;
+    static constexpr bool allow_string_integer = false;

    template <typename Result = ResultType>
    static inline Result apply(A a, B b)
--- a/src/Functions/factorial.cpp
+++ b/src/Functions/factorial.cpp
@ -18,7 +18,6 @@ struct FactorialImpl
    using ResultType = UInt64;
    static const constexpr bool allow_decimal = false;
    static const constexpr bool allow_fixed_string = false;
-    static const constexpr bool allow_string_integer = false;

    static inline NO_SANITIZE_UNDEFINED ResultType apply(A a)
    {
--- a/tests/queries/0_stateless/01066_bit_count.reference
+++ b/tests/queries/0_stateless/01066_bit_count.reference
@ -19,3 +19,4 @@
 1	10	000000000000F03F
 -1	11	000000000000F0BF
 inf	11	000000000000F07F
+Hello, world!!!!	55
--- a/tests/queries/0_stateless/01066_bit_count.sql
+++ b/tests/queries/0_stateless/01066_bit_count.sql
@ -11,3 +11,5 @@ SELECT bitCount(toInt16(-1));
 SELECT bitCount(toInt8(-1));

 SELECT x, bitCount(x), hex(reinterpretAsString(x)) FROM VALUES ('x Float64', (1), (-1), (inf));
+
+SELECT toFixedString('Hello, world!!!!', 16) AS x, bitCount(x);