Merge pull request #59504 from bigo-sg/opt_sum_decimal

Optimize performance of conditional sum/avg for big integer and big decimal types by reducing branch mispredictions
2024-09-20 00:30:49 +00:00 · 2024-02-20 11:32:02 +01:00 · 2024-02-20 11:32:02 +01:00 · e09113f10b
commit e09113f10b
parent 7e11fc79d9 2d6b4b400c
4 changed files with 66 additions and 4 deletions
--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@ -146,9 +146,7 @@ struct AggregateFunctionSumData
        size_t count = end - start;
        const auto * end_ptr = ptr + count;

-        if constexpr (
-            (is_integer<T> && !is_big_int_v<T>)
-            || (is_decimal<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
+        if constexpr ((is_integer<T> || is_decimal<T>) && !is_over_big_int<T>)
        {
            /// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
            /// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
@ -163,8 +161,39 @@ struct AggregateFunctionSumData
            Impl::add(sum, local_sum);
            return;
        }
+        else if constexpr (is_over_big_int<T>)
+        {
+            /// Use a mask to discard or keep the value, which reduces branch misses.
+            /// Note that for (U)Int128 and Decimal128, MaskType is Int8 rather than Int64; otherwise the compiler introduces extra branches (for an unknown reason) and performance gets worse.
+            using MaskType = std::conditional_t<sizeof(T) == 16, Int8, Int64>;
+            alignas(64) const MaskType masks[2] = {0, -1};
+            T local_sum{};
+            while (ptr < end_ptr)
+            {
+                Value v = *ptr;
+                if constexpr (!add_if_zero)
+                {
+                    if constexpr (is_integer<T>)
+                        v &= masks[!!*condition_map];
+                    else
+                        v.value &= masks[!!*condition_map];
+                }
+                else
+                {
+                    if constexpr (is_integer<T>)
+                        v &= masks[!*condition_map];
+                    else
+                        v.value &= masks[!*condition_map];
+                }

-        if constexpr (std::is_floating_point_v<T>)
+                Impl::add(local_sum, v);
+                ++ptr;
+                ++condition_map;
+            }
+            Impl::add(sum, local_sum);
+            return;
+        }
+        else if constexpr (std::is_floating_point_v<T>)
        {
            /// For floating point we use a similar trick as above, except that now we  reinterpret the floating point number as an unsigned
            /// integer of the same size and use a mask instead (0 to discard, 0xFF..FF to keep)
--- a/tests/performance/sum.xml
+++ b/tests/performance/sum.xml
@ -17,6 +17,13 @@
    <query>SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000)</query>
    <query>SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000)</query>

+    <query>select sumIf(number::Decimal128(3), rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::Decimal256(3), rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::Int128, rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::UInt128, rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::Int256, rand32() % 2 = 0) from numbers(100000000)</query>
+    <query>select sumIf(number::UInt256, rand32() % 2 = 0) from numbers(100000000)</query>
+
    <!-- Create a table with ~20% null values. Make it random so the branch predictor doesn't do all the work -->
    <create_query>CREATE TABLE nullfloat32 (x Nullable(Float32)) ENGINE = Memory</create_query>
    <fill_query>INSERT INTO nullfloat32
--- a/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference
+++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.reference
@ -0,0 +1,12 @@
+49500
+49500
+49500
+49500
+49500
+49500
+450000
+450000
+450000
+450000
+450000
+450000
--- a/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql
+++ b/tests/queries/0_stateless/02985_if_over_big_int_decimal.sql
@ -0,0 +1,14 @@
+select sumIf(number::Int128, number % 10 == 0) from numbers(1000);
+select sumIf(number::UInt128, number % 10 == 0) from numbers(1000);
+select sumIf(number::Int256, number % 10 == 0) from numbers(1000);
+select sumIf(number::UInt256, number % 10 == 0) from numbers(1000);
+select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000);
+select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000);
+
+-- Test when the condition is neither 0 nor 1
+select sumIf(number::Int128, number % 10) from numbers(1000);
+select sumIf(number::UInt128, number % 10) from numbers(1000);
+select sumIf(number::Int256, number % 10) from numbers(1000);
+select sumIf(number::UInt256, number % 10) from numbers(1000);
+select sumIf(number::Decimal128(3), number % 10) from numbers(1000);
+select sumIf(number::Decimal256(3), number % 10) from numbers(1000);