Merge pull request #59504 from bigo-sg/opt_sum_decimal

Optimize the performance of conditional sum/avg (e.g. sumIf/avgIf) for big integer and big decimal types by reducing branch misses
This commit is contained in:
Raúl Marín 2024-02-20 11:32:02 +01:00 committed by GitHub
commit e09113f10b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 66 additions and 4 deletions

View File

@ -146,9 +146,7 @@ struct AggregateFunctionSumData
size_t count = end - start;
const auto * end_ptr = ptr + count;
if constexpr (
(is_integer<T> && !is_big_int_v<T>)
|| (is_decimal<T> && !std::is_same_v<T, Decimal256> && !std::is_same_v<T, Decimal128>))
if constexpr ((is_integer<T> || is_decimal<T>) && !is_over_big_int<T>)
{
/// For integers we can vectorize the operation if we replace the null check using a multiplication (by 0 for null, 1 for not null)
/// https://quick-bench.com/q/MLTnfTvwC2qZFVeWHfOBR3U7a8I
@ -163,8 +161,39 @@ struct AggregateFunctionSumData
Impl::add(sum, local_sum);
return;
}
else if constexpr (is_over_big_int<T>)
{
    /// Branch-free conditional sum for the wide types selected by is_over_big_int<T>.
    /// Use a mask to discard or keep each value in order to reduce branch misses.
    /// Note that for (U)Int128 or Decimal128, MaskType is Int8 instead of Int64; otherwise the compiler
    /// introduces extra branches (for unknown reasons) and performance gets worse.
    using MaskType = std::conditional_t<sizeof(T) == 16, Int8, Int64>;
    /// masks[0] discards the value (AND with 0), masks[1] is meant to keep it (AND with -1).
    alignas(64) const MaskType masks[2] = {0, -1};
    T local_sum{};
    while (ptr < end_ptr)
    {
        Value v = *ptr;
        if constexpr (!add_if_zero)
        {
            /// Keep the value when the condition is non-zero; `!!` maps any non-zero condition to index 1.
            if constexpr (is_integer<T>)
                v &= masks[!!*condition_map];
            else
                v.value &= masks[!!*condition_map];
        }
        else
        {
            /// Keep the value when the condition is zero; `!` maps a zero condition to index 1.
            if constexpr (is_integer<T>)
                v &= masks[!*condition_map];
            else
                v.value &= masks[!*condition_map];
        }

        /// Accumulate unconditionally: a discarded value has already been masked to zero.
        /// (This must not be guarded by std::is_floating_point_v<T>: T is never a floating point type
        /// in this branch, so such a guard would drop the addition and the result would always be zero.)
        Impl::add(local_sum, v);
        ++ptr;
        ++condition_map;
    }
    Impl::add(sum, local_sum);
    return;
}
else if constexpr (std::is_floating_point_v<T>)
{
/// For floating point we use a similar trick as above, except that now we reinterpret the floating point number as an unsigned
/// integer of the same size and use a mask instead (0 to discard, 0xFF..FF to keep)

View File

@ -17,6 +17,13 @@
<query>SELECT sumKahan(toNullable(toFloat32(number))) FROM numbers(100000000)</query>
<query>SELECT sumKahan(toNullable(toFloat64(number))) FROM numbers(100000000)</query>
<query>select sumIf(number::Decimal128(3), rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::Decimal256(3), rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::Int128, rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::UInt128, rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::Int256, rand32() % 2 = 0) from numbers(100000000)</query>
<query>select sumIf(number::UInt256, rand32() % 2 = 0) from numbers(100000000)</query>
<!-- Create a table with ~20% null values. Make it random so the branch predictor doesn't do all the work -->
<create_query>CREATE TABLE nullfloat32 (x Nullable(Float32)) ENGINE = Memory</create_query>
<fill_query>INSERT INTO nullfloat32

View File

@ -0,0 +1,12 @@
49500
49500
49500
49500
49500
49500
450000
450000
450000
450000
450000
450000

View File

@ -0,0 +1,14 @@
select sumIf(number::Int128, number % 10 == 0) from numbers(1000);
select sumIf(number::UInt128, number % 10 == 0) from numbers(1000);
select sumIf(number::Int256, number % 10 == 0) from numbers(1000);
select sumIf(number::UInt256, number % 10 == 0) from numbers(1000);
select sumIf(number::Decimal128(3), number % 10 == 0) from numbers(1000);
select sumIf(number::Decimal256(3), number % 10 == 0) from numbers(1000);
-- Test when the condition is neither 0 nor 1
select sumIf(number::Int128, number % 10) from numbers(1000);
select sumIf(number::UInt128, number % 10) from numbers(1000);
select sumIf(number::Int256, number % 10) from numbers(1000);
select sumIf(number::UInt256, number % 10) from numbers(1000);
select sumIf(number::Decimal128(3), number % 10) from numbers(1000);
select sumIf(number::Decimal256(3), number % 10) from numbers(1000);