Speed up sumIf

This commit is contained in:
Raúl Marín 2021-08-27 18:36:22 +02:00
parent af709ab9a0
commit a5c8af1e1b

View File

@ -96,8 +96,9 @@ struct AggregateFunctionSumData
Impl::add(sum, local_sum);
}
template <typename Value>
void NO_SANITIZE_UNDEFINED NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
template <typename Value, bool add_if_zero>
void NO_SANITIZE_UNDEFINED NO_INLINE
addManyConditional_internal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t count)
{
const auto * end = ptr + count;
@ -110,10 +111,10 @@ struct AggregateFunctionSumData
T local_sum{};
while (ptr < end)
{
T multiplier = !*null_map;
T multiplier = !*condition_map == add_if_zero;
Impl::add(local_sum, *ptr * multiplier);
++ptr;
++null_map;
++condition_map;
}
Impl::add(sum, local_sum);
return;
@ -130,13 +131,13 @@ struct AggregateFunctionSumData
{
for (size_t i = 0; i < unroll_count; ++i)
{
if (!null_map[i])
if (!condition_map[i] == add_if_zero)
{
Impl::add(partial_sums[i], ptr[i]);
}
}
ptr += unroll_count;
null_map += unroll_count;
condition_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
@ -146,14 +147,26 @@ struct AggregateFunctionSumData
T local_sum{};
while (ptr < end)
{
if (!*null_map)
if (!*condition_map == add_if_zero)
Impl::add(local_sum, *ptr);
++ptr;
++null_map;
++condition_map;
}
Impl::add(sum, local_sum);
}
/// Sums the values of `ptr[0..count)` whose `null_map` byte is zero (i.e. the rows that are not NULL).
/// Thin forwarding wrapper: all the work is done by addManyConditional_internal.
template <typename Value>
void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
    /// A row contributes when its map byte is zero, hence add_if_zero = true.
    constexpr bool add_if_zero = true;
    addManyConditional_internal<Value, add_if_zero>(ptr, null_map, count);
}
/// Sums the values of `ptr[0..count)` whose `cond_map` byte is non-zero (i.e. the rows where the condition holds).
/// Thin forwarding wrapper: all the work is done by addManyConditional_internal.
template <typename Value>
void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t count)
{
    /// A row contributes when its map byte is NOT zero, hence add_if_zero = false.
    constexpr bool add_if_zero = false;
    addManyConditional_internal<Value, add_if_zero>(ptr, cond_map, count);
}
void NO_SANITIZE_UNDEFINED merge(const AggregateFunctionSumData & rhs)
{
Impl::add(sum, rhs.sum);
@ -229,8 +242,8 @@ struct AggregateFunctionSumKahanData
}
}
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
template <typename Value, bool add_if_zero>
void NO_INLINE addManyConditional_internal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t count)
{
constexpr size_t unroll_count = 4;
T partial_sums[unroll_count]{};
@ -242,10 +255,10 @@ struct AggregateFunctionSumKahanData
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
if ((!condition_map[i]) == add_if_zero)
addImpl(ptr[i], partial_sums[i], partial_compensations[i]);
ptr += unroll_count;
null_map += unroll_count;
condition_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
@ -253,13 +266,25 @@ struct AggregateFunctionSumKahanData
while (ptr < end)
{
if (!*null_map)
if ((!*condition_map) == add_if_zero)
addImpl(*ptr, sum, compensation);
++ptr;
++null_map;
++condition_map;
}
}
/// Kahan-compensated sum of the values of `ptr[0..count)` whose `null_map` byte is zero
/// (i.e. the rows that are not NULL). Forwards to addManyConditional_internal.
template <typename Value>
void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
    /// A row contributes when its map byte is zero, hence add_if_zero = true.
    constexpr bool add_if_zero = true;
    addManyConditional_internal<Value, add_if_zero>(ptr, null_map, count);
}
/// Kahan-compensated sum of the values of `ptr[0..count)` whose `cond_map` byte is non-zero
/// (i.e. the rows where the condition holds). Forwards to addManyConditional_internal.
template <typename Value>
void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t count)
{
    /// A row contributes when its map byte is NOT zero, hence add_if_zero = false.
    constexpr bool add_if_zero = false;
    addManyConditional_internal<Value, add_if_zero>(ptr, cond_map, count);
}
void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation)
{
auto raw_sum = to_sum + from_sum;
@ -352,22 +377,17 @@ public:
this->data(place).add(column.getData()[row_num]);
}
/// Vectorized version when there are no GROUP BY keys.
void addBatchSinglePlace(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const override
{
const auto & column = assert_cast<const ColVecType &>(*columns[0]);
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
for (size_t i = 0; i < batch_size; ++i)
{
if (flags[i])
add(place, columns, i, arena);
}
this->data(place).addManyConditional(column.getData().data(), flags.data(), batch_size);
}
else
{
const auto & column = assert_cast<const ColVecType &>(*columns[0]);
this->data(place).addMany(column.getData().data(), batch_size);
}
}