From a5c8af1e1b625199361088546a39c3e0e83a0313 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 27 Aug 2021 18:36:22 +0200 Subject: [PATCH] Speed up sumIf --- src/AggregateFunctions/AggregateFunctionSum.h | 64 ++++++++++++------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 3355cb0d6fc..77539240d97 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -96,8 +96,9 @@ struct AggregateFunctionSumData Impl::add(sum, local_sum); } - template - void NO_SANITIZE_UNDEFINED NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count) + template + void NO_SANITIZE_UNDEFINED NO_INLINE + addManyConditional_internal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t count) { const auto * end = ptr + count; @@ -110,10 +111,10 @@ struct AggregateFunctionSumData T local_sum{}; while (ptr < end) { - T multiplier = !*null_map; + T multiplier = !*condition_map == add_if_zero; Impl::add(local_sum, *ptr * multiplier); ++ptr; - ++null_map; + ++condition_map; } Impl::add(sum, local_sum); return; @@ -130,13 +131,13 @@ struct AggregateFunctionSumData { for (size_t i = 0; i < unroll_count; ++i) { - if (!null_map[i]) + if (!condition_map[i] == add_if_zero) { Impl::add(partial_sums[i], ptr[i]); } } ptr += unroll_count; - null_map += unroll_count; + condition_map += unroll_count; } for (size_t i = 0; i < unroll_count; ++i) @@ -146,14 +147,26 @@ struct AggregateFunctionSumData T local_sum{}; while (ptr < end) { - if (!*null_map) + if (!*condition_map == add_if_zero) Impl::add(local_sum, *ptr); ++ptr; - ++null_map; + ++condition_map; } Impl::add(sum, local_sum); } + template + void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count) + { + return addManyConditional_internal(ptr, null_map, count); + } + + template + void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t count) + { + return addManyConditional_internal(ptr, cond_map, count); + } + void NO_SANITIZE_UNDEFINED merge(const AggregateFunctionSumData & rhs) { Impl::add(sum, rhs.sum); @@ -229,8 +242,8 @@ struct AggregateFunctionSumKahanData } } - template - void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count) + template + void NO_INLINE addManyConditional_internal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t count) { constexpr size_t unroll_count = 4; T partial_sums[unroll_count]{}; @@ -242,10 +255,10 @@ struct AggregateFunctionSumKahanData while (ptr < unrolled_end) { for (size_t i = 0; i < unroll_count; ++i) - if (!null_map[i]) + if ((!condition_map[i]) == add_if_zero) addImpl(ptr[i], partial_sums[i], partial_compensations[i]); ptr += unroll_count; - null_map += unroll_count; + condition_map += unroll_count; } for (size_t i = 0; i < unroll_count; ++i) @@ -253,13 +266,25 @@ struct AggregateFunctionSumKahanData while (ptr < end) { - if (!*null_map) + if ((!*condition_map) == add_if_zero) addImpl(*ptr, sum, compensation); ++ptr; - ++null_map; + ++condition_map; } } + template + void ALWAYS_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count) + { + return addManyConditional_internal(ptr, null_map, count); + } + + template + void ALWAYS_INLINE addManyConditional(const Value * __restrict ptr, const UInt8 * __restrict cond_map, size_t count) + { + return addManyConditional_internal(ptr, cond_map, count); + } + void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation) { auto raw_sum = to_sum + from_sum; @@ -352,22 +377,17 @@ public: this->data(place).add(column.getData()[row_num]); } - /// Vectorized version when there is no GROUP BY keys. void addBatchSinglePlace( - size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override + size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *, ssize_t if_argument_pos) const override { + const auto & column = assert_cast(*columns[0]); if (if_argument_pos >= 0) { const auto & flags = assert_cast(*columns[if_argument_pos]).getData(); - for (size_t i = 0; i < batch_size; ++i) - { - if (flags[i]) - add(place, columns, i, arena); - } + this->data(place).addManyConditional(column.getData().data(), flags.data(), batch_size); } else { - const auto & column = assert_cast(*columns[0]); this->data(place).addMany(column.getData().data(), batch_size); } }