Add thread pool and cancellation if merge data with key

This patch will add thread pool and cancellation if merge data with key. During the merge, if the data size is large, we may convert the singleLevelHash to twoLevelHash and merge in parallel. Test the patch with 2 x 80 vCPUs, Q8 and Q9 has got 10.3% and 7.6% performance improvement. Signed-off-by: Jiebin Sun <jiebin.sun@intel.com>
2024-09-19 16:20:50 +00:00 · 2024-08-06 19:05:12 +08:00 · 2024-08-06 19:05:12 +08:00 · cb9039e91d
commit cb9039e91d
parent 452d463d77
6 changed files with 36 additions and 15 deletions
--- a/src/AggregateFunctions/AggregateFunctionUniq.h
+++ b/src/AggregateFunctions/AggregateFunctionUniq.h
@ -459,6 +459,8 @@ public:

    bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; }

+    constexpr static bool parallelizeMergeWithKey() { return true; }
+
    void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
    {
        if constexpr (is_parallelize_merge_prepare_needed)
--- a/src/AggregateFunctions/IAggregateFunction.h
+++ b/src/AggregateFunctions/IAggregateFunction.h
@ -145,6 +145,8 @@ public:

    virtual bool isParallelizeMergePrepareNeeded() const { return false; }

+    constexpr static bool parallelizeMergeWithKey() { return false; }
+
    virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic<bool> & /*is_cancelled*/) const
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName());
@ -169,7 +171,7 @@ public:

    /// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function
    /// then destroy states (on which src places points to).
-    virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, Arena * arena) const = 0;
+    virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const = 0;

    /// Serializes state (to transmit it over the network, for example).
    virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
@ -499,11 +501,15 @@ public:
                static_cast<const Derived *>(this)->merge(places[i] + place_offset, rhs[i], arena);
    }

-    void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, Arena * arena) const override
+    void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
    {
        for (size_t i = 0; i < size; ++i)
        {
-            static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena);
+            if constexpr (Derived::parallelizeMergeWithKey())
+                static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, thread_pool, is_cancelled, arena);
+            else
+                static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena);
+
            static_cast<const Derived *>(this)->destroy(rhs_places[i] + offset);
        }
    }
--- a/src/AggregateFunctions/UniqExactSet.h
+++ b/src/AggregateFunctions/UniqExactSet.h
@ -101,6 +101,13 @@ public:

    auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr, std::atomic<bool> * is_cancelled = nullptr)
    {
+        /// If the size is large, we may convert the singleLevelHash to twoLevelHash and merge in parallel.
+        if (other.size() > 40000)
+        {
+            if (isSingleLevel())
+                convertToTwoLevel();
+        }
+
        if (isSingleLevel() && other.isTwoLevel())
            convertToTwoLevel();

--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@ -2371,7 +2371,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(

 template <typename Method, typename Table>
 void NO_INLINE Aggregator::mergeDataImpl(
-    Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]], bool prefetch) const
+    Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]], bool prefetch, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const
 {
    if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
        mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
@ -2410,7 +2410,7 @@ void NO_INLINE Aggregator::mergeDataImpl(
        {
            if (!is_aggregate_function_compiled[i])
                aggregate_functions[i]->mergeAndDestroyBatch(
-                    dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena);
+                    dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], thread_pool, is_cancelled, arena);
        }

        return;
@ -2420,7 +2420,7 @@ void NO_INLINE Aggregator::mergeDataImpl(
    for (size_t i = 0; i < params.aggregates_size; ++i)
    {
        aggregate_functions[i]->mergeAndDestroyBatch(
-            dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena);
+            dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], thread_pool, is_cancelled, arena);
    }
 }

@ -2535,8 +2535,10 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl(

 template <typename Method>
 void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
-    ManyAggregatedDataVariants & non_empty_data) const
+    ManyAggregatedDataVariants & non_empty_data, std::atomic<bool> & is_cancelled) const
 {
+    ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads};
+
    AggregatedDataVariantsPtr & res = non_empty_data[0];
    bool no_more_keys = false;

@ -2557,13 +2559,13 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
            if (compiled_aggregate_functions_holder)
            {
                mergeDataImpl<Method>(
-                    getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, true, prefetch);
+                    getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, true, prefetch, thread_pool, is_cancelled);
            }
            else
 #endif
            {
                mergeDataImpl<Method>(
-                    getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, false, prefetch);
+                    getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, false, prefetch, thread_pool, is_cancelled);
            }
        }
        else if (res->without_key)
@ -2589,7 +2591,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(

 #define M(NAME) \
    template void NO_INLINE Aggregator::mergeSingleLevelDataImpl<decltype(AggregatedDataVariants::NAME)::element_type>( \
-        ManyAggregatedDataVariants & non_empty_data) const;
+        ManyAggregatedDataVariants & non_empty_data, std::atomic<bool> & is_cancelled) const;
    APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
 #undef M

@ -2597,6 +2599,8 @@ template <typename Method>
 void NO_INLINE Aggregator::mergeBucketImpl(
    ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> & is_cancelled) const
 {
+    ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads};
+
    /// We merge all aggregation results to the first.
    AggregatedDataVariantsPtr & res = data[0];

@ -2613,7 +2617,7 @@ void NO_INLINE Aggregator::mergeBucketImpl(
        if (compiled_aggregate_functions_holder)
        {
            mergeDataImpl<Method>(
-                getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena, true, prefetch);
+                getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena, true, prefetch, thread_pool, is_cancelled);
        }
        else
 #endif
@ -2623,7 +2627,9 @@ void NO_INLINE Aggregator::mergeBucketImpl(
                getDataVariant<Method>(current).data.impls[bucket],
                arena,
                false,
-                prefetch);
+                prefetch,
+                thread_pool,
+                is_cancelled);
        }
    }
 }
--- a/src/Interpreters/Aggregator.h
+++ b/src/Interpreters/Aggregator.h
@ -455,7 +455,7 @@ private:

    /// Merge data from hash table `src` into `dst`.
    template <typename Method, typename Table>
-    void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions, bool prefetch) const;
+    void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions, bool prefetch, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const;

    /// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
    template <typename Method, typename Table>
@ -478,7 +478,7 @@ private:

    template <typename Method>
    void mergeSingleLevelDataImpl(
-        ManyAggregatedDataVariants & non_empty_data) const;
+        ManyAggregatedDataVariants & non_empty_data, std::atomic<bool> & is_cancelled) const;

    template <bool return_single_block>
    using ConvertToBlockRes = std::conditional_t<return_single_block, Block, BlocksList>;
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@ -495,7 +495,7 @@ private:

    #define M(NAME) \
                else if (first->type == AggregatedDataVariants::Type::NAME) \
-                    params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data);
+                    params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data, shared_data->is_cancelled);
        if (false) {} // NOLINT
        APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
    #undef M