mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Merge pull request #68441 from jiebinn/parallelMergeWithKey
Add thread pool and cancellation to support parallel merge with key
This commit is contained in:
commit
57e3812087
@ -459,6 +459,8 @@ public:
|
|||||||
|
|
||||||
bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; }
|
bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed; }
|
||||||
|
|
||||||
|
constexpr static bool parallelizeMergeWithKey() { return true; }
|
||||||
|
|
||||||
void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
|
void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const override
|
||||||
{
|
{
|
||||||
if constexpr (is_parallelize_merge_prepare_needed)
|
if constexpr (is_parallelize_merge_prepare_needed)
|
||||||
|
@ -145,6 +145,8 @@ public:
|
|||||||
|
|
||||||
virtual bool isParallelizeMergePrepareNeeded() const { return false; }
|
virtual bool isParallelizeMergePrepareNeeded() const { return false; }
|
||||||
|
|
||||||
|
constexpr static bool parallelizeMergeWithKey() { return false; }
|
||||||
|
|
||||||
virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic<bool> & /*is_cancelled*/) const
|
virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/, std::atomic<bool> & /*is_cancelled*/) const
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName());
|
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName());
|
||||||
@ -169,7 +171,7 @@ public:
|
|||||||
|
|
||||||
/// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function
|
/// Merges states (on which src places points to) with other states (on which dst places points to) of current aggregation function
|
||||||
/// then destroy states (on which src places points to).
|
/// then destroy states (on which src places points to).
|
||||||
virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, Arena * arena) const = 0;
|
virtual void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * src_places, size_t size, size_t offset, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const = 0;
|
||||||
|
|
||||||
/// Serializes state (to transmit it over the network, for example).
|
/// Serializes state (to transmit it over the network, for example).
|
||||||
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
|
virtual void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> version = std::nullopt) const = 0; /// NOLINT
|
||||||
@ -499,11 +501,15 @@ public:
|
|||||||
static_cast<const Derived *>(this)->merge(places[i] + place_offset, rhs[i], arena);
|
static_cast<const Derived *>(this)->merge(places[i] + place_offset, rhs[i], arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, Arena * arena) const override
|
void mergeAndDestroyBatch(AggregateDataPtr * dst_places, AggregateDataPtr * rhs_places, size_t size, size_t offset, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled, Arena * arena) const override
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
{
|
{
|
||||||
static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena);
|
if constexpr (Derived::parallelizeMergeWithKey())
|
||||||
|
static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, thread_pool, is_cancelled, arena);
|
||||||
|
else
|
||||||
|
static_cast<const Derived *>(this)->merge(dst_places[i] + offset, rhs_places[i] + offset, arena);
|
||||||
|
|
||||||
static_cast<const Derived *>(this)->destroy(rhs_places[i] + offset);
|
static_cast<const Derived *>(this)->destroy(rhs_places[i] + offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -101,6 +101,13 @@ public:
|
|||||||
|
|
||||||
auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr, std::atomic<bool> * is_cancelled = nullptr)
|
auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr, std::atomic<bool> * is_cancelled = nullptr)
|
||||||
{
|
{
|
||||||
|
/// If the size is large, we may convert the singleLevelHash to twoLevelHash and merge in parallel.
|
||||||
|
if (other.size() > 40000)
|
||||||
|
{
|
||||||
|
if (isSingleLevel())
|
||||||
|
convertToTwoLevel();
|
||||||
|
}
|
||||||
|
|
||||||
if (isSingleLevel() && other.isTwoLevel())
|
if (isSingleLevel() && other.isTwoLevel())
|
||||||
convertToTwoLevel();
|
convertToTwoLevel();
|
||||||
|
|
||||||
|
@ -2371,7 +2371,7 @@ void NO_INLINE Aggregator::mergeDataNullKey(
|
|||||||
|
|
||||||
template <typename Method, typename Table>
|
template <typename Method, typename Table>
|
||||||
void NO_INLINE Aggregator::mergeDataImpl(
|
void NO_INLINE Aggregator::mergeDataImpl(
|
||||||
Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]], bool prefetch) const
|
Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions [[maybe_unused]], bool prefetch, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const
|
||||||
{
|
{
|
||||||
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
if constexpr (Method::low_cardinality_optimization || Method::one_key_nullable_optimization)
|
||||||
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
|
mergeDataNullKey<Method, Table>(table_dst, table_src, arena);
|
||||||
@ -2410,7 +2410,7 @@ void NO_INLINE Aggregator::mergeDataImpl(
|
|||||||
{
|
{
|
||||||
if (!is_aggregate_function_compiled[i])
|
if (!is_aggregate_function_compiled[i])
|
||||||
aggregate_functions[i]->mergeAndDestroyBatch(
|
aggregate_functions[i]->mergeAndDestroyBatch(
|
||||||
dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena);
|
dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], thread_pool, is_cancelled, arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
return;
|
return;
|
||||||
@ -2420,7 +2420,7 @@ void NO_INLINE Aggregator::mergeDataImpl(
|
|||||||
for (size_t i = 0; i < params.aggregates_size; ++i)
|
for (size_t i = 0; i < params.aggregates_size; ++i)
|
||||||
{
|
{
|
||||||
aggregate_functions[i]->mergeAndDestroyBatch(
|
aggregate_functions[i]->mergeAndDestroyBatch(
|
||||||
dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], arena);
|
dst_places.data(), src_places.data(), dst_places.size(), offsets_of_aggregate_states[i], thread_pool, is_cancelled, arena);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2535,8 +2535,10 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl(
|
|||||||
|
|
||||||
template <typename Method>
|
template <typename Method>
|
||||||
void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
|
void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
|
||||||
ManyAggregatedDataVariants & non_empty_data) const
|
ManyAggregatedDataVariants & non_empty_data, std::atomic<bool> & is_cancelled) const
|
||||||
{
|
{
|
||||||
|
ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads};
|
||||||
|
|
||||||
AggregatedDataVariantsPtr & res = non_empty_data[0];
|
AggregatedDataVariantsPtr & res = non_empty_data[0];
|
||||||
bool no_more_keys = false;
|
bool no_more_keys = false;
|
||||||
|
|
||||||
@ -2557,13 +2559,13 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
|
|||||||
if (compiled_aggregate_functions_holder)
|
if (compiled_aggregate_functions_holder)
|
||||||
{
|
{
|
||||||
mergeDataImpl<Method>(
|
mergeDataImpl<Method>(
|
||||||
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, true, prefetch);
|
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, true, prefetch, thread_pool, is_cancelled);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
mergeDataImpl<Method>(
|
mergeDataImpl<Method>(
|
||||||
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, false, prefetch);
|
getDataVariant<Method>(*res).data, getDataVariant<Method>(current).data, res->aggregates_pool, false, prefetch, thread_pool, is_cancelled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (res->without_key)
|
else if (res->without_key)
|
||||||
@ -2589,7 +2591,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl(
|
|||||||
|
|
||||||
#define M(NAME) \
|
#define M(NAME) \
|
||||||
template void NO_INLINE Aggregator::mergeSingleLevelDataImpl<decltype(AggregatedDataVariants::NAME)::element_type>( \
|
template void NO_INLINE Aggregator::mergeSingleLevelDataImpl<decltype(AggregatedDataVariants::NAME)::element_type>( \
|
||||||
ManyAggregatedDataVariants & non_empty_data) const;
|
ManyAggregatedDataVariants & non_empty_data, std::atomic<bool> & is_cancelled) const;
|
||||||
APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
|
APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
|
||||||
#undef M
|
#undef M
|
||||||
|
|
||||||
@ -2597,6 +2599,8 @@ template <typename Method>
|
|||||||
void NO_INLINE Aggregator::mergeBucketImpl(
|
void NO_INLINE Aggregator::mergeBucketImpl(
|
||||||
ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> & is_cancelled) const
|
ManyAggregatedDataVariants & data, Int32 bucket, Arena * arena, std::atomic<bool> & is_cancelled) const
|
||||||
{
|
{
|
||||||
|
ThreadPool thread_pool{CurrentMetrics::AggregatorThreads, CurrentMetrics::AggregatorThreadsActive, CurrentMetrics::AggregatorThreadsScheduled, params.max_threads};
|
||||||
|
|
||||||
/// We merge all aggregation results to the first.
|
/// We merge all aggregation results to the first.
|
||||||
AggregatedDataVariantsPtr & res = data[0];
|
AggregatedDataVariantsPtr & res = data[0];
|
||||||
|
|
||||||
@ -2613,7 +2617,7 @@ void NO_INLINE Aggregator::mergeBucketImpl(
|
|||||||
if (compiled_aggregate_functions_holder)
|
if (compiled_aggregate_functions_holder)
|
||||||
{
|
{
|
||||||
mergeDataImpl<Method>(
|
mergeDataImpl<Method>(
|
||||||
getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena, true, prefetch);
|
getDataVariant<Method>(*res).data.impls[bucket], getDataVariant<Method>(current).data.impls[bucket], arena, true, prefetch, thread_pool, is_cancelled);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
@ -2623,7 +2627,9 @@ void NO_INLINE Aggregator::mergeBucketImpl(
|
|||||||
getDataVariant<Method>(current).data.impls[bucket],
|
getDataVariant<Method>(current).data.impls[bucket],
|
||||||
arena,
|
arena,
|
||||||
false,
|
false,
|
||||||
prefetch);
|
prefetch,
|
||||||
|
thread_pool,
|
||||||
|
is_cancelled);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -467,7 +467,7 @@ private:
|
|||||||
|
|
||||||
/// Merge data from hash table `src` into `dst`.
|
/// Merge data from hash table `src` into `dst`.
|
||||||
template <typename Method, typename Table>
|
template <typename Method, typename Table>
|
||||||
void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions, bool prefetch) const;
|
void mergeDataImpl(Table & table_dst, Table & table_src, Arena * arena, bool use_compiled_functions, bool prefetch, ThreadPool & thread_pool, std::atomic<bool> & is_cancelled) const;
|
||||||
|
|
||||||
/// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
|
/// Merge data from hash table `src` into `dst`, but only for keys that already exist in dst. In other cases, merge the data into `overflows`.
|
||||||
template <typename Method, typename Table>
|
template <typename Method, typename Table>
|
||||||
@ -490,7 +490,7 @@ private:
|
|||||||
|
|
||||||
template <typename Method>
|
template <typename Method>
|
||||||
void mergeSingleLevelDataImpl(
|
void mergeSingleLevelDataImpl(
|
||||||
ManyAggregatedDataVariants & non_empty_data) const;
|
ManyAggregatedDataVariants & non_empty_data, std::atomic<bool> & is_cancelled) const;
|
||||||
|
|
||||||
template <bool return_single_block>
|
template <bool return_single_block>
|
||||||
using ConvertToBlockRes = std::conditional_t<return_single_block, Block, BlocksList>;
|
using ConvertToBlockRes = std::conditional_t<return_single_block, Block, BlocksList>;
|
||||||
|
@ -486,7 +486,7 @@ private:
|
|||||||
|
|
||||||
#define M(NAME) \
|
#define M(NAME) \
|
||||||
else if (first->type == AggregatedDataVariants::Type::NAME) \
|
else if (first->type == AggregatedDataVariants::Type::NAME) \
|
||||||
params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data);
|
params->aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(*data, shared_data->is_cancelled);
|
||||||
if (false) {} // NOLINT
|
if (false) {} // NOLINT
|
||||||
APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
|
APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
|
||||||
#undef M
|
#undef M
|
||||||
|
Loading…
Reference in New Issue
Block a user