diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
index b834ed82729..6b4746b1320 100644
--- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp
@@ -195,7 +195,14 @@ AggregatingSortedAlgorithm::AggregatingMergedData::AggregatingMergedData(
     MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_)
     : MergedData(std::move(columns_), false, max_block_size_), def(def_)
 {
-        initAggregateDescription();
+    initAggregateDescription();
+
+    /// Just to make startGroup() simpler.
+    if (def.allocates_memory_in_arena)
+    {
+        arena = std::make_unique<Arena>();
+        arena_size = arena->size();
+    }
 }
 
 void AggregatingSortedAlgorithm::AggregatingMergedData::startGroup(const ColumnRawPtrs & raw_columns, size_t row)
@@ -212,8 +219,17 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::startGroup(const ColumnR
     for (auto & desc : def.columns_to_simple_aggregate)
         desc.createState();
 
-    if (def.allocates_memory_in_arena)
+    /// If and only if:
+    /// - arena is required (i.e. SimpleAggregateFunction(any, String) in PK)
+    /// - arena was used since otherwise it may be too costly to increment atomic counters inside Arena.
+    ///   i.e. SELECT with SimpleAggregateFunction(String) in PK and lots of groups
+    ///   may produce ~1.5M of ArenaAllocChunks atomic increments,
+    ///   while LOCK is too costly for CPU (~10% overhead here).
+    if (def.allocates_memory_in_arena && arena->size() > arena_size)
+    {
         arena = std::make_unique<Arena>();
+        arena_size = arena->size();
+    }
 
     is_group_started = true;
 }
diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
index da4ec876b69..e572ed7d526 100644
--- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h
@@ -73,6 +73,7 @@ private:
         /// Memory pool for SimpleAggregateFunction
         /// (only when allocates_memory_in_arena == true).
         std::unique_ptr<Arena> arena;
+        size_t arena_size = 0;
 
         bool is_group_started = false;
 
diff --git a/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml b/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml
new file mode 100644
index 00000000000..d62f8d8e088
--- /dev/null
+++ b/tests/performance/aggregating_merge_tree_simple_aggregate_function_string.xml
@@ -0,0 +1,21 @@
+<test>
+    <create_query>
+        CREATE TABLE bench
+        ENGINE = AggregatingMergeTree()
+        ORDER BY key
+        SETTINGS index_granularity = 8192
+        AS
+        SELECT CAST(reinterpretAsString(number), 'SimpleAggregateFunction(any, String)') AS key
+        FROM numbers_mt(toUInt64(10e6))
+    </create_query>
+
+    <query>
+        SELECT *
+        FROM bench
+        GROUP BY key
+        SETTINGS optimize_aggregation_in_order = 1, max_threads = 16
+        FORMAT Null
+    </query>
+
+    <drop_query>DROP TABLE IF EXISTS bench</drop_query>
+</test>