mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Do not use more than 98K of memory for uniqCombined*
uniqCombined() uses a hash table for medium cardinality, and since HashTable resizes by the power of 2 (well, actually HashTableGrower grows double by the power of 2, hence HashTableGrower::increaseSize() should be overridden to change this), with 1<<13 (default for uniqCombined) and UInt64 HashValueType, the HashTable will take getBufferSizeInBytes() == 131072, while it should be no greater than sizeof(HLL) ~= 98K. So reduce the maximum cardinality for the hash table to 1<<12 with UInt64 HashValueType and to 1<<13 with UInt32, override HashTableGrower::increaseSize(), and cover this using max_memory_usage. Refs: https://github.com/ClickHouse/ClickHouse/pull/7221#issuecomment-539672742 v2: cover uniqCombined() with non-default K
This commit is contained in:
parent
15deedb420
commit
e373862c83
@ -66,6 +66,11 @@ namespace detail
|
||||
|
||||
}
|
||||
|
||||
// Unlike HashTableGrower, always grows by a single size degree (i.e. to the next power of 2).
|
||||
struct UniqCombinedHashTableGrower : public HashTableGrower<>
|
||||
{
|
||||
void increaseSize() { ++size_degree; }
|
||||
};
|
||||
|
||||
template <typename Key, UInt8 K>
|
||||
struct AggregateFunctionUniqCombinedDataWithKey
|
||||
@ -76,7 +81,7 @@ struct AggregateFunctionUniqCombinedDataWithKey
|
||||
// We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal.
|
||||
// The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits.
|
||||
// For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4.
|
||||
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, HashTableGrower<>>, 16, K - 4, K, TrivialHash, Key>;
|
||||
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>, 16, K - 5 + (sizeof(Key) == sizeof(UInt32)), K, TrivialHash, Key>;
|
||||
|
||||
Set set;
|
||||
};
|
||||
@ -85,9 +90,9 @@ template <typename Key>
|
||||
struct AggregateFunctionUniqCombinedDataWithKey<Key, 17>
|
||||
{
|
||||
using Set = CombinedCardinalityEstimator<Key,
|
||||
HashSet<Key, TrivialHash, HashTableGrower<>>,
|
||||
HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>,
|
||||
16,
|
||||
13,
|
||||
12 + (sizeof(Key) == sizeof(UInt32)),
|
||||
17,
|
||||
TrivialHash,
|
||||
Key,
|
||||
|
@ -0,0 +1,14 @@
|
||||
UInt32
|
||||
819200
|
||||
UInt64
|
||||
409600
|
||||
K=16
|
||||
UInt32
|
||||
409600
|
||||
UInt64
|
||||
204800
|
||||
K=18
|
||||
UInt32
|
||||
1638400
|
||||
UInt64
|
||||
819200
|
@ -0,0 +1,49 @@
|
||||
-- each uniqCombined state should not use > sizeof(HLL) in memory,
|
||||
-- sizeof(HLL) is (2^K * 6 / 8)
|
||||
-- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400
|
||||
|
||||
-- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements
|
||||
SELECT 'UInt32';
|
||||
SET max_memory_usage = 4000000;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
|
||||
SET max_memory_usage = 9830400;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);
|
||||
|
||||
-- HashTable for UInt64 (used until (1<<12) elements), hence 4096 elements
|
||||
SELECT 'UInt64';
|
||||
SET max_memory_usage = 4000000;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
|
||||
SET max_memory_usage = 9830400;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
|
||||
|
||||
SELECT 'K=16';
|
||||
|
||||
-- HashTable for UInt32 (used until (1<<12) elements), hence 4096 elements
|
||||
SELECT 'UInt32';
|
||||
SET max_memory_usage = 2000000;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
|
||||
SET max_memory_usage = 4915200;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
|
||||
|
||||
-- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements
|
||||
SELECT 'UInt64';
|
||||
SET max_memory_usage = 2000000;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); -- { serverError 241 }
|
||||
SET max_memory_usage = 4915200;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k);
|
||||
|
||||
SELECT 'K=18';
|
||||
|
||||
-- HashTable for UInt32 (used until (1<<14) elements), hence 16384 elements
|
||||
SELECT 'UInt32';
|
||||
SET max_memory_usage = 8000000;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); -- { serverError 241 }
|
||||
SET max_memory_usage = 19660800;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k);
|
||||
|
||||
-- HashTable for UInt64 (used until (1<<13) elements), hence 8192 elements
|
||||
SELECT 'UInt64';
|
||||
SET max_memory_usage = 8000000;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
|
||||
SET max_memory_usage = 19660800;
|
||||
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);
|
Loading…
Reference in New Issue
Block a user