Merge pull request #7236 from azat/uniqCombined-memory-usage

Do not use more then 98K of memory for uniqCombined*
This commit is contained in:
alexey-milovidov 2019-10-09 20:19:30 +03:00 committed by GitHub
commit 81a9c81ba7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 71 additions and 3 deletions

View File

@ -66,6 +66,11 @@ namespace detail
}
// Unlike HashTableGrower always grows to power of 2.
struct UniqCombinedHashTableGrower : public HashTableGrower<>
{
void increaseSize() { ++size_degree; }
};
template <typename Key, UInt8 K>
struct AggregateFunctionUniqCombinedDataWithKey
@ -76,7 +81,7 @@ struct AggregateFunctionUniqCombinedDataWithKey
// We want to migrate from |HashSet| to |HyperLogLogCounter| when the sizes in memory become almost equal.
// The size per element in |HashSet| is sizeof(Key)*2 bytes, and the overall size of |HyperLogLogCounter| is 2^K * 6 bits.
// For Key=UInt32 we can calculate: 2^X * 4 * 2 ≤ 2^(K-3) * 6 ⇒ X ≤ K-4.
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, HashTableGrower<>>, 16, K - 4, K, TrivialHash, Key>;
using Set = CombinedCardinalityEstimator<Key, HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>, 16, K - 5 + (sizeof(Key) == sizeof(UInt32)), K, TrivialHash, Key>;
Set set;
};
@ -85,9 +90,9 @@ template <typename Key>
struct AggregateFunctionUniqCombinedDataWithKey<Key, 17>
{
using Set = CombinedCardinalityEstimator<Key,
HashSet<Key, TrivialHash, HashTableGrower<>>,
HashSet<Key, TrivialHash, UniqCombinedHashTableGrower>,
16,
13,
12 + (sizeof(Key) == sizeof(UInt32)),
17,
TrivialHash,
Key,

View File

@ -0,0 +1,14 @@
UInt32
819200
UInt64
409600
K=16
UInt32
409600
UInt64
204800
K=18
UInt32
1638400
UInt64
819200

View File

@ -0,0 +1,49 @@
-- each uniqCombined state should not use > sizeof(HLL) in memory,
-- sizeof(HLL) is (2^K * 6 / 8)
-- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400
-- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements
SELECT 'UInt32';
SET max_memory_usage = 4000000;
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
SET max_memory_usage = 9830400;
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);
-- HashTable for UInt64 (used until (1<<12) elements), hence 4096 elements
SELECT 'UInt64';
SET max_memory_usage = 4000000;
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
SET max_memory_usage = 9830400;
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
SELECT 'K=16';
-- HashTable for UInt32 (used until (1<<12) elements), hence 4096 elements
SELECT 'UInt32';
SET max_memory_usage = 2000000;
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 }
SET max_memory_usage = 4915200;
SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k);
-- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements
SELECT 'UInt64';
SET max_memory_usage = 2000000;
SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); -- { serverError 241 }
SET max_memory_usage = 4915200;
SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k);
SELECT 'K=18';
-- HashTable for UInt32 (used until (1<<14) elements), hence 16384 elements
SELECT 'UInt32';
SET max_memory_usage = 8000000;
SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); -- { serverError 241 }
SET max_memory_usage = 19660800;
SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k);
-- HashTable for UInt64 (used until (1<<13) elements), hence 8192 elements
SELECT 'UInt64';
SET max_memory_usage = 8000000;
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 }
SET max_memory_usage = 19660800;
SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k);