Add ability to configure maximum load factor for the HASHED/SPARSE_HASHED layout

As it turns out, HashMap/PackedHashMap works great even with a max load
factor of 0.99. By "great" I mean that, at least, it works faster than
google sparsehash, not to mention its friendliness to the memory
allocator (it has zero fragmentation, since it works with a contiguous
memory region, in comparison to sparsehash, which does lots of
reallocs that jemalloc does not like, due to its slabs).

Here is a table of different setups:

settings                         | load (sec) | read (sec) | read (million rows/s) | bytes_allocated | RSS
-                                | -          | -          | -                     | -               | -
HASHED upstream                  | -          | -          | -                     | -               | 35GiB
SPARSE_HASHED upstream           | -          | -          | -                     | -               | 26GiB
-                                | -          | -          | -                     | -               | -
sparse_hash_map glibc hashbench  | -          | -          | -                     | -               | 17.5GiB
sparse_hash_map packed allocator | 101.878    | 231.48     | 4.32                  | -               | 17.7GiB
PackedHashMap 0.5                | 15.514     | 42.35      | 23.61                 | 20GiB           | 22GiB
hashed 0.95                      | 34.903     | 115.615    | 8.65                  | 16GiB           | 18.7GiB
**PackedHashMap 0.95**           | **19.883** | **93.6**   | **10.68**             | **10GiB**       | **12.8GiB**
PackedHashMap 0.99               | 26.113     | 83.6       | 11.96                 | 10GiB           | 12.3GiB

As the table shows, PackedHashMap with a max_load_factor of 0.95 eats 2.6x
less memory than SPARSE_HASHED in upstream, and it is also 2x faster for read!

v2: fix grower
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>

View File

@@ -284,6 +284,13 @@ Configuration example:
10000 is a good balance between memory and speed;
even for 10e10 elements it can handle all the load without starvation. -->
<shard_load_queue_backlog>10000</shard_load_queue_backlog>
<!-- Maximum load factor of the hash table. With greater values the memory
is utilized more efficiently (less memory is wasted), but read performance
may deteriorate.
Valid values: [0.5, 0.99]
Default: 0.5 -->
<max_load_factor>0.5</max_load_factor>
</hashed>
</layout>
```
@@ -327,6 +334,7 @@ Configuration example:
<complex_key_hashed>
<shards>1</shards>
<!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
<!-- <max_load_factor>0.5</max_load_factor> -->
</complex_key_hashed>
</layout>
```

View File

@@ -630,6 +630,8 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
HashTableGrowerWithMaxLoadFactor grower(configuration.max_load_factor);
for (const auto & dictionary_attribute : dict_struct.attributes)
{
auto type_call = [&, this](const auto & dictionary_attribute_type)
@@ -639,8 +641,23 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
using ValueType = DictionaryValueType<AttributeType>;
auto is_nullable_sets = dictionary_attribute.is_nullable ? std::make_optional<NullableSets>(configuration.shards) : std::optional<NullableSets>{};
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
attributes.emplace_back(std::move(attribute));
if constexpr (IsBuiltinHashTable<typename CollectionsHolder<ValueType>::value_type>)
{
CollectionsHolder<ValueType> collections;
collections.reserve(configuration.shards);
for (size_t i = 0; i < configuration.shards; ++i)
collections.emplace_back(grower);
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), std::move(collections)};
attributes.emplace_back(std::move(attribute));
}
else
{
Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
for (auto & container : std::get<CollectionsHolder<ValueType>>(attribute.containers))
container.max_load_factor(configuration.max_load_factor);
attributes.emplace_back(std::move(attribute));
}
};
callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
@@ -648,7 +665,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
if (unlikely(attributes.size() == 0))
{
no_attributes_containers.resize(configuration.shards);
no_attributes_containers.reserve(configuration.shards);
for (size_t i = 0; i < configuration.shards; ++i)
no_attributes_containers.emplace_back(grower);
}
string_arenas.resize(configuration.shards);
@@ -1136,9 +1155,14 @@ void registerDictionaryHashed(DictionaryFactory & factory)
if (shard_load_queue_backlog <= 0)
throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater then zero", full_name);
float max_load_factor = static_cast<float>(config.getDouble(config_prefix + dictionary_layout_prefix + ".max_load_factor", 0.5));
if (max_load_factor < 0.5 || max_load_factor > 0.99)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: max_load_factor parameter should be within [0.5, 0.99], got {}", full_name, max_load_factor);
HashedDictionaryConfiguration configuration{
static_cast<UInt64>(shards),
static_cast<UInt64>(shard_load_queue_backlog),
max_load_factor,
require_nonempty,
dict_lifetime,
};
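
Two wiring paths are needed in createAttributes() above because the builtin
HashTable family takes its grower at construction time, while
google::sparse_hash_map (the non-builtin case) only exposes a
max_load_factor() setter after construction. A condensed sketch of that
dispatch, with hypothetical stand-in types rather than the real ones:

```cpp
#include <cstddef>
#include <type_traits>
#include <vector>

// Hypothetical stand-ins (not the real ClickHouse types): builtin tables
// consume the grower at construction, sparse tables expose a setter instead.
struct BuiltinTable { explicit BuiltinTable(float /*grower*/) {} };
struct SparseTable { void max_load_factor(float) {} };

template <typename Table>
std::vector<Table> makeShards(size_t shards, float max_load_factor)
{
    std::vector<Table> tables;
    tables.reserve(shards);
    for (size_t i = 0; i < shards; ++i)
    {
        if constexpr (std::is_constructible_v<Table, float>)
            tables.emplace_back(max_load_factor);   // grower passed at construction
        else
        {
            tables.emplace_back();                  // default-construct, then set
            tables.back().max_load_factor(max_load_factor);
        }
    }
    return tables;
}

int main()
{
    auto builtin = makeShards<BuiltinTable>(4, 0.95f);
    auto sparse = makeShards<SparseTable>(4, 0.95f);
    (void)builtin; (void)sparse;
}
```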

View File

@@ -25,6 +25,7 @@ struct HashedDictionaryConfiguration
{
const UInt64 shards;
const UInt64 shard_load_queue_backlog;
const float max_load_factor;
const bool require_nonempty;
const DictionaryLifetime lifetime;
};

View File

@@ -40,6 +40,92 @@ constexpr bool useSparseHashForHashedDictionary()
return sizeof(PackedPairNoInit<K, V>) > 16;
}
/// Grower with custom fill limit/load factor (instead of default 50%).
///
/// It turns out that HashMap can outperform google::sparse_hash_map when the
/// structure size is not big, in terms of speed *and* memory. Even a max load
/// factor of 99% was faster than google::sparse_hash_map in my simple tests
/// (1e9 UInt64 keys with UInt16 values, randomly distributed).
///
/// And that is not to mention the very high allocator memory fragmentation
/// caused by google::sparse_hash_map.
///
/// Based on HashTableGrowerWithPrecalculation
class alignas(64) HashTableGrowerWithMaxLoadFactor
{
static constexpr size_t initial_size_degree = 8;
UInt8 size_degree = initial_size_degree;
size_t precalculated_mask = (1ULL << initial_size_degree) - 1;
size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1);
float max_load_factor = 0.5;
/// HashTableGrowerWithPrecalculation has 23, but to decrease memory usage
/// at least slightly, 19 is used here. Also note that for dictionaries this
/// is not that important, since they are not loaded that frequently.
static constexpr size_t max_size_degree_quadratic = 19;
public:
static constexpr auto initial_count = 1ULL << initial_size_degree;
/// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
static constexpr auto performs_linear_probing_with_single_step = true;
HashTableGrowerWithMaxLoadFactor() = default;
explicit HashTableGrowerWithMaxLoadFactor(float max_load_factor_)
: max_load_factor(max_load_factor_)
{
increaseSizeDegree(0);
}
UInt8 sizeDegree() const { return size_degree; }
void increaseSizeDegree(UInt8 delta)
{
size_degree += delta;
precalculated_mask = (1ULL << size_degree) - 1;
precalculated_max_fill = static_cast<size_t>((1ULL << size_degree) * max_load_factor);
}
/// The size of the hash table in the cells.
size_t bufSize() const { return 1ULL << size_degree; }
/// From the hash value, get the cell number in the hash table.
size_t place(size_t x) const { return x & precalculated_mask; }
/// The next cell in the collision resolution chain.
size_t next(size_t pos) const { return (pos + 1) & precalculated_mask; }
/// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
bool overflow(size_t elems) const { return elems > precalculated_max_fill; }
/// Increase the size of the hash table.
void increaseSize() { increaseSizeDegree(size_degree >= max_size_degree_quadratic ? 1 : 2); }
/// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
void set(size_t num_elems)
{
if (num_elems <= 1)
size_degree = initial_size_degree;
else if (initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
size_degree = initial_size_degree;
else
{
/// Slightly more optimal than HashTableGrowerWithPrecalculation
/// and takes into account max_load_factor.
size_degree = static_cast<size_t>(log2(num_elems - 1)) + 1;
if ((1ULL << size_degree) * max_load_factor < num_elems)
++size_degree;
}
increaseSizeDegree(0);
}
void setBufSize(size_t buf_size_)
{
size_degree = static_cast<size_t>(log2(buf_size_ - 1) + 1);
increaseSizeDegree(0);
}
};
static_assert(sizeof(HashTableGrowerWithMaxLoadFactor) == 64);
///
/// Map (dictionary with attributes)
///
@@ -54,8 +140,8 @@ struct HashedDictionaryMapType<dictionary_key_type, /* sparse= */ false, Key, Va
{
using Type = std::conditional_t<
dictionary_key_type == DictionaryKeyType::Simple,
HashMap<UInt64, Value, DefaultHash<UInt64>>,
HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
HashMap<UInt64, Value, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
};
/// Implementations for SPARSE_HASHED layout.
@@ -89,8 +175,8 @@ struct HashedDictionarySparseMapType<dictionary_key_type, Key, Value, /* use_spa
{
using Type = std::conditional_t<
dictionary_key_type == DictionaryKeyType::Simple,
PackedHashMap<UInt64, Value, DefaultHash<UInt64>>,
PackedHashMap<StringRef, Value, DefaultHash<StringRef>>>;
PackedHashMap<UInt64, Value, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
PackedHashMap<StringRef, Value, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
};
template <DictionaryKeyType dictionary_key_type, typename Key, typename Value>
struct HashedDictionaryMapType<dictionary_key_type, /* sparse= */ true, Key, Value>
@@ -113,8 +199,8 @@ struct HashedDictionarySetType<dictionary_key_type, /* sparse= */ false, Key>
{
using Type = std::conditional_t<
dictionary_key_type == DictionaryKeyType::Simple,
HashSet<UInt64, DefaultHash<UInt64>>,
HashSetWithSavedHash<StringRef, DefaultHash<StringRef>>>;
HashSet<UInt64, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
HashSetWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
};
/// Implementation for SPARSE_HASHED.
@@ -127,8 +213,8 @@ struct HashedDictionarySetType<dictionary_key_type, /* sparse= */ true, Key>
{
using Type = std::conditional_t<
dictionary_key_type == DictionaryKeyType::Simple,
HashSet<UInt64, DefaultHash<UInt64>>,
HashSet<StringRef, DefaultHash<StringRef>>>;
HashSet<UInt64, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
HashSet<StringRef, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
};
}
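
For reference, here is a condensed standalone model of the grower arithmetic
above (a sketch, not the real class): a resize triggers once the element count
exceeds bufSize() * max_load_factor, instead of at the fixed 50% fill of
HashTableGrowerWithPrecalculation.

```cpp
#include <cstdio>
#include <cstddef>

// Condensed model of HashTableGrowerWithMaxLoadFactor's resize logic
// (standalone sketch, not the real class).
int main()
{
    unsigned size_degree = 8;            // initial_size_degree
    const float max_load_factor = 0.95f;
    for (size_t elems = 0; elems <= 5000; ++elems)
    {
        size_t max_fill = static_cast<size_t>((1ULL << size_degree) * max_load_factor);
        if (elems > max_fill)            // overflow(): the table is too full
        {
            // increaseSize(): grow 4x while small, 2x past degree 19
            size_degree += size_degree >= 19 ? 1 : 2;
            std::printf("grow to 2^%u cells at %zu elements\n", size_degree, elems);
        }
    }
}
```

With max_load_factor 0.95 this grows at 244, 973, and 3892 elements, whereas
the default 50% fill would grow at 129, 513, and 2049.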

View File

@@ -0,0 +1,92 @@
<test>
<substitutions>
<substitution>
<name>layout_suffix</name>
<values>
<value>HASHED</value>
<value>SPARSE_HASHED</value>
</values>
</substitution>
<substitution>
<name>load_factor</name>
<values>
<!-- 0. will be prepended -->
<value>5</value>
<value>7</value>
<value>99</value>
</values>
</substitution>
</substitutions>
<create_query>
CREATE TABLE simple_key_dictionary_source_table
(
id UInt64,
value_int UInt16
) ENGINE = Memory
</create_query>
<create_query>
CREATE TABLE complex_key_dictionary_source_table
(
id UInt64,
id_key String,
value_int UInt64
) ENGINE = Memory
</create_query>
<create_query>
CREATE DICTIONARY IF NOT EXISTS simple_key_{layout_suffix}_dictionary_l0_{load_factor}
(
id UInt64,
value_int UInt64
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'simple_key_dictionary_source_table'))
LAYOUT({layout_suffix}(MAX_LOAD_FACTOR 0.{load_factor}))
LIFETIME(0)
</create_query>
<create_query>
CREATE DICTIONARY IF NOT EXISTS complex_key_{layout_suffix}_dictionary_l0_{load_factor}
(
id UInt64,
id_key String,
value_int UInt64
)
PRIMARY KEY id, id_key
SOURCE(CLICKHOUSE(TABLE 'complex_key_dictionary_source_table'))
LAYOUT(COMPLEX_KEY_{layout_suffix}(MAX_LOAD_FACTOR 0.{load_factor}))
LIFETIME(0)
</create_query>
<fill_query>INSERT INTO simple_key_dictionary_source_table SELECT number, number FROM numbers(3_000_000)</fill_query>
<fill_query>INSERT INTO complex_key_dictionary_source_table SELECT number, toString(number), number FROM numbers(2_000_000)</fill_query>
<fill_query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</fill_query>
<fill_query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</fill_query>
<query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</query>
<query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</query>
<query>
WITH rand64() % 3_000_000 as key
SELECT dictHas('default.simple_key_{layout_suffix}_dictionary_l0_{load_factor}', key)
FROM numbers(3_000_000)
FORMAT Null
</query>
<query>
WITH (rand64() % 2_000_000, toString(rand64() % 2_000_000)) as key
SELECT dictHas('default.complex_key_{layout_suffix}_dictionary_l0_{load_factor}', key)
FROM numbers(2_000_000)
FORMAT Null
</query>
<drop_query>DROP DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</drop_query>
<drop_query>DROP DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</drop_query>
<drop_query>DROP TABLE simple_key_dictionary_source_table</drop_query>
<drop_query>DROP TABLE complex_key_dictionary_source_table</drop_query>
</test>

View File

@@ -0,0 +1,12 @@
CREATE DICTIONARY default.test_sparse_dictionary_load_factor\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
CREATE DICTIONARY default.test_dictionary_load_factor\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
CREATE DICTIONARY default.test_dictionary_load_factor_nullable\n(\n `key` UInt64,\n `value` Nullable(UInt16)\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table_nullable))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
CREATE DICTIONARY default.test_complex_dictionary_load_factor\n(\n `key_1` UInt64,\n `key_2` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key_1, key_2\nSOURCE(CLICKHOUSE(TABLE test_table_complex))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(COMPLEX_KEY_HASHED(MAX_LOAD_FACTOR 0.9))
100000
0

View File

@@ -0,0 +1,107 @@
DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
key UInt64,
value UInt16
) ENGINE=Memory() AS SELECT number, number FROM numbers(1e5);
DROP TABLE IF EXISTS test_table_nullable;
CREATE TABLE test_table_nullable
(
key UInt64,
value Nullable(UInt16)
) ENGINE=Memory() AS SELECT number, number % 2 == 0 ? NULL : number FROM numbers(1e5);
DROP TABLE IF EXISTS test_table_string;
CREATE TABLE test_table_string
(
key String,
value UInt16
) ENGINE=Memory() AS SELECT 'foo' || number::String, number FROM numbers(1e5);
DROP TABLE IF EXISTS test_table_complex;
CREATE TABLE test_table_complex
(
key_1 UInt64,
key_2 UInt64,
value UInt16
) ENGINE=Memory() AS SELECT number, number, number FROM numbers(1e5);
DROP DICTIONARY IF EXISTS test_sparse_dictionary_load_factor;
CREATE DICTIONARY test_sparse_dictionary_load_factor
(
key UInt64,
value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT(SPARSE_HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SHOW CREATE test_sparse_dictionary_load_factor;
SYSTEM RELOAD DICTIONARY test_sparse_dictionary_load_factor;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_sparse_dictionary_load_factor';
SELECT count() FROM test_table WHERE dictGet('test_sparse_dictionary_load_factor', 'value', key) != value;
DROP DICTIONARY test_sparse_dictionary_load_factor;
DROP DICTIONARY IF EXISTS test_dictionary_load_factor;
CREATE DICTIONARY test_dictionary_load_factor
(
key UInt64,
value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT(HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SHOW CREATE test_dictionary_load_factor;
SYSTEM RELOAD DICTIONARY test_dictionary_load_factor;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_dictionary_load_factor';
SELECT count() FROM test_table WHERE dictGet('test_dictionary_load_factor', 'value', key) != value;
DROP DICTIONARY test_dictionary_load_factor;
DROP DICTIONARY IF EXISTS test_dictionary_load_factor_nullable;
CREATE DICTIONARY test_dictionary_load_factor_nullable
(
key UInt64,
value Nullable(UInt16)
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table_nullable))
LAYOUT(HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SHOW CREATE test_dictionary_load_factor_nullable;
SYSTEM RELOAD DICTIONARY test_dictionary_load_factor_nullable;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_dictionary_load_factor_nullable';
SELECT count() FROM test_table_nullable WHERE dictGet('test_dictionary_load_factor_nullable', 'value', key) != value;
DROP DICTIONARY test_dictionary_load_factor_nullable;
DROP DICTIONARY IF EXISTS test_complex_dictionary_load_factor;
CREATE DICTIONARY test_complex_dictionary_load_factor
(
key_1 UInt64,
key_2 UInt64,
value UInt16
) PRIMARY KEY key_1, key_2
SOURCE(CLICKHOUSE(TABLE test_table_complex))
LAYOUT(COMPLEX_KEY_HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SYSTEM RELOAD DICTIONARY test_complex_dictionary_load_factor;
SHOW CREATE test_complex_dictionary_load_factor;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() and name = 'test_complex_dictionary_load_factor';
SELECT count() FROM test_table_complex WHERE dictGet('test_complex_dictionary_load_factor', 'value', (key_1, key_2)) != value;
DROP DICTIONARY test_complex_dictionary_load_factor;
DROP DICTIONARY IF EXISTS test_dictionary_load_factor_string;
CREATE DICTIONARY test_dictionary_load_factor_string
(
key String,
value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table_string))
LAYOUT(HASHED(MAX_LOAD_FACTOR 1))
LIFETIME(0);
-- should fail, because MAX_LOAD_FACTOR is 1 (the maximum allowed value is 0.99)
SYSTEM RELOAD DICTIONARY test_dictionary_load_factor_string; -- { serverError BAD_ARGUMENTS }
DROP DICTIONARY test_dictionary_load_factor_string;
DROP TABLE test_table;
DROP TABLE test_table_nullable;
DROP TABLE test_table_string;
DROP TABLE test_table_complex;

View File

@@ -0,0 +1,4 @@
test_dictionary_hashed 1000000 0.4768 33558760
test_dictionary_hashed_load_factor 1000000 0.9537 16781544
test_dictionary_sparse_hashed 1000000 0.4768 20975848
test_dictionary_sparse_hashed_load_factor 1000000 0.9537 10490088
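
These reference figures follow the same power-of-two arithmetic: 1e6 rows at
the default 0.5 need 2^21 cells (1e6/2^21 ≈ 0.4768), while MAX_LOAD_FACTOR
0.98 fits them into 2^20 cells (1e6/2^20 ≈ 0.9537), halving bytes_allocated.
A quick check, assuming a 16-byte padded cell for HASHED and a 10-byte packed
cell for SPARSE_HASHED (the small constant remainder is container overhead):

```cpp
#include <cstdio>

// Sanity check of the reference figures above (sketch). Assumed cell sizes:
// 16 bytes for the padded HashMap<UInt64, UInt16> cell, 10 bytes for the
// packed SPARSE_HASHED cell.
int main()
{
    std::printf("hashed 0.5:          %llu\n", (1ULL << 21) * 16); // ~33558760
    std::printf("hashed 0.98:         %llu\n", (1ULL << 20) * 16); // ~16781544
    std::printf("sparse_hashed 0.5:   %llu\n", (1ULL << 21) * 10); // ~20975848
    std::printf("sparse_hashed 0.98:  %llu\n", (1ULL << 20) * 10); // ~10490088
    std::printf("load factors: %.4f %.4f\n",
                1e6 / (1ULL << 21), 1e6 / (1ULL << 20)); // 0.4768 0.9537
}
```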

View File

@@ -0,0 +1,39 @@
{# vi: ft=sql #}
{% for layout in ["hashed", "sparse_hashed"] %}
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}};
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}_load_factor;
DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
key UInt64,
value UInt16
) ENGINE=Memory() AS SELECT number, number FROM numbers(1e6);
CREATE DICTIONARY test_dictionary_{{layout}}
(
key UInt64,
value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT({{layout}}())
LIFETIME(0);
CREATE DICTIONARY test_dictionary_{{layout}}_load_factor
(
key UInt64,
value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT({{layout}}(MAX_LOAD_FACTOR 0.98))
LIFETIME(0);
SYSTEM RELOAD DICTIONARY test_dictionary_{{layout}};
SYSTEM RELOAD DICTIONARY test_dictionary_{{layout}}_load_factor;
SELECT name, element_count, round(load_factor, 4), bytes_allocated FROM system.dictionaries WHERE database = currentDatabase() ORDER BY name;
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}};
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}_load_factor;
DROP TABLE test_table;
{% endfor %}