mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 03:22:14 +00:00
2996b38606
As it turns out, HashMap/PackedHashMap works great even with a max load factor of 0.99. By "great" I mean that at least it works faster than google sparsehash, not to mention its friendliness to the memory allocator (it has zero fragmentation since it works with a contiguous memory region, in comparison to sparsehash, which does lots of reallocs, which jemalloc does not like due to its slabs).

Here is a table of different setups:

| settings | load (sec) | read (sec) | read (million rows/s) | bytes_allocated | RSS |
| - | - | - | - | - | - |
| HASHED upstream | - | - | - | - | 35GiB |
| SPARSE_HASHED upstream | - | - | - | - | 26GiB |
| - | - | - | - | - | - |
| sparse_hash_map glibc hashbench | - | - | - | - | 17.5GiB |
| sparse_hash_map packed allocator | 101.878 | 231.48 | 4.32 | - | 17.7GiB |
| PackedHashMap 0.5 | 15.514 | 42.35 | 23.61 | 20GiB | 22GiB |
| hashed 0.95 | 34.903 | 115.615 | 8.65 | 16GiB | 18.7GiB |
| **PackedHashMap 0.95** | **19.883** | **93.6** | **10.68** | **10GiB** | **12.8GiB** |
| PackedHashMap 0.99 | 26.113 | 83.6 | 11.96 | 10GiB | 12.3GiB |

As it shows, PackedHashMap with 0.95 max_load_factor eats 2.6x less memory than SPARSE_HASHED in upstream, and it is also 2x faster for read!

v2: fix grower

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
93 lines
3.2 KiB
XML
93 lines
3.2 KiB
XML
<test>
    <!--
        Performance test for HASHED / SPARSE_HASHED dictionaries (simple and
        complex key) created with different MAX_LOAD_FACTOR values.
        The query templates below reference {layout_suffix} and {load_factor},
        which are defined in the substitutions section.
    -->
    <substitutions>
        <substitution>
            <name>layout_suffix</name>
            <values>
                <value>HASHED</value>
                <value>SPARSE_HASHED</value>
            </values>
        </substitution>

        <substitution>
            <name>load_factor</name>
            <values>
                <!-- Fractional digits only: the templates prepend "0.", so 5 becomes 0.5, 7 becomes 0.7 and 99 becomes 0.99 -->
                <value>5</value>
                <value>7</value>
                <value>99</value>
            </values>
        </substitution>
    </substitutions>

    <!--
        Source table for the simple (single UInt64) key dictionaries.
        NOTE(review): value_int is UInt16 here, while the dictionary declares it as UInt64
        and the fill query inserts values up to 2_999_999; confirm the narrower type is intentional.
    -->
    <create_query>
        CREATE TABLE simple_key_dictionary_source_table
        (
            id UInt64,
            value_int UInt16
        ) ENGINE = Memory
    </create_query>

    <!-- Source table for the complex (UInt64, String) key dictionaries. -->
    <create_query>
        CREATE TABLE complex_key_dictionary_source_table
        (
            id UInt64,
            id_key String,
            value_int UInt64
        ) ENGINE = Memory
    </create_query>

    <!-- One simple-key dictionary per layout / load factor; the name suffix l0_N encodes MAX_LOAD_FACTOR 0.N. -->
    <create_query>
        CREATE DICTIONARY IF NOT EXISTS simple_key_{layout_suffix}_dictionary_l0_{load_factor}
        (
            id UInt64,
            value_int UInt64
        )
        PRIMARY KEY id
        SOURCE(CLICKHOUSE(TABLE 'simple_key_dictionary_source_table'))
        LAYOUT({layout_suffix}(MAX_LOAD_FACTOR 0.{load_factor}))
        LIFETIME(0)
    </create_query>

    <!-- Same as above for the COMPLEX_KEY_* variant of each layout. -->
    <create_query>
        CREATE DICTIONARY IF NOT EXISTS complex_key_{layout_suffix}_dictionary_l0_{load_factor}
        (
            id UInt64,
            id_key String,
            value_int UInt64
        )
        PRIMARY KEY id, id_key
        SOURCE(CLICKHOUSE(TABLE 'complex_key_dictionary_source_table'))
        LAYOUT(COMPLEX_KEY_{layout_suffix}(MAX_LOAD_FACTOR 0.{load_factor}))
        LIFETIME(0)
    </create_query>

    <!-- Populate the source tables: 3M rows for the simple key, 2M rows for the complex key. -->
    <fill_query>INSERT INTO simple_key_dictionary_source_table SELECT number, number FROM numbers(3_000_000)</fill_query>
    <fill_query>INSERT INTO complex_key_dictionary_source_table SELECT number, toString(number), number FROM numbers(2_000_000)</fill_query>

    <!-- Load every dictionary once as part of the fill step, after the source tables are populated. -->
    <fill_query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</fill_query>
    <fill_query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</fill_query>

    <!-- Queries under test: dictionary reload. -->
    <query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</query>
    <query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</query>

    <!-- Queries under test: dictHas lookups with random keys drawn from the range of ids that were inserted. -->
    <query>
        WITH rand64() % 3_000_000 AS key
        SELECT dictHas('default.simple_key_{layout_suffix}_dictionary_l0_{load_factor}', key)
        FROM numbers(3_000_000)
        FORMAT Null
    </query>

    <!--
        NOTE(review): both tuple elements are built from a textually identical rand64() call;
        whether they evaluate to the same value per row (and therefore hit existing keys)
        depends on how ClickHouse treats identical expressions. Confirm.
    -->
    <query>
        WITH (rand64() % 2_000_000, toString(rand64() % 2_000_000)) AS key
        SELECT dictHas('default.complex_key_{layout_suffix}_dictionary_l0_{load_factor}', key)
        FROM numbers(2_000_000)
        FORMAT Null
    </query>

    <!-- Cleanup. IF EXISTS keeps these statements from failing when an object is already gone. -->
    <drop_query>DROP DICTIONARY IF EXISTS simple_key_{layout_suffix}_dictionary_l0_{load_factor}</drop_query>
    <drop_query>DROP DICTIONARY IF EXISTS complex_key_{layout_suffix}_dictionary_l0_{load_factor}</drop_query>

    <drop_query>DROP TABLE IF EXISTS simple_key_dictionary_source_table</drop_query>
    <drop_query>DROP TABLE IF EXISTS complex_key_dictionary_source_table</drop_query>
</test>
|