ClickHouse/tests/performance/hashed_dictionary_sharded.xml
Azat Khuzhin 7b5d156cc5 Optimize SPARSE_HASHED layout (by using PackedHashMap)
If you want a dictionary optimized for memory, SPARSE_HASHED does not
always give you what you need.

Consider the following example: <UInt64, UInt16> as <Key, Value>. This
pair also carries 6 bytes of padding (on amd64), so almost 40% of the
space is wasted.

And because of this padding, even google::sparse_hash_map does not make
the picture better. In fact, sparse_hash_map is not very friendly to memory
allocators (especially jemalloc).

Here are some numbers for dictionary with 1e9 elements and UInt64 as
key, and UInt16 as value:

settings                         | load (sec) | read (sec) | read (million rows/s) | bytes_allocated | RSS
HASHED upstream                  | -          | -          | -                     | -               | 35GiB
SPARSE_HASHED upstream           | -          | -          | -                     | -               | 26GiB
-                                | -          | -          | -                     | -               | -
sparse_hash_map glibc hashbench  | -          | -          | -                     | -               | 17.5GiB
sparse_hash_map packed allocator | 101.878    | 231.48     | 4.32                  | -               | 17.7GiB
PackedHashMap                    | 15.514     | 42.35      | 23.61                 | 20GiB           | 22GiB

As you can see, PackedHashMap looks much better than HASHED, and
even better than SPARSE_HASHED, but slightly worse than sparse_hash_map
with the packed allocator (which is done with a custom patch to google
sparse_hash_map).

v2: rebase on top of bucket_count fix
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2023-05-19 06:07:21 +02:00

92 lines
3.1 KiB
XML

<test>
<!--
Substitution axes referenced as {layout_suffix} and {shards} by the statements in this file.
  layout_suffix: dictionary layout name, HASHED or SPARSE_HASHED
                 (the complex-key dictionary prepends COMPLEX_KEY_ to it).
  shards:        value passed to the layout's SHARDS parameter: 1, 8 or 16.
-->
<substitutions>
    <substitution>
        <name>layout_suffix</name>
        <values>
            <value>HASHED</value>
            <value>SPARSE_HASHED</value>
        </values>
    </substitution>
    <substitution>
        <name>shards</name>
        <values>
            <value>1</value>
            <value>8</value>
            <value>16</value>
        </values>
    </substitution>
</substitutions>
<!-- Source table read by the simple-key dictionaries: a UInt64 key column (id) and a UInt16 value column (value_int), stored with the Memory engine. -->
<create_query>
CREATE TABLE simple_key_dictionary_source_table
(
id UInt64,
value_int UInt16
) ENGINE = Memory
</create_query>
<!-- Source table read by the complex-key dictionaries: the key is the pair (id UInt64, id_key String), the value column is value_int UInt64; stored with the Memory engine. -->
<create_query>
CREATE TABLE complex_key_dictionary_source_table
(
id UInt64,
id_key String,
value_int UInt64
) ENGINE = Memory
</create_query>
<!--
Simple-key dictionary keyed by id (UInt64) and loaded from simple_key_dictionary_source_table.
Its name, layout and shard count come from the {layout_suffix} and {shards} substitutions.
value_int is declared as UInt16, the same type as the source column, so the dictionary holds
the (UInt64 key, UInt16 value) pair as is instead of widening every value to UInt64.
LIFETIME(0): no periodic refresh; the dictionary is reloaded only by the explicit
SYSTEM RELOAD DICTIONARY statements of this test.
-->
<create_query>
CREATE DICTIONARY IF NOT EXISTS simple_key_{layout_suffix}_dictionary_s{shards}
(
id UInt64,
value_int UInt16
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'simple_key_dictionary_source_table'))
LAYOUT({layout_suffix}(SHARDS {shards}))
LIFETIME(0)
</create_query>
<!--
Complex-key dictionary keyed by the pair (id UInt64, id_key String) and loaded from
complex_key_dictionary_source_table. The layout is COMPLEX_KEY_{layout_suffix} with
SHARDS {shards}; name, layout and shard count come from the substitutions.
LIFETIME(0) is used here (presumably so that the dictionary is refreshed only by the
explicit SYSTEM RELOAD DICTIONARY statements; confirm against the ClickHouse docs).
-->
<create_query>
CREATE DICTIONARY IF NOT EXISTS complex_key_{layout_suffix}_dictionary_s{shards}
(
id UInt64,
id_key String,
value_int UInt64
)
PRIMARY KEY id, id_key
SOURCE(CLICKHOUSE(TABLE 'complex_key_dictionary_source_table'))
LAYOUT(COMPLEX_KEY_{layout_suffix}(SHARDS {shards}))
LIFETIME(0)
</create_query>
<!--
Populate the source tables: 3_000_000 rows for the simple-key table and 2_000_000 rows
for the complex-key table, with id taken from numbers().
Target columns are listed explicitly so the mapping from the SELECT list does not
depend on the column order of the table definitions.
-->
<fill_query>INSERT INTO simple_key_dictionary_source_table (id, value_int) SELECT number, number FROM numbers(3_000_000)</fill_query>
<fill_query>INSERT INTO complex_key_dictionary_source_table (id, id_key, value_int) SELECT number, toString(number), number FROM numbers(2_000_000)</fill_query>
<!-- Reload both dictionaries as part of the fill step, after the source tables have been populated. -->
<fill_query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_s{shards}</fill_query>
<fill_query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_s{shards}</fill_query>
<!-- The same reload statements again as query entries (presumably the measured part: dictionary load time per layout and shard count; confirm against the perf runner). -->
<query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_s{shards}</query>
<query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_s{shards}</query>
<!--
Lookup query for the simple-key dictionaries: dictHas is evaluated for each of the
3_000_000 rows of numbers(), with the key computed as rand64() % 3_000_000, i.e. inside
the id range inserted into simple_key_dictionary_source_table.
FORMAT Null discards the result set.
-->
<query>
WITH rand64() % 3_000_000 as key
SELECT dictHas('default.simple_key_{layout_suffix}_dictionary_s{shards}', key)
FROM numbers(3_000_000)
FORMAT Null
</query>
<!--
Lookup query for the complex-key dictionaries: dictHas is evaluated for each of the
2_000_000 rows of numbers(), with a tuple key (UInt64, String) whose components are both
derived from rand64() % 2_000_000. FORMAT Null discards the result set.
NOTE(review): the source table only contains pairs where id_key = toString(id), so lookups
hit existing keys only if both rand64() calls produce the same value within a row.
Confirm how ClickHouse evaluates the two identical rand64() expressions.
-->
<query>
WITH (rand64() % 2_000_000, toString(rand64() % 2_000_000)) as key
SELECT dictHas('default.complex_key_{layout_suffix}_dictionary_s{shards}', key)
FROM numbers(2_000_000)
FORMAT Null
</query>
<!-- Cleanup: drop the dictionaries first, then the source tables they read from. IF EXISTS keeps each statement from failing when the object is already absent. -->
<drop_query>DROP DICTIONARY IF EXISTS simple_key_{layout_suffix}_dictionary_s{shards}</drop_query>
<drop_query>DROP DICTIONARY IF EXISTS complex_key_{layout_suffix}_dictionary_s{shards}</drop_query>
<drop_query>DROP TABLE IF EXISTS simple_key_dictionary_source_table</drop_query>
<drop_query>DROP TABLE IF EXISTS complex_key_dictionary_source_table</drop_query>
</test>