mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-30 19:42:00 +00:00
7b5d156cc5
If you want a dictionary optimized for memory, SPARSE_HASHED does not always give you what you need. Consider the following example: <UInt64, UInt16> as <Key, Value>. This pair also carries 6 bytes of padding (on amd64), so almost 40% of the space is wasted. Because of this padding, even google::sparse_hash_map does not improve the picture; in fact, sparse_hash_map is not very friendly to memory allocators (especially jemalloc).

Here are some numbers for a dictionary with 1e9 elements, UInt64 as the key and UInt16 as the value:

settings | load (sec) | read (sec) | read (million rows/s) | bytes_allocated | RSS
HASHED upstream | - | - | - | - | 35GiB
SPARSE_HASHED upstream | - | - | - | - | 26GiB
- | - | - | - | - | -
sparse_hash_map glibc hashbench | - | - | - | - | 17.5GiB
sparse_hash_map packed allocator | 101.878 | 231.48 | 4.32 | - | 17.7GiB
PackedHashMap | 15.514 | 42.35 | 23.61 | 20GiB | 22GiB

As you can see, PackedHashMap looks much better than HASHED, and even better than SPARSE_HASHED, but slightly worse than sparse_hash_map with the packed allocator (which is done with a custom patch to google sparse_hash_map).

v2: rebase on top of bucket_count fix

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
92 lines
3.1 KiB
XML
92 lines
3.1 KiB
XML
<test>
|
|
<!-- Declares the two substitution variables referenced by the queries below
     as {layout_suffix} and {shards}.
     NOTE(review): presumably the test harness runs the queries once for every
     combination of the listed values - confirm against the harness docs. -->
<substitutions>
|
|
<!-- Dictionary layout name; also prefixed with COMPLEX_KEY_ for the
     complex-key dictionary's LAYOUT clause. -->
<substitution>
|
|
<name>layout_suffix</name>
|
|
<values>
|
|
<value>HASHED</value>
|
|
<value>SPARSE_HASHED</value>
|
|
</values>
|
|
</substitution>
|
|
|
|
<!-- Value passed to the SHARDS parameter of the layout and embedded in the
     dictionary name as the _s{shards} suffix. -->
<substitution>
|
|
<name>shards</name>
|
|
<values>
|
|
<value>1</value>
|
|
<value>8</value>
|
|
<value>16</value>
|
|
</values>
|
|
</substitution>
|
|
</substitutions>
|
|
|
|
<!-- Source table for the simple-key dictionaries: Memory engine, UInt64 id
     and UInt16 value_int.
     NOTE(review): the simple-key dictionary created later in this file
     declares value_int as UInt64 while this column is UInt16 - confirm the
     width difference is intended. -->
<create_query>
|
|
CREATE TABLE simple_key_dictionary_source_table
|
|
(
|
|
id UInt64,
|
|
value_int UInt16
|
|
) ENGINE = Memory
|
|
</create_query>
|
|
|
|
<!-- Source table for the complex-key dictionaries: Memory engine, with a
     UInt64 id, a String id_key and a UInt64 value_int. -->
<create_query>
|
|
CREATE TABLE complex_key_dictionary_source_table
|
|
(
|
|
id UInt64,
|
|
id_key String,
|
|
value_int UInt64
|
|
) ENGINE = Memory
|
|
</create_query>
|
|
|
|
<!-- Creates the simple-key dictionary named after the current
     {layout_suffix}/{shards} values. It is keyed by id, reads from
     simple_key_dictionary_source_table, and passes {shards} to the layout's
     SHARDS parameter.
     NOTE(review): LIFETIME(0) presumably disables periodic refresh so that
     only the explicit SYSTEM RELOAD statements load data - confirm. -->
<create_query>
|
|
CREATE DICTIONARY IF NOT EXISTS simple_key_{layout_suffix}_dictionary_s{shards}
|
|
(
|
|
id UInt64,
|
|
value_int UInt64
|
|
)
|
|
PRIMARY KEY id
|
|
SOURCE(CLICKHOUSE(TABLE 'simple_key_dictionary_source_table'))
|
|
LAYOUT({layout_suffix}(SHARDS {shards}))
|
|
LIFETIME(0)
|
|
</create_query>
|
|
|
|
<!-- Creates the complex-key dictionary named after the current
     {layout_suffix}/{shards} values. The key is the pair (id, id_key), the
     data comes from complex_key_dictionary_source_table, and the layout is
     COMPLEX_KEY_{layout_suffix} with SHARDS set from {shards}.
     NOTE(review): LIFETIME(0) presumably disables periodic refresh so that
     only the explicit SYSTEM RELOAD statements load data - confirm. -->
<create_query>
|
|
CREATE DICTIONARY IF NOT EXISTS complex_key_{layout_suffix}_dictionary_s{shards}
|
|
(
|
|
id UInt64,
|
|
id_key String,
|
|
value_int UInt64
|
|
)
|
|
PRIMARY KEY id, id_key
|
|
SOURCE(CLICKHOUSE(TABLE 'complex_key_dictionary_source_table'))
|
|
LAYOUT(COMPLEX_KEY_{layout_suffix}(SHARDS {shards}))
|
|
LIFETIME(0)
|
|
</create_query>
|
|
|
|
<!-- Populates the source tables: 3,000,000 rows for the simple-key table and
     2,000,000 rows for the complex-key table, where id_key is the string form
     of the same number used for id. -->
<fill_query>INSERT INTO simple_key_dictionary_source_table SELECT number, number FROM numbers(3_000_000)</fill_query>
|
|
<fill_query>INSERT INTO complex_key_dictionary_source_table SELECT number, toString(number), number FROM numbers(2_000_000)</fill_query>
|
|
|
|
<!-- Reloads both dictionaries after the source tables are filled.
     NOTE(review): presumably this is an untimed warm-up so the dictionaries
     are loaded before the measured queries run - confirm. -->
<fill_query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_s{shards}</fill_query>
|
|
<fill_query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_s{shards}</fill_query>
|
|
|
|
<!-- Reloads each dictionary from its source table.
     NOTE(review): these are <query> elements rather than <fill_query>, so
     presumably they are the timed measurement of dictionary load speed -
     confirm. -->
<query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_s{shards}</query>
|
|
<query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_s{shards}</query>
|
|
|
|
<!-- Lookup benchmark for the simple-key dictionary: runs dictHas over
     3,000,000 rows with key = rand64() % 3_000_000, the same range that was
     inserted into the source table. FORMAT Null discards the result rows.
     NOTE(review): the dictionary is addressed as 'default.<name>', which
     assumes it lives in the default database - confirm. -->
<query>
|
|
WITH rand64() % 3_000_000 as key
|
|
SELECT dictHas('default.simple_key_{layout_suffix}_dictionary_s{shards}', key)
|
|
FROM numbers(3_000_000)
|
|
FORMAT Null
|
|
</query>
|
|
|
|
<!-- Lookup benchmark for the complex-key dictionary: runs dictHas over
     2,000,000 rows with a tuple key (number, string form of a number), both
     taken modulo 2_000_000. FORMAT Null discards the result rows.
     NOTE(review): the tuple matches a stored (id, id_key) pair only if both
     rand64() calls yield the same value for a row; that depends on identical
     calls being folded by common-subexpression elimination - confirm.
     NOTE(review): the dictionary is addressed as 'default.<name>', which
     assumes it lives in the default database - confirm. -->
<query>
|
|
WITH (rand64() % 2_000_000, toString(rand64() % 2_000_000)) as key
|
|
SELECT dictHas('default.complex_key_{layout_suffix}_dictionary_s{shards}', key)
|
|
FROM numbers(2_000_000)
|
|
FORMAT Null
|
|
</query>
|
|
|
|
<!-- Cleanup: the dictionaries are dropped first, before the source tables
     they read from. -->
<drop_query>DROP DICTIONARY IF EXISTS simple_key_{layout_suffix}_dictionary_s{shards}</drop_query>
|
|
<drop_query>DROP DICTIONARY IF EXISTS complex_key_{layout_suffix}_dictionary_s{shards}</drop_query>
|
|
|
|
<!-- Then the Memory-engine source tables are removed. -->
<drop_query>DROP TABLE IF EXISTS simple_key_dictionary_source_table</drop_query>
|
|
<drop_query>DROP TABLE IF EXISTS complex_key_dictionary_source_table</drop_query>
|
|
</test>
|