mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-13 11:04:10 +00:00
064f901ea8
Preallocation can be used only when we know the number of rows, and for this we need: - source clickhouse - no filtering (i.e. no <where>), since filtering can filter out too many rows, and preallocation may then allocate memory that will never be used. For sparse_hash the difference is quite significant: a preallocated sparse_hash hashtable inserts ~33% faster (7.5 seconds vs 5 seconds for insert, and the difference is more significant for a higher number of elements): $ ninja bench-sparse_hash-run [1/1] cd /src/ch/hashtable-bench/.cmake && ...ch/hashtable-bench/.cmake/bench-sparse_hash sparse_hash/insert: 7.574 <!-- sparse_hash/find : 2.14426 sparse_hash/maxrss: 174MiB sparse_hash/time: 9710.51 msec (user+sys) $ time ninja bench-sparse_hash-preallocate-run [1/1] cd /src/ch/hashtable-bench/.cmake && ...-bench/.cmake/bench-sparse_hash-preallocate sparse_hash/insert: 5.0522 <!-- sparse_hash/find : 2.14024 sparse_hash/maxrss: 174MiB sparse_hash/time: 7192.06 msec (user+sys) P.S. the difference for a sparse_hashed dictionary with 4e9 elements (uint64, uint16) is ~18% (4975.905 vs 4103.569 sec) v2: do not reallocate the dictionary from the progress callback, since this would access the hashtable in parallel. v3: drop PREALLOCATE() and do this only for source=clickhouse and an empty <where>
37 lines
972 B
SQL
37 lines
972 B
SQL
-- The test itself does not check whether preallocation works.
-- It simply checks a SPARSE_HASHED dictionary with a bunch of dictGet() calls
-- (since at the time of writing there was no such test).
-- Start from a clean slate: drop any leftover database from a previous run,
-- then create the scratch database that holds every object used by this test.
DROP DATABASE IF EXISTS db_01509;
CREATE DATABASE db_01509;
-- Source table for the dictionary: one String value per UInt64 key.
CREATE TABLE db_01509.data
(
    key   UInt64,
    value String
)
ENGINE = MergeTree()
ORDER BY key;

-- Fill the source with 1000 rows (keys 0..999); each value is the key rendered as text.
-- The column list is spelled out so the insert does not depend on column order.
INSERT INTO db_01509.data (key, value)
SELECT
    number AS key,
    toString(number) AS value
FROM numbers(1000);
-- Dictionary under test: SPARSE_HASHED layout, loaded from db_01509.data through
-- the ClickHouse source. No WHERE clause is given in the source definition.
-- Keys that are missing from the source resolve to the DEFAULT value '-'.
DROP DICTIONARY IF EXISTS db_01509.dict;
CREATE DICTIONARY db_01509.dict
(
    key   UInt64,
    value String DEFAULT '-'
)
PRIMARY KEY key
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'data' PASSWORD '' DB 'db_01509'))
LAYOUT(SPARSE_HASHED())
LIFETIME(0);
-- Print the stored definition so the reference output pins how the dictionary DDL is kept.
SHOW CREATE DICTIONARY db_01509.dict;

-- Load the dictionary explicitly before querying it, rather than relying on the first dictGet().
SYSTEM RELOAD DICTIONARY db_01509.dict;
-- Key 1e12 is far outside the inserted range 0..999, so this exercises the DEFAULT ('-') path.
SELECT dictGet('db_01509.dict', 'value', toUInt64(1e12));
-- Key 0 was inserted, so this exercises a successful lookup (value is toString(0)).
SELECT dictGet('db_01509.dict', 'value', toUInt64(0));
-- Number of entries in the dictionary; should match the 1000 rows inserted into the source.
SELECT count() FROM db_01509.dict;
-- Teardown: remove the scratch database together with the table and dictionary inside it.
DROP DATABASE IF EXISTS db_01509;