ClickHouse/dbms/tests/queries/0_stateless/00950_dict_get.sql
Azat Khuzhin 420089c301
Add new dictionary layout (sparse_hashed) that is more memory efficient
With this new layout, sparsehash will be used instead of the default HashMap;
sparsehash is more memory efficient, but it is also slower.

So in a nutshell:
- HashMap uses ~2x more memory than sparse_hash_map
- HashMap is ~2-2.5x faster than sparse_hash_map
(tested on many inputs; the one closest to production was a
dictionary with 600KK hashes and UInt16 as the value)

TODO:
- fix allocated memory calculation
- getBufferSizeInBytes/getBufferSizeInCells interface
- benchmarks

v0: replace HashMap with google::sparse_hash_map
v2: use google::sparse_hash_map only when <sparse> is set to true
v3: replace attributes with different layout
v4: use ch hash over std::hash
2019-09-21 02:22:40 +03:00

277 lines
14 KiB
SQL

-- Fixture setup for the dictGet / dictGetOrDefault checks in this file.
-- Must use `test_00950` database and these tables - they're configured in dbms/tests/*_dictionary.xml
create database if not exists test_00950;
use test_00950;

-- Recreate the source tables from scratch so each run starts with exactly the rows inserted below.
drop table if exists ints;
drop table if exists strings;
drop table if exists decimals;

-- One source table per attribute family; all of them are keyed by a UInt64 `key` column.
create table ints
(
    key UInt64,
    i8 Int8,
    i16 Int16,
    i32 Int32,
    i64 Int64,
    u8 UInt8,
    u16 UInt16,
    u32 UInt32,
    u64 UInt64
) Engine = Memory;

create table strings
(
    key UInt64,
    str String
) Engine = Memory;

create table decimals
(
    key UInt64,
    d32 Decimal32(4),
    d64 Decimal64(6),
    d128 Decimal128(1)
) Engine = Memory;

-- A single row with key = 1 goes into each table.
-- Column lists are explicit so the values stay bound to the intended columns
-- even if a table definition above is reordered or extended.
insert into ints (key, i8, i16, i32, i64, u8, u16, u32, u64) values (1, 1, 1, 1, 1, 1, 1, 1, 1);
insert into strings (key, str) values (1, '1');
insert into decimals (key, d32, d64, d128) values (1, 1, 1, 1);
-- Dictionary `flat_ints`, key 1 (the key inserted into `ints`): read every integer attribute.
select
    'dictGet',
    'flat_ints' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'i8', k),
    dictGet(dict_name, 'i16', k),
    dictGet(dict_name, 'i32', k),
    dictGet(dict_name, 'i64', k),
    dictGet(dict_name, 'u8', k),
    dictGet(dict_name, 'u16', k),
    dictGet(dict_name, 'u32', k),
    dictGet(dict_name, 'u64', k);

-- Same key through dictGetOrDefault; every call supplies 42, cast to the attribute's type, as the default.
select
    'dictGetOrDefault',
    'flat_ints' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));

-- Key 0 (the fixture rows in this file only use key 1), so the 42 defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'flat_ints' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
-- Dictionary `hashed_ints`, key 1 (the key inserted into `ints`): read every integer attribute.
select
    'dictGet',
    'hashed_ints' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'i8', k),
    dictGet(dict_name, 'i16', k),
    dictGet(dict_name, 'i32', k),
    dictGet(dict_name, 'i64', k),
    dictGet(dict_name, 'u8', k),
    dictGet(dict_name, 'u16', k),
    dictGet(dict_name, 'u32', k),
    dictGet(dict_name, 'u64', k);

-- Same key through dictGetOrDefault; every call supplies 42, cast to the attribute's type, as the default.
select
    'dictGetOrDefault',
    'hashed_ints' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));

-- Key 0 (the fixture rows in this file only use key 1), so the 42 defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'hashed_ints' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
-- Dictionary `hashed_sparse_ints`, key 1 (the key inserted into `ints`): read every integer attribute.
select
    'dictGet',
    'hashed_sparse_ints' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'i8', k),
    dictGet(dict_name, 'i16', k),
    dictGet(dict_name, 'i32', k),
    dictGet(dict_name, 'i64', k),
    dictGet(dict_name, 'u8', k),
    dictGet(dict_name, 'u16', k),
    dictGet(dict_name, 'u32', k),
    dictGet(dict_name, 'u64', k);

-- Same key through dictGetOrDefault; every call supplies 42, cast to the attribute's type, as the default.
select
    'dictGetOrDefault',
    'hashed_sparse_ints' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));

-- Key 0 (the fixture rows in this file only use key 1), so the 42 defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'hashed_sparse_ints' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
-- Dictionary `cache_ints`, key 1 (the key inserted into `ints`): read every integer attribute.
select
    'dictGet',
    'cache_ints' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'i8', k),
    dictGet(dict_name, 'i16', k),
    dictGet(dict_name, 'i32', k),
    dictGet(dict_name, 'i64', k),
    dictGet(dict_name, 'u8', k),
    dictGet(dict_name, 'u16', k),
    dictGet(dict_name, 'u32', k),
    dictGet(dict_name, 'u64', k);

-- Same key through dictGetOrDefault; every call supplies 42, cast to the attribute's type, as the default.
select
    'dictGetOrDefault',
    'cache_ints' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));

-- Key 0 (the fixture rows in this file only use key 1), so the 42 defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'cache_ints' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
-- Dictionary `complex_hashed_ints`: the key is passed as a one-element tuple, so `k` itself is a tuple here.
-- Key (1) matches the row inserted into `ints`; read every integer attribute.
select
    'dictGet',
    'complex_hashed_ints' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGet(dict_name, 'i8', k),
    dictGet(dict_name, 'i16', k),
    dictGet(dict_name, 'i32', k),
    dictGet(dict_name, 'i64', k),
    dictGet(dict_name, 'u8', k),
    dictGet(dict_name, 'u16', k),
    dictGet(dict_name, 'u32', k),
    dictGet(dict_name, 'u64', k);

-- Same key through dictGetOrDefault; every call supplies 42, cast to the attribute's type, as the default.
select
    'dictGetOrDefault',
    'complex_hashed_ints' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));

-- Key (0) (the fixture rows in this file only use key 1), so the 42 defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'complex_hashed_ints' as dict_name,
    tuple(toUInt64(0)) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
-- Dictionary `complex_cache_ints`: the key is passed as a one-element tuple, so `k` itself is a tuple here.
-- Key (1) matches the row inserted into `ints`; read every integer attribute.
select
    'dictGet',
    'complex_cache_ints' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGet(dict_name, 'i8', k),
    dictGet(dict_name, 'i16', k),
    dictGet(dict_name, 'i32', k),
    dictGet(dict_name, 'i64', k),
    dictGet(dict_name, 'u8', k),
    dictGet(dict_name, 'u16', k),
    dictGet(dict_name, 'u32', k),
    dictGet(dict_name, 'u64', k);

-- Same key through dictGetOrDefault; every call supplies 42, cast to the attribute's type, as the default.
select
    'dictGetOrDefault',
    'complex_cache_ints' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));

-- Key (0) (the fixture rows in this file only use key 1), so the 42 defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'complex_cache_ints' as dict_name,
    tuple(toUInt64(0)) as k,
    dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
    dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
    dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
    dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
    dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
    dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
    dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
    dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
-- String attribute `str`: for each dictionary run dictGet on key 1, then dictGetOrDefault
-- (default '*') on key 1 and on key 0 (the fixture rows in this file only use key 1).

-- Dictionary `flat_strings`.
select
    'dictGet',
    'flat_strings' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'str', k);
select
    'dictGetOrDefault',
    'flat_strings' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'str', k, '*');
select
    'dictGetOrDefault',
    'flat_strings' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'str', k, '*');

-- Dictionary `hashed_strings`.
select
    'dictGet',
    'hashed_strings' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'str', k);
select
    'dictGetOrDefault',
    'hashed_strings' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'str', k, '*');
select
    'dictGetOrDefault',
    'hashed_strings' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'str', k, '*');

-- Dictionary `cache_strings`.
select
    'dictGet',
    'cache_strings' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'str', k);
select
    'dictGetOrDefault',
    'cache_strings' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'str', k, '*');
select
    'dictGetOrDefault',
    'cache_strings' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'str', k, '*');

-- Dictionary `complex_hashed_strings`: `k` stays a plain UInt64 in the select list
-- and is wrapped in tuple() only at the call site.
select
    'dictGet',
    'complex_hashed_strings' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'str', tuple(k));
select
    'dictGetOrDefault',
    'complex_hashed_strings' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'str', tuple(k), '*');
select
    'dictGetOrDefault',
    'complex_hashed_strings' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'str', tuple(k), '*');

-- Dictionary `complex_cache_strings`: same tuple-at-call-site convention as above.
select
    'dictGet',
    'complex_cache_strings' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'str', tuple(k));
select
    'dictGetOrDefault',
    'complex_cache_strings' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'str', tuple(k), '*');
select
    'dictGetOrDefault',
    'complex_cache_strings' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'str', tuple(k), '*');
-- Decimal attributes (d32, d64, d128) start here.
-- Dictionary `flat_decimals`, key 1 (the key inserted into `decimals`): read all three attributes.
select
    'dictGet',
    'flat_decimals' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'd32', k),
    dictGet(dict_name, 'd64', k),
    dictGet(dict_name, 'd128', k);

-- Same key through dictGetOrDefault; each default is 42 with the scale of the matching column (4, 6, 1).
select
    'dictGetOrDefault',
    'flat_decimals' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));

-- Key 0 (the fixture rows in this file only use key 1), so the defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'flat_decimals' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));
-- Dictionary `hashed_decimals`, key 1 (the key inserted into `decimals`): read all three attributes.
select
    'dictGet',
    'hashed_decimals' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'd32', k),
    dictGet(dict_name, 'd64', k),
    dictGet(dict_name, 'd128', k);

-- Same key through dictGetOrDefault; each default is 42 with the scale of the matching column (4, 6, 1).
select
    'dictGetOrDefault',
    'hashed_decimals' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));

-- Key 0 (the fixture rows in this file only use key 1), so the defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'hashed_decimals' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));
-- Dictionary `cache_decimals`, key 1 (the key inserted into `decimals`): read all three attributes.
select
    'dictGet',
    'cache_decimals' as dict_name,
    toUInt64(1) as k,
    dictGet(dict_name, 'd32', k),
    dictGet(dict_name, 'd64', k),
    dictGet(dict_name, 'd128', k);

-- Same key through dictGetOrDefault; each default is 42 with the scale of the matching column (4, 6, 1).
select
    'dictGetOrDefault',
    'cache_decimals' as dict_name,
    toUInt64(1) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));

-- Key 0 (the fixture rows in this file only use key 1), so the defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'cache_decimals' as dict_name,
    toUInt64(0) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));
-- Dictionary `complex_hashed_decimals`: the key is passed as a one-element tuple, so `k` itself is a tuple here.
-- Key (1) matches the row inserted into `decimals`; read all three attributes.
select
    'dictGet',
    'complex_hashed_decimals' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGet(dict_name, 'd32', k),
    dictGet(dict_name, 'd64', k),
    dictGet(dict_name, 'd128', k);

-- Same key through dictGetOrDefault; each default is 42 with the scale of the matching column (4, 6, 1).
select
    'dictGetOrDefault',
    'complex_hashed_decimals' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));

-- Key (0) (the fixture rows in this file only use key 1), so the defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'complex_hashed_decimals' as dict_name,
    tuple(toUInt64(0)) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));
-- Dictionary `complex_cache_decimals`: the key is passed as a one-element tuple, so `k` itself is a tuple here.
-- Key (1) matches the row inserted into `decimals`; read all three attributes.
select
    'dictGet',
    'complex_cache_decimals' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGet(dict_name, 'd32', k),
    dictGet(dict_name, 'd64', k),
    dictGet(dict_name, 'd128', k);

-- Same key through dictGetOrDefault; each default is 42 with the scale of the matching column (4, 6, 1).
select
    'dictGetOrDefault',
    'complex_cache_decimals' as dict_name,
    tuple(toUInt64(1)) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));

-- Key (0) (the fixture rows in this file only use key 1), so the defaults are presumably what comes back.
select
    'dictGetOrDefault',
    'complex_cache_decimals' as dict_name,
    tuple(toUInt64(0)) as k,
    dictGetOrDefault(dict_name, 'd32', k, toDecimal32(42, 4)),
    dictGetOrDefault(dict_name, 'd64', k, toDecimal64(42, 6)),
    dictGetOrDefault(dict_name, 'd128', k, toDecimal128(42, 1));
-- Teardown: remove the three source tables, then the database that held them.
-- Names are database-qualified; `test_00950` is also the current database (see `use` above).
drop table test_00950.ints;
drop table test_00950.strings;
drop table test_00950.decimals;

drop database test_00950;