Add ability to configure maximum load factor for the HASHED/SPARSE_HASHED layout
As it turns out, HashMap/PackedHashMap works great even with a max load factor of 0.99. By "great" I mean it at least works faster than google sparsehash, not to mention its friendliness to the memory allocator (it has zero fragmentation, since it works with a contiguous memory region, in contrast to sparsehash, which does lots of reallocs that jemalloc does not like due to its slabs).

Here is a table of the different setups:

| settings | load (sec) | read (sec) | read (million rows/s) | bytes_allocated | RSS |
| - | - | - | - | - | - |
| HASHED upstream | - | - | - | - | 35GiB |
| SPARSE_HASHED upstream | - | - | - | - | 26GiB |
| | | | | | |
| sparse_hash_map glibc hashbench | - | - | - | - | 17.5GiB |
| sparse_hash_map packed allocator | 101.878 | 231.48 | 4.32 | - | 17.7GiB |
| PackedHashMap 0.5 | 15.514 | 42.35 | 23.61 | 20GiB | 22GiB |
| hashed 0.95 | 34.903 | 115.615 | 8.65 | 16GiB | 18.7GiB |
| **PackedHashMap 0.95** | **19.883** | **93.6** | **10.68** | **10GiB** | **12.8GiB** |
| PackedHashMap 0.99 | 26.113 | 83.6 | 11.96 | 10GiB | 12.3GiB |

As the table shows, PackedHashMap with a 0.95 max_load_factor uses 2.6x less memory than upstream SPARSE_HASHED, and it is also about 2x faster to read!

v2: fix grower

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
Parent: 3698302ddb
Commit: 2996b38606
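The bytes_allocated column above can be cross-checked with a minimal sketch, assuming 10-byte packed cells (a PackedPairNoInit<UInt64, UInt16>, as in the SPARSE_HASHED layout below) and a power-of-two cell count chosen so that the fill limit covers all keys:

```
#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t keys = 1'000'000'000;  // 1e9 keys, as in the benchmark above
    const uint64_t cell_size = 8 + 2;     // packed UInt64 key + UInt16 value

    for (double max_load_factor : {0.5, 0.95, 0.99})
    {
        uint64_t cells = 256;  // 1 << initial_size_degree (8)
        while (cells * max_load_factor < keys)  // grow until the fill limit covers all keys
            cells *= 2;
        std::printf("max_load_factor %.2f: %llu cells -> ~%.0f GiB\n",
                    max_load_factor,
                    static_cast<unsigned long long>(cells),
                    static_cast<double>(cells * cell_size) / (1ULL << 30));
    }
    return 0;
}
```

For 1e9 keys this prints roughly 20 GiB at a 0.5 load factor and 10 GiB at 0.95/0.99, matching the bytes_allocated figures in the table.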
````
@@ -284,6 +284,13 @@ Configuration example:
       10000 is good balance between memory and speed.
       Even for 10e10 elements and can handle all the load without starvation. -->
     <shard_load_queue_backlog>10000</shard_load_queue_backlog>
+    <!-- Maximum load factor of the hash table, with greater values, the memory
+         is utilized more efficiently (less memory is wasted) but read performance
+         may deteriorate.
+
+         Valid values: [0.5, 0.99]
+         Default: 0.5 -->
+    <max_load_factor>0.5</max_load_factor>
   </hashed>
 </layout>
 ```
@@ -327,6 +334,7 @@ Configuration example:
   <complex_key_hashed>
     <shards>1</shards>
     <!-- <shard_load_queue_backlog>10000</shard_load_queue_backlog> -->
+    <!-- <max_load_factor>0.5</max_load_factor> -->
   </complex_key_hashed>
 </layout>
 ```
````
```
@@ -630,6 +630,8 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
     const auto size = dict_struct.attributes.size();
     attributes.reserve(size);
 
+    HashTableGrowerWithMaxLoadFactor grower(configuration.max_load_factor);
+
     for (const auto & dictionary_attribute : dict_struct.attributes)
     {
         auto type_call = [&, this](const auto & dictionary_attribute_type)
@@ -639,8 +641,23 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
             using ValueType = DictionaryValueType<AttributeType>;
 
             auto is_nullable_sets = dictionary_attribute.is_nullable ? std::make_optional<NullableSets>(configuration.shards) : std::optional<NullableSets>{};
-            Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
-            attributes.emplace_back(std::move(attribute));
+            if constexpr (IsBuiltinHashTable<typename CollectionsHolder<ValueType>::value_type>)
+            {
+                CollectionsHolder<ValueType> collections;
+                collections.reserve(configuration.shards);
+                for (size_t i = 0; i < configuration.shards; ++i)
+                    collections.emplace_back(grower);
+
+                Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), std::move(collections)};
+                attributes.emplace_back(std::move(attribute));
+            }
+            else
+            {
+                Attribute attribute{dictionary_attribute.underlying_type, std::move(is_nullable_sets), CollectionsHolder<ValueType>(configuration.shards)};
+                for (auto & container : std::get<CollectionsHolder<ValueType>>(attribute.containers))
+                    container.max_load_factor(configuration.max_load_factor);
+                attributes.emplace_back(std::move(attribute));
+            }
         };
 
         callOnDictionaryAttributeType(dictionary_attribute.underlying_type, type_call);
@@ -648,7 +665,9 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::createAttributes()
 
     if (unlikely(attributes.size() == 0))
     {
-        no_attributes_containers.resize(configuration.shards);
+        no_attributes_containers.reserve(configuration.shards);
+        for (size_t i = 0; i < configuration.shards; ++i)
+            no_attributes_containers.emplace_back(grower);
    }
 
     string_arenas.resize(configuration.shards);
@@ -1136,9 +1155,14 @@ void registerDictionaryHashed(DictionaryFactory & factory)
         if (shard_load_queue_backlog <= 0)
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater than zero", full_name);
 
+        float max_load_factor = static_cast<float>(config.getDouble(config_prefix + dictionary_layout_prefix + ".max_load_factor", 0.5));
+        if (max_load_factor < 0.5 || max_load_factor > 0.99)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "{}: max_load_factor parameter should be within [0.5, 0.99], got {}", full_name, max_load_factor);
+
         HashedDictionaryConfiguration configuration{
             static_cast<UInt64>(shards),
             static_cast<UInt64>(shard_load_queue_backlog),
+            max_load_factor,
             require_nonempty,
             dict_lifetime,
         };
```
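The `if constexpr` split in createAttributes() reflects how differently the two container families receive the load factor: the builtin tables take it from the grower at construction time, while google::sparse_hash_map only exposes a max_load_factor() setter after construction. A minimal standalone sketch of that dispatch, with hypothetical stand-in types in place of the real IsBuiltinHashTable trait:

```
#include <cstddef>
#include <type_traits>
#include <vector>

struct Grower { float max_load_factor = 0.5f; };  // stand-in for HashTableGrowerWithMaxLoadFactor

// Stand-in for HashMap/PackedHashMap: the fill limit is fixed by the grower at construction.
struct BuiltinTable
{
    explicit BuiltinTable(Grower g) : grower(g) {}
    Grower grower;
};

// Stand-in for google::sparse_hash_map: the load factor is adjusted after construction.
struct SparseTable
{
    void max_load_factor(float lf) { lf_ = lf; }
    float lf_ = 0.8f;
};

template <typename Table>
std::vector<Table> makeShards(size_t shards, float load_factor)
{
    std::vector<Table> tables;
    tables.reserve(shards);
    if constexpr (std::is_constructible_v<Table, Grower>)  // plays the role of IsBuiltinHashTable
    {
        for (size_t i = 0; i < shards; ++i)
            tables.emplace_back(Grower{load_factor});
    }
    else
    {
        tables.resize(shards);
        for (auto & table : tables)
            table.max_load_factor(load_factor);
    }
    return tables;
}

int main()
{
    auto builtin = makeShards<BuiltinTable>(4, 0.95f);
    auto sparse = makeShards<SparseTable>(4, 0.95f);
    return (builtin.size() == 4 && sparse.size() == 4) ? 0 : 1;
}
```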
```
@@ -25,6 +25,7 @@ struct HashedDictionaryConfiguration
 {
     const UInt64 shards;
     const UInt64 shard_load_queue_backlog;
+    const float max_load_factor;
     const bool require_nonempty;
     const DictionaryLifetime lifetime;
 };
```
```
@@ -40,6 +40,92 @@ constexpr bool useSparseHashForHashedDictionary()
     return sizeof(PackedPairNoInit<K, V>) > 16;
 }
 
+/// Grower with custom fill limit/load factor (instead of the default 50%).
+///
+/// It turns out that HashMap can outperform google::sparse_hash_map in case
+/// the structure size is not big, in terms of speed *and* memory. Even a 99%
+/// max load factor was faster than google::sparse_hash_map in my simple tests
+/// (1e9 UInt64 keys with UInt16 values, randomly distributed).
+///
+/// And not to mention the very high allocator memory fragmentation in
+/// google::sparse_hash_map.
+///
+/// Based on HashTableGrowerWithPrecalculation.
+class alignas(64) HashTableGrowerWithMaxLoadFactor
+{
+    static constexpr size_t initial_size_degree = 8;
+    UInt8 size_degree = initial_size_degree;
+    size_t precalculated_mask = (1ULL << initial_size_degree) - 1;
+    size_t precalculated_max_fill = 1ULL << (initial_size_degree - 1);
+    float max_load_factor = 0.5;
+    /// HashTableGrowerWithPrecalculation has 23, but to decrease memory usage
+    /// at least slightly, 19 is used here. Also note that for dictionaries it
+    /// is not that important, since they are not loaded that frequently.
+    static constexpr size_t max_size_degree_quadratic = 19;
+
+public:
+    static constexpr auto initial_count = 1ULL << initial_size_degree;
+
+    /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
+    static constexpr auto performs_linear_probing_with_single_step = true;
+
+    HashTableGrowerWithMaxLoadFactor() = default;
+    explicit HashTableGrowerWithMaxLoadFactor(float max_load_factor_)
+        : max_load_factor(max_load_factor_)
+    {
+        increaseSizeDegree(0);
+    }
+
+    UInt8 sizeDegree() const { return size_degree; }
+
+    void increaseSizeDegree(UInt8 delta)
+    {
+        size_degree += delta;
+        precalculated_mask = (1ULL << size_degree) - 1;
+        precalculated_max_fill = static_cast<size_t>((1ULL << size_degree) * max_load_factor);
+    }
+
+    /// The size of the hash table in cells.
+    size_t bufSize() const { return 1ULL << size_degree; }
+
+    /// From the hash value, get the cell number in the hash table.
+    size_t place(size_t x) const { return x & precalculated_mask; }
+
+    /// The next cell in the collision resolution chain.
+    size_t next(size_t pos) const { return (pos + 1) & precalculated_mask; }
+
+    /// Whether the hash table is sufficiently full. You need to increase the size of the hash table, or remove something unnecessary from it.
+    bool overflow(size_t elems) const { return elems > precalculated_max_fill; }
+
+    /// Increase the size of the hash table.
+    void increaseSize() { increaseSizeDegree(size_degree >= max_size_degree_quadratic ? 1 : 2); }
+
+    /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
+    void set(size_t num_elems)
+    {
+        if (num_elems <= 1)
+            size_degree = initial_size_degree;
+        else if (initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
+            size_degree = initial_size_degree;
+        else
+        {
+            /// Slightly more optimal than HashTableGrowerWithPrecalculation
+            /// and takes into account max_load_factor.
+            size_degree = static_cast<size_t>(log2(num_elems - 1)) + 1;
+            if ((1ULL << size_degree) * max_load_factor < num_elems)
+                ++size_degree;
+        }
+        increaseSizeDegree(0);
+    }
+
+    void setBufSize(size_t buf_size_)
+    {
+        size_degree = static_cast<size_t>(log2(buf_size_ - 1) + 1);
+        increaseSizeDegree(0);
+    }
+};
+static_assert(sizeof(HashTableGrowerWithMaxLoadFactor) == 64);
+
 ///
 /// Map (dictionary with attributes)
 ///
@@ -54,8 +140,8 @@ struct HashedDictionaryMapType<dictionary_key_type, /* sparse= */ false, Key, Va
 {
     using Type = std::conditional_t<
         dictionary_key_type == DictionaryKeyType::Simple,
-        HashMap<UInt64, Value, DefaultHash<UInt64>>,
-        HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
+        HashMap<UInt64, Value, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
+        HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
 };
 
 /// Implementations for SPARSE_HASHED layout.
@@ -89,8 +175,8 @@ struct HashedDictionarySparseMapType<dictionary_key_type, Key, Value, /* use_spa
 {
     using Type = std::conditional_t<
         dictionary_key_type == DictionaryKeyType::Simple,
-        PackedHashMap<UInt64, Value, DefaultHash<UInt64>>,
-        PackedHashMap<StringRef, Value, DefaultHash<StringRef>>>;
+        PackedHashMap<UInt64, Value, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
+        PackedHashMap<StringRef, Value, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
 };
 template <DictionaryKeyType dictionary_key_type, typename Key, typename Value>
 struct HashedDictionaryMapType<dictionary_key_type, /* sparse= */ true, Key, Value>
@@ -113,8 +199,8 @@ struct HashedDictionarySetType<dictionary_key_type, /* sparse= */ false, Key>
 {
     using Type = std::conditional_t<
         dictionary_key_type == DictionaryKeyType::Simple,
-        HashSet<UInt64, DefaultHash<UInt64>>,
-        HashSetWithSavedHash<StringRef, DefaultHash<StringRef>>>;
+        HashSet<UInt64, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
+        HashSetWithSavedHash<StringRef, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
 };
 
 /// Implementation for SPARSE_HASHED.
@@ -127,8 +213,8 @@ struct HashedDictionarySetType<dictionary_key_type, /* sparse= */ true, Key>
 {
     using Type = std::conditional_t<
         dictionary_key_type == DictionaryKeyType::Simple,
-        HashSet<UInt64, DefaultHash<UInt64>>,
-        HashSet<StringRef, DefaultHash<StringRef>>>;
+        HashSet<UInt64, DefaultHash<UInt64>, HashTableGrowerWithMaxLoadFactor>,
+        HashSet<StringRef, DefaultHash<StringRef>, HashTableGrowerWithMaxLoadFactor>>;
 };
 
 }
```
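To see the grower's arithmetic in action, here is a small standalone re-implementation of just its overflow/resize bookkeeping (a sketch, not the real header; the constants are copied from the class above):

```
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Re-implementation of only the fill-limit arithmetic from
// HashTableGrowerWithMaxLoadFactor, for illustration.
struct GrowerSketch
{
    uint8_t size_degree = 8;  // initial_size_degree
    float max_load_factor = 0.5f;

    size_t bufSize() const { return 1ULL << size_degree; }
    size_t maxFill() const { return static_cast<size_t>(bufSize() * max_load_factor); }
    bool overflow(size_t elems) const { return elems > maxFill(); }
    void increaseSize() { size_degree += (size_degree >= 19 ? 1 : 2); }  // max_size_degree_quadratic
};

int main()
{
    GrowerSketch g;
    g.max_load_factor = 0.95f;

    // With 256 cells and max_load_factor 0.95 the table holds 243 elements
    // before resizing, instead of 128 with the stock 50% grower.
    for (size_t elems = 1; elems <= 4000; ++elems)
        if (g.overflow(elems))
        {
            std::printf("resize at %zu elems: %zu -> ", elems, g.bufSize());
            g.increaseSize();
            std::printf("%zu cells (next limit %zu)\n", g.bufSize(), g.maxFill());
        }
    return 0;
}
```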
tests/performance/hashed_dictionary_load_factor.xml (new file, 92 lines)
@@ -0,0 +1,92 @@
```
<test>
    <substitutions>
        <substitution>
            <name>layout_suffix</name>
            <values>
                <value>HASHED</value>
                <value>SPARSE_HASHED</value>
            </values>
        </substitution>

        <substitution>
            <name>load_factor</name>
            <values>
                <!-- 0. will be prepended -->
                <value>5</value>
                <value>7</value>
                <value>99</value>
            </values>
        </substitution>
    </substitutions>

    <create_query>
        CREATE TABLE simple_key_dictionary_source_table
        (
            id UInt64,
            value_int UInt16
        ) ENGINE = Memory
    </create_query>

    <create_query>
        CREATE TABLE complex_key_dictionary_source_table
        (
            id UInt64,
            id_key String,
            value_int UInt64
        ) ENGINE = Memory
    </create_query>

    <create_query>
        CREATE DICTIONARY IF NOT EXISTS simple_key_{layout_suffix}_dictionary_l0_{load_factor}
        (
            id UInt64,
            value_int UInt64
        )
        PRIMARY KEY id
        SOURCE(CLICKHOUSE(TABLE 'simple_key_dictionary_source_table'))
        LAYOUT({layout_suffix}(MAX_LOAD_FACTOR 0.{load_factor}))
        LIFETIME(0)
    </create_query>

    <create_query>
        CREATE DICTIONARY IF NOT EXISTS complex_key_{layout_suffix}_dictionary_l0_{load_factor}
        (
            id UInt64,
            id_key String,
            value_int UInt64
        )
        PRIMARY KEY id, id_key
        SOURCE(CLICKHOUSE(TABLE 'complex_key_dictionary_source_table'))
        LAYOUT(COMPLEX_KEY_{layout_suffix}(MAX_LOAD_FACTOR 0.{load_factor}))
        LIFETIME(0)
    </create_query>

    <fill_query>INSERT INTO simple_key_dictionary_source_table SELECT number, number FROM numbers(3_000_000)</fill_query>
    <fill_query>INSERT INTO complex_key_dictionary_source_table SELECT number, toString(number), number FROM numbers(2_000_000)</fill_query>

    <fill_query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</fill_query>
    <fill_query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</fill_query>

    <query>SYSTEM RELOAD DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</query>
    <query>SYSTEM RELOAD DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</query>

    <query>
        WITH rand64() % 3_000_000 as key
        SELECT dictHas('default.simple_key_{layout_suffix}_dictionary_l0_{load_factor}', key)
        FROM numbers(3_000_000)
        FORMAT Null
    </query>

    <query>
        WITH (rand64() % 2_000_000, toString(rand64() % 2_000_000)) as key
        SELECT dictHas('default.complex_key_{layout_suffix}_dictionary_l0_{load_factor}', key)
        FROM numbers(2_000_000)
        FORMAT Null
    </query>

    <drop_query>DROP DICTIONARY simple_key_{layout_suffix}_dictionary_l0_{load_factor}</drop_query>
    <drop_query>DROP DICTIONARY complex_key_{layout_suffix}_dictionary_l0_{load_factor}</drop_query>

    <drop_query>DROP TABLE simple_key_dictionary_source_table</drop_query>
    <drop_query>DROP TABLE complex_key_dictionary_source_table</drop_query>
</test>
```
@@ -0,0 +1,12 @@ (new test reference file; filename not shown in this view)
```
CREATE DICTIONARY default.test_sparse_dictionary_load_factor\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(SPARSE_HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
CREATE DICTIONARY default.test_dictionary_load_factor\n(\n `key` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
CREATE DICTIONARY default.test_dictionary_load_factor_nullable\n(\n `key` UInt64,\n `value` Nullable(UInt16)\n)\nPRIMARY KEY key\nSOURCE(CLICKHOUSE(TABLE test_table_nullable))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
CREATE DICTIONARY default.test_complex_dictionary_load_factor\n(\n `key_1` UInt64,\n `key_2` UInt64,\n `value` UInt16\n)\nPRIMARY KEY key_1, key_2\nSOURCE(CLICKHOUSE(TABLE test_table_complex))\nLIFETIME(MIN 0 MAX 0)\nLAYOUT(COMPLEX_KEY_HASHED(MAX_LOAD_FACTOR 0.9))
100000
0
```
@@ -0,0 +1,107 @@ (new test file; filename not shown in this view)
```
DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
    key UInt64,
    value UInt16
) ENGINE=Memory() AS SELECT number, number FROM numbers(1e5);

DROP TABLE IF EXISTS test_table_nullable;
CREATE TABLE test_table_nullable
(
    key UInt64,
    value Nullable(UInt16)
) ENGINE=Memory() AS SELECT number, number % 2 == 0 ? NULL : number FROM numbers(1e5);

DROP TABLE IF EXISTS test_table_string;
CREATE TABLE test_table_string
(
    key String,
    value UInt16
) ENGINE=Memory() AS SELECT 'foo' || number::String, number FROM numbers(1e5);

DROP TABLE IF EXISTS test_table_complex;
CREATE TABLE test_table_complex
(
    key_1 UInt64,
    key_2 UInt64,
    value UInt16
) ENGINE=Memory() AS SELECT number, number, number FROM numbers(1e5);

DROP DICTIONARY IF EXISTS test_sparse_dictionary_load_factor;
CREATE DICTIONARY test_sparse_dictionary_load_factor
(
    key UInt64,
    value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT(SPARSE_HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SHOW CREATE test_sparse_dictionary_load_factor;
SYSTEM RELOAD DICTIONARY test_sparse_dictionary_load_factor;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_sparse_dictionary_load_factor';
SELECT count() FROM test_table WHERE dictGet('test_sparse_dictionary_load_factor', 'value', key) != value;
DROP DICTIONARY test_sparse_dictionary_load_factor;

DROP DICTIONARY IF EXISTS test_dictionary_load_factor;
CREATE DICTIONARY test_dictionary_load_factor
(
    key UInt64,
    value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT(HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SHOW CREATE test_dictionary_load_factor;
SYSTEM RELOAD DICTIONARY test_dictionary_load_factor;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_dictionary_load_factor';
SELECT count() FROM test_table WHERE dictGet('test_dictionary_load_factor', 'value', key) != value;
DROP DICTIONARY test_dictionary_load_factor;

DROP DICTIONARY IF EXISTS test_dictionary_load_factor_nullable;
CREATE DICTIONARY test_dictionary_load_factor_nullable
(
    key UInt64,
    value Nullable(UInt16)
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table_nullable))
LAYOUT(HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SHOW CREATE test_dictionary_load_factor_nullable;
SYSTEM RELOAD DICTIONARY test_dictionary_load_factor_nullable;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() AND name = 'test_dictionary_load_factor_nullable';
SELECT count() FROM test_table_nullable WHERE dictGet('test_dictionary_load_factor_nullable', 'value', key) != value;
DROP DICTIONARY test_dictionary_load_factor_nullable;

DROP DICTIONARY IF EXISTS test_complex_dictionary_load_factor;
CREATE DICTIONARY test_complex_dictionary_load_factor
(
    key_1 UInt64,
    key_2 UInt64,
    value UInt16
) PRIMARY KEY key_1, key_2
SOURCE(CLICKHOUSE(TABLE test_table_complex))
LAYOUT(COMPLEX_KEY_HASHED(MAX_LOAD_FACTOR 0.90))
LIFETIME(0);
SYSTEM RELOAD DICTIONARY test_complex_dictionary_load_factor;
SHOW CREATE test_complex_dictionary_load_factor;
SELECT element_count FROM system.dictionaries WHERE database = currentDatabase() and name = 'test_complex_dictionary_load_factor';
SELECT count() FROM test_table_complex WHERE dictGet('test_complex_dictionary_load_factor', 'value', (key_1, key_2)) != value;
DROP DICTIONARY test_complex_dictionary_load_factor;

DROP DICTIONARY IF EXISTS test_dictionary_load_factor_string;
CREATE DICTIONARY test_dictionary_load_factor_string
(
    key String,
    value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table_string))
LAYOUT(HASHED(MAX_LOAD_FACTOR 1))
LIFETIME(0);
-- should fail because MAX_LOAD_FACTOR is 1 (the maximum allowed value is 0.99)
SYSTEM RELOAD DICTIONARY test_dictionary_load_factor_string; -- { serverError BAD_ARGUMENTS }
DROP DICTIONARY test_dictionary_load_factor_string;

DROP TABLE test_table;
DROP TABLE test_table_nullable;
DROP TABLE test_table_string;
DROP TABLE test_table_complex;
```
@@ -0,0 +1,4 @@ (new test reference file; filename not shown in this view)
```
test_dictionary_hashed 1000000 0.4768 33558760
test_dictionary_hashed_load_factor 1000000 0.9537 16781544
test_dictionary_sparse_hashed 1000000 0.4768 20975848
test_dictionary_sparse_hashed_load_factor 1000000 0.9537 10490088
```
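The load_factor values in this reference output can be checked by hand: 1e6 elements land in 2^21 cells with the default grower, but stay in 2^20 cells with MAX_LOAD_FACTOR 0.98, which is why bytes_allocated roughly halves:

```
#include <cstdio>

int main()
{
    // 1e6 elements over 2^21 cells (default 0.5 fill limit) vs 2^20 cells (0.98 limit):
    std::printf("%.4f %.4f\n", 1e6 / (1u << 21), 1e6 / (1u << 20));  // prints: 0.4768 0.9537
    return 0;
}
```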
@@ -0,0 +1,39 @@ (new test file; filename not shown in this view)
```
{# vi: ft=sql #}

{% for layout in ["hashed", "sparse_hashed"] %}
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}};
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}_load_factor;
DROP TABLE IF EXISTS test_table;

CREATE TABLE test_table
(
    key UInt64,
    value UInt16
) ENGINE=Memory() AS SELECT number, number FROM numbers(1e6);

CREATE DICTIONARY test_dictionary_{{layout}}
(
    key UInt64,
    value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT({{layout}}())
LIFETIME(0);

CREATE DICTIONARY test_dictionary_{{layout}}_load_factor
(
    key UInt64,
    value UInt16
) PRIMARY KEY key
SOURCE(CLICKHOUSE(TABLE test_table))
LAYOUT({{layout}}(MAX_LOAD_FACTOR 0.98))
LIFETIME(0);

SYSTEM RELOAD DICTIONARY test_dictionary_{{layout}};
SYSTEM RELOAD DICTIONARY test_dictionary_{{layout}}_load_factor;
SELECT name, element_count, round(load_factor, 4), bytes_allocated FROM system.dictionaries WHERE database = currentDatabase() ORDER BY name;

DROP DICTIONARY IF EXISTS test_dictionary_{{layout}};
DROP DICTIONARY IF EXISTS test_dictionary_{{layout}}_load_factor;
DROP TABLE test_table;
{% endfor %}
```