mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-14 19:45:11 +00:00
Fix load_factor for HASHED/SPARSE_HASHED dictionaries with SHARDS
Previously, bucket_count was set only for one shard, and hence load_factor was > 1.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
parent
a8e63abbb4
commit
93201f21d9
@@ -957,6 +957,15 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
|
||||
|
||||
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
|
||||
{
|
||||
/// bucket_count should be a sum over all shards (CollectionsHolder),
|
||||
/// but it should not be a sum over all attributes, since it is used to
|
||||
/// calculate load_factor like this:
|
||||
///
|
||||
/// element_count / bucket_count
|
||||
///
|
||||
/// While element_count is a sum over all shards, not over all attributes.
|
||||
bucket_count = 0;
|
||||
|
||||
getAttributeContainers(attribute_index, [&](const auto & containers)
|
||||
{
|
||||
for (const auto & container : containers)
|
||||
@@ -973,12 +982,12 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
|
||||
/// and since this is sparsehash, empty cells should not be significant,
|
||||
/// and since items cannot be removed from the dictionary, deleted is also not important.
|
||||
bytes_allocated += container.size() * (sizeof(KeyType) + sizeof(AttributeValueType));
|
||||
bucket_count = container.bucket_count();
|
||||
bucket_count += container.bucket_count();
|
||||
}
|
||||
else
|
||||
{
|
||||
bytes_allocated += container.getBufferSizeInBytes();
|
||||
bucket_count = container.getBufferSizeInCells();
|
||||
bucket_count += container.getBufferSizeInCells();
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -1002,12 +1011,12 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
|
||||
if constexpr (sparse)
|
||||
{
|
||||
bytes_allocated += container.size() * (sizeof(KeyType));
|
||||
bucket_count = container.bucket_count();
|
||||
bucket_count += container.bucket_count();
|
||||
}
|
||||
else
|
||||
{
|
||||
bytes_allocated += container.getBufferSizeInBytes();
|
||||
bucket_count = container.getBufferSizeInCells();
|
||||
bucket_count += container.getBufferSizeInCells();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -0,0 +1,2 @@
|
||||
0.4768
|
||||
0.4768
|
@@ -0,0 +1,17 @@
|
||||
-- Regression test for load_factor of HASHED dictionaries with SHARDS:
-- bucket_count must be summed over all shards (as element_count already is),
-- so the reported load_factor stays <= 1.

DROP DICTIONARY IF EXISTS dict_sharded;
DROP DICTIONARY IF EXISTS dict_sharded_multi;
DROP TABLE IF EXISTS dict_data;

-- 1M-row source table with several UInt16 attributes.
CREATE TABLE dict_data (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355, number%65535 FROM numbers(1e6);

-- Single-attribute dictionary split across 32 shards.
CREATE DICTIONARY dict_sharded (key UInt64, v0 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32));
SYSTEM RELOAD DICTIONARY dict_sharded;
SELECT round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded';
DROP DICTIONARY dict_sharded;

-- Multi-attribute dictionary: bucket_count must not additionally be summed
-- over attributes, only over shards (both SELECTs should print the same value).
CREATE DICTIONARY dict_sharded_multi (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32));
SYSTEM RELOAD DICTIONARY dict_sharded_multi;
SELECT round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded_multi';
DROP DICTIONARY dict_sharded_multi;

DROP TABLE dict_data;
|
Loading…
Reference in New Issue
Block a user