Fix load_factor for HASHED/SPARSE_HASHED dictionaries with SHARDS

Previously, bucket_count was taken from a single shard only, while
element_count is a sum over all shards, and hence load_factor was > 1.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2023-04-28 11:46:01 +02:00
parent a8e63abbb4
commit 93201f21d9
3 changed files with 32 additions and 4 deletions

View File

@ -957,6 +957,15 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
{
/// bucket_count should be a sum over all shards (CollectionsHolder),
/// but it should not be a sum over all attributes, since it is used to
/// calculate load_factor like this:
///
/// element_count / bucket_count
///
/// While element_count is a sum over all shards, not over all attributes.
bucket_count = 0;
getAttributeContainers(attribute_index, [&](const auto & containers)
{
for (const auto & container : containers)
@ -973,12 +982,12 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
/// and since this is sparsehash, empty cells should not be significant,
/// and since items cannot be removed from the dictionary, deleted is also not important.
bytes_allocated += container.size() * (sizeof(KeyType) + sizeof(AttributeValueType));
bucket_count = container.bucket_count();
bucket_count += container.bucket_count();
}
else
{
bytes_allocated += container.getBufferSizeInBytes();
bucket_count = container.getBufferSizeInCells();
bucket_count += container.getBufferSizeInCells();
}
}
});
@ -1002,12 +1011,12 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::calculateBytesAlloc
if constexpr (sparse)
{
bytes_allocated += container.size() * (sizeof(KeyType));
bucket_count = container.bucket_count();
bucket_count += container.bucket_count();
}
else
{
bytes_allocated += container.getBufferSizeInBytes();
bucket_count = container.getBufferSizeInCells();
bucket_count += container.getBufferSizeInCells();
}
}
}

View File

@ -0,0 +1,2 @@
0.4768
0.4768

View File

@ -0,0 +1,17 @@
-- Regression test for load_factor of HASHED dictionaries that use SHARDS.
-- It prints load_factor (rounded to 4 digits) for a single-attribute and for a
-- multi-attribute dictionary built over the same 1e6 keys with 32 shards.

-- Start from a clean state in case a previous run left objects behind.
DROP DICTIONARY IF EXISTS dict_sharded;
DROP DICTIONARY IF EXISTS dict_sharded_multi;
DROP TABLE IF EXISTS dict_data;
-- Source table: 1e6 rows with a UInt64 key and five UInt16 value columns.
-- NOTE(review): the moduli for v2 (6553) and v3 (655355) differ from the 65535
-- used for the other columns; possibly typos, confirm whether intentional.
CREATE TABLE dict_data (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355, number%65535 FROM numbers(1e6);
-- Case 1: one attribute (v0), 32 shards.
CREATE DICTIONARY dict_sharded (key UInt64, v0 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32));
-- Reload explicitly before reading load_factor (presumably to make sure the
-- dictionary is actually loaded when system.dictionaries is queried).
SYSTEM RELOAD DICTIONARY dict_sharded;
SELECT round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded';
DROP DICTIONARY dict_sharded;
-- Case 2: five attributes (v0..v4), same source table and shard count.
-- The test reference expects the same load_factor as in case 1, i.e. the
-- number of attributes should not affect the reported value.
CREATE DICTIONARY dict_sharded_multi (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32));
SYSTEM RELOAD DICTIONARY dict_sharded_multi;
SELECT round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded_multi';
DROP DICTIONARY dict_sharded_multi;
-- Clean up the source table.
DROP TABLE dict_data;