diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp
index c0d6a4a2a47..56e72fb0328 100644
--- a/src/Dictionaries/HashedDictionary.cpp
+++ b/src/Dictionaries/HashedDictionary.cpp
@@ -957,6 +957,15 @@ void HashedDictionary::calculateBytesAlloc
 
     for (size_t attribute_index = 0; attribute_index < attributes_size; ++attribute_index)
     {
+        /// bucket_count should be a sum over all shards (CollectionsHolder),
+        /// but it should not be a sum over all attributes, since it is used to
+        /// calculate load_factor like this:
+        ///
+        ///     element_count / bucket_count
+        ///
+        /// While element_count is a sum over all shards, not over all attributes.
+        bucket_count = 0;
+
         getAttributeContainers(attribute_index, [&](const auto & containers)
         {
             for (const auto & container : containers)
@@ -973,12 +982,12 @@ void HashedDictionary::calculateBytesAlloc
                     /// and since this is sparsehash, empty cells should not be significant,
                     /// and since items cannot be removed from the dictionary, deleted is also not important.
                     bytes_allocated += container.size() * (sizeof(KeyType) + sizeof(AttributeValueType));
-                    bucket_count = container.bucket_count();
+                    bucket_count += container.bucket_count();
                 }
                 else
                 {
                     bytes_allocated += container.getBufferSizeInBytes();
-                    bucket_count = container.getBufferSizeInCells();
+                    bucket_count += container.getBufferSizeInCells();
                 }
             }
         });
@@ -1002,12 +1011,12 @@ void HashedDictionary::calculateBytesAlloc
             if constexpr (sparse)
            {
                 bytes_allocated += container.size() * (sizeof(KeyType));
-                bucket_count = container.bucket_count();
+                bucket_count += container.bucket_count();
             }
             else
             {
                 bytes_allocated += container.getBufferSizeInBytes();
-                bucket_count = container.getBufferSizeInCells();
+                bucket_count += container.getBufferSizeInCells();
             }
         }
     }
diff --git a/tests/queries/0_stateless/02730_dictionary_hashed_load_factor.reference b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor.reference
new file mode 100644
index 00000000000..0832a85e22a
--- /dev/null
+++ b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor.reference
@@ -0,0 +1,2 @@
+0.4768
+0.4768
diff --git a/tests/queries/0_stateless/02730_dictionary_hashed_load_factor.sql b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor.sql
new file mode 100644
index 00000000000..d6e99a91284
--- /dev/null
+++ b/tests/queries/0_stateless/02730_dictionary_hashed_load_factor.sql
@@ -0,0 +1,17 @@
+DROP DICTIONARY IF EXISTS dict_sharded;
+DROP DICTIONARY IF EXISTS dict_sharded_multi;
+DROP TABLE IF EXISTS dict_data;
+
+CREATE TABLE dict_data (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) engine=Memory() AS SELECT number, number%65535, number%65535, number%6553, number%655355, number%65535 FROM numbers(1e6);
+
+CREATE DICTIONARY dict_sharded (key UInt64, v0 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32));
+SYSTEM RELOAD DICTIONARY dict_sharded;
+SELECT round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded';
+DROP DICTIONARY dict_sharded;
+
+CREATE DICTIONARY dict_sharded_multi (key UInt64, v0 UInt16, v1 UInt16, v2 UInt16, v3 UInt16, v4 UInt16) PRIMARY KEY key SOURCE(CLICKHOUSE(TABLE 'dict_data')) LIFETIME(MIN 0 MAX 0) LAYOUT(HASHED(SHARDS 32));
+SYSTEM RELOAD DICTIONARY dict_sharded_multi;
+SELECT round(load_factor, 4) FROM system.dictionaries WHERE database = currentDatabase() AND name = 'dict_sharded_multi';
+DROP DICTIONARY dict_sharded_multi;
+
+DROP TABLE dict_data;
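
For context, the load_factor shown in system.dictionaries is element_count / bucket_count, so bucket_count must be summed over shards but reset for each attribute, since every attribute stores the same set of keys. The standalone C++ sketch below only illustrates that aggregation rule and is not part of the patch: the Shard and Attribute types are hypothetical stand-ins for the real containers, and the per-shard capacity of 2^16 cells is an assumption chosen so that 1e6 keys over 32 shards reproduce the 0.4768 in the reference file.

// Illustration only: hypothetical Shard/Attribute types, not ClickHouse classes.
// bucket_count is summed over shards and reset per attribute; element_count
// does not depend on how many attributes the dictionary has.
#include <cstddef>
#include <iostream>
#include <vector>

struct Shard
{
    size_t elements = 0; /// keys stored in this shard
    size_t buckets = 0;  /// cells allocated by this shard's hash table (assumed 2^16 here)
};

using Attribute = std::vector<Shard>; /// one container per shard

int main()
{
    constexpr size_t shards = 32;
    constexpr size_t keys = 1'000'000;

    /// 5 attributes (v0..v4 in the test) over the same 32 shards.
    std::vector<Attribute> attributes(5, Attribute(shards, Shard{keys / shards, 1 << 16}));

    size_t element_count = keys;
    size_t bucket_count = 0;

    for (const auto & attribute : attributes)
    {
        /// Reset per attribute: with the old `bucket_count = ...` only the last
        /// container's value survived; summing over attributes as well would
        /// count the same buckets 5 times and shrink the reported load_factor 5x.
        bucket_count = 0;
        for (const auto & shard : attribute)
            bucket_count += shard.buckets;
    }

    std::cout << "load_factor = "
              << static_cast<double>(element_count) / static_cast<double>(bucket_count)
              << '\n'; /// ~0.476837, which rounds to the 0.4768 in the reference
}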