Merge pull request #72630 from ClickHouse/better-memory-usage-estimation

Better memory usage approx of `MarksInCompressedFile`
This commit is contained in:
Antonio Andelic 2024-12-02 07:49:38 +00:00 committed by GitHub
commit 8b5e00d3f2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 12 deletions

View File

@ -63,7 +63,7 @@ MarksInCompressedFile::MarksInCompressedFile(const PlainArray & marks)
// Overallocate by +1 element to let the bit packing/unpacking do less bounds checking.
size_t packed_length = (packed_bits + 63) / 64 + 1;
packed.reserve(packed_length);
packed.reserve_exact(packed_length);
packed.resize_fill(packed_length);
// Second pass: write out the packed marks.
@ -97,7 +97,7 @@ std::tuple<const MarksInCompressedFile::BlockInfo *, size_t> MarksInCompressedFi
/// Returns an estimate, in bytes, of the memory held by this object:
/// the object itself plus the buffers backing `blocks` and `packed`.
///
/// The estimate uses each container's allocated_bytes() instead of
/// size() * sizeof(element). `packed` is deliberately over-allocated by one
/// element in the constructor, so a size-based figure is not the intended
/// measure here.
/// NOTE(review): presumably allocated_bytes() reports the full allocation
/// (capacity plus any container padding) — confirm against the container type.
size_t MarksInCompressedFile::approximateMemoryUsage() const
{
    // Only one return statement: the older size()-based return used to sit
    // above this one and made it unreachable.
    return sizeof(*this) + blocks.allocated_bytes() + packed.allocated_bytes();
}
}

View File

@ -35,18 +35,29 @@ TEST(Marks, Compression)
EXPECT_LE((marks.approximateMemoryUsage() - sizeof(MarksInCompressedFile)) * 8, plain.size() * max_bits_per_mark);
};
// Typical.
test(gen(10000, 1'000'000, 0), 30);
{
SCOPED_TRACE("Typical");
test(gen(10000, 1'000'000, 0), 30);
}
// Completely random 64-bit values.
test(gen(10000, UINT64_MAX - 1, UINT64_MAX - 1), 130);
// All zeros.
test(gen(10000, 0, 0), 2);
{
SCOPED_TRACE("Completely random 64-bit values");
test(gen(10000, UINT64_MAX - 1, UINT64_MAX - 1), 130);
}
// Short.
test(gen(10, 1000, 1000), 65);
{
SCOPED_TRACE("All zeros");
test(gen(10000, 0, 0), 2);
}
// Empty.
test(gen(0, 0, 0), 0);
{
SCOPED_TRACE("Short");
test(gen(10, 1000, 1000), 65);
}
{
SCOPED_TRACE("Empty");
test(gen(0, 0, 0), 0);
}
}