add unit test for constant index granularity

This commit is contained in:
Anton Popov 2024-11-12 18:37:09 +00:00
parent b758838c73
commit f9a9407186
4 changed files with 100 additions and 34 deletions

View File

@ -12,8 +12,8 @@ public:
MergeTreeIndexGranularity() = default;
virtual ~MergeTreeIndexGranularity() = default;
/// Returns granularity if it is constant for whole part (except last granule).
virtual std::optional<size_t> getConstantGranularity() const = 0;
/// Return count of rows between marks
virtual size_t getRowsCountInRange(size_t begin, size_t end) const = 0;
/// Return count of rows between marks
@ -53,13 +53,12 @@ public:
size_t getLastNonFinalMarkRows() const;
virtual bool hasFinalMark() const = 0;
bool empty() const { return getMarksCount() == 0; }
/// Add new mark with rows_count
/// Add new mark with rows_count.
virtual void appendMark(size_t rows_count) = 0;
/// Extends last mark by rows_count.
/// Sets last mark equal to rows_count.
virtual void adjustLastMark(size_t rows_count) = 0;
void addRowsToLastMark(size_t rows_count);
@ -67,6 +66,8 @@ public:
virtual uint64_t getBytesSize() const = 0;
virtual uint64_t getBytesAllocated() const = 0;
/// Possibly optimizes values in memory (for example, to constant value).
/// Returns a new optimized index granularity structure, or nullptr if optimization is not applicable.
virtual std::shared_ptr<MergeTreeIndexGranularity> optimize() const = 0;
virtual std::string describe() const = 0;
};

View File

@ -108,11 +108,13 @@ size_t MergeTreeIndexGranularityConstant::getMarkUpperBoundForRow(size_t row_ind
{
size_t num_rows_with_constant_granularity = (num_marks_without_final - 1) * constant_granularity;
/// All granules with constant granularity + last granule + final granule
if (row_index >= num_rows_with_constant_granularity)
if (row_index >= getTotalRows())
return getMarksCount();
return (row_index + constant_granularity - 1) / constant_granularity;
if (row_index >= num_rows_with_constant_granularity)
return num_marks_without_final - 1;
return row_index / constant_granularity;
}
size_t MergeTreeIndexGranularityConstant::countMarksForRows(size_t from_mark, size_t number_of_rows) const

View File

@ -1,12 +1,15 @@
#include <gtest/gtest.h>
#include <Core/Block.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypesNumber.h>
// I know that inclusion of .cpp is not good at all
#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp> // NOLINT
#include <Storages/MergeTree/MergeTreeDataPartWriterWide.cpp> // NOLINT
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityConstant.h>
using namespace DB;
static Block getBlockWithSize(size_t required_size_in_bytes, size_t size_of_row_in_bytes)
{
@ -25,16 +28,16 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)
auto block1 = getBlockWithSize(80, 8);
EXPECT_EQ(block1.bytes(), 80);
{ /// Granularity bytes are not set. Take default index_granularity.
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 0, 100, false, false);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularityForBlock(block1.rows(), block1.bytes(), 0, 100, false, false);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 1);
EXPECT_EQ(index_granularity.getMarkRows(0), 100);
}
{ /// Granule size is less than block size. Block contains multiple granules.
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 16, 100, false, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularityForBlock(block1.rows(), block1.bytes(), 16, 100, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 5); /// First granule with 8 rows, and second with 1 row
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -43,8 +46,8 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)
{ /// Granule size is more than block size. Whole block (and maybe more) can be placed in single granule.
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 512, 100, false, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularityForBlock(block1.rows(), block1.bytes(), 512, 100, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 1);
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -53,8 +56,8 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)
{ /// Blocks with granule size
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 1, 100, true, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularityForBlock(block1.rows(), block1.bytes(), 1, 100, true, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 1);
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -62,8 +65,8 @@ TEST(AdaptiveIndexGranularity, FillGranularityToyTests)
}
{ /// Shift in index offset
MergeTreeIndexGranularity index_granularity;
auto granularity = computeIndexGranularityImpl(block1, 16, 100, false, true);
MergeTreeIndexGranularityAdaptive index_granularity;
auto granularity = computeIndexGranularityForBlock(block1.rows(), block1.bytes(), 16, 100, false, true);
fillIndexGranularityImpl(index_granularity, 6, granularity, block1.rows());
EXPECT_EQ(index_granularity.getMarksCount(), 2);
for (size_t i = 0; i < index_granularity.getMarksCount(); ++i)
@ -78,10 +81,10 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)
auto block1 = getBlockWithSize(65536, 8);
auto block2 = getBlockWithSize(65536, 8);
auto block3 = getBlockWithSize(65536, 8);
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
for (const auto & block : {block1, block2, block3})
{
auto granularity = computeIndexGranularityImpl(block, 1024, 8192, false, true);
auto granularity = computeIndexGranularityForBlock(block.rows(), block.bytes(), 1024, 8192, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block.rows());
}
@ -94,10 +97,10 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)
auto block2 = getBlockWithSize(32768, 32);
auto block3 = getBlockWithSize(2048, 32);
EXPECT_EQ(block1.rows() + block2.rows() + block3.rows(), 3136);
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
for (const auto & block : {block1, block2, block3})
{
auto granularity = computeIndexGranularityImpl(block, 1024, 8192, false, true);
auto granularity = computeIndexGranularityForBlock(block.rows(), block.bytes(), 1024, 8192, false, true);
fillIndexGranularityImpl(index_granularity, 0, granularity, block.rows());
}
@ -113,11 +116,11 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)
EXPECT_EQ(block1.rows() + block2.rows() + block3.rows(), (2048 + 4096 + 8192) / 32);
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
for (const auto & block : {block1, block2, block3})
{
auto granularity = computeIndexGranularityImpl(block, 16384, 8192, false, true);
auto granularity = computeIndexGranularityForBlock(block.rows(), block.bytes(), 16384, 8192, false, true);
fillIndexGranularityImpl(index_granularity, index_offset, granularity, block.rows());
index_offset = index_granularity.getLastMarkRows() - block.rows();
}
@ -128,10 +131,10 @@ TEST(AdaptiveIndexGranularity, FillGranularitySequenceOfBlocks)
}
TEST(AdaptiveIndexGranularity, TestIndexGranularityClass)
TEST(AdaptiveIndexGranularity, TestIndexGranularityAdaptive)
{
{
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t sum_rows = 0;
size_t sum_marks = 0;
for (size_t i = 10; i <= 100; i+=10)
@ -148,11 +151,70 @@ TEST(AdaptiveIndexGranularity, TestIndexGranularityClass)
EXPECT_EQ(index_granularity.getMarkStartingRow(2), 30);
EXPECT_EQ(index_granularity.getMarkStartingRow(3), 60);
EXPECT_EQ(index_granularity.getRowsCountInRange({0, 10}), sum_rows);
EXPECT_EQ(index_granularity.getRowsCountInRange({0, 1}), 10);
EXPECT_EQ(index_granularity.getRowsCountInRange({2, 5}), 30 + 40 + 50);
EXPECT_EQ(index_granularity.getRowsCountInRange(0, 10), sum_rows);
EXPECT_EQ(index_granularity.getRowsCountInRange(0, 1), 10);
EXPECT_EQ(index_granularity.getRowsCountInRange(2, 5), 30 + 40 + 50);
EXPECT_EQ(index_granularity.getRowsCountInRanges({{2, 5}, {0, 1}, {0, 10}}), 10 + 30 + 40 + 50 + sum_rows);
}
}
/// Builds an index granularity out of ten equal marks, shrinks the last data mark to half
/// of the granularity and appends an empty final mark, then checks the read-side accessors.
/// The same expectations are applied to both the constant and the adaptive implementation.
TEST(AdaptiveIndexGranularity, TestIndexGranularityConstant)
{
    /// The literal expectations below (30, 45, 90, ...) correspond to granularity_rows == 10,
    /// which is the only value passed by the callers at the bottom of this test.
    auto test = [](MergeTreeIndexGranularity & index_granularity, size_t granularity_rows)
    {
        size_t sum_marks = 10;
        size_t sum_rows = granularity_rows * sum_marks;

        for (size_t i = 0; i < 10; ++i)
            index_granularity.appendMark(granularity_rows);

        /// Replace the size of the last data mark with half of the granularity.
        size_t new_granularity_rows = granularity_rows / 2;
        index_granularity.adjustLastMark(new_granularity_rows);
        sum_rows -= (granularity_rows - new_granularity_rows);

        /// Append the final mark, which holds no rows.
        index_granularity.appendMark(0);
        ++sum_marks;

        EXPECT_EQ(index_granularity.getMarksCount(), sum_marks);
        EXPECT_EQ(index_granularity.getMarksCountWithoutFinal(), sum_marks - 1);
        EXPECT_EQ(index_granularity.hasFinalMark(), true);
        EXPECT_EQ(index_granularity.getTotalRows(), sum_rows);
        EXPECT_EQ(index_granularity.getLastMarkRows(), 0);
        EXPECT_EQ(index_granularity.getLastNonFinalMarkRows(), granularity_rows / 2);

        EXPECT_EQ(index_granularity.getMarkStartingRow(0), 0);
        EXPECT_EQ(index_granularity.getMarkStartingRow(3), 30);
        EXPECT_EQ(index_granularity.getMarkStartingRow(9), 90);
        /// Marks at and past the final mark start at the total number of rows.
        EXPECT_EQ(index_granularity.getMarkStartingRow(10), sum_rows);
        EXPECT_EQ(index_granularity.getMarkStartingRow(11), sum_rows);

        /// Including the final mark in a range must not change the row count.
        EXPECT_EQ(index_granularity.getRowsCountInRange(0, 10), sum_rows);
        EXPECT_EQ(index_granularity.getRowsCountInRange(0, 11), sum_rows);
        EXPECT_EQ(index_granularity.getRowsCountInRange(0, 1), 10);
        EXPECT_EQ(index_granularity.getRowsCountInRange(2, 5), 30);
        EXPECT_EQ(index_granularity.getRowsCountInRange(3, 9), 60);
        EXPECT_EQ(index_granularity.getRowsCountInRange(5, 10), 45);
        EXPECT_EQ(index_granularity.getRowsCountInRange(5, 11), 45);

        EXPECT_EQ(index_granularity.countMarksForRows(0, 35), 3);
        EXPECT_EQ(index_granularity.countMarksForRows(5, 29), 2);
        EXPECT_EQ(index_granularity.countMarksForRows(0, 89), 8);
        EXPECT_EQ(index_granularity.countMarksForRows(0, 90), 9);
        EXPECT_EQ(index_granularity.countMarksForRows(0, 92), 9);
        EXPECT_EQ(index_granularity.countMarksForRows(0, 95), sum_marks);
        EXPECT_EQ(index_granularity.countMarksForRows(0, 99), sum_marks);
    };

    const size_t granularity_rows = 10;

    {
        MergeTreeIndexGranularityConstant index_granularity(granularity_rows);
        test(index_granularity, granularity_rows);
    }

    {
        MergeTreeIndexGranularityAdaptive index_granularity;
        test(index_granularity, granularity_rows);
    }
}

View File

@ -4,6 +4,7 @@
// I know that inclusion of .cpp is not good at all
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp> // NOLINT
#include <Storages/MergeTree/MergeTreeIndexGranularityAdaptive.h>
using namespace DB;
@ -13,7 +14,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows = 8;
size_t granularity = 32;
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
size_t rows_written = 0;
for (size_t i = 0; i < 3; ++i)
@ -34,7 +35,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows2 = 8;
size_t granularity = 32;
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
fillIndexGranularityImpl(index_granularity, index_offset, granularity, rows1);
@ -51,7 +52,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows2 = 25;
size_t granularity = 32;
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
fillIndexGranularityImpl(index_granularity, index_offset, granularity, rows1);
@ -68,7 +69,7 @@ TEST(IndexGranularityCompactParts, FillGranularitySequenceOfBlocks)
size_t rows = 40;
size_t granularity = 32;
MergeTreeIndexGranularity index_granularity;
MergeTreeIndexGranularityAdaptive index_granularity;
size_t index_offset = 0;
for (size_t i = 0; i < 3; ++i)