Enhance unittest MergeTree.CombineFilters

This commit is contained in:
Zhiguo Zhou 2023-11-20 13:56:03 +08:00
parent b66a9e8fd3
commit 1f044b11e3

View File

@ -7,7 +7,7 @@
using namespace DB;
/* The combineFilters function from MergeTreeRangeReader.cpp could be optimized with Intel's AVX512VBMI2 intrinsic,
* _mm512_mask_expandloadu_epi8. These tests are added to ensure that the vectorized code outputs exactly the same results
* as the original scalar code when the required hardware feature is supported on the device.
*
* To avoid the contingency of the all-one/all-zero sequences, this test fills in the filters with alternating 1s and
@ -54,8 +54,81 @@ bool testCombineFilters(size_t size)
return true;
}
/** Checks DB::combineFilters on two UInt8 columns whose contents go beyond plain 0/1 masks.
  *
  * First column: a 1 is placed at positions 0, 1, 3, 6, 10, 15, ... so that every run of 0s between two
  * neighbouring 1s is one element longer than the run before it. For a size of 16 the column is:
  *     [1 1 0 1 0 0 1 0 0 0 1 0 0 0 0 1]
  *
  * Second column: one element per 1 in the first column, holding 0x00, 0x01, ..., 0xFF and then wrapping
  * around to 0x00 and counting up again.
  *
  * Returns true when the combined column has `size` rows, every row that is non-zero in the first column
  * holds the next value of the second column (taken in order), and every remaining row is 0.
  */
bool testCombineColumns(size_t size)
{
    size_t ones_total = 0;

    /// Builds the first column and records in `ones_total` how many 1s were written into it.
    auto make_sparse_mask = [&ones_total](size_t rows) -> ColumnPtr
    {
        auto mask = ColumnUInt8::create(rows, 0);
        auto & cells = mask->getData();
        ones_total = 0;
        size_t pos = 0;
        while (pos < rows)
        {
            cells[pos] = 1;
            ++ones_total;
            /// The stride to the next 1 grows by one after every 1 that is written.
            pos += ones_total;
        }
        return mask;
    };

    /// Builds the second column: element k holds the low 8 bits of k.
    auto make_wrapping_sequence = [](size_t rows) -> ColumnPtr
    {
        auto sequence = ColumnUInt8::create(rows, 0);
        auto & cells = sequence->getData();
        size_t index = 0;
        for (auto & cell : cells)
            cell = static_cast<UInt8>(index++);
        return sequence;
    };

    auto first_column = make_sparse_mask(size);
    /// The second column is sized to the number of non-zero values in the first column.
    auto second_column = make_wrapping_sequence(ones_total);

    auto combined = combineFilters(first_column, second_column);
    if (combined->size() != size)
        return false;

    const auto & mask_data = typeid_cast<const ColumnUInt8 *>(first_column.get())->getData();
    const auto & combined_data = typeid_cast<const ColumnUInt8 *>(combined.get())->getData();

    UInt8 next_value = 0;
    for (size_t row = 0; row < size; ++row)
    {
        const bool selected = mask_data[row] != 0;
        /// Selected rows must carry the next value of the second column; all other rows must stay 0.
        const UInt8 wanted = selected ? next_value : UInt8{0};
        if (combined_data[row] != wanted)
            return false;

        /// Wrapping past 0xFF is intentional here: it mirrors how the second column was filled.
        if (selected)
            ++next_value;
    }
    return true;
}
/// Runs the two helper checks, testCombineFilters and testCombineColumns, over a range of column sizes.
/// Each helper returns a bool, so a failing EXPECT_TRUE line identifies the size that went wrong.
/// NOTE(review): the sizes around 63/64/65 presumably straddle the 64-byte width of the AVX-512 vectors
/// mentioned in the comment at the top of this file - confirm.
TEST(MergeTree, CombineFilters)
{
/// Tests with only 0/1 and fixed intervals.
EXPECT_TRUE(testCombineFilters(1));
EXPECT_TRUE(testCombineFilters(2));
EXPECT_TRUE(testCombineFilters(63));
@ -63,4 +136,13 @@ TEST(MergeTree, CombineFilters)
EXPECT_TRUE(testCombineFilters(65));
EXPECT_TRUE(testCombineFilters(200));
EXPECT_TRUE(testCombineFilters(201));
/// Extended tests: combination of two UInt8 columns.
/// The second column holds one UInt8 per 1 in the first column, so the larger sizes below are the ones
/// that push its element count past 256 and exercise the wrap-around of the expected value.
EXPECT_TRUE(testCombineColumns(1));
EXPECT_TRUE(testCombineColumns(2));
EXPECT_TRUE(testCombineColumns(63));
EXPECT_TRUE(testCombineColumns(64));
EXPECT_TRUE(testCombineColumns(200));
EXPECT_TRUE(testCombineColumns(201));
EXPECT_TRUE(testCombineColumns(2000));
EXPECT_TRUE(testCombineColumns(200000));
}