From 1f044b11e36d17097fab066044edf7ebde746ffc Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Mon, 20 Nov 2023 13:56:03 +0800 Subject: [PATCH] Enhance unittest MergeTree.CombineFilters --- .../MergeTree/tests/gtest_combine_filters.cpp | 84 ++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/tests/gtest_combine_filters.cpp b/src/Storages/MergeTree/tests/gtest_combine_filters.cpp index 64cccd4cbad..91e9e53c230 100644 --- a/src/Storages/MergeTree/tests/gtest_combine_filters.cpp +++ b/src/Storages/MergeTree/tests/gtest_combine_filters.cpp @@ -7,7 +7,7 @@ using namespace DB; /* The combineFilters function from MergeTreeRangeReader.cpp could be optimized with Intel's AVX512VBMI2 intrinsic, - * _mm512_mask_expandloadu_epi8. And this test is added to ensure that the vectorized code outputs the exact results + * _mm512_mask_expandloadu_epi8. And these tests are added to ensure that the vectorized code outputs the exact results * as the original scalar code when the required hardware feature is supported on the device. * * To avoid the contingency of the all-one/all-zero sequences, this test fills in the filters with alternating 1s and @@ -54,8 +54,81 @@ bool testCombineFilters(size_t size) return true; } +/* This test is to further test DB::combineFilters by combining two UInt8 columns. Given the implementation of + * DB::combineFilters, the non-zero values in the first column are contiguously replaced with the elements in the + * second column. And to validate the first column with arbitrary intervals, this test constructs its values in + * the following manner: the count of 0s between two consecutive 1s increases in step of 1. An example column + * with the size of 16 looks like: + * [1 1 0 1 0 0 1 0 0 0 1 0 0 0 0 1] + * + * The second column contains the consecutively incremented UInt8 integers between 0x00 and 0xFF, and when the overflow + * occurs, the value would reset to 0x00 and increment again. 
+ */
+bool testCombineColumns(size_t size)
+{
+    auto generateFirstColumn = [] (size_t len, size_t & non_zero_count)->ColumnPtr
+    {
+        auto column = ColumnUInt8::create(len, 0);
+        auto & column_data = column->getData();
+
+        non_zero_count = 0;
+        for (size_t i = 0; i < len; non_zero_count++, i+=non_zero_count)
+        {
+            column_data[i] = 1;
+        }
+
+        return column;
+    };
+
+    auto generateSecondColumn = [] (size_t len)->ColumnPtr
+    {
+        auto column = ColumnUInt8::create(len, 0);
+        auto & column_data = column->getData();
+
+        for (size_t i = 0; i < len; i++)
+        {
+            column_data[i] = static_cast<UInt8>(i);
+        }
+
+        return column;
+    };
+
+    size_t non_zero_count = 0;
+    auto first_column = generateFirstColumn(size, non_zero_count);
+    const auto & first_column_data = typeid_cast<const ColumnUInt8 *>(first_column.get())->getData();
+
+    /// The count of non-zero values in the first column should be the size of the second column.
+    auto second_column = generateSecondColumn(non_zero_count);
+
+    auto result = combineFilters(first_column, second_column);
+    const auto & result_data = typeid_cast<const ColumnUInt8 *>(result.get())->getData();
+
+    if (result->size() != size) return false;
+
+    UInt8 expected = 0;
+    for (size_t i = 0; i < size; ++i)
+    {
+        if (first_column_data[i])
+        {
+            if (result_data[i] != expected)
+            {
+                return false;
+            }
+            /// UInt8 wrap-around (0xFF -> 0x00) is intentional: it mirrors the values stored in the second column.
+            expected++;
+        }
+        else
+        {
+            if (result_data[i] != 0) return false;
+        }
+    }
+
+    return true;
+}
+
 TEST(MergeTree, CombineFilters)
 {
+    /// Tests with only 0/1 and fixed intervals.
     EXPECT_TRUE(testCombineFilters(1));
     EXPECT_TRUE(testCombineFilters(2));
     EXPECT_TRUE(testCombineFilters(63));
@@ -63,4 +136,13 @@ TEST(MergeTree, CombineFilters)
     EXPECT_TRUE(testCombineFilters(65));
     EXPECT_TRUE(testCombineFilters(200));
     EXPECT_TRUE(testCombineFilters(201));
+    /// Extended tests: combination of two UInt8 columns.
+    EXPECT_TRUE(testCombineColumns(1));
+    EXPECT_TRUE(testCombineColumns(2));
+    EXPECT_TRUE(testCombineColumns(63));
+    EXPECT_TRUE(testCombineColumns(64));
+    EXPECT_TRUE(testCombineColumns(200));
+    EXPECT_TRUE(testCombineColumns(201));
+    EXPECT_TRUE(testCombineColumns(2000));
+    EXPECT_TRUE(testCombineColumns(200000));
 }