Merge pull request #62268 from nickitat/test_for_bug

Fix PartsSplitter
This commit is contained in:
Nikita Taranov 2024-04-08 15:11:25 +00:00 committed by GitHub
commit 5b4f67cc39
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 4 deletions

View File

@ -1,4 +1,5 @@
#include <algorithm>
#include <limits>
#include <memory>
#include <numeric>
#include <queue>
@ -125,14 +126,20 @@ int compareValues(const Values & lhs, const Values & rhs)
class IndexAccess
{
public:
explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { }
explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_)
{
/// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`)
/// and we need to use the same set of index columns across all parts.
for (const auto & part : parts)
loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size());
}
Values getValue(size_t part_idx, size_t mark) const
{
const auto & index = parts[part_idx].data_part->getIndex();
size_t size = index.size();
Values values(size);
for (size_t i = 0; i < size; ++i)
chassert(index.size() >= loaded_columns);
Values values(loaded_columns);
for (size_t i = 0; i < loaded_columns; ++i)
{
index[i]->get(mark, values[i]);
if (values[i].isNull())
@ -199,6 +206,7 @@ public:
}
private:
const RangesInDataParts & parts;
size_t loaded_columns = std::numeric_limits<size_t>::max();
};
class RangesInDataPartsBuilder

View File

@ -0,0 +1,19 @@
-- Regression test for a PartsSplitter bug: different parts can have a different number of
-- loaded primary-key index columns, and the splitter must compare index values using the
-- same (common-prefix) set of columns across all parts — assumes the
-- `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns` behavior; see the
-- accompanying PartsSplitter change.
create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1;
system stop merges t;
-- for this part the first column alone is useless (constant), so both index columns are kept
insert into t select 42, number from numbers_mt(100);
-- for this part the first column is enough
insert into t select number, number from numbers_mt(100);
-- force reloading index
detach table t;
attach table t;
set merge_tree_min_bytes_for_concurrent_read=1, merge_tree_min_rows_for_concurrent_read=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=1.0, max_threads=4;
-- the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. even a simple count query is enough,
-- because some granules were assigned to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`)
select count() from t where not ignore(*);