Merge pull request #53616 from wzb5212/skip_idx_bug_fix

Fix number of dropped granules in EXPLAIN PLAN index=1
This commit is contained in:
Robert Schulze 2023-08-24 16:46:47 +02:00 committed by GitHub
commit 45d924c62c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 69 additions and 36 deletions

View File

@ -981,9 +981,9 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
const auto & index_and_condition = skip_indexes.useful_indices[idx];
auto & stat = useful_indices_stat[idx];
stat.total_parts.fetch_add(1, std::memory_order_relaxed);
stat.total_granules.fetch_add(ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);
size_t total_granules = ranges.ranges.getNumberOfMarks();
stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
size_t granules_dropped = 0;
ranges.ranges = filterMarksUsingIndex(
index_and_condition.index,
index_and_condition.condition,
@ -991,12 +991,11 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
ranges.ranges,
settings,
reader_settings,
granules_dropped,
mark_cache.get(),
uncompressed_cache.get(),
log);
stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
stat.granules_dropped.fetch_add(total_granules - ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);
if (ranges.ranges.empty())
stat.parts_dropped.fetch_add(1, std::memory_order_relaxed);
}
@ -1010,17 +1009,15 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
auto & stat = merged_indices_stat[idx];
stat.total_parts.fetch_add(1, std::memory_order_relaxed);
size_t total_granules = 0;
size_t granules_dropped = 0;
size_t total_granules = ranges.ranges.getNumberOfMarks();
ranges.ranges = filterMarksUsingMergedIndex(
indices_and_condition.indices, indices_and_condition.condition,
part, ranges.ranges,
settings, reader_settings,
total_granules, granules_dropped,
mark_cache.get(), uncompressed_cache.get(), log);
stat.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
stat.granules_dropped.fetch_add(granules_dropped, std::memory_order_relaxed);
stat.granules_dropped.fetch_add(total_granules - ranges.ranges.getNumberOfMarks(), std::memory_order_relaxed);
if (ranges.ranges.empty())
stat.parts_dropped.fetch_add(1, std::memory_order_relaxed);
@ -1578,7 +1575,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log)
@ -1648,8 +1644,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
{
// vector of indexes of useful ranges
auto result = ann_condition->getUsefulRanges(granule);
if (result.empty())
++granules_dropped;
for (auto range : result)
{
@ -1674,10 +1668,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
result = cache_in_store.store ? gin_filter_condition->mayBeTrueOnGranuleInPart(granule, cache_in_store) : true;
if (!result)
{
++granules_dropped;
continue;
}
MarkRange data_range(
std::max(ranges[i].begin, index_mark * index_granularity),
@ -1702,8 +1693,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & total_granules,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log)
@ -1760,8 +1749,6 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
for (auto & reader : readers)
reader->seek(index_range.begin);
total_granules += index_range.end - index_range.begin;
for (size_t index_mark = index_range.begin; index_mark < index_range.end; ++index_mark)
{
if (index_mark != index_range.begin || !granules_filled || last_index_mark != index_range.begin)
@ -1774,10 +1761,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
}
if (!condition->mayBeTrueOnGranule(granules))
{
++granules_dropped;
continue;
}
MarkRange data_range(
std::max(range.begin, index_mark * index_granularity),

View File

@ -93,7 +93,6 @@ private:
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log);
@ -105,8 +104,6 @@ private:
const MarkRanges & ranges,
const Settings & settings,
const MergeTreeReaderSettings & reader_settings,
size_t & total_granules,
size_t & granules_dropped,
MarkCache * mark_cache,
UncompressedCache * uncompressed_cache,
Poco::Logger * log);

View File

@ -24,12 +24,12 @@
Name: t_minmax
Description: minmax GRANULARITY 2
Parts: 1/2
Granules: 4/6
Granules: 3/6
Skip
Name: t_set
Description: set GRANULARITY 2
Parts: 1/1
Granules: 2/4
Granules: 2/3
-----------------
"Node Type": "ReadFromMergeTree",
"Description": "default.test_index",
@ -68,7 +68,7 @@
"Initial Parts": 2,
"Selected Parts": 1,
"Initial Granules": 6,
"Selected Granules": 4
"Selected Granules": 3
},
{
"Type": "Skip",
@ -76,7 +76,7 @@
"Description": "set GRANULARITY 2",
"Initial Parts": 1,
"Selected Parts": 1,
"Initial Granules": 4,
"Initial Granules": 3,
"Selected Granules": 2
}
]

View File

@ -94,7 +94,7 @@ Expression ((Projection + Before ORDER BY))
Name: annoy_index
Description: annoy GRANULARITY 2
Parts: 0/1
Granules: 2/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
@ -110,7 +110,7 @@ Expression (Projection)
Name: annoy_index
Description: annoy GRANULARITY 2
Parts: 1/1
Granules: 4/4
Granules: 2/4
--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---
WHERE type, L2Distance, check that index is used
Expression ((Projection + Before ORDER BY))
@ -125,7 +125,7 @@ Expression ((Projection + Before ORDER BY))
Name: annoy_index
Description: annoy GRANULARITY 4
Parts: 0/1
Granules: 3/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
@ -141,7 +141,7 @@ Expression (Projection)
Name: annoy_index
Description: annoy GRANULARITY 4
Parts: 1/1
Granules: 4/4
Granules: 1/4
--- Test correctness of Annoy index with > 1 mark
1 [1,0,0,0]
9000 [9000,0,0,0]

View File

@ -93,7 +93,7 @@ Expression ((Projection + Before ORDER BY))
Name: usearch_index
Description: usearch GRANULARITY 2
Parts: 0/1
Granules: 2/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
@ -109,7 +109,7 @@ Expression (Projection)
Name: usearch_index
Description: usearch GRANULARITY 2
Parts: 1/1
Granules: 4/4
Granules: 2/4
--- Test with Array, GRANULARITY = 4, index_granularity = 4 ---
WHERE type, L2Distance, check that index is used
Expression ((Projection + Before ORDER BY))
@ -124,7 +124,7 @@ Expression ((Projection + Before ORDER BY))
Name: usearch_index
Description: usearch GRANULARITY 4
Parts: 0/1
Granules: 3/4
Granules: 0/4
ORDER BY type, L2Distance, check that index is used
Expression (Projection)
Limit (preliminary LIMIT (without OFFSET))
@ -140,4 +140,4 @@ Expression (Projection)
Name: usearch_index
Description: usearch GRANULARITY 4
Parts: 1/1
Granules: 4/4
Granules: 1/4

View File

@ -0,0 +1,28 @@
Expression ((Project names + Projection))
Filter ((WHERE + Change column names to column identifiers))
ReadFromMergeTree (default.test_skip_idx)
Indexes:
Skip
Name: name_idx_g2
Description: minmax GRANULARITY 2
Parts: 1/1
Granules: 2/5
Skip
Name: name_idx_g1
Description: minmax GRANULARITY 1
Parts: 1/1
Granules: 1/2
Expression ((Project names + Projection))
Filter ((WHERE + Change column names to column identifiers))
ReadFromMergeTree (default.test_skip_idx)
Indexes:
Skip
Name: name_idx_g2
Description: minmax GRANULARITY 2
Parts: 1/1
Granules: 2/5
Skip
Name: name_idx_g1
Description: minmax GRANULARITY 1
Parts: 1/1
Granules: 2/2

View File

@ -0,0 +1,24 @@
-- Tags: no-random-merge-tree-settings
SET optimize_move_to_prewhere = 1;
SET convert_query_to_cnf = 0;
SET optimize_read_in_order = 1;
SET allow_experimental_analyzer = 1; -- slightly different operator names than w/o
DROP TABLE IF EXISTS test_skip_idx;
CREATE TABLE test_skip_idx (
id UInt32,
INDEX name_idx_g2 id TYPE minmax GRANULARITY 2,
INDEX name_idx_g1 id TYPE minmax GRANULARITY 1)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;
INSERT INTO test_skip_idx SELECT number FROM system.numbers LIMIT 5 OFFSET 1;
EXPLAIN indexes = 1 SELECT * FROM test_skip_idx WHERE id < 2;
EXPLAIN indexes = 1 SELECT * FROM test_skip_idx WHERE id < 3;
DROP TABLE test_skip_idx;