Merge pull request #50432 from azat/fix-min-for-seek

Fix merge_tree_min_rows_for_seek/merge_tree_min_bytes_for_seek for data skipping indexes
This commit is contained in:
Alexey Milovidov 2023-06-02 04:09:35 +03:00 committed by GitHub
commit 31773d7eee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 24 additions and 2 deletions

View File

@ -1729,7 +1729,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
std::max(ranges[i].begin, index_mark * index_granularity),
std::min(ranges[i].end, (index_mark + 1) * index_granularity));
if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek)
if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek)
res.push_back(data_range);
else
res.back().end = data_range.end;
@ -1829,7 +1829,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex(
std::max(range.begin, index_mark * index_granularity),
std::min(range.end, (index_mark + 1) * index_granularity));
if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek)
if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek)
res.push_back(data_range);
else
res.back().end = data_range.end;

View File

@ -0,0 +1,22 @@
-- Tags: no-random-merge-tree-settings, no-random-settings
-- Regression test: merge_tree_min_rows_for_seek must be honored when mark
-- ranges are filtered through a data skipping index (minmax), not only
-- through the primary key. Random settings are disabled because the test
-- depends on exact index_granularity and seek thresholds.
DROP TABLE IF EXISTS data;
CREATE TABLE data
(
    key Int,
    v1 DateTime,
    -- minmax skip index with GRANULARITY 1: one index entry per granule,
    -- so alternating granule values (below) produce an alternating
    -- skip/keep pattern of mark ranges.
    INDEX v1_index v1 TYPE minmax GRANULARITY 1
) ENGINE=AggregatingMergeTree()
ORDER BY key
SETTINGS index_granularity=8192;
-- Keep parts separate so the skip/keep pattern is not disturbed by merges.
SYSTEM STOP MERGES data;
-- generate 50% of marks that cannot be skipped with v1_index
-- this will create a gap in marks
-- (even-numbered granules get now(), odd-numbered granules get now() - 200 days,
-- so every other granule falls outside the WHERE condition below)
INSERT INTO data SELECT number, if(number/8192 % 2 == 0, now(), now() - INTERVAL 200 DAY) FROM numbers(1e6);
INSERT INTO data SELECT number+1e6, if(number/8192 % 2 == 0, now(), now() - INTERVAL 200 DAY) FROM numbers(1e6);
-- With merge_tree_min_rows_for_seek=0 every skippable granule is actually
-- skipped, so the read stays under max_rows_to_read and the query succeeds.
SELECT * FROM data WHERE v1 >= now() - INTERVAL 180 DAY FORMAT Null SETTINGS max_threads=1, max_final_threads=1, force_data_skipping_indices='v1_index', merge_tree_min_rows_for_seek=0, max_rows_to_read=1999999;
-- With merge_tree_min_rows_for_seek=1 the gaps between kept ranges are small
-- enough that adjacent ranges are merged instead of seeking past the skipped
-- granules, so more rows are read and max_rows_to_read must trip.
-- NOTE(review): before the fix in this PR the threshold comparison was
-- inverted (res.back().end - data_range.begin instead of
-- data_range.begin - res.back().end), so this expectation would not hold.
SELECT * FROM data WHERE v1 >= now() - INTERVAL 180 DAY FORMAT Null SETTINGS max_threads=1, max_final_threads=1, force_data_skipping_indices='v1_index', merge_tree_min_rows_for_seek=1, max_rows_to_read=1999999; -- { serverError TOO_MANY_ROWS }