From 7465e00163a5e02fa6928513a6cae89023dcab5d Mon Sep 17 00:00:00 2001 From: Alexander Kazakov Date: Tue, 15 Sep 2020 17:22:32 +0300 Subject: [PATCH] Optimized marks selection algorithm for continuous marks ranges --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 94 +++++++------------ 1 file changed, 35 insertions(+), 59 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index e780ebda111..f2010b4e34e 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1498,79 +1498,55 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } else { - // Do inclusion search, where we only look for one range + // For the case of one continuous range of keys we use binary search algorithm + + LOG_TRACE(log, "Running binary search on index range for part {} ({} marks)", part->name, marks_count); size_t steps = 0; - auto find_leaf = [&](bool left) -> std::optional + MarkRange result_range; + + size_t searched_left = 0; + size_t searched_right = marks_count; + + while (searched_left + 1 < searched_right) { - std::vector stack = {}; - - MarkRange range = {0, marks_count}; - - steps++; - + const size_t middle = (searched_left + searched_right) / 2; + MarkRange range(0, middle); if (may_be_true_in_range(range)) - stack.emplace_back(range.begin, range.end); + searched_right = middle; + else + searched_left = middle; + ++steps; + } + result_range.begin = searched_left; + LOG_TRACE(log, "Found (LEFT) boundary mark: {}", searched_left); - while (!stack.empty()) - { - range = stack.back(); - stack.pop_back(); + searched_right = marks_count; + while (searched_left + 1 < searched_right) + { + const size_t middle = (searched_left + searched_right) / 2; + MarkRange range(middle, marks_count); + if (may_be_true_in_range(range)) + searched_left = middle; + else + searched_right = middle; + ++steps; + } + result_range.end = searched_right; + LOG_TRACE(log, "Found (RIGHT) boundary mark: {}", searched_right); - if (range.end == range.begin + 1) - { - if (left) - return range.begin; - else - return range.end; - } - else - { - std::vector check_order = {}; - MarkRange left_range = {range.begin, (range.begin + range.end) / 2}; - MarkRange right_range = {(range.begin + range.end) / 2, range.end}; + if (may_be_true_in_range(result_range)) + res.emplace_back(std::move(result_range)); - if (left) - { - check_order.emplace_back(left_range.begin, left_range.end); - check_order.emplace_back(right_range.begin, right_range.end); - } - else - { - check_order.emplace_back(right_range.begin, right_range.end); - check_order.emplace_back(left_range.begin, left_range.end); - } - - steps++; - - if (may_be_true_in_range(check_order[0])) - { - stack.emplace_back(check_order[0].begin, check_order[0].end); - continue; - } - - if (may_be_true_in_range(check_order[1])) - stack.emplace_back(check_order[1].begin, check_order[1].end); - else - break; // No mark range would suffice - } - } - - return std::nullopt; - }; - - auto left_leaf = find_leaf(true); - if (left_leaf) - res.emplace_back(left_leaf.value(), find_leaf(false).value()); - - LOG_TRACE(log, "Used optimized inclusion search over index for part {} with {} steps", part->name, steps); + LOG_TRACE(log, "Found {} range in {} steps", res.empty() ? "empty" : "continuous", steps); } return res; } + MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( MergeTreeIndexPtr index_helper, MergeTreeIndexConditionPtr condition,