Optimized marks selection algorithm for continuous marks ranges

This commit is contained in:
Alexander Kazakov 2020-09-15 17:22:32 +03:00
parent 7aa3f86ab9
commit 7465e00163

View File

@@ -1498,79 +1498,55 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
}
else
{
// Do inclusion search, where we only look for one range
// For the case of one continuous range of keys we use binary search algorithm
LOG_TRACE(log, "Running binary search on index range for part {} ({} marks)", part->name, marks_count);
size_t steps = 0;
auto find_leaf = [&](bool left) -> std::optional<size_t>
MarkRange result_range;
size_t searched_left = 0;
size_t searched_right = marks_count;
while (searched_left + 1 < searched_right)
{
std::vector<MarkRange> stack = {};
MarkRange range = {0, marks_count};
steps++;
const size_t middle = (searched_left + searched_right) / 2;
MarkRange range(0, middle);
if (may_be_true_in_range(range))
stack.emplace_back(range.begin, range.end);
searched_right = middle;
else
searched_left = middle;
++steps;
}
result_range.begin = searched_left;
LOG_TRACE(log, "Found (LEFT) boundary mark: {}", searched_left);
while (!stack.empty())
{
range = stack.back();
stack.pop_back();
searched_right = marks_count;
while (searched_left + 1 < searched_right)
{
const size_t middle = (searched_left + searched_right) / 2;
MarkRange range(middle, marks_count);
if (may_be_true_in_range(range))
searched_left = middle;
else
searched_right = middle;
++steps;
}
result_range.end = searched_right;
LOG_TRACE(log, "Found (RIGHT) boundary mark: {}", searched_right);
if (range.end == range.begin + 1)
{
if (left)
return range.begin;
else
return range.end;
}
else
{
std::vector<MarkRange> check_order = {};
MarkRange left_range = {range.begin, (range.begin + range.end) / 2};
MarkRange right_range = {(range.begin + range.end) / 2, range.end};
if (may_be_true_in_range(result_range))
res.emplace_back(std::move(result_range));
if (left)
{
check_order.emplace_back(left_range.begin, left_range.end);
check_order.emplace_back(right_range.begin, right_range.end);
}
else
{
check_order.emplace_back(right_range.begin, right_range.end);
check_order.emplace_back(left_range.begin, left_range.end);
}
steps++;
if (may_be_true_in_range(check_order[0]))
{
stack.emplace_back(check_order[0].begin, check_order[0].end);
continue;
}
if (may_be_true_in_range(check_order[1]))
stack.emplace_back(check_order[1].begin, check_order[1].end);
else
break; // No mark range would suffice
}
}
return std::nullopt;
};
auto left_leaf = find_leaf(true);
if (left_leaf)
res.emplace_back(left_leaf.value(), find_leaf(false).value());
LOG_TRACE(log, "Used optimized inclusion search over index for part {} with {} steps", part->name, steps);
LOG_TRACE(log, "Found {} range in {} steps", res.empty() ? "empty" : "continuous", steps);
}
return res;
}
MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
MergeTreeIndexPtr index_helper,
MergeTreeIndexConditionPtr condition,