more optimal ranges selection at reading in order

This commit is contained in:
CurtizJ 2019-08-02 19:16:18 +03:00
parent b8dff6ebb1
commit 5f7ebb18ed
3 changed files with 16 additions and 16 deletions

View File

@ -102,8 +102,6 @@ Block FinishSortingBlockInputStream::readImpl()
if (block.rows() == 0)
continue;
// We need to sort each block separately before merging.
sortBlock(block, description_to_sort, limit);
removeConstantsFromBlock(block);

View File

@ -1915,13 +1915,12 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, SortingInfoPtr so
* At this stage we merge per-thread streams into one.
*/
if (sorting_info->prefix_order_descr.size() < order_descr.size())
bool need_finish_sorting = (sorting_info->prefix_order_descr.size() < order_descr.size());
if (need_finish_sorting)
{
pipeline.transform([&](auto & stream)
{
stream = std::make_shared<FinishSortingBlockInputStream>(
stream, sorting_info->prefix_order_descr,
order_descr, settings.max_block_size, limit);
stream = std::make_shared<PartialSortingBlockInputStream>(stream, order_descr, limit);
});
}
@ -1933,10 +1932,17 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, SortingInfoPtr so
});
pipeline.firstStream() = std::make_shared<MergingSortedBlockInputStream>(
pipeline.streams, order_descr,
pipeline.streams, sorting_info->prefix_order_descr,
settings.max_block_size, limit);
pipeline.streams.resize(1);
}
if (need_finish_sorting)
{
pipeline.firstStream() = std::make_shared<FinishSortingBlockInputStream>(
pipeline.firstStream(), sorting_info->prefix_order_descr,
order_descr, settings.max_block_size, limit);
}
}
else
{

View File

@ -867,25 +867,20 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO
if (sum_marks == 0)
return streams;
/// In case of reverse order let's split ranges to avoid reading much data.
/// Let's split ranges to avoid reading much data.
auto split_ranges = [rows_granularity = data.settings.index_granularity, max_block_size](const auto & ranges, int direction)
{
if (direction == 1)
return ranges;
MarkRanges new_ranges;
/// Probably it useful to make it larger.
const size_t max_marks_in_range = (max_block_size + rows_granularity - 1) / rows_granularity;
size_t marks_in_range = 1;
for (auto range : ranges)
{
size_t marks_in_range = 1;
while (range.begin + marks_in_range < range.end)
{
if (direction == 1)
{
/// Comment
/// Split first few ranges to avoid reading much data.
new_ranges.emplace_back(range.begin, range.begin + marks_in_range);
range.begin += marks_in_range;
marks_in_range *= 2;
@ -895,7 +890,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO
}
else
{
/// Comment
/// Split all ranges to avoid reading much data, because we have to
/// store whole range in memory to reverse it.
new_ranges.emplace_back(range.end - marks_in_range, range.end);
range.end -= marks_in_range;
marks_in_range = std::min(marks_in_range * 2, max_marks_in_range);