mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-24 18:50:49 +00:00
more optimal ranges selection at reading in order
This commit is contained in:
parent
b8dff6ebb1
commit
5f7ebb18ed
@ -102,8 +102,6 @@ Block FinishSortingBlockInputStream::readImpl()
|
||||
if (block.rows() == 0)
|
||||
continue;
|
||||
|
||||
// We need to sort each block separately before merging.
|
||||
sortBlock(block, description_to_sort, limit);
|
||||
|
||||
removeConstantsFromBlock(block);
|
||||
|
||||
|
@ -1915,13 +1915,12 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, SortingInfoPtr so
|
||||
* At this stage we merge per-thread streams into one.
|
||||
*/
|
||||
|
||||
if (sorting_info->prefix_order_descr.size() < order_descr.size())
|
||||
bool need_finish_sorting = (sorting_info->prefix_order_descr.size() < order_descr.size());
|
||||
if (need_finish_sorting)
|
||||
{
|
||||
pipeline.transform([&](auto & stream)
|
||||
{
|
||||
stream = std::make_shared<FinishSortingBlockInputStream>(
|
||||
stream, sorting_info->prefix_order_descr,
|
||||
order_descr, settings.max_block_size, limit);
|
||||
stream = std::make_shared<PartialSortingBlockInputStream>(stream, order_descr, limit);
|
||||
});
|
||||
}
|
||||
|
||||
@ -1933,10 +1932,17 @@ void InterpreterSelectQuery::executeOrder(Pipeline & pipeline, SortingInfoPtr so
|
||||
});
|
||||
|
||||
pipeline.firstStream() = std::make_shared<MergingSortedBlockInputStream>(
|
||||
pipeline.streams, order_descr,
|
||||
pipeline.streams, sorting_info->prefix_order_descr,
|
||||
settings.max_block_size, limit);
|
||||
pipeline.streams.resize(1);
|
||||
}
|
||||
|
||||
if (need_finish_sorting)
|
||||
{
|
||||
pipeline.firstStream() = std::make_shared<FinishSortingBlockInputStream>(
|
||||
pipeline.firstStream(), sorting_info->prefix_order_descr,
|
||||
order_descr, settings.max_block_size, limit);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -867,25 +867,20 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO
|
||||
if (sum_marks == 0)
|
||||
return streams;
|
||||
|
||||
/// In case of reverse order let's split ranges to avoid reading much data.
|
||||
/// Let's split ranges to avoid reading much data.
|
||||
auto split_ranges = [rows_granularity = data.settings.index_granularity, max_block_size](const auto & ranges, int direction)
|
||||
{
|
||||
if (direction == 1)
|
||||
return ranges;
|
||||
|
||||
MarkRanges new_ranges;
|
||||
|
||||
/// Probably it useful to make it larger.
|
||||
const size_t max_marks_in_range = (max_block_size + rows_granularity - 1) / rows_granularity;
|
||||
size_t marks_in_range = 1;
|
||||
|
||||
for (auto range : ranges)
|
||||
{
|
||||
size_t marks_in_range = 1;
|
||||
while (range.begin + marks_in_range < range.end)
|
||||
{
|
||||
if (direction == 1)
|
||||
{
|
||||
/// Comment
|
||||
/// Split first few ranges to avoid reading much data.
|
||||
new_ranges.emplace_back(range.begin, range.begin + marks_in_range);
|
||||
range.begin += marks_in_range;
|
||||
marks_in_range *= 2;
|
||||
@ -895,7 +890,8 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Comment
|
||||
/// Split all ranges to avoid reading much data, because we have to
|
||||
/// store whole range in memory to reverse it.
|
||||
new_ranges.emplace_back(range.end - marks_in_range, range.end);
|
||||
range.end -= marks_in_range;
|
||||
marks_in_range = std::min(marks_in_range * 2, max_marks_in_range);
|
||||
|
Loading…
Reference in New Issue
Block a user