mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
fix order of ranges in MergeTreeDataSelectExecutor
This commit is contained in:
parent
cdcaef9349
commit
b9fc9b4569
@ -73,7 +73,10 @@ MergeTreeReadTask::MergeTreeReadTask(
|
|||||||
: data_part{data_part_}, mark_ranges{mark_ranges_}, part_index_in_query{part_index_in_query_},
|
: data_part{data_part_}, mark_ranges{mark_ranges_}, part_index_in_query{part_index_in_query_},
|
||||||
ordered_names{ordered_names_}, column_name_set{column_name_set_}, columns{columns_}, pre_columns{pre_columns_},
|
ordered_names{ordered_names_}, column_name_set{column_name_set_}, columns{columns_}, pre_columns{pre_columns_},
|
||||||
remove_prewhere_column{remove_prewhere_column_}, should_reorder{should_reorder_}, size_predictor{std::move(size_predictor_)}
|
remove_prewhere_column{remove_prewhere_column_}, should_reorder{should_reorder_}, size_predictor{std::move(size_predictor_)}
|
||||||
{}
|
{
|
||||||
|
/// Mark ranges are stored in reverse (right-to-left) order, so that the leftmost range can be taken with `pop_back()`.
|
||||||
|
std::reverse(mark_ranges.begin(), mark_ranges.end());
|
||||||
|
}
|
||||||
|
|
||||||
MergeTreeReadTask::~MergeTreeReadTask() = default;
|
MergeTreeReadTask::~MergeTreeReadTask() = default;
|
||||||
|
|
||||||
|
@ -718,9 +718,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
|
|||||||
for (size_t i = 0; i < parts.size(); ++i)
|
for (size_t i = 0; i < parts.size(); ++i)
|
||||||
{
|
{
|
||||||
total_rows += parts[i].getRowsCount();
|
total_rows += parts[i].getRowsCount();
|
||||||
/// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`.
|
|
||||||
std::reverse(parts[i].ranges.begin(), parts[i].ranges.end());
|
|
||||||
|
|
||||||
sum_marks_in_parts[i] = parts[i].getMarksCount();
|
sum_marks_in_parts[i] = parts[i].getMarksCount();
|
||||||
sum_marks += sum_marks_in_parts[i];
|
sum_marks += sum_marks_in_parts[i];
|
||||||
|
|
||||||
@ -897,6 +894,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
|
|||||||
}
|
}
|
||||||
new_ranges.emplace_back(range.begin, range.end);
|
new_ranges.emplace_back(range.begin, range.end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Restore left-to-right order.
|
||||||
std::reverse(new_ranges.begin(), new_ranges.end());
|
std::reverse(new_ranges.begin(), new_ranges.end());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -28,11 +28,6 @@ MergeTreeReadPool::MergeTreeReadPool(
|
|||||||
column_names{column_names_}, do_not_steal_tasks{do_not_steal_tasks_},
|
column_names{column_names_}, do_not_steal_tasks{do_not_steal_tasks_},
|
||||||
predict_block_size_bytes{preferred_block_size_bytes_ > 0}, prewhere_info{prewhere_info_}, parts_ranges{parts_}
|
predict_block_size_bytes{preferred_block_size_bytes_ > 0}, prewhere_info{prewhere_info_}, parts_ranges{parts_}
|
||||||
{
|
{
|
||||||
/// reverse from right-to-left to left-to-right
|
|
||||||
/// because 'reverse' was done in MergeTreeDataSelectExecutor
|
|
||||||
for (auto & part_ranges : parts_ranges)
|
|
||||||
std::reverse(std::begin(part_ranges.ranges), std::end(part_ranges.ranges));
|
|
||||||
|
|
||||||
/// parts don't contain duplicate MergeTreeDataPart's.
|
/// parts don't contain duplicate MergeTreeDataPart's.
|
||||||
const auto per_part_sum_marks = fillPerPartInfo(parts_, check_columns_);
|
const auto per_part_sum_marks = fillPerPartInfo(parts_, check_columns_);
|
||||||
fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_, min_marks_for_concurrent_read_);
|
fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_, min_marks_for_concurrent_read_);
|
||||||
@ -79,10 +74,9 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read,
|
|||||||
{
|
{
|
||||||
const auto marks_to_get_from_range = marks_in_part;
|
const auto marks_to_get_from_range = marks_in_part;
|
||||||
|
|
||||||
/** Ranges are in right-to-left order, because 'reverse' was done in MergeTreeDataSelectExecutor
|
/// Ranges are in right-to-left order, because 'reverse' was done in 'fillPerThreadInfo'.
|
||||||
* and that order is supported in 'fillPerThreadInfo'.
|
|
||||||
*/
|
|
||||||
ranges_to_get_from_part = thread_task.ranges;
|
ranges_to_get_from_part = thread_task.ranges;
|
||||||
|
std::reverse(ranges_to_get_from_part.begin(), ranges_to_get_from_part.end());
|
||||||
|
|
||||||
marks_in_part -= marks_to_get_from_range;
|
marks_in_part -= marks_to_get_from_range;
|
||||||
|
|
||||||
@ -113,11 +107,7 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read,
|
|||||||
marks_in_part -= marks_to_get_from_range;
|
marks_in_part -= marks_to_get_from_range;
|
||||||
need_marks -= marks_to_get_from_range;
|
need_marks -= marks_to_get_from_range;
|
||||||
}
|
}
|
||||||
|
/// Order of ranges was changed to left-to-right due to .pop_back() above.
|
||||||
/** Change order to right-to-left, for MergeTreeThreadSelectBlockInputStream to get ranges with .pop_back()
|
|
||||||
* (order was changed to left-to-right due to .pop_back() above).
|
|
||||||
*/
|
|
||||||
std::reverse(std::begin(ranges_to_get_from_part), std::end(ranges_to_get_from_part));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto curr_task_size_predictor = !per_part_size_predictor[part_idx] ? nullptr
|
auto curr_task_size_predictor = !per_part_size_predictor[part_idx] ? nullptr
|
||||||
@ -211,7 +201,6 @@ std::vector<size_t> MergeTreeReadPool::fillPerPartInfo(
|
|||||||
|
|
||||||
/// Read marks for every data part.
|
/// Read marks for every data part.
|
||||||
size_t sum_marks = 0;
|
size_t sum_marks = 0;
|
||||||
/// Ranges are in right-to-left order, due to 'reverse' in MergeTreeDataSelectExecutor.
|
|
||||||
for (const auto & range : part.ranges)
|
for (const auto & range : part.ranges)
|
||||||
sum_marks += range.end - range.begin;
|
sum_marks += range.end - range.begin;
|
||||||
|
|
||||||
@ -251,6 +240,10 @@ void MergeTreeReadPool::fillPerThreadInfo(
|
|||||||
{
|
{
|
||||||
threads_tasks.resize(threads);
|
threads_tasks.resize(threads);
|
||||||
|
|
||||||
|
/// Let the ranges be listed from right to left so that the leftmost range can be dropped using `pop_back()`.
|
||||||
|
for (auto & part : parts)
|
||||||
|
std::reverse(part.ranges.begin(), part.ranges.end());
|
||||||
|
|
||||||
const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1;
|
const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1;
|
||||||
|
|
||||||
for (size_t i = 0; i < threads && !parts.empty(); ++i)
|
for (size_t i = 0; i < threads && !parts.empty(); ++i)
|
||||||
|
@ -95,11 +95,6 @@ try
|
|||||||
|
|
||||||
task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns);
|
task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns);
|
||||||
|
|
||||||
/** @note you could simply swap `reverse` in if and else branches of MergeTreeDataSelectExecutor,
|
|
||||||
* and remove this reverse. */
|
|
||||||
MarkRanges remaining_mark_ranges = all_mark_ranges;
|
|
||||||
std::reverse(remaining_mark_ranges.begin(), remaining_mark_ranges.end());
|
|
||||||
|
|
||||||
auto size_predictor = (preferred_block_size_bytes == 0)
|
auto size_predictor = (preferred_block_size_bytes == 0)
|
||||||
? nullptr
|
? nullptr
|
||||||
: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());
|
: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());
|
||||||
@ -109,7 +104,7 @@ try
|
|||||||
column_name_set = NameSet{column_names.begin(), column_names.end()};
|
column_name_set = NameSet{column_names.begin(), column_names.end()};
|
||||||
|
|
||||||
task = std::make_unique<MergeTreeReadTask>(
|
task = std::make_unique<MergeTreeReadTask>(
|
||||||
data_part, remaining_mark_ranges, part_index_in_query, ordered_names, column_name_set, task_columns.columns,
|
data_part, all_mark_ranges, part_index_in_query, ordered_names, column_name_set, task_columns.columns,
|
||||||
task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column,
|
task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column,
|
||||||
task_columns.should_reorder, std::move(size_predictor));
|
task_columns.should_reorder, std::move(size_predictor));
|
||||||
|
|
||||||
|
@ -0,0 +1 @@
|
|||||||
|
300
|
@ -0,0 +1,16 @@
|
|||||||
|
drop table if exists t;
|
||||||
|
|
||||||
|
create table t (a Int, b Int) engine = MergeTree order by (a, b) settings index_granularity = 400;
|
||||||
|
|
||||||
|
insert into t select 0, 0 from numbers(50);
|
||||||
|
insert into t select 0, 1 from numbers(350);
|
||||||
|
insert into t select 1, 2 from numbers(400);
|
||||||
|
insert into t select 2, 2 from numbers(400);
|
||||||
|
insert into t select 3, 0 from numbers(100);
|
||||||
|
|
||||||
|
select sleep(1) format Null; -- sleep a bit to wait for possible merges after the inserts
|
||||||
|
|
||||||
|
set max_threads = 1;
|
||||||
|
optimize table t final;
|
||||||
|
|
||||||
|
select sum(a) from t where a in (0, 3) and b = 0;
|
Loading…
Reference in New Issue
Block a user