Properly handle 'max_rows_to_read' when reading in order of sorting key with a limit

This commit is contained in:
Anton Popov 2021-12-27 16:42:06 +03:00
parent a5824cb3e0
commit aa092eeffb
4 changed files with 40 additions and 7 deletions

View File

@ -875,12 +875,22 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
{
std::atomic<size_t> total_rows{0};
/// Do not check the number of rows to read when reading
/// in order of sorting key with a limit.
/// In the general case, when a WHERE clause is present,
/// it's impossible to estimate the number of rows precisely,
/// because reading can stop at any time.
SizeLimits limits;
if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
if (settings.read_overflow_mode == OverflowMode::THROW
&& settings.max_rows_to_read
&& !query_info.input_order_info)
limits = SizeLimits(settings.max_rows_to_read, 0, settings.read_overflow_mode);
SizeLimits leaf_limits;
if (settings.read_overflow_mode_leaf == OverflowMode::THROW && settings.max_rows_to_read_leaf)
if (settings.read_overflow_mode_leaf == OverflowMode::THROW
&& settings.max_rows_to_read_leaf
&& !query_info.input_order_info)
leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, 0, settings.read_overflow_mode_leaf);
auto mark_cache = context->getIndexMarkCache();

View File

@ -37,11 +37,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
has_limit_below_one_block(has_limit_below_one_block_),
total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges))
{
/// Actually, it means that parallel reading from replicas is enabled
/// and we have to collaborate with the initiator.
/// In this case we won't set approximate rows, because they would be accounted for multiple times.
if (!extension_.has_value())
addTotalRowsApprox(total_rows);
ordered_names = header_without_virtual_columns.getNames();
}

View File

@ -0,0 +1,6 @@
10
0
1
2
3
4

View File

@ -0,0 +1,22 @@
-- Test for the 'max_rows_to_read' setting, including queries that read
-- in order of the sorting key with a LIMIT.
DROP TABLE IF EXISTS t_max_rows_to_read;
-- Table sorted by `a`; index_granularity = 4 is set explicitly
-- (presumably so that each granule holds 4 rows and the row limits below
-- can be expressed in whole granules -- confirm).
CREATE TABLE t_max_rows_to_read (a UInt64)
ENGINE = MergeTree ORDER BY a
SETTINGS index_granularity = 4;
-- Fill the table with the values 0..99.
INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100);
-- NOTE(review): presumably a single thread keeps the number of rows read
-- deterministic -- confirm.
SET max_threads = 1;
-- Point lookup with a limit of 4 rows: expected to succeed and return 10.
SELECT a FROM t_max_rows_to_read WHERE a = 10 SETTINGS max_rows_to_read = 4;
-- Ordered read with LIMIT 5 and a limit of 12 rows: expected to succeed
-- and return 0..4.
SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 12;
-- This should work, but actually it doesn't. Need to investigate.
-- SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 20;
-- The remaining queries are expected to fail with server error 158
-- (presumably TOO_MANY_ROWS -- confirm); FORMAT Null discards any output.
SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 20 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
SELECT a FROM t_max_rows_to_read WHERE a = 10 OR a = 20 FORMAT Null SETTINGS max_rows_to_read = 4; -- { serverError 158 }
-- Clean up.
DROP TABLE t_max_rows_to_read;