From aa092eeffb8f09d9d65294bfa2a62cacc258e562 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 27 Dec 2021 16:42:06 +0300 Subject: [PATCH] proper handle of 'max_rows_to_read' in case of reading in order of sorting key and limit --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 14 ++++++++++-- .../MergeTree/MergeTreeSelectProcessor.cpp | 5 ----- ...5_read_in_order_max_rows_to_read.reference | 6 +++++ .../02155_read_in_order_max_rows_to_read.sql | 22 +++++++++++++++++++ 4 files changed, 40 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.reference create mode 100644 tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.sql diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index cdedd37e14a..07ac6f5764b 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -875,12 +875,22 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd { std::atomic total_rows{0}; + /// Do not check the number of read rows if we are reading + /// in order of the sorting key with a limit. + /// In the general case, when there is a WHERE clause, + /// it's impossible to estimate the number of rows precisely, + /// because we can stop reading at any time.
+ SizeLimits limits; - if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read) + if (settings.read_overflow_mode == OverflowMode::THROW + && settings.max_rows_to_read + && !query_info.input_order_info) limits = SizeLimits(settings.max_rows_to_read, 0, settings.read_overflow_mode); SizeLimits leaf_limits; - if (settings.read_overflow_mode_leaf == OverflowMode::THROW && settings.max_rows_to_read_leaf) + if (settings.read_overflow_mode_leaf == OverflowMode::THROW + && settings.max_rows_to_read_leaf + && !query_info.input_order_info) leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, 0, settings.read_overflow_mode_leaf); auto mark_cache = context->getIndexMarkCache(); diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 2d4d3617cee..332eb27094a 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -37,11 +37,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( has_limit_below_one_block(has_limit_below_one_block_), total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges)) { - /// Actually it means that parallel reading from replicas enabled - /// and we have to collaborate with initiator. 
- /// In this case we won't set approximate rows, because it will be accounted multiple times - if (!extension_.has_value()) - addTotalRowsApprox(total_rows); ordered_names = header_without_virtual_columns.getNames(); } diff --git a/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.reference b/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.reference new file mode 100644 index 00000000000..b73ab43cabb --- /dev/null +++ b/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.reference @@ -0,0 +1,6 @@ +10 +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.sql b/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.sql new file mode 100644 index 00000000000..e82c78b5e42 --- /dev/null +++ b/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS t_max_rows_to_read; + +CREATE TABLE t_max_rows_to_read (a UInt64) +ENGINE = MergeTree ORDER BY a +SETTINGS index_granularity = 4; + +INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100); + +SET max_threads = 1; + +SELECT a FROM t_max_rows_to_read WHERE a = 10 SETTINGS max_rows_to_read = 4; + +SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 12; + +-- This should work, but actually it doesn't. Need to investigate. +-- SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 20; + +SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 20 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 } +SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 } +SELECT a FROM t_max_rows_to_read WHERE a = 10 OR a = 20 FORMAT Null SETTINGS max_rows_to_read = 4; -- { serverError 158 } + +DROP TABLE t_max_rows_to_read;