Mirror of https://github.com/ClickHouse/ClickHouse.git
Merge pull request #13677 from hagen1778/merge-tree-fail-fast-on-rows-limit
[mergeTree]: fail fast if max_rows_to_read limit exceeded on parts scan
commit 23ccb0b6be

@@ -79,6 +79,7 @@ namespace ErrorCodes
     extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER;
     extern const int ILLEGAL_COLUMN;
     extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int TOO_MANY_ROWS;
 }
 
 
@@ -573,6 +574,8 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
 
     /// Let's find what range to read from each part.
     {
+        std::atomic<size_t> total_rows {0};
+
         auto process_part = [&](size_t part_index)
         {
             auto & part = parts[part_index];
@@ -599,7 +602,23 @@ Pipe MergeTreeDataSelectExecutor::readFromParts(
                     index_and_condition.first, index_and_condition.second, part, ranges.ranges, settings, reader_settings, log);
 
             if (!ranges.ranges.empty())
+            {
+                if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
+                {
+                    /// Fail fast if estimated number of rows to read exceeds the limit
+                    auto current_rows_estimate = ranges.getRowsCount();
+                    size_t prev_total_rows_estimate = total_rows.fetch_add(current_rows_estimate);
+                    size_t total_rows_estimate = current_rows_estimate + prev_total_rows_estimate;
+                    if (total_rows_estimate > settings.max_rows_to_read)
+                        throw Exception(
+                            "Limit for rows (controlled by 'max_rows_to_read' setting) exceeded, max rows: "
+                            + formatReadableQuantity(settings.max_rows_to_read)
+                            + ", estimated rows to read (at least): " + formatReadableQuantity(total_rows_estimate),
+                            ErrorCodes::TOO_MANY_ROWS);
+                }
+
                 parts_with_ranges[part_index] = std::move(ranges);
+            }
         };
 
         size_t num_threads = std::min(size_t(num_streams), parts.size());
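Note on the change above: `process_part` is dispatched over a thread pool (see `num_threads` in the last context line), so the running row estimate is accumulated in a shared `std::atomic`. Each worker adds its own per-part estimate with `fetch_add` and throws as soon as the combined total crosses `max_rows_to_read`; the message says "at least" because parts still being processed can only push the total higher. Below is a minimal standalone sketch of the same fail-fast accounting, with hypothetical names (`select_parts`, `per_part_estimates`) and a serial loop in place of the thread pool; it is not the ClickHouse code itself.

    #include <atomic>
    #include <cstddef>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Sketch: accumulate per-part row estimates and fail fast once the sum
    // exceeds a limit. `per_part_estimates` and `max_rows_to_read` stand in
    // for what the real executor derives from mark ranges and settings.
    void select_parts(const std::vector<size_t> & per_part_estimates, size_t max_rows_to_read)
    {
        std::atomic<size_t> total_rows{0};

        auto process_part = [&](size_t part_index)
        {
            size_t current_rows_estimate = per_part_estimates[part_index];

            // fetch_add returns the value *before* the addition, so
            // prev + current is this worker's view of the running total.
            size_t prev_total_rows_estimate = total_rows.fetch_add(current_rows_estimate);
            size_t total_rows_estimate = prev_total_rows_estimate + current_rows_estimate;

            if (max_rows_to_read && total_rows_estimate > max_rows_to_read)
                throw std::runtime_error(
                    "estimated rows to read (at least) " + std::to_string(total_rows_estimate)
                    + " exceed max_rows_to_read = " + std::to_string(max_rows_to_read));

            // ...otherwise the part's mark ranges would be kept for reading...
        };

        // The real code hands part indices to a thread pool; a serial loop
        // is enough to show the accounting.
        for (size_t i = 0; i < per_part_estimates.size(); ++i)
            process_part(i);
    }

The test diff below exercises this path by tightening `max_rows_to_read` to budgets derived from the expected part and mark counts.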
@@ -12,6 +12,7 @@ DROP TABLE IF EXISTS mv_checkouts2target;
 -- that is the final table, which is filled incrementally from 2 different sources
 
 CREATE TABLE target_table Engine=SummingMergeTree() ORDER BY id
+SETTINGS index_granularity=128
 AS
 SELECT
     number as id,
@@ -85,12 +86,20 @@ INSERT INTO logins SELECT number as id, '2000-01-01 08:00:00' from numbers(50
 INSERT INTO checkouts SELECT number as id, '2000-01-01 10:00:00' from numbers(50000);
 
 -- ensure that we don't read whole target table during join
-set max_rows_to_read = 2000;
+-- by this time we should have 3 parts for target_table because of prev inserts
+-- and we plan to make two more inserts. With index_granularity=128 and max id=1000
+-- we expect to read not more than:
+-- (1000/128) marks per part * (3 + 2) parts * 128 granularity = 5120 rows
+set max_rows_to_read = 5120;
 
 INSERT INTO logins SELECT number as id, '2000-01-01 11:00:00' from numbers(1000);
 INSERT INTO checkouts SELECT number as id, '2000-01-01 11:10:00' from numbers(1000);
 
-set max_rows_to_read = 10;
+-- by this time we should have 5 parts for target_table because of prev inserts
+-- and we plan to make two more inserts. With index_granularity=128 and max id=1
+-- we expect to read not more than:
+-- 1 mark per part * (5 + 2) parts * 128 granularity = 896 rows
+set max_rows_to_read = 896;
 
 INSERT INTO logins SELECT number+2 as id, '2001-01-01 11:10:01' from numbers(1);
 INSERT INTO checkouts SELECT number+2 as id, '2001-01-01 11:10:02' from numbers(1);
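For reference, my reading of how the test's budgets come out (the rounding is not spelled out in the commit comments): ids run up to 1000, so a part presumably spans ceil(1000/128) = 8 marks, and 8 marks * (3 + 2) parts * 128 rows per mark = 5120 rows; the final single-row inserts touch only 1 mark per part, and 1 mark * (5 + 2) parts * 128 rows per mark = 896 rows.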