Merge pull request #10268 from ClickHouse/max-rows-to-sort

Added failing tests about "max_rows_to_sort" setting.
2024-11-24 08:32:02 +00:00 · 2020-05-17 22:21:30 +03:00 · 2020-05-17 22:21:30 +03:00 · f7b1263d41
commit f7b1263d41
parent 8d3c37992a c33373f7fb
14 changed files with 360 additions and 18 deletions
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@ -2067,10 +2067,9 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting

    const Settings & settings = context->getSettingsRef();

-    /// TODO: Limits on sorting
-//    IBlockInputStream::LocalLimits limits;
-//    limits.mode = IBlockInputStream::LIMITS_TOTAL;
-//    limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);
+    IBlockInputStream::LocalLimits limits;
+    limits.mode = IBlockInputStream::LIMITS_CURRENT;
+    limits.size_limits = SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, settings.sort_overflow_mode);

    if (input_sorting_info)
    {
@ -2107,6 +2106,8 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting
                return std::make_shared<PartialSortingTransform>(header, output_order_descr, limit);
            });

+            /// NOTE limits are not applied to the size of temporary sets in FinishSortingTransform
+
            pipeline.addSimpleTransform([&](const Block & header) -> ProcessorPtr
            {
                return std::make_shared<FinishSortingTransform>(
@ -2126,6 +2127,15 @@ void InterpreterSelectQuery::executeOrder(QueryPipeline & pipeline, InputSorting
        return std::make_shared<PartialSortingTransform>(header, output_order_descr, limit);
    });

+    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
+    {
+        if (stream_type == QueryPipeline::StreamType::Totals)
+            return nullptr;
+
+        auto transform = std::make_shared<LimitsCheckingTransform>(header, limits);
+        return transform;
+    });
+
    /// Merge the sorted blocks.
    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
    {
--- a/src/Processors/ISource.cpp
+++ b/src/Processors/ISource.cpp
@ -56,11 +56,6 @@ void ISource::work()
        finished = true;
        throw;
    }
-//    {
-//        current_chunk = std::current_exception();
-//        has_input = true;
-//        got_exception = true;
-//    }
 }

 }
--- a/src/Processors/Sources/SourceWithProgress.cpp
+++ b/src/Processors/Sources/SourceWithProgress.cpp
@ -15,7 +15,9 @@ namespace ErrorCodes
 void SourceWithProgress::work()
 {
    if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode))
+    {
        cancel();
+    }
    else
    {
        was_progress_called = false;
@ -57,7 +59,13 @@ void SourceWithProgress::progress(const Progress & value)
        /// The total amount of data processed or intended for processing in all sources, possibly on remote servers.

        ProgressValues progress = process_list_elem->getProgressIn();
-        size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read);
+
+        /// If the mode is "throw" and an estimate of the total number of rows is known, then throw early if the estimate is too high.
+        /// If the mode is "break", then allow reading up to the limit even if the estimate is very high.
+
+        size_t rows_to_check_limit = progress.read_rows;
+        if (limits.size_limits.overflow_mode == OverflowMode::THROW && progress.total_rows_to_read > progress.read_rows)
+            rows_to_check_limit = progress.total_rows_to_read;

        /// Check the restrictions on the
        ///  * amount of data to read
@ -67,9 +75,11 @@ void SourceWithProgress::progress(const Progress & value)

        if (limits.mode == LimitsMode::LIMITS_TOTAL)
        {
-            if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read",
+            if (!limits.size_limits.check(rows_to_check_limit, progress.read_bytes, "rows or bytes to read",
                                          ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
+            {
                cancel();
+            }
        }

        size_t total_rows = progress.total_rows_to_read;
--- a/src/Processors/Transforms/LimitsCheckingTransform.cpp
+++ b/src/Processors/Transforms/LimitsCheckingTransform.cpp
@ -44,7 +44,9 @@ void LimitsCheckingTransform::transform(Chunk & chunk)

        if (limits.mode == LimitsMode::LIMITS_CURRENT &&
            !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES))
+        {
            stopReading();
+        }

        if (quota)
            checkQuota(chunk);
@ -56,13 +58,16 @@ void LimitsCheckingTransform::checkQuota(Chunk & chunk)
    switch (limits.mode)
    {
        case LimitsMode::LIMITS_TOTAL:
-            /// Checked in `progress` method.
+            /// Checked in SourceWithProgress::progress method.
            break;

        case LimitsMode::LIMITS_CURRENT:
        {
            UInt64 total_elapsed = info.total_stopwatch.elapsedNanoseconds();
-            quota->used({Quota::RESULT_ROWS, chunk.getNumRows()}, {Quota::RESULT_BYTES, chunk.bytes()}, {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed});
+            quota->used(
+                {Quota::RESULT_ROWS, chunk.getNumRows()},
+                {Quota::RESULT_BYTES, chunk.bytes()},
+                {Quota::EXECUTION_TIME, total_elapsed - prev_elapsed});
            prev_elapsed = total_elapsed;
            break;
        }
--- a/src/Processors/Transforms/MergeSortingTransform.h
+++ b/src/Processors/Transforms/MergeSortingTransform.h
@ -38,6 +38,9 @@ private:
    VolumePtr tmp_volume;
    size_t min_free_disk_space;

+    size_t sum_rows_in_blocks = 0;
+    size_t sum_bytes_in_blocks = 0;
+
    Logger * log = &Logger::get("MergeSortingTransform");

    /// If remerge doesn't save memory at least several times, mark it as useless and don't do it anymore.
--- a/src/Processors/Transforms/SortingTransform.h
+++ b/src/Processors/Transforms/SortingTransform.h
@ -66,8 +66,8 @@ class SortingTransform : public IProcessor
 public:
    /// limit - if not 0, allowed to return just first 'limit' rows in sorted order.
    SortingTransform(const Block & header,
-                          const SortDescription & description_,
-                          size_t max_merged_block_size_, UInt64 limit_);
+        const SortDescription & description_,
+        size_t max_merged_block_size_, UInt64 limit_);

    ~SortingTransform() override;

@ -83,9 +83,6 @@ protected:
    size_t max_merged_block_size;
    UInt64 limit;

-    size_t sum_rows_in_blocks = 0;
-    size_t sum_bytes_in_blocks = 0;
-
    /// Before operation, will remove constant columns from blocks. And after, place constant columns back.
    /// (to avoid excessive virtual function calls and because constants cannot be serialized in Native format for temporary files)
    /// Save original block structure here.
--- a/tests/queries/0_stateless/01131_max_rows_to_sort.reference
+++ b/tests/queries/0_stateless/01131_max_rows_to_sort.reference
@ -0,0 +1 @@
+1
--- a/tests/queries/0_stateless/01131_max_rows_to_sort.sql
+++ b/tests/queries/0_stateless/01131_max_rows_to_sort.sql
@ -0,0 +1,7 @@
+SET max_rows_to_sort = 100;
+SELECT * FROM system.numbers ORDER BY number; -- { serverError 396 }
+
+SET sort_overflow_mode = 'break';
+SET max_block_size = 1000;
+
+SELECT count() >= 100 AND count() <= 1000 FROM (SELECT * FROM system.numbers ORDER BY number);
--- a/tests/queries/0_stateless/01132_max_rows_to_read.reference
+++ b/tests/queries/0_stateless/01132_max_rows_to_read.reference
@ -0,0 +1,57 @@
+19
+20
+19
+20
+20
+20
+20
+20
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
--- a/tests/queries/0_stateless/01132_max_rows_to_read.sql
+++ b/tests/queries/0_stateless/01132_max_rows_to_read.sql
@ -0,0 +1,30 @@
+SET max_block_size = 10;
+SET max_rows_to_read = 20;
+SET read_overflow_mode = 'throw';
+
+SELECT count() FROM numbers(30); -- { serverError 158 }
+SELECT count() FROM numbers(19);
+SELECT count() FROM numbers(20);
+SELECT count() FROM numbers(21); -- { serverError 158 }
+
+-- check early exception if the estimated number of rows is high
+SELECT * FROM numbers(30); -- { serverError 158 }
+
+SET read_overflow_mode = 'break';
+
+SELECT count() FROM numbers(19);
+SELECT count() FROM numbers(20);
+SELECT count() FROM numbers(21);
+SELECT count() FROM numbers(29);
+SELECT count() FROM numbers(30);
+SELECT count() FROM numbers(31);
+
+-- check that partial result is returned even if the estimated number of rows is high
+SELECT * FROM numbers(30);
+
+-- the same for uneven block sizes
+-- NOTE: currently it outputs a smaller amount of data; it would be better to also output the latest block
+SET max_block_size = 11;
+SELECT * FROM numbers(30);
+SET max_block_size = 9;
+SELECT * FROM numbers(30);
--- a/tests/queries/0_stateless/01133_max_result_rows.reference
+++ b/tests/queries/0_stateless/01133_max_result_rows.reference
@ -0,0 +1,173 @@
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+100
+100
+200
--- a/tests/queries/0_stateless/01133_max_result_rows.sql
+++ b/tests/queries/0_stateless/01133_max_result_rows.sql
@ -0,0 +1,24 @@
+SET max_block_size = 10;
+SET max_result_rows = 20;
+SET result_overflow_mode = 'throw';
+
+SELECT DISTINCT intDiv(number, 10) FROM numbers(300); -- { serverError 396 }
+SELECT DISTINCT intDiv(number, 10) FROM numbers(190);
+SELECT DISTINCT intDiv(number, 10) FROM numbers(200);
+SELECT DISTINCT intDiv(number, 10) FROM numbers(210); -- { serverError 396 }
+
+SET result_overflow_mode = 'break';
+
+SELECT DISTINCT intDiv(number, 10) FROM numbers(300);
+SELECT DISTINCT intDiv(number, 10) FROM numbers(190);
+SELECT DISTINCT intDiv(number, 10) FROM numbers(200);
+SELECT DISTINCT intDiv(number, 10) FROM numbers(210);
+
+SET max_block_size = 10;
+SET max_result_rows = 1;
+SELECT number FROM system.numbers;
+SELECT count() FROM numbers(100);
+-- subquery result is not the total result
+SELECT count() FROM (SELECT * FROM numbers(100));
+-- remote query result is not the total result
+SELECT count() FROM remote('127.0.0.{1,2}', numbers(100));
--- a/tests/queries/0_stateless/01134_set_overflow_mode.reference
+++ b/tests/queries/0_stateless/01134_set_overflow_mode.reference
@ -0,0 +1,13 @@
+1
+0
+1
+0
+---
+1
+0
+1
+0
+1
+0
+1
+0
--- a/tests/queries/0_stateless/01134_set_overflow_mode.sql
+++ b/tests/queries/0_stateless/01134_set_overflow_mode.sql
@ -0,0 +1,17 @@
+SET max_block_size = 10;
+SET max_rows_in_set = 20;
+SET set_overflow_mode = 'throw';
+
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(300)); -- { serverError 191 }
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(190));
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(200));
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(210)); -- { serverError 191 }
+
+SET set_overflow_mode = 'break';
+
+SELECT '---';
+
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(300));
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(190));
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(200));
+SELECT arrayJoin([5, 25]) IN (SELECT DISTINCT toUInt8(intDiv(number, 10)) FROM numbers(210));