From 2b6f0cfba1a8ecc5934daf52184d3730537fc2b1 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 17 Jul 2024 21:09:55 +0000 Subject: [PATCH] Backport #66231 to 24.6: Fix bug in `numbers` when both limit and offset is used but the index cannot be used --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 81 ++++++++++++------- .../02970_generate_series.reference | 1 + .../0_stateless/02970_generate_series.sql | 1 + ...numbers_limit_and_offset_complex.reference | 14 ++++ ...ystem_numbers_limit_and_offset_complex.sql | 38 +++++++++ ..._numbers_limit_and_offset_simple.reference | 6 ++ ...system_numbers_limit_and_offset_simple.sql | 11 +++ 7 files changed, 123 insertions(+), 29 deletions(-) create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 11371578c79..ce567d3c388 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -35,18 +35,33 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step iotaWithStep(begin, count, first_value, step); } +/// The range is defined as [start, end) +UInt64 itemCountInRange(UInt64 start, UInt64 end, UInt64 step) +{ + const auto range_count = end - start; + if (step == 1) + return range_count; + + return (range_count - 1) / step + 1; +} + class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional limit_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_) + NumbersSource( + UInt64 block_size_, + UInt64 offset_, + std::optional end_, + const std::string & column_name, + UInt64 step_in_chunk_, + UInt64 step_between_chunks_) : ISource(createHeader(column_name)) , block_size(block_size_) , next(offset_) - , chunk_step(chunk_step_) - , step(step_) + , end(end_) + , step_in_chunk(step_in_chunk_) + , step_between_chunks(step_between_chunks_) { - if (limit_.has_value()) - end = limit_.value() + offset_; } String getName() const override { return "Numbers"; } @@ -63,7 +78,10 @@ protected: { if (end.value() <= next) return {}; - real_block_size = std::min(block_size, end.value() - next); + + auto max_items_to_generate = itemCountInRange(next, *end, step_in_chunk); + + real_block_size = std::min(block_size, max_items_to_generate); } auto column = ColumnUInt64::create(real_block_size); ColumnUInt64::Container & vec = column->getData(); @@ -73,21 +91,20 @@ protected: UInt64 * current_end = &vec[real_block_size]; - iotaWithStepOptimized(pos, static_cast(current_end - pos), curr, step); + iotaWithStepOptimized(pos, static_cast(current_end - pos), curr, step_in_chunk); - next += chunk_step; + next += step_between_chunks; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, real_block_size}; } private: UInt64 block_size; UInt64 next; - UInt64 chunk_step; std::optional end; /// not included - UInt64 step; + UInt64 step_in_chunk; + UInt64 step_between_chunks; }; struct RangeWithStep @@ -548,20 +565,39 @@ Pipe ReadFromSystemNumbersStep::makePipe() return pipe; } + const auto end = std::invoke( + [&]() -> std::optional + { + if (numbers_storage.limit.has_value()) + return *(numbers_storage.limit) + numbers_storage.offset; + return {}; + }); + /// Fall back to NumbersSource + /// Range in a single block + const auto block_range = max_block_size * numbers_storage.step; + /// Step between chunks in a single source. + /// It is bigger than block_range in case of multiple threads, because we have to account for other sources as well. + const auto step_between_chunks = num_streams * block_range; for (size_t i = 0; i < num_streams; ++i) { + const auto source_offset = i * block_range; + if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_offset) + break; + + const auto source_start = numbers_storage.offset + source_offset; + auto source = std::make_shared( max_block_size, - numbers_storage.offset + i * max_block_size * numbers_storage.step, - numbers_storage.limit, - num_streams * max_block_size * numbers_storage.step, + source_start, + end, numbers_storage.column_name, - numbers_storage.step); + numbers_storage.step, + step_between_chunks); if (numbers_storage.limit && i == 0) { - auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1; + auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step); if (limit > 0 && limit < rows_appr) rows_appr = limit; source->addTotalRowsApprox(rows_appr); @@ -570,19 +606,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() pipe.addSource(std::move(source)); } - if (numbers_storage.limit) - { - size_t i = 0; - auto storage_limit = (*numbers_storage.limit - 1) / numbers_storage.step + 1; - /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. - pipe.addSimpleTransform( - [&](const Block & header) - { - ++i; - return std::make_shared(header, storage_limit * i / num_streams - storage_limit * (i - 1) / num_streams, 0); - }); - } - return pipe; } diff --git a/tests/queries/0_stateless/02970_generate_series.reference b/tests/queries/0_stateless/02970_generate_series.reference index 6e6f3c81587..f8a8e1891b7 100644 --- a/tests/queries/0_stateless/02970_generate_series.reference +++ b/tests/queries/0_stateless/02970_generate_series.reference @@ -5,6 +5,7 @@ 501 50 17928 +17928 0 10 13 diff --git a/tests/queries/0_stateless/02970_generate_series.sql b/tests/queries/0_stateless/02970_generate_series.sql index 0844714b3a6..edae884a561 100644 --- a/tests/queries/0_stateless/02970_generate_series.sql +++ b/tests/queries/0_stateless/02970_generate_series.sql @@ -5,6 +5,7 @@ SELECT count() FROM generate_series(7, 77, 10); SELECT count() FROM generate_series(0, 1000, 2); SELECT count() FROM generate_series(0, 999, 20); SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1; +SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1 SETTINGS max_block_size = 71; SELECT * FROM generate_series(5, 4); SELECT * FROM generate_series(0, 0); diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference new file mode 100644 index 00000000000..d38b21d2d05 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference @@ -0,0 +1,14 @@ +18679 31 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +100 +110 +4250 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql new file mode 100644 index 00000000000..756e08da27d --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql @@ -0,0 +1,38 @@ +--- The following query was buggy before, so let's use it as a test case +WITH + (num > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < num) AND ((num % y) = 0)), range(toUInt64(sqrt(num)) + 1))) = 0) AS is_prime_slow +SELECT + num, + ds, +FROM +( + WITH + arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum + SELECT + 1 + (number * 2) AS num, + digits_sum AS ds + FROM numbers_mt(10000) + WHERE ds IN ( + WITH + (number > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < number) AND ((number % y) = 0)), range(toUInt64(sqrt(number)) + 1))) = 0) AS is_prime_slow + SELECT number + FROM numbers(180 + 1) + WHERE is_prime_slow + ) +) +WHERE is_prime_slow +ORDER BY num ASC +LIMIT 998, 1 +SETTINGS max_block_size = 64, max_threads=16; + +SELECT number +FROM numbers_mt(120) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11; + +SELECT number +FROM numbers_mt(4242, 9) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11; diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference new file mode 100644 index 00000000000..6aad3ee0b4b --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference @@ -0,0 +1,6 @@ +case 1 +9900 +9910 +9920 +case 2 +9990 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql new file mode 100644 index 00000000000..df01ddf4312 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql @@ -0,0 +1,11 @@ +SELECT 'case 1'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 990, 3; + +SELECT 'case 2'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 999, 20 SETTINGS max_block_size = 31;