From 94b6362c2dbe8211ed234728e2b7e915bddc4ce4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 7 Mar 2024 09:08:32 +0000 Subject: [PATCH] Backport #60546 to 24.2: Reduce the number of read rows from `system.numbers` --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 22 ++++++++++++++----- .../integration/test_storage_numbers/test.py | 16 ++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index c72c63d09c4..a294683c640 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -26,9 +26,11 @@ namespace class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_) + NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional<UInt64> limit_, UInt64 step_) : ISource(createHeader()), block_size(block_size_), next(offset_), step(step_) { + if (limit_.has_value()) + end = limit_.value() + offset_; } String getName() const override { return "Numbers"; } @@ -38,24 +40,32 @@ public: protected: Chunk generate() override { - auto column = ColumnUInt64::create(block_size); + UInt64 real_block_size = block_size; + if (end.has_value()) + { + if (end.value() <= next) + return {}; + real_block_size = std::min(block_size, end.value() - next); + } + auto column = ColumnUInt64::create(real_block_size); ColumnUInt64::Container & vec = column->getData(); UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class. UInt64 * pos = vec.data(); /// This also accelerates the code. 
- UInt64 * end = &vec[block_size]; - iota(pos, static_cast<size_t>(end - pos), curr); + UInt64 * end_ = &vec[real_block_size]; + iota(pos, static_cast<size_t>(end_ - pos), curr); next += step; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, block_size}; + return {Columns{std::move(column)}, real_block_size}; } private: UInt64 block_size; UInt64 next; + std::optional<UInt64> end; /// not included UInt64 step; }; @@ -478,7 +488,7 @@ Pipe ReadFromSystemNumbersStep::makePipe() for (size_t i = 0; i < num_streams; ++i) { auto source - = std::make_shared<NumbersSource>(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size); + = std::make_shared<NumbersSource>(max_block_size, numbers_storage.offset + i * max_block_size, numbers_storage.limit, num_streams * max_block_size); if (numbers_storage.limit && i == 0) { diff --git a/tests/integration/test_storage_numbers/test.py b/tests/integration/test_storage_numbers/test.py index 61fe8719ea2..cbd7793fd8c 100644 --- a/tests/integration/test_storage_numbers/test.py +++ b/tests/integration/test_storage_numbers/test.py @@ -242,3 +242,19 @@ def test_overflow(started_cluster): ) assert response == "(18446744073709551614),(18446744073709551615),(0),(1),(2)" check_read_rows("test_overflow", 5) + + +def test_non_number_filter(started_cluster): + response = node.query( + "SELECT toString(number) as a FROM numbers(3) WHERE a = '1' FORMAT Values", + query_id="test_non_number_filter", + ) + assert response == "('1')" + check_read_rows("test_non_number_filter", 3) + + response = node.query( + "SELECT toString(number) as a FROM numbers(1, 4) WHERE a = '1' FORMAT Values SETTINGS max_block_size = 3", + query_id="test_non_number_filter2", + ) + assert response == "('1')" + check_read_rows("test_non_number_filter2", 4)