Reduce the number of read rows from system.numbers (#60546)

* Fix reading more data than necessary from system.numbers

* Fix tests
This commit is contained in:
JackyWoo 2024-03-07 16:38:07 +08:00 committed by GitHub
parent 5e597228d7
commit 9e7894d8cb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 32 additions and 6 deletions

View File

@ -26,9 +26,11 @@ namespace
class NumbersSource : public ISource
{
public:
/// Constructor as rendered in this diff view: the first signature line appears to be the
/// pre-change version (no limit) and the second one its replacement, which adds `limit_`.
/// `block_size_` and `step_` are stored as-is; `offset_` becomes the initial value of `next`.
NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_)
NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional<UInt64> limit_, UInt64 step_)
: ISource(createHeader()), block_size(block_size_), next(offset_), step(step_)
{
/// When a limit is given, store `limit_ + offset_` in `end`; otherwise `end` is left unset.
/// NOTE(review): presumably UInt64 is an unsigned 64-bit integer, in which case this sum wraps
/// around for offsets close to the maximum value — confirm callers cannot reach that case.
if (limit_.has_value())
end = limit_.value() + offset_;
}
/// Returns the fixed name string of this source: "Numbers".
String getName() const override { return "Numbers"; }
@ -38,24 +40,32 @@ public:
protected:
/// Builds one ColumnUInt64, fills it through `iota` starting from `next`, and returns it as a Chunk.
/// This diff view interleaves removed and added lines without markers: every statement that
/// appears twice (once using `block_size` / `end`, once using `real_block_size` / `end_`)
/// appears to be the old line followed by its replacement.
Chunk generate() override
{
auto column = ColumnUInt64::create(block_size);
/// Number of rows for this chunk; starts at the full block size and is reduced below when `end` is set.
UInt64 real_block_size = block_size;
if (end.has_value())
{
/// `next` is already at or past the bound: return a default-constructed Chunk.
if (end.value() <= next)
return {};
/// Do not create more rows than the distance from `next` to `end`.
real_block_size = std::min(block_size, end.value() - next);
}
auto column = ColumnUInt64::create(real_block_size);
ColumnUInt64::Container & vec = column->getData();
UInt64 curr = next; /// The local variable for some reason works faster (>20%) than member of class.
UInt64 * pos = vec.data(); /// This also accelerates the code.
UInt64 * end = &vec[block_size];
iota(pos, static_cast<size_t>(end - pos), curr);
/// Named `end_` so that this pointer does not shadow the `end` member read above.
UInt64 * end_ = &vec[real_block_size];
/// Presumably writes curr, curr + 1, ... into the `end_ - pos` slots starting at `pos`;
/// `iota` is defined outside this view — confirm.
iota(pos, static_cast<size_t>(end_ - pos), curr);
/// Advance the start value for the following call by `step`.
next += step;
/// Report the row count and byte size of the filled column.
progress(column->size(), column->byteSize());
return {Columns{std::move(column)}, block_size};
return {Columns{std::move(column)}, real_block_size};
}
private:
/// Upper bound on the number of rows created per generate() call.
UInt64 block_size;
/// Start value handed to iota by the next generate() call; increased by `step` after each call.
UInt64 next;
/// Exclusive upper bound (set to limit + offset in the constructor); unset when no limit was given.
std::optional<UInt64> end; /// not included
/// Amount added to `next` after each generated chunk.
UInt64 step;
};
@ -478,7 +488,7 @@ Pipe ReadFromSystemNumbersStep::makePipe()
for (size_t i = 0; i < num_streams; ++i)
{
auto source
= std::make_shared<NumbersSource>(max_block_size, numbers_storage.offset + i * max_block_size, num_streams * max_block_size);
= std::make_shared<NumbersSource>(max_block_size, numbers_storage.offset + i * max_block_size, numbers_storage.limit, num_streams * max_block_size);
if (numbers_storage.limit && i == 0)
{

View File

@ -242,3 +242,19 @@ def test_overflow(started_cluster):
)
assert response == "(18446744073709551614),(18446744073709551615),(0),(1),(2)"
check_read_rows("test_overflow", 5)
def test_non_number_filter(started_cluster):
    """Run two `numbers` queries filtered on a string expression.

    For each query the response must be "('1')", and `check_read_rows`
    (defined elsewhere in this file) is called with the query id and the
    expected row count.
    """
    # (query text, query id, expected row count passed to check_read_rows)
    cases = (
        (
            "SELECT toString(number) as a FROM numbers(3) WHERE a = '1' FORMAT Values",
            "test_non_number_filter",
            3,
        ),
        (
            "SELECT toString(number) as a FROM numbers(1, 4) WHERE a = '1' FORMAT Values SETTINGS max_block_size = 3",
            "test_non_number_filter2",
            4,
        ),
    )
    for sql, qid, expected_rows in cases:
        result = node.query(sql, query_id=qid)
        assert result == "('1')"
        check_read_rows(qid, expected_rows)