Backport #66231 to 24.6: Fix bug in numbers when both limit and offset is used but the index cannot be used

This commit is contained in:
robot-clickhouse 2024-07-17 21:09:55 +00:00
parent 293a9a0c6c
commit 2b6f0cfba1
7 changed files with 123 additions and 29 deletions

View File

@ -35,18 +35,33 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step
iotaWithStep(begin, count, first_value, step);
}
/// The range is defined as [start, end)
UInt64 itemCountInRange(UInt64 start, UInt64 end, UInt64 step)
{
const auto range_count = end - start;
if (step == 1)
return range_count;
return (range_count - 1) / step + 1;
}
class NumbersSource : public ISource
{
public:
NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional<UInt64> limit_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_)
NumbersSource(
UInt64 block_size_,
UInt64 offset_,
std::optional<UInt64> end_,
const std::string & column_name,
UInt64 step_in_chunk_,
UInt64 step_between_chunks_)
: ISource(createHeader(column_name))
, block_size(block_size_)
, next(offset_)
, chunk_step(chunk_step_)
, step(step_)
, end(end_)
, step_in_chunk(step_in_chunk_)
, step_between_chunks(step_between_chunks_)
{
if (limit_.has_value())
end = limit_.value() + offset_;
}
String getName() const override { return "Numbers"; }
@ -63,7 +78,10 @@ protected:
{
if (end.value() <= next)
return {};
real_block_size = std::min(block_size, end.value() - next);
auto max_items_to_generate = itemCountInRange(next, *end, step_in_chunk);
real_block_size = std::min(block_size, max_items_to_generate);
}
auto column = ColumnUInt64::create(real_block_size);
ColumnUInt64::Container & vec = column->getData();
@ -73,21 +91,20 @@ protected:
UInt64 * current_end = &vec[real_block_size];
iotaWithStepOptimized(pos, static_cast<size_t>(current_end - pos), curr, step);
iotaWithStepOptimized(pos, static_cast<size_t>(current_end - pos), curr, step_in_chunk);
next += chunk_step;
next += step_between_chunks;
progress(column->size(), column->byteSize());
return {Columns{std::move(column)}, real_block_size};
}
private:
UInt64 block_size;
UInt64 next;
UInt64 chunk_step;
std::optional<UInt64> end; /// not included
UInt64 step;
UInt64 step_in_chunk;
UInt64 step_between_chunks;
};
struct RangeWithStep
@ -548,20 +565,39 @@ Pipe ReadFromSystemNumbersStep::makePipe()
return pipe;
}
const auto end = std::invoke(
[&]() -> std::optional<UInt64>
{
if (numbers_storage.limit.has_value())
return *(numbers_storage.limit) + numbers_storage.offset;
return {};
});
/// Fall back to NumbersSource
/// Range in a single block
const auto block_range = max_block_size * numbers_storage.step;
/// Step between chunks in a single source.
/// It is bigger than block_range in case of multiple threads, because we have to account for other sources as well.
const auto step_between_chunks = num_streams * block_range;
for (size_t i = 0; i < num_streams; ++i)
{
const auto source_offset = i * block_range;
if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_offset)
break;
const auto source_start = numbers_storage.offset + source_offset;
auto source = std::make_shared<NumbersSource>(
max_block_size,
numbers_storage.offset + i * max_block_size * numbers_storage.step,
numbers_storage.limit,
num_streams * max_block_size * numbers_storage.step,
source_start,
end,
numbers_storage.column_name,
numbers_storage.step);
numbers_storage.step,
step_between_chunks);
if (numbers_storage.limit && i == 0)
{
auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1;
auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step);
if (limit > 0 && limit < rows_appr)
rows_appr = limit;
source->addTotalRowsApprox(rows_appr);
@ -570,19 +606,6 @@ Pipe ReadFromSystemNumbersStep::makePipe()
pipe.addSource(std::move(source));
}
if (numbers_storage.limit)
{
size_t i = 0;
auto storage_limit = (*numbers_storage.limit - 1) / numbers_storage.step + 1;
/// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly.
pipe.addSimpleTransform(
[&](const Block & header)
{
++i;
return std::make_shared<LimitTransform>(header, storage_limit * i / num_streams - storage_limit * (i - 1) / num_streams, 0);
});
}
return pipe;
}

View File

@ -5,6 +5,7 @@
501
50
17928
17928
0
10
13

View File

@ -5,6 +5,7 @@ SELECT count() FROM generate_series(7, 77, 10);
SELECT count() FROM generate_series(0, 1000, 2);
SELECT count() FROM generate_series(0, 999, 20);
SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1;
SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1 SETTINGS max_block_size = 71;
SELECT * FROM generate_series(5, 4);
SELECT * FROM generate_series(0, 0);

View File

@ -0,0 +1,14 @@
18679 31
0
10
20
30
40
50
60
70
80
90
100
110
4250

View File

@ -0,0 +1,38 @@
--- The following query was buggy before, so let's use it as a test case
WITH
(num > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < num) AND ((num % y) = 0)), range(toUInt64(sqrt(num)) + 1))) = 0) AS is_prime_slow
SELECT
num,
ds,
FROM
(
WITH
arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum
SELECT
1 + (number * 2) AS num,
digits_sum AS ds
FROM numbers_mt(10000)
WHERE ds IN (
WITH
(number > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < number) AND ((number % y) = 0)), range(toUInt64(sqrt(number)) + 1))) = 0) AS is_prime_slow
SELECT number
FROM numbers(180 + 1)
WHERE is_prime_slow
)
)
WHERE is_prime_slow
ORDER BY num ASC
LIMIT 998, 1
SETTINGS max_block_size = 64, max_threads=16;
SELECT number
FROM numbers_mt(120)
WHERE (number % 10) = 0
ORDER BY number ASC
SETTINGS max_block_size = 31, max_threads = 11;
SELECT number
FROM numbers_mt(4242, 9)
WHERE (number % 10) = 0
ORDER BY number ASC
SETTINGS max_block_size = 31, max_threads = 11;

View File

@ -0,0 +1,6 @@
case 1
9900
9910
9920
case 2
9990

View File

@ -0,0 +1,11 @@
SELECT 'case 1';
SELECT number FROM numbers_mt(10000)
WHERE (number % 10) = 0
ORDER BY number ASC
LIMIT 990, 3;
SELECT 'case 2';
SELECT number FROM numbers_mt(10000)
WHERE (number % 10) = 0
ORDER BY number ASC
LIMIT 999, 20 SETTINGS max_block_size = 31;