ClickHouse/dbms/Storages/System/StorageSystemNumbers.cpp

178 lines
4.9 KiB
C++
Raw Normal View History

#include <Common/Exception.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/LimitBlockInputStream.h>
#include <Storages/System/StorageSystemNumbers.h>
2010-03-04 19:20:28 +00:00
2019-11-15 16:23:48 +00:00
#include <Processors/Sources/SourceWithProgress.h>
#include <Processors/Pipe.h>
#include <Processors/LimitTransform.h>
2019-08-20 19:53:27 +00:00
2010-03-04 19:20:28 +00:00
namespace DB
{
2019-08-20 19:53:27 +00:00
namespace
{
2019-11-15 16:23:48 +00:00
class NumbersSource : public SourceWithProgress
2010-03-04 19:20:28 +00:00
{
public:
2019-11-15 16:23:48 +00:00
NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_)
: SourceWithProgress(createHeader()), block_size(block_size_), next(offset_), step(step_) {}
String getName() const override { return "Numbers"; }
protected:
2019-11-15 16:23:48 +00:00
Chunk generate() override
{
auto column = ColumnUInt64::create(block_size);
ColumnUInt64::Container & vec = column->getData();
size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
UInt64 * pos = vec.data(); /// This also accelerates the code.
UInt64 * end = &vec[block_size];
while (pos < end)
*pos++ = curr++;
next += step;
2019-11-15 16:23:48 +00:00
2020-01-27 10:05:57 +00:00
progress({column->size(), column->byteSize()});
2019-11-15 16:23:48 +00:00
return { Columns {std::move(column)}, block_size };
}
2019-11-15 16:23:48 +00:00
private:
2019-02-10 16:22:38 +00:00
UInt64 block_size;
UInt64 next;
UInt64 step;
2019-11-15 16:23:48 +00:00
static Block createHeader()
{
return { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "number") };
}
};
2019-08-20 19:53:27 +00:00
struct NumbersMultiThreadedState
2019-08-20 10:28:20 +00:00
{
std::atomic<UInt64> counter;
2019-08-20 19:53:27 +00:00
explicit NumbersMultiThreadedState(UInt64 offset) : counter(offset) {}
2019-08-20 10:28:20 +00:00
};
2019-08-20 19:53:27 +00:00
using NumbersMultiThreadedStatePtr = std::shared_ptr<NumbersMultiThreadedState>;
2019-08-20 10:28:20 +00:00
2019-11-15 16:23:48 +00:00
class NumbersMultiThreadedSource : public SourceWithProgress
2019-08-20 10:28:20 +00:00
{
public:
2019-11-15 16:23:48 +00:00
NumbersMultiThreadedSource(NumbersMultiThreadedStatePtr state_, UInt64 block_size_, UInt64 max_counter_)
: SourceWithProgress(createHeader())
, state(std::move(state_))
, block_size(block_size_)
, max_counter(max_counter_) {}
2019-08-20 10:28:20 +00:00
String getName() const override { return "NumbersMt"; }
protected:
2019-11-15 16:23:48 +00:00
Chunk generate() override
2019-08-20 10:28:20 +00:00
{
if (block_size == 0)
return {};
2019-08-20 19:53:27 +00:00
UInt64 curr = state->counter.fetch_add(block_size, std::memory_order_acquire);
2019-08-20 10:28:20 +00:00
if (curr >= max_counter)
return {};
if (curr + block_size > max_counter)
block_size = max_counter - curr;
auto column = ColumnUInt64::create(block_size);
ColumnUInt64::Container & vec = column->getData();
UInt64 * pos = vec.data();
UInt64 * end = &vec[block_size];
while (pos < end)
*pos++ = curr++;
2020-01-27 10:05:57 +00:00
progress({column->size(), column->byteSize()});
2019-11-15 16:23:48 +00:00
return { Columns {std::move(column)}, block_size };
2019-08-20 10:28:20 +00:00
}
private:
2019-08-20 19:53:27 +00:00
NumbersMultiThreadedStatePtr state;
2019-08-20 10:28:20 +00:00
UInt64 block_size;
UInt64 max_counter;
2019-11-15 16:23:48 +00:00
2020-03-18 00:57:00 +00:00
static Block createHeader()
2019-11-15 16:23:48 +00:00
{
return { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "number") };
}
2019-08-20 10:28:20 +00:00
};
2019-08-20 19:53:27 +00:00
}
2019-08-20 10:28:20 +00:00
2020-03-10 19:36:17 +00:00
/// Stores the generation parameters and declares the only column of the table: "number" of type UInt64.
StorageSystemNumbers::StorageSystemNumbers(
    const StorageID & table_id,
    bool multithreaded_,
    std::optional<UInt64> limit_,
    UInt64 offset_,
    bool even_distribution_)
    : IStorage(table_id)
    , multithreaded(multithreaded_)
    , even_distribution(even_distribution_)
    , limit(limit_)
    , offset(offset_)
{
    ColumnsDescription columns({{"number", std::make_shared<DataTypeUInt64>()}});
    setColumns(std::move(columns));
}
2010-03-04 19:20:28 +00:00
/** Creates the pipes that generate the "number" column.
  *
  * column_names   - requested columns, validated with check();
  * max_block_size - rows per generated block; lowered to `limit` when the limit is smaller;
  * num_streams    - requested number of pipes; forced to 1 when multithreading is not used.
  *
  * Two layouts are possible:
  *  - shared counter: several NumbersMultiThreadedSource pull ranges from one atomic counter.
  *    Used when there is more than one stream, even distribution is not requested
  *    and a non-zero limit is set;
  *  - interleaved blocks: stream `i` starts at `offset + i * max_block_size` and jumps by
  *    `num_streams * max_block_size` after every block. When a limit is set, each stream
  *    is cut by a LimitTransform to its share of the limit.
  */
Pipes StorageSystemNumbers::read(
    const Names & column_names,
    const SelectQueryInfo &,
    const Context & /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    unsigned num_streams)
{
    check(column_names);

    /// The decision is taken per call in a local flag. Writing it back into the `multithreaded`
    /// member would make one read with a small limit disable multithreading for all later reads
    /// of the same storage object.
    bool use_multithreaded = multithreaded;

    if (limit && *limit < max_block_size)
    {
        /// Everything fits into a single block: one stream is enough.
        max_block_size = static_cast<size_t>(*limit);
        use_multithreaded = false;
    }

    if (!use_multithreaded)
        num_streams = 1;

    Pipes res;
    res.reserve(num_streams);

    /// `limit` is optional: make sure it holds a value before dereferencing it.
    if (num_streams > 1 && !even_distribution && limit && *limit)
    {
        auto state = std::make_shared<NumbersMultiThreadedState>(offset);
        UInt64 max_counter = offset + *limit;

        for (size_t i = 0; i < num_streams; ++i)
            res.emplace_back(std::make_shared<NumbersMultiThreadedSource>(state, max_block_size, max_counter));

        return res;
    }

    for (size_t i = 0; i < num_streams; ++i)
    {
        auto source = std::make_shared<NumbersSource>(max_block_size, offset + i * max_block_size, num_streams * max_block_size);

        /// Report the expected total only once, from the first source.
        if (limit && i == 0)
            source->addTotalRowsApprox(*limit);

        res.emplace_back(std::move(source));

        if (limit)
        {
            /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly.
            res.back().addSimpleTransform(std::make_shared<LimitTransform>(
                res.back().getHeader(), *limit * (i + 1) / num_streams - *limit * i / num_streams, 0));
        }
    }

    return res;
}
}