2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2019-01-23 14:48:50 +00:00
|
|
|
#include <DataStreams/IBlockInputStream.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/System/StorageSystemNumbers.h>
|
2010-03-04 19:20:28 +00:00
|
|
|
|
2019-11-15 16:23:48 +00:00
|
|
|
#include <Processors/Sources/SourceWithProgress.h>
|
|
|
|
#include <Processors/Pipe.h>
|
|
|
|
#include <Processors/LimitTransform.h>
|
2019-08-20 19:53:27 +00:00
|
|
|
|
2010-03-04 19:20:28 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-08-20 19:53:27 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2019-11-15 16:23:48 +00:00
|
|
|
class NumbersSource : public SourceWithProgress
|
2010-03-04 19:20:28 +00:00
|
|
|
{
|
2014-08-22 17:26:43 +00:00
|
|
|
public:
|
2019-11-15 16:23:48 +00:00
|
|
|
NumbersSource(UInt64 block_size_, UInt64 offset_, UInt64 step_)
|
|
|
|
: SourceWithProgress(createHeader()), block_size(block_size_), next(offset_), step(step_) {}
|
2014-08-22 17:26:43 +00:00
|
|
|
|
2018-01-06 18:10:44 +00:00
|
|
|
String getName() const override { return "Numbers"; }
|
|
|
|
|
2014-08-22 17:26:43 +00:00
|
|
|
protected:
|
2019-11-15 16:23:48 +00:00
|
|
|
Chunk generate() override
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-12-14 01:43:19 +00:00
|
|
|
auto column = ColumnUInt64::create(block_size);
|
2017-12-15 21:32:25 +00:00
|
|
|
ColumnUInt64::Container & vec = column->getData();
|
2014-08-22 17:26:43 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t curr = next; /// The local variable for some reason works faster (>20%) than member of class.
|
2018-09-02 03:00:04 +00:00
|
|
|
UInt64 * pos = vec.data(); /// This also accelerates the code.
|
2017-04-01 07:20:54 +00:00
|
|
|
UInt64 * end = &vec[block_size];
|
|
|
|
while (pos < end)
|
|
|
|
*pos++ = curr++;
|
2014-08-22 17:26:43 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
next += step;
|
2019-11-15 16:23:48 +00:00
|
|
|
|
2020-01-27 10:05:57 +00:00
|
|
|
progress({column->size(), column->byteSize()});
|
|
|
|
|
2019-11-15 16:23:48 +00:00
|
|
|
return { Columns {std::move(column)}, block_size };
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2019-11-15 16:23:48 +00:00
|
|
|
|
2014-08-22 17:26:43 +00:00
|
|
|
private:
|
2019-02-10 16:22:38 +00:00
|
|
|
UInt64 block_size;
|
2017-04-01 07:20:54 +00:00
|
|
|
UInt64 next;
|
|
|
|
UInt64 step;
|
2019-11-15 16:23:48 +00:00
|
|
|
|
|
|
|
static Block createHeader()
|
|
|
|
{
|
|
|
|
return { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "number") };
|
|
|
|
}
|
2014-08-22 17:26:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2019-08-20 19:53:27 +00:00
|
|
|
struct NumbersMultiThreadedState
|
2019-08-20 10:28:20 +00:00
|
|
|
{
|
|
|
|
std::atomic<UInt64> counter;
|
2019-08-20 19:53:27 +00:00
|
|
|
explicit NumbersMultiThreadedState(UInt64 offset) : counter(offset) {}
|
2019-08-20 10:28:20 +00:00
|
|
|
};
|
|
|
|
|
2019-08-20 19:53:27 +00:00
|
|
|
using NumbersMultiThreadedStatePtr = std::shared_ptr<NumbersMultiThreadedState>;
|
2019-08-20 10:28:20 +00:00
|
|
|
|
2019-11-15 16:23:48 +00:00
|
|
|
class NumbersMultiThreadedSource : public SourceWithProgress
|
2019-08-20 10:28:20 +00:00
|
|
|
{
|
|
|
|
public:
|
2019-11-15 16:23:48 +00:00
|
|
|
NumbersMultiThreadedSource(NumbersMultiThreadedStatePtr state_, UInt64 block_size_, UInt64 max_counter_)
|
|
|
|
: SourceWithProgress(createHeader())
|
|
|
|
, state(std::move(state_))
|
|
|
|
, block_size(block_size_)
|
|
|
|
, max_counter(max_counter_) {}
|
2019-08-20 10:28:20 +00:00
|
|
|
|
|
|
|
String getName() const override { return "NumbersMt"; }
|
|
|
|
|
|
|
|
protected:
|
2019-11-15 16:23:48 +00:00
|
|
|
Chunk generate() override
|
2019-08-20 10:28:20 +00:00
|
|
|
{
|
|
|
|
if (block_size == 0)
|
|
|
|
return {};
|
|
|
|
|
2019-08-20 19:53:27 +00:00
|
|
|
UInt64 curr = state->counter.fetch_add(block_size, std::memory_order_acquire);
|
2019-08-20 10:28:20 +00:00
|
|
|
|
|
|
|
if (curr >= max_counter)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
if (curr + block_size > max_counter)
|
|
|
|
block_size = max_counter - curr;
|
|
|
|
|
|
|
|
auto column = ColumnUInt64::create(block_size);
|
|
|
|
ColumnUInt64::Container & vec = column->getData();
|
|
|
|
|
|
|
|
UInt64 * pos = vec.data();
|
|
|
|
UInt64 * end = &vec[block_size];
|
|
|
|
while (pos < end)
|
|
|
|
*pos++ = curr++;
|
|
|
|
|
2020-01-27 10:05:57 +00:00
|
|
|
progress({column->size(), column->byteSize()});
|
|
|
|
|
2019-11-15 16:23:48 +00:00
|
|
|
return { Columns {std::move(column)}, block_size };
|
2019-08-20 10:28:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2019-08-20 19:53:27 +00:00
|
|
|
NumbersMultiThreadedStatePtr state;
|
2019-08-20 10:28:20 +00:00
|
|
|
|
|
|
|
UInt64 block_size;
|
|
|
|
UInt64 max_counter;
|
2019-11-15 16:23:48 +00:00
|
|
|
|
2020-03-18 00:57:00 +00:00
|
|
|
static Block createHeader()
|
2019-11-15 16:23:48 +00:00
|
|
|
{
|
|
|
|
return { ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "number") };
|
|
|
|
}
|
2019-08-20 10:28:20 +00:00
|
|
|
};
|
|
|
|
|
2019-08-20 19:53:27 +00:00
|
|
|
}
|
|
|
|
|
2019-08-20 10:28:20 +00:00
|
|
|
|
2020-03-10 19:36:17 +00:00
|
|
|
/// Stores the read settings and registers the table's only column: "number" of type UInt64.
StorageSystemNumbers::StorageSystemNumbers(
    const StorageID & table_id,
    bool multithreaded_,
    std::optional<UInt64> limit_,
    UInt64 offset_,
    bool even_distribution_)
    : IStorage(table_id)
    , multithreaded(multithreaded_)
    , even_distribution(even_distribution_)
    , limit(limit_)
    , offset(offset_)
{
    StorageInMemoryMetadata metadata;
    metadata.setColumns(ColumnsDescription({{"number", std::make_shared<DataTypeUInt64>()}}));
    setInMemoryMetadata(metadata);
}
|
2010-03-04 19:20:28 +00:00
|
|
|
|
2020-08-06 12:24:05 +00:00
|
|
|
/// Builds the pipe that produces the "number" column.
///
/// column_names      - requested columns; validated against metadata_snapshot.
/// max_block_size    - rows per generated chunk; lowered to `limit` when the limit is smaller.
/// num_streams       - requested number of parallel sources; forced to 1 when not multithreaded.
///
/// Returns a pipe with `num_streams` sources. When a limit is set, the output is bounded either
/// by the shared-counter sources or by a LimitTransform attached to every stream.
Pipe StorageSystemNumbers::read(
    const Names & column_names,
    const StorageMetadataPtr & metadata_snapshot,
    SelectQueryInfo &,
    const Context & /*context*/,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    unsigned num_streams)
{
    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());

    /// A limit smaller than one block is served by a single stream with a block of exactly that size.
    if (limit && *limit < max_block_size)
    {
        max_block_size = static_cast<size_t>(*limit);
        multithreaded = false;
    }

    if (!multithreaded)
        num_streams = 1;

    Pipe pipe;

    /// `limit` is optional, so it must be checked for a value before it is dereferenced;
    /// dereferencing an empty std::optional is undefined behaviour.
    if (num_streams > 1 && !even_distribution && limit && *limit)
    {
        /// All sources take ranges from one shared counter until max_counter is reached.
        auto state = std::make_shared<NumbersMultiThreadedState>(offset);
        UInt64 max_counter = offset + *limit;

        for (size_t i = 0; i < num_streams; ++i)
            pipe.addSource(std::make_shared<NumbersMultiThreadedSource>(state, max_block_size, max_counter));

        return pipe;
    }

    /// Source i starts at block i and then skips num_streams blocks after every chunk.
    for (size_t i = 0; i < num_streams; ++i)
    {
        auto source = std::make_shared<NumbersSource>(max_block_size, offset + i * max_block_size, num_streams * max_block_size);

        if (limit && i == 0)
            source->addTotalRowsApprox(*limit);

        pipe.addSource(std::move(source));
    }

    if (limit)
    {
        size_t i = 0;
        /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly.
        pipe.addSimpleTransform([&](const Block & header)
        {
            ++i;
            return std::make_shared<LimitTransform>(
                header, *limit * i / num_streams - *limit * (i - 1) / num_streams, 0);
        });
    }

    return pipe;
}
|
|
|
|
|
|
|
|
}
|