ClickHouse/dbms/DataStreams/narrowBlockInputStreams.cpp

73 lines
1.7 KiB
C++
Raw Normal View History

#include <random>
2019-07-28 15:30:38 +00:00
#include <Common/thread_local_rng.h>
#include <DataStreams/ConcatBlockInputStream.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/Pipe.h>
2019-12-15 06:34:43 +00:00
#include "narrowBlockInputStreams.h"
2012-06-25 03:04:34 +00:00
namespace DB
{
namespace
{

/// Element i holds the index of the destination bucket chosen for source i.
using Distribution = std::vector<size_t>;

/// Builds a random assignment of `from` sources onto `to` buckets.
/// Slot i is first given bucket (i % to), then the slots are shuffled with thread_local_rng,
/// so only the mapping is random, not the number of sources per bucket.
/// `to` is used as a modulo divisor: it must be non-zero whenever `from` is non-zero.
Distribution getDistribution(size_t from, size_t to)
{
    Distribution buckets;
    buckets.reserve(from);

    for (size_t source = 0; source != from; ++source)
        buckets.push_back(source % to);

    std::shuffle(buckets.begin(), buckets.end(), thread_local_rng);
    return buckets;
}

}
2012-06-25 03:04:34 +00:00
/// Reduces the number of streams to `width` by randomly grouping the inputs
/// and wrapping every group into a single ConcatBlockInputStream.
/// If there are already no more than `width` inputs, a copy of `inputs` is returned as is.
/// NOTE: with non-empty `inputs` and `width == 0` this reaches a modulo by zero in getDistribution.
BlockInputStreams narrowBlockInputStreams(BlockInputStreams & inputs, size_t width)
{
    const size_t size = inputs.size();
    if (size <= width)
        return inputs;

    std::vector<BlockInputStreams> partitions(width);

    /// distribution[i] is the partition that receives inputs[i].
    const auto distribution = getDistribution(size, width);

    size_t source = 0;
    for (auto & stream : inputs)
    {
        partitions[distribution[source]].push_back(stream);
        ++source;
    }

    BlockInputStreams res;
    res.reserve(width);
    for (auto & partition : partitions)
        res.emplace_back(std::make_shared<ConcatBlockInputStream>(partition));

    return res;
}
/// Reduces the number of pipes to `width` by randomly grouping them
/// and joining every group through a ConcatProcessor.
/// If there are already no more than `width` pipes, they are returned unchanged.
/// NOTE: with non-empty `pipes` and `width == 0` this reaches a modulo by zero in getDistribution.
Pipes narrowPipes(Pipes pipes, size_t width)
{
    const size_t size = pipes.size();
    if (size <= width)
        return pipes;

    std::vector<Pipes> partitions(width);

    /// distribution[i] is the partition that receives pipes[i].
    const auto distribution = getDistribution(size, width);

    for (size_t source = 0; source != size; ++source)
        partitions[distribution[source]].emplace_back(std::move(pipes[source]));

    Pipes res;
    res.reserve(width);

    for (auto & partition : partitions)
    {
        /// Every partition is non-empty here: size > width and buckets are assigned as (i % width),
        /// so each bucket index occurs at least once. The header is taken from the first pipe.
        const size_t num_inputs = partition.size();
        auto processor = std::make_shared<ConcatProcessor>(partition.at(0).getHeader(), num_inputs);
        res.emplace_back(std::move(partition), std::move(processor));
    }

    return res;
}
2012-06-25 03:04:34 +00:00
}