2020-04-28 13:55:50 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <mutex>
|
|
|
|
#include <condition_variable>
|
|
|
|
|
|
|
|
#include <Common/filesystemHelpers.h>
|
|
|
|
#include <Core/Block.h>
|
|
|
|
#include <Core/SortDescription.h>
|
|
|
|
#include <DataStreams/SizeLimits.h>
|
|
|
|
#include <DataStreams/IBlockStream_fwd.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
class TableJoin;
|
|
|
|
class MergeJoinCursor;
|
|
|
|
struct MergeJoinEqualRange;
|
|
|
|
|
2020-05-04 20:15:38 +00:00
|
|
|
class VolumeJBOD;
|
|
|
|
using VolumeJBODPtr = std::shared_ptr<VolumeJBOD>;
|
2020-04-28 13:55:50 +00:00
|
|
|
|
|
|
|
struct SortedBlocksWriter
|
|
|
|
{
|
|
|
|
using TmpFilePtr = std::unique_ptr<TemporaryFile>;
|
|
|
|
using SortedFiles = std::vector<TmpFilePtr>;
|
|
|
|
|
|
|
|
struct Blocks
|
|
|
|
{
|
|
|
|
BlocksList blocks;
|
|
|
|
size_t row_count = 0;
|
|
|
|
size_t bytes = 0;
|
|
|
|
|
|
|
|
bool empty() const { return blocks.empty(); }
|
|
|
|
|
|
|
|
void insert(Block && block)
|
|
|
|
{
|
|
|
|
countBlockSize(block);
|
|
|
|
blocks.emplace_back(std::move(block));
|
|
|
|
}
|
|
|
|
|
|
|
|
void countBlockSize(const Block & block)
|
|
|
|
{
|
|
|
|
row_count += block.rows();
|
|
|
|
bytes += block.bytes();
|
|
|
|
}
|
|
|
|
|
|
|
|
void clear()
|
|
|
|
{
|
|
|
|
blocks.clear();
|
|
|
|
row_count = 0;
|
|
|
|
bytes = 0;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-06-16 20:13:18 +00:00
|
|
|
struct PremergedFiles
|
|
|
|
{
|
|
|
|
SortedFiles files;
|
|
|
|
BlockInputStreams streams;
|
|
|
|
};
|
|
|
|
|
2020-04-28 13:55:50 +00:00
|
|
|
static constexpr const size_t num_streams = 2;
|
|
|
|
|
|
|
|
std::mutex insert_mutex;
|
|
|
|
std::condition_variable flush_condvar;
|
|
|
|
const SizeLimits & size_limits;
|
2020-05-04 20:15:38 +00:00
|
|
|
VolumeJBODPtr volume;
|
2020-06-16 20:13:18 +00:00
|
|
|
Block sample_block;
|
2020-04-28 13:55:50 +00:00
|
|
|
const SortDescription & sort_description;
|
2020-06-16 20:13:18 +00:00
|
|
|
Blocks inserted_blocks;
|
2020-04-28 13:55:50 +00:00
|
|
|
const size_t rows_in_block;
|
|
|
|
const size_t num_files_for_merge;
|
|
|
|
const String & codec;
|
|
|
|
SortedFiles sorted_files;
|
|
|
|
size_t row_count_in_flush = 0;
|
|
|
|
size_t bytes_in_flush = 0;
|
|
|
|
size_t flush_number = 0;
|
|
|
|
size_t flush_inflight = 0;
|
|
|
|
|
2020-06-16 20:13:18 +00:00
|
|
|
SortedBlocksWriter(const SizeLimits & size_limits_, VolumeJBODPtr volume_, const Block & sample_block_,
|
|
|
|
const SortDescription & description, size_t rows_in_block_, size_t num_files_to_merge_, const String & codec_)
|
2020-04-28 13:55:50 +00:00
|
|
|
: size_limits(size_limits_)
|
|
|
|
, volume(volume_)
|
|
|
|
, sample_block(sample_block_)
|
|
|
|
, sort_description(description)
|
|
|
|
, rows_in_block(rows_in_block_)
|
|
|
|
, num_files_for_merge(num_files_to_merge_)
|
|
|
|
, codec(codec_)
|
2020-06-16 20:13:18 +00:00
|
|
|
{}
|
|
|
|
|
|
|
|
void addBlocks(const Blocks & blocks)
|
2020-04-28 13:55:50 +00:00
|
|
|
{
|
2020-06-16 20:13:18 +00:00
|
|
|
sorted_files.emplace_back(flush(blocks.blocks));
|
2020-04-28 13:55:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
String getPath() const;
|
|
|
|
BlockInputStreamPtr streamFromFile(const TmpFilePtr & file) const;
|
|
|
|
|
|
|
|
void insert(Block && block);
|
|
|
|
TmpFilePtr flush(const BlocksList & blocks) const;
|
2020-06-16 20:13:18 +00:00
|
|
|
PremergedFiles premerge();
|
2020-04-28 13:55:50 +00:00
|
|
|
SortedFiles finishMerge(std::function<void(const Block &)> callback = [](const Block &){});
|
|
|
|
};
|
|
|
|
|
2020-06-16 20:13:18 +00:00
|
|
|
|
|
|
|
class SortedBlocksBuffer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
SortedBlocksBuffer(const SortDescription & sort_description_, size_t max_bytes_)
|
|
|
|
: max_bytes(max_bytes_)
|
|
|
|
, current_bytes(0)
|
|
|
|
, sort_description(sort_description_)
|
|
|
|
{}
|
|
|
|
|
|
|
|
Block exchange(Block && block);
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::mutex mutex;
|
|
|
|
size_t max_bytes;
|
|
|
|
size_t current_bytes;
|
|
|
|
Blocks buffer;
|
|
|
|
const SortDescription & sort_description;
|
|
|
|
|
|
|
|
Block mergeBlocks(Blocks &&) const;
|
|
|
|
};
|
|
|
|
|
2020-04-28 13:55:50 +00:00
|
|
|
}
|