ClickHouse/src/Storages/MergeTree/MergeTreeDataWriter.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

113 lines
3.1 KiB
C++
Raw Normal View History

2013-04-24 10:31:32 +00:00
#pragma once
#include <Core/Block.h>
#include <IO/WriteBufferFromFile.h>
2018-12-28 18:15:26 +00:00
#include <Compression/CompressedWriteBuffer.h>
2013-09-15 01:10:16 +00:00
#include <Columns/ColumnsNumber.h>
2013-09-15 01:10:16 +00:00
#include <Interpreters/sortBlock.h>
2013-09-15 01:10:16 +00:00
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergedBlockOutputStream.h>
2013-04-24 10:31:32 +00:00
2013-09-15 01:10:16 +00:00
2013-04-24 10:31:32 +00:00
namespace DB
{
2014-03-09 17:36:01 +00:00
struct BlockWithPartition
2014-03-13 17:44:00 +00:00
{
Block block;
Row partition;
BlockWithPartition(Block && block_, Row && partition_)
: block(block_), partition(std::move(partition_))
2016-09-04 17:12:07 +00:00
{
}
2014-03-13 17:44:00 +00:00
};
using BlocksWithPartition = std::vector<BlockWithPartition>;
2014-03-13 17:44:00 +00:00
2018-05-07 02:01:11 +00:00
/** Writes new parts of data to the merge tree.
2014-03-13 12:48:07 +00:00
*/
class MergeTreeDataWriter
{
public:
2022-06-23 12:01:26 +00:00
explicit MergeTreeDataWriter(MergeTreeData & data_)
: data(data_)
, log(&Poco::Logger::get(data.getLogName() + " (Writer)"))
{}
2014-03-13 12:48:07 +00:00
2016-09-04 17:12:07 +00:00
/** Split the block to blocks, each of them must be written as separate part.
* (split rows by partition)
2016-09-04 17:12:07 +00:00
* Works deterministically: if same block was passed, function will return same result in same order.
2014-03-13 12:48:07 +00:00
*/
static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context);
2014-03-13 12:48:07 +00:00
/// This structure contains not completely written temporary part.
/// Some writes may happen asynchronously, e.g. for blob storages.
/// You should call finalize() to wait until all data is written.
struct TemporaryPart
{
MergeTreeData::MutableDataPartPtr part;
struct Stream
{
std::unique_ptr<MergedBlockOutputStream> stream;
MergedBlockOutputStream::Finalizer finalizer;
};
std::vector<Stream> streams;
2022-08-09 21:16:08 +00:00
scope_guard temporary_directory_lock;
void finalize();
};
/** All rows must correspond to same partition.
* Returns part with unique name starting with 'tmp_', yet not added to MergeTreeData.
*/
TemporaryPart writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context);
2021-08-26 11:01:15 +00:00
/// For insertion.
static TemporaryPart writeProjectionPart(
2022-10-29 14:26:34 +00:00
const MergeTreeData & data,
2021-08-26 11:01:15 +00:00
Poco::Logger * log,
Block block,
const ProjectionDescription & projection,
2022-10-24 14:44:22 +00:00
IMergeTreeDataPart * parent_part);
2021-08-26 11:01:15 +00:00
/// For mutation: MATERIALIZE PROJECTION.
static TemporaryPart writeTempProjectionPart(
2022-10-29 14:26:34 +00:00
const MergeTreeData & data,
Poco::Logger * log,
Block block,
const ProjectionDescription & projection,
2022-10-24 14:44:22 +00:00
IMergeTreeDataPart * parent_part,
size_t block_num);
static Block mergeBlock(
const Block & block,
SortDescription sort_description,
const Names & partition_key_columns,
IColumn::Permutation *& permutation,
const MergeTreeData::MergingParams & merging_params);
2014-03-13 12:48:07 +00:00
private:
static TemporaryPart writeProjectionPartImpl(
const String & part_name,
2021-08-26 11:01:15 +00:00
bool is_temp,
2022-10-24 14:44:22 +00:00
IMergeTreeDataPart * parent_part,
2021-08-26 11:01:15 +00:00
const MergeTreeData & data,
Poco::Logger * log,
Block block,
const ProjectionDescription & projection);
2014-03-13 12:48:07 +00:00
MergeTreeData & data;
2020-05-30 21:57:37 +00:00
Poco::Logger * log;
2013-04-24 10:31:32 +00:00
};
2014-03-13 17:44:00 +00:00
2013-04-24 10:31:32 +00:00
}