2013-04-24 10:31:32 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
#include <Core/Block.h>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/WriteBufferFromFile.h>
|
2018-12-28 18:15:26 +00:00
|
|
|
#include <Compression/CompressedWriteBuffer.h>
|
2013-09-15 01:10:16 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Columns/ColumnsNumber.h>
|
2013-09-15 01:10:16 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/sortBlock.h>
|
2013-09-15 01:10:16 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
2022-02-01 10:36:51 +00:00
|
|
|
#include <Storages/MergeTree/MergedBlockOutputStream.h>
|
2013-04-24 10:31:32 +00:00
|
|
|
|
2013-09-15 01:10:16 +00:00
|
|
|
|
2013-04-24 10:31:32 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2014-03-09 17:36:01 +00:00
|
|
|
|
2017-08-18 19:46:26 +00:00
|
|
|
struct BlockWithPartition
|
2014-03-13 17:44:00 +00:00
|
|
|
{
|
|
|
|
Block block;
|
2017-08-19 18:11:20 +00:00
|
|
|
Row partition;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
BlockWithPartition(Block && block_, Row && partition_)
|
2017-08-18 19:46:26 +00:00
|
|
|
: block(block_), partition(std::move(partition_))
|
2016-09-04 17:12:07 +00:00
|
|
|
{
|
|
|
|
}
|
2014-03-13 17:44:00 +00:00
|
|
|
};
|
|
|
|
|
2017-08-19 18:11:20 +00:00
|
|
|
/// A sequence of blocks, each carried together with its partition value.
using BlocksWithPartition = std::vector<BlockWithPartition>;
|
2014-03-13 17:44:00 +00:00
|
|
|
|
2018-05-07 02:01:11 +00:00
|
|
|
/** Writes new parts of data to the merge tree.
|
2014-03-13 12:48:07 +00:00
|
|
|
*/
|
|
|
|
class MergeTreeDataWriter
|
|
|
|
{
|
|
|
|
public:
|
2022-06-23 12:01:26 +00:00
|
|
|
explicit MergeTreeDataWriter(MergeTreeData & data_)
|
|
|
|
: data(data_)
|
|
|
|
, log(&Poco::Logger::get(data.getLogName() + " (Writer)"))
|
|
|
|
{}
|
2014-03-13 12:48:07 +00:00
|
|
|
|
2016-09-04 17:12:07 +00:00
|
|
|
/** Split the block to blocks, each of them must be written as separate part.
|
2017-08-14 18:16:11 +00:00
|
|
|
* (split rows by partition)
|
2016-09-04 17:12:07 +00:00
|
|
|
* Works deterministically: if same block was passed, function will return same result in same order.
|
2014-03-13 12:48:07 +00:00
|
|
|
*/
|
2021-05-21 16:14:01 +00:00
|
|
|
static BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot, ContextPtr context);
|
2014-03-13 12:48:07 +00:00
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
/// This structure contains not completely written temporary part.
|
|
|
|
/// Some writes may happen asynchronously, e.g. for blob storages.
|
|
|
|
/// You should call finalize() to wait until all data is written.
|
2022-03-31 13:26:32 +00:00
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
struct TemporaryPart
|
|
|
|
{
|
|
|
|
MergeTreeData::MutableDataPartPtr part;
|
|
|
|
|
|
|
|
struct Stream
|
|
|
|
{
|
|
|
|
std::unique_ptr<MergedBlockOutputStream> stream;
|
|
|
|
MergedBlockOutputStream::Finalizer finalizer;
|
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<Stream> streams;
|
|
|
|
|
2022-08-09 21:16:08 +00:00
|
|
|
scope_guard temporary_directory_lock;
|
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
void finalize();
|
|
|
|
};
|
|
|
|
|
2022-03-31 13:26:32 +00:00
|
|
|
/** All rows must correspond to same partition.
|
|
|
|
* Returns part with unique name starting with 'tmp_', yet not added to MergeTreeData.
|
|
|
|
*/
|
2022-02-01 10:36:51 +00:00
|
|
|
TemporaryPart writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot, ContextPtr context);
|
2021-02-10 14:12:49 +00:00
|
|
|
|
2021-08-26 11:01:15 +00:00
|
|
|
/// For insertion.
|
2022-02-01 10:36:51 +00:00
|
|
|
static TemporaryPart writeProjectionPart(
|
2022-10-29 14:26:34 +00:00
|
|
|
const MergeTreeData & data,
|
2021-08-26 11:01:15 +00:00
|
|
|
Poco::Logger * log,
|
|
|
|
Block block,
|
|
|
|
const ProjectionDescription & projection,
|
2022-10-24 14:44:22 +00:00
|
|
|
IMergeTreeDataPart * parent_part);
|
2021-02-10 14:12:49 +00:00
|
|
|
|
2021-08-26 11:01:15 +00:00
|
|
|
/// For mutation: MATERIALIZE PROJECTION.
|
2022-02-01 10:36:51 +00:00
|
|
|
static TemporaryPart writeTempProjectionPart(
|
2022-10-29 14:26:34 +00:00
|
|
|
const MergeTreeData & data,
|
2021-02-10 14:12:49 +00:00
|
|
|
Poco::Logger * log,
|
|
|
|
Block block,
|
|
|
|
const ProjectionDescription & projection,
|
2022-10-24 14:44:22 +00:00
|
|
|
IMergeTreeDataPart * parent_part,
|
2021-02-10 14:12:49 +00:00
|
|
|
size_t block_num);
|
|
|
|
|
2021-11-26 14:45:56 +00:00
|
|
|
static Block mergeBlock(
|
|
|
|
const Block & block,
|
|
|
|
SortDescription sort_description,
|
|
|
|
const Names & partition_key_columns,
|
|
|
|
IColumn::Permutation *& permutation,
|
|
|
|
const MergeTreeData::MergingParams & merging_params);
|
2014-03-13 12:48:07 +00:00
|
|
|
|
|
|
|
private:
|
2022-02-01 10:36:51 +00:00
|
|
|
static TemporaryPart writeProjectionPartImpl(
|
2021-11-26 14:45:56 +00:00
|
|
|
const String & part_name,
|
2021-08-26 11:01:15 +00:00
|
|
|
bool is_temp,
|
2022-10-24 14:44:22 +00:00
|
|
|
IMergeTreeDataPart * parent_part,
|
2021-08-26 11:01:15 +00:00
|
|
|
const MergeTreeData & data,
|
2021-02-10 14:12:49 +00:00
|
|
|
Poco::Logger * log,
|
|
|
|
Block block,
|
2021-11-26 14:45:56 +00:00
|
|
|
const ProjectionDescription & projection);
|
2021-02-10 14:12:49 +00:00
|
|
|
|
2014-03-13 12:48:07 +00:00
|
|
|
MergeTreeData & data;
|
2020-05-30 21:57:37 +00:00
|
|
|
Poco::Logger * log;
|
2013-04-24 10:31:32 +00:00
|
|
|
};
|
2014-03-13 17:44:00 +00:00
|
|
|
|
2013-04-24 10:31:32 +00:00
|
|
|
}
|