// ClickHouse/dbms/src/Storages/MergeTree/IMergeTreeDataPartWriter.h
#pragma once
#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteBufferFromFileBase.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/HashingWriteBuffer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <DataStreams/IBlockOutputStream.h>
// #include <Storages/MergeTree/MergeTreeData.h>
// #include <Storages/MergeTree/IMergeTreeDataPart.h>
namespace DB
{
class IMergeTreeDataPartWriter
{
public:
using WrittenOffsetColumns = std::set<std::string>;
struct ColumnStream
{
ColumnStream(
const String & escaped_column_name_,
const String & data_path_,
const std::string & data_file_extension_,
const std::string & marks_path_,
const std::string & marks_file_extension_,
const CompressionCodecPtr & compression_codec_,
size_t max_compress_block_size_,
size_t estimated_size_,
size_t aio_threshold_);
String escaped_column_name;
std::string data_file_extension;
std::string marks_file_extension;
/// compressed -> compressed_buf -> plain_hashing -> plain_file
std::unique_ptr<WriteBufferFromFileBase> plain_file;
2019-10-20 23:36:27 +00:00
HashingWriteBuffer plain_hashing;
2019-10-16 18:27:53 +00:00
CompressedWriteBuffer compressed_buf;
HashingWriteBuffer compressed;
/// marks -> marks_file
WriteBufferFromFile marks_file;
HashingWriteBuffer marks;
void finalize();
void sync();
2019-10-21 00:28:29 +00:00
void addToChecksums(IMergeTreeDataPart::Checksums & checksums);
2019-10-16 18:27:53 +00:00
};
2019-10-19 16:49:36 +00:00
using ColumnStreamPtr = std::unique_ptr<ColumnStream>;
2019-10-21 00:28:29 +00:00
using ColumnStreams = std::map<String, ColumnStreamPtr>;
2019-10-19 16:49:36 +00:00
IMergeTreeDataPartWriter(
const String & part_path,
const MergeTreeData & storage,
const NamesAndTypesList & columns_list,
const IColumn::Permutation * permutation,
const String & marks_file_extension,
const CompressionCodecPtr & default_codec,
2019-10-21 15:33:59 +00:00
const WriterSettings & settings);
2019-10-19 16:49:36 +00:00
2019-10-16 18:27:53 +00:00
virtual size_t write(
2019-10-19 16:49:36 +00:00
const Block & block, size_t from_mark, size_t offset, const MergeTreeIndexGranularity & index_granularity,
2019-10-16 18:27:53 +00:00
/* Blocks with already sorted index columns */
const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) = 0;
2019-10-21 15:33:59 +00:00
// virtual void writeFinalMarks() = 0;
virtual ~IMergeTreeDataPartWriter();
2019-10-16 18:27:53 +00:00
protected:
2019-10-19 16:49:36 +00:00
using SerializationState = IDataType::SerializeBinaryBulkStatePtr;
using SerializationStates = std::vector<SerializationState>;
String part_path;
2019-10-20 23:36:27 +00:00
const MergeTreeData & storage;
2019-10-19 16:49:36 +00:00
NamesAndTypesList columns_list;
const IColumn::Permutation * permutation;
const String marks_file_extension;
2019-10-16 18:27:53 +00:00
2019-10-19 16:49:36 +00:00
CompressionCodecPtr default_codec;
2019-10-21 15:33:59 +00:00
WriterSettings settings;
2019-10-16 18:27:53 +00:00
};
/// Owning pointer to a part writer, held through the abstract IMergeTreeDataPartWriter interface.
using MergeTreeWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
}