ClickHouse/src/Storages/MergeTree/IMergeTreeDataPartWriter.h

91 lines
3.3 KiB
C++
Raw Normal View History

2019-10-21 15:33:59 +00:00
#pragma once
2019-10-16 18:27:53 +00:00
#include <IO/WriteBufferFromFile.h>
2019-10-19 16:49:36 +00:00
#include <IO/WriteBufferFromFileBase.h>
2019-10-16 18:27:53 +00:00
#include <Compression/CompressedWriteBuffer.h>
#include <IO/HashingWriteBuffer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <DataStreams/IBlockOutputStream.h>
2019-11-05 11:53:22 +00:00
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Disks/IDisk.h>
2019-10-16 18:27:53 +00:00
namespace DB
{
2020-02-03 12:08:40 +00:00
/// Writes data part to disk in different formats.
/// Calculates and serializes primary and skip indices if needed.
2019-12-27 21:17:53 +00:00
class IMergeTreeDataPartWriter : private boost::noncopyable
2019-10-16 18:27:53 +00:00
{
public:
2020-04-14 19:47:19 +00:00
IMergeTreeDataPartWriter(
const MergeTreeData::DataPartPtr & data_part_,
2020-04-14 19:47:19 +00:00
const NamesAndTypesList & columns_list_,
2020-06-26 11:30:23 +00:00
const StorageMetadataPtr & metadata_snapshot_,
2020-04-14 01:27:27 +00:00
const MergeTreeWriterSettings & settings_);
2019-10-19 16:49:36 +00:00
IMergeTreeDataPartWriter(
const MergeTreeData::DataPartPtr & data_part_,
2020-04-14 01:27:27 +00:00
const NamesAndTypesList & columns_list_,
2020-06-17 12:39:20 +00:00
const StorageMetadataPtr & metadata_snapshot_,
2020-04-14 01:27:27 +00:00
const MergeTreeIndices & skip_indices_,
const MergeTreeIndexGranularity & index_granularity_,
const MergeTreeWriterSettings & settings_);
2019-10-19 16:49:36 +00:00
virtual ~IMergeTreeDataPartWriter();
2019-11-07 11:11:38 +00:00
virtual void write(
2019-12-09 21:21:17 +00:00
const Block & block, const IColumn::Permutation * permutation = nullptr,
2019-10-16 18:27:53 +00:00
/* Blocks with already sorted index columns */
2019-11-07 11:11:38 +00:00
const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) = 0;
2019-10-21 17:23:06 +00:00
2020-04-14 01:27:27 +00:00
virtual void calculateAndSerializePrimaryIndex(const Block & /* primary_index_block */) {}
virtual void calculateAndSerializeSkipIndices(const Block & /* skip_indexes_block */) {}
2019-10-21 15:33:59 +00:00
/// Shift mark and offset to prepare read next mark.
/// You must call it after calling write method and optionally
/// calling calculations of primary and skip indices.
void next();
2019-10-16 18:27:53 +00:00
2020-04-14 01:27:27 +00:00
virtual void initSkipIndices() {}
virtual void initPrimaryIndex() {}
2019-11-07 11:11:38 +00:00
virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) = 0;
2020-09-01 15:26:49 +00:00
virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {}
virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {}
2019-12-09 21:21:17 +00:00
2020-04-14 01:27:27 +00:00
Columns releaseIndexColumns();
const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; }
const MergeTreeIndices & getSkipIndices() { return skip_indices; }
2019-10-16 18:27:53 +00:00
protected:
size_t getCurrentMark() const { return current_mark; }
size_t getIndexOffset() const { return index_offset; }
2019-10-19 16:49:36 +00:00
using SerializationState = IDataType::SerializeBinaryBulkStatePtr;
2019-11-07 11:11:38 +00:00
using SerializationStates = std::unordered_map<String, SerializationState>;
2019-10-19 16:49:36 +00:00
MergeTreeData::DataPartPtr data_part;
2019-10-20 23:36:27 +00:00
const MergeTreeData & storage;
2020-06-17 12:39:20 +00:00
StorageMetadataPtr metadata_snapshot;
2019-10-19 16:49:36 +00:00
NamesAndTypesList columns_list;
MergeTreeIndices skip_indices;
2020-04-14 01:27:27 +00:00
MergeTreeIndexGranularity index_granularity;
MergeTreeWriterSettings settings;
2019-11-05 11:53:22 +00:00
bool with_final_mark;
size_t next_mark = 0;
size_t next_index_offset = 0;
MutableColumns index_columns;
private:
/// Data is already written up to this mark.
size_t current_mark = 0;
/// The offset to the first row of the block for which you want to write the index.
size_t index_offset = 0;
2019-10-16 18:27:53 +00:00
};
2019-10-19 16:49:36 +00:00
}