2019-10-21 15:33:59 +00:00
|
|
|
#pragma once
|
|
|
|
|
2019-10-16 18:27:53 +00:00
|
|
|
#include <IO/WriteBufferFromFile.h>
|
2019-10-19 16:49:36 +00:00
|
|
|
#include <IO/WriteBufferFromFileBase.h>
|
2019-10-16 18:27:53 +00:00
|
|
|
#include <Compression/CompressedWriteBuffer.h>
|
|
|
|
#include <IO/HashingWriteBuffer.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeData.h>
|
|
|
|
#include <DataStreams/IBlockOutputStream.h>
|
2019-11-05 11:53:22 +00:00
|
|
|
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
2020-02-27 16:47:40 +00:00
|
|
|
#include <Disks/IDisk.h>
|
2019-10-16 18:27:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-02-03 12:08:40 +00:00
|
|
|
|
|
|
|
/// Writes data part to disk in different formats.
|
|
|
|
/// Calculates and serializes primary and skip indices if needed.
|
2019-12-27 21:17:53 +00:00
|
|
|
class IMergeTreeDataPartWriter : private boost::noncopyable
|
2019-10-16 18:27:53 +00:00
|
|
|
{
|
|
|
|
public:
|
2020-04-14 19:47:19 +00:00
|
|
|
IMergeTreeDataPartWriter(
|
2020-06-03 13:27:54 +00:00
|
|
|
const MergeTreeData::DataPartPtr & data_part_,
|
2020-04-14 19:47:19 +00:00
|
|
|
const NamesAndTypesList & columns_list_,
|
2020-06-26 11:30:23 +00:00
|
|
|
const StorageMetadataPtr & metadata_snapshot_,
|
2020-04-14 01:27:27 +00:00
|
|
|
const MergeTreeWriterSettings & settings_);
|
2019-10-19 16:49:36 +00:00
|
|
|
|
|
|
|
IMergeTreeDataPartWriter(
|
2020-06-03 13:27:54 +00:00
|
|
|
const MergeTreeData::DataPartPtr & data_part_,
|
2020-04-14 01:27:27 +00:00
|
|
|
const NamesAndTypesList & columns_list_,
|
2020-06-17 12:39:20 +00:00
|
|
|
const StorageMetadataPtr & metadata_snapshot_,
|
2020-04-14 01:27:27 +00:00
|
|
|
const MergeTreeIndices & skip_indices_,
|
|
|
|
const MergeTreeIndexGranularity & index_granularity_,
|
|
|
|
const MergeTreeWriterSettings & settings_);
|
2019-10-19 16:49:36 +00:00
|
|
|
|
2019-12-18 15:54:45 +00:00
|
|
|
virtual ~IMergeTreeDataPartWriter();
|
|
|
|
|
2019-11-07 11:11:38 +00:00
|
|
|
virtual void write(
|
2019-12-09 21:21:17 +00:00
|
|
|
const Block & block, const IColumn::Permutation * permutation = nullptr,
|
2019-10-16 18:27:53 +00:00
|
|
|
/* Blocks with already sorted index columns */
|
2019-11-07 11:11:38 +00:00
|
|
|
const Block & primary_key_block = {}, const Block & skip_indexes_block = {}) = 0;
|
2019-10-21 17:23:06 +00:00
|
|
|
|
2020-04-14 01:27:27 +00:00
|
|
|
virtual void calculateAndSerializePrimaryIndex(const Block & /* primary_index_block */) {}
|
|
|
|
virtual void calculateAndSerializeSkipIndices(const Block & /* skip_indexes_block */) {}
|
2019-10-21 15:33:59 +00:00
|
|
|
|
2019-12-18 15:54:45 +00:00
|
|
|
/// Shift mark and offset to prepare read next mark.
|
2019-12-18 16:41:11 +00:00
|
|
|
/// You must call it after calling write method and optionally
|
2019-12-18 15:54:45 +00:00
|
|
|
/// calling calculations of primary and skip indices.
|
|
|
|
void next();
|
2019-10-16 18:27:53 +00:00
|
|
|
|
2020-04-14 01:27:27 +00:00
|
|
|
virtual void initSkipIndices() {}
|
|
|
|
virtual void initPrimaryIndex() {}
|
2019-11-07 11:11:38 +00:00
|
|
|
|
2020-08-24 09:57:27 +00:00
|
|
|
virtual void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) = 0;
|
2020-09-01 15:26:49 +00:00
|
|
|
virtual void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {}
|
2020-08-24 09:57:27 +00:00
|
|
|
virtual void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & /* checksums */, bool /* sync */) {}
|
2019-12-09 21:21:17 +00:00
|
|
|
|
2020-04-14 01:27:27 +00:00
|
|
|
Columns releaseIndexColumns();
|
|
|
|
const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; }
|
2020-04-10 13:36:51 +00:00
|
|
|
const MergeTreeIndices & getSkipIndices() { return skip_indices; }
|
2019-12-19 14:05:26 +00:00
|
|
|
|
2019-10-16 18:27:53 +00:00
|
|
|
protected:
|
2020-04-29 21:57:58 +00:00
|
|
|
size_t getCurrentMark() const { return current_mark; }
|
|
|
|
size_t getIndexOffset() const { return index_offset; }
|
|
|
|
|
2019-10-19 16:49:36 +00:00
|
|
|
using SerializationState = IDataType::SerializeBinaryBulkStatePtr;
|
2019-11-07 11:11:38 +00:00
|
|
|
using SerializationStates = std::unordered_map<String, SerializationState>;
|
2019-10-19 16:49:36 +00:00
|
|
|
|
2020-05-10 13:33:27 +00:00
|
|
|
MergeTreeData::DataPartPtr data_part;
|
2019-10-20 23:36:27 +00:00
|
|
|
const MergeTreeData & storage;
|
2020-06-17 12:39:20 +00:00
|
|
|
StorageMetadataPtr metadata_snapshot;
|
2019-10-19 16:49:36 +00:00
|
|
|
NamesAndTypesList columns_list;
|
2020-04-10 13:36:51 +00:00
|
|
|
MergeTreeIndices skip_indices;
|
2020-04-14 01:27:27 +00:00
|
|
|
MergeTreeIndexGranularity index_granularity;
|
2019-12-18 15:54:45 +00:00
|
|
|
MergeTreeWriterSettings settings;
|
2019-11-05 11:53:22 +00:00
|
|
|
bool with_final_mark;
|
|
|
|
|
|
|
|
size_t next_mark = 0;
|
|
|
|
size_t next_index_offset = 0;
|
|
|
|
|
|
|
|
MutableColumns index_columns;
|
2020-04-29 21:57:58 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
/// Data is already written up to this mark.
|
|
|
|
size_t current_mark = 0;
|
|
|
|
/// The offset to the first row of the block for which you want to write the index.
|
|
|
|
size_t index_offset = 0;
|
2019-10-16 18:27:53 +00:00
|
|
|
};
|
|
|
|
|
2019-10-19 16:49:36 +00:00
|
|
|
}
|