#pragma once

#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>

namespace DB
|
|
|
|
{
|
|
|
|
|
2020-02-03 12:08:40 +00:00
|
|
|
/// Writes data part in compact format.
|
2020-04-14 01:26:34 +00:00
|
|
|
class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk
|
2019-10-16 18:27:53 +00:00
|
|
|
{
|
|
|
|
public:
|
2019-10-22 10:50:17 +00:00
|
|
|
MergeTreeDataPartWriterCompact(
|
2020-05-10 13:33:27 +00:00
|
|
|
const MergeTreeData::DataPartPtr & data_part,
|
2022-04-12 18:59:49 +00:00
|
|
|
DataPartStorageBuilderPtr data_part_storage_builder_,
|
2019-10-22 10:50:17 +00:00
|
|
|
const NamesAndTypesList & columns_list,
|
2020-06-17 12:39:20 +00:00
|
|
|
const StorageMetadataPtr & metadata_snapshot_,
|
2019-12-18 16:41:11 +00:00
|
|
|
const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
|
2019-10-22 10:50:17 +00:00
|
|
|
const String & marks_file_extension,
|
|
|
|
const CompressionCodecPtr & default_codec,
|
2019-12-18 15:54:45 +00:00
|
|
|
const MergeTreeWriterSettings & settings,
|
2019-11-07 11:11:38 +00:00
|
|
|
const MergeTreeIndexGranularity & index_granularity);
|
2019-10-22 10:50:17 +00:00
|
|
|
|
2020-12-10 08:57:52 +00:00
|
|
|
void write(const Block & block, const IColumn::Permutation * permutation) override;
|
2019-10-16 18:27:53 +00:00
|
|
|
|
2022-02-01 10:36:51 +00:00
|
|
|
void fillChecksums(IMergeTreeDataPart::Checksums & checksums) override;
|
|
|
|
void finish(bool sync) override;
|
2019-10-21 17:23:06 +00:00
|
|
|
|
2020-12-09 18:10:09 +00:00
|
|
|
private:
|
2020-12-15 10:34:28 +00:00
|
|
|
/// Finish serialization of the data. Flush rows in buffer to disk, compute checksums.
|
2022-02-01 10:36:51 +00:00
|
|
|
void fillDataChecksums(IMergeTreeDataPart::Checksums & checksums);
|
|
|
|
void finishDataSerialization(bool sync);
|
2019-10-21 17:23:06 +00:00
|
|
|
|
2020-04-27 18:12:17 +00:00
|
|
|
void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override;
|
2020-04-26 21:19:25 +00:00
|
|
|
|
2020-12-15 10:34:28 +00:00
|
|
|
/// Write block of rows into .bin file and marks in .mrk files
|
2020-12-14 12:51:14 +00:00
|
|
|
void writeDataBlock(const Block & block, const Granules & granules);
|
|
|
|
|
2020-12-15 10:34:28 +00:00
|
|
|
/// Write block of rows into .bin file and marks in .mrk files, primary index in .idx file
|
|
|
|
/// and skip indices in their corresponding files.
|
2020-12-14 12:51:14 +00:00
|
|
|
void writeDataBlockPrimaryIndexAndSkipIndices(const Block & block, const Granules & granules);
|
2019-11-27 11:35:27 +00:00
|
|
|
|
2020-09-04 15:07:17 +00:00
|
|
|
void addToChecksums(MergeTreeDataPartChecksums & checksums);
|
2019-11-27 11:35:27 +00:00
|
|
|
|
2020-10-21 23:02:20 +00:00
|
|
|
void addStreams(const NameAndTypePair & column, const ASTPtr & effective_codec_desc);
|
2020-09-21 17:35:09 +00:00
|
|
|
|
2019-11-27 11:35:27 +00:00
|
|
|
Block header;
|
2019-12-27 21:17:53 +00:00
|
|
|
|
2019-12-27 21:32:55 +00:00
|
|
|
/** Simplified SquashingTransform. The original one isn't suitable in this case
|
2019-12-27 21:17:53 +00:00
|
|
|
* as it can return smaller block from buffer without merging it with larger block if last is enough size.
|
|
|
|
* But in compact parts we should guarantee, that written block is larger or equals than index_granularity.
|
|
|
|
*/
|
|
|
|
class ColumnsBuffer
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
void add(MutableColumns && columns);
|
|
|
|
size_t size() const;
|
|
|
|
Columns releaseColumns();
|
|
|
|
private:
|
|
|
|
MutableColumns accumulated_columns;
|
|
|
|
};
|
|
|
|
|
|
|
|
ColumnsBuffer columns_buffer;
|
2020-07-07 00:15:02 +00:00
|
|
|
|
2020-09-03 14:53:05 +00:00
|
|
|
/// hashing_buf -> compressed_buf -> plain_hashing -> plain_file
|
2020-07-07 00:15:02 +00:00
|
|
|
std::unique_ptr<WriteBufferFromFileBase> plain_file;
|
|
|
|
HashingWriteBuffer plain_hashing;
|
|
|
|
|
2020-09-22 12:49:55 +00:00
|
|
|
/// Compressed stream which allows to write with codec.
|
2020-07-07 00:15:02 +00:00
|
|
|
struct CompressedStream
|
|
|
|
{
|
|
|
|
CompressedWriteBuffer compressed_buf;
|
|
|
|
HashingWriteBuffer hashing_buf;
|
|
|
|
|
2020-09-22 12:16:15 +00:00
|
|
|
CompressedStream(WriteBuffer & buf, const CompressionCodecPtr & codec)
|
|
|
|
: compressed_buf(buf, codec)
|
2020-09-21 17:35:09 +00:00
|
|
|
, hashing_buf(compressed_buf) {}
|
2020-07-07 00:15:02 +00:00
|
|
|
};
|
|
|
|
|
2020-09-03 22:04:46 +00:00
|
|
|
using CompressedStreamPtr = std::shared_ptr<CompressedStream>;
|
2020-09-03 22:38:17 +00:00
|
|
|
|
2020-09-22 12:49:55 +00:00
|
|
|
/// Create compressed stream for every different codec. All streams write to
|
|
|
|
/// a single file on disk.
|
2020-09-03 22:38:17 +00:00
|
|
|
std::unordered_map<UInt64, CompressedStreamPtr> streams_by_codec;
|
|
|
|
|
2020-09-22 12:49:55 +00:00
|
|
|
/// Stream for each column's substreams path (look at addStreams).
|
2020-09-21 17:35:09 +00:00
|
|
|
std::unordered_map<String, CompressedStreamPtr> compressed_streams;
|
2020-07-07 00:15:02 +00:00
|
|
|
|
|
|
|
/// marks -> marks_file
|
|
|
|
std::unique_ptr<WriteBufferFromFileBase> marks_file;
|
|
|
|
HashingWriteBuffer marks;
|
2019-10-16 18:27:53 +00:00
|
|
|
};
|
|
|
|
|
2019-10-21 15:33:59 +00:00
|
|
|
}
|