ClickHouse/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h

#pragma once

#include <Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h>
namespace DB
{

/// Writes data part in compact format.
class MergeTreeDataPartWriterCompact : public MergeTreeDataPartWriterOnDisk
{
public:
    MergeTreeDataPartWriterCompact(
        const MergeTreeData::DataPartPtr & data_part,
        const NamesAndTypesList & columns_list,
        const StorageMetadataPtr & metadata_snapshot_,
        const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
        const String & marks_file_extension,
        const CompressionCodecPtr & default_codec,
        const MergeTreeWriterSettings & settings,
        const MergeTreeIndexGranularity & index_granularity);

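    /// Accumulates the incoming block in columns_buffer; once enough rows are
    /// collected they are squashed into a single block and written out (see the
    /// ColumnsBuffer comment below for why accumulation is required).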
    void write(const Block & block, const IColumn::Permutation * permutation,
        const Block & primary_key_block, const Block & skip_indexes_block) override;

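    /// Flushes the rows still pending in columns_buffer and fills checksums of the
    /// written files; if sync is set, the files are also fsynced.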
    void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync) override;

protected:
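    /// Fills index_granularity for the block: rows_in_block rows are split into
    /// granules of index_granularity_for_block rows each.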
    void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override;

private:
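    /// Writes one squashed block. In the compact format all columns of a granule are
    /// written one after another into the same data file.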
    void writeBlock(const Block & block);

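    /// Adds checksums of the data and marks files to the part's checksums.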
    void addToChecksums(MergeTreeDataPartChecksums & checksums);

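    /// Registers streams for a column: an existing compressed stream is reused if one
    /// was already created for the column's effective codec (see streams_by_codec).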
    void addStreams(const String & name, const IDataType & type, const ASTPtr & effective_codec_desc);

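    /// Names and types of the columns being written; presumably serves as the header
    /// for blocks assembled from columns_buffer.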
    Block header;

    /** Simplified SquashingTransform. The original one isn't suitable in this case,
      * as it can return a smaller block from its buffer without merging it with an
      * incoming larger block, if the latter is already big enough on its own. But for
      * compact parts we must guarantee that every written block is at least as large
      * as index_granularity.
      */
    class ColumnsBuffer
    {
    public:
        void add(MutableColumns && columns);
        size_t size() const;
        Columns releaseColumns();

    private:
        MutableColumns accumulated_columns;
    };

    ColumnsBuffer columns_buffer;
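
    /// A minimal usage sketch (illustrative only; rows_for_granule is a hypothetical
    /// threshold, not a real member):
    ///     columns_buffer.add(block.mutateColumns());
    ///     if (columns_buffer.size() >= rows_for_granule)
    ///         writeBlock(header.cloneWithColumns(columns_buffer.releaseColumns()));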

    /// hashing_buf -> compressed_buf -> plain_hashing -> plain_file
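    /// That is: hashing_buf checksums the uncompressed data and passes it to
    /// compressed_buf, which compresses it with the column's codec; the compressed
    /// bytes then flow through plain_hashing, which checksums them, into plain_file.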
    std::unique_ptr<WriteBufferFromFileBase> plain_file;
    HashingWriteBuffer plain_hashing;

    struct CompressedStream
    {
        UInt64 codec_id;
        CompressedWriteBuffer compressed_buf;
        HashingWriteBuffer hashing_buf;

        CompressedStream(UInt64 codec_id_, WriteBuffer & buf, const CompressionCodecPtr & codec)
            : codec_id(codec_id_)
            , compressed_buf(buf, codec)
            , hashing_buf(compressed_buf) {}
    };

    using CompressedStreamPtr = std::shared_ptr<CompressedStream>;

    /// A compressed stream is created for every distinct codec.
    std::unordered_map<UInt64, CompressedStreamPtr> streams_by_codec;

    /// For better performance, a pointer to the stream is saved for every column.
    std::unordered_map<String, CompressedStreamPtr> compressed_streams;

    /// marks -> marks_file
    std::unique_ptr<WriteBufferFromFileBase> marks_file;
    HashingWriteBuffer marks;
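    /// As with the data chain above, marks is a HashingWriteBuffer over marks_file,
    /// so the marks are checksummed on their way to disk.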

    /// Write a single granule of one column (the rows between two marks).
    static void writeColumnSingleGranule(
        const ColumnWithTypeAndName & column,
        IDataType::OutputStreamGetter stream_getter,
        size_t from_row,
        size_t number_of_rows);
};

}