ClickHouse/dbms/src/Storages/MergeTree/IMergedBlockOutputStream.h

#pragma once

#include <Storages/MergeTree/MergeTreeIndexGranularity.h>
#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>
#include <IO/WriteBufferFromFile.h>
#include <Compression/CompressedWriteBuffer.h>
#include <IO/HashingWriteBuffer.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>


namespace DB
{

/// Common base for block output streams of the MergeTree engine family.
/// It derives from IBlockOutputStream and declares shared state (granularity, marks,
/// skip indices, a part writer) plus protected helpers for derived classes.
/// NOTE(review): all member functions are only declared here; their behaviour is
/// defined in another translation unit, so the descriptions below that go beyond
/// the declarations are assumptions to be confirmed against the implementation.
class IMergedBlockOutputStream : public IBlockOutputStream
{
public:
    /// Constructor; only declared in this header.
    /// NOTE(review): parameter roles are inferred from their names and from the
    /// identically named members below (codec_ -> codec, writer_settings_ -> writer_settings,
    /// blocks_are_granules_size_ -> blocks_are_granules_size, index_granularity_ -> index_granularity,
    /// can_use_adaptive_granularity_ -> can_use_adaptive_granularity) - confirm in the .cpp.
    /// `indices_to_recalc` presumably initializes `skip_indices` - TODO confirm.
    IMergedBlockOutputStream(
        const MergeTreeDataPartPtr & data_part,
        CompressionCodecPtr codec_,
        const WriterSettings & writer_settings_,
        bool blocks_are_granules_size_,
        const std::vector<MergeTreeIndexPtr> & indices_to_recalc,
        const MergeTreeIndexGranularity & index_granularity_,
        bool can_use_adaptive_granularity_);

    /// Set of column names, passed by reference to createStreamGetter() and writeFinalMark().
    /// NOTE(review): presumably tracks offset columns that were already written - confirm.
    using WrittenOffsetColumns = std::set<std::string>;

protected:
    /// Per-column bulk serialization state, as defined by IDataType.
    using SerializationState = IDataType::SerializeBinaryBulkStatePtr;
    using SerializationStates = std::vector<SerializationState>;

    /// Returns an IDataType::OutputStreamGetter for the column `name`.
    /// `offset_columns` is taken by non-const reference, so it may be modified.
    /// NOTE(review): the exact meaning of `skip_offsets` is not visible here - confirm.
    IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns, bool skip_offsets);

    /// Count index_granularity for block and store in `index_granularity`
    void fillIndexGranularity(const Block & block);

    /// Write final mark to the end of column
    /// `offset_columns` and `path` are taken by non-const reference, so they may be modified.
    void writeFinalMark(
        const std::string & column_name,
        const DataTypePtr column_type,
        WrittenOffsetColumns & offset_columns,
        bool skip_offsets,
        DB::IDataType::SubstreamPath & path);

    /// Skip-index helpers; judging by the names they cover set-up, per-block update
    /// and finalization of the skip indices listed in `skip_indices`.
    /// NOTE(review): semantics inferred from names only - confirm in the .cpp.
    void initSkipIndices();
    void calculateAndSerializeSkipIndices(const ColumnsWithTypeAndName & skip_indexes_columns, size_t rows);
    /// `checksums` is taken by non-const reference, so it may be updated.
    void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums);
/// Redundant access specifier: the section above is already protected.
/// Note: non-static data members are initialized in declaration order, so the order
/// of the members below must not be changed without checking the constructor.
protected:
    /// Reference member: must be bound in the constructor.
    MergeTreeData & storage;

    SerializationStates serialization_states;
    /// NOTE(review): presumably the path of the data part being written - confirm.
    String part_path;

    /// The offset to the first row of the block for which you want to write the index.
    size_t index_offset = 0;

    /// Stored by value (the constructor takes a const reference).
    WriterSettings writer_settings;

    /// Starts at 0. NOTE(review): presumably the number of the mark currently being written - confirm.
    size_t current_mark = 0;

    /// Number of mark in data from which skip indices have to start
    /// aggregation. I.e. it's data mark number, not skip indices mark.
    size_t skip_index_data_mark = 0;

    /// The const members below have no default initializer, so they are fixed at construction.
    const bool can_use_adaptive_granularity;
    const std::string marks_file_extension;
    const bool blocks_are_granules_size;

    MergeTreeIndexGranularity index_granularity;

    /// NOTE(review): presumably tells whether granularity has to be computed by
    /// fillIndexGranularity() rather than taken from the constructor argument - confirm.
    const bool compute_granularity;
    CompressionCodecPtr codec;

    /// Skip-index state. NOTE(review): the containers look like they are parallel
    /// (one entry per skip index) - confirm.
    std::vector<MergeTreeIndexPtr> skip_indices;
    std::vector<std::unique_ptr<IMergeTreeDataPartWriter::ColumnStream>> skip_indices_streams;
    MergeTreeIndexAggregators skip_indices_aggregators;
    std::vector<size_t> skip_index_filling;

    /// Part writer, exclusively owned by this object through unique_ptr.
    std::unique_ptr<IMergeTreeDataPartWriter> writer;

    /// NOTE(review): presumably enables writing of the final mark (see writeFinalMark()) - confirm.
    const bool with_final_mark;
};

}
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`#pragma once`

			`#include <Storages/MergeTree/MergeTreeIndexGranularity.h>`
Fix bug with enable_mixed_granularity_parts and mutations 2019-08-19 10:37:04 +00:00			`#include <Storages/MergeTree/MergeTreeIndexGranularityInfo.h>`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`#include <IO/WriteBufferFromFile.h>`
			`#include <Compression/CompressedWriteBuffer.h>`
			`#include <IO/HashingWriteBuffer.h>`
			`#include <Storages/MergeTree/MergeTreeData.h>`
			`#include <DataStreams/IBlockOutputStream.h>`
polymorphic parts (development) 2019-10-19 16:49:36 +00:00			`#include <Storages/MergeTree/IMergeTreeDataPartWriter.h>`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00

			`namespace DB`
			`{`

			`class IMergedBlockOutputStream : public IBlockOutputStream`
			`{`
			`public:`
			`IMergedBlockOutputStream(`
polymorphic parts (development) 2019-10-19 16:49:36 +00:00			`const MergeTreeDataPartPtr & data_part,`
			`CompressionCodecPtr codec_,`
			`const WriterSettings & writer_settings_,`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`bool blocks_are_granules_size_,`
fix 2019-07-28 11:10:35 +00:00			`const std::vector<MergeTreeIndexPtr> & indices_to_recalc,`
Fix bug with enable_mixed_granularity_parts and mutations 2019-08-19 10:37:04 +00:00			`const MergeTreeIndexGranularity & index_granularity_,`
polymorphic parts (development) 2019-10-19 16:49:36 +00:00			`bool can_use_adaptive_granularity_);`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00
			`using WrittenOffsetColumns = std::set<std::string>;`

			`protected:`
			`using SerializationState = IDataType::SerializeBinaryBulkStatePtr;`
			`using SerializationStates = std::vector<SerializationState>;`

			`IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns, bool skip_offsets);`

			/// Count index_granularity for block and store in `index_granularity`
			`void fillIndexGranularity(const Block & block);`

			`/// Write final mark to the end of column`
			`void writeFinalMark(`
			`const std::string & column_name,`
			`const DataTypePtr column_type,`
			`WrittenOffsetColumns & offset_columns,`
			`bool skip_offsets,`
			`DB::IDataType::SubstreamPath & path);`

fix 2019-07-28 11:10:35 +00:00			`void initSkipIndices();`
			`void calculateAndSerializeSkipIndices(const ColumnsWithTypeAndName & skip_indexes_columns, size_t rows);`
			`void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums);`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`protected:`
			`MergeTreeData & storage;`

fix 2019-07-28 11:10:35 +00:00			`SerializationStates serialization_states;`
			`String part_path;`

Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`/// The offset to the first row of the block for which you want to write the index.`
			`size_t index_offset = 0;`

polymorphic parts (development) 2019-10-19 16:49:36 +00:00			`WriterSettings writer_settings;`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00
			`size_t current_mark = 0;`
Fix bad size of marks 2019-08-30 14:29:08 +00:00
Fix comment 2019-08-30 14:30:28 +00:00			`/// Number of mark in data from which skip indices have to start`
Fix bad size of marks 2019-08-30 14:29:08 +00:00			`/// aggregation. I.e. it's data mark number, not skip indices mark.`
			`size_t skip_index_data_mark = 0;`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00
Fix bug with enable_mixed_granularity_parts and mutations 2019-08-19 10:37:04 +00:00			`const bool can_use_adaptive_granularity;`
Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`const std::string marks_file_extension;`
			`const bool blocks_are_granules_size;`

			`MergeTreeIndexGranularity index_granularity;`

			`const bool compute_granularity;`
			`CompressionCodecPtr codec;`

fix 2019-07-28 11:10:35 +00:00			`std::vector<MergeTreeIndexPtr> skip_indices;`
polymorphic parts (development) 2019-10-19 16:49:36 +00:00			`std::vector<std::unique_ptr<IMergeTreeDataPartWriter::ColumnStream>> skip_indices_streams;`
fix 2019-07-28 11:10:35 +00:00			`MergeTreeIndexAggregators skip_indices_aggregators;`
			`std::vector<size_t> skip_index_filling;`

polymorphic parts (development) 2019-10-19 16:49:36 +00:00			`std::unique_ptr<IMergeTreeDataPartWriter> writer;`

Add ability to write final mark for MergeTree engines family (#5624) Add the ability to write final mark to MergeTree engines family. 2019-06-18 12:54:27 +00:00			`const bool with_final_mark;`
			`};`

			`}`