Add some comments

2024-11-22 07:31:57 +00:00 · 2020-12-15 13:34:28 +03:00 · 2020-12-15 13:34:28 +03:00 · 8670836573
commit 8670836573
parent 74c2211510
6 changed files with 73 additions and 35 deletions
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp
@ -72,6 +72,7 @@ void MergeTreeDataPartWriterCompact::addStreams(const String & name, const IData
 namespace
 {
 /// Get granules for block using index_granularity
 Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, size_t block_rows, size_t current_mark, bool last_block)
 {
    if (current_mark >= index_granularity.getMarksCount())
@ -85,6 +86,8 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity,
        size_t rest_rows = block_rows - current_row;
        if (rest_rows < expected_rows)
        {
            /// Invariant: we always have equal amount of rows for block in compact parts because we accumulate them in buffer.
            /// The only exclusion is the last block, when we cannot accumulate more rows.
            if (!last_block)
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Required to write {} rows, but only {} rows was written for the non last granule", expected_rows, rest_rows);
@ -98,6 +101,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity,
        }
        else
        {
            /// Normal granule with amount of rows equal to rows in compute granularity
            result.emplace_back(Granule{
                .start_row = current_row,
                .granularity_rows = expected_rows,
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h
@ -23,13 +23,16 @@ public:
    void finish(IMergeTreeDataPart::Checksums & checksums, bool sync) override;
 private:
-
+    /// Finish serialization of the data. Flush rows in buffer to disk, compute checksums.
    void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync);
    void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override;
    /// Write block of rows into .bin file and marks in .mrk files
    void writeDataBlock(const Block & block, const Granules & granules);
    /// Write block of rows into .bin file and marks in .mrk files, primary index in .idx file
    /// and skip indices in their corresponding files.
    void writeDataBlockPrimaryIndexAndSkipIndices(const Block & block, const Granules & granules);
    void addToChecksums(MergeTreeDataPartChecksums & checksums);
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp
@ -167,7 +167,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
                        default_codec, settings.max_compress_block_size,
                        0, settings.aio_threshold));
        skip_indices_aggregators.push_back(index_helper->createIndexAggregator());
-        skip_index_filling.push_back(0);
+        skip_index_accumulated_marks.push_back(0);
    }
 }
@ -221,7 +221,6 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
            if (skip_indices_aggregators[i]->empty() && granule.mark_on_start)
            {
                skip_indices_aggregators[i] = index_helper->createIndexAggregator();
                skip_index_filling[i] = 0;
                if (stream.compressed.offset() >= settings.min_compress_block_size)
                    stream.compressed.next();
@ -238,13 +237,13 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
            skip_indices_aggregators[i]->update(skip_indexes_block, &pos, granule.granularity_rows);
            if (granule.isCompleted())
            {
-                ++skip_index_filling[i];
+                ++skip_index_accumulated_marks[i];
                /// write index if it is filled
-                if (skip_index_filling[i] == index_helper->index.granularity)
+                if (skip_index_accumulated_marks[i] == index_helper->index.granularity)
                {
                    skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed);
-                    skip_index_filling[i] = 0;
+                    skip_index_accumulated_marks[i] = 0;
                }
            }
        }
@ -302,7 +301,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(
    skip_indices_streams.clear();
    skip_indices_aggregators.clear();
-    skip_index_filling.clear();
+    skip_index_accumulated_marks.clear();
 }
 Names MergeTreeDataPartWriterOnDisk::getSkipIndicesColumns() const
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h
@ -14,20 +14,34 @@
 namespace DB
 {
 /// One unit of data to be written to disk. Describes which rows of the
 /// block have to be written and which mark they belong to.
 struct Granule
 {
    /// Index of the first row of this granule inside the block
    size_t start_row;
    /// Number of rows the granule has to contain according to the index
    /// granularity.
    /// NOTE: It can differ from the number of rows actually written, for example
    /// for the last granule when it is smaller than the computed granularity.
    size_t granularity_rows;
    /// Number of rows from the block, counted from start_row, which have to be written to disk
    size_t block_rows;
    /// Global number of this granule's mark in the list of all marks (index_granularity) of the part
    size_t mark_number;
    /// Whether the writer should write a mark for the first row of this granule to disk.
    /// NOTE: Sometimes the mark for the start row is not written, because
    /// the granule can be a continuation of the previous one.
    bool mark_on_start;

    /// Does this granule contain the amount of rows equal to the value in index granularity
    bool isCompleted() const
    {
        return block_rows == granularity_rows;
    }
 };
 /// Multiple granules to write for concrete block.
 using Granules = std::vector<Granule>;
 /// Writes data part to disk in different formats.
@ -90,19 +104,29 @@ public:
    {
        written_offset_columns = written_offset_columns_;
    }
 protected:
     /// Count index_granularity for block and store in `index_granularity`
    size_t computeIndexGranularity(const Block & block) const;
    /// Write primary index according to granules_to_write
    void calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write);
    /// Write skip indices according to granules_to_write. Skip indices also have their own marks
    /// and one skip index granule can contain multiple "normal" marks. So skip indices serialization
    /// requires additional state: skip_indices_aggregators and skip_index_accumulated_marks
    void calculateAndSerializeSkipIndices(const Block & skip_indexes_block, const Granules & granules_to_write);
    /// Finishes primary index serialization: write final primary index row (if required) and compute checksums
    void finishPrimaryIndexSerialization(MergeTreeData::DataPart::Checksums & checksums, bool sync);
    /// Finishes skip indices serialization: write all accumulated data to disk and compute checksums
    void finishSkipIndicesSerialization(MergeTreeData::DataPart::Checksums & checksums, bool sync);
    /// Get global number of the current mark which we are writing (or are going to start writing)
    size_t getCurrentMark() const { return current_mark; }
    void setCurrentMark(size_t mark) { current_mark = mark; }
    /// Get unique non ordered skip indices column.
    Names getSkipIndicesColumns() const;
    const MergeTreeIndices skip_indices;
@ -113,13 +137,9 @@ protected:
    const bool compute_granularity;
    /// Number of marks in data from which skip indices have to start
    /// aggregation. I.e. it's data mark number, not skip indices mark.
    size_t skip_index_data_mark = 0;
    std::vector<StreamPtr> skip_indices_streams;
    MergeTreeIndexAggregators skip_indices_aggregators;
-    std::vector<size_t> skip_index_filling;
+    std::vector<size_t> skip_index_accumulated_marks;
    std::unique_ptr<WriteBufferFromFileBase> index_file_stream;
    std::unique_ptr<HashingWriteBuffer> index_stream;
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp
@ -18,6 +18,7 @@ namespace
 namespace
 {
 /// Get granules for block using index_granularity
 Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, size_t block_rows, size_t current_mark, size_t rows_written_in_last_mark)
 {
    if (current_mark >= index_granularity.getMarksCount())
@ -25,6 +26,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity,
    Granules result;
    size_t current_row = 0;
    /// When our last mark is not finished yet and we have to write rows into it
    if (rows_written_in_last_mark > 0)
    {
        size_t rows_left_in_last_mark = index_granularity.getMarkRows(current_mark) - rows_written_in_last_mark;
@ -35,7 +37,7 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity,
                .granularity_rows = rows_left_in_last_mark,
                .block_rows = rest_rows,
                .mark_number = current_mark,
-                .mark_on_start = false,
+                .mark_on_start = false, /// Don't mark this granule because we have already marked it
            });
        else
            result.emplace_back(Granule{
@ -43,16 +45,19 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity,
                .granularity_rows = rows_left_in_last_mark,
                .block_rows = rows_left_in_last_mark,
                .mark_number = current_mark,
-                .mark_on_start = false,
+                .mark_on_start = false, /// Don't mark this granule because we have already marked it
            });
        current_row += rows_left_in_last_mark;
        current_mark++;
    }
    /// Calculating normal granules for block
    while (current_row < block_rows)
    {
        size_t expected_rows = index_granularity.getMarkRows(current_mark);
        size_t rest_rows = block_rows - current_row;
        /// If we have less rows in block than expected in granularity
        /// save incomplete granule
        if (rest_rows < expected_rows)
            result.emplace_back(Granule{
                .start_row = current_row,
@ -153,10 +158,14 @@ IDataType::OutputStreamGetter MergeTreeDataPartWriterWide::createStreamGetter(
 void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_written)
 {
    auto last_granule = granules_written.back();
    /// If we didn't finish the last granule then we will continue to write it from the new block
    if (!last_granule.isCompleted())
    {
        /// Shift forward except last granule
        setCurrentMark(getCurrentMark() + granules_written.size() - 1);
        bool still_in_the_same_granule = granules_written.size() == 1;
        /// We wrote the whole block in the same granule, but didn't finish it.
        /// So add written rows to rows written in last_mark
        if (still_in_the_same_granule)
            rows_written_in_last_mark += last_granule.block_rows;
        else
@ -289,14 +298,12 @@ void MergeTreeDataPartWriterWide::writeSingleGranule(
    WrittenOffsetColumns & offset_columns,
    IDataType::SerializeBinaryBulkStatePtr & serialization_state,
    IDataType::SerializeBinaryBulkSettings & serialize_settings,
-    size_t from_row,
+    const Granule & granule)
    size_t number_of_rows,
    bool write_marks)
 {
-    if (write_marks)
+    if (granule.mark_on_start)
-        writeSingleMark(name, type, offset_columns, number_of_rows, serialize_settings.path);
+        writeSingleMark(name, type, offset_columns, granule.granularity_rows, serialize_settings.path);
-    type.serializeBinaryBulkWithMultipleStreams(column, from_row, number_of_rows, serialize_settings, serialization_state);
+    type.serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.granularity_rows, serialize_settings, serialization_state);
    /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one.
    type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */)
@ -322,6 +329,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
    const Granules & granules)
 {
    auto [it, inserted] = serialization_states.emplace(name, nullptr);
    if (inserted)
    {
        IDataType::SerializeBinaryBulkSettings serialize_settings;
@ -347,9 +355,7 @@ void MergeTreeDataPartWriterWide::writeColumn(
           offset_columns,
           it->second,
           serialize_settings,
-           granule.start_row,
+           granule
           granule.granularity_rows,
           granule.mark_on_start
        );
    }
@ -376,7 +382,6 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name,
    DB::ReadBufferFromFile mrk_in(mrk_path);
    DB::CompressedReadBufferFromFile bin_in(bin_path, 0, 0, 0);
    bool must_be_last = false;
    //auto * log = &Poco::Logger::get(storage.getLogName());
    UInt64 offset_in_compressed_file = 0;
    UInt64 offset_in_decompressed_block = 0;
    UInt64 index_granularity_rows = 0;
--- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h
@ -32,7 +32,8 @@ public:
    void finish(IMergeTreeDataPart::Checksums & checksums, bool sync) final;
 private:
-
+    /// Finish serialization of data: write final mark if required and compute checksums
    /// Also validate written data in debug mode
    void finishDataSerialization(IMergeTreeDataPart::Checksums & checksums, bool sync);
    /// Write data of one column.
@ -45,7 +46,7 @@ private:
        WrittenOffsetColumns & offset_columns,
        const Granules & granules);
-    /// Write single granule of one column (rows between 2 marks)
+    /// Write single granule of one column.
    void writeSingleGranule(
        const String & name,
        const IDataType & type,
@ -53,22 +54,21 @@ private:
        WrittenOffsetColumns & offset_columns,
        IDataType::SerializeBinaryBulkStatePtr & serialization_state,
        IDataType::SerializeBinaryBulkSettings & serialize_settings,
-        size_t from_row,
+        const Granule & granule);
        size_t number_of_rows,
        bool write_marks);
    void flushMarkToFile(
        const StreamNameAndMark & stream_with_mark,
        size_t rows_in_mark);
    /// Take offsets from column and return as MarkInCompressed file with stream name
    StreamsWithMarks getCurrentMarksForColumn(
        const String & name,
        const IDataType & type,
        WrittenOffsetColumns & offset_columns,
        DB::IDataType::SubstreamPath & path);
-    /// Write mark for column
+    /// Write mark to disk using stream and rows count
    void flushMarkToFile(
        const StreamNameAndMark & stream_with_mark,
        size_t rows_in_mark);
    /// Write mark for column taking offsets from column stream
    void writeSingleMark(
        const String & name,
        const IDataType & type,
@ -88,10 +88,15 @@ private:
        const ASTPtr & effective_codec_desc,
        size_t estimated_size);
    /// Method for self check (used in debug-build only). Checks that written
    /// data and corresponding marks are consistent. Otherwise throws logical
    /// errors.
    void validateColumnOfFixedSize(const String & name, const IDataType & type);
    void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) override;
    /// Use information from just written granules to shift current mark
    /// in our index_granularity array.
    void shiftCurrentMark(const Granules & granules_written);
    IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns) const;
@ -104,6 +109,8 @@ private:
    using ColumnStreams = std::map<String, StreamPtr>;
    ColumnStreams column_streams;
    /// How many rows we have already written in the current mark.
    /// More than zero when incoming blocks are smaller than their granularity.
    size_t rows_written_in_last_mark = 0;
 };