ClickHouse/dbms/DataStreams/ReplacingSortedBlockInputStream.h

62 lines
1.9 KiB
C++
Raw Normal View History

2016-04-15 19:09:42 +00:00
#pragma once
#include <common/logger_useful.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
2019-02-12 19:09:51 +00:00
#include <DataStreams/ColumnGathererStream.h>
2016-04-15 19:09:42 +00:00
namespace DB
{
2017-05-13 22:19:04 +00:00
/** Merges several sorted streams into one.
* For each group of consecutive identical values of the primary key (the columns by which the data is sorted),
* keeps row with max `version` value.
2016-04-15 19:09:42 +00:00
*/
class ReplacingSortedBlockInputStream : public MergingSortedBlockInputStream
{
public:
ReplacingSortedBlockInputStream(
const BlockInputStreams & inputs_, const SortDescription & description_,
const String & version_column, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr,
bool average_block_sizes_ = false)
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_, false, average_block_sizes_)
{
if (!version_column.empty())
version_column_number = header.getPositionByName(version_column);
}
2016-04-15 19:09:42 +00:00
String getName() const override { return "ReplacingSorted"; }
2016-04-15 19:09:42 +00:00
protected:
2017-05-13 22:19:04 +00:00
/// Can return 1 more records than max_block_size.
Block readImpl() override;
2016-04-15 19:09:42 +00:00
private:
ssize_t version_column_number = -1;
2016-04-15 19:09:42 +00:00
Logger * log = &Logger::get("ReplacingSortedBlockInputStream");
2016-04-15 19:09:42 +00:00
/// All data has been read.
bool finished = false;
2016-04-15 19:09:42 +00:00
/// Primary key of current row.
2019-04-19 13:38:25 +00:00
SharedBlockRowRef current_key;
/// Primary key of next row.
2019-04-19 13:38:25 +00:00
SharedBlockRowRef next_key;
/// Last row with maximum version for current primary key.
2019-04-19 13:38:25 +00:00
SharedBlockRowRef selected_row;
/// The position (into current_row_sources) of the row with the highest version.
size_t max_pos = 0;
2016-04-15 19:09:42 +00:00
/// Sources of rows with the current primary key.
PODArray<RowSourcePart> current_row_sources;
2019-12-22 00:19:07 +00:00
void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);
2016-04-15 19:09:42 +00:00
2017-05-13 22:19:04 +00:00
/// Output into result the rows for current primary key.
void insertRow(MutableColumns & merged_columns);
2016-04-15 19:09:42 +00:00
};
}