ClickHouse/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h

73 lines
2.2 KiB
C++
Raw Normal View History

2016-04-15 19:09:42 +00:00
#pragma once
#include <common/logger_useful.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
2016-04-15 19:09:42 +00:00
namespace DB
{
2017-05-13 22:19:04 +00:00
/** Merges several sorted streams into one.
  * For each group of consecutive identical values of the primary key (the columns by which the data is sorted),
  * keeps only the row with the maximum `version` value.
  */
class ReplacingSortedBlockInputStream : public MergingSortedBlockInputStream
{
public:
ReplacingSortedBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_,
const String & version_column_, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr)
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_),
version_column(version_column_)
{
}
2016-04-15 19:09:42 +00:00
String getName() const override { return "ReplacingSorted"; }
2016-04-15 19:09:42 +00:00
String getID() const override
{
std::stringstream res;
res << "ReplacingSorted(inputs";
2016-04-15 19:09:42 +00:00
for (size_t i = 0; i < children.size(); ++i)
res << ", " << children[i]->getID();
2016-04-15 19:09:42 +00:00
res << ", description";
2016-04-15 19:09:42 +00:00
for (size_t i = 0; i < description.size(); ++i)
res << ", " << description[i].getID();
2016-04-15 19:09:42 +00:00
res << ", version_column, " << version_column << ")";
return res.str();
}
2016-04-15 19:09:42 +00:00
protected:
2017-05-13 22:19:04 +00:00
/// Can return 1 more records than max_block_size.
Block readImpl() override;
2016-04-15 19:09:42 +00:00
private:
String version_column;
ssize_t version_column_number = -1;
2016-04-15 19:09:42 +00:00
Logger * log = &Logger::get("ReplacingSortedBlockInputStream");
2016-04-15 19:09:42 +00:00
/// All data has been read.
bool finished = false;
2016-04-15 19:09:42 +00:00
RowRef current_key; /// Primary key of current row.
RowRef next_key; /// Primary key of next row.
2016-04-15 19:09:42 +00:00
RowRef selected_row; /// Last row with maximum version for current primary key.
2016-04-15 19:09:42 +00:00
UInt64 max_version = 0; /// Max version for current primary key.
2016-04-15 19:09:42 +00:00
PODArray<RowSourcePart> current_row_sources; /// Sources of rows with the current primary key
void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
2016-04-15 19:09:42 +00:00
2017-05-13 22:19:04 +00:00
/// Output into result the rows for current primary key.
void insertRow(MutableColumns & merged_columns, size_t & merged_rows);
2016-04-15 19:09:42 +00:00
};
}