2016-04-15 19:09:42 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <common/logger_useful.h>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/MergingSortedBlockInputStream.h>
|
2019-02-12 19:09:51 +00:00
|
|
|
#include <DataStreams/ColumnGathererStream.h>
|
2016-04-15 19:09:42 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** Merges several sorted streams into one.
  * For each group of consecutive identical values of the primary key (the columns by which the data is sorted),
  * keeps only the row with the maximum `version` value.
  */
|
|
|
|
class ReplacingSortedBlockInputStream : public MergingSortedBlockInputStream
|
|
|
|
{
|
|
|
|
public:
|
2018-04-07 01:46:50 +00:00
|
|
|
ReplacingSortedBlockInputStream(
|
|
|
|
const BlockInputStreams & inputs_, const SortDescription & description_,
|
2019-03-26 10:23:14 +00:00
|
|
|
const String & version_column, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr,
|
|
|
|
bool average_block_sizes = false)
|
|
|
|
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_, false, average_block_sizes)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2018-04-07 01:46:50 +00:00
|
|
|
if (!version_column.empty())
|
|
|
|
version_column_number = header.getPositionByName(version_column);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
String getName() const override { return "ReplacingSorted"; }
|
2016-04-15 19:09:42 +00:00
|
|
|
|
|
|
|
protected:
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Can return 1 more records than max_block_size.
|
2017-04-01 07:20:54 +00:00
|
|
|
Block readImpl() override;
|
2016-04-15 19:09:42 +00:00
|
|
|
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
ssize_t version_column_number = -1;
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Logger * log = &Logger::get("ReplacingSortedBlockInputStream");
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
/// All data has been read.
|
|
|
|
bool finished = false;
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2018-02-22 08:37:16 +00:00
|
|
|
/// Primary key of current row.
|
|
|
|
RowRef current_key;
|
|
|
|
/// Primary key of next row.
|
|
|
|
RowRef next_key;
|
|
|
|
/// Last row with maximum version for current primary key.
|
|
|
|
RowRef selected_row;
|
|
|
|
/// The position (into current_row_sources) of the row with the highest version.
|
2018-02-21 23:04:37 +00:00
|
|
|
size_t max_pos = 0;
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2018-02-22 08:37:16 +00:00
|
|
|
/// Sources of rows with the current primary key.
|
|
|
|
PODArray<RowSourcePart> current_row_sources;
|
2017-07-14 18:17:23 +00:00
|
|
|
|
2017-12-15 00:01:59 +00:00
|
|
|
void merge(MutableColumns & merged_columns, std::priority_queue<SortCursor> & queue);
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Output into result the rows for current primary key.
|
2019-03-26 10:23:14 +00:00
|
|
|
void insertRow(MutableColumns & merged_columns);
|
2016-04-15 19:09:42 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|