ClickHouse/dbms/DataStreams/CollapsingSortedBlockInputStream.h

85 lines
3.8 KiB
C++
Raw Normal View History

2012-08-14 20:35:44 +00:00
#pragma once
2015-09-29 19:19:54 +00:00
#include <common/logger_useful.h>
#include <DataStreams/MergingSortedBlockInputStream.h>
2019-02-12 19:09:51 +00:00
#include <DataStreams/ColumnGathererStream.h>
2012-08-14 20:35:44 +00:00
namespace DB
{
2017-05-13 22:19:04 +00:00
/** Merges several sorted streams into one.
  * For each group of consecutive identical values of the primary key (the columns by which the data is sorted),
  *  keeps no more than one row with the value of the column `sign_column = -1` ("negative row")
  *  and no more than one row with the value of the column `sign_column = 1` ("positive row").
  * That is, it collapses the records from the change log.
  *
  * If the number of positive and negative rows is the same, and the last row is positive, then the first negative and the last positive rows are written.
  * If the number of positive and negative rows is the same, and the last row is negative, nothing is written.
  * If there is one more positive row than negative rows, then only the last positive row is written.
  * If there is one more negative row than positive rows, then only the first negative row is written.
  * Otherwise, it is a logical error.
  */
class CollapsingSortedBlockInputStream : public MergingSortedBlockInputStream
{
public:
CollapsingSortedBlockInputStream(
BlockInputStreams inputs_, const SortDescription & description_,
2019-03-28 09:31:57 +00:00
const String & sign_column, size_t max_block_size_,
WriteBuffer * out_row_sources_buf_ = nullptr, bool average_block_sizes_ = false)
: MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_, false, average_block_sizes_)
{
sign_column_number = header.getPositionByName(sign_column);
}
2012-08-14 20:35:44 +00:00
String getName() const override { return "CollapsingSorted"; }
2012-08-14 20:35:44 +00:00
2012-10-20 02:10:47 +00:00
protected:
2017-05-13 22:19:04 +00:00
/// Can return 1 more records than max_block_size.
Block readImpl() override;
2012-10-20 02:10:47 +00:00
2012-08-14 20:35:44 +00:00
private:
size_t sign_column_number;
2012-08-14 20:35:44 +00:00
Logger * log = &Logger::get("CollapsingSortedBlockInputStream");
2015-01-18 08:25:56 +00:00
2017-05-13 22:19:04 +00:00
/// Read is finished.
bool finished = false;
2012-08-16 17:02:31 +00:00
2019-04-19 13:38:25 +00:00
SharedBlockRowRef current_key; /// The current primary key.
SharedBlockRowRef next_key; /// The primary key of the next row.
2019-04-19 13:38:25 +00:00
SharedBlockRowRef first_negative; /// The first negative row for the current primary key.
SharedBlockRowRef last_positive; /// The last positive row for the current primary key.
SharedBlockRowRef last_negative; /// Last negative row. It is only stored if there is not one row is written to output.
2012-08-16 17:02:31 +00:00
2017-05-13 22:19:04 +00:00
size_t count_positive = 0; /// The number of positive rows for the current primary key.
size_t count_negative = 0; /// The number of negative rows for the current primary key.
bool last_is_positive = false; /// true if the last row for the current primary key is positive.
2012-08-16 17:02:31 +00:00
2017-05-13 22:19:04 +00:00
size_t count_incorrect_data = 0; /// To prevent too many error messages from writing to the log.
size_t blocks_written = 0;
2013-10-30 08:50:58 +00:00
/// Fields specific for VERTICAL merge algorithm.
/// Row numbers are relative to the start of current primary key.
size_t current_pos = 0; /// Current row number
size_t first_negative_pos = 0; /// Row number of first_negative
size_t last_positive_pos = 0; /// Row number of last_positive
size_t last_negative_pos = 0; /// Row number of last_negative
PODArray<RowSourcePart> current_row_sources; /// Sources of rows with the current primary key
2017-05-13 22:19:04 +00:00
/** We support two different cursors - with Collation and without.
* Templates are used instead of polymorphic SortCursors and calls to virtual functions.
*/
2019-12-22 00:19:07 +00:00
void merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue);
2012-08-16 17:02:31 +00:00
2017-05-13 22:19:04 +00:00
/// Output to result rows for the current primary key.
void insertRows(MutableColumns & merged_columns, size_t block_size, MergeStopCondition & condition);
void reportIncorrectData();
2012-08-14 20:35:44 +00:00
};
}