2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/ReplacingSortedBlockInputStream.h>
|
|
|
|
#include <Columns/ColumnsNumber.h>
|
2017-07-17 19:40:30 +00:00
|
|
|
#include <common/logger_useful.h>
|
2016-04-15 19:09:42 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2017-12-15 00:01:59 +00:00
|
|
|
/// Error codes referenced in this translation unit; only declared here (extern), defined elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}
|
2016-04-15 19:09:42 +00:00
|
|
|
|
2017-12-15 00:01:59 +00:00
|
|
|
|
2019-03-26 10:23:14 +00:00
|
|
|
/// Appends the row referenced by `selected_row` to `merged_columns`, one value per column.
/// If `out_row_sources_buf` is set, the accumulated `current_row_sources` are written to it first
/// (with the skip flag cleared for the entry at `max_pos`) and the accumulator is emptied.
void ReplacingSortedBlockInputStream::insertRow(MutableColumns & merged_columns)
{
    if (out_row_sources_buf)
    {
        /// A skip flag equal to `true` means "skip row"; the entry at `max_pos` is the one to keep.
        current_row_sources[max_pos].setSkipFlag(false);

        /// Dump the row sources as raw bytes.
        const char * raw_sources = reinterpret_cast<const char *>(current_row_sources.data());
        const size_t raw_sources_bytes = current_row_sources.size() * sizeof(RowSourcePart);
        out_row_sources_buf->write(raw_sources, raw_sources_bytes);

        current_row_sources.resize(0);
    }

    for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
    {
        const auto & source_column = *(*selected_row.columns)[column_idx];
        merged_columns[column_idx]->insertFrom(source_column, selected_row.row_num);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Produces the next merged block.
/// Returns an empty Block once `finished` is set, or when `init` leaves `merged_columns` empty.
/// Throws LOGICAL_ERROR if `has_collation` is set after `init`.
Block ReplacingSortedBlockInputStream::readImpl()
{
    if (finished)
        return Block();

    MutableColumns merged_columns;
    init(merged_columns);

    /// Only the queue without collation is passed to merge() below.
    if (has_collation)
    {
        const auto message = "Logical error: " + getName() + " does not support collations";
        throw Exception(message, ErrorCodes::LOGICAL_ERROR);
    }

    if (!merged_columns.empty())
    {
        merge(merged_columns, queue_without_collation);
        return header.cloneWithColumns(std::move(merged_columns));
    }

    return Block();
}
|
|
|
|
|
|
|
|
|
2019-12-22 00:19:07 +00:00
|
|
|
/// Walks `queue` in order and, for every run of rows sharing a primary key, keeps one row in
/// `selected_row`; that row is written to `merged_columns` via insertRow() when the key changes
/// or the queue is exhausted.
/// Returns early (without setting `finished`) when a new key starts and `stop_condition` reports a stop;
/// otherwise sets `finished` after the queue has been drained.
void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
{
    MergeStopCondition stop_condition(average_block_sizes, max_block_size);

    /// Consume rows in queue order until the queue is exhausted or the stop condition fires.
    while (queue.isValid())
    {
        SortCursor cursor = queue.current();
        size_t block_granularity = cursor->rows;

        if (current_key.empty())
            setPrimaryKeyRef(current_key, cursor);

        setPrimaryKeyRef(next_key, cursor);

        if (next_key != current_key)
        {
            /// The previous key is complete; stop here if enough rows have been collected.
            if (stop_condition.checkStop())
                return;

            /// Flush the row chosen for the previous primary key and start a new group.
            insertRow(merged_columns);
            stop_condition.addRowWithGranularity(block_granularity);
            selected_row.reset();
            current_key.swap(next_key);
        }

        /// Every row source starts out flagged as skipped; insertRow() un-skips the chosen one.
        const size_t source_pos = current_row_sources.size();
        if (out_row_sources_buf)
            current_row_sources.emplace_back(cursor.impl->order, true);

        /// The current row replaces the selected one when there is no version column,
        /// when nothing is selected yet, or when its version compares >= the selected version.
        /// The comparison is non-strict, since we select the last row for the same version values.
        bool replaces_selected = version_column_number == -1 || selected_row.empty();
        if (!replaces_selected)
        {
            const int version_cmp = cursor->all_columns[version_column_number]->compareAt(
                cursor->pos,
                selected_row.row_num,
                *(*selected_row.columns)[version_column_number],
                /* nan_direction_hint = */ 1);
            replaces_selected = version_cmp >= 0;
        }

        if (replaces_selected)
        {
            max_pos = source_pos;
            setRowRef(selected_row, cursor);
        }

        if (cursor->isLast())
        {
            /// Get the next block from the corresponding source, if there is one.
            fetchNextBlock(cursor, queue);
        }
        else
        {
            queue.next();
        }
    }

    /// Write the data for the last primary key.
    if (!selected_row.empty())
        insertRow(merged_columns);

    finished = true;
}
|
|
|
|
|
|
|
|
}
|