ClickHouse/dbms/DataStreams/ReplacingSortedBlockInputStream.cpp

117 lines
3.4 KiB
C++
Raw Normal View History

#include <DataStreams/ReplacingSortedBlockInputStream.h>
#include <Columns/ColumnsNumber.h>
#include <common/logger_useful.h>
2016-04-15 19:09:42 +00:00
namespace DB
{
/// Error codes referenced by this translation unit.
/// Only declared here (`extern`); the definitions live outside this file.
namespace ErrorCodes
{
/// Thrown by readImpl() below when the stream is configured with collations.
extern const int LOGICAL_ERROR;
}
2016-04-15 19:09:42 +00:00
/// Appends the currently selected row to `merged_columns`.
///
/// When a row-sources buffer is attached, the accumulated row-source entries are flushed first:
/// the entry at `max_pos` (the row that was chosen) gets its skip flag cleared, the whole
/// accumulator is written to the buffer as raw bytes, and the accumulator is emptied.
void ReplacingSortedBlockInputStream::insertRow(MutableColumns & merged_columns)
{
    if (out_row_sources_buf)
    {
        /// A skip flag equal to true means "skip row"; the chosen row must be kept.
        current_row_sources[max_pos].setSkipFlag(false);

        const char * raw_sources = reinterpret_cast<const char *>(current_row_sources.data());
        const size_t raw_size = current_row_sources.size() * sizeof(RowSourcePart);
        out_row_sources_buf->write(raw_sources, raw_size);

        current_row_sources.resize(0);
    }

    /// Copy every column value of the selected row into the corresponding output column.
    for (size_t column = 0; column < num_columns; ++column)
        merged_columns[column]->insertFrom(*(*selected_row.columns)[column], selected_row.row_num);
}
/// Produces the next merged block.
///
/// Returns an empty Block once the stream is finished, or when `init` leaves
/// `merged_columns` empty. Throws LOGICAL_ERROR if collations are requested,
/// because this stream only merges through `queue_without_collation`.
Block ReplacingSortedBlockInputStream::readImpl()
{
    if (finished)
        return Block{};

    MutableColumns merged_columns;
    init(merged_columns);

    if (has_collation)
        throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR);

    /// Nothing to merge into.
    if (merged_columns.empty())
        return Block{};

    merge(merged_columns, queue_without_collation);

    return header.cloneWithColumns(std::move(merged_columns));
}
2019-12-22 00:19:07 +00:00
/// Merges rows taken from `queue` into `merged_columns`, writing one row per run of equal primary keys.
///
/// For each run, the kept row is the last one seen when there is no version column
/// (`version_column_number == -1`); otherwise it is the last row whose version compares
/// greater than or equal to the currently kept one.
///
/// Returns early, without setting `finished`, when the key changes and the stop condition
/// reports that enough rows were produced. Otherwise the queue is drained, the row selected
/// for the final key is written, and `finished` is set.
void ReplacingSortedBlockInputStream::merge(MutableColumns & merged_columns, SortingHeap<SortCursor> & queue)
{
    MergeStopCondition stop_condition(average_block_sizes, max_block_size);

    /// Take rows in the needed order and put them into `merged_columns` while there are no more than `max_block_size` rows.
    while (queue.isValid())
    {
        SortCursor cursor = queue.current();
        const size_t granularity = cursor->rows;

        /// The very first row seen defines the initial key.
        if (current_key.empty())
            setPrimaryKeyRef(current_key, cursor);

        setPrimaryKeyRef(next_key, cursor);

        if (next_key != current_key)
        {
            /// Enough rows have been produced and the previous key is fully processed: stop here.
            if (stop_condition.checkStop())
                return;

            /// Flush the row chosen for the previous primary key and start a new run.
            insertRow(merged_columns);
            stop_condition.addRowWithGranularity(granularity);
            selected_row.reset();
            current_key.swap(next_key);
        }

        /// Every row is initially marked as skipped; insertRow() unskips the chosen one.
        const size_t source_index = current_row_sources.size();
        if (out_row_sources_buf)
            current_row_sources.emplace_back(cursor.impl->order, true);

        /// Non-strict comparison: among equal versions the last row wins.
        const bool replaces_selected = version_column_number == -1
            || selected_row.empty()
            || cursor->all_columns[version_column_number]->compareAt(
                    cursor->pos, selected_row.row_num,
                    *(*selected_row.columns)[version_column_number],
                    /* nan_direction_hint = */ 1) >= 0;

        if (replaces_selected)
        {
            max_pos = source_index;
            setRowRef(selected_row, cursor);
        }

        if (cursor->isLast())
        {
            /// This source block is exhausted: fetch the next block from the same source, if there is one.
            fetchNextBlock(cursor, queue);
        }
        else
        {
            queue.next();
        }
    }

    /// The queue is drained: write the data for the last primary key.
    if (!selected_row.empty())
        insertRow(merged_columns);

    finished = true;
}
}