ClickHouse/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp

141 lines
3.4 KiB
C++
Raw Normal View History

2012-08-16 17:02:31 +00:00
#include <DB/DataStreams/CollapsingSortedBlockInputStream.h>
namespace DB
{
2012-08-22 16:30:41 +00:00
void CollapsingSortedBlockInputStream::reportIncorrectData()
{
std::stringstream s;
s << "Incorrect data: number of rows with sign = 1 (" << count_positive
<< ") differs with number of rows with sign = -1 (" << count_negative
<< ") by more than one (for key: ";
for (size_t i = 0, size = current_key.size(); i < size; ++i)
{
if (i != 0)
s << ", ";
s << apply_visitor(FieldVisitorToString(), current_key[i]);
}
s << ").";
2012-08-22 16:30:41 +00:00
/** Пока ограничимся всего лишь логгированием таких ситуаций,
* так как данные генерируются внешними программами.
*/
LOG_ERROR(log, s.rdbuf());
}
2012-08-16 20:07:13 +00:00
void CollapsingSortedBlockInputStream::insertRows(ColumnPlainPtrs & merged_columns, size_t & merged_rows)
2012-08-16 17:02:31 +00:00
{
2012-08-16 19:43:03 +00:00
if (count_positive != 0 || count_negative != 0)
2012-08-16 17:02:31 +00:00
{
2012-08-22 16:30:41 +00:00
if (count_positive <= count_negative)
2012-08-16 20:07:13 +00:00
{
++merged_rows;
2012-08-16 17:02:31 +00:00
for (size_t i = 0; i < num_columns; ++i)
merged_columns[i]->insert(first_negative[i]);
2012-08-16 20:07:13 +00:00
}
2012-08-16 19:47:18 +00:00
2012-08-22 16:30:41 +00:00
if (count_positive >= count_negative)
2012-08-16 20:07:13 +00:00
{
++merged_rows;
2012-08-16 17:02:31 +00:00
for (size_t i = 0; i < num_columns; ++i)
merged_columns[i]->insert(last_positive[i]);
2012-08-16 20:07:13 +00:00
}
2012-08-16 19:47:18 +00:00
if (!(count_positive == count_negative || count_positive + 1 == count_negative || count_positive == count_negative + 1))
2012-08-22 16:30:41 +00:00
reportIncorrectData();
2012-08-16 17:02:31 +00:00
}
}
Block CollapsingSortedBlockInputStream::readImpl()
{
if (!inputs.size())
return Block();
if (inputs.size() == 1)
return inputs[0]->read();
size_t merged_rows = 0;
Block merged_block;
ColumnPlainPtrs merged_columns;
init(merged_block, merged_columns);
if (merged_columns.empty())
return Block();
/// Дополнительная инициализация.
if (first_negative.empty())
{
first_negative.resize(num_columns);
last_positive.resize(num_columns);
current_key.resize(description.size());
next_key.resize(description.size());
2012-08-16 17:02:31 +00:00
sign_column_number = merged_block.getPositionByName(sign_column);
}
/// Вынимаем строки в нужном порядке и кладём в merged_block, пока строк не больше max_block_size
while (!queue.empty())
{
SortCursor current = queue.top();
queue.pop();
Int8 sign = get<Int64>((*current->all_columns[sign_column_number])[current->pos]);
setPrimaryKey(next_key, current);
2012-08-16 17:02:31 +00:00
if (next_key != current_key)
2012-08-16 17:02:31 +00:00
{
/// Запишем данные для предыдущего визита.
2012-08-16 20:07:13 +00:00
insertRows(merged_columns, merged_rows);
2012-08-16 17:02:31 +00:00
current_key = next_key;
2012-08-16 17:02:31 +00:00
count_negative = 0;
count_positive = 0;
}
if (sign == 1)
{
++count_positive;
setRow(last_positive, current);
}
else if (sign == -1)
{
if (!count_negative)
setRow(first_negative, current);
++count_negative;
}
else
throw Exception("Incorrect data: Sign = " + Poco::NumberFormatter::format(sign) + " (must be 1 or -1).",
ErrorCodes::INCORRECT_DATA);
if (!current->isLast())
{
current->next();
queue.push(current);
}
else
{
/// Достаём из соответствующего источника следующий блок, если есть.
fetchNextBlock(current);
}
2012-08-16 20:07:13 +00:00
if (merged_rows >= max_block_size)
2012-08-16 17:02:31 +00:00
return merged_block;
}
/// Запишем данные для последнего визита.
2012-08-16 20:07:13 +00:00
insertRows(merged_columns, merged_rows);
2012-08-16 17:02:31 +00:00
inputs.clear();
return merged_block;
}
}