2012-08-16 17:02:31 +00:00
|
|
|
|
#include <DB/DataStreams/CollapsingSortedBlockInputStream.h>
|
|
|
|
|
|
2013-09-13 22:59:02 +00:00
|
|
|
|
/// Максимальное количество сообщений о некорректных данных в логе.
|
|
|
|
|
#define MAX_ERROR_MESSAGES 10
|
|
|
|
|
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
2012-08-22 16:30:41 +00:00
|
|
|
|
void CollapsingSortedBlockInputStream::reportIncorrectData()
|
2012-08-20 05:32:50 +00:00
|
|
|
|
{
|
|
|
|
|
std::stringstream s;
|
|
|
|
|
s << "Incorrect data: number of rows with sign = 1 (" << count_positive
|
|
|
|
|
<< ") differs with number of rows with sign = -1 (" << count_negative
|
|
|
|
|
<< ") by more than one (for key: ";
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0, size = current_key.size(); i < size; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (i != 0)
|
|
|
|
|
s << ", ";
|
2013-01-05 20:03:19 +00:00
|
|
|
|
s << apply_visitor(FieldVisitorToString(), current_key[i]);
|
2012-08-20 05:32:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
s << ").";
|
|
|
|
|
|
2012-08-22 16:30:41 +00:00
|
|
|
|
/** Пока ограничимся всего лишь логгированием таких ситуаций,
|
|
|
|
|
* так как данные генерируются внешними программами.
|
2013-09-13 22:59:02 +00:00
|
|
|
|
* При неконсистентных данных, это - неизбежная ошибка, которая не может быть легко исправлена админами. Поэтому Warning.
|
2012-08-22 16:30:41 +00:00
|
|
|
|
*/
|
2013-09-13 22:59:02 +00:00
|
|
|
|
LOG_WARNING(log, s.rdbuf());
|
2012-08-20 05:32:50 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2013-10-30 08:50:58 +00:00
|
|
|
|
void CollapsingSortedBlockInputStream::insertRows(ColumnPlainPtrs & merged_columns, size_t & merged_rows, bool last_in_stream)
|
2012-08-16 17:02:31 +00:00
|
|
|
|
{
|
2012-08-16 19:43:03 +00:00
|
|
|
|
if (count_positive != 0 || count_negative != 0)
|
2012-08-16 17:02:31 +00:00
|
|
|
|
{
|
2013-10-30 08:50:58 +00:00
|
|
|
|
if (count_positive == count_negative && !last_is_positive)
|
|
|
|
|
{
|
|
|
|
|
/// Если все строки во входных потоках схлопнулись, мы все равно хотим выдать хоть один блок в результат.
|
|
|
|
|
if (last_in_stream && merged_rows == 0 && !blocks_written)
|
|
|
|
|
{
|
2014-04-07 19:02:45 +00:00
|
|
|
|
LOG_INFO(log, "All rows collapsed");
|
2013-10-30 08:50:58 +00:00
|
|
|
|
++merged_rows;
|
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
|
|
|
|
merged_columns[i]->insert(last_positive[i]);
|
|
|
|
|
++merged_rows;
|
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
|
|
|
|
merged_columns[i]->insert(last_negative[i]);
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2012-08-22 16:30:41 +00:00
|
|
|
|
if (count_positive <= count_negative)
|
2012-08-16 20:07:13 +00:00
|
|
|
|
{
|
|
|
|
|
++merged_rows;
|
2012-08-16 17:02:31 +00:00
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
|
|
|
|
merged_columns[i]->insert(first_negative[i]);
|
2012-08-16 20:07:13 +00:00
|
|
|
|
}
|
2012-08-16 19:47:18 +00:00
|
|
|
|
|
2012-08-22 16:30:41 +00:00
|
|
|
|
if (count_positive >= count_negative)
|
2012-08-16 20:07:13 +00:00
|
|
|
|
{
|
|
|
|
|
++merged_rows;
|
2012-08-16 17:02:31 +00:00
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
|
|
|
|
merged_columns[i]->insert(last_positive[i]);
|
2012-08-16 20:07:13 +00:00
|
|
|
|
}
|
2012-08-16 19:47:18 +00:00
|
|
|
|
|
|
|
|
|
if (!(count_positive == count_negative || count_positive + 1 == count_negative || count_positive == count_negative + 1))
|
2013-09-13 22:59:02 +00:00
|
|
|
|
{
|
|
|
|
|
if (count_incorrect_data < MAX_ERROR_MESSAGES)
|
|
|
|
|
reportIncorrectData();
|
|
|
|
|
++count_incorrect_data;
|
|
|
|
|
}
|
2012-08-16 17:02:31 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Block CollapsingSortedBlockInputStream::readImpl()
|
|
|
|
|
{
|
2013-05-04 04:05:15 +00:00
|
|
|
|
if (!children.size())
|
2012-08-16 17:02:31 +00:00
|
|
|
|
return Block();
|
|
|
|
|
|
2013-05-04 04:05:15 +00:00
|
|
|
|
if (children.size() == 1)
|
|
|
|
|
return children[0]->read();
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
|
|
|
|
Block merged_block;
|
|
|
|
|
ColumnPlainPtrs merged_columns;
|
|
|
|
|
|
|
|
|
|
init(merged_block, merged_columns);
|
|
|
|
|
if (merged_columns.empty())
|
|
|
|
|
return Block();
|
|
|
|
|
|
|
|
|
|
/// Дополнительная инициализация.
|
|
|
|
|
if (first_negative.empty())
|
|
|
|
|
{
|
|
|
|
|
first_negative.resize(num_columns);
|
2013-10-30 08:50:58 +00:00
|
|
|
|
last_negative.resize(num_columns);
|
2012-08-16 17:02:31 +00:00
|
|
|
|
last_positive.resize(num_columns);
|
2012-08-20 05:32:50 +00:00
|
|
|
|
current_key.resize(description.size());
|
|
|
|
|
next_key.resize(description.size());
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
|
|
|
|
sign_column_number = merged_block.getPositionByName(sign_column);
|
|
|
|
|
}
|
2013-05-28 16:56:05 +00:00
|
|
|
|
|
|
|
|
|
if (has_collation)
|
|
|
|
|
merge(merged_block, merged_columns, queue_with_collation);
|
|
|
|
|
else
|
|
|
|
|
merge(merged_block, merged_columns, queue);
|
|
|
|
|
|
|
|
|
|
return merged_block;
|
|
|
|
|
}
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
2013-05-28 16:56:05 +00:00
|
|
|
|
template<class TSortCursor>
|
|
|
|
|
void CollapsingSortedBlockInputStream::merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue)
|
|
|
|
|
{
|
|
|
|
|
size_t merged_rows = 0;
|
|
|
|
|
|
2012-08-16 17:02:31 +00:00
|
|
|
|
/// Вынимаем строки в нужном порядке и кладём в merged_block, пока строк не больше max_block_size
|
|
|
|
|
while (!queue.empty())
|
|
|
|
|
{
|
2013-05-28 16:56:05 +00:00
|
|
|
|
TSortCursor current = queue.top();
|
2012-08-16 17:02:31 +00:00
|
|
|
|
queue.pop();
|
|
|
|
|
|
2013-01-05 20:03:19 +00:00
|
|
|
|
Int8 sign = get<Int64>((*current->all_columns[sign_column_number])[current->pos]);
|
2012-08-20 05:32:50 +00:00
|
|
|
|
setPrimaryKey(next_key, current);
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
2012-08-20 05:32:50 +00:00
|
|
|
|
if (next_key != current_key)
|
2012-08-16 17:02:31 +00:00
|
|
|
|
{
|
|
|
|
|
/// Запишем данные для предыдущего визита.
|
2012-08-16 20:07:13 +00:00
|
|
|
|
insertRows(merged_columns, merged_rows);
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
2014-03-25 18:16:26 +00:00
|
|
|
|
current_key = std::move(next_key);
|
2013-01-10 17:12:14 +00:00
|
|
|
|
next_key.resize(description.size());
|
|
|
|
|
|
2012-08-16 17:02:31 +00:00
|
|
|
|
count_negative = 0;
|
|
|
|
|
count_positive = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (sign == 1)
|
|
|
|
|
{
|
|
|
|
|
++count_positive;
|
2013-10-30 08:50:58 +00:00
|
|
|
|
last_is_positive = true;
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
|
|
|
|
setRow(last_positive, current);
|
|
|
|
|
}
|
|
|
|
|
else if (sign == -1)
|
|
|
|
|
{
|
|
|
|
|
if (!count_negative)
|
|
|
|
|
setRow(first_negative, current);
|
2013-10-30 08:50:58 +00:00
|
|
|
|
if (!blocks_written && !merged_rows)
|
|
|
|
|
setRow(last_negative, current);
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
|
|
|
|
++count_negative;
|
2013-10-30 08:50:58 +00:00
|
|
|
|
last_is_positive = false;
|
2012-08-16 17:02:31 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
2013-06-21 20:34:19 +00:00
|
|
|
|
throw Exception("Incorrect data: Sign = " + toString(sign) + " (must be 1 or -1).",
|
2012-08-16 17:02:31 +00:00
|
|
|
|
ErrorCodes::INCORRECT_DATA);
|
|
|
|
|
|
|
|
|
|
if (!current->isLast())
|
|
|
|
|
{
|
|
|
|
|
current->next();
|
|
|
|
|
queue.push(current);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/// Достаём из соответствующего источника следующий блок, если есть.
|
2013-05-28 16:56:05 +00:00
|
|
|
|
fetchNextBlock(current, queue);
|
2012-08-16 17:02:31 +00:00
|
|
|
|
}
|
|
|
|
|
|
2012-08-16 20:07:13 +00:00
|
|
|
|
if (merged_rows >= max_block_size)
|
2013-10-30 08:50:58 +00:00
|
|
|
|
{
|
|
|
|
|
++blocks_written;
|
|
|
|
|
return;
|
|
|
|
|
}
|
2012-08-16 17:02:31 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Запишем данные для последнего визита.
|
2013-10-30 08:50:58 +00:00
|
|
|
|
insertRows(merged_columns, merged_rows, true);
|
2012-08-16 17:02:31 +00:00
|
|
|
|
|
2013-05-04 04:05:15 +00:00
|
|
|
|
children.clear();
|
2012-08-16 17:02:31 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|