2013-04-25 15:48:09 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
#include <Yandex/logger_useful.h>
|
|
|
|
|
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
|
|
|
|
#include <DB/Core/SortDescription.h>
|
|
|
|
|
#include <DB/Columns/ColumnsNumber.h>
|
|
|
|
|
#include <queue>
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2013-04-26 13:20:42 +00:00
|
|
|
|
/// Схлопывает одинаковые строки с противоположным знаком примерно как CollapsingSortedBlockInputStream.
|
|
|
|
|
/// Выдает строки в произвольном порядке (входные потоки по-прежнему должны быть упорядочены).
|
|
|
|
|
/// Выдает только строки с положительным знаком.
|
2013-04-25 15:48:09 +00:00
|
|
|
|
class CollapsingFinalBlockInputStream : public IProfilingBlockInputStream
|
|
|
|
|
{
|
|
|
|
|
public:
|
2014-03-14 17:03:52 +00:00
|
|
|
|
CollapsingFinalBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_,
|
2015-03-12 00:01:14 +00:00
|
|
|
|
const String & sign_column_name_)
|
|
|
|
|
: description(description_), sign_column_name(sign_column_name_)
|
2013-05-04 04:05:15 +00:00
|
|
|
|
{
|
|
|
|
|
children.insert(children.end(), inputs_.begin(), inputs_.end());
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
~CollapsingFinalBlockInputStream();
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2014-11-08 23:52:18 +00:00
|
|
|
|
String getName() const override { return "CollapsingFinalBlockInputStream"; }
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2014-11-08 23:52:18 +00:00
|
|
|
|
String getID() const override
|
2013-05-03 10:20:53 +00:00
|
|
|
|
{
|
|
|
|
|
std::stringstream res;
|
|
|
|
|
res << "CollapsingFinal(inputs";
|
|
|
|
|
|
2013-05-04 04:05:15 +00:00
|
|
|
|
for (size_t i = 0; i < children.size(); ++i)
|
|
|
|
|
res << ", " << children[i]->getID();
|
2013-05-03 10:20:53 +00:00
|
|
|
|
|
|
|
|
|
res << ", description";
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < description.size(); ++i)
|
|
|
|
|
res << ", " << description[i].getID();
|
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
res << ", sign_column, " << sign_column_name << ")";
|
2013-05-03 10:20:53 +00:00
|
|
|
|
return res.str();
|
|
|
|
|
}
|
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
protected:
|
2014-11-08 23:52:18 +00:00
|
|
|
|
Block readImpl() override;
|
2013-05-03 10:20:53 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
private:
|
|
|
|
|
struct MergingBlock;
|
|
|
|
|
typedef std::vector<MergingBlock*> BlockPlainPtrs;
|
2013-05-03 10:20:53 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
struct MergingBlock : boost::noncopyable
|
|
|
|
|
{
|
|
|
|
|
MergingBlock(Block block_,
|
|
|
|
|
size_t stream_index_,
|
2015-03-12 00:01:14 +00:00
|
|
|
|
const SortDescription & desc,
|
2013-04-25 15:48:09 +00:00
|
|
|
|
String sign_column_name,
|
|
|
|
|
BlockPlainPtrs * output_blocks)
|
2015-03-12 00:01:14 +00:00
|
|
|
|
: block(block_), stream_index(stream_index_), output_blocks(output_blocks)
|
2013-04-25 15:48:09 +00:00
|
|
|
|
{
|
|
|
|
|
sort_columns.resize(desc.size());
|
|
|
|
|
for (size_t i = 0; i < desc.size(); ++i)
|
|
|
|
|
{
|
|
|
|
|
size_t column_number = !desc[i].column_name.empty()
|
|
|
|
|
? block.getPositionByName(desc[i].column_name)
|
|
|
|
|
: desc[i].column_number;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
sort_columns[i] = &*block.getByPosition(column_number).column;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
const IColumn * sign_icolumn = &*block.getByName(sign_column_name).column;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
|
|
|
|
sign_column = typeid_cast<const ColumnInt8 *>(sign_icolumn);
|
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
if (!sign_column)
|
|
|
|
|
throw Exception("Sign column must have type Int8", ErrorCodes::BAD_TYPE_OF_FIELD);
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
rows = sign_column->size();
|
2013-12-16 07:12:30 +00:00
|
|
|
|
/// Заполняется целиком нулями. Потом выставляются единички в позициях строчек, которых нужно оставить.
|
|
|
|
|
filter.resize_fill(rows);
|
2013-04-25 15:48:09 +00:00
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
Block block;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Строки с одинаковым ключом будут упорядочены по возрастанию stream_index.
|
|
|
|
|
size_t stream_index;
|
|
|
|
|
size_t rows;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Какие строки нужно оставить. Заполняется при слиянии потоков.
|
|
|
|
|
IColumn::Filter filter;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Указывают в block.
|
|
|
|
|
ConstColumnPlainPtrs sort_columns;
|
|
|
|
|
const ColumnInt8 * sign_column;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Когда достигает нуля, блок можно выдавать в ответ.
|
2015-03-12 00:01:14 +00:00
|
|
|
|
int refcount = 0;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Куда положить блок, когда он готов попасть в ответ.
|
|
|
|
|
BlockPlainPtrs * output_blocks;
|
|
|
|
|
};
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// При удалении последней ссылки на блок, добавляет блок в output_blocks.
|
|
|
|
|
class MergingBlockPtr
|
|
|
|
|
{
|
|
|
|
|
public:
|
|
|
|
|
MergingBlockPtr() : ptr() {}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
explicit MergingBlockPtr(MergingBlock * ptr_) : ptr(ptr_)
|
|
|
|
|
{
|
|
|
|
|
if (ptr)
|
|
|
|
|
++ptr->refcount;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
MergingBlockPtr(const MergingBlockPtr & rhs) : ptr(rhs.ptr)
|
|
|
|
|
{
|
|
|
|
|
if (ptr)
|
|
|
|
|
++ptr->refcount;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
MergingBlockPtr & operator=(const MergingBlockPtr & rhs)
|
|
|
|
|
{
|
|
|
|
|
destroy();
|
|
|
|
|
ptr = rhs.ptr;
|
|
|
|
|
if (ptr)
|
|
|
|
|
++ptr->refcount;
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
~MergingBlockPtr()
|
|
|
|
|
{
|
|
|
|
|
destroy();
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Обнулить указатель и не добавлять блок в output_blocks.
|
|
|
|
|
void cancel()
|
|
|
|
|
{
|
|
|
|
|
if (ptr)
|
|
|
|
|
{
|
|
|
|
|
--ptr->refcount;
|
|
|
|
|
if (!ptr->refcount)
|
|
|
|
|
delete ptr;
|
2014-04-08 07:31:51 +00:00
|
|
|
|
ptr = nullptr;
|
2013-04-25 15:48:09 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
MergingBlock & operator*() const { return *ptr; }
|
|
|
|
|
MergingBlock * operator->() const { return ptr; }
|
|
|
|
|
operator bool() const { return !!ptr; }
|
|
|
|
|
bool operator!() const { return !ptr; }
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
private:
|
|
|
|
|
MergingBlock * ptr;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
void destroy()
|
|
|
|
|
{
|
|
|
|
|
if (ptr)
|
|
|
|
|
{
|
|
|
|
|
--ptr->refcount;
|
|
|
|
|
if (!ptr->refcount)
|
|
|
|
|
{
|
|
|
|
|
if (std::uncaught_exception())
|
|
|
|
|
delete ptr;
|
|
|
|
|
else
|
|
|
|
|
ptr->output_blocks->push_back(ptr);
|
|
|
|
|
}
|
2014-04-08 07:31:51 +00:00
|
|
|
|
ptr = nullptr;
|
2013-04-25 15:48:09 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
struct Cursor
|
|
|
|
|
{
|
|
|
|
|
MergingBlockPtr block;
|
|
|
|
|
size_t pos;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
Cursor() {}
|
|
|
|
|
explicit Cursor(MergingBlockPtr block_, size_t pos_ = 0) : block(block_), pos(pos_) {}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
bool operator< (const Cursor & rhs) const
|
2013-04-25 15:48:09 +00:00
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < block->sort_columns.size(); ++i)
|
|
|
|
|
{
|
2015-03-12 00:01:14 +00:00
|
|
|
|
int res = block->sort_columns[i]->compareAt(pos, rhs.pos, *(rhs.block->sort_columns[i]), 1);
|
2013-04-25 15:48:09 +00:00
|
|
|
|
if (res > 0)
|
|
|
|
|
return true;
|
|
|
|
|
if (res < 0)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2015-03-12 00:01:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
return block->stream_index > rhs.block->stream_index;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Не согласован с operator< : не учитывает order.
|
|
|
|
|
bool equal(const Cursor & rhs) const
|
|
|
|
|
{
|
|
|
|
|
if (!block || !rhs.block)
|
|
|
|
|
return false;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
for (size_t i = 0; i < block->sort_columns.size(); ++i)
|
|
|
|
|
{
|
2015-03-12 00:01:14 +00:00
|
|
|
|
int res = block->sort_columns[i]->compareAt(pos, rhs.pos, *(rhs.block->sort_columns[i]), 1);
|
2013-04-25 15:48:09 +00:00
|
|
|
|
if (res != 0)
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
return true;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
Int8 getSign()
|
|
|
|
|
{
|
|
|
|
|
return block->sign_column->getData()[pos];
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Помечает, что эту строку нужно взять в ответ.
|
|
|
|
|
void addToFilter()
|
|
|
|
|
{
|
|
|
|
|
block->filter[pos] = 1;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
bool isLast()
|
|
|
|
|
{
|
|
|
|
|
return pos + 1 == block->rows;
|
|
|
|
|
}
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
void next()
|
|
|
|
|
{
|
|
|
|
|
++pos;
|
|
|
|
|
}
|
|
|
|
|
};
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
typedef std::priority_queue<Cursor> Queue;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
const SortDescription description;
|
|
|
|
|
String sign_column_name;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
Logger * log = &Logger::get("CollapsingFinalBlockInputStream");
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
bool first = true;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
BlockPlainPtrs output_blocks;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
Queue queue;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-26 13:20:42 +00:00
|
|
|
|
Cursor previous; /// Текущий первичный ключ.
|
2013-04-25 15:48:09 +00:00
|
|
|
|
Cursor last_positive; /// Последняя положительная строка для текущего первичного ключа.
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
size_t count_positive = 0; /// Количество положительных строк для текущего первичного ключа.
|
|
|
|
|
size_t count_negative = 0; /// Количество отрицательных строк для текущего первичного ключа.
|
|
|
|
|
bool last_is_positive = false; /// true, если последняя строка для текущего первичного ключа положительная.
|
2013-09-13 22:59:02 +00:00
|
|
|
|
|
2015-03-12 00:01:14 +00:00
|
|
|
|
size_t count_incorrect_data = 0; /// Чтобы не писать в лог слишком много сообщений об ошибке.
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
/// Посчитаем, сколько блоков получили на вход и отдали на выход.
|
2015-03-12 00:01:14 +00:00
|
|
|
|
size_t blocks_fetched = 0;
|
|
|
|
|
size_t blocks_output = 0;
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
void fetchNextBlock(size_t input_index);
|
|
|
|
|
void commitCurrent();
|
2014-06-26 00:58:14 +00:00
|
|
|
|
|
2013-04-25 15:48:09 +00:00
|
|
|
|
void reportBadCounts();
|
|
|
|
|
void reportBadSign(Int8 sign);
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|