2012-07-25 19:53:43 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <queue>
|
2016-04-13 04:15:30 +00:00
|
|
|
|
#include <boost/intrusive_ptr.hpp>
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
2015-09-29 19:19:54 +00:00
|
|
|
|
#include <common/logger_useful.h>
|
2012-09-05 19:51:09 +00:00
|
|
|
|
|
2013-10-01 20:38:01 +00:00
|
|
|
|
#include <DB/Core/Row.h>
|
2012-07-25 19:53:43 +00:00
|
|
|
|
#include <DB/Core/SortDescription.h>
|
|
|
|
|
|
|
|
|
|
#include <DB/DataStreams/IProfilingBlockInputStream.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
|
namespace ErrorCodes
|
|
|
|
|
{
|
|
|
|
|
extern const int CORRUPTED_DATA;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2016-04-13 04:15:30 +00:00
|
|
|
|
/// Позволяет ссылаться на строку в блоке и удерживать владение блоком,
|
|
|
|
|
/// и таким образом избежать создания временного объекта-строки.
|
|
|
|
|
/// Не используется std::shared_ptr, так как не нужно место для weak_count и deleter;
|
|
|
|
|
/// не используется Poco::SharedPtr, так как нужно выделять блок и refcount одним куском;
|
|
|
|
|
/// не используется Poco::AutoPtr, так как у него нет move конструктора и есть лишние проверки на nullptr;
|
|
|
|
|
/// Счётчик ссылок неатомарный, так как используется из одного потока.
|
|
|
|
|
namespace detail
|
|
|
|
|
{
|
|
|
|
|
struct SharedBlock : Block
|
|
|
|
|
{
|
|
|
|
|
int refcount = 0;
|
|
|
|
|
|
|
|
|
|
SharedBlock(Block && value_)
|
|
|
|
|
: Block(std::move(value_)) {};
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
using SharedBlockPtr = boost::intrusive_ptr<detail::SharedBlock>;
|
|
|
|
|
|
|
|
|
|
/// Hook for boost::intrusive_ptr: registers one more owner of `ptr`.
/// The counter is a plain int, so the increment is not atomic.
inline void intrusive_ptr_add_ref(detail::SharedBlock * ptr)
{
    ptr->refcount += 1;
}
|
|
|
|
|
|
|
|
|
|
/// Hook for boost::intrusive_ptr: drops one owner of `ptr` and destroys
/// the block once the counter reaches zero.
inline void intrusive_ptr_release(detail::SharedBlock * ptr)
{
    ptr->refcount -= 1;

    if (ptr->refcount == 0)
    {
        delete ptr;
    }
}
|
|
|
|
|
|
|
|
|
|
|
2012-07-25 19:53:43 +00:00
|
|
|
|
/** Соединяет несколько сортированных потоков в один.
|
|
|
|
|
*/
|
|
|
|
|
class MergingSortedBlockInputStream : public IProfilingBlockInputStream
|
|
|
|
|
{
|
|
|
|
|
public:
|
2013-09-16 05:44:47 +00:00
|
|
|
|
/// limit - если не 0, то можно выдать только первые limit строк в сортированном порядке.
|
2014-03-14 17:03:52 +00:00
|
|
|
|
MergingSortedBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_, size_t max_block_size_, size_t limit_ = 0)
|
2015-01-07 15:30:11 +00:00
|
|
|
|
: description(description_), max_block_size(max_block_size_), limit(limit_),
|
|
|
|
|
source_blocks(inputs_.size()), cursors(inputs_.size())
|
2012-07-25 19:53:43 +00:00
|
|
|
|
{
|
2013-05-04 04:05:15 +00:00
|
|
|
|
children.insert(children.end(), inputs_.begin(), inputs_.end());
|
2012-07-25 19:53:43 +00:00
|
|
|
|
}
|
|
|
|
|
|
2015-06-08 20:22:02 +00:00
|
|
|
|
String getName() const override { return "MergingSorted"; }
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
2014-11-08 23:52:18 +00:00
|
|
|
|
String getID() const override
|
2013-05-03 10:20:53 +00:00
|
|
|
|
{
|
|
|
|
|
std::stringstream res;
|
|
|
|
|
res << "MergingSorted(";
|
|
|
|
|
|
|
|
|
|
Strings children_ids(children.size());
|
|
|
|
|
for (size_t i = 0; i < children.size(); ++i)
|
|
|
|
|
children_ids[i] = children[i]->getID();
|
|
|
|
|
|
|
|
|
|
/// Порядок не имеет значения.
|
|
|
|
|
std::sort(children_ids.begin(), children_ids.end());
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < children_ids.size(); ++i)
|
|
|
|
|
res << (i == 0 ? "" : ", ") << children_ids[i];
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < description.size(); ++i)
|
|
|
|
|
res << ", " << description[i].getID();
|
|
|
|
|
|
|
|
|
|
res << ")";
|
|
|
|
|
return res.str();
|
|
|
|
|
}
|
|
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
|
protected:
|
2016-04-13 03:56:22 +00:00
|
|
|
|
struct RowRef
|
|
|
|
|
{
|
|
|
|
|
ConstColumnPlainPtrs columns;
|
|
|
|
|
size_t row_num;
|
|
|
|
|
SharedBlockPtr shared_block;
|
|
|
|
|
|
|
|
|
|
void swap(RowRef & other)
|
|
|
|
|
{
|
|
|
|
|
std::swap(columns, other.columns);
|
|
|
|
|
std::swap(row_num, other.row_num);
|
|
|
|
|
std::swap(shared_block, other.shared_block);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Количество и типы столбцов обязаны соответствовать.
|
|
|
|
|
bool operator==(const RowRef & other) const
|
|
|
|
|
{
|
|
|
|
|
size_t size = columns.size();
|
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
|
if (0 != columns[i]->compareAt(row_num, other.row_num, *other.columns[i], 1))
|
|
|
|
|
return false;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool operator!=(const RowRef & other) const
|
|
|
|
|
{
|
|
|
|
|
return !(*this == other);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool empty() const { return columns.empty(); }
|
|
|
|
|
size_t size() const { return columns.size(); }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
2014-11-08 23:52:18 +00:00
|
|
|
|
Block readImpl() override;
|
2016-05-20 20:01:34 +00:00
|
|
|
|
|
2014-11-08 23:52:18 +00:00
|
|
|
|
void readSuffixImpl() override;
|
|
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
|
/// Инициализирует очередь и следующий блок результата.
|
|
|
|
|
void init(Block & merged_block, ColumnPlainPtrs & merged_columns);
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
|
/// Достаёт из источника, соответствующего current следующий блок.
|
2013-09-15 03:14:29 +00:00
|
|
|
|
template <typename TSortCursor>
|
2013-05-28 16:56:05 +00:00
|
|
|
|
void fetchNextBlock(const TSortCursor & current, std::priority_queue<TSortCursor> & queue);
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
|
|
|
|
|
2012-07-25 19:53:43 +00:00
|
|
|
|
SortDescription description;
|
|
|
|
|
size_t max_block_size;
|
2013-09-16 05:44:47 +00:00
|
|
|
|
size_t limit;
|
2015-01-07 15:30:11 +00:00
|
|
|
|
size_t total_merged_rows = 0;
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
2015-01-07 15:30:11 +00:00
|
|
|
|
bool first = true;
|
|
|
|
|
bool has_collation = false;
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
|
|
|
|
/// Текущие сливаемые блоки.
|
2015-01-07 15:30:11 +00:00
|
|
|
|
size_t num_columns = 0;
|
2016-04-13 03:56:22 +00:00
|
|
|
|
std::vector<SharedBlockPtr> source_blocks;
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
2016-05-28 10:35:44 +00:00
|
|
|
|
using CursorImpls = std::vector<SortCursorImpl>;
|
2012-07-27 20:19:15 +00:00
|
|
|
|
CursorImpls cursors;
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
2016-05-28 10:35:44 +00:00
|
|
|
|
using Queue = std::priority_queue<SortCursor>;
|
2012-07-25 19:53:43 +00:00
|
|
|
|
Queue queue;
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
2016-05-28 10:35:44 +00:00
|
|
|
|
using QueueWithCollation = std::priority_queue<SortCursorWithCollation>;
|
2013-05-28 16:56:05 +00:00
|
|
|
|
QueueWithCollation queue_with_collation;
|
2012-08-01 19:55:05 +00:00
|
|
|
|
|
2013-10-01 20:38:01 +00:00
|
|
|
|
|
2016-04-15 17:42:51 +00:00
|
|
|
|
/// Эти методы используются в Collapsing/Summing/Aggregating... SortedBlockInputStream-ах.
|
2013-10-01 20:38:01 +00:00
|
|
|
|
|
2014-04-11 16:56:49 +00:00
|
|
|
|
/// Сохранить строчку, на которую указывает cursor, в row.
|
2016-04-13 03:56:22 +00:00
|
|
|
|
template <class TSortCursor>
|
2013-10-01 20:38:01 +00:00
|
|
|
|
void setRow(Row & row, TSortCursor & cursor)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
2014-07-21 11:34:43 +00:00
|
|
|
|
{
|
|
|
|
|
try
|
|
|
|
|
{
|
|
|
|
|
cursor->all_columns[i]->get(cursor->pos, row[i]);
|
|
|
|
|
}
|
|
|
|
|
catch (...)
|
|
|
|
|
{
|
|
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
|
|
|
|
|
|
|
|
/// Узнаем имя столбца и бросим исключение поинформативней.
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
2014-07-21 11:34:43 +00:00
|
|
|
|
String column_name;
|
2016-04-13 03:56:22 +00:00
|
|
|
|
for (const auto & block : source_blocks)
|
2014-07-21 11:34:43 +00:00
|
|
|
|
{
|
2016-04-13 03:56:22 +00:00
|
|
|
|
if (i < block->columns())
|
2014-07-21 11:34:43 +00:00
|
|
|
|
{
|
2016-04-13 03:56:22 +00:00
|
|
|
|
column_name = block->getByPosition(i).name;
|
2014-07-21 11:34:43 +00:00
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2016-04-13 03:56:22 +00:00
|
|
|
|
throw Exception("MergingSortedBlockInputStream failed to read row " + toString(cursor->pos)
|
2014-07-21 11:34:43 +00:00
|
|
|
|
+ " of column " + toString(i) + (column_name.empty() ? "" : " (" + column_name + ")"),
|
|
|
|
|
ErrorCodes::CORRUPTED_DATA);
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-10-01 20:38:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
2016-04-13 03:56:22 +00:00
|
|
|
|
template <class TSortCursor>
|
|
|
|
|
void setRowRef(RowRef & row_ref, TSortCursor & cursor)
|
|
|
|
|
{
|
|
|
|
|
row_ref.row_num = cursor.impl->pos;
|
|
|
|
|
row_ref.shared_block = source_blocks[cursor.impl->order];
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
|
|
|
|
row_ref.columns[i] = cursor->all_columns[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template <class TSortCursor>
|
|
|
|
|
void setPrimaryKeyRef(RowRef & row_ref, TSortCursor & cursor)
|
|
|
|
|
{
|
|
|
|
|
row_ref.row_num = cursor.impl->pos;
|
|
|
|
|
row_ref.shared_block = source_blocks[cursor.impl->order];
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < cursor->sort_columns_size; ++i)
|
|
|
|
|
row_ref.columns[i] = cursor->sort_columns[i];
|
|
|
|
|
}
|
|
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
|
private:
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
2013-05-28 16:56:05 +00:00
|
|
|
|
/** Делаем поддержку двух разных курсоров - с Collation и без.
|
|
|
|
|
* Шаблоны используем вместо полиморфных SortCursor'ов и вызовов виртуальных функций.
|
|
|
|
|
*/
|
2013-09-15 03:14:29 +00:00
|
|
|
|
template <typename TSortCursor>
|
2014-11-08 23:52:18 +00:00
|
|
|
|
void initQueue(std::priority_queue<TSortCursor> & queue);
|
|
|
|
|
|
2013-09-15 03:14:29 +00:00
|
|
|
|
template <typename TSortCursor>
|
2015-11-29 13:18:12 +00:00
|
|
|
|
void merge(Block & merged_block, ColumnPlainPtrs & merged_columns, std::priority_queue<TSortCursor> & queue);
|
2014-11-08 23:52:18 +00:00
|
|
|
|
|
2015-01-07 15:30:11 +00:00
|
|
|
|
Logger * log = &Logger::get("MergingSortedBlockInputStream");
|
2015-01-18 08:25:56 +00:00
|
|
|
|
|
|
|
|
|
/// Прочитали до конца.
|
|
|
|
|
bool finished = false;
|
2012-07-25 19:53:43 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|