2012-07-23 06:23:29 +00:00
|
|
|
|
#include <queue>
|
2012-07-24 18:17:44 +00:00
|
|
|
|
#include <iomanip>
|
|
|
|
|
|
|
|
|
|
#include <statdaemons/Stopwatch.h>
|
2012-07-23 06:23:29 +00:00
|
|
|
|
|
2011-09-04 01:42:14 +00:00
|
|
|
|
#include <DB/DataStreams/MergeSortingBlockInputStream.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2011-09-04 21:23:19 +00:00
|
|
|
|
/** Produces the merged result on the first call and an empty Block on every later call.
  *
  * A fairly simple algorithm:
  * - read all blocks from the source stream into RAM;
  * - merge them all together.
  */
Block MergeSortingBlockInputStream::readImpl()
{
	/// The whole result is returned as a single block, so there is nothing left after the first call.
	if (has_been_read)
		return Block();

	has_been_read = true;

	/// Drain the last child stream; a block that evaluates to false ends the reading.
	Blocks source_blocks;
	for (;;)
	{
		Block next_block = children.back()->read();
		if (!next_block)
			break;

		source_blocks.push_back(next_block);
	}

	/// If the stream was cancelled, discard what was read instead of merging it.
	return isCancelled() ? Block() : merge(source_blocks);
}
|
|
|
|
|
|
2012-07-23 20:01:29 +00:00
|
|
|
|
/** Combines the given blocks into a single block.
  *
  * - returns an empty Block when `blocks` is empty;
  * - returns the only block as is when there is exactly one;
  * - otherwise creates one SortCursorImpl per non-empty block (built from the block and `description`)
  *   and delegates to mergeImpl, using SortCursorWithCollation if at least one cursor
  *   reports has_collation, and SortCursor otherwise.
  *
  * Writes a debug log message before merging and another one with timing statistics afterwards.
  */
Block MergeSortingBlockInputStream::merge(Blocks & blocks)
{
	if (blocks.empty())
		return Block();

	if (blocks.size() == 1)
		return blocks[0];

	Stopwatch watch;

	LOG_DEBUG(log, "Merge sorting");

	CursorImpls cursors(blocks.size());

	bool has_collation = false;

	/// cursors[i] corresponds to blocks[i], so the index advances together with the iterator.
	size_t i = 0;
	for (Blocks::const_iterator it = blocks.begin(); it != blocks.end(); ++it, ++i)
	{
		/// For an empty block the cursor is left default-constructed.
		/// NOTE(review): mergeImpl still puts such cursors into its queue - confirm that this is safe.
		if (!*it)
			continue;

		cursors[i] = SortCursorImpl(*it, description);
		has_collation |= cursors[i].has_collation;
	}

	Block merged;

	if (has_collation)
		merged = mergeImpl<SortCursorWithCollation>(blocks, cursors);
	else
		merged = mergeImpl<SortCursor>(blocks, cursors);

	/// Sample the stopwatch once, so that the duration and both rates in the message are consistent with each other.
	const double elapsed_seconds = watch.elapsedSeconds();

	/// The label says MiB, so divide by 2^20 bytes rather than by 10^6.
	LOG_DEBUG(log, std::fixed << std::setprecision(2)
		<< "Merge sorted " << blocks.size() << " blocks, " << merged.rows() << " rows"
		<< " in " << elapsed_seconds << " sec., "
		<< merged.rows() / elapsed_seconds << " rows/sec., "
		<< merged.bytes() / 1048576.0 / elapsed_seconds << " MiB/sec.");

	return merged;
}
|
2012-07-23 06:23:29 +00:00
|
|
|
|
|
2013-09-16 05:44:47 +00:00
|
|
|
|
template <typename TSortCursor>
|
|
|
|
|
Block MergeSortingBlockInputStream::mergeImpl(Blocks & blocks, CursorImpls & cursors)
|
2013-05-28 16:56:05 +00:00
|
|
|
|
{
|
|
|
|
|
Block merged = blocks[0].cloneEmpty();
|
|
|
|
|
size_t num_columns = blocks[0].columns();
|
|
|
|
|
|
|
|
|
|
typedef std::priority_queue<TSortCursor> Queue;
|
|
|
|
|
Queue queue;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < cursors.size(); ++i)
|
|
|
|
|
queue.push(TSortCursor(&cursors[i]));
|
|
|
|
|
|
2012-12-16 00:23:41 +00:00
|
|
|
|
ColumnPlainPtrs merged_columns;
|
2013-09-16 05:44:47 +00:00
|
|
|
|
for (size_t i = 0; i < num_columns; ++i) /// TODO: reserve
|
2012-12-16 00:23:41 +00:00
|
|
|
|
merged_columns.push_back(&*merged.getByPosition(i).column);
|
2012-07-23 20:01:29 +00:00
|
|
|
|
|
|
|
|
|
/// Вынимаем строки в нужном порядке и кладём в merged.
|
2013-09-16 05:44:47 +00:00
|
|
|
|
for (size_t row = 0; (!limit || row < limit) && !queue.empty(); ++row)
|
2012-07-23 06:23:29 +00:00
|
|
|
|
{
|
2013-05-28 16:56:05 +00:00
|
|
|
|
TSortCursor current = queue.top();
|
2012-07-23 20:01:29 +00:00
|
|
|
|
queue.pop();
|
2012-07-23 06:23:29 +00:00
|
|
|
|
|
2012-07-23 20:01:29 +00:00
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
2012-12-16 00:52:06 +00:00
|
|
|
|
merged_columns[i]->insertFrom(*current->all_columns[i], current->pos);
|
2012-07-23 20:01:29 +00:00
|
|
|
|
|
2012-07-27 20:19:15 +00:00
|
|
|
|
if (!current->isLast())
|
|
|
|
|
{
|
|
|
|
|
current->next();
|
|
|
|
|
queue.push(current);
|
|
|
|
|
}
|
2012-07-23 06:23:29 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return merged;
|
2012-07-23 20:01:29 +00:00
|
|
|
|
}
|
2012-07-23 06:23:29 +00:00
|
|
|
|
|
2011-09-04 01:42:14 +00:00
|
|
|
|
}
|