2012-07-25 19:53:43 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <queue>
|
2018-06-03 17:43:56 +00:00
|
|
|
|
|
|
|
#ifdef __clang__
|
|
|
|
#pragma clang diagnostic push
|
|
|
|
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <boost/smart_ptr/intrusive_ptr.hpp>
|
|
|
|
|
|
|
|
#ifdef __clang__
|
|
|
|
#pragma clang diagnostic pop
|
|
|
|
#endif
|
2012-07-25 19:53:43 +00:00
|
|
|
|
2015-09-29 19:19:54 +00:00
|
|
|
#include <common/logger_useful.h>
|
2012-09-05 19:51:09 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Core/Row.h>
|
|
|
|
#include <Core/SortDescription.h>
|
|
|
|
#include <Core/SortCursor.h>
|
2012-07-25 19:53:43 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/IProfilingBlockInputStream.h>
|
|
|
|
#include <DataStreams/ColumnGathererStream.h>
|
2012-07-25 19:53:43 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
namespace ErrorCodes
{
    /// Declaration only; the definition is not part of this header.
    /// Used by MergingSortedBlockInputStream::setRow when reading a column value fails.
    extern const int CORRUPTED_DATA;
}
|
|
|
|
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Allows you to refer to a row in the block while holding ownership of the block,
|
|
|
|
/// and thus avoid creating a temporary row object.
|
|
|
|
/// Does not use std::shared_ptr, since there is no need for a place for `weak_count` and `deleter`;
|
|
|
|
/// does not use Poco::SharedPtr, since you need to allocate a block and `refcount` in one piece;
|
|
|
|
/// does not use Poco::AutoPtr, since it does not have a `move` constructor and there are extra checks for nullptr;
|
|
|
|
/// The reference counter is not atomic, since it is used from one thread.
|
2016-04-13 04:15:30 +00:00
|
|
|
namespace detail
|
|
|
|
{
|
2018-01-29 17:42:19 +00:00
|
|
|
struct SharedBlock : Block
|
|
|
|
{
|
|
|
|
int refcount = 0;
|
2016-04-13 04:15:30 +00:00
|
|
|
|
2018-01-29 17:42:19 +00:00
|
|
|
ColumnRawPtrs all_columns;
|
|
|
|
ColumnRawPtrs sort_columns;
|
|
|
|
|
|
|
|
SharedBlock(Block && block) : Block(std::move(block)) {}
|
|
|
|
};
|
2016-04-13 04:15:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
using SharedBlockPtr = boost::intrusive_ptr<detail::SharedBlock>;
|
|
|
|
|
|
|
|
/// Hook used by boost::intrusive_ptr: registers one more owner of `ptr`.
inline void intrusive_ptr_add_ref(detail::SharedBlock * ptr)
{
    ptr->refcount += 1;
}
|
|
|
|
|
|
|
|
/// Hook used by boost::intrusive_ptr: drops one owner of `ptr` and destroys the
/// object once the counter reaches zero.
inline void intrusive_ptr_release(detail::SharedBlock * ptr)
{
    --ptr->refcount;

    if (ptr->refcount == 0)
        delete ptr;
}
|
|
|
|
|
|
|
|
|
2016-12-15 20:03:21 +00:00
|
|
|
/** Merges several sorted streams into one sorted stream.
|
2012-07-25 19:53:43 +00:00
|
|
|
*/
|
|
|
|
class MergingSortedBlockInputStream : public IProfilingBlockInputStream
|
|
|
|
{
|
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
/** limit - if isn't 0, then we can produce only first limit rows in sorted order.
|
|
|
|
* out_row_sources - if isn't nullptr, then at the end of execution it should contain part numbers of each readed row (and needed flag)
|
|
|
|
* quiet - don't log profiling info
|
|
|
|
*/
|
2017-07-04 12:38:53 +00:00
|
|
|
MergingSortedBlockInputStream(
|
2018-04-07 01:46:50 +00:00
|
|
|
const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_,
|
|
|
|
size_t limit_ = 0, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false);
|
2012-07-25 19:53:43 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
String getName() const override { return "MergingSorted"; }
|
2012-07-25 19:53:43 +00:00
|
|
|
|
2017-04-07 19:47:21 +00:00
|
|
|
bool isSortedOutput() const override { return true; }
|
|
|
|
const SortDescription & getSortDescription() const override { return description; }
|
|
|
|
|
2018-04-07 01:46:50 +00:00
|
|
|
Block getHeader() const override { return header; }
|
2018-01-06 18:10:44 +00:00
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
protected:
|
2017-04-01 07:20:54 +00:00
|
|
|
struct RowRef
|
|
|
|
{
|
2018-01-29 17:42:19 +00:00
|
|
|
ColumnRawPtrs * columns = nullptr;
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t row_num;
|
|
|
|
SharedBlockPtr shared_block;
|
|
|
|
|
|
|
|
void swap(RowRef & other)
|
|
|
|
{
|
|
|
|
std::swap(columns, other.columns);
|
|
|
|
std::swap(row_num, other.row_num);
|
|
|
|
std::swap(shared_block, other.shared_block);
|
|
|
|
}
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// The number and types of columns must match.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool operator==(const RowRef & other) const
|
|
|
|
{
|
2018-01-29 17:42:19 +00:00
|
|
|
size_t size = columns->size();
|
2017-04-01 07:20:54 +00:00
|
|
|
for (size_t i = 0; i < size; ++i)
|
2018-01-29 17:42:19 +00:00
|
|
|
if (0 != (*columns)[i]->compareAt(row_num, other.row_num, *(*other.columns)[i], 1))
|
2017-04-01 07:20:54 +00:00
|
|
|
return false;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool operator!=(const RowRef & other) const
|
|
|
|
{
|
|
|
|
return !(*this == other);
|
|
|
|
}
|
|
|
|
|
2018-03-06 14:53:53 +00:00
|
|
|
void reset()
|
|
|
|
{
|
|
|
|
RowRef empty;
|
|
|
|
swap(empty);
|
|
|
|
}
|
|
|
|
|
2018-01-29 17:42:19 +00:00
|
|
|
bool empty() const { return columns == nullptr; }
|
|
|
|
size_t size() const { return empty() ? 0 : columns->size(); }
|
2017-04-01 07:20:54 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
Block readImpl() override;
|
|
|
|
|
|
|
|
void readSuffixImpl() override;
|
|
|
|
|
2018-04-07 01:46:50 +00:00
|
|
|
/// Initializes the queue and the columns of next result block.
|
|
|
|
void init(MutableColumns & merged_columns);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Gets the next block from the source corresponding to the `current`.
|
2017-04-01 07:20:54 +00:00
|
|
|
template <typename TSortCursor>
|
|
|
|
void fetchNextBlock(const TSortCursor & current, std::priority_queue<TSortCursor> & queue);
|
|
|
|
|
|
|
|
|
2018-04-07 01:46:50 +00:00
|
|
|
Block header;
|
|
|
|
|
2017-04-07 19:47:21 +00:00
|
|
|
const SortDescription description;
|
2017-04-01 07:20:54 +00:00
|
|
|
const size_t max_block_size;
|
|
|
|
size_t limit;
|
|
|
|
size_t total_merged_rows = 0;
|
|
|
|
|
|
|
|
bool first = true;
|
|
|
|
bool has_collation = false;
|
|
|
|
bool quiet = false;
|
|
|
|
|
|
|
|
/// May be smaller or equal to max_block_size. To do 'reserve' for columns.
|
|
|
|
size_t expected_block_size = 0;
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Blocks currently being merged.
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t num_columns = 0;
|
|
|
|
std::vector<SharedBlockPtr> source_blocks;
|
|
|
|
|
|
|
|
using CursorImpls = std::vector<SortCursorImpl>;
|
|
|
|
CursorImpls cursors;
|
|
|
|
|
|
|
|
using Queue = std::priority_queue<SortCursor>;
|
|
|
|
Queue queue;
|
|
|
|
|
|
|
|
using QueueWithCollation = std::priority_queue<SortCursorWithCollation>;
|
|
|
|
QueueWithCollation queue_with_collation;
|
|
|
|
|
|
|
|
/// Used in Vertical merge algorithm to gather non-PK columns (on next step)
|
|
|
|
/// If it is not nullptr then it should be populated during execution
|
2017-07-04 12:38:53 +00:00
|
|
|
WriteBuffer * out_row_sources_buf;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// These methods are used in Collapsing/Summing/Aggregating... SortedBlockInputStream-s.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Save the row pointed to by cursor in `row`.
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename TSortCursor>
|
2017-04-01 07:20:54 +00:00
|
|
|
void setRow(Row & row, TSortCursor & cursor)
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
cursor->all_columns[i]->get(cursor->pos, row[i]);
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Find out the name of the column and throw more informative exception.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
String column_name;
|
|
|
|
for (const auto & block : source_blocks)
|
|
|
|
{
|
|
|
|
if (i < block->columns())
|
|
|
|
{
|
|
|
|
column_name = block->safeGetByPosition(i).name;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
throw Exception("MergingSortedBlockInputStream failed to read row " + toString(cursor->pos)
|
|
|
|
+ " of column " + toString(i) + (column_name.empty() ? "" : " (" + column_name + ")"),
|
|
|
|
ErrorCodes::CORRUPTED_DATA);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename TSortCursor>
|
2017-04-01 07:20:54 +00:00
|
|
|
void setRowRef(RowRef & row_ref, TSortCursor & cursor)
|
|
|
|
{
|
|
|
|
row_ref.row_num = cursor.impl->pos;
|
|
|
|
row_ref.shared_block = source_blocks[cursor.impl->order];
|
2018-01-29 17:42:19 +00:00
|
|
|
row_ref.columns = &row_ref.shared_block->all_columns;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-04-13 03:56:22 +00:00
|
|
|
|
2017-09-15 12:16:12 +00:00
|
|
|
template <typename TSortCursor>
|
2017-04-01 07:20:54 +00:00
|
|
|
void setPrimaryKeyRef(RowRef & row_ref, TSortCursor & cursor)
|
|
|
|
{
|
|
|
|
row_ref.row_num = cursor.impl->pos;
|
|
|
|
row_ref.shared_block = source_blocks[cursor.impl->order];
|
2018-01-29 17:42:19 +00:00
|
|
|
row_ref.columns = &row_ref.shared_block->sort_columns;
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
2016-04-13 03:56:22 +00:00
|
|
|
|
2012-08-14 20:33:37 +00:00
|
|
|
private:
|
2014-11-08 23:52:18 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** We support two different cursors - with Collation and without.
|
|
|
|
* Templates are used instead of polymorphic SortCursor and calls to virtual functions.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
template <typename TSortCursor>
|
|
|
|
void initQueue(std::priority_queue<TSortCursor> & queue);
|
2014-11-08 23:52:18 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
template <typename TSortCursor>
|
2017-12-15 00:06:56 +00:00
|
|
|
void merge(MutableColumns & merged_columns, std::priority_queue<TSortCursor> & queue);
|
2014-11-08 23:52:18 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
Logger * log = &Logger::get("MergingSortedBlockInputStream");
|
2015-01-18 08:25:56 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Read is finished.
|
2017-04-01 07:20:54 +00:00
|
|
|
bool finished = false;
|
2012-07-25 19:53:43 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|