2012-01-10 22:11:51 +00:00
|
|
|
#pragma once
|
|
|
|
|
2015-09-29 19:19:54 +00:00
|
|
|
#include <common/logger_useful.h>
|
2012-01-10 22:11:51 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/ConcurrentBoundedQueue.h>
|
|
|
|
#include <DataStreams/IProfilingBlockInputStream.h>
|
|
|
|
#include <DataStreams/ParallelInputsProcessor.h>
|
2012-01-10 22:11:51 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-12 02:39:12 +00:00
|
|
|
/// Error codes referenced by this header. They are only declared here (`extern`).
namespace ErrorCodes
{
    /// Thrown by readSuffix when it is called before all data is read and without cancellation.
    extern const int LOGICAL_ERROR;
    /// Thrown by doGetBlockExtraInfo when the stream works in StreamUnionMode::Basic.
    /// Declared explicitly so that this header does not depend on another header declaring it.
    extern const int NOT_IMPLEMENTED;
}
|
|
|
|
|
|
|
|
|
2015-10-12 14:53:16 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
template <StreamUnionMode mode>
|
|
|
|
struct OutputData;
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// A block or an exception.
|
2015-10-12 14:53:16 +00:00
|
|
|
template <>
|
|
|
|
struct OutputData<StreamUnionMode::Basic>
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
Block block;
|
|
|
|
std::exception_ptr exception;
|
2015-10-12 14:53:16 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
OutputData() {}
|
|
|
|
OutputData(Block & block_) : block(block_) {}
|
|
|
|
OutputData(std::exception_ptr & exception_) : exception(exception_) {}
|
2015-10-12 14:53:16 +00:00
|
|
|
};
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Block + additional information or an exception.
|
2015-10-12 14:53:16 +00:00
|
|
|
template <>
|
|
|
|
struct OutputData<StreamUnionMode::ExtraInfo>
|
|
|
|
{
|
2017-04-01 07:20:54 +00:00
|
|
|
Block block;
|
|
|
|
BlockExtraInfo extra_info;
|
|
|
|
std::exception_ptr exception;
|
2015-10-12 14:53:16 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
OutputData() {}
|
|
|
|
OutputData(Block & block_, BlockExtraInfo & extra_info_) : block(block_), extra_info(extra_info_) {}
|
|
|
|
OutputData(std::exception_ptr & exception_) : exception(exception_) {}
|
2015-10-12 14:53:16 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
2012-01-10 22:11:51 +00:00
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** Merges several sources into one.
|
|
|
|
* Blocks from different sources are interleaved with each other in an arbitrary way.
|
|
|
|
* You can specify the number of threads (max_threads),
|
|
|
|
* in which data will be retrieved from different sources.
|
2012-09-24 02:05:40 +00:00
|
|
|
*
|
2017-05-13 22:19:04 +00:00
|
|
|
* It's managed like this:
|
|
|
|
* - with the help of ParallelInputsProcessor in several threads it takes out blocks from the sources;
|
|
|
|
* - the completed blocks are added to a limited queue of finished blocks;
|
|
|
|
* - the main thread takes out completed blocks from the queue of finished blocks;
|
|
|
|
* - if the StreamUnionMode::ExtraInfo mode is specified, the UnionBlockInputStream additionally
*   extracts extra information about the blocks; in this case all sources must support this mode.
|
2012-01-10 22:11:51 +00:00
|
|
|
*/
|
2014-11-30 18:22:57 +00:00
|
|
|
|
2015-10-12 14:53:16 +00:00
|
|
|
template <StreamUnionMode mode = StreamUnionMode::Basic>
|
2017-09-08 04:58:57 +00:00
|
|
|
class UnionBlockInputStream final : public IProfilingBlockInputStream
|
2012-01-10 22:11:51 +00:00
|
|
|
{
|
2016-03-04 16:33:31 +00:00
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
using ExceptionCallback = std::function<void()>;
|
2016-03-04 16:33:31 +00:00
|
|
|
|
2015-10-12 14:53:16 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
using Self = UnionBlockInputStream<mode>;
|
2015-10-12 14:53:16 +00:00
|
|
|
|
2012-01-10 22:11:51 +00:00
|
|
|
public:
|
2017-04-01 07:20:54 +00:00
|
|
|
/** Creates a union of `inputs` and, optionally, `additional_input_at_end`.
  *
  * The output queue capacity is min(inputs.size(), max_threads).
  * `exception_callback_` (may be empty) is invoked by readImpl right before it rethrows
  * an exception taken from the queue.
  *
  * NOTE(review): when `inputs` is empty the queue is created with capacity 0 -
  * confirm that ConcurrentBoundedQueue accepts that.
  */
UnionBlockInputStream(BlockInputStreams inputs, BlockInputStreamPtr additional_input_at_end, size_t max_threads,
    ExceptionCallback exception_callback_ = ExceptionCallback())
    : output_queue(std::min(inputs.size(), max_threads))
    , handler(*this)
    , processor(inputs, additional_input_at_end, max_threads, handler)
    , exception_callback(exception_callback_)
{
    /// Every source, including the optional trailing one, is registered as a child of this stream.
    children = inputs;

    if (additional_input_at_end)
    {
        children.push_back(additional_input_at_end);
    }
}
|
|
|
|
|
|
|
|
/// Name of this kind of stream.
String getName() const override
{
    return "Union";
}
|
|
|
|
|
|
|
|
/// Builds the identifier "Union(<id>, <id>, ...)" from the identifiers of all children.
/// The children identifiers are sorted, so the result does not depend on their order.
String getID() const override
{
    /// Collect the identifier of every child.
    Strings children_ids(children.size());
    std::transform(children.begin(), children.end(), children_ids.begin(),
        [](const BlockInputStreamPtr & child) { return child->getID(); });

    /// Order does not matter.
    std::sort(children_ids.begin(), children_ids.end());

    std::stringstream out;
    out << "Union(";

    for (auto it = children_ids.begin(); it != children_ids.end(); ++it)
    {
        /// Separator goes before every element except the first one.
        if (it != children_ids.begin())
            out << ", ";
        out << *it;
    }

    out << ")";
    return out.str();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Cancels the sources if not everything was read, then finalizes the stream.
/// Any exception raised on the way is logged and never leaves the destructor.
~UnionBlockInputStream() override
{
    try
    {
        const bool has_unread_data = !all_read;

        if (has_unread_data)
        {
            cancel();
        }

        finalize();
    }
    catch (...)
    {
        tryLogCurrentException(__PRETTY_FUNCTION__);
    }
}
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** Different from the default implementation by trying to stop all sources,
|
|
|
|
* skipping failed by execution.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void cancel() override
|
|
|
|
{
|
|
|
|
bool old_val = false;
|
|
|
|
if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
|
|
|
|
return;
|
|
|
|
|
|
|
|
//std::cerr << "cancelling\n";
|
|
|
|
processor.cancel();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Forwards to the doGetBlockExtraInfo overload selected by `mode`:
/// it returns the stored extra info in ExtraInfo mode and throws in Basic mode.
BlockExtraInfo getBlockExtraInfo() const override
{
    return this->doGetBlockExtraInfo();
}
|
2013-11-25 10:46:25 +00:00
|
|
|
|
2013-11-29 18:44:02 +00:00
|
|
|
protected:
|
2017-04-01 07:20:54 +00:00
|
|
|
void finalize()
|
|
|
|
{
|
|
|
|
if (!started)
|
|
|
|
return;
|
|
|
|
|
|
|
|
LOG_TRACE(log, "Waiting for threads to finish");
|
|
|
|
|
|
|
|
std::exception_ptr exception;
|
|
|
|
if (!all_read)
|
|
|
|
{
|
2017-05-13 22:19:04 +00:00
|
|
|
/** Let's read everything up to the end, so that ParallelInputsProcessor is not blocked when trying to insert into the queue.
|
|
|
|
* Maybe there is an exception in the queue.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
OutputData<mode> res;
|
|
|
|
while (true)
|
|
|
|
{
|
|
|
|
//std::cerr << "popping\n";
|
|
|
|
output_queue.pop(res);
|
|
|
|
|
|
|
|
if (res.exception)
|
|
|
|
{
|
|
|
|
if (!exception)
|
|
|
|
exception = res.exception;
|
|
|
|
else if (Exception * e = exception_cast<Exception *>(exception))
|
|
|
|
e->addMessage("\n" + getExceptionMessage(res.exception, false));
|
|
|
|
}
|
|
|
|
else if (!res.block)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
all_read = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
processor.wait();
|
|
|
|
|
|
|
|
LOG_TRACE(log, "Waited for threads to finish");
|
|
|
|
|
|
|
|
if (exception)
|
|
|
|
std::rethrow_exception(exception);
|
|
|
|
}
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Do nothing, to make the preparation for the query execution in parallel, in ParallelInputsProcessor.
|
2017-04-01 07:20:54 +00:00
|
|
|
void readPrefix() override
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/** The following options are possible:
|
|
|
|
* 1. `readImpl` function is called until it returns an empty block.
|
|
|
|
* Then `readSuffix` function is called and then destructor.
|
|
|
|
* 2. `readImpl` function is called. At some point, `cancel` function is called perhaps from another thread.
|
|
|
|
* Then `readSuffix` function is called and then destructor.
|
|
|
|
* 3. At any time, the object can be destroyed (destructor called).
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
Block readImpl() override
|
|
|
|
{
|
|
|
|
if (all_read)
|
|
|
|
return received_payload.block;
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Run threads if this has not already been done.
|
2017-04-01 07:20:54 +00:00
|
|
|
if (!started)
|
|
|
|
{
|
|
|
|
started = true;
|
|
|
|
processor.process();
|
|
|
|
}
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// We will wait until the next block is ready or an exception is thrown.
|
2017-04-01 07:20:54 +00:00
|
|
|
//std::cerr << "popping\n";
|
|
|
|
output_queue.pop(received_payload);
|
|
|
|
|
|
|
|
if (received_payload.exception)
|
|
|
|
{
|
|
|
|
if (exception_callback)
|
|
|
|
exception_callback();
|
|
|
|
std::rethrow_exception(received_payload.exception);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!received_payload.block)
|
|
|
|
all_read = true;
|
|
|
|
|
|
|
|
return received_payload.block;
|
|
|
|
}
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// Called either after everything is read, or after cancel.
|
2017-04-01 07:20:54 +00:00
|
|
|
void readSuffix() override
|
|
|
|
{
|
|
|
|
//std::cerr << "readSuffix\n";
|
|
|
|
if (!all_read && !is_cancelled.load(std::memory_order_seq_cst))
|
|
|
|
throw Exception("readSuffix called before all data is read", ErrorCodes::LOGICAL_ERROR);
|
|
|
|
|
|
|
|
finalize();
|
|
|
|
|
|
|
|
for (size_t i = 0; i < children.size(); ++i)
|
|
|
|
children[i]->readSuffix();
|
|
|
|
}
|
2013-09-13 20:33:09 +00:00
|
|
|
|
2012-09-24 02:05:40 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
template<StreamUnionMode mode2 = mode>
|
|
|
|
BlockExtraInfo doGetBlockExtraInfo(typename std::enable_if<mode2 == StreamUnionMode::ExtraInfo>::type * = nullptr) const
|
|
|
|
{
|
|
|
|
return received_payload.extra_info;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<StreamUnionMode mode2 = mode>
|
|
|
|
BlockExtraInfo doGetBlockExtraInfo(typename std::enable_if<mode2 == StreamUnionMode::Basic>::type * = nullptr) const
|
|
|
|
{
|
|
|
|
throw Exception("Method getBlockExtraInfo is not supported for mode StreamUnionMode::Basic",
|
|
|
|
ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
}
|
2012-01-10 22:11:51 +00:00
|
|
|
|
2015-10-12 14:53:16 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
using Payload = OutputData<mode>;
|
|
|
|
using OutputQueue = ConcurrentBoundedQueue<Payload>;
|
2015-10-12 14:53:16 +00:00
|
|
|
|
|
|
|
private:
|
2017-05-13 22:19:04 +00:00
|
|
|
/** The queue of finished blocks. An exception can be put into it instead of a block.
  * When the data runs out, an empty block is inserted into the queue.
  * Sooner or later, an empty block is always inserted into the queue (even after an exception or query cancellation).
  * The queue must always be read up to an empty block (even after an exception or query cancellation,
  * even in the destructor), otherwise ParallelInputsProcessor can be blocked during insertion into the queue.
  */
|
|
|
|
OutputQueue output_queue;
|
|
|
|
|
|
|
|
struct Handler
|
|
|
|
{
|
|
|
|
Handler(Self & parent_) : parent(parent_) {}
|
|
|
|
|
|
|
|
template <StreamUnionMode mode2 = mode>
|
|
|
|
void onBlock(Block & block, size_t thread_num,
|
|
|
|
typename std::enable_if<mode2 == StreamUnionMode::Basic>::type * = nullptr)
|
|
|
|
{
|
|
|
|
//std::cerr << "pushing block\n";
|
|
|
|
parent.output_queue.push(Payload(block));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <StreamUnionMode mode2 = mode>
|
|
|
|
void onBlock(Block & block, BlockExtraInfo & extra_info, size_t thread_num,
|
|
|
|
typename std::enable_if<mode2 == StreamUnionMode::ExtraInfo>::type * = nullptr)
|
|
|
|
{
|
|
|
|
//std::cerr << "pushing block with extra info\n";
|
|
|
|
parent.output_queue.push(Payload(block, extra_info));
|
|
|
|
}
|
|
|
|
|
|
|
|
void onFinish()
|
|
|
|
{
|
|
|
|
//std::cerr << "pushing end\n";
|
|
|
|
parent.output_queue.push(Payload());
|
|
|
|
}
|
|
|
|
|
|
|
|
void onFinishThread(size_t thread_num)
|
|
|
|
{
|
|
|
|
}
|
|
|
|
|
|
|
|
void onException(std::exception_ptr & exception, size_t thread_num)
|
|
|
|
{
|
|
|
|
//std::cerr << "pushing exception\n";
|
|
|
|
|
2017-05-13 22:19:04 +00:00
|
|
|
/// The order of the rows matters. If it is changed, then the situation is possible,
|
|
|
|
/// when before exception, an empty block (end of data) will be put into the queue,
|
|
|
|
/// and the exception is lost.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
|
|
|
parent.output_queue.push(exception);
|
2017-05-13 22:19:04 +00:00
|
|
|
parent.cancel(); /// Does not throw exceptions.
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Self & parent;
|
|
|
|
};
|
|
|
|
|
|
|
|
Handler handler;
|
|
|
|
ParallelInputsProcessor<Handler, mode> processor;
|
|
|
|
|
|
|
|
ExceptionCallback exception_callback;
|
|
|
|
|
|
|
|
Payload received_payload;
|
|
|
|
|
|
|
|
bool started = false;
|
|
|
|
bool all_read = false;
|
|
|
|
|
|
|
|
Logger * log = &Logger::get("UnionBlockInputStream");
|
2012-01-10 22:11:51 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|