#include <Processors/QueryPipeline.h>

#include <Processors/ResizeProcessor.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/NullSink.h>
#include <Processors/LimitTransform.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/Transforms/TotalsHavingTransform.h>
#include <Processors/Transforms/ExtremesTransform.h>
#include <Processors/Transforms/CreatingSetsTransform.h>
#include <Processors/Transforms/ConvertingTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Sources/SourceFromInputStream.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Processors/DelayedPortsProcessor.h>

#include <queue>
#include <unordered_set>

namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
}

void QueryPipeline::checkInitialized()
{
    if (!initialized())
        throw Exception("QueryPipeline wasn't initialized.", ErrorCodes::LOGICAL_ERROR);
}
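
/// Check that a processor may be used as a pipeline source: it must have no input ports and
/// a single output port (plus an optional totals port when can_have_totals is true).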
void QueryPipeline::checkSource(const ProcessorPtr & source, bool can_have_totals)
{
    if (!source->getInputs().empty())
        throw Exception("Source for query pipeline shouldn't have any input, but " + source->getName() + " has " +
                        toString(source->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);

    if (source->getOutputs().empty())
        throw Exception("Source for query pipeline should have single output, but it doesn't have any",
                        ErrorCodes::LOGICAL_ERROR);

    if (!can_have_totals && source->getOutputs().size() != 1)
        throw Exception("Source for query pipeline should have single output, but " + source->getName() + " has " +
                        toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);

    if (source->getOutputs().size() > 2)
        throw Exception("Source for query pipeline should have 1 or 2 outputs, but " + source->getName() + " has " +
                        toString(source->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
}

void QueryPipeline::init(Pipe pipe)
{
    Pipes pipes;
    pipes.emplace_back(std::move(pipe));
    init(std::move(pipes));
}
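
/// Initialize the pipeline from a set of source pipes: take over their table locks, contexts and
/// storage holders, check that all headers are compatible, and merge per-pipe totals ports into one.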
void QueryPipeline::init(Pipes pipes)
{
    if (initialized())
        throw Exception("Pipeline has already been initialized.", ErrorCodes::LOGICAL_ERROR);

    if (pipes.empty())
        throw Exception("Can't initialize pipeline with empty pipes list.", ErrorCodes::LOGICAL_ERROR);

    /// Move locks from pipes to pipeline class.
    for (auto & pipe : pipes)
    {
        for (auto & lock : pipe.getTableLocks())
            table_locks.emplace_back(lock);

        for (auto & context : pipe.getContexts())
            interpreter_context.emplace_back(context);

        for (auto & storage : pipe.getStorageHolders())
            storage_holders.emplace_back(storage);
    }

    std::vector<OutputPort *> totals;

    for (auto & pipe : pipes)
    {
        auto & header = pipe.getHeader();

        if (current_header)
            assertBlocksHaveEqualStructure(current_header, header, "QueryPipeline");
        else
            current_header = header;

        if (auto * totals_port = pipe.getTotalsPort())
        {
            assertBlocksHaveEqualStructure(current_header, totals_port->getHeader(), "QueryPipeline");
            totals.emplace_back(totals_port);
        }

        streams.addStream(&pipe.getPort());
        auto cur_processors = std::move(pipe).detachProcessors();
        processors.insert(processors.end(), cur_processors.begin(), cur_processors.end());
    }

    if (!totals.empty())
    {
        if (totals.size() == 1)
            totals_having_port = totals.back();
        else
        {
            auto resize = std::make_shared<ResizeProcessor>(current_header, totals.size(), 1);
            auto in = resize->getInputs().begin();
            for (auto & total : totals)
                connect(*total, *(in++));

            totals_having_port = &resize->getOutputs().front();
            processors.emplace_back(std::move(resize));
        }
    }
}

static ProcessorPtr callProcessorGetter(
    const Block & header, const QueryPipeline::ProcessorGetter & getter, QueryPipeline::StreamType)
{
    return getter(header);
}

static ProcessorPtr callProcessorGetter(
    const Block & header, const QueryPipeline::ProcessorGetterWithStreamKind & getter, QueryPipeline::StreamType kind)
{
    return getter(header, kind);
}
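
/// Add a simple (one input, one output) transform to every stream, and to the totals and
/// extremes ports if they exist. The getter creates a separate transform for each stream;
/// it may return nullptr to leave a stream unchanged.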
template <typename TProcessorGetter>
void QueryPipeline::addSimpleTransformImpl(const TProcessorGetter & getter)
{
    checkInitialized();

    Block header;

    auto add_transform = [&](OutputPort *& stream, StreamType stream_type, size_t stream_num [[maybe_unused]] = IProcessor::NO_STREAM)
    {
        if (!stream)
            return;

        auto transform = callProcessorGetter(stream->getHeader(), getter, stream_type);

        if (transform)
        {
            if (transform->getInputs().size() != 1)
                throw Exception("Processor for query pipeline transform should have single input, "
                                "but " + transform->getName() + " has " +
                                toString(transform->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR);

            if (transform->getOutputs().size() != 1)
                throw Exception("Processor for query pipeline transform should have single output, "
                                "but " + transform->getName() + " has " +
                                toString(transform->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR);
        }

        auto & out_header = transform ? transform->getOutputs().front().getHeader()
                                      : stream->getHeader();

        if (stream_type != StreamType::Totals)
        {
            if (header)
                assertBlocksHaveEqualStructure(header, out_header, "QueryPipeline");
            else
                header = out_header;
        }

        if (transform)
        {
//            if (stream_type == StreamType::Main)
//                transform->setStream(stream_num);

            connect(*stream, transform->getInputs().front());
            stream = &transform->getOutputs().front();
            processors.emplace_back(std::move(transform));
        }
    };

    for (size_t stream_num = 0; stream_num < streams.size(); ++stream_num)
        add_transform(streams[stream_num], StreamType::Main, stream_num);

    add_transform(totals_having_port, StreamType::Totals);
    add_transform(extremes_port, StreamType::Extremes);

    current_header = std::move(header);
}

void QueryPipeline::addSimpleTransform(const ProcessorGetter & getter)
{
    addSimpleTransformImpl(getter);
}

void QueryPipeline::addSimpleTransform(const ProcessorGetterWithStreamKind & getter)
{
    addSimpleTransformImpl(getter);
}
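
/// Append a chain of processors: the first processor consumes all current streams,
/// and the outputs of the last one become the new streams.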
void QueryPipeline::addPipe(Processors pipe)
{
    checkInitialized();

    if (pipe.empty())
        throw Exception("Can't add empty processors list to QueryPipeline.", ErrorCodes::LOGICAL_ERROR);

    auto & first = pipe.front();
    auto & last = pipe.back();

    auto num_inputs = first->getInputs().size();

    if (num_inputs != streams.size())
        throw Exception("Can't add processors to QueryPipeline because first processor has " + toString(num_inputs) +
                        " input ports, but QueryPipeline has " + toString(streams.size()) + " streams.",
                        ErrorCodes::LOGICAL_ERROR);

    auto stream = streams.begin();
    for (auto & input : first->getInputs())
        connect(**(stream++), input);

    Block header;
    streams.clear();
    streams.reserve(last->getOutputs().size());
    for (auto & output : last->getOutputs())
    {
        streams.addStream(&output);
        if (header)
            assertBlocksHaveEqualStructure(header, output.getHeader(), "QueryPipeline");
        else
            header = output.getHeader();
    }

    processors.insert(processors.end(), pipe.begin(), pipe.end());
    current_header = std::move(header);
}
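
/// Add a source that should be read only after the other streams are finished: every stream
/// is routed through a DelayedPortsProcessor, with the new stream's port marked as delayed.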
void QueryPipeline::addDelayedStream(ProcessorPtr source)
{
    checkInitialized();

    checkSource(source, false);
    assertBlocksHaveEqualStructure(current_header, source->getOutputs().front().getHeader(), "QueryPipeline");

    IProcessor::PortNumbers delayed_streams = { streams.size() };
    streams.addStream(&source->getOutputs().front());
    processors.emplace_back(std::move(source));

    auto processor = std::make_shared<DelayedPortsProcessor>(current_header, streams.size(), delayed_streams);
    addPipe({ std::move(processor) });
}
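
/// Change the number of parallel streams by inserting a ResizeProcessor
/// (a StrictResizeProcessor when strict is set).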
void QueryPipeline::resize(size_t num_streams, bool force, bool strict)
{
    checkInitialized();

    if (!force && num_streams == getNumStreams())
        return;

    has_resize = true;

    ProcessorPtr resize;

    if (strict)
        resize = std::make_shared<StrictResizeProcessor>(current_header, getNumStreams(), num_streams);
    else
        resize = std::make_shared<ResizeProcessor>(current_header, getNumStreams(), num_streams);

    auto stream = streams.begin();
    for (auto & input : resize->getInputs())
        connect(**(stream++), input);

    streams.clear();
    streams.reserve(num_streams);
    for (auto & output : resize->getOutputs())
        streams.addStream(&output);

    processors.emplace_back(std::move(resize));
}

void QueryPipeline::enableQuotaForCurrentStreams()
{
    for (auto & stream : streams)
        stream->getProcessor().enableQuota();
}
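
/// Add a TotalsHavingTransform: the pipeline is squashed to a single stream, the transform's
/// first output becomes the main stream and its second output becomes the totals port.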
void QueryPipeline::addTotalsHavingTransform(ProcessorPtr transform)
{
    checkInitialized();

    if (!typeid_cast<const TotalsHavingTransform *>(transform.get()))
        throw Exception("TotalsHavingTransform expected for QueryPipeline::addTotalsHavingTransform.",
                        ErrorCodes::LOGICAL_ERROR);

    if (totals_having_port)
        throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR);

    resize(1);

    connect(*streams.front(), transform->getInputs().front());

    auto & outputs = transform->getOutputs();

    streams.assign({ &outputs.front() });
    totals_having_port = &outputs.back();
    current_header = outputs.front().getHeader();
    processors.emplace_back(std::move(transform));
}
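
/// If no totals were produced, add a source with a single row of default values to serve as totals.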
void QueryPipeline::addDefaultTotals()
{
    checkInitialized();

    if (totals_having_port)
        throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR);

    Columns columns;
    columns.reserve(current_header.columns());

    for (size_t i = 0; i < current_header.columns(); ++i)
    {
        auto column = current_header.getByPosition(i).type->createColumn();
        column->insertDefault();
        columns.emplace_back(std::move(column));
    }

    auto source = std::make_shared<SourceFromSingleChunk>(current_header, Chunk(std::move(columns), 1));
    totals_having_port = &source->getPort();
    processors.emplace_back(source);
}

void QueryPipeline::addTotals(ProcessorPtr source)
{
    checkInitialized();

    if (totals_having_port)
        throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR);

    checkSource(source, false);
    assertBlocksHaveEqualStructure(current_header, source->getOutputs().front().getHeader(), "QueryPipeline");

    totals_having_port = &source->getOutputs().front();
    processors.emplace_back(source);
}

void QueryPipeline::dropTotalsIfHas()
{
    if (totals_having_port)
    {
        auto null_sink = std::make_shared<NullSink>(totals_having_port->getHeader());
        connect(*totals_having_port, null_sink->getPort());
        processors.emplace_back(std::move(null_sink));
        totals_having_port = nullptr;
    }
}
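
/// Add an ExtremesTransform over the single stream; its second output becomes the extremes port.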
void QueryPipeline::addExtremesTransform(ProcessorPtr transform)
{
    checkInitialized();

    if (!typeid_cast<const ExtremesTransform *>(transform.get()))
        throw Exception("ExtremesTransform expected for QueryPipeline::addExtremesTransform.",
                        ErrorCodes::LOGICAL_ERROR);

    if (extremes_port)
        throw Exception("Extremes transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR);

    if (getNumStreams() != 1)
        throw Exception("Can't add Extremes transform because pipeline is expected to have single stream, "
                        "but it has " + toString(getNumStreams()) + " streams.", ErrorCodes::LOGICAL_ERROR);

    connect(*streams.front(), transform->getInputs().front());

    auto & outputs = transform->getOutputs();

    streams.assign({ &outputs.front() });
    extremes_port = &outputs.back();
    current_header = outputs.front().getHeader();
    processors.emplace_back(std::move(transform));
}
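
/// Add a CreatingSetsTransform that must finish before the main data is read: its output is
/// concatenated in front of the (single) data stream.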
void QueryPipeline::addCreatingSetsTransform(ProcessorPtr transform)
{
    checkInitialized();

    if (!typeid_cast<const CreatingSetsTransform *>(transform.get()))
        throw Exception("CreatingSetsTransform expected for QueryPipeline::addCreatingSetsTransform.",
                        ErrorCodes::LOGICAL_ERROR);

    resize(1);

    auto concat = std::make_shared<ConcatProcessor>(current_header, 2);
    connect(transform->getOutputs().front(), concat->getInputs().front());
    connect(*streams.back(), concat->getInputs().back());

    streams.assign({ &concat->getOutputs().front() });
    processors.emplace_back(std::move(transform));
    processors.emplace_back(std::move(concat));
}
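
/// Attach the output format. Missing totals and extremes are backed by NullSource
/// so that all of the format's input ports are connected.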
void QueryPipeline::setOutput(ProcessorPtr output)
{
    checkInitialized();

    auto * format = dynamic_cast<IOutputFormat *>(output.get());

    if (!format)
        throw Exception("IOutputFormat processor expected for QueryPipeline::setOutput.", ErrorCodes::LOGICAL_ERROR);

    if (output_format)
        throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR);

    output_format = format;

    resize(1);

    auto & main = format->getPort(IOutputFormat::PortKind::Main);
    auto & totals = format->getPort(IOutputFormat::PortKind::Totals);
    auto & extremes = format->getPort(IOutputFormat::PortKind::Extremes);

    if (!totals_having_port)
    {
        auto null_source = std::make_shared<NullSource>(totals.getHeader());
        totals_having_port = &null_source->getPort();
        processors.emplace_back(std::move(null_source));
    }

    if (!extremes_port)
    {
        auto null_source = std::make_shared<NullSource>(extremes.getHeader());
        extremes_port = &null_source->getPort();
        processors.emplace_back(std::move(null_source));
    }

    processors.emplace_back(std::move(output));

    connect(*streams.front(), main);
    connect(*totals_having_port, totals);
    connect(*extremes_port, extremes);
}
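
/// Merge several pipelines into this one: every stream is converted to the common header,
/// extremes ports are combined (adding a shared ExtremesTransform if needed), and totals are
/// taken only from the first pipeline that has them.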
void QueryPipeline::unitePipelines(
    std::vector<QueryPipeline> && pipelines, const Block & common_header, const Context & context)
{
    checkInitialized();

    addSimpleTransform([&](const Block & header)
    {
        return std::make_shared<ConvertingTransform>(
            header, common_header, ConvertingTransform::MatchColumnsMode::Position, context);
    });

    std::vector<OutputPort *> extremes;

    for (auto & pipeline : pipelines)
    {
        pipeline.checkInitialized();

        pipeline.addSimpleTransform([&](const Block & header)
        {
            return std::make_shared<ConvertingTransform>(
                header, common_header, ConvertingTransform::MatchColumnsMode::Position, context);
        });

        if (pipeline.extremes_port)
        {
            auto converting = std::make_shared<ConvertingTransform>(
                pipeline.current_header, common_header, ConvertingTransform::MatchColumnsMode::Position, context);

            connect(*pipeline.extremes_port, converting->getInputPort());
            extremes.push_back(&converting->getOutputPort());
            processors.push_back(std::move(converting));
        }

        /// Take totals only from first port.
        if (pipeline.totals_having_port)
        {
            if (!totals_having_port)
            {
                auto converting = std::make_shared<ConvertingTransform>(
                    pipeline.current_header, common_header, ConvertingTransform::MatchColumnsMode::Position, context);

                connect(*pipeline.totals_having_port, converting->getInputPort());
                totals_having_port = &converting->getOutputPort();
                processors.push_back(std::move(converting));
            }
            else
                pipeline.dropTotalsIfHas();
        }

        processors.insert(processors.end(), pipeline.processors.begin(), pipeline.processors.end());
        streams.addStreams(pipeline.streams);

        table_locks.insert(table_locks.end(), std::make_move_iterator(pipeline.table_locks.begin()), std::make_move_iterator(pipeline.table_locks.end()));
        interpreter_context.insert(interpreter_context.end(), pipeline.interpreter_context.begin(), pipeline.interpreter_context.end());
        storage_holders.insert(storage_holders.end(), pipeline.storage_holders.begin(), pipeline.storage_holders.end());

        max_threads = std::max(max_threads, pipeline.max_threads);
    }

    if (!extremes.empty())
    {
        size_t num_inputs = extremes.size() + (extremes_port ? 1u : 0u);

        if (num_inputs == 1)
            extremes_port = extremes.front();
        else
        {
            /// Add extra processor for extremes.
            auto resize = std::make_shared<ResizeProcessor>(current_header, num_inputs, 1);
            auto input = resize->getInputs().begin();

            if (extremes_port)
                connect(*extremes_port, *(input++));

            for (auto & output : extremes)
                connect(*output, *(input++));

            auto transform = std::make_shared<ExtremesTransform>(current_header);
            extremes_port = &transform->getOutputPort();

            connect(resize->getOutputs().front(), transform->getInputPort());

            /// Keep the ResizeProcessor alive as part of the pipeline.
            processors.emplace_back(std::move(resize));
            processors.emplace_back(std::move(transform));
        }
    }
}

void QueryPipeline::setProgressCallback(const ProgressCallback & callback)
{
    for (auto & processor : processors)
    {
        if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
            source->setProgressCallback(callback);

        if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
            source->setProgressCallback(callback);
    }
}

void QueryPipeline::setProcessListElement(QueryStatus * elem)
{
    process_list_element = elem;

    for (auto & processor : processors)
    {
        if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
            source->setProcessListElement(elem);

        if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
            source->setProcessListElement(elem);
    }
}

void QueryPipeline::finalize()
{
    checkInitialized();

    if (!output_format)
        throw Exception("Cannot finalize pipeline because it doesn't have output.", ErrorCodes::LOGICAL_ERROR);

    calcRowsBeforeLimit();
}
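
/// Walk the processor graph backwards from the output format and accumulate rows_before_limit
/// from LimitTransform, SourceFromInputStream and PartialSortingTransform processors.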
void QueryPipeline::calcRowsBeforeLimit()
{
    /// TODO get from Remote

    UInt64 rows_before_limit_at_least = 0;
    UInt64 rows_before_limit = 0;

    bool has_limit = false;
    bool has_partial_sorting = false;

    std::unordered_set<IProcessor *> visited;

    struct QueuedEntry
    {
        IProcessor * processor;
        bool visited_limit;
    };

    std::queue<QueuedEntry> queue;

    queue.push({ output_format, false });
    visited.emplace(output_format);

    while (!queue.empty())
    {
        auto processor = queue.front().processor;
        auto visited_limit = queue.front().visited_limit;
        queue.pop();

        if (!visited_limit)
        {
            if (auto * limit = typeid_cast<const LimitTransform *>(processor))
            {
                has_limit = visited_limit = true;
                rows_before_limit_at_least += limit->getRowsBeforeLimitAtLeast();
            }

            if (auto * source = typeid_cast<SourceFromInputStream *>(processor))
            {
                if (auto & stream = source->getStream())
                {
                    auto & info = stream->getProfileInfo();
                    if (info.hasAppliedLimit())
                    {
                        has_limit = visited_limit = true;
                        rows_before_limit_at_least += info.getRowsBeforeLimit();
                    }
                }
            }
        }

        if (auto * sorting = typeid_cast<const PartialSortingTransform *>(processor))
        {
            has_partial_sorting = true;
            rows_before_limit += sorting->getNumReadRows();

            /// Don't go to children. Take rows_before_limit from last PartialSortingTransform.
            /// continue;
        }

        /// Skip totals and extremes port for output format.
        if (auto * format = dynamic_cast<IOutputFormat *>(processor))
        {
            auto * child_processor = &format->getPort(IOutputFormat::PortKind::Main).getOutputPort().getProcessor();
            if (visited.emplace(child_processor).second)
                queue.push({ child_processor, visited_limit });

            continue;
        }

        for (auto & child_port : processor->getInputs())
        {
            auto * child_processor = &child_port.getOutputPort().getProcessor();
            if (visited.emplace(child_processor).second)
                queue.push({ child_processor, visited_limit });
        }
    }

    /// Get the number of read rows from PartialSortingTransform if we have it.
    if (has_limit)
        output_format->setRowsBeforeLimit(has_partial_sorting ? rows_before_limit : rows_before_limit_at_least);
}
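
/// Convert the pipeline back into a single Pipe, transferring table locks, contexts,
/// storage holders and the totals port to it.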
Pipe QueryPipeline::getPipe() &&
{
    resize(1);
    Pipe pipe(std::move(processors), streams.at(0), totals_having_port);

    for (auto & lock : table_locks)
        pipe.addTableLock(lock);

    for (auto & context : interpreter_context)
        pipe.addInterpreterContext(context);

    for (auto & storage : storage_holders)
        pipe.addStorageHolder(storage);

    if (totals_having_port)
        pipe.setTotalsPort(totals_having_port);

    return pipe;
}

PipelineExecutorPtr QueryPipeline::execute()
{
    checkInitialized();

    if (!output_format)
        throw Exception("Cannot execute pipeline because it doesn't have output.", ErrorCodes::LOGICAL_ERROR);

    return std::make_shared<PipelineExecutor>(processors, process_list_element);
}
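
/// Move assignment. Primitive fields of rhs are reset explicitly; the container members are
/// then moved in destruction order.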
QueryPipeline & QueryPipeline::operator= (QueryPipeline && rhs)
{
    /// Reset primitive fields
    process_list_element = rhs.process_list_element;
    rhs.process_list_element = nullptr;
    max_threads = rhs.max_threads;
    rhs.max_threads = 0;
    output_format = rhs.output_format;
    rhs.output_format = nullptr;
    has_resize = rhs.has_resize;
    rhs.has_resize = false;
    extremes_port = rhs.extremes_port;
    rhs.extremes_port = nullptr;
    totals_having_port = rhs.totals_having_port;
    rhs.totals_having_port = nullptr;

    /// Move these fields in destruction order (it's important)
    streams = std::move(rhs.streams);
    processors = std::move(rhs.processors);
    current_header = std::move(rhs.current_header);
    table_locks = std::move(rhs.table_locks);
    storage_holders = std::move(rhs.storage_holders);
    interpreter_context = std::move(rhs.interpreter_context);

    return *this;
}

}