ClickHouse/src/QueryPipeline/QueryPipelineBuilder.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

675 lines
24 KiB
C++
Raw Normal View History

2021-10-16 14:03:50 +00:00
#include <QueryPipeline/QueryPipelineBuilder.h>
2019-03-26 18:28:37 +00:00
2023-02-03 13:34:18 +00:00
#include <Common/CurrentThread.h>
#include <Common/typeid_cast.h>
#include "Core/UUID.h"
#include <Core/SortDescription.h>
2019-03-26 18:28:37 +00:00
#include <Interpreters/Context.h>
2020-11-17 17:16:55 +00:00
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/IJoin.h>
2022-04-03 15:54:22 +00:00
#include <Interpreters/TableJoin.h>
2023-02-03 13:34:18 +00:00
#include <IO/WriteHelpers.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/DelayedPortsProcessor.h>
2023-02-03 13:34:18 +00:00
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/LimitTransform.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/ResizeProcessor.h>
#include <Processors/RowsBeforeLimitCounter.h>
2020-06-03 19:50:11 +00:00
#include <Processors/Sources/RemoteSource.h>
2023-02-03 13:34:18 +00:00
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/CreatingSetsTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/ExtremesTransform.h>
#include <Processors/Transforms/JoiningTransform.h>
#include <Processors/Transforms/MergeJoinTransform.h>
#include <Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h>
#include <Processors/Transforms/PartialSortingTransform.h>
#include <Processors/Transforms/TotalsHavingTransform.h>
#include <QueryPipeline/narrowPipe.h>
2019-03-26 18:28:37 +00:00
namespace DB
{
2020-02-25 18:10:48 +00:00
/// Error codes raised from this file. Only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
}
2019-03-26 18:28:37 +00:00
void QueryPipelineBuilder::checkInitialized()
2019-03-26 18:28:37 +00:00
{
if (!initialized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPipeline is uninitialized");
2019-03-26 18:28:37 +00:00
}
void QueryPipelineBuilder::checkInitializedAndNotCompleted()
2020-05-27 18:20:26 +00:00
{
checkInitialized();
2020-08-04 13:06:59 +00:00
if (pipe.isCompleted())
throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPipeline is already completed");
2020-05-27 18:20:26 +00:00
}
2020-08-04 13:06:59 +00:00
static void checkSource(const ProcessorPtr & source, bool can_have_totals)
2019-03-26 18:28:37 +00:00
{
if (!source->getInputs().empty())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for query pipeline shouldn't have any input, but {} has {} inputs",
source->getName(),
source->getInputs().size());
2019-03-26 18:28:37 +00:00
2019-04-17 14:38:16 +00:00
if (source->getOutputs().empty())
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Source for query pipeline should have single output, but {} doesn't have any", source->getName());
2019-04-17 14:38:16 +00:00
if (!can_have_totals && source->getOutputs().size() != 1)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for query pipeline should have single output, but {} has {} outputs",
source->getName(),
source->getOutputs().size());
2019-04-17 14:38:16 +00:00
if (source->getOutputs().size() > 2)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Source for query pipeline should have 1 or 2 output, but {} has {} outputs",
source->getName(),
source->getOutputs().size());
2019-03-26 18:28:37 +00:00
}
/// Initializes the builder by taking ownership of `pipe_`.
/// Throws LOGICAL_ERROR if the builder is already initialized or if `pipe_` is empty.
void QueryPipelineBuilder::init(Pipe pipe_)
{
    const bool already_initialized = initialized();
    if (already_initialized)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline has already been initialized");

    const bool nothing_to_adopt = pipe_.empty();
    if (nothing_to_adopt)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't initialize pipeline with empty pipe");

    pipe = std::move(pipe_);
}
2022-05-20 19:49:31 +00:00
/// Initializes the builder from the ports of an existing QueryPipeline.
/// Only port pointers, the header and the thread count are copied from `pipeline`.
/// Throws LOGICAL_ERROR if the builder is already initialized or if `pipeline` is a pushing pipeline.
void QueryPipelineBuilder::init(QueryPipeline & pipeline)
{
    if (initialized())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline has already been initialized");

    if (pipeline.pushing())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't initialize pushing pipeline");

    if (!pipeline.output)
    {
        /// No main output: the pipe gets no output ports and an empty header.
        pipe.output_ports.clear();
        pipe.header = {};
    }
    else
    {
        /// The single main output becomes the only output port and defines the header.
        pipe.output_ports = {pipeline.output};
        pipe.header = pipeline.output->getHeader();
    }

    pipe.totals_port = pipeline.totals;
    pipe.extremes_port = pipeline.extremes;
    pipe.max_parallel_streams = pipeline.num_threads;
}
void QueryPipelineBuilder::reset()
2020-08-06 12:24:05 +00:00
{
Pipe pipe_to_destroy(std::move(pipe));
*this = QueryPipelineBuilder();
2020-08-06 12:24:05 +00:00
}
/// Checks that the builder is initialized and not completed, then forwards `getter`
/// to Pipe::addSimpleTransform.
void QueryPipelineBuilder::addSimpleTransform(const Pipe::ProcessorGetter & getter)
{
    checkInitializedAndNotCompleted();

    pipe.addSimpleTransform(getter);
}
/// Same as the ProcessorGetter overload, for a getter of type Pipe::ProcessorGetterWithStreamKind.
void QueryPipelineBuilder::addSimpleTransform(const Pipe::ProcessorGetterWithStreamKind & getter)
{
    checkInitializedAndNotCompleted();

    pipe.addSimpleTransform(getter);
}
/// Checks that the builder is initialized and not completed, then hands `transform` over to the pipe.
void QueryPipelineBuilder::addTransform(ProcessorPtr transform)
{
    checkInitializedAndNotCompleted();

    pipe.addTransform(std::move(transform));
}
/// Checks that the builder is initialized and not completed, then hands `transform` over to the pipe
/// together with the `totals` and `extremes` input ports (passed through unchanged).
void QueryPipelineBuilder::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes)
{
    checkInitializedAndNotCompleted();

    pipe.addTransform(std::move(transform), totals, extremes);
}
/// Checks that the builder is initialized and not completed, then passes all `chains` to Pipe::addChains.
void QueryPipelineBuilder::addChains(std::vector<Chain> chains)
{
    checkInitializedAndNotCompleted();

    pipe.addChains(std::move(chains));
}
/// Resizes the pipe to a single stream and appends one chain to it.
void QueryPipelineBuilder::addChain(Chain chain)
{
    checkInitializedAndNotCompleted();

    pipe.resize(1);

    /// Pipe::addChains takes a vector, so wrap the single chain.
    std::vector<Chain> single_chain;
    single_chain.emplace_back(std::move(chain));
    pipe.addChains(std::move(single_chain));
}
2022-08-01 13:20:12 +00:00
void QueryPipelineBuilder::transform(const Transformer & transformer, bool check_ports)
{
checkInitializedAndNotCompleted();
2022-08-01 13:20:12 +00:00
pipe.transform(transformer, check_ports);
}
/// Checks that the builder is initialized and not completed, then forwards `getter` to Pipe::setSinks.
void QueryPipelineBuilder::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter)
{
    checkInitializedAndNotCompleted();

    pipe.setSinks(getter);
}
/// Adds `source` as one more stream of the pipe and marks that stream as delayed
/// by placing a DelayedPortsProcessor over all output ports.
/// `source` must have no inputs and exactly one output whose header matches the pipeline header.
void QueryPipelineBuilder::addDelayedStream(ProcessorPtr source)
{
    checkInitializedAndNotCompleted();

    checkSource(source, false);
    const auto & source_header = source->getOutputs().front().getHeader();
    assertBlocksHaveEqualStructure(getHeader(), source_header, "QueryPipeline");

    /// The new source is appended after the existing ports, so its index equals the current port count.
    IProcessor::PortNumbers delayed_streams = { pipe.numOutputPorts() };
    pipe.addSource(std::move(source));

    auto delayed_ports = std::make_shared<DelayedPortsProcessor>(getHeader(), pipe.numOutputPorts(), delayed_streams);
    addTransform(std::move(delayed_ports));
}
/// Delegates to the free function DB::addMergingAggregatedMemoryEfficientTransform, applied to this builder's pipe.
void QueryPipelineBuilder::addMergingAggregatedMemoryEfficientTransform(AggregatingTransformParamsPtr params, size_t num_merging_processors)
{
    DB::addMergingAggregatedMemoryEfficientTransform(
        pipe,
        std::move(params),
        num_merging_processors);
}
void QueryPipelineBuilder::resize(size_t num_streams, bool force, bool strict)
2019-03-26 18:28:37 +00:00
{
2020-05-27 18:20:26 +00:00
checkInitializedAndNotCompleted();
2020-10-12 09:30:05 +00:00
pipe.resize(num_streams, force, strict);
2019-12-26 16:52:15 +00:00
}
2022-07-25 19:41:43 +00:00
/// Checks that the builder is initialized and not completed, then applies narrowPipe() to the pipe with `size`.
void QueryPipelineBuilder::narrow(size_t size)
{
    checkInitializedAndNotCompleted();

    narrowPipe(pipe, size);
}
/// Adds a TotalsHavingTransform on top of the pipeline (resized to one stream first).
/// The last output of `transform` becomes the totals port of the pipe.
/// Throws LOGICAL_ERROR if `transform` is not a TotalsHavingTransform or if the pipe already has a totals port.
void QueryPipelineBuilder::addTotalsHavingTransform(ProcessorPtr transform)
{
    checkInitializedAndNotCompleted();

    const auto * totals_having = typeid_cast<const TotalsHavingTransform *>(transform.get());
    if (!totals_having)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "TotalsHavingTransform is expected for QueryPipeline::addTotalsHavingTransform");

    if (pipe.getTotalsPort())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals having transform was already added to pipeline");

    resize(1);

    /// Take the port address before `transform` is moved into the pipe.
    auto * totals_output = &transform->getOutputs().back();
    pipe.addTransform(std::move(transform), totals_output, nullptr);
}
void QueryPipelineBuilder::addDefaultTotals()
2019-04-09 14:51:38 +00:00
{
2020-05-27 18:20:26 +00:00
checkInitializedAndNotCompleted();
2019-04-09 14:51:38 +00:00
2020-08-04 13:06:59 +00:00
if (pipe.getTotalsPort())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals having transform was already added to pipeline");
2019-04-09 14:51:38 +00:00
2020-08-04 13:06:59 +00:00
const auto & current_header = getHeader();
2019-04-09 14:51:38 +00:00
Columns columns;
columns.reserve(current_header.columns());
for (size_t i = 0; i < current_header.columns(); ++i)
{
auto column = current_header.getByPosition(i).type->createColumn();
column->insertDefault();
columns.emplace_back(std::move(column));
}
auto source = std::make_shared<SourceFromSingleChunk>(current_header, Chunk(std::move(columns), 1));
2020-08-04 13:06:59 +00:00
pipe.addTotalsSource(std::move(source));
2019-04-10 16:28:37 +00:00
}
void QueryPipelineBuilder::dropTotalsAndExtremes()
2019-04-17 15:35:22 +00:00
{
2020-08-04 15:51:56 +00:00
pipe.dropTotals();
pipe.dropExtremes();
2019-04-17 15:35:22 +00:00
}
void QueryPipelineBuilder::addExtremesTransform()
2019-03-26 18:28:37 +00:00
{
2020-05-27 18:20:26 +00:00
checkInitializedAndNotCompleted();
2019-03-26 18:28:37 +00:00
2021-01-22 10:44:24 +00:00
/// It is possible that pipeline already have extremes.
/// For example, it may be added from VIEW subquery.
/// In this case, recalculate extremes again - they should be calculated for different rows.
2020-08-04 15:51:56 +00:00
if (pipe.getExtremesPort())
2021-01-22 10:44:24 +00:00
pipe.dropExtremes();
2019-03-26 18:28:37 +00:00
2020-08-04 15:51:56 +00:00
resize(1);
auto transform = std::make_shared<ExtremesTransform>(getHeader());
auto * port = &transform->getExtremesPort();
pipe.addTransform(std::move(transform), nullptr, port);
2019-03-26 18:28:37 +00:00
}
/// Unites several pipelines into one builder via Pipe::unitePipes.
///
/// @param pipelines            Pipelines to unite; must not be empty and each must be initialized.
/// @param max_threads_limit    Upper bound for the resulting max_threads. It is raised to the largest
///                             max_threads among the inputs if one of them exceeds it.
/// @param collected_processors Optional list that receives processors created while uniting; may be nullptr.
/// @return The united pipeline builder. Its collected-processors pointer is reset to nullptr.
/// @throws Exception(LOGICAL_ERROR) if `pipelines` is empty or contains an uninitialized pipeline.
QueryPipelineBuilder QueryPipelineBuilder::unitePipelines(
    std::vector<std::unique_ptr<QueryPipelineBuilder>> pipelines,
    size_t max_threads_limit,
    Processors * collected_processors)
{
    if (pipelines.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite an empty set of pipelines");

    /// Should we limit the number of threads for united pipeline. True if all pipelines have max_threads != 0.
    /// If true, result max_threads will be sum(max_threads).
    /// Note: it may be > than settings.max_threads, so we should apply this limit again.
    bool will_limit_max_threads = true;
    size_t max_threads = 0;
    Pipes pipes;
    QueryPlanResourceHolder resources;

    for (auto & pipeline_ptr : pipelines)
    {
        auto & pipeline = *pipeline_ptr;
        pipeline.checkInitialized();

        /// NOTE(review): executed once per pipeline, so this presumably relies on
        /// QueryPlanResourceHolder's move-assignment accumulating resources rather than replacing them - confirm.
        resources = std::move(pipeline.resources);
        pipeline.pipe.collected_processors = collected_processors;

        pipes.emplace_back(std::move(pipeline.pipe));

        max_threads += pipeline.max_threads;
        will_limit_max_threads = will_limit_max_threads && pipeline.max_threads != 0;

        /// If one of pipelines uses more threads then current limit, will keep it.
        /// It may happen if max_distributed_connections > max_threads
        if (pipeline.max_threads > max_threads_limit)
            max_threads_limit = pipeline.max_threads;
    }

    QueryPipelineBuilder pipeline;
    pipeline.init(Pipe::unitePipes(std::move(pipes), collected_processors, false));
    pipeline.addResources(std::move(resources));

    if (will_limit_max_threads)
    {
        pipeline.setMaxThreads(max_threads);
        pipeline.limitMaxThreads(max_threads_limit);
    }

    pipeline.setCollectedProcessors(nullptr);
    return pipeline;
}
2022-03-30 10:07:09 +00:00
/// Connects the first output port of `left` and of `right` to the first and last input of `transform`
/// respectively, and makes the single output of `transform` the first output port of `left`.
/// All processors of `right` are appended to `left`, which is returned.
/// Throws LOGICAL_ERROR if `transform` does not have exactly one output.
QueryPipelineBuilderPtr QueryPipelineBuilder::mergePipelines(
    QueryPipelineBuilderPtr left,
    QueryPipelineBuilderPtr right,
    ProcessorPtr transform,
    Processors * collected_processors)
{
    const size_t num_outputs = transform->getOutputs().size();
    if (num_outputs != 1)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge transform must have exactly 1 output, got {}", transform->getOutputs().size());

    auto & left_pipe = left->pipe;
    auto & right_pipe = right->pipe;

    connect(*left_pipe.output_ports.front(), transform->getInputs().front());
    connect(*right_pipe.output_ports.front(), transform->getInputs().back());

    if (collected_processors)
        collected_processors->emplace_back(transform);

    /// The merged stream replaces the first output port of the left pipe.
    left_pipe.output_ports.front() = &transform->getOutputs().front();
    left_pipe.processors->emplace_back(transform);

    left_pipe.processors->insert(left_pipe.processors->end(), right_pipe.processors->begin(), right_pipe.processors->end());

    left_pipe.header = left_pipe.output_ports.front()->getHeader();
    left_pipe.max_parallel_streams = std::max(left_pipe.max_parallel_streams, right_pipe.max_parallel_streams);
    return left;
}
std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelinesYShaped(
2022-03-30 10:07:09 +00:00
std::unique_ptr<QueryPipelineBuilder> left,
std::unique_ptr<QueryPipelineBuilder> right,
2022-04-05 10:12:42 +00:00
JoinPtr join,
const Block & out_header,
2022-03-30 10:07:09 +00:00
size_t max_block_size,
Processors * collected_processors)
{
left->checkInitializedAndNotCompleted();
right->checkInitializedAndNotCompleted();
left->pipe.dropExtremes();
right->pipe.dropExtremes();
2022-07-19 14:07:44 +00:00
if (left->getNumStreams() != 1 || right->getNumStreams() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Join is supported only for pipelines with one output port");
2022-03-30 10:07:09 +00:00
if (left->hasTotals() || right->hasTotals())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Current join algorithm is supported only for pipelines without totals");
2022-03-30 10:07:09 +00:00
Blocks inputs = {left->getHeader(), right->getHeader()};
2022-04-03 15:54:22 +00:00
auto joining = std::make_shared<MergeJoinTransform>(join, inputs, out_header, max_block_size);
2022-03-30 10:07:09 +00:00
2022-07-19 14:07:44 +00:00
return mergePipelines(std::move(left), std::move(right), std::move(joining), collected_processors);
2022-03-30 10:07:09 +00:00
}
std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelinesRightLeft(
std::unique_ptr<QueryPipelineBuilder> left,
std::unique_ptr<QueryPipelineBuilder> right,
2021-04-28 17:32:12 +00:00
JoinPtr join,
2022-07-07 15:00:40 +00:00
const Block & output_header,
2021-04-28 17:32:12 +00:00
size_t max_block_size,
size_t max_streams,
bool keep_left_read_in_order,
2021-04-28 17:32:12 +00:00
Processors * collected_processors)
{
left->checkInitializedAndNotCompleted();
right->checkInitializedAndNotCompleted();
2021-04-29 17:51:35 +00:00
/// Extremes before join are useless. They will be calculated after if needed.
2021-04-28 17:32:12 +00:00
left->pipe.dropExtremes();
right->pipe.dropExtremes();
left->pipe.collected_processors = collected_processors;
/// Collect the NEW processors for the right pipeline.
QueryPipelineProcessorsCollector collector(*right);
/// Remember the last step of the right pipeline.
IQueryPlanStep * step = right->pipe.processors->back()->getQueryPlanStep();
2021-04-28 17:32:12 +00:00
/// In case joined subquery has totals, and we don't, add default chunk to totals.
bool default_totals = false;
2022-10-27 15:25:50 +00:00
if (!join->supportTotals() && (left->hasTotals() || right->hasTotals()))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Current join algorithm is supported only for pipelines without totals");
2021-04-28 17:32:12 +00:00
if (!left->hasTotals() && right->hasTotals())
{
left->addDefaultTotals();
default_totals = true;
}
2021-04-30 13:06:17 +00:00
/// (left) ──────┐
/// ╞> Joining ─> (joined)
/// (left) ─┐┌───┘
/// └┼───┐
/// (right) ┐ (totals) ──┼─┐ ╞> Joining ─> (joined)
/// ╞> Resize ┐ ╓─┘┌┼─┘
/// (right) ┘ │ ╟──┘└─┐
/// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals)
/// (totals) ─────────┘ ╙─────┘
2021-04-29 17:51:35 +00:00
2021-04-28 17:32:12 +00:00
size_t num_streams = left->getNumStreams();
2022-05-05 01:23:49 +00:00
if (join->supportParallelJoin() && !right->hasTotals())
{
2022-04-29 01:29:04 +00:00
if (!keep_left_read_in_order)
{
2022-04-29 01:29:04 +00:00
left->resize(max_streams);
num_streams = max_streams;
}
2022-05-05 01:23:49 +00:00
right->resize(max_streams);
auto concurrent_right_filling_transform = [&](OutputPortRawPtrs outports)
2022-04-29 01:29:04 +00:00
{
2022-05-05 01:23:49 +00:00
Processors processors;
for (auto & outport : outports)
{
2022-05-05 01:23:49 +00:00
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
connect(*outport, adding_joined->getInputs().front());
processors.emplace_back(adding_joined);
}
return processors;
};
right->transform(concurrent_right_filling_transform);
right->resize(1);
}
else
{
right->resize(1);
auto adding_joined = std::make_shared<FillingRightJoinSideTransform>(right->getHeader(), join);
InputPort * totals_port = nullptr;
if (right->hasTotals())
totals_port = adding_joined->addTotalsPort();
2021-04-28 17:32:12 +00:00
right->addTransform(std::move(adding_joined), totals_port, nullptr);
}
2021-04-28 17:32:12 +00:00
size_t num_streams_including_totals = num_streams + (left->hasTotals() ? 1 : 0);
right->resize(num_streams_including_totals);
2021-04-29 17:51:35 +00:00
/// This counter is needed for every Joining except totals, to decide which Joining will generate non joined rows.
2021-04-28 17:32:12 +00:00
auto finish_counter = std::make_shared<JoiningTransform::FinishCounter>(num_streams);
auto lit = left->pipe.output_ports.begin();
auto rit = right->pipe.output_ports.begin();
2022-10-05 12:40:32 +00:00
std::vector<OutputPort *> joined_output_ports;
2022-10-18 11:43:01 +00:00
std::vector<OutputPort *> delayed_root_output_ports;
std::shared_ptr<DelayedJoinedBlocksTransform> delayed_root = nullptr;
if (join->hasDelayedBlocks())
{
delayed_root = std::make_shared<DelayedJoinedBlocksTransform>(num_streams, join);
if (!delayed_root->getInputs().empty() || delayed_root->getOutputs().size() != num_streams)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"DelayedJoinedBlocksTransform should have no inputs and {} outputs, "
"but has {} inputs and {} outputs",
2022-10-18 11:43:01 +00:00
num_streams, delayed_root->getInputs().size(), delayed_root->getOutputs().size());
if (collected_processors)
collected_processors->emplace_back(delayed_root);
2022-10-26 13:50:14 +00:00
left->pipe.processors->emplace_back(delayed_root);
2022-10-18 11:43:01 +00:00
for (auto & outport : delayed_root->getOutputs())
delayed_root_output_ports.emplace_back(&outport);
}
2022-10-18 12:20:52 +00:00
Block left_header = left->getHeader();
Block joined_header = JoiningTransform::transformHeader(left_header, join);
2021-04-28 17:32:12 +00:00
for (size_t i = 0; i < num_streams; ++i)
{
2022-10-05 12:40:32 +00:00
auto joining = std::make_shared<JoiningTransform>(
left_header, output_header, join, max_block_size, false, default_totals, finish_counter);
2021-04-28 17:32:12 +00:00
connect(**lit, joining->getInputs().front());
connect(**rit, joining->getInputs().back());
2022-10-18 11:43:01 +00:00
if (delayed_root)
{
// Process delayed joined blocks when all JoiningTransform are finished.
2022-10-18 12:20:52 +00:00
auto delayed = std::make_shared<DelayedJoinedBlocksWorkerTransform>(joined_header);
2022-10-18 11:43:01 +00:00
if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1)
throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output");
2022-10-18 11:43:01 +00:00
connect(*delayed_root_output_ports[i], delayed->getInputs().front());
2022-10-05 12:40:32 +00:00
joined_output_ports.push_back(&joining->getOutputs().front());
joined_output_ports.push_back(&delayed->getOutputs().front());
if (collected_processors)
collected_processors->emplace_back(delayed);
2022-10-26 13:50:14 +00:00
left->pipe.processors->emplace_back(std::move(delayed));
}
else
{
*lit = &joining->getOutputs().front();
}
2021-04-28 17:32:12 +00:00
++lit;
++rit;
2021-04-28 17:32:12 +00:00
if (collected_processors)
collected_processors->emplace_back(joining);
2022-10-05 12:40:32 +00:00
2022-10-17 00:10:36 +00:00
left->pipe.processors->emplace_back(std::move(joining));
2022-10-05 12:40:32 +00:00
}
2022-10-18 11:43:01 +00:00
if (delayed_root)
2022-10-05 12:40:32 +00:00
{
// Process DelayedJoinedBlocksTransform after all JoiningTransforms.
DelayedPortsProcessor::PortNumbers delayed_ports_numbers;
delayed_ports_numbers.reserve(joined_output_ports.size() / 2);
for (size_t i = 1; i < joined_output_ports.size(); i += 2)
delayed_ports_numbers.push_back(i);
auto delayed_processor = std::make_shared<DelayedPortsProcessor>(joined_header, 2 * num_streams, delayed_ports_numbers);
2022-10-05 12:40:32 +00:00
if (collected_processors)
collected_processors->emplace_back(delayed_processor);
2022-10-26 13:50:14 +00:00
left->pipe.processors->emplace_back(delayed_processor);
// Connect @delayed_processor ports with inputs (JoiningTransforms & DelayedJoinedBlocksTransforms) / pipe outputs
auto next_delayed_input = delayed_processor->getInputs().begin();
for (OutputPort * port : joined_output_ports)
connect(*port, *next_delayed_input++);
left->pipe.output_ports.clear();
for (OutputPort & port : delayed_processor->getOutputs())
left->pipe.output_ports.push_back(&port);
left->pipe.header = joined_header;
left->resize(num_streams);
2021-04-28 17:32:12 +00:00
}
if (left->hasTotals())
{
auto joining = std::make_shared<JoiningTransform>(left_header, output_header, join, max_block_size, true, default_totals);
2021-04-28 17:32:12 +00:00
connect(*left->pipe.totals_port, joining->getInputs().front());
connect(**rit, joining->getInputs().back());
left->pipe.totals_port = &joining->getOutputs().front();
++rit;
if (collected_processors)
collected_processors->emplace_back(joining);
2022-10-17 00:10:36 +00:00
left->pipe.processors->emplace_back(std::move(joining));
2021-04-28 17:32:12 +00:00
}
/// Move the collected processors to the last step in the right pipeline.
Processors processors = collector.detachProcessors();
if (step)
step->appendExtraProcessors(processors);
2022-10-17 00:10:36 +00:00
left->pipe.processors->insert(left->pipe.processors->end(), right->pipe.processors->begin(), right->pipe.processors->end());
2022-05-24 20:06:08 +00:00
left->resources = std::move(right->resources);
left->pipe.header = left->pipe.output_ports.front()->getHeader();
left->pipe.max_parallel_streams = std::max(left->pipe.max_parallel_streams, right->pipe.max_parallel_streams);
2021-04-28 17:32:12 +00:00
return left;
}
2020-09-16 16:11:16 +00:00
void QueryPipelineBuilder::addCreatingSetsTransform(const Block & res_header, SubqueryForSet subquery_for_set, const SizeLimits & limits, ContextPtr context)
2020-09-16 16:11:16 +00:00
{
2020-09-16 16:30:48 +00:00
resize(1);
2020-09-16 16:11:16 +00:00
auto transform = std::make_shared<CreatingSetsTransform>(
2020-09-16 16:45:56 +00:00
getHeader(),
2020-09-16 16:38:33 +00:00
res_header,
2020-09-16 16:11:16 +00:00
std::move(subquery_for_set),
2020-09-16 16:30:48 +00:00
limits,
2020-09-16 16:45:56 +00:00
context);
2020-09-16 16:11:16 +00:00
InputPort * totals_port = nullptr;
if (pipe.getTotalsPort())
totals_port = transform->addTotalsPort();
pipe.addTransform(std::move(transform), totals_port, nullptr);
}
/// Unites `pipeline` with the current pipe and adds a DelayedPortsProcessor in which all ports
/// of the current pipe are marked as delayed.
/// `pipeline` must have an empty header, otherwise LOGICAL_ERROR is thrown.
void QueryPipelineBuilder::addPipelineBefore(QueryPipelineBuilder pipeline)
{
    checkInitializedAndNotCompleted();

    const auto & other_header = pipeline.getHeader();
    if (other_header)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline for CreatingSets should have empty header. Got: {}",
                        other_header.dumpStructure());

    /// Every output port the pipe has right now is delayed: indices 0 .. N-1.
    const size_t num_current_ports = pipe.numOutputPorts();
    IProcessor::PortNumbers delayed_streams;
    delayed_streams.reserve(num_current_ports);
    for (size_t port = 0; port < num_current_ports; ++port)
        delayed_streams.push_back(port);

    /// Remember the collector pointer: `pipe` is moved away below and then re-assigned.
    auto * collected_processors = pipe.collected_processors;

    Pipes pipes_to_unite;
    pipes_to_unite.emplace_back(std::move(pipe));
    pipes_to_unite.emplace_back(QueryPipelineBuilder::getPipe(std::move(pipeline), resources));
    pipe = Pipe::unitePipes(std::move(pipes_to_unite), collected_processors, true);

    auto delayed_ports = std::make_shared<DelayedPortsProcessor>(getHeader(), pipe.numOutputPorts(), delayed_streams, true);
    addTransform(std::move(delayed_ports));
}
2022-10-17 02:21:08 +00:00
/// Stores the process list element that execute() and getPipeline() later pass on.
/// @param elem Taken by value and moved into the member, so no extra copy is made.
void QueryPipelineBuilder::setProcessListElement(QueryStatusPtr elem)
{
    process_list_element = std::move(elem);
}
/// Stores the progress callback that getPipeline() later passes to the resulting QueryPipeline.
/// @param callback Taken by value and moved into the member, so no extra copy is made.
void QueryPipelineBuilder::setProgressCallback(ProgressCallback callback)
{
    progress_callback = std::move(callback);
}
/// Creates a PipelineExecutor over the processors of the pipe.
/// Throws LOGICAL_ERROR if the pipeline is not completed.
PipelineExecutorPtr QueryPipelineBuilder::execute()
{
    const bool ready_to_run = isCompleted();
    if (!ready_to_run)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute pipeline because it is not completed");

    return std::make_shared<PipelineExecutor>(pipe.processors, process_list_element);
}
2022-05-23 13:46:57 +00:00
/// Extracts the pipe from `pipeline`; the resources held by `pipeline` are move-assigned into `resources`.
Pipe QueryPipelineBuilder::getPipe(QueryPipelineBuilder pipeline, QueryPlanResourceHolder & resources)
{
    resources = std::move(pipeline.resources);

    Pipe extracted(std::move(pipeline.pipe));
    return extracted;
}
2022-05-24 20:06:08 +00:00
/// Converts a builder into a QueryPipeline.
/// The pipe and the resources are moved out of `builder`; the thread count, the process list element
/// and the progress callback are passed on to the result.
QueryPipeline QueryPipelineBuilder::getPipeline(QueryPipelineBuilder builder)
{
    /// Ask for the thread count while `builder.pipe` is still intact,
    /// so that the value never depends on the state of a moved-from Pipe.
    const auto num_threads = builder.getNumThreads();

    QueryPipeline res(std::move(builder.pipe));
    res.addResources(std::move(builder.resources));
    res.setNumThreads(num_threads);
    res.setProcessListElement(builder.process_list_element);
    res.setProgressCallback(builder.progress_callback);
    return res;
}
/// Sets the pipe's collected-processors pointer to `processors`; nullptr is accepted and stored as is.
void QueryPipelineBuilder::setCollectedProcessors(Processors * processors)
{
    pipe.collected_processors = processors;
}
2020-08-07 08:28:12 +00:00
/// Registers this collector's `processors` list as the collected-processors target of `pipeline_`.
/// `step_` is the plan step later assigned to the collected processors in detachProcessors().
QueryPipelineProcessorsCollector::QueryPipelineProcessorsCollector(QueryPipelineBuilder & pipeline_, IQueryPlanStep * step_)
    : pipeline(pipeline_)
    , step(step_)
{
    pipeline.setCollectedProcessors(&processors);
}
/// Resets the pipeline's collected-processors pointer, which was pointing at this object's list.
QueryPipelineProcessorsCollector::~QueryPipelineProcessorsCollector()
{
    pipeline.setCollectedProcessors(nullptr);
}
/// Assigns the stored plan step and `group` to every collected processor,
/// then returns the collected processors and leaves the internal list empty.
Processors QueryPipelineProcessorsCollector::detachProcessors(size_t group)
{
    for (auto & collected : processors)
        collected->setQueryPlanStep(step, group);

    Processors detached;
    detached.swap(processors);
    return detached;
}
2019-03-26 18:28:37 +00:00
}