ClickHouse/src/Processors/Pipe.h

148 lines
6.5 KiB
C++
Raw Normal View History

2019-11-05 17:33:03 +00:00
#pragma once
#include <Processors/IProcessor.h>
2019-11-05 17:33:03 +00:00
#include <Processors/Sources/SourceWithProgress.h>
namespace DB
{
class Pipe;
2020-08-03 11:33:11 +00:00
using Pipes = std::vector<Pipe>;
2020-01-29 18:18:12 +00:00
class IStorage;
using StoragePtr = std::shared_ptr<IStorage>;
2020-08-03 11:33:11 +00:00
using OutputPortRawPtrs = std::vector<OutputPort *>;
2020-08-14 01:10:10 +00:00
/// Pipe is a set of processors which represents the part of pipeline.
/// Pipe contains a list of output ports, with specified port for totals and specified port for extremes.
2020-07-31 16:54:54 +00:00
/// All output ports have same header.
/// All other ports are connected, all connections are inside processors set.
2020-08-03 11:33:11 +00:00
class Pipe
2020-07-31 16:54:54 +00:00
{
public:
2020-08-03 11:33:11 +00:00
/// Default constructor creates empty pipe. Generally, you cannot do anything with it except to check it is empty().
/// You cannot get empty pipe in any other way. All transforms check that result pipe is not empty.
Pipe() = default;
2020-07-31 16:54:54 +00:00
/// Create from source. Source must have no input ports and single output.
2020-08-03 11:33:11 +00:00
explicit Pipe(ProcessorPtr source);
2020-08-04 13:06:59 +00:00
/// Create from source with specified totals end extremes (may be nullptr). Ports should be owned by source.
explicit Pipe(ProcessorPtr source, OutputPort * output, OutputPort * totals, OutputPort * extremes);
2020-07-31 16:54:54 +00:00
/// Create from processors. Use all not-connected output ports as output_ports. Check invariants.
2020-08-03 11:33:11 +00:00
explicit Pipe(Processors processors_);
2020-07-31 16:54:54 +00:00
2020-08-03 11:33:11 +00:00
Pipe(const Pipe & other) = delete;
Pipe(Pipe && other) = default;
Pipe & operator=(const Pipe & other) = delete;
Pipe & operator=(Pipe && other) = default;
2020-07-31 16:54:54 +00:00
const Block & getHeader() const { return header; }
2020-08-04 13:06:59 +00:00
bool empty() const { return processors.empty(); }
2020-08-03 11:33:11 +00:00
size_t numOutputPorts() const { return output_ports.size(); }
2020-08-04 13:06:59 +00:00
size_t maxParallelStreams() const { return max_parallel_streams; }
2020-07-31 16:54:54 +00:00
OutputPort * getOutputPort(size_t pos) const { return output_ports[pos]; }
OutputPort * getTotalsPort() const { return totals_port; }
OutputPort * getExtremesPort() const { return extremes_port; }
/// Add processor to list, add it output ports to output_ports.
/// Processor shouldn't have input ports, output ports shouldn't be connected.
/// Output headers should have same structure and be compatible with current header (if not empty()).
2020-08-04 13:06:59 +00:00
void addSource(ProcessorPtr source);
2020-07-31 16:54:54 +00:00
2020-08-03 11:33:11 +00:00
/// Add totals and extremes.
void addTotalsSource(ProcessorPtr source);
void addExtremesSource(ProcessorPtr source);
2020-08-04 15:51:56 +00:00
/// Drop totals and extremes (create NullSink for them).
void dropTotals();
void dropExtremes();
2020-07-31 16:54:54 +00:00
/// Add processor to list. It should have size() input ports with compatible header.
/// Output ports should have same headers.
/// If totals or extremes are not empty, transform shouldn't change header.
void addTransform(ProcessorPtr transform);
2020-08-04 15:51:56 +00:00
void addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes);
2020-07-31 16:54:54 +00:00
enum class StreamType
{
Main = 0, /// Stream for query data. There may be several streams of this type.
Totals, /// Stream for totals. No more then one.
Extremes, /// Stream for extremes. No more then one.
};
2020-08-03 13:54:14 +00:00
using ProcessorGetter = std::function<ProcessorPtr(const Block & header)>;
using ProcessorGetterWithStreamKind = std::function<ProcessorPtr(const Block & header, StreamType stream_type)>;
2020-07-31 16:54:54 +00:00
/// Add transform with single input and single output for each port.
2020-08-06 12:24:05 +00:00
void addSimpleTransform(const ProcessorGetter & getter);
void addSimpleTransform(const ProcessorGetterWithStreamKind & getter);
2020-07-31 16:54:54 +00:00
2020-08-03 11:33:11 +00:00
using Transformer = std::function<Processors(OutputPortRawPtrs ports)>;
/// Transform Pipe in general way.
void transform(const Transformer & transformer);
/// Unite several pipes together. They should have same header.
static Pipe unitePipes(Pipes pipes);
2020-07-31 16:54:54 +00:00
2020-08-04 13:06:59 +00:00
/// Get processors from Pipe. Use it with cautious, it is easy to loss totals and extremes ports.
static Processors detachProcessors(Pipe pipe) { return std::move(pipe.processors); }
2020-08-03 15:54:53 +00:00
/// Specify quotas and limits for every ISourceWithProgress.
void setLimits(const SourceWithProgress::LocalLimits & limits);
void setQuota(const std::shared_ptr<const EnabledQuota> & quota);
2020-08-03 13:54:14 +00:00
/// Do not allow to change the table while the processors of pipe are alive.
2020-08-06 12:24:05 +00:00
void addTableLock(const TableLockHolder & lock) { holder.table_locks.push_back(lock); }
2020-08-03 13:54:14 +00:00
/// This methods are from QueryPipeline. Needed to make conversion from pipeline to pipe possible.
2020-08-06 12:24:05 +00:00
void addInterpreterContext(std::shared_ptr<Context> context) { holder.interpreter_context.emplace_back(std::move(context)); }
void addStorageHolder(StoragePtr storage) { holder.storage_holders.emplace_back(std::move(storage)); }
2020-08-03 13:54:14 +00:00
2020-07-31 16:54:54 +00:00
private:
2020-08-04 13:06:59 +00:00
/// Destruction order: processors, header, locks, temporary storages, local contexts
2020-08-06 12:24:05 +00:00
struct Holder
{
Holder() = default;
Holder(Holder &&) = default;
/// Custom mode assignment does not destroy data from lhs. It appends data from rhs to lhs.
Holder& operator=(Holder &&);
/// Some processors may implicitly use Context or temporary Storage created by Interpreter.
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here,
/// because QueryPipeline is alive until query is finished.
std::vector<std::shared_ptr<Context>> interpreter_context;
std::vector<StoragePtr> storage_holders;
std::vector<TableLockHolder> table_locks;
};
Holder holder;
2020-07-31 16:54:54 +00:00
/// Header is common for all output below.
Block header;
2020-08-04 13:06:59 +00:00
Processors processors;
2020-07-31 16:54:54 +00:00
/// Output ports. Totals and extremes are allowed to be empty.
2020-08-03 11:33:11 +00:00
OutputPortRawPtrs output_ports;
2020-07-31 16:54:54 +00:00
OutputPort * totals_port = nullptr;
OutputPort * extremes_port = nullptr;
2020-08-04 13:06:59 +00:00
/// It is the max number of processors which can be executed in parallel for each step.
2020-07-31 16:54:54 +00:00
/// Usually, it's the same as the number of output ports.
size_t max_parallel_streams = 0;
2020-08-03 15:54:53 +00:00
/// If is set, all newly created processors will be added to this too.
/// It is needed for debug. See QueryPipelineProcessorsCollector.
Processors * collected_processors = nullptr;
2020-08-04 13:06:59 +00:00
/// This methods are for QueryPipeline. It is allowed to complete graph only there.
/// So, we may be sure that Pipe always has output port if not empty.
bool isCompleted() const { return !empty() && output_ports.empty(); }
2020-08-03 15:54:53 +00:00
static Pipe unitePipes(Pipes pipes, Processors * collected_processors);
2020-08-04 13:06:59 +00:00
void setSinks(const Pipe::ProcessorGetterWithStreamKind & getter);
2020-08-04 15:51:56 +00:00
void setOutputFormat(ProcessorPtr output);
2020-08-03 15:54:53 +00:00
friend class QueryPipeline;
2020-08-03 11:33:11 +00:00
};
2020-07-31 16:54:54 +00:00
}