ClickHouse/src/Processors/Port.h

475 lines
13 KiB
C++
Raw Normal View History

#pragma once
#include <atomic>
#include <memory>
#include <vector>
#include <variant>
2019-06-19 18:30:02 +00:00
#include <cstdint>
#include <Core/Block.h>
2019-10-07 18:56:03 +00:00
#include <Core/Defines.h>
2019-02-19 18:41:18 +00:00
#include <Processors/Chunk.h>
#include <Common/Exception.h>
namespace DB
{
/// Forward declarations. Port names these types in its friend declarations
/// (connect(OutputPort &, InputPort &) and IProcessor) before they are defined.
class InputPort;
class OutputPort;
class IProcessor;
2019-10-04 17:46:36 +00:00
namespace ErrorCodes
{
/// Only declared here (extern); the definition lives elsewhere.
/// Every Exception constructed in this header is created with this code.
extern const int LOGICAL_ERROR;
}
class Port
{
friend void connect(OutputPort &, InputPort &);
friend class IProcessor;
public:
struct UpdateInfo
{
using UpdateList = std::vector<void *>;
UpdateList * update_list = nullptr;
void * id = nullptr;
UInt64 version = 0;
UInt64 prev_version = 0;
void inline ALWAYS_INLINE update()
{
if (version == prev_version && update_list)
update_list->push_back(id);
++version;
}
void inline ALWAYS_INLINE trigger() { prev_version = version; }
};
protected:
/// Shared state of two connected ports.
2019-02-07 18:51:53 +00:00
class State
{
2019-02-07 18:51:53 +00:00
public:
2019-04-29 18:43:50 +00:00
2019-06-19 18:30:02 +00:00
struct Data
2019-02-07 18:51:53 +00:00
{
2019-06-19 18:30:02 +00:00
/// Note: std::variant can be used. But move constructor for it can't be inlined.
Chunk chunk;
std::exception_ptr exception;
};
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
private:
static std::uintptr_t getUInt(Data * data) { return reinterpret_cast<std::uintptr_t>(data); }
static Data * getPtr(std::uintptr_t data) { return reinterpret_cast<Data *>(data); }
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
public:
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
/// Flags for Port state.
/// Will store them in least pointer bits.
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
/// Port was set finished or closed.
static constexpr std::uintptr_t IS_FINISHED = 1;
/// Block is not needed right now, but may be will be needed later.
/// This allows to pause calculations if we are not sure that we need more data.
static constexpr std::uintptr_t IS_NEEDED = 2;
/// Check if port has data.
static constexpr std::uintptr_t HAS_DATA = 4;
2019-06-19 18:30:02 +00:00
static constexpr std::uintptr_t FLAGS_MASK = IS_FINISHED | IS_NEEDED | HAS_DATA;
static constexpr std::uintptr_t PTR_MASK = ~FLAGS_MASK;
2019-06-19 18:30:02 +00:00
/// Tiny smart ptr class for Data. Takes into account that ptr can have flags in least bits.
class DataPtr
2019-02-07 18:51:53 +00:00
{
2019-06-19 18:30:02 +00:00
public:
DataPtr() : data(new Data())
{
if (unlikely((getUInt(data) & FLAGS_MASK) != 0))
throw Exception("Not alignment memory for Port.", ErrorCodes::LOGICAL_ERROR);
}
/// Pointer can store flags in case of exception in swap.
~DataPtr() { delete getPtr(getUInt(data) & PTR_MASK); }
DataPtr(DataPtr const &) : data(new Data()) {}
DataPtr& operator=(DataPtr const &) = delete;
Data * operator->() const { return data; }
Data & operator*() const { return *data; }
Data * get() const { return data; }
explicit operator bool() const { return data; }
Data * release()
{
Data * result = nullptr;
std::swap(result, data);
return result;
}
uintptr_t ALWAYS_INLINE swap(std::atomic<Data *> & value, std::uintptr_t flags, std::uintptr_t mask)
{
Data * expected = nullptr;
Data * desired = getPtr(flags | getUInt(data));
while (!value.compare_exchange_weak(expected, desired))
desired = getPtr((getUInt(expected) & FLAGS_MASK & (~mask)) | flags | getUInt(data));
2020-08-08 00:47:03 +00:00
/// It's not very safe. In case of exception after exchange and before assignment we will get leak.
2019-06-19 18:30:02 +00:00
/// Don't know how to make it better.
data = getPtr(getUInt(expected) & PTR_MASK);
return getUInt(expected) & FLAGS_MASK;
}
private:
Data * data = nullptr;
};
/// Not finished, not needed, has not data.
State() : data(new Data())
{
if (unlikely((getUInt(data) & FLAGS_MASK) != 0))
throw Exception("Not alignment memory for Port.", ErrorCodes::LOGICAL_ERROR);
2019-02-07 18:51:53 +00:00
}
2019-06-19 18:30:02 +00:00
~State()
{
2019-06-19 18:30:02 +00:00
Data * desired = nullptr;
Data * expected = nullptr;
2019-06-19 18:30:02 +00:00
while (!data.compare_exchange_weak(expected, desired));
2019-06-19 18:30:02 +00:00
expected = getPtr(getUInt(expected) & PTR_MASK);
delete expected;
}
2019-06-19 18:30:02 +00:00
void ALWAYS_INLINE push(DataPtr & data_, std::uintptr_t & flags)
2019-02-07 18:51:53 +00:00
{
2019-06-19 18:30:02 +00:00
flags = data_.swap(data, HAS_DATA, HAS_DATA);
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
/// It's possible to push data into finished port. Will just ignore it.
/// if (flags & IS_FINISHED)
/// throw Exception("Cannot push block to finished port.", ErrorCodes::LOGICAL_ERROR);
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
/// It's possible to push data into port which is not needed now.
/// if ((flags & IS_NEEDED) == 0)
/// throw Exception("Cannot push block to port which is not needed.", ErrorCodes::LOGICAL_ERROR);
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
if (unlikely(flags & HAS_DATA))
throw Exception("Cannot push block to port which already has data.", ErrorCodes::LOGICAL_ERROR);
2019-02-07 18:51:53 +00:00
}
void ALWAYS_INLINE pull(DataPtr & data_, std::uintptr_t & flags, bool set_not_needed = false)
2019-02-07 18:51:53 +00:00
{
uintptr_t mask = HAS_DATA;
if (set_not_needed)
mask |= IS_NEEDED;
flags = data_.swap(data, 0, mask);
2019-06-19 18:30:02 +00:00
/// It's ok to check because this flag can be changed only by pulling thread.
if (unlikely((flags & IS_NEEDED) == 0) && !set_not_needed)
2019-06-19 18:30:02 +00:00
throw Exception("Cannot pull block from port which is not needed.", ErrorCodes::LOGICAL_ERROR);
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
if (unlikely((flags & HAS_DATA) == 0))
throw Exception("Cannot pull block from port which has no data.", ErrorCodes::LOGICAL_ERROR);
}
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
std::uintptr_t ALWAYS_INLINE setFlags(std::uintptr_t flags, std::uintptr_t mask)
2019-02-07 18:51:53 +00:00
{
2019-06-19 18:30:02 +00:00
Data * expected = nullptr;
Data * desired = getPtr(flags);
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
while (!data.compare_exchange_weak(expected, desired))
desired = getPtr((getUInt(expected) & FLAGS_MASK & (~mask)) | flags | (getUInt(expected) & PTR_MASK));
2019-02-07 18:51:53 +00:00
2019-06-19 18:30:02 +00:00
return getUInt(expected) & FLAGS_MASK;
2019-02-07 18:51:53 +00:00
}
2019-06-19 18:30:02 +00:00
std::uintptr_t ALWAYS_INLINE getFlags() const
2019-02-07 18:51:53 +00:00
{
2019-06-19 18:30:02 +00:00
return getUInt(data.load()) & FLAGS_MASK;
2019-02-07 18:51:53 +00:00
}
private:
2019-06-19 18:30:02 +00:00
std::atomic<Data *> data;
};
Block header;
std::shared_ptr<State> state;
2019-06-19 18:30:02 +00:00
/// This object is only used for data exchange between port and shared state.
State::DataPtr data;
IProcessor * processor = nullptr;
/// If update_info was set, will call update() for it in case port's state have changed.
2019-12-03 08:50:19 +00:00
UpdateInfo * update_info = nullptr;
public:
2019-04-29 18:43:50 +00:00
using Data = State::Data;
2019-08-03 11:02:40 +00:00
Port(Block header_) : header(std::move(header_)) {}
Port(Block header_, IProcessor * processor_) : header(std::move(header_)), processor(processor_) {}
void setUpdateInfo(UpdateInfo * info) { update_info = info; }
const Block & getHeader() const { return header; }
2019-06-18 11:01:17 +00:00
bool ALWAYS_INLINE isConnected() const { return state != nullptr; }
2019-06-18 11:01:17 +00:00
void ALWAYS_INLINE assumeConnected() const
{
2019-06-19 18:30:02 +00:00
if (unlikely(!isConnected()))
2019-02-05 13:01:40 +00:00
throw Exception("Port is not connected", ErrorCodes::LOGICAL_ERROR);
}
2019-06-18 11:01:17 +00:00
bool ALWAYS_INLINE hasData() const
{
assumeConnected();
2019-06-19 18:30:02 +00:00
return state->getFlags() & State::HAS_DATA;
}
IProcessor & getProcessor()
{
if (!processor)
2019-02-05 13:01:40 +00:00
throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR);
return *processor;
}
const IProcessor & getProcessor() const
{
if (!processor)
2019-02-05 13:01:40 +00:00
throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR);
return *processor;
}
protected:
void inline ALWAYS_INLINE updateVersion()
{
if (likely(update_info))
update_info->update();
}
};
2019-02-07 18:51:53 +00:00
/// Invariants:
/// * If you close port, it isFinished().
/// * If port isFinished(), you can do nothing with it.
/// * If port is not needed, you can only setNeeded() or close() it.
/// * You can pull only if port hasData().
class InputPort : public Port
{
friend void connect(OutputPort &, InputPort &);
private:
OutputPort * output_port = nullptr;
2019-06-19 18:30:02 +00:00
mutable bool is_finished = false;
public:
using Port::Port;
Data ALWAYS_INLINE pullData(bool set_not_needed = false)
{
if (!set_not_needed)
updateVersion();
2019-02-08 16:10:57 +00:00
2019-02-07 18:51:53 +00:00
assumeConnected();
2019-06-19 18:30:02 +00:00
std::uintptr_t flags = 0;
state->pull(data, flags, set_not_needed);
2019-06-19 18:30:02 +00:00
is_finished = flags & State::IS_FINISHED;
if (unlikely(!data->exception && data->chunk.getNumColumns() != header.columns()))
{
auto & chunk = data->chunk;
String msg = "Invalid number of columns in chunk pulled from OutputPort. Expected "
+ std::to_string(header.columns()) + ", found " + std::to_string(chunk.getNumColumns()) + '\n';
msg += "Header: " + header.dumpStructure() + '\n';
msg += "Chunk: " + chunk.dumpStructure() + '\n';
throw Exception(msg, ErrorCodes::LOGICAL_ERROR);
}
return std::move(*data);
}
Chunk ALWAYS_INLINE pull(bool set_not_needed = false)
{
auto data_ = pullData(set_not_needed);
if (data_.exception)
std::rethrow_exception(data_.exception);
return std::move(data_.chunk);
}
2019-06-18 11:01:17 +00:00
bool ALWAYS_INLINE isFinished() const
{
assumeConnected();
2019-06-19 18:30:02 +00:00
if (is_finished)
return true;
auto flags = state->getFlags();
is_finished = (flags & State::IS_FINISHED) && ((flags & State::HAS_DATA) == 0);
return is_finished;
}
2019-06-18 11:01:17 +00:00
void ALWAYS_INLINE setNeeded()
{
2019-05-16 14:57:27 +00:00
assumeConnected();
if ((state->setFlags(State::IS_NEEDED, State::IS_NEEDED) & State::IS_NEEDED) == 0)
updateVersion();
}
2019-06-18 11:01:17 +00:00
void ALWAYS_INLINE setNotNeeded()
{
assumeConnected();
2019-06-19 18:30:02 +00:00
state->setFlags(0, State::IS_NEEDED);
2019-02-07 18:51:53 +00:00
}
2019-06-18 11:01:17 +00:00
void ALWAYS_INLINE close()
2019-02-07 18:51:53 +00:00
{
2019-06-19 18:30:02 +00:00
assumeConnected();
if ((state->setFlags(State::IS_FINISHED, State::IS_FINISHED) & State::IS_FINISHED) == 0)
updateVersion();
2019-02-08 16:10:57 +00:00
2019-06-19 18:30:02 +00:00
is_finished = true;
}
void ALWAYS_INLINE reopen()
{
assumeConnected();
if (!isFinished())
return;
state->setFlags(0, State::IS_FINISHED);
is_finished = false;
}
OutputPort & getOutputPort()
{
assumeConnected();
return *output_port;
}
const OutputPort & getOutputPort() const
{
assumeConnected();
return *output_port;
}
};
2019-02-07 18:51:53 +00:00
/// Invariants:
/// * If you finish port, it isFinished().
/// * If port isFinished(), you can do nothing with it.
/// * If port not isNeeded(), you can only finish() it.
2020-10-12 09:30:05 +00:00
/// * You can push only if the port does not have data (hasData() is false).
class OutputPort : public Port
{
friend void connect(OutputPort &, InputPort &);
private:
InputPort * input_port = nullptr;
public:
using Port::Port;
2019-06-18 11:01:17 +00:00
void ALWAYS_INLINE push(Chunk chunk)
{
2019-06-19 18:30:02 +00:00
pushData({.chunk = std::move(chunk), .exception = {}});
2019-02-07 18:51:53 +00:00
}
2021-07-17 21:45:07 +00:00
void ALWAYS_INLINE pushException(std::exception_ptr exception)
2019-06-19 18:30:02 +00:00
{
pushData({.chunk = {}, .exception = std::move(exception)});
}
void ALWAYS_INLINE pushData(Data data_)
{
2019-06-24 15:14:58 +00:00
if (unlikely(!data_.exception && data_.chunk.getNumColumns() != header.columns()))
{
String msg = "Invalid number of columns in chunk pushed to OutputPort. Expected "
+ std::to_string(header.columns())
+ ", found " + std::to_string(data_.chunk.getNumColumns()) + '\n';
msg += "Header: " + header.dumpStructure() + '\n';
msg += "Chunk: " + data_.chunk.dumpStructure() + '\n';
throw Exception(msg, ErrorCodes::LOGICAL_ERROR);
}
updateVersion();
assumeConnected();
2019-06-19 18:30:02 +00:00
std::uintptr_t flags = 0;
*data = std::move(data_);
state->push(data, flags);
}
2019-06-18 11:01:17 +00:00
void ALWAYS_INLINE finish()
2019-02-07 18:51:53 +00:00
{
assumeConnected();
2019-06-19 18:30:02 +00:00
auto flags = state->setFlags(State::IS_FINISHED, State::IS_FINISHED);
if ((flags & State::IS_FINISHED) == 0)
updateVersion();
2019-02-07 18:51:53 +00:00
}
2019-06-18 11:01:17 +00:00
bool ALWAYS_INLINE isNeeded() const
2019-02-07 18:51:53 +00:00
{
assumeConnected();
2019-06-19 18:30:02 +00:00
return state->getFlags() & State::IS_NEEDED;
}
2019-06-18 11:01:17 +00:00
bool ALWAYS_INLINE isFinished() const
{
assumeConnected();
2019-06-19 18:30:02 +00:00
return state->getFlags() & State::IS_FINISHED;
}
2019-06-19 18:30:02 +00:00
bool ALWAYS_INLINE canPush() const
{
assumeConnected();
auto flags = state->getFlags();
return (flags & State::IS_NEEDED) && ((flags & State::HAS_DATA) == 0);
}
2019-02-07 18:51:53 +00:00
InputPort & getInputPort()
{
assumeConnected();
return *input_port;
}
const InputPort & getInputPort() const
{
assumeConnected();
return *input_port;
}
};
/// Containers of ports.
/// NOTE(review): std::list is used here, but no <list> include is visible in this header —
/// presumably it arrives transitively through another include; confirm.
using InputPorts = std::list<InputPort>;
using OutputPorts = std::list<OutputPort>;
/// Connects an output port with an input port. Only declared here; it is a friend of
/// Port, InputPort and OutputPort, so it can access their non-public members.
void connect(OutputPort & output, InputPort & input);
}