ClickHouse/src/Processors/OffsetTransform.cpp

195 lines
4.5 KiB
C++
Raw Normal View History

2020-05-06 06:50:55 +00:00
#include <Processors/OffsetTransform.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
OffsetTransform::OffsetTransform(
2020-07-12 05:18:01 +00:00
const Block & header_, UInt64 offset_, size_t num_streams)
2020-05-06 06:50:55 +00:00
: IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_))
2020-05-07 13:40:50 +00:00
, offset(offset_)
2020-05-06 06:50:55 +00:00
{
ports_data.resize(num_streams);
size_t cur_stream = 0;
for (auto & input : inputs)
{
ports_data[cur_stream].input_port = &input;
++cur_stream;
}
cur_stream = 0;
for (auto & output : outputs)
{
ports_data[cur_stream].output_port = &output;
++cur_stream;
}
}
IProcessor::Status OffsetTransform::prepare(
const PortNumbers & updated_input_ports,
const PortNumbers & updated_output_ports)
{
bool has_full_port = false;
auto process_pair = [&](size_t pos)
{
auto status = preparePair(ports_data[pos]);
switch (status)
{
case IProcessor::Status::Finished:
{
if (!ports_data[pos].is_finished)
{
ports_data[pos].is_finished = true;
++num_finished_port_pairs;
}
return;
}
case IProcessor::Status::PortFull:
{
has_full_port = true;
return;
}
case IProcessor::Status::NeedData:
return;
default:
throw Exception(
"Unexpected status for OffsetTransform::preparePair : " + IProcessor::statusToName(status),
ErrorCodes::LOGICAL_ERROR);
}
};
for (auto pos : updated_input_ports)
process_pair(pos);
for (auto pos : updated_output_ports)
process_pair(pos);
/// All ports are finished. It may happen even before we reached the limit (has less data then limit).
if (num_finished_port_pairs == ports_data.size())
return Status::Finished;
if (has_full_port)
return Status::PortFull;
return Status::NeedData;
}
OffsetTransform::Status OffsetTransform::prepare()
{
if (ports_data.size() != 1)
throw Exception("prepare without arguments is not supported for multi-port OffsetTransform.",
ErrorCodes::LOGICAL_ERROR);
return prepare({0}, {0});
}
OffsetTransform::Status OffsetTransform::preparePair(PortsData & data)
{
auto & output = *data.output_port;
auto & input = *data.input_port;
/// Check can output.
bool output_finished = false;
if (output.isFinished())
{
output_finished = true;
}
if (!output_finished && !output.canPush())
{
input.setNotNeeded();
return Status::PortFull;
}
/// Check can input.
if (input.isFinished())
{
output.finish();
return Status::Finished;
}
input.setNeeded();
2020-05-11 06:26:01 +00:00
if (!input.hasData())
2020-05-06 06:50:55 +00:00
return Status::NeedData;
data.current_chunk = input.pull(true);
auto rows = data.current_chunk.getNumRows();
if (rows_before_limit_at_least)
rows_before_limit_at_least->add(rows);
2020-05-07 13:40:50 +00:00
/// Process block.
rows_read += rows;
2020-07-12 05:18:01 +00:00
if (rows_read <= offset)
2020-05-06 06:50:55 +00:00
{
data.current_chunk.clear();
2020-05-07 13:40:50 +00:00
2020-05-06 06:50:55 +00:00
if (input.isFinished())
{
output.finish();
return Status::Finished;
}
/// Now, we pulled from input, and it must be empty.
input.setNeeded();
return Status::NeedData;
}
2020-07-12 05:18:01 +00:00
if (!(rows <= std::numeric_limits<UInt64>::max() - offset && rows_read >= offset + rows))
2020-05-06 06:50:55 +00:00
splitChunk(data);
output.push(std::move(data.current_chunk));
return Status::PortFull;
}
2020-05-11 11:01:23 +00:00
void OffsetTransform::splitChunk(PortsData & data) const
2020-05-06 06:50:55 +00:00
{
2020-07-12 05:18:01 +00:00
UInt64 num_rows = data.current_chunk.getNumRows();
UInt64 num_columns = data.current_chunk.getNumColumns();
2020-05-06 06:50:55 +00:00
/// return a piece of the block
2020-07-12 05:18:01 +00:00
UInt64 start = 0;
2020-05-06 06:50:55 +00:00
2020-07-12 22:32:24 +00:00
/// ------------[....(.....]
2020-07-12 05:18:01 +00:00
/// <----------------------> rows_read
/// <----------> num_rows
/// <---------------> offset
/// <---> start
2020-05-06 06:50:55 +00:00
2020-07-12 05:18:01 +00:00
assert(offset < rows_read);
if (offset + num_rows > rows_read)
start = offset + num_rows - rows_read;
else
2020-05-06 06:50:55 +00:00
return;
2020-07-12 05:18:01 +00:00
UInt64 length = num_rows - start;
2020-05-06 06:50:55 +00:00
auto columns = data.current_chunk.detachColumns();
2020-07-12 05:18:01 +00:00
for (UInt64 i = 0; i < num_columns; ++i)
2020-05-06 06:50:55 +00:00
columns[i] = columns[i]->cut(start, length);
data.current_chunk.setColumns(std::move(columns), length);
}
}