2010-03-12 18:25:35 +00:00
|
|
|
#include <algorithm>
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <DataStreams/LimitBlockInputStream.h>
|
2010-03-12 18:25:35 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2019-03-08 14:36:40 +00:00
|
|
|
/** Builds a stream that applies LIMIT/OFFSET on top of `input`.
  *
  * @param input                           source stream; stored as the only child of this stream
  * @param limit_                          value stored in `limit` (maximum number of rows to return)
  * @param offset_                         value stored in `offset` (number of leading rows to skip)
  * @param always_read_till_end_           stored in `always_read_till_end`; readImpl() uses it to decide
  *                                        whether the child is drained once the limit has been reached
  * @param use_limit_as_total_rows_approx  when true, `limit` is reported through addTotalRowsApprox()
  */
LimitBlockInputStream::LimitBlockInputStream(
    const BlockInputStreamPtr & input,
    UInt64 limit_,
    UInt64 offset_,
    bool always_read_till_end_,
    bool use_limit_as_total_rows_approx)
    : limit(limit_)
    , offset(offset_)
    , always_read_till_end(always_read_till_end_)
{
    /// Report the limit as the approximate total row count only when the caller asked for it.
    if (use_limit_as_total_rows_approx)
        addTotalRowsApprox(static_cast<size_t>(limit));

    children.push_back(input);
}
|
|
|
|
|
|
|
|
|
2011-09-04 21:23:19 +00:00
|
|
|
/** Returns the next block of the child stream restricted to the row window [offset, offset + limit).
  *
  * `pos` is the number of rows read from the child so far, including the block read most recently.
  *
  * - If the window is already exhausted, an empty block is returned; when `always_read_till_end`
  *   is set, the child is first read until it returns an empty block.
  * - Otherwise blocks are read (and dropped) until `pos` moves past `offset`.
  * - A block lying completely inside the window is returned unchanged; a block that crosses
  *   a window boundary has every column cut down to the part that is inside.
  *
  * An empty block coming from the child is returned to the caller as is.
  */
Block LimitBlockInputStream::readImpl()
{
    Block block;

    /// NOTE(review): `offset + limit` is unsigned arithmetic and would wrap for extreme values -
    /// presumably callers keep the sum in range; confirm.
    if (pos >= offset + limit)
    {
        /// Nothing more to return. Drain the child first if requested.
        if (always_read_till_end)
        {
            while (children.back()->read())
            {
            }
        }

        return block;
    }

    /// Read blocks until at least one row past `offset` has been seen.
    UInt64 block_rows = 0;
    while (true)
    {
        block = children.back()->read();
        if (!block)
            return block;

        block_rows = block.rows();
        pos += block_rows;

        if (pos > offset)
            break;
    }

    /// The block starts at or after `offset` and ends at or before `offset + limit`: return all of it.
    const bool starts_inside_window = pos >= offset + block_rows;
    const bool ends_inside_window = pos <= offset + limit;
    if (starts_inside_window && ends_inside_window)
        return block;

    /// Otherwise only a part of the block is returned. The arithmetic is signed because
    /// the intermediate differences may be negative.
    const Int64 signed_limit = static_cast<Int64>(limit);
    const Int64 signed_offset = static_cast<Int64>(offset);
    const Int64 signed_pos = static_cast<Int64>(pos);
    const Int64 signed_rows = static_cast<Int64>(block_rows);

    /// Index inside the block of the first row to keep.
    const UInt64 start = std::max(static_cast<Int64>(0), signed_offset - signed_pos + signed_rows);

    /// Number of rows to keep: bounded by the limit itself, by the rows read past `offset`,
    /// and by the rows of this block that precede the end of the window.
    const Int64 rows_past_offset = signed_pos - signed_offset;
    const Int64 rows_before_window_end = signed_limit + signed_offset - signed_pos + signed_rows;
    const UInt64 length = std::min(signed_limit, std::min(rows_past_offset, rows_before_window_end));

    const size_t num_columns = block.columns();
    for (size_t i = 0; i < num_columns; ++i)
    {
        auto & column = block.safeGetByPosition(i).column;
        column = column->cut(start, length);
    }

    // TODO: we should provide feedback to child-block, so it will know how many rows are actually consumed.
    //       It's crucial for streaming engines like Kafka.

    return block;
}
|
|
|
|
|
|
|
|
}
|