mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-01 03:52:15 +00:00
write first draft of offset clause
This commit is contained in:
parent
0ab6936645
commit
8ce606571e
50
src/DataStreams/OffsetBlockInputStream.cpp
Normal file
50
src/DataStreams/OffsetBlockInputStream.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#include <algorithm>
|
||||||
|
|
||||||
|
#include <DataStreams/OffsetBlockInputStream.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/** Builds an OFFSET stream on top of `input`.
  *
  * input                          - source stream; it becomes the only child of this stream.
  * offset_                        - number of leading rows to skip.
  * always_read_till_end_          - stored as is.
  * use_limit_as_total_rows_approx - if true, addTotalRowsApprox is called with the stored limit.
  * with_ties_, description_       - stored as is.
  */
OffsetBlockInputStream::OffsetBlockInputStream(
    const BlockInputStreamPtr & input, UInt64 offset_, bool always_read_till_end_,
    bool use_limit_as_total_rows_approx, bool with_ties_, const SortDescription & description_)
    /// The constructor takes no limit argument, so `limit` is explicitly zeroed:
    /// otherwise the read below would observe an indeterminate value.
    : limit(0), offset(offset_), always_read_till_end(always_read_till_end_), with_ties(with_ties_)
    , description(description_)
{
    if (use_limit_as_total_rows_approx)
    {
        addTotalRowsApprox(static_cast<size_t>(limit));
    }

    children.push_back(input);
}
|
||||||
|
|
||||||
|
/** Returns the next block of the child stream with the first `offset` rows of the stream removed.
  *
  * Blocks that lie entirely inside the skipped prefix are read and dropped.
  * The block that crosses the offset boundary is trimmed from the front.
  * Every later block is returned unchanged.
  * An empty block from the child (end of data) is passed through as is.
  */
Block OffsetBlockInputStream::readImpl()
{
    Block res;
    UInt64 rows = 0;

    /// Keep reading until the rows read so far reach past the offset.
    /// `pos` counts all rows read from the child, including the block just read.
    do
    {
        res = children.back()->read();
        if (!res)
            return res;
        rows = res.rows();
        pos += rows;
    } while (pos <= offset);

    /// Number of rows that were read before the current block.
    const UInt64 block_begin = pos - rows;

    /// The whole block is past the offset: nothing to trim.
    if (block_begin >= offset)
        return res;

    /// The block crosses the offset boundary: drop its first `start` rows.
    /// Here 0 < start < rows, because block_begin < offset < pos.
    const UInt64 start = offset - block_begin;
    const UInt64 length = rows - start;

    for (size_t i = 0; i < res.columns(); ++i)
    {
        auto & elem = res.safeGetByPosition(i);
        /// The second argument of cut is a length, not an end position,
        /// so it must be the number of rows remaining after `start`.
        elem.column = elem.column->cut(start, length);
    }

    return res;
}
|
||||||
|
|
||||||
|
}
|
47
src/DataStreams/OffsetBlockInputStream.h
Normal file
47
src/DataStreams/OffsetBlockInputStream.h
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <DataStreams/IBlockInputStream.h>
|
||||||
|
#include <Common/SharedBlockRowRef.h>
|
||||||
|
#include <Core/SortDescription.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
/** Implements the LIMIT relational operation.
|
||||||
|
*/
|
||||||
|
class OffsetBlockInputStream : public IBlockInputStream
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/** If always_read_till_end = false (by default), then after reading enough data,
|
||||||
|
* returns an empty block, and this causes the query to be canceled.
|
||||||
|
* If always_read_till_end = true - reads all the data to the end, but ignores them. This is necessary in rare cases:
|
||||||
|
* when otherwise, due to the cancellation of the request, we would not have received the data for GROUP BY WITH TOTALS from the remote server.
|
||||||
|
* If use_limit_as_total_rows_approx = true, then addTotalRowsApprox is called to use the limit in progress & stats
|
||||||
|
* with_ties = true, when query has WITH TIES modifier. If so, description should be provided
|
||||||
|
* description lets us know which row we should check for equality
|
||||||
|
*/
|
||||||
|
OffsetBlockInputStream(
|
||||||
|
const BlockInputStreamPtr & input, UInt64 offset_,
|
||||||
|
bool always_read_till_end_ = false, bool use_limit_as_total_rows_approx = false,
|
||||||
|
bool with_ties_ = false, const SortDescription & description_ = {});
|
||||||
|
|
||||||
|
String getName() const override { return "Offset"; }
|
||||||
|
|
||||||
|
Block getHeader() const override { return children.at(0)->getHeader(); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Block readImpl() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
UInt64 limit;
|
||||||
|
UInt64 offset;
|
||||||
|
UInt64 pos = 0;
|
||||||
|
bool always_read_till_end;
|
||||||
|
bool with_ties;
|
||||||
|
const SortDescription description;
|
||||||
|
SharedBlockRowRef ties_row_ref;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -2,6 +2,7 @@
|
|||||||
#include <DataStreams/FilterBlockInputStream.h>
|
#include <DataStreams/FilterBlockInputStream.h>
|
||||||
#include <DataStreams/FinishSortingBlockInputStream.h>
|
#include <DataStreams/FinishSortingBlockInputStream.h>
|
||||||
#include <DataStreams/LimitBlockInputStream.h>
|
#include <DataStreams/LimitBlockInputStream.h>
|
||||||
|
#include <DataStreams/OffsetBlockInputStream.h>
|
||||||
#include <DataStreams/LimitByBlockInputStream.h>
|
#include <DataStreams/LimitByBlockInputStream.h>
|
||||||
#include <DataStreams/PartialSortingBlockInputStream.h>
|
#include <DataStreams/PartialSortingBlockInputStream.h>
|
||||||
#include <DataStreams/MergeSortingBlockInputStream.h>
|
#include <DataStreams/MergeSortingBlockInputStream.h>
|
||||||
@ -92,6 +93,7 @@
|
|||||||
#include <Processors/Transforms/CubeTransform.h>
|
#include <Processors/Transforms/CubeTransform.h>
|
||||||
#include <Processors/Transforms/FillingTransform.h>
|
#include <Processors/Transforms/FillingTransform.h>
|
||||||
#include <Processors/LimitTransform.h>
|
#include <Processors/LimitTransform.h>
|
||||||
|
#include <Processors/OffsetTransform.h>
|
||||||
#include <Processors/Transforms/FinishSortingTransform.h>
|
#include <Processors/Transforms/FinishSortingTransform.h>
|
||||||
#include <DataTypes/DataTypeAggregateFunction.h>
|
#include <DataTypes/DataTypeAggregateFunction.h>
|
||||||
#include <DataStreams/materializeBlock.h>
|
#include <DataStreams/materializeBlock.h>
|
||||||
@ -675,12 +677,9 @@ static std::pair<UInt64, UInt64> getLimitLengthAndOffset(const ASTSelectQuery &
|
|||||||
UInt64 offset = 0;
|
UInt64 offset = 0;
|
||||||
|
|
||||||
if (query.limitLength())
|
if (query.limitLength())
|
||||||
{
|
|
||||||
length = getLimitUIntValue(query.limitLength(), context);
|
length = getLimitUIntValue(query.limitLength(), context);
|
||||||
if (query.limitOffset() && length)
|
if (query.limitOffset())
|
||||||
offset = getLimitUIntValue(query.limitOffset(), context);
|
offset = getLimitUIntValue(query.limitOffset(), context);
|
||||||
}
|
|
||||||
|
|
||||||
return {length, offset};
|
return {length, offset};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1065,6 +1064,8 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
|
|||||||
|
|
||||||
if (!(pipeline_with_processors && has_prelimit)) /// Limit is no longer needed if there is prelimit.
|
if (!(pipeline_with_processors && has_prelimit)) /// Limit is no longer needed if there is prelimit.
|
||||||
executeLimit(pipeline);
|
executeLimit(pipeline);
|
||||||
|
|
||||||
|
executeOffset(pipeline);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2435,6 +2436,50 @@ void InterpreterSelectQuery::executeLimit(Pipeline & pipeline)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Adds an OFFSET step to every stream of the pipeline.
/// Does nothing unless the query has an offset and no limit length.
void InterpreterSelectQuery::executeOffset(Pipeline & pipeline)
{
    auto & query = getSelectQuery();

    /// Only an OFFSET that comes without a LIMIT length is handled here.
    if (query.limitLength() || !query.limitOffset())
        return;

    /** Rare case:
      * if there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels,
      * then the data should be read to the end rather than cancelling the query earlier,
      * because if the query is cancelled, we will not get `totals` data from the remote server.
      *
      * Another case:
      * if there is WITH TOTALS and there is no ORDER BY, then read the data to the end,
      * otherwise TOTALS is counted according to incomplete data.
      */
    bool always_read_till_end = false;

    if (query.group_by_with_totals && !query.orderBy())
        always_read_till_end = true;

    if (!query.group_by_with_totals && hasWithTotalsInAnySubqueryInFromClause(query))
        always_read_till_end = true;

    SortDescription order_descr;
    if (query.limit_with_ties)
    {
        if (!query.orderBy())
            throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR);
        order_descr = getSortDescription(query, *context);
    }

    /// Only the offset part is needed: the limit length is absent in this branch.
    const UInt64 limit_offset = getLimitLengthAndOffset(query, *context).second;

    pipeline.transform([&](auto & stream)
    {
        stream = std::make_shared<OffsetBlockInputStream>(
            stream, limit_offset, always_read_till_end, false, query.limit_with_ties, order_descr);
    });
}
|
||||||
|
|
||||||
|
|
||||||
void InterpreterSelectQuery::executeWithFill(Pipeline & pipeline)
|
void InterpreterSelectQuery::executeWithFill(Pipeline & pipeline)
|
||||||
{
|
{
|
||||||
auto & query = getSelectQuery();
|
auto & query = getSelectQuery();
|
||||||
@ -2529,6 +2574,54 @@ void InterpreterSelectQuery::executeLimit(QueryPipeline & pipeline)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Adds an OffsetTransform to the main stream of the processors pipeline.
/// Does nothing unless the query has an offset and no limit length.
void InterpreterSelectQuery::executeOffset(QueryPipeline & pipeline)
{
    auto & query = getSelectQuery();

    /// Only an OFFSET that comes without a LIMIT length is handled here.
    if (query.limitLength() || !query.limitOffset())
        return;

    /** Rare case:
      * if there is no WITH TOTALS and there is a subquery in FROM, and there is WITH TOTALS on one of the levels,
      * then the data should be read to the end rather than cancelling the query earlier,
      * because if the query is cancelled, we will not get `totals` data from the remote server.
      *
      * Another case:
      * if there is WITH TOTALS and there is no ORDER BY, then read the data to the end,
      * otherwise TOTALS is counted according to incomplete data.
      */
    bool always_read_till_end = false;

    if (query.group_by_with_totals && !query.orderBy())
        always_read_till_end = true;

    if (!query.group_by_with_totals && hasWithTotalsInAnySubqueryInFromClause(query))
        always_read_till_end = true;

    UInt64 limit_length;
    UInt64 limit_offset;
    std::tie(limit_length, limit_offset) = getLimitLengthAndOffset(query, *context);

    SortDescription order_descr;
    if (query.limit_with_ties)
    {
        if (!query.orderBy())
            throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR);
        order_descr = getSortDescription(query, *context);
    }

    pipeline.addSimpleTransform([&](const Block & header, QueryPipeline::StreamType stream_type) -> ProcessorPtr
    {
        /// Streams other than the main one get no transform.
        if (stream_type != QueryPipeline::StreamType::Main)
            return nullptr;

        return std::make_shared<OffsetTransform>(
            header, limit_length, limit_offset, 1, always_read_till_end, query.limit_with_ties, order_descr);
    });
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
|
void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
|
||||||
{
|
{
|
||||||
if (!context->getSettingsRef().extremes)
|
if (!context->getSettingsRef().extremes)
|
||||||
|
@ -185,6 +185,7 @@ private:
|
|||||||
void executeUnion(Pipeline & pipeline, Block header);
|
void executeUnion(Pipeline & pipeline, Block header);
|
||||||
void executeLimitBy(Pipeline & pipeline);
|
void executeLimitBy(Pipeline & pipeline);
|
||||||
void executeLimit(Pipeline & pipeline);
|
void executeLimit(Pipeline & pipeline);
|
||||||
|
void executeOffset(Pipeline & pipeline);
|
||||||
static void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression);
|
static void executeProjection(Pipeline & pipeline, const ExpressionActionsPtr & expression);
|
||||||
void executeDistinct(Pipeline & pipeline, bool before_order, Names columns);
|
void executeDistinct(Pipeline & pipeline, bool before_order, Names columns);
|
||||||
void executeExtremes(Pipeline & pipeline);
|
void executeExtremes(Pipeline & pipeline);
|
||||||
@ -203,6 +204,7 @@ private:
|
|||||||
void executePreLimit(QueryPipeline & pipeline, bool do_not_skip_offset);
|
void executePreLimit(QueryPipeline & pipeline, bool do_not_skip_offset);
|
||||||
void executeLimitBy(QueryPipeline & pipeline);
|
void executeLimitBy(QueryPipeline & pipeline);
|
||||||
void executeLimit(QueryPipeline & pipeline);
|
void executeLimit(QueryPipeline & pipeline);
|
||||||
|
void executeOffset(QueryPipeline & pipeline);
|
||||||
static void executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
|
static void executeProjection(QueryPipeline & pipeline, const ExpressionActionsPtr & expression);
|
||||||
void executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns);
|
void executeDistinct(QueryPipeline & pipeline, bool before_order, Names columns);
|
||||||
void executeExtremes(QueryPipeline & pipeline);
|
void executeExtremes(QueryPipeline & pipeline);
|
||||||
|
@ -247,6 +247,11 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
|||||||
{
|
{
|
||||||
if (!exp_elem.parse(pos, limit_offset, expected))
|
if (!exp_elem.parse(pos, limit_offset, expected))
|
||||||
return false;
|
return false;
|
||||||
|
if (s_with_ties.ignore(pos, expected))
|
||||||
|
{
|
||||||
|
limit_with_ties_occured = true;
|
||||||
|
select_query->limit_with_ties = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Because TOP n in totally equals LIMIT n
|
/// Because TOP n in totally equals LIMIT n
|
||||||
|
308
src/Processors/OffsetTransform.cpp
Normal file
308
src/Processors/OffsetTransform.cpp
Normal file
@ -0,0 +1,308 @@
|
|||||||
|
#include <Processors/OffsetTransform.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ErrorCodes
|
||||||
|
{
|
||||||
|
extern const int LOGICAL_ERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates a processor with `num_streams` input ports and the same number of output ports,
  * all of them having the header `header_`.
  * Throws LOGICAL_ERROR if ties are requested together with more than one stream.
  */
OffsetTransform::OffsetTransform(
    const Block & header_, size_t limit_, size_t offset_, size_t num_streams,
    bool always_read_till_end_, bool with_ties_,
    SortDescription description_)
    : IProcessor(InputPorts(num_streams, header_), OutputPorts(num_streams, header_))
    , limit(limit_), offset(offset_)
    , always_read_till_end(always_read_till_end_)
    , with_ties(with_ties_), description(std::move(description_))
{
    if (with_ties && num_streams != 1)
        throw Exception("Cannot use OffsetTransform with multiple ports and ties.", ErrorCodes::LOGICAL_ERROR);

    ports_data.resize(num_streams);

    /// Pair up the i-th input port with the i-th output port.
    auto pair_it = ports_data.begin();
    for (auto & input : inputs)
    {
        pair_it->input_port = &input;
        ++pair_it;
    }

    pair_it = ports_data.begin();
    for (auto & output : outputs)
    {
        pair_it->output_port = &output;
        ++pair_it;
    }

    /// Resolve each sort column to its position in the header:
    /// by name when the name is set, otherwise by the number given in the description.
    for (const auto & column_desc : description)
    {
        const size_t position = column_desc.column_name.empty()
            ? column_desc.column_number
            : header_.getPositionByName(column_desc.column_name);

        sort_column_positions.push_back(position);
    }
}
|
||||||
|
|
||||||
|
/// Builds a one-row chunk that holds the values of the sort columns of `chunk` at row `row`.
/// `row` must be less than the number of rows in `chunk`.
Chunk OffsetTransform::makeChunkWithPreviousRow(const Chunk & chunk, size_t row) const
{
    assert(row < chunk.getNumRows());

    const ColumnRawPtrs sort_columns = extractSortColumns(chunk.getColumns());

    MutableColumns single_row_columns;
    for (const auto * sort_column : sort_columns)
    {
        /// An empty column of the same kind, then the single value copied into it.
        auto copy = sort_column->cloneEmpty();
        copy->insertFrom(*sort_column, row);
        single_row_columns.emplace_back(std::move(copy));
    }

    return Chunk(std::move(single_row_columns), 1);
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Runs preparePair for every port pair mentioned in the updated port lists and combines the results:
  *  - Finished, when every port pair has finished;
  *  - PortFull, when at least one processed pair reported PortFull;
  *  - NeedData otherwise.
  * Any other status returned by preparePair is a LOGICAL_ERROR.
  */
IProcessor::Status OffsetTransform::prepare(
    const PortNumbers & updated_input_ports,
    const PortNumbers & updated_output_ports)
{
    bool any_port_full = false;

    auto update_pair = [&](size_t pair_index)
    {
        const auto status = preparePair(ports_data[pair_index]);

        if (status == IProcessor::Status::NeedData)
            return;

        if (status == IProcessor::Status::PortFull)
        {
            any_port_full = true;
            return;
        }

        if (status == IProcessor::Status::Finished)
        {
            /// Count each pair only once, even if it reports Finished repeatedly.
            auto & pair = ports_data[pair_index];
            if (!pair.is_finished)
            {
                pair.is_finished = true;
                ++num_finished_port_pairs;
            }
            return;
        }

        throw Exception(
            "Unexpected status for OffsetTransform::preparePair : " + IProcessor::statusToName(status),
            ErrorCodes::LOGICAL_ERROR);
    };

    for (auto port_number : updated_input_ports)
        update_pair(port_number);

    for (auto port_number : updated_output_ports)
        update_pair(port_number);

    /// All port pairs are finished.
    if (num_finished_port_pairs == ports_data.size())
        return Status::Finished;

    /// Note: there is no early termination here; ports are closed only by preparePair.

    return any_port_full ? Status::PortFull : Status::NeedData;
}
|
||||||
|
|
||||||
|
/// Argument-less variant: valid only for a single port pair, for which it processes pair 0.
/// Throws LOGICAL_ERROR when the transform has any other number of port pairs.
OffsetTransform::Status OffsetTransform::prepare()
{
    if (ports_data.size() == 1)
        return prepare({0}, {0});

    throw Exception("prepare without arguments is not supported for multi-port OffsetTransform.",
                    ErrorCodes::LOGICAL_ERROR);
}
|
||||||
|
|
||||||
|
/** Moves at most one chunk from the input port of `data` to its output port, applying the offset.
  *
  * Returns:
  *  - Finished, when the output is finished (and the input need not be drained) or the input is finished;
  *  - PortFull, when the output cannot accept data, or a chunk has just been pushed to it;
  *  - NeedData, when the input has no data yet, or the pulled chunk was dropped.
  *
  * Chunks that lie entirely before the offset are dropped, the chunk that crosses the offset
  * boundary is trimmed by splitChunk, and later chunks are forwarded unchanged.
  */
OffsetTransform::Status OffsetTransform::preparePair(PortsData & data)
{
    auto & output = *data.output_port;
    auto & input = *data.input_port;

    /// Drops the current chunk and reports what to do next with the input.
    auto drop_chunk = [&]() -> Status
    {
        data.current_chunk.clear();

        if (input.isFinished())
        {
            output.finish();
            return Status::Finished;
        }

        /// Now, we pulled from input, and it must be empty.
        input.setNeeded();
        return Status::NeedData;
    };

    /// Check can output.
    bool output_finished = false;
    if (output.isFinished())
    {
        output_finished = true;
        if (!always_read_till_end)
        {
            input.close();
            return Status::Finished;
        }
    }

    if (!output_finished && !output.canPush())
    {
        input.setNotNeeded();
        return Status::PortFull;
    }

    /// Check can input.
    if (input.isFinished())
    {
        output.finish();
        return Status::Finished;
    }

    input.setNeeded();
    if (!input.hasData())
        return Status::NeedData;

    data.current_chunk = input.pull(true);

    const auto rows = data.current_chunk.getNumRows();

    if (rows_before_limit_at_least)
        rows_before_limit_at_least->add(rows);

    /// Skip block (for 'always_read_till_end' case).
    if (output_finished)
        return drop_chunk();

    /// Process block.
    rows_read += rows;

    /// The chunk lies entirely inside the skipped prefix: nothing from it is returned.
    if (rows_read <= offset)
        return drop_chunk();

    if (rows_read - rows >= offset)
    {
        /// The chunk starts at or after the offset: return the whole chunk.

        /// Save the last row of current chunk to check if next block begins with the same row (for WITH TIES).
        if (with_ties && rows != 0 && rows_read == offset + limit)
            previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, rows - 1);
    }
    else
    {
        /// The chunk crosses the offset boundary.
        /// This function may be heavy to execute in prepare, but it keeps the code simpler.
        splitChunk(data);
    }

    output.push(std::move(data.current_chunk));

    return Status::PortFull;
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Removes from the front of the current chunk the rows that fall before the offset.
  *
  * `rows_read` is expected to already include the rows of the current chunk:
  *
  *  ------------[....(.....]
  *  <----------------------> rows_read
  *              <----------> num_rows
  *  <---------------> offset
  *              <---> start
  *
  * If no row of the chunk falls before the offset, the chunk is left untouched.
  */
void OffsetTransform::splitChunk(PortsData & data)
{
    const size_t num_rows = data.current_chunk.getNumRows();
    const size_t num_columns = data.current_chunk.getNumColumns();

    /// Number of rows that were read before this chunk.
    const size_t chunk_begin = rows_read >= num_rows ? rows_read - num_rows : 0;

    /// Number of leading rows to drop, never more than the chunk holds.
    size_t start = 0;
    if (offset > chunk_begin)
    {
        start = offset - chunk_begin;
        if (start > num_rows)
            start = num_rows;
    }

    /// The kept part always runs to the end of the chunk.
    const size_t length = num_rows - start;

    /// For WITH TIES remember the last returned row (sort columns only).
    /// No row of this chunk follows the kept part, so there is nothing to compare it with here.
    if (with_ties && length)
        previous_row_chunk = makeChunkWithPreviousRow(data.current_chunk, num_rows - 1);

    if (start == 0)
        return;

    auto columns = data.current_chunk.detachColumns();

    for (size_t i = 0; i < num_columns; ++i)
        columns[i] = columns[i]->cut(start, length);

    data.current_chunk.setColumns(std::move(columns), length);
}
|
||||||
|
|
||||||
|
/// Picks from `columns` the ones located at the sort column positions, in the order of those positions.
/// The result holds non-owning pointers into `columns`.
ColumnRawPtrs OffsetTransform::extractSortColumns(const Columns & columns) const
{
    ColumnRawPtrs sort_columns;
    sort_columns.reserve(description.size());

    for (const size_t position : sort_column_positions)
        sort_columns.push_back(columns[position].get());

    return sort_columns;
}
|
||||||
|
|
||||||
|
bool OffsetTransform::sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, size_t current_chunk_row_num) const
|
||||||
|
{
|
||||||
|
assert(current_chunk_sort_columns.size() == previous_row_chunk.getNumColumns());
|
||||||
|
size_t size = current_chunk_sort_columns.size();
|
||||||
|
const auto & previous_row_sort_columns = previous_row_chunk.getColumns();
|
||||||
|
for (size_t i = 0; i < size; ++i)
|
||||||
|
if (0 != current_chunk_sort_columns[i]->compareAt(current_chunk_row_num, 0, *previous_row_sort_columns[i], 1))
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
72
src/Processors/OffsetTransform.h
Normal file
72
src/Processors/OffsetTransform.h
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Processors/IProcessor.h>
|
||||||
|
#include <Processors/RowsBeforeLimitCounter.h>
|
||||||
|
#include <Core/SortDescription.h>
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
|
||||||
|
/// Implementation for LIMIT N OFFSET M
|
||||||
|
/// This processor support multiple inputs and outputs (the same number).
|
||||||
|
/// Each pair of input and output port works independently.
|
||||||
|
/// The reason to have multiple ports is to be able to stop all sources when limit is reached, in a query like:
|
||||||
|
/// SELECT * FROM system.numbers_mt WHERE number = 1000000 LIMIT 1
|
||||||
|
///
|
||||||
|
/// always_read_till_end - read all data from input ports even if limit was reached.
|
||||||
|
/// with_ties, description - implementation of LIMIT WITH TIES. It works only for single port.
|
||||||
|
class OffsetTransform : public IProcessor
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
|
||||||
|
size_t limit;
|
||||||
|
size_t offset;
|
||||||
|
bool always_read_till_end;
|
||||||
|
|
||||||
|
bool with_ties;
|
||||||
|
const SortDescription description;
|
||||||
|
|
||||||
|
Chunk previous_row_chunk; /// for WITH TIES, contains only sort columns
|
||||||
|
std::vector<size_t> sort_column_positions;
|
||||||
|
|
||||||
|
size_t rows_read = 0; /// including the last read block
|
||||||
|
RowsBeforeLimitCounterPtr rows_before_limit_at_least;
|
||||||
|
|
||||||
|
/// State of port's pair.
|
||||||
|
/// Chunks from different port pairs are not mixed for berret cache locality.
|
||||||
|
struct PortsData
|
||||||
|
{
|
||||||
|
Chunk current_chunk;
|
||||||
|
|
||||||
|
InputPort * input_port = nullptr;
|
||||||
|
OutputPort * output_port = nullptr;
|
||||||
|
bool is_finished = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<PortsData> ports_data;
|
||||||
|
size_t num_finished_port_pairs = 0;
|
||||||
|
|
||||||
|
Chunk makeChunkWithPreviousRow(const Chunk & current_chunk, size_t row_num) const;
|
||||||
|
ColumnRawPtrs extractSortColumns(const Columns & columns) const;
|
||||||
|
bool sortColumnsEqualAt(const ColumnRawPtrs & current_chunk_sort_columns, size_t current_chunk_row_num) const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
OffsetTransform(
|
||||||
|
const Block & header_, size_t limit_, size_t offset_, size_t num_streams = 1,
|
||||||
|
bool always_read_till_end_ = false, bool with_ties_ = false,
|
||||||
|
SortDescription description_ = {});
|
||||||
|
|
||||||
|
String getName() const override { return "Limit"; }
|
||||||
|
|
||||||
|
Status prepare(const PortNumbers & /*updated_input_ports*/, const PortNumbers & /*updated_output_ports*/) override;
|
||||||
|
Status prepare() override; /// Compatibility for TreeExecutor.
|
||||||
|
Status preparePair(PortsData & data);
|
||||||
|
void splitChunk(PortsData & data);
|
||||||
|
|
||||||
|
InputPort & getInputPort() { return inputs.front(); }
|
||||||
|
OutputPort & getOutputPort() { return outputs.front(); }
|
||||||
|
|
||||||
|
void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); }
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user