mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
RANGE frame works in some cases
This commit is contained in:
parent
10a8831d8b
commit
83cfdde6d9
@ -57,6 +57,14 @@ struct WindowFrame
|
||||
|
||||
* OffsetType end_offset = Current;
|
||||
*/
|
||||
|
||||
|
||||
bool operator == (const WindowFrame & other) const
|
||||
{
|
||||
// We don't compare is_default because it's not a real property of the
|
||||
// frame, and only influences how we display it.
|
||||
return other.type == type;
|
||||
}
|
||||
};
|
||||
|
||||
struct WindowDescription
|
||||
|
@ -516,7 +516,8 @@ void makeWindowDescriptionFromAST(WindowDescription & desc, const IAST * ast)
|
||||
desc.full_sort_description.insert(desc.full_sort_description.end(),
|
||||
desc.order_by.begin(), desc.order_by.end());
|
||||
|
||||
if (definition.frame.type != WindowFrame::FrameType::Rows)
|
||||
if (definition.frame.type != WindowFrame::FrameType::Rows
|
||||
&& definition.frame.type != WindowFrame::FrameType::Range)
|
||||
{
|
||||
std::string name = definition.frame.type == WindowFrame::FrameType::Rows
|
||||
? "ROWS"
|
||||
|
@ -52,13 +52,18 @@ void ASTWindowDefinition::formatImpl(const FormatSettings & settings,
|
||||
order_by->formatImpl(settings, state, format_frame);
|
||||
}
|
||||
|
||||
if ((partition_by || order_by) && !frame.is_default)
|
||||
{
|
||||
settings.ostr << " ";
|
||||
}
|
||||
|
||||
if (!frame.is_default)
|
||||
{
|
||||
const auto * name = frame.type == WindowFrame::FrameType::Rows
|
||||
? "ROWS" : frame.type == WindowFrame::FrameType::Groups
|
||||
? "GROUPS" : "RANGE";
|
||||
|
||||
settings.ostr << name << " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW";
|
||||
settings.ostr << name << " UNBOUNDED PRECEDING";
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -535,29 +535,51 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p
|
||||
ParserKeyword keyword_and("AND");
|
||||
ParserKeyword keyword_current_row("CURRENT ROW");
|
||||
|
||||
if (!keyword_between.ignore(pos, expected))
|
||||
// There are two variants of grammar for the frame:
|
||||
// 1) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
||||
// 2) ROWS UNBOUNDED PRECEDING
|
||||
// When the frame end is not specified (2), it defaults to CURRENT ROW.
|
||||
if (keyword_between.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
// 1) ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
|
||||
if (!keyword_unbounded.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_preceding.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_and.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_current_row.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2) ROWS UNBOUNDED PRECEDING
|
||||
if (!keyword_unbounded.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_preceding.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!keyword_unbounded.ignore(pos, expected))
|
||||
if (node->frame != WindowFrame{})
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_preceding.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_and.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!keyword_current_row.ignore(pos, expected))
|
||||
{
|
||||
return false;
|
||||
node->frame.is_default = false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -79,7 +79,7 @@ void WindowTransform::advancePartitionEnd()
|
||||
|
||||
const RowNumber end = blocksEnd();
|
||||
|
||||
fmt::print(stderr, "end {}, partition_end {}\n", end, partition_end);
|
||||
// fmt::print(stderr, "end {}, partition_end {}\n", end, partition_end);
|
||||
|
||||
// If we're at the total end of data, we must end the partition. This is the
|
||||
// only place in calculations where we need special handling for end of data,
|
||||
@ -95,7 +95,8 @@ void WindowTransform::advancePartitionEnd()
|
||||
return;
|
||||
}
|
||||
|
||||
// If we got to the end of the block already, just stop.
|
||||
// If we got to the end of the block already, but expect more data, wait for
|
||||
// it.
|
||||
if (partition_end == end)
|
||||
{
|
||||
return;
|
||||
@ -122,18 +123,17 @@ void WindowTransform::advancePartitionEnd()
|
||||
// The partition ends when the PARTITION BY columns change. We need
|
||||
// some reference columns for comparison. We might have already
|
||||
// dropped the blocks where the partition starts, but any row in the
|
||||
// partition will do. Use group_start -- it's always in the valid
|
||||
// region, because it points to the start of the current group,
|
||||
// which we haven't fully processed yet, and therefore cannot drop.
|
||||
// It might be the same as the partition_end if it's the first group of the
|
||||
// partition will do. We use a special partition_etalon pointer for this.
|
||||
// It might be the same as the partition_end if we're at the first row of the
|
||||
// first partition, so we compare it to itself, but it still works correctly.
|
||||
const auto block_number = partition_end.block;
|
||||
const auto block_rows = blockRowsNumber(partition_end);
|
||||
for (; partition_end.row < block_rows; ++partition_end.row)
|
||||
{
|
||||
size_t i = 0;
|
||||
for (; i < n; i++)
|
||||
{
|
||||
const auto * ref = inputAt(group_start)[partition_by_indices[i]].get();
|
||||
const auto * ref = inputAt(partition_etalon)[partition_by_indices[i]].get();
|
||||
const auto * c = inputAt(partition_end)[partition_by_indices[i]].get();
|
||||
if (c->compareAt(partition_end.row,
|
||||
group_start.row, *ref,
|
||||
@ -150,14 +150,19 @@ void WindowTransform::advancePartitionEnd()
|
||||
}
|
||||
}
|
||||
|
||||
if (partition_end.row == block_rows)
|
||||
{
|
||||
++partition_end.block;
|
||||
partition_end.row = 0;
|
||||
}
|
||||
// Went until the end of block, go to the next.
|
||||
assert(partition_end.row == block_rows);
|
||||
++partition_end.block;
|
||||
partition_end.row = 0;
|
||||
|
||||
// Went until the end of data and didn't find the new partition.
|
||||
assert(!partition_ended && partition_end == blocksEnd());
|
||||
|
||||
// Advance the partition etalon so that we can drop the old blocks.
|
||||
// We can use the last valid row of the block as the partition etalon.
|
||||
// Shouldn't have empty blocks here (what would it mean?).
|
||||
assert(block_rows > 0);
|
||||
partition_etalon = RowNumber{block_number, block_rows - 1};
|
||||
}
|
||||
|
||||
void WindowTransform::advanceGroupEnd()
|
||||
@ -169,19 +174,17 @@ void WindowTransform::advanceGroupEnd()
|
||||
|
||||
switch (window_description.frame.type)
|
||||
{
|
||||
case WindowFrame::FrameType::Range:
|
||||
case WindowFrame::FrameType::Groups:
|
||||
advanceGroupEndGroups();
|
||||
advanceGroupEndOrderBy();
|
||||
break;
|
||||
case WindowFrame::FrameType::Rows:
|
||||
advanceGroupEndRows();
|
||||
break;
|
||||
case WindowFrame::FrameType::Range:
|
||||
assert(false);
|
||||
advanceGroupEndTrivial();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void WindowTransform::advanceGroupEndRows()
|
||||
void WindowTransform::advanceGroupEndTrivial()
|
||||
{
|
||||
// ROWS mode, peer groups always contains only the current row.
|
||||
// We cannot advance the groups if the group start is already beyond the
|
||||
@ -192,7 +195,7 @@ void WindowTransform::advanceGroupEndRows()
|
||||
group_ended = true;
|
||||
}
|
||||
|
||||
void WindowTransform::advanceGroupEndGroups()
|
||||
void WindowTransform::advanceGroupEndOrderBy()
|
||||
{
|
||||
const size_t n = order_by_indices.size();
|
||||
if (n == 0)
|
||||
@ -252,7 +255,7 @@ void WindowTransform::advanceFrameEnd()
|
||||
// Frame end is always the current group end, for now.
|
||||
// In ROWS mode the group is going to contain only the current row.
|
||||
frame_end = group_end;
|
||||
frame_ended = true;
|
||||
frame_ended = group_ended;
|
||||
|
||||
// Add the columns over which we advanced the frame to the aggregate function
|
||||
// states.
|
||||
@ -275,7 +278,7 @@ void WindowTransform::advanceFrameEnd()
|
||||
uint64_t past_the_end_block;
|
||||
// Note that the past-the-end row is not in the past-the-end block, but
|
||||
// in the block before it.
|
||||
uint32_t past_the_end_row;
|
||||
uint64_t past_the_end_row;
|
||||
|
||||
if (frame_end.block < first_block_number + blocks.size())
|
||||
{
|
||||
@ -326,7 +329,6 @@ void WindowTransform::writeOutGroup()
|
||||
// Empty groups don't make sense.
|
||||
assert(group_start < group_end);
|
||||
|
||||
std::vector<const IColumn *> argument_columns;
|
||||
for (size_t wi = 0; wi < workspaces.size(); ++wi)
|
||||
{
|
||||
auto & ws = workspaces[wi];
|
||||
@ -334,56 +336,95 @@ void WindowTransform::writeOutGroup()
|
||||
const auto * a = f.aggregate_function.get();
|
||||
auto * buf = ws.aggregate_function_state.data();
|
||||
|
||||
// Need to use a tricky loop to be able to batch per-block (but we don't
|
||||
// do it yet...). See the comments to the similar loop in
|
||||
// advanceFrameEnd() above.
|
||||
// We'll calculate the value once for the first row in the group, and
|
||||
// insert its copy for each other row in the group.
|
||||
IColumn * reference_column = outputAt(group_start)[wi].get();
|
||||
const size_t reference_row = group_start.row;
|
||||
// FIXME does it also allocate the result on the arena?
|
||||
// We'll have to pass it out with blocks then...
|
||||
a->insertResultInto(buf, *reference_column, arena.get());
|
||||
// The row we just added to the end of the column must correspond to the
|
||||
// first row of the group.
|
||||
assert(reference_column->size() == reference_row + 1);
|
||||
|
||||
// fmt::print(stderr, "calculated value of function {} is '{}'\n",
|
||||
// wi, toString((*reference_column)[reference_row]));
|
||||
|
||||
// Now duplicate the calculated value into all other rows.
|
||||
auto first_row_to_copy_to = group_start;
|
||||
advanceRowNumber(first_row_to_copy_to);
|
||||
|
||||
|
||||
// We use two explicit loops here instead of using advanceRowNumber(),
|
||||
// because we want to batch the inserts per-block.
|
||||
// Unfortunately this leads to tricky loop conditions, because the
|
||||
// frame_end might be either a past-the-end block, or a valid block, in
|
||||
// which case we also have to process its head. We have to avoid stepping
|
||||
// into the past-the-end block because it might not be valid.
|
||||
// Moreover, the past-the-end row is not in the past-the-end block, but
|
||||
// in the block before it.
|
||||
// And we also have to remember to reset the row number when moving to
|
||||
// the next block.
|
||||
uint64_t past_the_end_block;
|
||||
uint32_t past_the_end_row;
|
||||
if (frame_end.block < first_block_number + blocks.size())
|
||||
uint64_t past_the_end_row;
|
||||
if (group_end.row == 0)
|
||||
{
|
||||
past_the_end_block = frame_end.block + 1;
|
||||
past_the_end_row = frame_end.row;
|
||||
// group_end might not be valid.
|
||||
past_the_end_block = group_end.block;
|
||||
|
||||
// Otherwise a group would end at the start of data, this is not
|
||||
// possible.
|
||||
assert(group_end.block > 0);
|
||||
|
||||
const size_t first_valid_block = group_end.block - 1;
|
||||
assert(first_valid_block >= first_block_number);
|
||||
|
||||
past_the_end_row = blocks[first_valid_block - first_block_number]
|
||||
.input_columns[0]->size();
|
||||
}
|
||||
else
|
||||
{
|
||||
past_the_end_block = first_block_number + blocks.size();
|
||||
past_the_end_row = blocks.back().numRows();
|
||||
past_the_end_block = group_end.block + 1;
|
||||
past_the_end_row = group_end.row;
|
||||
}
|
||||
for (auto r = group_start;
|
||||
r.block < past_the_end_block;
|
||||
++r.block, r.row = 0)
|
||||
|
||||
for (auto block_index = first_row_to_copy_to.block;
|
||||
block_index < past_the_end_block;
|
||||
++block_index)
|
||||
{
|
||||
const auto & block = blocks[r.block - first_block_number];
|
||||
const auto & block = blocks[block_index - first_block_number];
|
||||
|
||||
argument_columns.clear();
|
||||
for (const auto ai : ws.argument_column_indices)
|
||||
// We process tail of the first block, all rows of intermediate
|
||||
// blocks, and the head of the last block.
|
||||
const auto block_first_row
|
||||
= (block_index == first_row_to_copy_to.block)
|
||||
? first_row_to_copy_to.row : 0;
|
||||
const auto block_last_row = ((block_index + 1) == past_the_end_block)
|
||||
? past_the_end_row : block.numRows();
|
||||
|
||||
// fmt::print(stderr,
|
||||
// "group rest [{}, {}), pteb {}, pter {}, cur {}, fr {}, lr {}\n",
|
||||
// group_start, group_end, past_the_end_block, group_end.row,
|
||||
// block_index, block_first_row, block_last_row);
|
||||
// The number of the elements left to insert may be zero, but we must
|
||||
// notice it on the first block. Other blocks shouldn't be empty,
|
||||
// because we don't generally have empty block, and advanceRowNumber()
|
||||
// doesn't generate past-the-end row numbers, so we wouldn't get into
|
||||
// a block we don't want to process.
|
||||
if (block_first_row == block_last_row)
|
||||
{
|
||||
argument_columns.push_back(block.input_columns[ai].get());
|
||||
assert(block_index == first_row_to_copy_to.block);
|
||||
break;
|
||||
}
|
||||
|
||||
// We process all rows of intermediate blocks, and the head of the
|
||||
// last block.
|
||||
const auto end = ((r.block + 1) == past_the_end_block)
|
||||
? past_the_end_row
|
||||
: block.numRows();
|
||||
for (; r.row < end; ++r.row)
|
||||
{
|
||||
// FIXME does it also allocate the result on the arena?
|
||||
// We'll have to pass it out with blocks then...
|
||||
a->insertResultInto(buf,
|
||||
*block.output_columns[wi],
|
||||
arena.get());
|
||||
}
|
||||
block.output_columns[wi]->insertManyFrom(*reference_column,
|
||||
reference_row, block_last_row - block_first_row);
|
||||
}
|
||||
}
|
||||
|
||||
first_not_ready_row = group_end;
|
||||
}
|
||||
|
||||
void WindowTransform::initPerBlockCaches()
|
||||
{
|
||||
}
|
||||
|
||||
void WindowTransform::appendChunk(Chunk & chunk)
|
||||
{
|
||||
// fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
|
||||
@ -413,12 +454,12 @@ void WindowTransform::appendChunk(Chunk & chunk)
|
||||
}
|
||||
}
|
||||
|
||||
initPerBlockCaches();
|
||||
|
||||
// Start the calculations. First, advance the partition end.
|
||||
for (;;)
|
||||
{
|
||||
advancePartitionEnd();
|
||||
// fmt::print(stderr, "partition [?, {}), {}, old etalon {}\n", partition_end,
|
||||
// partition_ended, partition_etalon);
|
||||
|
||||
// Either we ran out of data or we found the end of partition (maybe
|
||||
// both, but this only happens at the total end of data).
|
||||
@ -428,22 +469,21 @@ void WindowTransform::appendChunk(Chunk & chunk)
|
||||
assert(input_is_finished);
|
||||
}
|
||||
|
||||
// fmt::print(stderr, "partition end '{}', {}\n", partition_end,
|
||||
// partition_ended);
|
||||
|
||||
// After that, advance the peer groups. We can advance peer groups until
|
||||
// the end of partition or current end of data, which is precisely the
|
||||
// description of `partition_end`.
|
||||
while (group_end < partition_end)
|
||||
while (group_start < partition_end)
|
||||
{
|
||||
group_start = group_end;
|
||||
advanceGroupEnd();
|
||||
|
||||
// fmt::print(stderr, "group end '{}'\n", group_end);
|
||||
// fmt::print(stderr, "group [{}, {}), {}\n", group_start, group_end,
|
||||
// group_ended);
|
||||
|
||||
// If the group didn't end yet, wait.
|
||||
if (!group_ended)
|
||||
{
|
||||
// Wait for more input data to find the end of group.
|
||||
assert(!input_is_finished);
|
||||
assert(!partition_ended);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -457,6 +497,9 @@ void WindowTransform::appendChunk(Chunk & chunk)
|
||||
|
||||
if (!frame_ended)
|
||||
{
|
||||
// Wait for more input data to find the end of frame.
|
||||
assert(!input_is_finished);
|
||||
assert(!partition_ended);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -467,35 +510,33 @@ void WindowTransform::appendChunk(Chunk & chunk)
|
||||
// The frame will have to be recalculated.
|
||||
frame_ended = false;
|
||||
|
||||
// Move to the next group. Don't advance group_start yet, it's
|
||||
// convenient to use it as the PARTITION BY etalon.
|
||||
// Move to the next group.
|
||||
group_ended = false;
|
||||
|
||||
if (group_end == partition_end)
|
||||
{
|
||||
break;
|
||||
}
|
||||
assert(group_end < partition_end);
|
||||
}
|
||||
|
||||
if (!partition_ended)
|
||||
{
|
||||
// We haven't encountered the end of the partition yet, need more
|
||||
// data.
|
||||
assert(partition_end == blocksEnd());
|
||||
break;
|
||||
group_start = group_end;
|
||||
}
|
||||
|
||||
if (input_is_finished)
|
||||
{
|
||||
// why?
|
||||
// We finalized the last partition in the above loop, and don't have
|
||||
// to do anything else.
|
||||
return;
|
||||
}
|
||||
|
||||
if (!partition_ended)
|
||||
{
|
||||
// Wait for more input data to find the end of partition.
|
||||
// Assert that we processed all the data we currently have, and that
|
||||
// we are going to receive more data.
|
||||
assert(partition_end == blocksEnd());
|
||||
assert(!input_is_finished);
|
||||
break;
|
||||
}
|
||||
|
||||
// Start the next partition.
|
||||
const auto new_partition_start = partition_end;
|
||||
advanceRowNumber(partition_end);
|
||||
partition_ended = false;
|
||||
partition_etalon = new_partition_start;
|
||||
// We have to reset the frame when the new partition starts. This is not a
|
||||
// generally correct way to do so, but we don't really support moving frame
|
||||
// for now.
|
||||
@ -663,10 +704,13 @@ void WindowTransform::work()
|
||||
// We don't really have to keep the entire partition, and it can be big, so
|
||||
// we want to drop the starting blocks to save memory.
|
||||
// We can drop the old blocks if we already returned them as output, and the
|
||||
// frame and group are already past them. Note that the frame start can be
|
||||
// further than group start for some frame specs, so we have to check both.
|
||||
// frame, group and the partition etalon are already past them. Note that the
|
||||
// frame start can be further than group start for some frame specs (e.g.
|
||||
// EXCLUDE CURRENT ROW), so we have to check both.
|
||||
const auto first_used_block = std::min(next_output_block_number,
|
||||
std::min(frame_start.block, group_start.block));
|
||||
std::min(frame_start.block,
|
||||
std::min(group_start.block,
|
||||
partition_etalon.block)));
|
||||
if (first_block_number < first_used_block)
|
||||
{
|
||||
// fmt::print(stderr, "will drop blocks from {} to {}\n", first_block_number,
|
||||
|
@ -15,7 +15,7 @@ using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
|
||||
|
||||
class Arena;
|
||||
|
||||
// Runtime data for computing one window function
|
||||
// Runtime data for computing one window function.
|
||||
struct WindowFunctionWorkspace
|
||||
{
|
||||
WindowFunctionDescription window_function;
|
||||
@ -58,18 +58,20 @@ struct RowNumber
|
||||
|
||||
/*
|
||||
* Computes several window functions that share the same window. The input must
|
||||
* be sorted correctly for this window (PARTITION BY, then ORDER BY).
|
||||
* be sorted by PARTITION BY (in any order), then by ORDER BY.
|
||||
* We need to track the following pointers:
|
||||
* 1) start of partition -- rows that compare equal w/PARTITION BY.
|
||||
* 2) current frame boundaries.
|
||||
* 3) start of peer group -- rows that compare equal w/ORDER BY (empty ORDER BY
|
||||
* means all rows are equal).
|
||||
* These row ranges are (almost) nested -- peer group is inside frame inside
|
||||
* partition. The only exception is when the exclusion clause is specified that
|
||||
* excludes current peer group, but we don't support it anyway.
|
||||
* All pointers only move forward.
|
||||
* The value of the function is the same for all rows of the peer group.
|
||||
* (partition [frame {group} ] )
|
||||
* 1) boundaries of partition -- rows that compare equal w/PARTITION BY.
|
||||
* 2) boundaries of peer group -- rows that compare equal w/ORDER BY (empty
|
||||
* ORDER BY means all rows are peers).
|
||||
* 3) boundaries of the frame.
|
||||
* Both the peer group and the frame are inside the partition, but can have any
|
||||
* position relative to each other.
|
||||
* All pointers only move forward. For partition and group boundaries, this is
|
||||
* ensured by the order of input data. This property also trivially holds for
|
||||
* the ROWS and GROUPS frames. For the RANGE frame, the proof requires the
|
||||
* additional fact that the ranges are specified in terms of (the single)
|
||||
* ORDER BY column.
|
||||
* The value of the window function is the same for all rows of the peer group.
|
||||
*/
|
||||
class WindowTransform : public IProcessor /* public ISimpleTransform */
|
||||
{
|
||||
@ -104,13 +106,12 @@ public:
|
||||
private:
|
||||
void advancePartitionEnd();
|
||||
void advanceGroupEnd();
|
||||
void advanceGroupEndGroups();
|
||||
void advanceGroupEndRows();
|
||||
void advanceGroupEndOrderBy();
|
||||
void advanceGroupEndTrivial();
|
||||
void advanceGroupEndRange();
|
||||
void advanceFrameStart();
|
||||
void advanceFrameEnd();
|
||||
void writeOutGroup();
|
||||
void initPerBlockCaches();
|
||||
|
||||
Columns & inputAt(const RowNumber & x)
|
||||
{
|
||||
@ -224,6 +225,9 @@ public:
|
||||
// need it, and we want to be able to drop the starting blocks to save memory.
|
||||
// The `partition_end` is past-the-end, as usual. When partition_ended = false,
|
||||
// it still haven't ended, and partition_end is the next row to check.
|
||||
// We still need to keep some not-too-far-away row in the partition, to use
|
||||
// it as an etalon for PARTITION BY comparison.
|
||||
RowNumber partition_etalon;
|
||||
RowNumber partition_end;
|
||||
bool partition_ended = false;
|
||||
|
||||
@ -233,6 +237,13 @@ public:
|
||||
RowNumber group_end;
|
||||
bool group_ended = false;
|
||||
|
||||
// The frame is [frame_start, frame_end) if frame_ended, and unknown
|
||||
// otherwise. Note that when we move to the next peer group, both the
|
||||
// frame_start and the frame_end may jump forward by an unknown amount of
|
||||
// blocks, e.g. if we use a RANGE frame. This means that sometimes we don't
|
||||
// know neither frame_end nor frame_start.
|
||||
// We update the states of the window functions as we track the frame
|
||||
// boundaries.
|
||||
// After we have found the final boundaries of the frame, we can immediately
|
||||
// output the result for the current group, w/o waiting for more data.
|
||||
RowNumber frame_start;
|
||||
|
@ -19,8 +19,8 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
|
||||
1 2
|
||||
0 2
|
||||
5 5
|
||||
4 5
|
||||
3 5
|
||||
4 4
|
||||
3 4
|
||||
8 8
|
||||
7 8
|
||||
6 8
|
||||
@ -120,33 +120,33 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
|
||||
0 2 1
|
||||
1 2 2
|
||||
2 2 3
|
||||
3 5 4
|
||||
4 5 5
|
||||
3 4 4
|
||||
4 4 5
|
||||
5 5 1
|
||||
6 8 2
|
||||
7 8 3
|
||||
8 8 4
|
||||
9 11 5
|
||||
10 11 1
|
||||
6 8 1
|
||||
7 8 2
|
||||
8 8 3
|
||||
9 10 4
|
||||
10 10 1
|
||||
11 11 2
|
||||
12 14 3
|
||||
13 14 4
|
||||
14 14 5
|
||||
15 17 1
|
||||
16 17 2
|
||||
17 17 3
|
||||
18 20 4
|
||||
19 20 5
|
||||
15 16 1
|
||||
16 16 1
|
||||
17 17 2
|
||||
18 20 3
|
||||
19 20 4
|
||||
20 20 1
|
||||
21 23 2
|
||||
22 23 3
|
||||
21 22 2
|
||||
22 22 3
|
||||
23 23 4
|
||||
24 26 5
|
||||
25 26 1
|
||||
26 26 2
|
||||
27 29 3
|
||||
28 29 4
|
||||
29 29 5
|
||||
26 26 1
|
||||
27 28 2
|
||||
28 28 3
|
||||
29 29 4
|
||||
30 30 1
|
||||
-- two functions over the same window
|
||||
-- an explain test would also be helpful, but it's too immature now and I don't
|
||||
@ -155,8 +155,8 @@ select number, max(number) over (partition by intDiv(number, 3) order by number
|
||||
0 2 3
|
||||
1 2 2
|
||||
2 2 1
|
||||
3 5 3
|
||||
4 5 2
|
||||
3 4 2
|
||||
4 4 1
|
||||
5 5 1
|
||||
6 6 1
|
||||
-- check that we can work with constant columns
|
||||
@ -230,3 +230,228 @@ from numbers(3);
|
||||
0
|
||||
1
|
||||
3
|
||||
--select
|
||||
-- sum(number)
|
||||
-- over (order by number groups between unbounded preceding and current row)
|
||||
--from numbers(3);
|
||||
|
||||
-- RANGE frame
|
||||
-- Try some mutually prime sizes of partition, group and block, for the number
|
||||
-- of rows that is their least common multiple so that we see all the interesting
|
||||
-- corner cases.
|
||||
select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 5
|
||||
;
|
||||
0 0 0 2
|
||||
1 0 1 3
|
||||
2 0 0 2
|
||||
3 1 1 3
|
||||
4 1 0 1
|
||||
5 1 1 3
|
||||
6 2 0 2
|
||||
7 2 1 3
|
||||
8 2 0 2
|
||||
9 3 1 3
|
||||
10 3 0 1
|
||||
11 3 1 3
|
||||
12 4 0 2
|
||||
13 4 1 3
|
||||
14 4 0 2
|
||||
15 5 1 3
|
||||
16 5 0 1
|
||||
17 5 1 3
|
||||
18 6 0 2
|
||||
19 6 1 3
|
||||
20 6 0 2
|
||||
21 7 1 3
|
||||
22 7 0 1
|
||||
23 7 1 3
|
||||
24 8 0 2
|
||||
25 8 1 3
|
||||
26 8 0 2
|
||||
27 9 1 3
|
||||
28 9 0 1
|
||||
29 9 1 3
|
||||
select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 2
|
||||
;
|
||||
0 0 0 2
|
||||
1 0 1 4
|
||||
2 0 2 5
|
||||
3 0 0 2
|
||||
4 0 1 4
|
||||
5 1 2 5
|
||||
6 1 0 2
|
||||
7 1 1 3
|
||||
8 1 2 5
|
||||
9 1 0 2
|
||||
10 2 1 3
|
||||
11 2 2 5
|
||||
12 2 0 1
|
||||
13 2 1 3
|
||||
14 2 2 5
|
||||
15 3 0 2
|
||||
16 3 1 4
|
||||
17 3 2 5
|
||||
18 3 0 2
|
||||
19 3 1 4
|
||||
20 4 2 5
|
||||
21 4 0 2
|
||||
22 4 1 3
|
||||
23 4 2 5
|
||||
24 4 0 2
|
||||
25 5 1 3
|
||||
26 5 2 5
|
||||
27 5 0 1
|
||||
28 5 1 3
|
||||
29 5 2 5
|
||||
select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 3
|
||||
;
|
||||
0 0 0 3
|
||||
1 0 1 5
|
||||
2 0 0 3
|
||||
3 0 1 5
|
||||
4 0 0 3
|
||||
5 1 1 5
|
||||
6 1 0 2
|
||||
7 1 1 5
|
||||
8 1 0 2
|
||||
9 1 1 5
|
||||
10 2 0 3
|
||||
11 2 1 5
|
||||
12 2 0 3
|
||||
13 2 1 5
|
||||
14 2 0 3
|
||||
15 3 1 5
|
||||
16 3 0 2
|
||||
17 3 1 5
|
||||
18 3 0 2
|
||||
19 3 1 5
|
||||
20 4 0 3
|
||||
21 4 1 5
|
||||
22 4 0 3
|
||||
23 4 1 5
|
||||
24 4 0 3
|
||||
25 5 1 5
|
||||
26 5 0 2
|
||||
27 5 1 5
|
||||
28 5 0 2
|
||||
29 5 1 5
|
||||
select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 2
|
||||
;
|
||||
0 0 0 1
|
||||
1 0 1 2
|
||||
2 0 2 3
|
||||
3 1 3 2
|
||||
4 1 4 3
|
||||
5 1 0 1
|
||||
6 2 1 1
|
||||
7 2 2 2
|
||||
8 2 3 3
|
||||
9 3 4 3
|
||||
10 3 0 1
|
||||
11 3 1 2
|
||||
12 4 2 1
|
||||
13 4 3 2
|
||||
14 4 4 3
|
||||
15 5 0 1
|
||||
16 5 1 2
|
||||
17 5 2 3
|
||||
18 6 3 2
|
||||
19 6 4 3
|
||||
20 6 0 1
|
||||
21 7 1 1
|
||||
22 7 2 2
|
||||
23 7 3 3
|
||||
24 8 4 3
|
||||
25 8 0 1
|
||||
26 8 1 2
|
||||
27 9 2 1
|
||||
28 9 3 2
|
||||
29 9 4 3
|
||||
select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 3
|
||||
;
|
||||
0 0 0 1
|
||||
1 0 1 2
|
||||
2 1 2 1
|
||||
3 1 3 2
|
||||
4 2 4 2
|
||||
5 2 0 1
|
||||
6 3 1 1
|
||||
7 3 2 2
|
||||
8 4 3 1
|
||||
9 4 4 2
|
||||
10 5 0 1
|
||||
11 5 1 2
|
||||
12 6 2 1
|
||||
13 6 3 2
|
||||
14 7 4 2
|
||||
15 7 0 1
|
||||
16 8 1 1
|
||||
17 8 2 2
|
||||
18 9 3 1
|
||||
19 9 4 2
|
||||
20 10 0 1
|
||||
21 10 1 2
|
||||
22 11 2 1
|
||||
23 11 3 2
|
||||
24 12 4 2
|
||||
25 12 0 1
|
||||
26 13 1 1
|
||||
27 13 2 2
|
||||
28 14 3 1
|
||||
29 14 4 2
|
||||
select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 5
|
||||
;
|
||||
0 0 0 1
|
||||
1 0 1 2
|
||||
2 1 2 2
|
||||
3 1 0 1
|
||||
4 2 1 1
|
||||
5 2 2 2
|
||||
6 3 0 1
|
||||
7 3 1 2
|
||||
8 4 2 2
|
||||
9 4 0 1
|
||||
10 5 1 1
|
||||
11 5 2 2
|
||||
12 6 0 1
|
||||
13 6 1 2
|
||||
14 7 2 2
|
||||
15 7 0 1
|
||||
16 8 1 1
|
||||
17 8 2 2
|
||||
18 9 0 1
|
||||
19 9 1 2
|
||||
20 10 2 2
|
||||
21 10 0 1
|
||||
22 11 1 1
|
||||
23 11 2 2
|
||||
24 12 0 1
|
||||
25 12 1 2
|
||||
26 13 2 2
|
||||
27 13 0 1
|
||||
28 14 1 1
|
||||
29 14 2 2
|
||||
|
@ -104,3 +104,49 @@ from numbers(3);
|
||||
-- sum(number)
|
||||
-- over (order by number groups between unbounded preceding and current row)
|
||||
--from numbers(3);
|
||||
|
||||
-- RANGE frame
|
||||
-- Try some mutually prime sizes of partition, group and block, for the number
|
||||
-- of rows that is their least common multiple so that we see all the interesting
|
||||
-- corner cases.
|
||||
select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 5
|
||||
;
|
||||
|
||||
select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 2
|
||||
;
|
||||
|
||||
select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 3
|
||||
;
|
||||
|
||||
select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 2
|
||||
;
|
||||
|
||||
select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 3
|
||||
;
|
||||
|
||||
select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
|
||||
from numbers(30)
|
||||
window w as (partition by p order by o range unbounded preceding)
|
||||
order by number
|
||||
settings max_block_size = 5
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user