Implementaion of window function nth_value.

1. Implemented the window function nth_value
2. Make the return type of lag/lead, and nth_value functions to be
ColumnNullable, thus the returned value can be null if it is out
of frame.
This commit is contained in:
ryzuo 2021-07-08 18:22:41 +08:00
parent 2796aa333f
commit 4eb053a303
3 changed files with 152 additions and 54 deletions

View File

@ -1558,9 +1558,85 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction
else
{
// Offset is inside the frame.
to.insertFrom(*transform->blockAt(target_row).input_columns[
workspace.argument_column_indices[0]],
target_row.row);
auto ptr = ColumnNullable::create(transform->blockAt(target_row).input_columns[
workspace.argument_column_indices[0]],
ColumnUInt8::create());
to.insertFrom(*ptr, target_row.row);
}
}
};
struct WindowFunctionNthValue final : public WindowFunction
{
WindowFunctionNthValue(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: WindowFunction(name_, argument_types_, parameters_)
{
if (!parameters.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} cannot be parameterized", name_);
}
if (argument_types.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes at least one argument", name_);
}
if(argument_types.size() != 2)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function '{}' accepts 2 arguments, {} given",
name_, argument_types.size());
}
}
DataTypePtr getReturnType() const override
{ return argument_types[0]; }
bool allocatesMemoryInArena() const override { return false; }
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
const auto & current_block = transform->blockAt(transform->current_row);
IColumn & to = *(current_block.output_columns[function_index]);
const auto & workspace = transform->workspaces[function_index];
int64_t offset = (*current_block.input_columns[
workspace.argument_column_indices[1]])[
transform->current_row.row].get<Int64>() - 1;
if (offset < 0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The offset for function {} must be non-negative, {} given",
getName(), offset);
}
if (offset > INT_MAX)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The offset for function {} must be less than {}, {} given",
getName(), INT_MAX, offset);
}
const auto [target_row, offset_left] = transform->moveRowNumber(transform->frame_start, offset);
if (offset_left != 0
|| target_row < transform->frame_start
|| transform->frame_end <= target_row)
{
// Offset is outside the frame.
to.insertDefault();
}
else
{
// Offset is inside the frame.
auto ptr = ColumnNullable::create(transform->blockAt(target_row).input_columns[
workspace.argument_column_indices[0]],
ColumnUInt8::create());
to.insertFrom(*ptr, target_row.row);
}
}
};
@ -1628,6 +1704,13 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
return std::make_shared<WindowFunctionLagLeadInFrame<true>>(
name, argument_types, parameters);
}, properties});
factory.registerFunction("nth_value", {[](const std::string & name,
const DataTypes & argument_types, const Array & parameters, const Settings *)
{
return std::make_shared<WindowFunctionNthValue>(
name, argument_types, parameters);
}, properties});
}
}

View File

@ -1095,6 +1095,48 @@ order by number
7 6 8
8 7 9
9 8 9
-- nth_value without specific frame range given
select
number,
nth_value(number, 1) over w as firstValue,
nth_value(number, 2) over w as secondValue,
nth_value(number, 3) over w as thirdValue,
nth_value(number, 4) over w as fourthValue
from numbers(10)
window w as (order by number)
order by number
;
0 0 NULL NULL NULL
1 0 1 NULL NULL
2 0 1 2 NULL
3 0 1 2 3
4 0 1 2 3
5 0 1 2 3
6 0 1 2 3
7 0 1 2 3
8 0 1 2 3
9 0 1 2 3
-- nth_value with frame range specified
select
number,
nth_value(number, 1) over w as firstValue,
nth_value(number, 2) over w as secondValue,
nth_value(number, 3) over w as thirdValue,
nth_value(number, 4) over w as fourthValue
from numbers(10)
window w as (order by number range between 1 preceding and 1 following)
order by number
;
0 0 1 NULL NULL
1 0 1 2 NULL
2 1 2 3 NULL
3 2 3 4 NULL
4 3 4 5 NULL
5 4 5 6 NULL
6 5 6 7 NULL
7 6 7 8 NULL
8 7 8 9 NULL
9 8 9 NULL NULL
-- In this case, we had a problem with PartialSortingTransform returning zero-row
-- chunks for input chunks w/o columns.
select count() over () from numbers(4) where number < 2;
@ -1136,31 +1178,3 @@ select count() over (rows between 2147483648 preceding and 2147493648 following)
select count() over () from (select 1 a) l inner join (select 2 a) r using a;
-- This case works as expected, one empty input chunk marked as input end.
select count() over () where null;
-- Inheriting another window.
select number, count() over (w1 rows unbounded preceding) from numbers(10)
window
w0 as (partition by intDiv(number, 5) as p),
w1 as (w0 order by mod(number, 3) as o)
order by p, o, number
;
0 1
3 2
1 3
4 4
2 5
6 1
9 2
7 3
5 4
8 5
-- can't redefine PARTITION BY
select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 }
-- can't redefine existing ORDER BY
select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 }
-- parent window can't have frame
select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 }
-- looks weird but probably should work -- this is a window that inherits and changes nothing
select count() over (w) from numbers(1) window w as ();
1
-- nonexistent parent window
select count() over (w2 rows unbounded preceding); -- { serverError 36 }

View File

@ -403,6 +403,30 @@ window w as (order by number range between 1 preceding and 1 following)
order by number
;
-- nth_value without specific frame range given
select
number,
nth_value(number, 1) over w as firstValue,
nth_value(number, 2) over w as secondValue,
nth_value(number, 3) over w as thirdValue,
nth_value(number, 4) over w as fourthValue
from numbers(10)
window w as (order by number)
order by number
;
-- nth_value with frame range specified
select
number,
nth_value(number, 1) over w as firstValue,
nth_value(number, 2) over w as secondValue,
nth_value(number, 3) over w as thirdValue,
nth_value(number, 4) over w as fourthValue
from numbers(10)
window w as (order by number range between 1 preceding and 1 following)
order by number
;
-- In this case, we had a problem with PartialSortingTransform returning zero-row
-- chunks for input chunks w/o columns.
select count() over () from numbers(4) where number < 2;
@ -439,26 +463,3 @@ select count() over (rows between 2147483648 preceding and 2147493648 following)
select count() over () from (select 1 a) l inner join (select 2 a) r using a;
-- This case works as expected, one empty input chunk marked as input end.
select count() over () where null;
-- Inheriting another window.
select number, count() over (w1 rows unbounded preceding) from numbers(10)
window
w0 as (partition by intDiv(number, 5) as p),
w1 as (w0 order by mod(number, 3) as o)
order by p, o, number
;
-- can't redefine PARTITION BY
select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 }
-- can't redefine existing ORDER BY
select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 }
-- parent window can't have frame
select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 }
-- looks weird but probably should work -- this is a window that inherits and changes nothing
select count() over (w) from numbers(1) window w as ();
-- nonexistent parent window
select count() over (w2 rows unbounded preceding); -- { serverError 36 }