mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #22459 from ClickHouse/aku/window-frame-offset
fix window frame offset check and add more tests
This commit is contained in:
commit
d0037329cf
@ -397,8 +397,11 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName());
|
||||
Field converted = convertFieldToType(from_value, to_type, from_type_hint);
|
||||
if (!is_null && converted.isNull())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Cannot convert value{}: it cannot be represented as {}",
|
||||
from_type_hint ? " from " + from_type_hint->getName() : "", to_type.getName());
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
|
||||
"Cannot convert value '{}'{}: it cannot be represented as {}",
|
||||
toString(from_value),
|
||||
from_type_hint ? " from " + from_type_hint->getName() : "",
|
||||
to_type.getName());
|
||||
return converted;
|
||||
}
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/FieldVisitorsAccurateComparison.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
@ -48,7 +49,10 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
|
||||
_compared_column);
|
||||
const auto * reference_column = assert_cast<const ColumnType *>(
|
||||
_reference_column);
|
||||
const auto offset = _offset.get<typename ColumnType::ValueType>();
|
||||
// Note that the storage type of offset returned by get<> is different, so
|
||||
// we need to specify the type explicitly.
|
||||
const typename ColumnType::ValueType offset
|
||||
= _offset.get<typename ColumnType::ValueType>();
|
||||
assert(offset >= 0);
|
||||
|
||||
const auto compared_value_data = compared_column->getDataAt(compared_row);
|
||||
@ -62,32 +66,32 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
|
||||
reference_value_data.data);
|
||||
|
||||
bool is_overflow;
|
||||
bool overflow_to_negative;
|
||||
if (offset_is_preceding)
|
||||
{
|
||||
is_overflow = __builtin_sub_overflow(reference_value, offset,
|
||||
&reference_value);
|
||||
overflow_to_negative = offset > 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
is_overflow = __builtin_add_overflow(reference_value, offset,
|
||||
&reference_value);
|
||||
overflow_to_negative = offset < 0;
|
||||
}
|
||||
|
||||
// fmt::print(stderr,
|
||||
// "compared [{}] = {}, ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
|
||||
// "compared [{}] = {}, old ref {}, shifted ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
|
||||
// compared_row, toString(compared_value),
|
||||
// // fmt doesn't like char8_t.
|
||||
// static_cast<Int64>(unalignedLoad<typename ColumnType::ValueType>(reference_value_data.data)),
|
||||
// reference_row, toString(reference_value),
|
||||
// toString(offset), offset_is_preceding,
|
||||
// is_overflow, overflow_to_negative);
|
||||
// is_overflow, offset_is_preceding);
|
||||
|
||||
if (is_overflow)
|
||||
{
|
||||
if (overflow_to_negative)
|
||||
if (offset_is_preceding)
|
||||
{
|
||||
// Overflow to the negative, [compared] must be greater.
|
||||
// We know that because offset is >= 0.
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
@ -263,6 +267,14 @@ WindowTransform::WindowTransform(const Block & input_header_,
|
||||
window_description.frame.begin_offset = convertFieldToTypeOrThrow(
|
||||
window_description.frame.begin_offset,
|
||||
*entry.type);
|
||||
|
||||
if (applyVisitor(FieldVisitorAccurateLess{},
|
||||
window_description.frame.begin_offset, Field(0)))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Window frame start offset must be nonnegative, {} given",
|
||||
window_description.frame.begin_offset);
|
||||
}
|
||||
}
|
||||
if (window_description.frame.end_type
|
||||
== WindowFrame::BoundaryType::Offset)
|
||||
@ -270,6 +282,14 @@ WindowTransform::WindowTransform(const Block & input_header_,
|
||||
window_description.frame.end_offset = convertFieldToTypeOrThrow(
|
||||
window_description.frame.end_offset,
|
||||
*entry.type);
|
||||
|
||||
if (applyVisitor(FieldVisitorAccurateLess{},
|
||||
window_description.frame.end_offset, Field(0)))
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Window frame start offset must be nonnegative, {} given",
|
||||
window_description.frame.end_offset);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -771,6 +771,28 @@ order by x;
|
||||
125 124 127 4
|
||||
126 125 127 3
|
||||
127 126 127 2
|
||||
-- We need large offsets to trigger overflow to positive direction, or
|
||||
-- else the frame end runs into partition end w/o overflow and doesn't move
|
||||
-- after that. The frame from this query is equivalent to the entire partition.
|
||||
select x, min(x) over w, max(x) over w, count(x) over w
|
||||
from (
|
||||
select toUInt8(if(mod(number, 2),
|
||||
toInt64(255 - intDiv(number, 2)),
|
||||
toInt64(intDiv(number, 2)))) x
|
||||
from numbers(10)
|
||||
)
|
||||
window w as (order by x range between 255 preceding and 255 following)
|
||||
order by x;
|
||||
0 0 255 10
|
||||
1 0 255 10
|
||||
2 0 255 10
|
||||
3 0 255 10
|
||||
4 0 255 10
|
||||
251 0 255 10
|
||||
252 0 255 10
|
||||
253 0 255 10
|
||||
254 0 255 10
|
||||
255 0 255 10
|
||||
-- RANGE OFFSET ORDER BY DESC
|
||||
select x, min(x) over w, max(x) over w, count(x) over w from (
|
||||
select toUInt8(number) x from numbers(11)) t
|
||||
@ -1004,6 +1026,8 @@ from numbers(5);
|
||||
3 \N
|
||||
-- variants of lag/lead that respect the frame
|
||||
select number, p, pp,
|
||||
lagInFrame(number) over w as lag1,
|
||||
lagInFrame(number, number - pp) over w as lag2,
|
||||
lagInFrame(number, number - pp, number * 11) over w as lag,
|
||||
leadInFrame(number, number - pp, number * 11) over w as lead
|
||||
from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
|
||||
@ -1012,22 +1036,22 @@ window w as (partition by p order by number
|
||||
order by number
|
||||
settings max_block_size = 3;
|
||||
;
|
||||
0 0 0 0 0
|
||||
1 0 0 0 2
|
||||
2 0 0 0 4
|
||||
3 0 0 0 33
|
||||
4 0 0 0 44
|
||||
5 1 5 5 5
|
||||
6 1 5 5 7
|
||||
7 1 5 5 9
|
||||
8 1 5 5 88
|
||||
9 1 5 5 99
|
||||
10 2 10 10 10
|
||||
11 2 10 10 12
|
||||
12 2 10 10 14
|
||||
13 2 10 10 143
|
||||
14 2 10 10 154
|
||||
15 3 15 15 15
|
||||
0 0 0 0 0 0 0
|
||||
1 0 0 0 0 0 2
|
||||
2 0 0 1 0 0 4
|
||||
3 0 0 2 0 0 33
|
||||
4 0 0 3 0 0 44
|
||||
5 1 5 0 5 5 5
|
||||
6 1 5 5 5 5 7
|
||||
7 1 5 6 5 5 9
|
||||
8 1 5 7 5 5 88
|
||||
9 1 5 8 5 5 99
|
||||
10 2 10 0 10 10 10
|
||||
11 2 10 10 10 10 12
|
||||
12 2 10 11 10 10 14
|
||||
13 2 10 12 10 10 143
|
||||
14 2 10 13 10 10 154
|
||||
15 3 15 0 15 15 15
|
||||
-- case-insensitive SQL-standard synonyms for any and anyLast
|
||||
select
|
||||
number,
|
||||
@ -1054,14 +1078,29 @@ select count() over () from numbers(4) where number < 2;
|
||||
2
|
||||
-- floating point RANGE frame
|
||||
select
|
||||
count(*) over (order by (toFloat32(number) as f32) range 5. preceding),
|
||||
count(*) over (order by (toFloat64(number) as f64) range 5. preceding)
|
||||
count(*) over (order by toFloat32(number) range 5. preceding),
|
||||
count(*) over (order by toFloat64(number) range 5. preceding),
|
||||
count(*) over (order by toFloat32(number) range between current row and 5. following),
|
||||
count(*) over (order by toFloat64(number) range between current row and 5. following)
|
||||
from numbers(7)
|
||||
;
|
||||
1 1
|
||||
2 2
|
||||
3 3
|
||||
4 4
|
||||
5 5
|
||||
6 6
|
||||
6 6
|
||||
1 1 6 6
|
||||
2 2 6 6
|
||||
3 3 5 5
|
||||
4 4 4 4
|
||||
5 5 3 3
|
||||
6 6 2 2
|
||||
6 6 1 1
|
||||
-- negative offsets should not be allowed
|
||||
select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
|
||||
select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
|
||||
select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
|
||||
select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
|
||||
---- a test with aggregate function that allocates memory in arena
|
||||
select sum(a[length(a)])
|
||||
from (
|
||||
select groupArray(number) over (partition by modulo(number, 11)
|
||||
order by modulo(number, 1111), number) a
|
||||
from numbers_mt(10000)
|
||||
) settings max_block_size = 7;
|
||||
49995000
|
||||
|
@ -242,6 +242,19 @@ from (
|
||||
window w as (order by x range between 1 preceding and 2 following)
|
||||
order by x;
|
||||
|
||||
-- We need large offsets to trigger overflow to positive direction, or
|
||||
-- else the frame end runs into partition end w/o overflow and doesn't move
|
||||
-- after that. The frame from this query is equivalent to the entire partition.
|
||||
select x, min(x) over w, max(x) over w, count(x) over w
|
||||
from (
|
||||
select toUInt8(if(mod(number, 2),
|
||||
toInt64(255 - intDiv(number, 2)),
|
||||
toInt64(intDiv(number, 2)))) x
|
||||
from numbers(10)
|
||||
)
|
||||
window w as (order by x range between 255 preceding and 255 following)
|
||||
order by x;
|
||||
|
||||
-- RANGE OFFSET ORDER BY DESC
|
||||
select x, min(x) over w, max(x) over w, count(x) over w from (
|
||||
select toUInt8(number) x from numbers(11)) t
|
||||
@ -349,6 +362,8 @@ from numbers(5);
|
||||
|
||||
-- variants of lag/lead that respect the frame
|
||||
select number, p, pp,
|
||||
lagInFrame(number) over w as lag1,
|
||||
lagInFrame(number, number - pp) over w as lag2,
|
||||
lagInFrame(number, number - pp, number * 11) over w as lag,
|
||||
leadInFrame(number, number - pp, number * 11) over w as lead
|
||||
from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
|
||||
@ -374,7 +389,23 @@ select count() over () from numbers(4) where number < 2;
|
||||
|
||||
-- floating point RANGE frame
|
||||
select
|
||||
count(*) over (order by (toFloat32(number) as f32) range 5. preceding),
|
||||
count(*) over (order by (toFloat64(number) as f64) range 5. preceding)
|
||||
count(*) over (order by toFloat32(number) range 5. preceding),
|
||||
count(*) over (order by toFloat64(number) range 5. preceding),
|
||||
count(*) over (order by toFloat32(number) range between current row and 5. following),
|
||||
count(*) over (order by toFloat64(number) range between current row and 5. following)
|
||||
from numbers(7)
|
||||
;
|
||||
|
||||
-- negative offsets should not be allowed
|
||||
select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
|
||||
select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
|
||||
select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
|
||||
select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
|
||||
|
||||
---- a test with aggregate function that allocates memory in arena
|
||||
select sum(a[length(a)])
|
||||
from (
|
||||
select groupArray(number) over (partition by modulo(number, 11)
|
||||
order by modulo(number, 1111), number) a
|
||||
from numbers_mt(10000)
|
||||
) settings max_block_size = 7;
|
||||
|
Loading…
Reference in New Issue
Block a user