Merge pull request #22459 from ClickHouse/aku/window-frame-offset

fix window frame offset check and add more tests
This commit is contained in:
Alexander Kuzmenkov 2021-04-02 19:49:59 +03:00 committed by GitHub
commit d0037329cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 129 additions and 36 deletions

View File

@ -397,8 +397,11 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t
throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName());
Field converted = convertFieldToType(from_value, to_type, from_type_hint);
if (!is_null && converted.isNull())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Cannot convert value{}: it cannot be represented as {}",
from_type_hint ? " from " + from_type_hint->getName() : "", to_type.getName());
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Cannot convert value '{}'{}: it cannot be represented as {}",
toString(from_value),
from_type_hint ? " from " + from_type_hint->getName() : "",
to_type.getName());
return converted;
}

View File

@ -2,6 +2,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <Common/Arena.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/ExpressionActions.h>
@ -48,7 +49,10 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
_compared_column);
const auto * reference_column = assert_cast<const ColumnType *>(
_reference_column);
const auto offset = _offset.get<typename ColumnType::ValueType>();
// Note that the storage type of offset returned by get<> is different, so
// we need to specify the type explicitly.
const typename ColumnType::ValueType offset
= _offset.get<typename ColumnType::ValueType>();
assert(offset >= 0);
const auto compared_value_data = compared_column->getDataAt(compared_row);
@ -62,32 +66,32 @@ static int compareValuesWithOffset(const IColumn * _compared_column,
reference_value_data.data);
bool is_overflow;
bool overflow_to_negative;
if (offset_is_preceding)
{
is_overflow = __builtin_sub_overflow(reference_value, offset,
&reference_value);
overflow_to_negative = offset > 0;
}
else
{
is_overflow = __builtin_add_overflow(reference_value, offset,
&reference_value);
overflow_to_negative = offset < 0;
}
// fmt::print(stderr,
// "compared [{}] = {}, ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
// "compared [{}] = {}, old ref {}, shifted ref [{}] = {}, offset {} preceding {} overflow {} to negative {}\n",
// compared_row, toString(compared_value),
// // fmt doesn't like char8_t.
// static_cast<Int64>(unalignedLoad<typename ColumnType::ValueType>(reference_value_data.data)),
// reference_row, toString(reference_value),
// toString(offset), offset_is_preceding,
// is_overflow, overflow_to_negative);
// is_overflow, offset_is_preceding);
if (is_overflow)
{
if (overflow_to_negative)
if (offset_is_preceding)
{
// Overflow to the negative, [compared] must be greater.
// We know that because offset is >= 0.
return 1;
}
else
@ -263,6 +267,14 @@ WindowTransform::WindowTransform(const Block & input_header_,
window_description.frame.begin_offset = convertFieldToTypeOrThrow(
window_description.frame.begin_offset,
*entry.type);
if (applyVisitor(FieldVisitorAccurateLess{},
window_description.frame.begin_offset, Field(0)))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Window frame start offset must be nonnegative, {} given",
window_description.frame.begin_offset);
}
}
if (window_description.frame.end_type
== WindowFrame::BoundaryType::Offset)
@ -270,6 +282,14 @@ WindowTransform::WindowTransform(const Block & input_header_,
window_description.frame.end_offset = convertFieldToTypeOrThrow(
window_description.frame.end_offset,
*entry.type);
// Validate the frame END offset after it has been converted to the ORDER BY
// column type: FieldVisitorAccurateLess compares it against zero, and a
// negative value is rejected with BAD_ARGUMENTS. The message names the
// "end" offset so that the user can tell which frame boundary is wrong
// (it used to say "start", copied from the begin_offset check).
if (applyVisitor(FieldVisitorAccurateLess{},
    window_description.frame.end_offset, Field(0)))
{
    throw Exception(ErrorCodes::BAD_ARGUMENTS,
        "Window frame end offset must be nonnegative, {} given",
        window_description.frame.end_offset);
}
}
}
}

View File

@ -771,6 +771,28 @@ order by x;
125 124 127 4
126 125 127 3
127 126 127 2
-- We need large offsets to trigger overflow to positive direction, or
-- else the frame end runs into partition end w/o overflow and doesn't move
-- after that. The frame from this query is equivalent to the entire partition.
select x, min(x) over w, max(x) over w, count(x) over w
from (
select toUInt8(if(mod(number, 2),
toInt64(255 - intDiv(number, 2)),
toInt64(intDiv(number, 2)))) x
from numbers(10)
)
window w as (order by x range between 255 preceding and 255 following)
order by x;
0 0 255 10
1 0 255 10
2 0 255 10
3 0 255 10
4 0 255 10
251 0 255 10
252 0 255 10
253 0 255 10
254 0 255 10
255 0 255 10
-- RANGE OFFSET ORDER BY DESC
select x, min(x) over w, max(x) over w, count(x) over w from (
select toUInt8(number) x from numbers(11)) t
@ -1004,6 +1026,8 @@ from numbers(5);
3 \N
-- variants of lag/lead that respect the frame
select number, p, pp,
lagInFrame(number) over w as lag1,
lagInFrame(number, number - pp) over w as lag2,
lagInFrame(number, number - pp, number * 11) over w as lag,
leadInFrame(number, number - pp, number * 11) over w as lead
from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
@ -1012,22 +1036,22 @@ window w as (partition by p order by number
order by number
settings max_block_size = 3;
;
0 0 0 0 0
1 0 0 0 2
2 0 0 0 4
3 0 0 0 33
4 0 0 0 44
5 1 5 5 5
6 1 5 5 7
7 1 5 5 9
8 1 5 5 88
9 1 5 5 99
10 2 10 10 10
11 2 10 10 12
12 2 10 10 14
13 2 10 10 143
14 2 10 10 154
15 3 15 15 15
0 0 0 0 0 0 0
1 0 0 0 0 0 2
2 0 0 1 0 0 4
3 0 0 2 0 0 33
4 0 0 3 0 0 44
5 1 5 0 5 5 5
6 1 5 5 5 5 7
7 1 5 6 5 5 9
8 1 5 7 5 5 88
9 1 5 8 5 5 99
10 2 10 0 10 10 10
11 2 10 10 10 10 12
12 2 10 11 10 10 14
13 2 10 12 10 10 143
14 2 10 13 10 10 154
15 3 15 0 15 15 15
-- case-insensitive SQL-standard synonyms for any and anyLast
select
number,
@ -1054,14 +1078,29 @@ select count() over () from numbers(4) where number < 2;
2
-- floating point RANGE frame
select
count(*) over (order by (toFloat32(number) as f32) range 5. preceding),
count(*) over (order by (toFloat64(number) as f64) range 5. preceding)
count(*) over (order by toFloat32(number) range 5. preceding),
count(*) over (order by toFloat64(number) range 5. preceding),
count(*) over (order by toFloat32(number) range between current row and 5. following),
count(*) over (order by toFloat64(number) range between current row and 5. following)
from numbers(7)
;
1 1
2 2
3 3
4 4
5 5
6 6
6 6
1 1 6 6
2 2 6 6
3 3 5 5
4 4 4 4
5 5 3 3
6 6 2 2
6 6 1 1
-- negative offsets should not be allowed
select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
---- a test with aggregate function that allocates memory in arena
select sum(a[length(a)])
from (
select groupArray(number) over (partition by modulo(number, 11)
order by modulo(number, 1111), number) a
from numbers_mt(10000)
) settings max_block_size = 7;
49995000

View File

@ -242,6 +242,19 @@ from (
window w as (order by x range between 1 preceding and 2 following)
order by x;
-- We need large offsets to trigger overflow to positive direction, or
-- else the frame end runs into partition end w/o overflow and doesn't move
-- after that. The frame from this query is equivalent to the entire partition.
select x, min(x) over w, max(x) over w, count(x) over w
from (
select toUInt8(if(mod(number, 2),
toInt64(255 - intDiv(number, 2)),
toInt64(intDiv(number, 2)))) x
from numbers(10)
)
window w as (order by x range between 255 preceding and 255 following)
order by x;
-- RANGE OFFSET ORDER BY DESC
select x, min(x) over w, max(x) over w, count(x) over w from (
select toUInt8(number) x from numbers(11)) t
@ -349,6 +362,8 @@ from numbers(5);
-- variants of lag/lead that respect the frame
select number, p, pp,
lagInFrame(number) over w as lag1,
lagInFrame(number, number - pp) over w as lag2,
lagInFrame(number, number - pp, number * 11) over w as lag,
leadInFrame(number, number - pp, number * 11) over w as lead
from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
@ -374,7 +389,23 @@ select count() over () from numbers(4) where number < 2;
-- floating point RANGE frame
select
count(*) over (order by (toFloat32(number) as f32) range 5. preceding),
count(*) over (order by (toFloat64(number) as f64) range 5. preceding)
count(*) over (order by toFloat32(number) range 5. preceding),
count(*) over (order by toFloat64(number) range 5. preceding),
count(*) over (order by toFloat32(number) range between current row and 5. following),
count(*) over (order by toFloat64(number) range between current row and 5. following)
from numbers(7)
;
-- negative offsets should not be allowed
select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
---- a test with aggregate function that allocates memory in arena
select sum(a[length(a)])
from (
select groupArray(number) over (partition by modulo(number, 11)
order by modulo(number, 1111), number) a
from numbers_mt(10000)
) settings max_block_size = 7;