diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index a97ef41204a..05d75d4647e 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -86,6 +86,38 @@ void WindowFrame::toString(WriteBuffer & buf) const void WindowFrame::checkValid() const { + // Check the validity of offsets. + if (type == WindowFrame::FrameType::Rows + || type == WindowFrame::FrameType::Groups) + { + if (begin_type == BoundaryType::Offset + && !((begin_offset.getType() == Field::Types::UInt64 + || begin_offset.getType() == Field::Types::Int64) + && begin_offset.get() >= 0 + && begin_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame start offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given.", + toString(type), + applyVisitor(FieldVisitorToString(), begin_offset), + Field::Types::toString(begin_offset.getType())); + } + + if (end_type == BoundaryType::Offset + && !((end_offset.getType() == Field::Types::UInt64 + || end_offset.getType() == Field::Types::Int64) + && end_offset.get() >= 0 + && end_offset.get() < INT_MAX)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Frame end offset for '{}' frame must be a nonnegative 32-bit integer, '{}' of type '{}' given.", + toString(type), + applyVisitor(FieldVisitorToString(), end_offset), + Field::Types::toString(end_offset.getType())); + } + } + + // Check relative positioning of offsets. // UNBOUNDED PRECEDING end and UNBOUNDED FOLLOWING start should have been // forbidden at the parsing level. assert(!(begin_type == BoundaryType::Unbounded && !begin_preceding)); diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 84c178790b2..3e635b2accc 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -580,18 +580,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p else if (parser_literal.parse(pos, ast_literal, expected)) { const Field & value = ast_literal->as().value; - if ((node->frame.type == WindowFrame::FrameType::Rows - || node->frame.type == WindowFrame::FrameType::Groups) - && !(value.getType() == Field::Types::UInt64 - || (value.getType() == Field::Types::Int64 - && value.get() >= 0))) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.", - WindowFrame::toString(node->frame.type), - applyVisitor(FieldVisitorToString(), value), - Field::Types::toString(value.getType())); - } node->frame.begin_offset = value; node->frame.begin_type = WindowFrame::BoundaryType::Offset; } @@ -641,18 +629,6 @@ static bool tryParseFrameDefinition(ASTWindowDefinition * node, IParser::Pos & p else if (parser_literal.parse(pos, ast_literal, expected)) { const Field & value = ast_literal->as().value; - if ((node->frame.type == WindowFrame::FrameType::Rows - || node->frame.type == WindowFrame::FrameType::Groups) - && !(value.getType() == Field::Types::UInt64 - || (value.getType() == Field::Types::Int64 - && value.get() >= 0))) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Frame offset for '{}' frame must be a nonnegative integer, '{}' of type '{}' given.", - WindowFrame::toString(node->frame.type), - applyVisitor(FieldVisitorToString(), value), - Field::Types::toString(value.getType())); - } node->frame.end_offset = value; node->frame.end_type = WindowFrame::BoundaryType::Offset; } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 16d028f0fc1..414e45dab38 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -257,10 +257,9 @@ WindowTransform::WindowTransform(const Block & input_header_, const IColumn * column = entry.column.get(); APPLY_FOR_TYPES(compareValuesWithOffset) - // Check that the offset type matches the window type. // Convert the offsets to the ORDER BY column type. We can't just check - // that it matches, because e.g. the int literals are always (U)Int64, - // but the column might be Int8 and so on. + // that the type matches, because e.g. the int literals are always + // (U)Int64, but the column might be Int8 and so on. if (window_description.frame.begin_type == WindowFrame::BoundaryType::Offset) { @@ -435,6 +434,9 @@ auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int offset) con assertValid(x); assert(offset <= 0); + // abs(offset) is less than INT_MAX, as checked in the parser, so + // this negation should always work. + assert(offset >= -INT_MAX); if (x.row >= static_cast(-offset)) { x.row -= -offset; @@ -1500,6 +1502,12 @@ struct WindowFunctionLagLeadInFrame final : public WindowFunction "The offset for function {} must be nonnegative, {} given", getName(), offset); } + if (offset > INT_MAX) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "The offset for function {} must be less than {}, {} given", + getName(), INT_MAX, offset); + } } const auto [target_row, offset_left] = transform->moveRowNumber( diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 9067ee8d955..afc20f67406 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -942,8 +942,9 @@ FROM numbers(2) ; 1 0 1 1 --- optimize_read_in_order conflicts with sorting for window functions, must --- be disabled. +-- optimize_read_in_order conflicts with sorting for window functions, check that +-- it is disabled. +drop table if exists window_mt; create table window_mt engine MergeTree order by number as select number, mod(number, 3) p from numbers(100); select number, count(*) over (partition by p) @@ -1096,7 +1097,7 @@ select count() over (order by toInt64(number) range between -1 preceding and unb select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } ----- a test with aggregate function that allocates memory in arena +-- a test with aggregate function that allocates memory in arena select sum(a[length(a)]) from ( select groupArray(number) over (partition by modulo(number, 11) @@ -1104,3 +1105,6 @@ from ( from numbers_mt(10000) ) settings max_block_size = 7; 49995000 +-- -INT_MIN row offset that can lead to problems with negation, found when fuzzing +-- under UBSan. Should be limited to at most INT_MAX. +select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 85856dd797d..44b0bba0b27 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -329,8 +329,9 @@ SELECT FROM numbers(2) ; --- optimize_read_in_order conflicts with sorting for window functions, must --- be disabled. +-- optimize_read_in_order conflicts with sorting for window functions, check that +-- it is disabled. +drop table if exists window_mt; create table window_mt engine MergeTree order by number as select number, mod(number, 3) p from numbers(100); @@ -402,10 +403,14 @@ select count() over (order by toInt64(number) range between -1 following and unb select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } ----- a test with aggregate function that allocates memory in arena +-- a test with aggregate function that allocates memory in arena select sum(a[length(a)]) from ( select groupArray(number) over (partition by modulo(number, 11) order by modulo(number, 1111), number) a from numbers_mt(10000) ) settings max_block_size = 7; + +-- -INT_MIN row offset that can lead to problems with negation, found when fuzzing +-- under UBSan. Should be limited to at most INT_MAX. +select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 }