From 1bb8cc5c9ab8659b97082abc2a678cbe857c92ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 21 Jan 2021 11:10:31 +0300 Subject: [PATCH] Avoid UBSan report in arrayElement --- src/Functions/array/arrayElement.cpp | 27 ++++++++++++++----- .../01657_array_element_ubsan.reference | 26 ++++++++++++++++++ .../0_stateless/01657_array_element_ubsan.sql | 19 +++++++++++++ 3 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/01657_array_element_ubsan.reference create mode 100644 tests/queries/0_stateless/01657_array_element_ubsan.sql diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 88166f04e0e..7d053988cae 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -231,7 +231,7 @@ struct ArrayElementNumImpl if (builder) builder.update(j); } - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) { size_t j = offsets[i] + index; result[i] = data[j]; @@ -329,7 +329,7 @@ struct ArrayElementStringImpl TIndex index = indices[i]; if (index > 0 && static_cast(index) <= array_size) adjusted_index = index - 1; - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) adjusted_index = array_size + index; else adjusted_index = array_size; /// means no element should be taken @@ -427,7 +427,7 @@ struct ArrayElementGenericImpl if (builder) builder.update(j); } - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) { size_t j = offsets[i] + index; result.insertFrom(data, j); @@ -472,11 +472,24 @@ ColumnPtr FunctionArrayElement::executeNumberConst( auto col_res = ColumnVector::create(); if (index.getType() == Field::Types::UInt64) + { ArrayElementNumImpl::template vectorConst( col_nested->getData(), col_array->getOffsets(), safeGet(index) - 1, col_res->getData(), builder); + } else if (index.getType() == Field::Types::Int64) + { + /// Cast to UInt64 before negation allows to avoid undefined behaviour for negation of the most negative number. + /// NOTE: this would be undefined behaviour in C++ sense, but nevertheless, compiler cannot see it on user provided data, + /// and generates the code that we want on supported CPU architectures (overflow in sense of two's complement arithmetic). + /// This is only needed to avoid UBSan report. + + /// Negative array indices work this way: + /// arr[-1] is the element at offset 0 from the last + /// arr[-2] is the element at offset 1 from the last and so on. + ArrayElementNumImpl::template vectorConst( - col_nested->getData(), col_array->getOffsets(), -safeGet(index) - 1, col_res->getData(), builder); + col_nested->getData(), col_array->getOffsets(), -(UInt64(safeGet(index)) + 1), col_res->getData(), builder); + } else throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); @@ -534,7 +547,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_nested->getChars(), col_array->getOffsets(), col_nested->getOffsets(), - -safeGet(index) - 1, + -(UInt64(safeGet(index)) + 1), col_res->getChars(), col_res->getOffsets(), builder); @@ -588,7 +601,7 @@ ColumnPtr FunctionArrayElement::executeGenericConst( col_nested, col_array->getOffsets(), safeGet(index) - 1, *col_res, builder); else if (index.getType() == Field::Types::Int64) ArrayElementGenericImpl::vectorConst( - col_nested, col_array->getOffsets(), -safeGet(index) - 1, *col_res, builder); + col_nested, col_array->getOffsets(), -(UInt64(safeGet(index) + 1)), *col_res, builder); else throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); @@ -639,7 +652,7 @@ ColumnPtr FunctionArrayElement::executeConst(const ColumnsWithTypeAndName & argu if (builder) builder.update(j); } - else if (index < 0 && static_cast(-index) <= array_size) + else if (index < 0 && -static_cast(index) <= array_size) { size_t j = array_size + index; res->insertFrom(array_elements, j); diff --git a/tests/queries/0_stateless/01657_array_element_ubsan.reference b/tests/queries/0_stateless/01657_array_element_ubsan.reference new file mode 100644 index 00000000000..14e3161f529 --- /dev/null +++ b/tests/queries/0_stateless/01657_array_element_ubsan.reference @@ -0,0 +1,26 @@ +0 +0 +0 +0 +--- +0 +0 +0 +--- +0 +0 +0 +0 +0 +0 +0 +1 +--- +0 +0 +0 +0 +0 +0 +0 +1 diff --git a/tests/queries/0_stateless/01657_array_element_ubsan.sql b/tests/queries/0_stateless/01657_array_element_ubsan.sql new file mode 100644 index 00000000000..82ddf643389 --- /dev/null +++ b/tests/queries/0_stateless/01657_array_element_ubsan.sql @@ -0,0 +1,19 @@ +SELECT [number][10000000000] FROM numbers(1); +SELECT [number][-10000000000] FROM numbers(1); + +SELECT [number][-0x8000000000000000] FROM numbers(1); +SELECT [number][0xFFFFFFFFFFFFFFFF] FROM numbers(1); + +SELECT '---'; + +SELECT [materialize(1)][0xFFFFFFFFFFFFFFFF]; +SELECT [materialize(1)][materialize(18446744073709551615)]; +SELECT [materialize(1)][-0x8000000000000000]; + +SELECT '---'; + +SELECT [number][arrayJoin([-0x8000000000000000, -10000000000, 0, -1])] FROM numbers(2); + +SELECT '---'; + +SELECT [number][arrayJoin([0xFFFFFFFFFFFFFFFF, 10000000000, 0, 1])] FROM numbers(2);