Fix UBsan report in lcm()/gcd()

Changelog: - Check against INT64_MIN/INT64_MAX - Check against std::numeric_limits<>::min/max - Move common code into separate header - GCDLCMImpl.h - Forbid floats UBsan report [1]. [1]: https://clickhouse-test-reports.s3.yandex.net/19466/cb30a02540a0f223df6668c5f88ff84aa666ff54/fuzzer_ubsan/report.html#fail1
2024-11-21 23:21:59 +00:00 · 2021-01-24 21:28:12 +03:00 · 2021-01-24 21:28:12 +03:00 · 27a5794795
commit 27a5794795
parent f4a4d33c2d
11 changed files with 149 additions and 90 deletions
--- a/src/Functions/GCDLCMImpl.h
+++ b/src/Functions/GCDLCMImpl.h
@ -0,0 +1,67 @@
 #pragma once
 #include <DataTypes/NumberTraits.h>
 #include <Common/Exception.h>
 #include <numeric>
 #include <limits>
 #include <type_traits>
 #if !defined(ARCADIA_BUILD)
 #    include "config_core.h"
 #endif
 namespace DB
 {
 namespace ErrorCodes
 {
    extern const int NOT_IMPLEMENTED;
    extern const int DECIMAL_OVERFLOW;
 }
 template <class T>
 inline constexpr bool is_gcd_lcm_implemeted = !(is_big_int_v<T> || std::is_floating_point_v<T>);
 template <typename A, typename B, typename Impl, typename Name>
 struct GCDLCMImpl
 {
    using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
    static const constexpr bool allow_fixed_string = false;
    template <typename Result = ResultType>
    static inline std::enable_if_t<!is_gcd_lcm_implemeted<Result>, Result>
    apply(A, B)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not implemented for big integers and floats", Name::name);
    }
    template <typename Result = ResultType>
    static inline std::enable_if_t<is_gcd_lcm_implemeted<Result>, Result>
    apply(A a, B b)
    {
        throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
        throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
        using Int = typename NumberTraits::ToInteger<Result>::Type;
        if constexpr (is_signed_v<Result>)
        {
            /// gcd() internally uses std::abs()
            Int a_s = static_cast<Int>(a);
            Int b_s = static_cast<Int>(b);
            Int min = std::numeric_limits<Int>::min();
            Int max = std::numeric_limits<Int>::max();
            if (unlikely((a_s == min || a_s == max) || (b_s == min || b_s == max)))
                throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Intermediate result overflow (signed a = {}, signed b = {}, min = {}, max = {})", a_s, b_s, min, max);
        }
        return Impl::applyImpl(a, b);
    }
 #if USE_EMBEDDED_COMPILER
    static constexpr bool compilable = false; /// exceptions (and a non-trivial algorithm)
 #endif
 };
 }
--- a/src/Functions/gcd.cpp
+++ b/src/Functions/gcd.cpp
@ -1,45 +1,28 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionBinaryArithmetic.h>
-#include <numeric>
+#include <Functions/GCDLCMImpl.h>
 namespace DB
 {
 namespace ErrorCodes
 {
    extern const int NOT_IMPLEMENTED;
 }
 namespace
 {
 struct NameGCD { static constexpr auto name = "gcd"; };
 template <typename A, typename B>
-struct GCDImpl
+struct GCDImpl : public GCDLCMImpl<A, B, GCDImpl<A, B>, NameGCD>
 {
-    using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
+    using ResultType = typename GCDLCMImpl<A, B, GCDImpl, NameGCD>::ResultType;
    static const constexpr bool allow_fixed_string = false;
-    template <typename Result = ResultType>
+    static ResultType applyImpl(A a, B b)
    static inline Result apply([[maybe_unused]] A a, [[maybe_unused]] B b)
    {
-        if constexpr (is_big_int_v<A> || is_big_int_v<B> || is_big_int_v<Result>)
+        using Int = typename NumberTraits::ToInteger<ResultType>::Type;
-            throw Exception("GCD is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
+        return std::gcd(Int(a), Int(b));
        else
        {
            throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
            throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
            return std::gcd(
                typename NumberTraits::ToInteger<Result>::Type(a),
                typename NumberTraits::ToInteger<Result>::Type(b));
    }
    }
 #if USE_EMBEDDED_COMPILER
    static constexpr bool compilable = false; /// exceptions (and a non-trivial algorithm)
 #endif
 };
 struct NameGCD { static constexpr auto name = "gcd"; };
 using FunctionGCD = BinaryArithmeticOverloadResolver<GCDImpl, NameGCD, false>;
 }
--- a/src/Functions/lcm.cpp
+++ b/src/Functions/lcm.cpp
@ -1,10 +1,6 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionBinaryArithmetic.h>
-
+#include <Functions/GCDLCMImpl.h>
 #include <numeric>
 #include <limits>
 #include <type_traits>
 namespace
 {
@ -27,33 +23,21 @@ constexpr T abs(T value) noexcept
 namespace DB
 {
 namespace ErrorCodes
 {
    extern const int NOT_IMPLEMENTED;
 }
 namespace
 {
 struct NameLCM { static constexpr auto name = "lcm"; };
 template <typename A, typename B>
-struct LCMImpl
+struct LCMImpl : public GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>
 {
-    using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
+    using ResultType = typename GCDLCMImpl<A, B, LCMImpl<A, B>, NameLCM>::ResultType;
    static const constexpr bool allow_fixed_string = false;
-    template <typename Result = ResultType>
+    static ResultType applyImpl(A a, B b)
    static inline std::enable_if_t<is_big_int_v<A> || is_big_int_v<B> || is_big_int_v<Result>, Result>
    apply([[maybe_unused]] A a, [[maybe_unused]] B b)
    {
-        throw Exception("LCM is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
+        using Int = typename NumberTraits::ToInteger<ResultType>::Type;
-    }
+        using Unsigned = make_unsigned_t<Int>;
    template <typename Result = ResultType>
    static inline std::enable_if_t<!is_big_int_v<A> && !is_big_int_v<B> && !is_big_int_v<Result>, Result>
    apply([[maybe_unused]] A a, [[maybe_unused]] B b)
    {
        throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
        throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
        /** It's tempting to use std::lcm function.
          * But it has undefined behaviour on overflow.
@ -62,22 +46,14 @@ struct LCMImpl
          * (example: throw an exception or overflow in implementation specific way).
          */
        using Int = typename NumberTraits::ToInteger<Result>::Type;
        using Unsigned = make_unsigned_t<Int>;
        Unsigned val1 = abs<Int>(a) / std::gcd(Int(a), Int(b));
        Unsigned val2 = abs<Int>(b);
        /// Overflow in implementation specific way.
-        return Result(val1 * val2);
+        return ResultType(val1 * val2);
    }
 #if USE_EMBEDDED_COMPILER
    static constexpr bool compilable = false; /// exceptions (and a non-trivial algorithm)
 #endif
 };
 struct NameLCM { static constexpr auto name = "lcm"; };
 using FunctionLCM = BinaryArithmeticOverloadResolver<LCMImpl, NameLCM, false>;
 }
--- a/tests/queries/0_stateless/00515_gcd_lcm.reference
+++ b/tests/queries/0_stateless/00515_gcd_lcm.reference
@ -21,17 +21,3 @@
 4611686011984936962
 4611686011984936962
 2147483648
 256
 11
 64
 1
 2
 1
 1
 5120
 121
 256
 1
 4
 735
 64770
--- a/tests/queries/0_stateless/00515_gcd_lcm.sql
+++ b/tests/queries/0_stateless/00515_gcd_lcm.sql
@ -23,19 +23,19 @@ select lcm(255, 254);
 select lcm(2147483647, 2147483646);
 select lcm(4611686011984936962, 2147483647);
 select lcm(-2147483648, 1);
-- test gcd float will cast to int
+-- test gcd float
-select gcd(1280.1, 1024.1);
+select gcd(1280.1, 1024.1); -- { serverError 48 }
-select gcd(11.1, 121.1);
+select gcd(11.1, 121.1); -- { serverError 48 }
-select gcd(-256.1, 64.1);
+select gcd(-256.1, 64.1); -- { serverError 48 }
-select gcd(1.1, 1.1);
+select gcd(1.1, 1.1); -- { serverError 48 }
-select gcd(4.1, 2.1);
+select gcd(4.1, 2.1); -- { serverError 48 }
-select gcd(15.1, 49.1);
+select gcd(15.1, 49.1); -- { serverError 48 }
-select gcd(255.1, 254.1);
+select gcd(255.1, 254.1); -- { serverError 48 }
-- test lcm float cast to int
+-- test lcm float
-select lcm(1280.1, 1024.1);
+select lcm(1280.1, 1024.1); -- { serverError 48 }
-select lcm(11.1, 121.1);
+select lcm(11.1, 121.1); -- { serverError 48 }
-select lcm(-256.1, 64.1);
+select lcm(-256.1, 64.1); -- { serverError 48 }
-select lcm(1.1, 1.1);
+select lcm(1.1, 1.1); -- { serverError 48 }
-select lcm(4.1, 2.1);
+select lcm(4.1, 2.1); -- { serverError 48 }
-select lcm(15.1, 49.1);
+select lcm(15.1, 49.1); -- { serverError 48 }
-select lcm(255.1, 254.1);
+select lcm(255.1, 254.1); -- { serverError 48 }
--- a/tests/queries/0_stateless/01435_lcm_overflow.reference
+++ b/tests/queries/0_stateless/01435_lcm_overflow.reference
@ -5,4 +5,3 @@
 0
 0
 0
 0
--- a/tests/queries/0_stateless/01435_lcm_overflow.sql
+++ b/tests/queries/0_stateless/01435_lcm_overflow.sql
@ -6,5 +6,5 @@ SELECT lcm(-15, -10);
 -- Implementation specific result on overflow:
 SELECT ignore(lcm(256, 9223372036854775807));
 SELECT ignore(lcm(256, -9223372036854775807));
-SELECT ignore(lcm(-256, 9223372036854775807));
+SELECT ignore(lcm(-256, 9223372036854775807)); -- { serverError 407 }
 SELECT ignore(lcm(-256, -9223372036854775807));
--- a/tests/queries/0_stateless/01666_gcd_ubsan.reference
+++ b/tests/queries/0_stateless/01666_gcd_ubsan.reference
@ -0,0 +1,13 @@
 -- { echo }
 SELECT gcd(9223372036854775807, -9223372036854775808); -- { serverError 407 }
 SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT gcd(9223372036854775808, -1); -- { serverError 407 }
 SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
 170141183460469231731687303715884105720
 SELECT gcd(-9223372036854775806, -9223372036854775806);
 9223372036854775806
--- a/tests/queries/0_stateless/01666_gcd_ubsan.sql
+++ b/tests/queries/0_stateless/01666_gcd_ubsan.sql
@ -0,0 +1,11 @@
 -- { echo }
 SELECT gcd(9223372036854775807, -9223372036854775808); -- { serverError 407 }
 SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT gcd(9223372036854775808, -1); -- { serverError 407 }
 SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
 SELECT gcd(-9223372036854775806, -9223372036854775806);
--- a/tests/queries/0_stateless/01666_lcm_ubsan.reference
+++ b/tests/queries/0_stateless/01666_lcm_ubsan.reference
@ -0,0 +1,13 @@
 -- { echo }
 SELECT lcm(9223372036854775807, -9223372036854775808); -- { serverError 407 }
 SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT lcm(9223372036854775808, -1); -- { serverError 407 }
 SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
 170141183460469231731687303715884105720
 SELECT lcm(-9223372036854775806, -9223372036854775806);
 9223372036854775806
--- a/tests/queries/0_stateless/01666_lcm_ubsan.sql
+++ b/tests/queries/0_stateless/01666_lcm_ubsan.sql
@ -0,0 +1,11 @@
 -- { echo }
 SELECT lcm(9223372036854775807, -9223372036854775808); -- { serverError 407 }
 SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 }
 SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 }
 SELECT lcm(9223372036854775808, -1); -- { serverError 407 }
 SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 }
 SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 }
 SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720'));
 SELECT lcm(-9223372036854775806, -9223372036854775806);