Merge pull request #23782 from ClickHouse/merging-array-product-function

Merging array product function
2024-11-21 15:12:02 +00:00 · 2021-05-01 00:53:57 +03:00 · 2021-05-01 00:53:57 +03:00 · fa1e9de7f7
commit fa1e9de7f7
parent 9f63320589 bcfcfe7fe9
3 changed files with 147 additions and 36 deletions
--- a/src/Functions/array/arrayAggregation.cpp
+++ b/src/Functions/array/arrayAggregation.cpp
@ -15,6 +15,8 @@ namespace ErrorCodes
 {
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ILLEGAL_COLUMN;
+    extern const int DECIMAL_OVERFLOW;
+    extern const int ARGUMENT_OUT_OF_BOUND;
 }

 enum class AggregateOperation
@ -22,7 +24,8 @@ enum class AggregateOperation
    min,
    max,
    sum,
-    average
+    average,
+    product
 };

 /**
@ -54,6 +57,12 @@ struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::average>
    using Result = Float64;
 };

+template <typename ArrayElement>
+struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::product> /// result-type trait for the product operation
+{
+    using Result = Float64; /// product is reported as Float64 for every element type, like the average specialization above
+};
+
 template <typename ArrayElement>
 struct ArrayAggregateResultImpl<ArrayElement, AggregateOperation::sum>
 {
@ -86,7 +95,7 @@ struct ArrayAggregateImpl
            using Types = std::decay_t<decltype(types)>;
            using DataType = typename Types::LeftType;

-            if constexpr (aggregate_operation == AggregateOperation::average)
+            if constexpr (aggregate_operation == AggregateOperation::average || aggregate_operation == AggregateOperation::product)
            {
                result = std::make_shared<DataTypeFloat64>();

@ -124,17 +133,17 @@ struct ArrayAggregateImpl
    template <typename Element>
    static NO_SANITIZE_UNDEFINED bool executeType(const ColumnPtr & mapped, const ColumnArray::Offsets & offsets, ColumnPtr & res_ptr)
    {
-        using Result = ArrayAggregateResult<Element, aggregate_operation>;
+        using ResultType = ArrayAggregateResult<Element, aggregate_operation>;
        using ColVecType = std::conditional_t<IsDecimalNumber<Element>, ColumnDecimal<Element>, ColumnVector<Element>>;
-        using ColVecResult = std::conditional_t<IsDecimalNumber<Result>, ColumnDecimal<Result>, ColumnVector<Result>>;
+        using ColVecResultType = std::conditional_t<IsDecimalNumber<ResultType>, ColumnDecimal<ResultType>, ColumnVector<ResultType>>;

-        /// For average of array we return Float64 as result, but we want to keep precision
-        /// so we convert to Float64 as last step, but intermediate sum is represented as result of sum operation
-        static constexpr bool is_average_operation = aggregate_operation == AggregateOperation::average;
+        /// For average and product of array we return Float64 as result, but we want to keep precision
+        /// so we convert to Float64 as last step, but intermediate value is represented as result of sum operation
+        static constexpr bool is_average_or_product_operation = aggregate_operation == AggregateOperation::average ||
+            aggregate_operation == AggregateOperation::product;
        using SummAggregationType = ArrayAggregateResult<Element, AggregateOperation::sum>;

-        using AggregationType = std::conditional_t<is_average_operation, SummAggregationType, Result>;
-
+        using AggregationType = std::conditional_t<is_average_or_product_operation, SummAggregationType, ResultType>;

        const ColVecType * column = checkAndGetColumn<ColVecType>(&*mapped);

@ -147,18 +156,15 @@ struct ArrayAggregateImpl
                return false;

            const AggregationType x = column_const->template getValue<Element>(); // NOLINT
-            const typename ColVecType::Container & data
-                = checkAndGetColumn<ColVecType>(&column_const->getDataColumn())->getData();
+            const auto & data = checkAndGetColumn<ColVecType>(&column_const->getDataColumn())->getData();

-            typename ColVecResult::MutablePtr res_column;
+            typename ColVecResultType::MutablePtr res_column;
            if constexpr (IsDecimalNumber<Element>)
-            {
-                res_column = ColVecResult::create(offsets.size(), data.getScale());
-            }
+                res_column = ColVecResultType::create(offsets.size(), data.getScale());
            else
-                res_column = ColVecResult::create(offsets.size());
+                res_column = ColVecResultType::create(offsets.size());

-            typename ColVecResult::Container & res = res_column->getData();
+            auto & res = res_column->getData();

            size_t pos = 0;
            for (size_t i = 0; i < offsets.size(); ++i)
@ -178,13 +184,45 @@ struct ArrayAggregateImpl
                {
                    if constexpr (IsDecimalNumber<Element>)
                    {
-                        res[i] = DecimalUtils::convertTo<Result>(x, data.getScale());
+                        res[i] = DecimalUtils::convertTo<ResultType>(x, data.getScale());
                    }
                    else
                    {
                        res[i] = x;
                    }
                }
+                else if constexpr (aggregate_operation == AggregateOperation::product)
+                {
+                    size_t array_size = offsets[i] - pos;
+                    AggregationType product = x;
+
+                    if constexpr (IsDecimalNumber<Element>)
+                    {
+                        using T = decltype(x.value);
+                        T x_val = x.value;
+
+                        for (size_t array_index = 1; array_index < array_size; ++array_index)
+                        {
+                            T product_val = product.value;
+
+                            if (common::mulOverflow(x_val, product_val, product.value))
+                                throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
+                        }
+
+                        auto result_scale = data.getScale() * array_size;
+                        if (unlikely(result_scale > DecimalUtils::max_precision<AggregationType>))
+                            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale);
+
+                        res[i] = DecimalUtils::convertTo<ResultType>(product, data.getScale() * array_size);
+                    }
+                    else
+                    {
+                        for (size_t array_index = 1; array_index < array_size; ++array_index)
+                            product = product * x;
+
+                        res[i] = product;
+                    }
+                }

                pos = offsets[i];
            }
@ -193,30 +231,30 @@ struct ArrayAggregateImpl
            return true;
        }

-        const typename ColVecType::Container & data = column->getData();
+        const auto & data = column->getData();

-        typename ColVecResult::MutablePtr res_column;
+        typename ColVecResultType::MutablePtr res_column;
        if constexpr (IsDecimalNumber<Element>)
-            res_column = ColVecResult::create(offsets.size(), data.getScale());
+            res_column = ColVecResultType::create(offsets.size(), data.getScale());
        else
-            res_column = ColVecResult::create(offsets.size());
+            res_column = ColVecResultType::create(offsets.size());

-        typename ColVecResult::Container & res = res_column->getData();
+        typename ColVecResultType::Container & res = res_column->getData();

        size_t pos = 0;
        for (size_t i = 0; i < offsets.size(); ++i)
        {
-            AggregationType s = 0;
+            AggregationType aggregate_value = 0;

            /// Array is empty
            if (offsets[i] == pos)
            {
-                res[i] = s;
+                res[i] = aggregate_value;
                continue;
            }

            size_t count = 1;
-            s = data[pos]; // NOLINT
+            aggregate_value = data[pos]; // NOLINT
            ++pos;

            for (; pos < offsets[i]; ++pos)
@ -226,20 +264,36 @@ struct ArrayAggregateImpl
                if constexpr (aggregate_operation == AggregateOperation::sum ||
                            aggregate_operation == AggregateOperation::average)
                {
-                    s += element;
+                    aggregate_value += element;
                }
                else if constexpr (aggregate_operation == AggregateOperation::min)
                {
-                    if (element < s)
+                    if (element < aggregate_value)
                    {
-                        s = element;
+                        aggregate_value = element;
                    }
                }
                else if constexpr (aggregate_operation == AggregateOperation::max)
                {
-                    if (element > s)
+                    if (element > aggregate_value)
                    {
-                        s = element;
+                        aggregate_value = element;
+                    }
+                }
+                else if constexpr (aggregate_operation == AggregateOperation::product)
+                {
+                    if constexpr (IsDecimalNumber<Element>)
+                    {
+                        using AggregateValueDecimalUnderlyingValue = decltype(aggregate_value.value);
+                        AggregateValueDecimalUnderlyingValue current_aggregate_value = aggregate_value.value;
+                        AggregateValueDecimalUnderlyingValue element_value = static_cast<AggregateValueDecimalUnderlyingValue>(element.value);
+
+                        if (common::mulOverflow(current_aggregate_value, element_value, aggregate_value.value))
+                            throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow");
+                    }
+                    else
+                    {
+                        aggregate_value *= element;
                    }
                }

@ -250,17 +304,26 @@ struct ArrayAggregateImpl
            {
                if constexpr (IsDecimalNumber<Element>)
                {
-                    s = s / count;
-                    res[i] = DecimalUtils::convertTo<Result>(s, data.getScale());
+                    aggregate_value = aggregate_value / count;
+                    res[i] = DecimalUtils::convertTo<ResultType>(aggregate_value, data.getScale());
                }
                else
                {
-                    res[i] = static_cast<Result>(s) / count;
+                    res[i] = static_cast<ResultType>(aggregate_value) / count;
                }
            }
+            else if constexpr (aggregate_operation == AggregateOperation::product && IsDecimalNumber<Element>)
+            {
+                auto result_scale = data.getScale() * count;
+
+                if (unlikely(result_scale > DecimalUtils::max_precision<AggregationType>))
+                    throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale {} is out of bounds", result_scale);
+
+                res[i] = DecimalUtils::convertTo<ResultType>(aggregate_value, result_scale);
+            }
            else
            {
-                res[i] = s;
+                res[i] = aggregate_value;
            }
        }

@ -291,7 +354,7 @@ struct ArrayAggregateImpl
            executeType<Decimal128>(mapped, offsets, res))
            return res;
        else
-            throw Exception("Unexpected column for arraySum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arraySum: {}", mapped->getName()); /// the column name is the argument for {}, never part of the format string
    }
 };

@ -307,12 +370,16 @@ using FunctionArraySum = FunctionArrayMapped<ArrayAggregateImpl<AggregateOperati
 struct NameArrayAverage { static constexpr auto name = "arrayAvg"; };
 using FunctionArrayAverage = FunctionArrayMapped<ArrayAggregateImpl<AggregateOperation::average>, NameArrayAverage>;

+struct NameArrayProduct { static constexpr auto name = "arrayProduct"; }; /// name tag carrying the function name "arrayProduct"
+using FunctionArrayProduct = FunctionArrayMapped<ArrayAggregateImpl<AggregateOperation::product>, NameArrayProduct>; /// the product aggregation wired into the same FunctionArrayMapped wrapper as arrayAvg
+
 void registerFunctionArrayAggregation(FunctionFactory & factory) /// registers every array aggregation function defined in this file with the given factory
 {
    factory.registerFunction<FunctionArrayMin>();
    factory.registerFunction<FunctionArrayMax>();
    factory.registerFunction<FunctionArraySum>();
    factory.registerFunction<FunctionArrayAverage>();
+    factory.registerFunction<FunctionArrayProduct>(); /// newly added: arrayProduct
 }

 }
--- a/tests/queries/0_stateless/01768_array_product.reference
+++ b/tests/queries/0_stateless/01768_array_product.reference
@ -0,0 +1,18 @@
+Array product with constant column
+720	Float64
+24	Float64
+3.5	Float64
+6	Float64
+Array product with non constant column
+24
+0
+6
+24
+0
+6
+Types of aggregation result array product
+Float64	Float64	Float64	Float64
+Float64	Float64	Float64	Float64
+Float64	Float64	Float64
+Float64	Float64
+Float64	Float64	Float64
--- a/tests/queries/0_stateless/01768_array_product.sql
+++ b/tests/queries/0_stateless/01768_array_product.sql
@ -0,0 +1,26 @@
+SELECT 'Array product with constant column';
+
+SELECT arrayProduct([1,2,3,4,5,6]) as a, toTypeName(a); -- integer literals; reference expects 720 Float64
+SELECT arrayProduct(array(1.0,2.0,3.0,4.0)) as a, toTypeName(a); -- floating-point literals; reference expects 24 Float64
+SELECT arrayProduct(array(1,3.5)) as a, toTypeName(a); -- mixed integer and fractional literals; reference expects 3.5 Float64
+SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as a, toTypeName(a); -- Decimal64 elements; reference expects 6 Float64
+
+SELECT 'Array product with non constant column';
+
+DROP TABLE IF EXISTS test_aggregation;
+CREATE TABLE test_aggregation (x Array(Int)) ENGINE=TinyLog;
+INSERT INTO test_aggregation VALUES ([1,2,3,4]), ([]), ([1,2,3]);
+SELECT arrayProduct(x) FROM test_aggregation; -- reference expects 24, 0, 6: the empty array row prints 0
+DROP TABLE test_aggregation;
+
+CREATE TABLE test_aggregation (x Array(Decimal64(8))) ENGINE=TinyLog; -- same rows again, this time stored as Decimal64(8)
+INSERT INTO test_aggregation VALUES ([1,2,3,4]), ([]), ([1,2,3]);
+SELECT arrayProduct(x) FROM test_aggregation; -- reference expects the same 24, 0, 6
+DROP TABLE test_aggregation;
+
+SELECT 'Types of aggregation result array product';
+SELECT toTypeName(arrayProduct([toInt8(0)])), toTypeName(arrayProduct([toInt16(0)])), toTypeName(arrayProduct([toInt32(0)])), toTypeName(arrayProduct([toInt64(0)])); -- signed integers; reference expects Float64 for each
+SELECT toTypeName(arrayProduct([toUInt8(0)])), toTypeName(arrayProduct([toUInt16(0)])), toTypeName(arrayProduct([toUInt32(0)])), toTypeName(arrayProduct([toUInt64(0)])); -- unsigned integers; reference expects Float64 for each
+SELECT toTypeName(arrayProduct([toInt128(0)])), toTypeName(arrayProduct([toInt256(0)])), toTypeName(arrayProduct([toUInt256(0)])); -- wide integers; reference expects Float64 for each
+SELECT toTypeName(arrayProduct([toFloat32(0)])), toTypeName(arrayProduct([toFloat64(0)])); -- floats; reference expects Float64 for each
+SELECT toTypeName(arrayProduct([toDecimal32(0, 8)])), toTypeName(arrayProduct([toDecimal64(0, 8)])), toTypeName(arrayProduct([toDecimal128(0, 8)])); -- decimals; reference expects Float64 for each