Merge pull request #16135 from hexiaoting/dev_rankCorr

Fix inconsistent behaviour of rankCorr function
2024-11-21 15:12:02 +00:00 · 2020-10-22 01:15:50 +03:00 · 2020-10-22 01:15:50 +03:00 · cfaf82ba0c
commit cfaf82ba0c
parent 2ae32e3d5c 222a4d2e9b
1 changed files with 13 additions and 22 deletions
--- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
+++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.h
@@ -21,10 +21,6 @@

 #include <type_traits>

-namespace ErrorCodes
-{
-extern const int BAD_ARGUMENTS;
-}

 namespace DB
 {
@@ -138,23 +134,18 @@ public:
        const auto & value = this->data(place).values;
        size_t size = this->data(place).size_x;

-        if (size < 2)
-        {
-            throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
-        }
-
-        //create a copy of values not to format data
+        // work on a copy of the values so the stored data is not modified
        PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
        tmp_values.resize(size);
        for (size_t j = 0; j < size; ++ j)
            tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);

-        //sort x_values
+        // sort x_values
        std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});

        for (size_t j = 0; j < size;)
        {
-            //replace x_values with their ranks
+            // replace x_values with their ranks
            size_t rank = j + 1;
            size_t same = 1;
            size_t cur_sum = rank;
@@ -166,9 +157,9 @@ public:
                {
                    // rank of (j + 1)th number
                    rank += 1;
-                    same++;
+                    ++same;
                    cur_sum += rank;
-                    j++;
+                    ++j;
                }
                else
                    break;
@@ -178,16 +169,16 @@ public:
            Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
            for (size_t i = cur_start; i <= j; ++i)
                tmp_values[i].first = insert_rank;
-            j++;
+            ++j;
        }

-        //sort y_values
+        // sort y_values
        std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});

-        //replace y_values with their ranks
+        // replace y_values with their ranks
        for (size_t j = 0; j < size;)
        {
-            //replace x_values with their ranks
+            // replace y_values with their ranks
            size_t rank = j + 1;
            size_t same = 1;
            size_t cur_sum = rank;
@@ -199,9 +190,9 @@ public:
                {
                    // rank of (j + 1)th number
                    rank += 1;
-                    same++;
+                    ++same;
                    cur_sum += rank;
-                    j++;
+                    ++j;
                }
                else
                {
@@ -213,10 +204,10 @@ public:
            Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
            for (size_t i = cur_start; i <= j; ++i)
                tmp_values[i].second = insert_rank;
-            j++;
+            ++j;
        }

-        //count d^2 sum
+        // count d^2 sum
        Float64 answer = static_cast<Float64>(0);
        for (size_t j = 0; j < size; ++ j)
            answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);