mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #16135 from hexiaoting/dev_rankCorr
Fix inconsistent behaviour of rankCorr function
This commit is contained in:
commit
cfaf82ba0c
@ -21,10 +21,6 @@
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -138,23 +134,18 @@ public:
|
||||
const auto & value = this->data(place).values;
|
||||
size_t size = this->data(place).size_x;
|
||||
|
||||
if (size < 2)
|
||||
{
|
||||
throw Exception("Aggregate function " + getName() + " requires samples to be of size > 1", ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
//create a copy of values not to format data
|
||||
// create a copy of values not to format data
|
||||
PODArrayWithStackMemory<std::pair<Float64, Float64>, 32> tmp_values;
|
||||
tmp_values.resize(size);
|
||||
for (size_t j = 0; j < size; ++ j)
|
||||
tmp_values[j] = static_cast<std::pair<Float64, Float64>>(value[j]);
|
||||
|
||||
//sort x_values
|
||||
// sort x_values
|
||||
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairFirst<std::greater>{});
|
||||
|
||||
for (size_t j = 0; j < size;)
|
||||
{
|
||||
//replace x_values with their ranks
|
||||
// replace x_values with their ranks
|
||||
size_t rank = j + 1;
|
||||
size_t same = 1;
|
||||
size_t cur_sum = rank;
|
||||
@ -166,9 +157,9 @@ public:
|
||||
{
|
||||
// rank of (j + 1)th number
|
||||
rank += 1;
|
||||
same++;
|
||||
++same;
|
||||
cur_sum += rank;
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
else
|
||||
break;
|
||||
@ -178,16 +169,16 @@ public:
|
||||
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
|
||||
for (size_t i = cur_start; i <= j; ++i)
|
||||
tmp_values[i].first = insert_rank;
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
|
||||
//sort y_values
|
||||
// sort y_values
|
||||
std::sort(std::begin(tmp_values), std::end(tmp_values), ComparePairSecond<std::greater>{});
|
||||
|
||||
//replace y_values with their ranks
|
||||
// replace y_values with their ranks
|
||||
for (size_t j = 0; j < size;)
|
||||
{
|
||||
//replace x_values with their ranks
|
||||
// replace x_values with their ranks
|
||||
size_t rank = j + 1;
|
||||
size_t same = 1;
|
||||
size_t cur_sum = rank;
|
||||
@ -199,9 +190,9 @@ public:
|
||||
{
|
||||
// rank of (j + 1)th number
|
||||
rank += 1;
|
||||
same++;
|
||||
++same;
|
||||
cur_sum += rank;
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -213,10 +204,10 @@ public:
|
||||
Float64 insert_rank = static_cast<Float64>(cur_sum) / same;
|
||||
for (size_t i = cur_start; i <= j; ++i)
|
||||
tmp_values[i].second = insert_rank;
|
||||
j++;
|
||||
++j;
|
||||
}
|
||||
|
||||
//count d^2 sum
|
||||
// count d^2 sum
|
||||
Float64 answer = static_cast<Float64>(0);
|
||||
for (size_t j = 0; j < size; ++ j)
|
||||
answer += (tmp_values[j].first - tmp_values[j].second) * (tmp_values[j].first - tmp_values[j].second);
|
||||
|
Loading…
Reference in New Issue
Block a user