Merge pull request #38722 from ClickHouse/fix-rank-corr-functions

Fix a bug in `rankCorr` function
This commit is contained in:
Alexey Milovidov 2022-07-03 20:30:32 +03:00 committed by GitHub
commit bfc9ed6172
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 18 additions and 8 deletions

View File

@ -31,8 +31,8 @@ struct RankCorrelationData : public StatisticalSample<Float64, Float64>
RanksArray ranks_y;
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
/// In our case sizes of both samples are equal.
const auto size = this->size_x;
/// Sizes can be non-equal due to skipped NaNs.
const auto size = std::min(this->size_x, this->size_y);
/// Count d^2 sum
Float64 answer = 0;

View File

@ -31,8 +31,8 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value
/// Save initial positions, then sort indices according to the values.
std::vector<size_t> indexes(size);
std::iota(indexes.begin(), indexes.end(), 0);
::sort(indexes.begin(), indexes.end(),
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
std::sort(indexes.begin(), indexes.end(),
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
size_t left = 0;
Float64 tie_numenator = 0;
@ -74,12 +74,18 @@ struct StatisticalSample
/// Appends one observation to the first sample.
/// NaN values are ignored: they are neither counted in `size_x` nor stored in `x`.
/// `arena` is forwarded to `x.push_back`.
void addX(X value, Arena * arena)
{
    const bool is_usable = !isNaN(value);
    if (is_usable)
    {
        /// Keep the counter in step with the stored values.
        ++size_x;
        x.push_back(value, arena);
    }
}
/// Appends one observation to the second sample.
/// NaN values are ignored: they are neither counted in `size_y` nor stored in `y`.
/// `arena` is forwarded to `y.push_back`.
void addY(Y value, Arena * arena)
{
    const bool is_usable = !isNaN(value);
    if (is_usable)
    {
        /// Keep the counter in step with the stored values.
        ++size_y;
        y.push_back(value, arena);
    }
}

View File

@ -1,7 +1,9 @@
#!/usr/bin/env bash
# Tags: no-tsan
# Sometimes it takes longer than 60 seconds under TSan.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
${CLICKHOUSE_LOCAL} -q "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||:
${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||:

View File

@ -0,0 +1 @@
nan

View File

@ -0,0 +1 @@
SELECT rankCorr(number, nan) FROM numbers(10);