mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Merge pull request #38722 from ClickHouse/fix-rank-corr-functions
Fix a bug in `rankCorr` function
This commit is contained in:
commit
bfc9ed6172
@ -31,8 +31,8 @@ struct RankCorrelationData : public StatisticalSample<Float64, Float64>
|
||||
RanksArray ranks_y;
|
||||
std::tie(ranks_y, std::ignore) = computeRanksAndTieCorrection(this->y);
|
||||
|
||||
/// In our case sizes of both samples are equal.
|
||||
const auto size = this->size_x;
|
||||
/// Sizes can be non-equal due to skipped NaNs.
|
||||
const auto size = std::min(this->size_x, this->size_y);
|
||||
|
||||
/// Count d^2 sum
|
||||
Float64 answer = 0;
|
||||
|
@ -31,8 +31,8 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value
|
||||
/// Save initial positions, then sort indices according to the values.
|
||||
std::vector<size_t> indexes(size);
|
||||
std::iota(indexes.begin(), indexes.end(), 0);
|
||||
::sort(indexes.begin(), indexes.end(),
|
||||
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
|
||||
std::sort(indexes.begin(), indexes.end(),
|
||||
[&] (size_t lhs, size_t rhs) { return values[lhs] < values[rhs]; });
|
||||
|
||||
size_t left = 0;
|
||||
Float64 tie_numenator = 0;
|
||||
@ -74,12 +74,18 @@ struct StatisticalSample
|
||||
|
||||
/// Appends one observation to the x sample.
/// NaN values are skipped entirely: they are neither counted in size_x nor stored in x.
/// Otherwise size_x is incremented and the value is pushed to x using the given arena.
void addX(X value, Arena * arena)
|
||||
{
|
||||
/// Ignore NaN inputs; as a result size_x may end up different from size_y.
if (isNaN(value))
|
||||
return;
|
||||
|
||||
++size_x;
|
||||
x.push_back(value, arena);
|
||||
}
|
||||
|
||||
/// Appends one observation to the y sample.
/// NaN values are skipped entirely: they are neither counted in size_y nor stored in y.
/// Otherwise size_y is incremented and the value is pushed to y using the given arena.
void addY(Y value, Arena * arena)
|
||||
{
|
||||
/// Ignore NaN inputs; as a result size_y may end up different from size_x.
if (isNaN(value))
|
||||
return;
|
||||
|
||||
++size_y;
|
||||
y.push_back(value, arena);
|
||||
}
|
||||
|
@ -1,7 +1,9 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-tsan
|
||||
# Sometimes it takes longer than 60 seconds under TSan.
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
${CLICKHOUSE_LOCAL} -q "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||:
|
||||
${CLICKHOUSE_LOCAL} --query "select col1, initializeAggregation('argMaxState', col2, insertTime) as col2, now() as insertTime FROM generateRandom('col1 String, col2 Array(Float64)') LIMIT 1000000 FORMAT CSV" | ${CLICKHOUSE_CURL} -s 'http://localhost:8123/?query=INSERT%20INTO%20non_existing_table%20SELECT%20col1%2C%20initializeAggregation(%27argMaxState%27%2C%20col2%2C%20insertTime)%20as%20col2%2C%20now()%20as%20insertTime%20FROM%20input(%27col1%20String%2C%20col2%20Array(Float64)%27)%20FORMAT%20CSV' --data-binary @- | grep -q "Table default.non_existing_table doesn't exist" && echo 'Ok.' || echo 'FAIL' ||:
|
||||
|
1
tests/queries/0_stateless/02347_rank_corr_nan.reference
Normal file
1
tests/queries/0_stateless/02347_rank_corr_nan.reference
Normal file
@ -0,0 +1 @@
|
||||
nan
|
1
tests/queries/0_stateless/02347_rank_corr_nan.sql
Normal file
1
tests/queries/0_stateless/02347_rank_corr_nan.sql
Normal file
@ -0,0 +1 @@
|
||||
SELECT rankCorr(number, nan) FROM numbers(10);
|
Loading…
Reference in New Issue
Block a user