This commit is contained in:
nikitamikhaylov 2020-11-12 22:17:15 +03:00
parent de75c96a75
commit 02ce3ed4e7
7 changed files with 35 additions and 30 deletions

View File

@ -89,7 +89,8 @@ struct MannWhitneyData : public StatisticalSample<T, T>
private:
/// We need to compute ranks according to all samples. Use this class to avoid extra copy and memory allocation.
class ConcatenatedSamples {
class ConcatenatedSamples
{
public:
ConcatenatedSamples(const Sample & first_, const Sample & second_)
: first(first_), second(second_) {}
@ -128,7 +129,8 @@ public:
if (params.size() > 2)
throw Exception("Aggregate function " + getName() + " require two parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (params.empty()) {
if (params.empty())
{
alternative = Alternative::TwoSided;
return;
}
@ -147,9 +149,8 @@ public:
throw Exception("Unknown parameter in aggregate function " + getName() +
". It must be one of: 'two sided', 'less', 'greater'", ErrorCodes::BAD_ARGUMENTS);
if (params.size() != 2) {
if (params.size() != 2)
return;
}
if (params[1].getType() != Field::Types::UInt64)
throw Exception("Aggregate function " + getName() + " require require second parameter to be a UInt64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -22,7 +22,8 @@ namespace DB
struct RankCorrelationData : public StatisticalSample<Float64, Float64>
{
Float64 getResult() {
Float64 getResult()
{
RanksArray ranks_x;
std::tie(ranks_x, std::ignore) = computeRanksAndTieCorrection(this->x);

View File

@ -29,7 +29,8 @@ static Float64 integrateSimpson(Float64 a, Float64 b, F && func)
using RanksArray = std::vector<Float64>;
template <typename Values>
std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & values) {
std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & values)
{
const size_t size = values.size();
/// Save initial positions, then sort indices according to the values.
std::vector<size_t> indexes(size);
@ -40,17 +41,16 @@ std::pair<RanksArray, Float64> computeRanksAndTieCorrection(const Values & value
size_t left = 0;
Float64 tie_numenator = 0;
RanksArray out(size);
while (left < size) {
while (left < size)
{
size_t right = left;
while (right < size && values[indexes[left]] == values[indexes[right]]) {
while (right < size && values[indexes[left]] == values[indexes[right]])
++right;
}
auto adjusted = (left + right + 1.) / 2.;
auto count_equal = right - left;
tie_numenator += std::pow(count_equal, 3) - count_equal;
for (size_t iter = left; iter < right; ++iter) {
for (size_t iter = left; iter < right; ++iter)
out[indexes[iter]] = adjusted;
}
left = right;
}
return {out, 1 - (tie_numenator / (std::pow(size, 3) - size))};
@ -71,12 +71,14 @@ struct StatisticalSample
size_t size_x{0};
size_t size_y{0};
void addX(X value, Arena * arena) {
void addX(X value, Arena * arena)
{
++size_x;
x.push_back(value, arena);
}
void addY(Y value, Arena * arena) {
void addY(Y value, Arena * arena)
{
++size_y;
y.push_back(value, arena);
}

View File

@ -20,8 +20,8 @@ TEST(Ranks, Simple)
ASSERT_EQ(ranks.size(), expected.size());
for (size_t i = 0; i < ranks.size(); ++i) {
for (size_t i = 0; i < ranks.size(); ++i)
ASSERT_DOUBLE_EQ(ranks[i], expected[i]);
}
ASSERT_DOUBLE_EQ(t, 0.9975296442687747);
}

View File

@ -29,6 +29,7 @@ SRCS(
AggregateFunctionHistogram.cpp
AggregateFunctionIf.cpp
AggregateFunctionMLMethod.cpp
AggregateFunctionMannWhitney.cpp
AggregateFunctionMaxIntersections.cpp
AggregateFunctionMerge.cpp
AggregateFunctionMinMaxAny.cpp