ClickHouse/src/AggregateFunctions/AggregateFunctionContingencyCoefficient.cpp

54 lines
1.3 KiB
C++
Raw Normal View History

2021-06-09 21:14:36 +00:00
#include <AggregateFunctions/AggregateFunctionFactory.h>
2022-01-02 18:50:41 +00:00
#include <AggregateFunctions/CrossTab.h>
2021-06-09 21:14:36 +00:00
#include <AggregateFunctions/FactoryHelpers.h>
#include <memory>
2022-01-02 18:50:41 +00:00
#include <cmath>
2021-06-09 21:14:36 +00:00
namespace DB
{
2022-01-02 18:50:41 +00:00
2021-06-09 21:14:36 +00:00
namespace
{
2022-01-03 13:45:02 +00:00
struct ContingencyData : CrossTabData
{
2022-01-03 18:39:04 +00:00
static const char * getName()
{
return "contingency";
}
2022-01-03 13:45:02 +00:00
Float64 getResult() const
2022-01-02 18:50:41 +00:00
{
2022-01-03 13:45:02 +00:00
if (count < 2)
return std::numeric_limits<Float64>::quiet_NaN();
2022-01-02 18:50:41 +00:00
2022-01-03 13:45:02 +00:00
Float64 phi = 0.0;
for (const auto & [key, value_ab] : count_ab)
2022-01-02 18:50:41 +00:00
{
2022-01-03 13:45:02 +00:00
Float64 value_a = count_a.at(key.items[0]);
Float64 value_b = count_b.at(key.items[1]);
2022-01-02 18:50:41 +00:00
2022-01-03 13:45:02 +00:00
phi += value_ab * value_ab / (value_a * value_b) * count - 2 * value_ab + (value_a * value_b) / count;
}
phi /= count;
2022-01-02 18:50:41 +00:00
2022-01-03 13:45:02 +00:00
return sqrt(phi / (phi + count));
}
};
2022-01-02 19:42:56 +00:00
}
2021-06-09 21:14:36 +00:00
2022-01-02 18:50:41 +00:00
/// Registers the aggregate function under the name returned by ContingencyData::getName().
void registerAggregateFunctionContingency(AggregateFunctionFactory & factory)
{
    /// Creator callback: runs the assertBinary and assertNoParameters checks (in that order),
    /// then builds the cross-tab aggregate function over ContingencyData.
    auto creator = [](const std::string & function_name, const DataTypes & types, const Array & params, const Settings *)
    {
        assertBinary(function_name, types);
        assertNoParameters(function_name, params);

        return std::make_shared<AggregateFunctionCrossTab<ContingencyData>>(types);
    };

    factory.registerFunction(ContingencyData::getName(), creator);
}
}