ClickHouse/dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h

196 lines
4.5 KiB
C++
Raw Normal View History

2019-03-13 07:22:57 +00:00
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
2019-03-22 07:57:33 +00:00
#include <Columns/ColumnVector.h>
2019-04-05 13:32:25 +00:00
#include <Columns/ColumnTuple.h>
2019-03-13 07:22:57 +00:00
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
2019-03-13 07:22:57 +00:00
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename X, typename Y, typename Ret>
2019-03-22 07:57:33 +00:00
struct AggregateFunctionLeastSqrData final
2019-03-13 07:22:57 +00:00
{
size_t count = 0;
Ret sum_x = 0;
Ret sum_y = 0;
Ret sum_xx = 0;
Ret sum_xy = 0;
void add(X x, Y y)
{
count += 1;
sum_x += x;
sum_y += y;
sum_xx += x * x;
sum_xy += x * y;
}
void merge(const AggregateFunctionLeastSqrData & other)
{
count += other.count;
sum_x += other.sum_x;
sum_y += other.sum_y;
sum_xx += other.sum_xx;
sum_xy += other.sum_xy;
}
void serialize(WriteBuffer & buf) const
{
writeBinary(count, buf);
writeBinary(sum_x, buf);
writeBinary(sum_y, buf);
writeBinary(sum_xx, buf);
writeBinary(sum_xy, buf);
}
void deserialize(ReadBuffer & buf)
{
readBinary(count, buf);
readBinary(sum_x, buf);
readBinary(sum_y, buf);
readBinary(sum_xx, buf);
readBinary(sum_xy, buf);
}
Ret getK() const
{
Ret divisor = sum_xx * count - sum_x * sum_x;
if (divisor == 0)
return std::numeric_limits<Ret>::quiet_NaN();
return (sum_xy * count - sum_x * sum_y) / divisor;
2019-03-13 07:22:57 +00:00
}
Ret getB(Ret k) const
{
if (count == 0)
return std::numeric_limits<Ret>::quiet_NaN();
2019-03-13 07:22:57 +00:00
return (sum_y - k * sum_x) / count;
}
};
2019-04-05 13:32:25 +00:00
/// Calculates simple linear regression parameters.
/// Result is a tuple (k, b) for y = k * x + b equation, solved by least squares approximation.
2019-03-13 07:22:57 +00:00
template <typename X, typename Y, typename Ret = Float64>
class AggregateFunctionLeastSqr final : public IAggregateFunctionDataHelper<
AggregateFunctionLeastSqrData<X, Y, Ret>,
AggregateFunctionLeastSqr<X, Y, Ret>
>
{
public:
AggregateFunctionLeastSqr(
const DataTypes & arguments,
const Array & params
):
IAggregateFunctionDataHelper<
AggregateFunctionLeastSqrData<X, Y, Ret>,
AggregateFunctionLeastSqr<X, Y, Ret>
> {arguments, params}
{
// notice: arguments has been checked before
}
2019-03-22 07:57:33 +00:00
String getName() const override
{
return "leastSqr";
}
const char * getHeaderFilePath() const override
{
return __FILE__;
}
2019-03-13 07:22:57 +00:00
void add(
AggregateDataPtr place,
const IColumn ** columns,
size_t row_num,
Arena *
) const override
{
2019-03-22 07:57:33 +00:00
auto col_x {
static_cast<const ColumnVector<X> *>(columns[0])
};
auto col_y {
static_cast<const ColumnVector<Y> *>(columns[1])
};
X x = col_x->getData()[row_num];
Y y = col_y->getData()[row_num];
2019-03-13 07:22:57 +00:00
this->data(place).add(x, y);
}
void merge(
AggregateDataPtr place,
ConstAggregateDataPtr rhs, Arena *
) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(
ConstAggregateDataPtr place,
WriteBuffer & buf
) const override
{
this->data(place).serialize(buf);
}
void deserialize(
AggregateDataPtr place,
ReadBuffer & buf, Arena *
) const override
{
this->data(place).deserialize(buf);
}
DataTypePtr getReturnType() const override
{
DataTypes types {
2019-04-05 13:32:25 +00:00
std::make_shared<DataTypeNumber<Ret>>(),
std::make_shared<DataTypeNumber<Ret>>(),
2019-03-13 07:22:57 +00:00
};
Strings names {
"k",
"b",
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void insertResultInto(
ConstAggregateDataPtr place,
IColumn & to
) const override
{
Ret k = this->data(place).getK();
Ret b = this->data(place).getB(k);
2019-04-05 13:32:25 +00:00
auto & col_tuple = static_cast<ColumnTuple &>(to);
auto & col_k = static_cast<ColumnVector<Ret> &>(col_tuple.getColumn(0));
auto & col_b = static_cast<ColumnVector<Ret> &>(col_tuple.getColumn(1));
2019-03-13 07:22:57 +00:00
2019-04-05 13:32:25 +00:00
col_k.getData().push_back(k);
col_b.getData().push_back(b);
2019-03-13 07:22:57 +00:00
}
};
}