changes in weights updater

This commit is contained in:
quid 2019-03-03 11:46:36 +03:00
parent e81200ba57
commit 3fa972e6b6
5 changed files with 343 additions and 159 deletions

View File

@@ -42,7 +42,6 @@ AggregateFunctionPtr createAggregateFunctionMLMethod(
}
/// The gradient computer for the LinearRegression method is LinearRegression
if (std::is_same<Method, FuncLinearRegression>::value)
{
gc = std::make_shared<LinearRegression>(argument_types.size());
@@ -64,11 +63,12 @@ AggregateFunctionPtr createAggregateFunctionMLMethod(
} else if (applyVisitor(FieldVisitorConvertToNumber<UInt32>(), parameters[2]) == Float64{3.0})
{
wu = std::make_shared<Nesterov>();
} else if (applyVisitor(FieldVisitorConvertToNumber<UInt32>(), parameters[2]) == Float64{4.0})
{
wu = std::make_shared<Adam>();
} else
{
/// Adam should be here
wu = std::make_shared<Nesterov>();
} else {
throw Exception("Such weights updater is not implemented yet", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
} else

View File

@@ -29,52 +29,66 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
/**
IGradientComputer class computes gradient according to its loss function
and stores mini-batch
*/
class IGradientComputer
{
public:
IGradientComputer(UInt32 sz)
: batch_gradient(sz, 0)
{}
IGradientComputer(UInt32 sz) { std::ignore = sz; // : batch_gradient(sz, 0)
}
virtual ~IGradientComputer() = default;
virtual void compute(const std::vector<Float64> & weights, Float64 bias, Float64 learning_rate,
Float64 target, const IColumn ** columns, size_t row_num) = 0;
/// Adds the gradient computed at the point (weights, bias) with the corresponding loss function to batch_gradient
virtual void compute(std::vector<Float64> * batch_gradient, const std::vector<Float64> &weights, Float64 bias,
Float64 learning_rate, Float64 target, const IColumn **columns, size_t row_num) = 0;
void reset()
{
batch_gradient.assign(batch_gradient.size(), 0);
}
// void reset()
// {
// batch_gradient.assign(batch_gradient.size(), 0);
// }
void write(WriteBuffer & buf) const
{
writeBinary(batch_gradient, buf);
}
// void write(WriteBuffer &buf) const
// {
// writeBinary(batch_gradient, buf);
// }
//
// void read(ReadBuffer &buf)
// {
// readBinary(batch_gradient, buf);
// }
void read(ReadBuffer & buf)
{
readBinary(batch_gradient, buf);
}
// const std::vector<Float64> &get() const
// {
// return batch_gradient;
// }
const std::vector<Float64> & get() const
{
return batch_gradient;
}
virtual Float64 predict(const std::vector<Float64> & predict_feature, const std::vector<Float64> & weights, Float64 bias) const = 0;
virtual void predict_for_all(ColumnVector<Float64>::Container & container, Block & block, const ColumnNumbers & arguments, const std::vector<Float64> & weights, Float64 bias) const = 0;
virtual Float64 predict(const std::vector<Float64> &predict_feature,
const std::vector<Float64> &weights,
Float64 bias) const = 0;
protected:
std::vector<Float64> batch_gradient; // gradient for bias lies in batch_gradient[batch_gradient.size() - 1]
/// Now we should use the predict_for_all function instead of predict
virtual void predict_for_all(ColumnVector<Float64>::Container &container,
Block &block, const ColumnNumbers &arguments,
const std::vector<Float64> &weights,
Float64 bias) const = 0;
//protected:
// std::vector<Float64> batch_gradient; // gradient for bias lies in batch_gradient[batch_gradient.size() - 1]
};
class LinearRegression : public IGradientComputer
{
public:
LinearRegression(UInt32 sz)
: IGradientComputer(sz)
{}
: IGradientComputer(sz) {
}
void compute(const std::vector<Float64> & weights, Float64 bias, Float64 learning_rate,
Float64 target, const IColumn ** columns, size_t row_num) override
void compute(std::vector<Float64> * batch_gradient, const std::vector<Float64> &weights, Float64 bias,
Float64 learning_rate, Float64 target, const IColumn **columns, size_t row_num) override
{
Float64 derivative = (target - bias);
for (size_t i = 0; i < weights.size(); ++i)
@@ -83,20 +97,19 @@ public:
}
derivative *= (2 * learning_rate);
batch_gradient[weights.size()] += derivative;
(*batch_gradient)[weights.size()] += derivative;
for (size_t i = 0; i < weights.size(); ++i)
{
batch_gradient[i] += derivative * static_cast<const ColumnVector<Float64> &>(*columns[i]).getData()[row_num];;
(*batch_gradient)[i] +=
derivative * static_cast<const ColumnVector<Float64> &>(*columns[i]).getData()[row_num];
}
}
Float64 predict(const std::vector<Float64> & predict_feature, const std::vector<Float64> & weights, Float64 bias) const override
Float64 predict(const std::vector<Float64> &predict_feature,
const std::vector<Float64> &weights, Float64 bias) const override
{
/// We do not update the weights on predict, because it could slow down prediction.
/// However, we could, for example, update them on every merge regardless of how many elements are in the batch.
// if (cur_batch)
// {
// update_weights();
// }
Float64 res{0.0};
for (size_t i = 0; i < predict_feature.size(); ++i)
@@ -107,7 +120,11 @@ public:
return res;
}
void predict_for_all(ColumnVector<Float64>::Container & container, Block & block, const ColumnNumbers & arguments, const std::vector<Float64> & weights, Float64 bias) const override
void predict_for_all(ColumnVector<Float64>::Container &container,
Block &block,
const ColumnNumbers &arguments,
const std::vector<Float64> &weights, Float64 bias) const override
{
size_t rows_num = block.rows();
std::vector<Float64> results(rows_num, bias);
@@ -134,6 +151,7 @@ public:
}
};
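For reference, reconstructing the loop body hidden by the hunk header above, the quantity LinearRegression::compute adds to the batch gradient per row appears to be the negative squared-error gradient scaled by the learning rate:
L(w, b) = (y - (w \cdot x + b))^2
\partial L / \partial b = -2 (y - w \cdot x - b), \qquad \partial L / \partial w_i = -2 (y - w \cdot x - b) \, x_i
so slot i accumulates -\eta \, \partial L / \partial w_i and the last slot accumulates -\eta \, \partial L / \partial b. This sign convention is why the weight updaters below add, rather than subtract, the averaged batch gradient.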
class LogisticRegression : public IGradientComputer
{
public:
@@ -141,31 +159,32 @@ public:
: IGradientComputer(sz)
{}
void compute(const std::vector<Float64> & weights, Float64 bias, Float64 learning_rate,
Float64 target, const IColumn ** columns, size_t row_num) override
void compute(std::vector<Float64> * batch_gradient, const std::vector<Float64> &weights, Float64 bias,
Float64 learning_rate, Float64 target, const IColumn **columns, size_t row_num) override
{
Float64 derivative = bias;
for (size_t i = 0; i < weights.size(); ++i)
{
derivative += weights[i] * static_cast<const ColumnVector<Float64> &>(*columns[i]).getData()[row_num];;
derivative += weights[i] * static_cast<const ColumnVector<Float64> &>(*columns[i]).getData()[row_num];
}
derivative *= target;
derivative = exp(derivative);
batch_gradient[weights.size()] += learning_rate * target / (derivative + 1);;
(*batch_gradient)[weights.size()] += learning_rate * target / (derivative + 1);
for (size_t i = 0; i < weights.size(); ++i)
{
batch_gradient[i] += learning_rate * target / (derivative + 1) * static_cast<const ColumnVector<Float64> &>(*columns[i]).getData()[row_num];
(*batch_gradient)[i] +=
learning_rate * target *
static_cast<const ColumnVector<Float64> &>(*columns[i]).getData()[row_num]
/ (derivative + 1);
}
}
Float64 predict(const std::vector<Float64> & predict_feature, const std::vector<Float64> & weights, Float64 bias) const override
Float64 predict(const std::vector<Float64> &predict_feature,
const std::vector<Float64> &weights, Float64 bias) const override
{
/// We do not update the weights on predict, because it could slow down prediction.
/// However, we could, for example, update them on every merge regardless of how many elements are in the batch.
// if (cur_batch)
// {
// update_weights();
// }
Float64 res{0.0};
for (size_t i = 0; i < predict_feature.size(); ++i)
@@ -176,7 +195,11 @@ public:
res = 1 / (1 + exp(-res));
return res;
}
void predict_for_all(ColumnVector<Float64>::Container & container, Block & block, const ColumnNumbers & arguments, const std::vector<Float64> & weights, Float64 bias) const override
void predict_for_all(ColumnVector<Float64>::Container & container,
Block & block,
const ColumnNumbers & arguments,
const std::vector<Float64> & weights, Float64 bias) const override
{
size_t rows_num = block.rows();
std::vector<Float64> results(rows_num, bias);
@@ -186,7 +209,6 @@ public:
ColumnPtr cur_col = block.getByPosition(arguments[i]).column;
for (size_t row_num = 0; row_num != rows_num; ++row_num)
{
const auto &element = (*cur_col)[row_num];
if (element.getType() != Field::Types::Float64)
throw Exception("Prediction arguments must be values of type Float",
@@ -197,124 +219,226 @@ public:
}
for (size_t row_num = 0; row_num != rows_num; ++row_num)
{
results[row_num] = 1 / (1 + exp(-results[row_num]));
container.emplace_back(results[row_num]);
container.emplace_back(1 / (1 + exp(-results[row_num])));
}
}
};
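Assuming the targets are encoded as y in {-1, +1} (which the multiplication by target suggests), the terms accumulated above correspond to the gradient of the logistic loss L(w, b) = \log(1 + e^{-y (w \cdot x + b)}):
\partial L / \partial b = \frac{-y}{1 + e^{y (w \cdot x + b)}}
so -\eta \, \partial L / \partial b equals learning_rate * target / (derivative + 1) with derivative = e^{y (w \cdot x + b)}, and the per-weight term just multiplies this by x_i. Prediction then applies the sigmoid \sigma(t) = 1 / (1 + e^{-t}), as in predict and predict_for_all.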
/**
* IWeightsUpdater class defines the way to update current state
* and uses GradientComputer on each iteration
*/
class IWeightsUpdater
{
public:
virtual ~IWeightsUpdater() = default;
virtual void update(UInt32 batch_size, std::vector<Float64> & weights, Float64 & bias, const std::vector<Float64> & gradient) = 0;
virtual void merge(const IWeightsUpdater &, Float64, Float64) {}
virtual std::vector<Float64> get_update(UInt32 sz, UInt32) {
return std::vector<Float64>(sz, 0.0);
virtual void add_to_batch(std::vector<Float64> * batch_gradient, std::shared_ptr<IGradientComputer> gc,
const std::vector<Float64> & weights, Float64 bias,
Float64 learning_rate, Float64 target, const IColumn **columns, size_t row_num)
{
gc->compute(batch_gradient, weights, bias, learning_rate, target, columns, row_num);
}
virtual void update(UInt32 batch_size,
std::vector<Float64> & weights, Float64 & bias,
const std::vector<Float64> & gradient) = 0;
virtual void merge(const IWeightsUpdater &, Float64, Float64)
{}
virtual void write(WriteBuffer &) const
{}
virtual void read(ReadBuffer &)
{}
// virtual std::vector<Float64> get_update(UInt32 sz, UInt32)
// {
// return std::vector<Float64>(sz, 0.0);
// }
};
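A minimal standalone sketch of the add_to_batch / update contract introduced here, using plain std::vector<double> (and a reference instead of the pointer used in the diff) rather than the ClickHouse column types; all names in this snippet are illustrative, not part of the commit. add_to_batch lets an updater choose the point at which the gradient is evaluated, while update applies the accumulated batch, mirroring the flush in update_state() further down.
#include <cstddef>
#include <iostream>
#include <vector>
/// Toy gradient computer: squared error for y ~ w * x + b with one feature.
/// As in the diff, it accumulates -learning_rate * dL/dw and -learning_rate * dL/db.
void compute_squared_error(std::vector<double> & batch_gradient,
                           const std::vector<double> & weights, double bias,
                           double learning_rate, double target, double x)
{
    double derivative = 2 * learning_rate * (target - weights[0] * x - bias);
    batch_gradient[0] += derivative * x; /// gradient slot for the weight
    batch_gradient[1] += derivative;     /// gradient slot for the bias (last element)
}
/// The update() side of the contract: apply the mean of the accumulated batch.
void sgd_update(std::size_t batch_size, std::vector<double> & weights, double & bias,
                const std::vector<double> & batch_gradient)
{
    weights[0] += batch_gradient[0] / batch_size;
    bias += batch_gradient[1] / batch_size;
}
int main()
{
    std::vector<double> weights{0.0};
    double bias = 0.0;
    std::vector<double> batch_gradient(2, 0.0);
    const double xs[] = {0, 1, 2, 3};
    const double ys[] = {1, 4, 7, 10}; /// exactly y = 3x + 1
    for (int epoch = 0; epoch < 300; ++epoch)
        for (std::size_t i = 0; i < 4; ++i)
        {
            compute_squared_error(batch_gradient, weights, bias, 0.05, ys[i], xs[i]);
            if ((i + 1) % 2 == 0) /// batch_capacity of 2 reached: flush the batch
            {
                sgd_update(2, weights, bias, batch_gradient);
                batch_gradient.assign(batch_gradient.size(), 0.0);
            }
        }
    std::cout << "w = " << weights[0] << ", b = " << bias << '\n'; /// close to 3 and 1
}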
class StochasticGradientDescent : public IWeightsUpdater
{
public:
void update(UInt32 batch_size, std::vector<Float64> & weights, Float64 & bias, const std::vector<Float64> & batch_gradient) override {
void update(UInt32 batch_size,
std::vector<Float64> & weights, Float64 & bias,
const std::vector<Float64> & batch_gradient) override
{
/// batch_size is already checked to be greater than 0
for (size_t i = 0; i < weights.size(); ++i)
{
weights[i] += batch_gradient[i] / batch_size;
}
bias += batch_gradient[weights.size()] / batch_size;
// batch_gradient->assign(batch_gradient->size(), Float64{0.0});
}
};
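Since batch_gradient already holds the sum over the batch of -\eta \nabla L, the step above is plain mini-batch gradient descent on the average loss, with B = batch_size:
w \leftarrow w - \frac{\eta}{B} \sum_{j=1}^{B} \nabla_w L(w, b; x_j, y_j), \qquad b \leftarrow b - \frac{\eta}{B} \sum_{j=1}^{B} \nabla_b L(w, b; x_j, y_j)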
class Momentum : public IWeightsUpdater
{
public:
Momentum() {}
Momentum (Float64 alpha) : alpha_(alpha) {}
void update(UInt32 batch_size, std::vector<Float64> & weights, Float64 & bias, const std::vector<Float64> & batch_gradient) override {
/// batch_size is already checked to be greater than 0
Momentum()
{}
if (accumulated_gradient.size() == 0)
Momentum(Float64 alpha) : alpha_(alpha)
{}
void update(UInt32 batch_size,
std::vector<Float64> & weights, Float64 & bias,
const std::vector<Float64> & batch_gradient) override
{
/// batch_size is already checked to be greater than 0
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
}
for (size_t i = 0; i < batch_gradient.size(); ++i)
{
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + batch_gradient[i];
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + batch_gradient[i] / batch_size;
}
for (size_t i = 0; i < weights.size(); ++i)
{
weights[i] += accumulated_gradient[i] / batch_size;
weights[i] += accumulated_gradient[i];
}
bias += accumulated_gradient[weights.size()] / batch_size;
bias += accumulated_gradient[weights.size()];
}
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override {
auto & momentum_rhs = static_cast<const Momentum &>(rhs);
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override
{
auto &momentum_rhs = static_cast<const Momentum &>(rhs);
for (size_t i = 0; i < accumulated_gradient.size(); ++i)
{
accumulated_gradient[i] = accumulated_gradient[i] * frac + momentum_rhs.accumulated_gradient[i] * rhs_frac;
accumulated_gradient[i] = accumulated_gradient[i] * frac +
momentum_rhs.accumulated_gradient[i] * rhs_frac;
}
}
void write(WriteBuffer &buf) const override
{
writeBinary(accumulated_gradient, buf);
}
void read(ReadBuffer &buf) override
{
readBinary(accumulated_gradient, buf);
}
private:
Float64 alpha_{0.1};
std::vector<Float64> accumulated_gradient;
};
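Reading the added lines of the hunk, the momentum recurrence changed from dividing by the batch size at apply time to normalizing when the batch is folded into the velocity:
v \leftarrow \alpha v + \frac{1}{B} g_{batch}, \qquad (w, b) \leftarrow (w, b) + v
where \alpha defaults to 0.1 and g_{batch} is the summed -\eta \nabla L from above; the velocity (accumulated_gradient) is now also serialized in write/read, so it is preserved when the aggregation state itself is serialized and merged.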
class Nesterov : public IWeightsUpdater
{
public:
Nesterov() {}
Nesterov (Float64 alpha) : alpha_(alpha) {}
void update(UInt32 batch_size, std::vector<Float64> & weights, Float64 & bias, const std::vector<Float64> & batch_gradient) override {
if (accumulated_gradient.size() == 0)
Nesterov()
{}
Nesterov(Float64 alpha) : alpha_(alpha)
{}
void add_to_batch(std::vector<Float64> * batch_gradient, std::shared_ptr<IGradientComputer> gc,
const std::vector<Float64> & weights, Float64 bias,
Float64 learning_rate, Float64 target, const IColumn ** columns, size_t row_num) override
{
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient->size(), Float64{0.0});
}
std::vector<Float64> shifted_weights(weights.size());
for (size_t i = 0; i != shifted_weights.size(); ++i)
{
shifted_weights[i] = weights[i] + accumulated_gradient[i] * alpha_;
}
auto shifted_bias = bias + accumulated_gradient[weights.size()] * alpha_;
gc->compute(batch_gradient, shifted_weights, shifted_bias, learning_rate, target, columns, row_num);
}
void update(UInt32 batch_size,
std::vector<Float64> & weights, Float64 & bias,
const std::vector<Float64> & batch_gradient) override
{
if (accumulated_gradient.empty())
{
accumulated_gradient.resize(batch_gradient.size(), Float64{0.0});
}
for (size_t i = 0; i < batch_gradient.size(); ++i)
{
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + batch_gradient[i];
accumulated_gradient[i] = accumulated_gradient[i] * alpha_ + batch_gradient[i] / batch_size;
}
for (size_t i = 0; i < weights.size(); ++i)
{
weights[i] += accumulated_gradient[i] / batch_size;
weights[i] += accumulated_gradient[i];
}
bias += accumulated_gradient[weights.size()] / batch_size;
std::cout<<"BIAS " << bias<<'\n';
bias += accumulated_gradient[weights.size()];
}
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override {
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override
{
auto & nesterov_rhs = static_cast<const Nesterov &>(rhs);
for (size_t i = 0; i < accumulated_gradient.size(); ++i)
{
accumulated_gradient[i] = accumulated_gradient[i] * frac + nesterov_rhs.accumulated_gradient[i] * rhs_frac;
accumulated_gradient[i] =
accumulated_gradient[i] * frac + nesterov_rhs.accumulated_gradient[i] * rhs_frac;
}
}
virtual std::vector<Float64> get_update(UInt32 sz, UInt32 batch_size) override {
if (accumulated_gradient.size() == 0)
{
accumulated_gradient.resize(sz, Float64{0.0});
return accumulated_gradient;
}
std::vector<Float64> delta(accumulated_gradient.size());
// std::cout<<"\n\nHK\n\n";
for (size_t i = 0; i < delta.size(); ++i)
{
delta[i] = accumulated_gradient[i] * alpha_ / batch_size;
}
return delta;
}
Float64 alpha_{0.1};
std::vector<Float64> accumulated_gradient;
void write(WriteBuffer &buf) const override
{
writeBinary(accumulated_gradient, buf);
}
void read(ReadBuffer &buf) override
{
readBinary(accumulated_gradient, buf);
}
// virtual std::vector<Float64> get_update(UInt32 sz, UInt32 batch_size) override
// {
// if (accumulated_gradient.size() == 0)
// {
// accumulated_gradient.resize(sz, Float64{0.0});
// return accumulated_gradient;
// }
// std::vector<Float64> delta(accumulated_gradient.size());
// // std::cout<<"\n\nHK\n\n";
// for (size_t i = 0; i < delta.size(); ++i)
// {
// delta[i] = accumulated_gradient[i] * alpha_ / batch_size;
// }
// return delta;
// }
private:
Float64 alpha_{0.1};
std::vector<Float64> accumulated_gradient;
};
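The Nesterov variant overrides add_to_batch so that each row's gradient is evaluated at the look-ahead point rather than at the current weights, which is the standard Nesterov accelerated gradient trick:
\tilde{w} = w + \alpha v, \quad \tilde{b} = b + \alpha v_{last}
g_{batch} \leftarrow g_{batch} - \eta \nabla L(\tilde{w}, \tilde{b}; x, y) \quad \text{(via gc->compute)}
v \leftarrow \alpha v + \frac{1}{B} g_{batch}, \qquad (w, b) \leftarrow (w, b) + v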
// TODO: re-check after the change in the momentum logic
/*
class Adam : public IWeightsUpdater
{
public:
Adam() {}
Adam (Float64 betta1, Float64 betta2) : betta1_(betta1), betta2_(betta2), betta1t_(betta1), betta2t_(betta2) {}
void update(UInt32 cur_batch, std::vector<Float64> & weights, Float64 & bias, const std::vector<Float64> & batch_gradient) override {
Adam()
{}
Adam(Float64 betta1, Float64 betta2) : betta1_(betta1), betta2_(betta2), betta1t_(betta1), betta2t_(betta2)
{}
void update(UInt32 cur_batch,
std::vector<Float64> & weights, Float64 & bias,
std::vector<Float64> * batch_gradient) override
{
if (mt_.size() == 0)
{
mt_.resize(batch_gradient.size(), Float64{0.0});
@@ -325,11 +449,13 @@ public:
{
mt_[i] = mt_[i] * betta1_ + (1 - betta1_) * batch_gradient[i];
vt_[i] = vt_[i] * betta2_ + (1 - betta2_) * batch_gradient[i] * batch_gradient[i];
if (t < 8) {
if (t < 8)
{
mt_[i] = mt_[i] / (1 - betta1t_);
betta1t_ *= betta1_;
}
if (t < 850) {
if (t < 850)
{
vt_[i] = vt_[i] / (1 - betta2t_);
betta2t_ *= betta2_;
}
@@ -341,23 +467,33 @@ public:
bias += (mt_[weights.size()] / (sqrt(vt_[weights.size()] + eps))) / cur_batch;
t += 1;
}
virtual void merge(const IWeightsUpdater & rhs, Float64 frac, Float64 rhs_frac) override {
auto & adam_rhs = static_cast<const Adam &>(rhs);
virtual void merge(const IWeightsUpdater &rhs, Float64 frac, Float64 rhs_frac) override
{
auto &adam_rhs = static_cast<const Adam &>(rhs);
for (size_t i = 0; i < mt_.size(); ++i)
{
mt_[i] = mt_[i] * frac + adam_rhs.mt_[i] * rhs_frac;
vt_[i] = vt_[i] * frac + adam_rhs.vt_[i] * rhs_frac;
}
}
Float64 betta1_{0.2};
Float64 betta2_{0.3};
Float64 betta1t_{0.3};
Float64 betta2t_{0.3};
UInt32 t = 0;
std::vector<Float64> mt_;
std::vector<Float64> vt_;
};
private:
Float64 betta1_{0.2};
Float64 betta2_{0.3};
Float64 betta1t_{0.3};
Float64 betta2t_{0.3};
UInt32 t = 0;
std::vector<Float64> mt_;
std::vector<Float64> vt_;
};
*/
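For reference, as far as the visible lines show, the commented-out Adam updater (disabled pending the momentum-logic re-check noted in the TODO) follows the usual Adam form, with the quirks that bias correction is only applied while t < 8 for the first moment and t < 850 for the second, and that eps sits under the square root:
m \leftarrow \beta_1 m + (1 - \beta_1) g, \qquad v \leftarrow \beta_2 v + (1 - \beta_2) g^2
(w, b) \leftarrow (w, b) + \frac{1}{B} \cdot \frac{m}{\sqrt{v + \varepsilon}}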
/**
* LinearModelData is a class which manages current state of learning
* and is stored as AggregateFunctionState
*/
class LinearModelData
{
public:
@@ -365,33 +501,53 @@ public:
{}
LinearModelData(Float64 learning_rate,
UInt32 param_num,
UInt32 batch_capacity,
std::shared_ptr<IGradientComputer> gc,
std::shared_ptr<IWeightsUpdater> wu)
: learning_rate(learning_rate),
batch_capacity(batch_capacity),
gradient_computer(std::move(gc)),
weights_updater(std::move(wu))
UInt32 param_num,
UInt32 batch_capacity,
std::shared_ptr<IGradientComputer> gc,
std::shared_ptr<IWeightsUpdater> wu)
: learning_rate(learning_rate),
batch_capacity(batch_capacity),
batch_size(0),
gradient_computer(std::move(gc)),
weights_updater(std::move(wu))
{
weights.resize(param_num, Float64{0.0});
batch_size = 0;
gradient_batch.resize(param_num + 1, Float64{0.0});
}
void add(const IColumn ** columns, size_t row_num)
void add(const IColumn **columns, size_t row_num)
{
/// first column stores target; features start from (columns + 1)
const auto & target = static_cast<const ColumnVector<Float64> &>(*columns[0]).getData()[row_num];
auto delta = weights_updater->get_update(weights.size() + 1, batch_capacity);
Float64 delta_bias = bias + delta[weights.size()];
delta.resize(weights.size());
for (size_t i = 0; i < weights.size(); ++i) {
delta[i] += weights[i];
}
gradient_computer->compute(delta, delta_bias, learning_rate, target, columns + 1, row_num);
// gradient_computer->compute(weights, bias, learning_rate, target, columns + 1, row_num);
const auto &target = static_cast<const ColumnVector<Float64> &>(*columns[0]).getData()[row_num];
// auto delta = weights_updater->get_update(weights.size() + 1, batch_capacity);
// Float64 delta_bias = bias + delta[weights.size()];
// delta.resize(weights.size());
// for (size_t i = 0; i < weights.size(); ++i)
// {
// delta[i] += weights[i];
// }
// gradient_computer->compute(delta, delta_bias, learning_rate, target, columns + 1, row_num);
// gradient_computer->compute(weights, bias, learning_rate, target, columns + 1, row_num);
std::cout << "\nBATCH BEFORE\n";
for (auto i : gradient_batch)
std::cout << i << " ";
std::cout << "\nhello\n";
weights_updater->add_to_batch(&gradient_batch, gradient_computer,
weights, bias, learning_rate, target, columns, row_num);
std::cout << "BATCH AFTER\n";
for (auto i : gradient_batch)
std::cout << i << " ";
std::cout << "\nhello\n\n";
if (iter_num == 10)
exit(1);
++batch_size;
if (batch_size == batch_capacity)
{
@@ -399,7 +555,7 @@ public:
}
}
void merge(const LinearModelData & rhs)
void merge(const LinearModelData &rhs)
{
if (iter_num == 0 && rhs.iter_num == 0)
return;
@@ -421,63 +577,90 @@ public:
weights_updater->merge(*rhs.weights_updater, frac, rhs_frac);
}
void write(WriteBuffer & buf) const
void write(WriteBuffer &buf) const
{
writeBinary(bias, buf);
writeBinary(weights, buf);
writeBinary(iter_num, buf);
writeBinary(gradient_batch, buf);
writeBinary(batch_size, buf);
gradient_computer->write(buf);
weights_updater->write(buf);
// gradient_computer->write(buf);
}
void read(ReadBuffer & buf)
void read(ReadBuffer &buf)
{
readBinary(bias, buf);
readBinary(weights, buf);
readBinary(iter_num, buf);
readBinary(gradient_batch, buf);
readBinary(batch_size, buf);
gradient_computer->read(buf);
weights_updater->read(buf);
// gradient_computer->read(buf);
}
Float64 predict(const std::vector<Float64> & predict_feature) const
Float64 predict(const std::vector<Float64> &predict_feature) const
{
/// We do not update the weights on predict, because it could slow down prediction.
/// However, we could, for example, update them on every merge regardless of how many elements are in the batch.
// if (cur_batch)
// {
// update_weights();
// }
std::cout << "\n\nWEIGHTS: ";
for (size_t i = 0; i != weights.size(); ++i) {
std::cout << weights[i] << " ";
}
std::cout << "\n\n";
return gradient_computer->predict(predict_feature, weights, bias);
}
void predict_for_all(ColumnVector<Float64>::Container & container, Block & block, const ColumnNumbers & arguments) const
void predict_for_all(ColumnVector<Float64>::Container &container, Block &block, const ColumnNumbers &arguments) const
{
std::cout << "\n\nWEIGHTS: ";
for (size_t i = 0; i != weights.size(); ++i) {
std::cout << weights[i] << " ";
}
std::cout << "\n\n";
gradient_computer->predict_for_all(container, block, arguments, weights, bias);
}
private:
std::vector<Float64> weights;
Float64 bias{0.0};
Float64 learning_rate;
UInt32 batch_capacity;
Float64 bias{0.0};
UInt32 iter_num = 0;
std::vector<Float64> gradient_batch;
UInt32 batch_size;
std::shared_ptr<IGradientComputer> gradient_computer;
std::shared_ptr<IWeightsUpdater> weights_updater;
/**
* This function is called when we want to flush the current batch and make an update step with it
*/
void update_state()
{
if (batch_size == 0)
return;
weights_updater->update(batch_size, weights, bias, gradient_computer->get());
// weights_updater->update(batch_size, weights, bias, gradient_batch);
// /// use a pointer to gradient_batch, because some methods (e.g. simple stochastic descent) need to reset it
weights_updater->update(batch_size, weights, bias, gradient_batch);
batch_size = 0;
++iter_num;
gradient_computer->reset();
// TODO ask: this reset is not great for Nesterov and Adam; a separate reset function should be added
gradient_batch.assign(gradient_batch.size(), Float64{0.0});
}
};
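In LinearModelData::merge above, the updater states are folded together via weights_updater->merge(*rhs.weights_updater, frac, rhs_frac); in the Momentum and Nesterov implementations this is a linear blend of the velocities. The formula for frac itself sits in an elided hunk, so frac = iter_num / (iter_num + rhs.iter_num) is only a plausible assumption:
v \leftarrow frac \cdot v + rhs\_frac \cdot v_{rhs}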
template <
/// Implemented Machine Learning method
typename Data,
@@ -490,19 +673,19 @@ public:
String getName() const override { return Name::name; }
explicit AggregateFunctionMLMethod(UInt32 param_num,
std::shared_ptr<IGradientComputer> gradient_computer,
std::shared_ptr<IWeightsUpdater> weights_updater,
Float64 learning_rate,
UInt32 batch_size,
const DataTypes & argument_types,
const Array & params)
std::shared_ptr<IGradientComputer> gradient_computer,
std::shared_ptr<IWeightsUpdater> weights_updater,
Float64 learning_rate,
UInt32 batch_size,
const DataTypes & argument_types,
const Array & params)
: IAggregateFunctionDataHelper<Data, AggregateFunctionMLMethod<Data, Name>>(argument_types, params),
param_num(param_num),
learning_rate(learning_rate),
batch_size(batch_size),
gc(std::move(gradient_computer)),
wu(std::move(weights_updater))
{}
wu(std::move(weights_updater)) {
}
DataTypePtr getReturnType() const override
{
@@ -538,7 +721,8 @@ public:
void predictResultInto(ConstAggregateDataPtr place, IColumn & to, Block & block, const ColumnNumbers & arguments) const
{
if (arguments.size() != param_num + 1)
throw Exception("Predict got incorrect number of arguments. Got: " + std::to_string(arguments.size()) + ". Required: " + std::to_string(param_num + 1),
throw Exception("Predict got incorrect number of arguments. Got: " +
std::to_string(arguments.size()) + ". Required: " + std::to_string(param_num + 1),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
auto &column = dynamic_cast<ColumnVector<Float64> &>(to);

The diffs of the remaining three files are suppressed because one or more lines are too long.