#pragma once

#include <algorithm>
#include <cmath>
#include <limits>
#include <optional>
#include <utility>
#include <vector>

#include <DataTypes/DataTypeString.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <base/range.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Columns/ColumnString.h>
#include <base/logger_useful.h>
#include <IO/ReadBufferFromString.h>
#include <Common/HashTable/HashMap.h>

namespace DB
{

template<typename X, typename Y>
|
|
|
|
struct AggregateFunctionSparkbarData
|
|
|
|
{
|
|
|
|
|
2021-08-17 09:28:54 +00:00
|
|
|
using Points = HashMap<X, Y>;
|
2021-08-09 15:14:51 +00:00
|
|
|
Points points;
|
|
|
|
|
2021-08-17 09:28:54 +00:00
|
|
|
X min_x = std::numeric_limits<X>::max();
|
|
|
|
X max_x = std::numeric_limits<X>::lowest();
|
|
|
|
|
2021-08-09 15:14:51 +00:00
|
|
|
Y min_y = std::numeric_limits<Y>::max();
|
2021-08-10 08:59:36 +00:00
|
|
|
Y max_y = std::numeric_limits<Y>::lowest();
|
2021-08-09 15:14:51 +00:00
|
|
|
|
2021-08-17 09:28:54 +00:00
|
|
|
void insert(const X & x, const Y & y)
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
2021-08-17 09:28:54 +00:00
|
|
|
auto result = points.insert({x, y});
|
|
|
|
if (!result.second)
|
|
|
|
result.first->getMapped() += y;
|
|
|
|
}
|
2021-08-15 11:50:32 +00:00
|
|
|
|
2021-08-17 09:28:54 +00:00
|
|
|
void add(X x, Y y)
|
|
|
|
{
|
|
|
|
insert(x, y);
|
|
|
|
min_x = std::min(x, min_x);
|
|
|
|
max_x = std::max(x, max_x);
|
2021-08-15 11:50:32 +00:00
|
|
|
min_y = std::min(y, min_y);
|
|
|
|
max_y = std::max(y, max_y);
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void merge(const AggregateFunctionSparkbarData & other)
|
|
|
|
{
|
|
|
|
if (other.points.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (auto & point : other.points)
|
2021-08-17 09:28:54 +00:00
|
|
|
insert(point.getKey(), point.getMapped());
|
2021-08-09 15:14:51 +00:00
|
|
|
|
2021-08-17 09:28:54 +00:00
|
|
|
min_x = std::min(other.min_x, min_x);
|
|
|
|
max_x = std::max(other.max_x, max_x);
|
2021-08-15 11:50:32 +00:00
|
|
|
min_y = std::min(other.min_y, min_y);
|
|
|
|
max_y = std::max(other.max_y, max_y);
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void serialize(WriteBuffer & buf) const
|
|
|
|
{
|
2021-08-17 09:28:54 +00:00
|
|
|
writeBinary(min_x, buf);
|
|
|
|
writeBinary(max_x, buf);
|
2021-08-09 15:14:51 +00:00
|
|
|
writeBinary(min_y, buf);
|
|
|
|
writeBinary(max_y, buf);
|
|
|
|
writeVarUInt(points.size(), buf);
|
|
|
|
|
|
|
|
for (const auto & elem : points)
|
|
|
|
{
|
2021-08-17 09:28:54 +00:00
|
|
|
writeBinary(elem.getKey(), buf);
|
|
|
|
writeBinary(elem.getMapped(), buf);
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void deserialize(ReadBuffer & buf)
|
|
|
|
{
|
2021-08-17 09:28:54 +00:00
|
|
|
readBinary(min_x, buf);
|
|
|
|
readBinary(max_x, buf);
|
|
|
|
readBinary(min_y, buf);
|
|
|
|
readBinary(max_y, buf);
|
2021-08-09 15:14:51 +00:00
|
|
|
size_t size;
|
|
|
|
readVarUInt(size, buf);
|
|
|
|
|
|
|
|
/// TODO Protection against huge size
|
|
|
|
X x;
|
|
|
|
Y y;
|
|
|
|
for (size_t i = 0; i < size; ++i)
|
|
|
|
{
|
|
|
|
readBinary(x, buf);
|
|
|
|
readBinary(y, buf);
|
2021-08-17 09:28:54 +00:00
|
|
|
insert(x, y);
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
template<typename X, typename Y>
|
|
|
|
class AggregateFunctionSparkbar final
|
|
|
|
: public IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar<X, Y>>
|
|
|
|
{
|
|
|
|
|
|
|
|
private:
|
|
|
|
size_t width;
|
|
|
|
X min_x;
|
|
|
|
X max_x;
|
2021-11-22 10:07:08 +00:00
|
|
|
bool specified_min_max_x;
|
2021-08-09 15:14:51 +00:00
|
|
|
|
2021-11-24 15:25:30 +00:00
|
|
|
template <class T>
|
|
|
|
String getBar(const T value) const
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
2021-11-24 15:25:30 +00:00
|
|
|
if (isNaN(value) || value > 8 || value < 1)
|
|
|
|
return " ";
|
|
|
|
|
2021-08-09 15:14:51 +00:00
|
|
|
// ▁▂▃▄▅▆▇█
|
2021-11-24 15:25:30 +00:00
|
|
|
switch (static_cast<UInt8>(value))
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
|
|
|
case 1: return "▁";
|
|
|
|
case 2: return "▂";
|
|
|
|
case 3: return "▃";
|
|
|
|
case 4: return "▄";
|
|
|
|
case 5: return "▅";
|
|
|
|
case 6: return "▆";
|
|
|
|
case 7: return "▇";
|
|
|
|
case 8: return "█";
|
|
|
|
}
|
|
|
|
return " ";
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The minimum value of y is rendered as the lowest height "▁",
|
|
|
|
* the maximum value of y is rendered as the highest height "█", and the middle value will be rendered proportionally.
|
|
|
|
* If a bucket has no y value, it will be rendered as " ".
|
|
|
|
* If the actual number of buckets is greater than the specified bucket, it will be compressed by width.
|
|
|
|
* For example, there are actually 11 buckets, specify 10 buckets, and divide the 11 buckets as follows (11/10):
|
|
|
|
* 0.0-1.1, 1.1-2.2, 2.2-3.3, 3.3-4.4, 4.4-5.5, 5.5-6.6, 6.6-7.7, 7.7-8.8, 8.8-9.9, 9.9-11.
|
|
|
|
* The y value of the first bucket will be calculated as follows:
|
|
|
|
* the actual y value of the first position + the actual second position y*0.1, and the remaining y*0.9 is reserved for the next bucket.
|
|
|
|
* The next bucket will use the last y*0.9 + the actual third position y*0.2, and the remaining y*0.8 will be reserved for the next bucket. And so on.
|
|
|
|
*/
|
|
|
|
String render(const AggregateFunctionSparkbarData<X, Y> & data) const
|
|
|
|
{
|
|
|
|
String value;
|
2021-08-17 09:28:54 +00:00
|
|
|
if (data.points.empty() || !width)
|
2021-08-15 11:50:32 +00:00
|
|
|
return value;
|
2021-11-22 10:07:08 +00:00
|
|
|
|
2021-11-24 15:25:30 +00:00
|
|
|
size_t diff_x;
|
|
|
|
X min_x_local;
|
|
|
|
if (specified_min_max_x)
|
|
|
|
{
|
|
|
|
diff_x = max_x - min_x;
|
|
|
|
min_x_local = min_x;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
diff_x = data.max_x - data.min_x;
|
|
|
|
min_x_local = data.min_x;
|
|
|
|
}
|
|
|
|
|
2021-08-09 15:14:51 +00:00
|
|
|
if ((diff_x + 1) <= width)
|
|
|
|
{
|
|
|
|
Y min_y = data.min_y;
|
2021-08-20 12:48:23 +00:00
|
|
|
Y max_y = data.max_y;
|
|
|
|
Float64 diff_y = max_y - min_y;
|
2021-08-10 08:59:36 +00:00
|
|
|
|
|
|
|
if (diff_y)
|
|
|
|
{
|
|
|
|
for (size_t i = 0; i <= diff_x; ++i)
|
|
|
|
{
|
2021-11-24 15:25:30 +00:00
|
|
|
auto it = data.points.find(min_x_local + i);
|
2021-08-10 08:59:36 +00:00
|
|
|
bool found = it != data.points.end();
|
2021-11-24 15:25:30 +00:00
|
|
|
value += getBar(found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0);
|
2021-08-10 08:59:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
2021-08-10 08:59:36 +00:00
|
|
|
for (size_t i = 0; i <= diff_x; ++i)
|
2021-11-24 15:25:30 +00:00
|
|
|
value += getBar(data.points.has(min_x_local + i) ? 1 : 0);
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
// begin reshapes to width buckets
|
|
|
|
Float64 multiple_d = (diff_x + 1) / static_cast<Float64>(width);
|
|
|
|
|
|
|
|
std::optional<Float64> min_y;
|
|
|
|
std::optional<Float64> max_y;
|
|
|
|
|
|
|
|
std::optional<Float64> new_y;
|
2021-08-20 10:27:14 +00:00
|
|
|
std::vector<std::optional<Float64>> newPoints;
|
2021-08-30 09:29:25 +00:00
|
|
|
newPoints.reserve(width);
|
2021-08-17 09:28:54 +00:00
|
|
|
|
|
|
|
std::pair<size_t, Float64> bound{0, 0.0};
|
|
|
|
size_t cur_bucket_num = 0;
|
|
|
|
// upper bound for bucket
|
|
|
|
auto upperBound = [&](size_t bucket_num)
|
|
|
|
{
|
|
|
|
bound.second = (bucket_num + 1) * multiple_d;
|
|
|
|
bound.first = std::floor(bound.second);
|
|
|
|
};
|
|
|
|
upperBound(cur_bucket_num);
|
2021-08-09 15:14:51 +00:00
|
|
|
for (size_t i = 0; i <= (diff_x + 1); ++i)
|
|
|
|
{
|
2021-08-17 09:28:54 +00:00
|
|
|
if (i == bound.first) // is bound
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
2021-08-17 09:28:54 +00:00
|
|
|
Float64 proportion = bound.second - bound.first;
|
2021-11-24 15:25:30 +00:00
|
|
|
auto it = data.points.find(min_x_local + i);
|
2021-08-09 15:14:51 +00:00
|
|
|
bool found = (it != data.points.end());
|
2021-11-22 10:07:08 +00:00
|
|
|
if (found && proportion > 0)
|
2021-08-17 09:28:54 +00:00
|
|
|
new_y = new_y.value_or(0) + it->getMapped() * proportion;
|
2021-08-09 15:14:51 +00:00
|
|
|
|
|
|
|
if (new_y)
|
|
|
|
{
|
|
|
|
Float64 avg_y = new_y.value() / multiple_d;
|
|
|
|
|
2021-08-30 09:29:25 +00:00
|
|
|
newPoints.emplace_back(avg_y);
|
2021-08-20 10:27:14 +00:00
|
|
|
// If min_y has no value, or if the avg_y of the current bucket is less than min_y, update it.
|
2021-08-09 15:14:51 +00:00
|
|
|
if (!min_y || avg_y < min_y)
|
|
|
|
min_y = avg_y;
|
|
|
|
if (!max_y || avg_y > max_y)
|
|
|
|
max_y = avg_y;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-08-30 09:29:25 +00:00
|
|
|
newPoints.emplace_back();
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// next bucket
|
2021-08-17 09:28:54 +00:00
|
|
|
new_y = found ? ((1 - proportion) * it->getMapped()) : std::optional<Float64>();
|
|
|
|
upperBound(++cur_bucket_num);
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-11-24 15:25:30 +00:00
|
|
|
auto it = data.points.find(min_x_local + i);
|
2021-08-09 15:14:51 +00:00
|
|
|
if (it != data.points.end())
|
2021-08-17 09:28:54 +00:00
|
|
|
new_y = new_y.value_or(0) + it->getMapped();
|
2021-08-09 15:14:51 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-08-20 10:27:14 +00:00
|
|
|
if (!min_y || !max_y) // No value is set
|
2021-08-09 15:14:51 +00:00
|
|
|
return {};
|
|
|
|
|
|
|
|
Float64 diff_y = max_y.value() - min_y.value();
|
|
|
|
|
2021-08-20 10:27:14 +00:00
|
|
|
auto getBars = [&] (const std::optional<Float64> & point_y)
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
2021-11-24 15:25:30 +00:00
|
|
|
value += getBar(point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0);
|
2021-08-09 15:14:51 +00:00
|
|
|
};
|
2021-08-20 10:27:14 +00:00
|
|
|
auto getBarsForConstant = [&] (const std::optional<Float64> & point_y)
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
2021-08-20 10:27:14 +00:00
|
|
|
value += getBar(point_y ? 1 : 0);
|
2021-08-09 15:14:51 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
if (diff_y)
|
|
|
|
std::for_each(newPoints.begin(), newPoints.end(), getBars);
|
|
|
|
else
|
|
|
|
std::for_each(newPoints.begin(), newPoints.end(), getBarsForConstant);
|
|
|
|
}
|
|
|
|
return value;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params)
|
|
|
|
: IAggregateFunctionDataHelper<AggregateFunctionSparkbarData<X, Y>, AggregateFunctionSparkbar>(
|
|
|
|
arguments, params)
|
|
|
|
{
|
|
|
|
width = params.at(0).safeGet<UInt64>();
|
|
|
|
if (params.size() == 3)
|
|
|
|
{
|
2021-11-22 10:07:08 +00:00
|
|
|
specified_min_max_x = true;
|
2021-08-09 15:14:51 +00:00
|
|
|
min_x = params.at(1).safeGet<X>();
|
|
|
|
max_x = params.at(2).safeGet<X>();
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2021-11-22 10:07:08 +00:00
|
|
|
specified_min_max_x = false;
|
2021-08-09 15:14:51 +00:00
|
|
|
min_x = std::numeric_limits<X>::min();
|
|
|
|
max_x = std::numeric_limits<X>::max();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
|
|
|
return "sparkbar";
|
|
|
|
}
|
|
|
|
|
|
|
|
DataTypePtr getReturnType() const override
|
|
|
|
{
|
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
|
|
|
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override
|
|
|
|
{
|
|
|
|
X x = assert_cast<const ColumnVector<X> *>(columns[0])->getData()[row_num];
|
|
|
|
if (min_x <= x && x <= max_x)
|
|
|
|
{
|
|
|
|
Y y = assert_cast<const ColumnVector<Y> *>(columns[1])->getData()[row_num];
|
|
|
|
this->data(place).add(x, y);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * /*arena*/) const override
|
|
|
|
{
|
|
|
|
this->data(place).merge(this->data(rhs));
|
|
|
|
}
|
|
|
|
|
2021-11-18 07:51:19 +00:00
|
|
|
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
|
|
|
this->data(place).serialize(buf);
|
|
|
|
}
|
|
|
|
|
2021-11-18 07:51:19 +00:00
|
|
|
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
2021-08-09 15:14:51 +00:00
|
|
|
{
|
|
|
|
this->data(place).deserialize(buf);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool allocatesMemoryInArena() const override { return false; }
|
|
|
|
|
|
|
|
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
|
|
|
|
{
|
|
|
|
auto & to_column = assert_cast<ColumnString &>(to);
|
|
|
|
const auto & data = this->data(place);
|
|
|
|
const String & value = render(data);
|
|
|
|
to_column.insertData(value.data(), value.size());
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}