From 91c93ba8f8451793c7a9215ecc2eab77b911fec0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 2 Feb 2023 13:23:46 +0000 Subject: [PATCH 1/8] Update AggregateFunctionSparkbar --- .../AggregateFunctionSparkbar.cpp | 7 +- .../AggregateFunctionSparkbar.h | 239 +++++++----------- .../02016_aggregation_spark_bar.reference | 1 - .../02016_aggregation_spark_bar.sql | 18 +- 4 files changed, 112 insertions(+), 153 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.cpp b/src/AggregateFunctions/AggregateFunctionSparkbar.cpp index 78c70670513..e87e3b306c2 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.cpp +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.cpp @@ -50,11 +50,13 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c assertBinary(name, arguments); if (params.size() != 1 && params.size() != 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The number of params does not match for aggregate function {}", name); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "The number of params does not match for aggregate function '{}', expected 1 or 3, got {}", name, params.size()); if (params.size() == 3) { - if (params.at(1).getType() != arguments[0]->getDefault().getType() || params.at(2).getType() != arguments[0]->getDefault().getType()) + if (params.at(1).getType() != arguments[0]->getDefault().getType() || + params.at(2).getType() != arguments[0]->getDefault().getType()) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The second and third parameters are not the same type as the first arguments for aggregate function {}", name); @@ -63,7 +65,6 @@ AggregateFunctionPtr createAggregateFunctionSparkbar(const std::string & name, c return createAggregateFunctionSparkbarImpl(name, *arguments[0], *arguments[1], arguments, params); } - } void registerAggregateFunctionSparkbar(AggregateFunctionFactory & factory) diff --git 
a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index 8cf84660775..c27326baff6 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -18,10 +18,12 @@ namespace DB { +constexpr size_t NUM_BAR_LEVELS = 8; + template struct AggregateFunctionSparkbarData { - + /// TODO: calculate histogram instead of storing all points using Points = HashMap; Points points; @@ -31,20 +33,26 @@ struct AggregateFunctionSparkbarData Y min_y = std::numeric_limits::max(); Y max_y = std::numeric_limits::lowest(); - void insert(const X & x, const Y & y) + Y insert(const X & x, const Y & y) { - auto result = points.insert({x, y}); - if (!result.second) - result.first->getMapped() += y; + if (isNaN(y) || y <= 0) + return 0; + + auto [it, inserted] = points.insert({x, y}); + if (!inserted) + it->getMapped() += y; + return it->getMapped(); } void add(X x, Y y) { - insert(x, y); + auto new_y = insert(x, y); + min_x = std::min(x, min_x); max_x = std::max(x, max_x); + min_y = std::min(y, min_y); - max_y = std::max(y, max_y); + max_y = std::max(new_y, max_y); } void merge(const AggregateFunctionSparkbarData & other) @@ -53,10 +61,14 @@ struct AggregateFunctionSparkbarData return; for (auto & point : other.points) - insert(point.getKey(), point.getMapped()); + { + auto new_y = insert(point.getKey(), point.getMapped()); + max_y = std::max(new_y, max_y); + } min_x = std::min(other.min_x, min_x); max_x = std::max(other.max_x, max_x); + min_y = std::min(other.min_y, min_y); max_y = std::max(other.max_y, max_y); } @@ -85,7 +97,6 @@ struct AggregateFunctionSparkbarData size_t size; readVarUInt(size, buf); - /// TODO Protection against huge size X x; Y y; for (size_t i = 0; i < size; ++i) @@ -95,7 +106,6 @@ struct AggregateFunctionSparkbarData insert(x, y); } } - }; template @@ -104,16 +114,18 @@ class AggregateFunctionSparkbar final { private: - size_t width; - X min_x; - X max_x; 
- bool specified_min_max_x; + const size_t width = 0; + + /// Range for x specified in parameters. + const bool is_specified_range_x = false; + const X begin_x = std::numeric_limits::min(); + const X end_x = std::numeric_limits::max(); template size_t updateFrame(ColumnString::Chars & frame, const T value) const { static constexpr std::array bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"}; - const auto & bar = (isNaN(value) || value > 8 || value < 1) ? bars[0] : bars[static_cast(value)]; + const auto & bar = (isNaN(value) || value < 1 || 8 < value) ? bars[0] : bars[static_cast(value)]; frame.insert(bar.begin(), bar.end()); return bar.size(); } @@ -122,161 +134,104 @@ private: * The minimum value of y is rendered as the lowest height "▁", * the maximum value of y is rendered as the highest height "█", and the middle value will be rendered proportionally. * If a bucket has no y value, it will be rendered as " ". - * If the actual number of buckets is greater than the specified bucket, it will be compressed by width. - * For example, there are actually 11 buckets, specify 10 buckets, and divide the 11 buckets as follows (11/10): - * 0.0-1.1, 1.1-2.2, 2.2-3.3, 3.3-4.4, 4.4-5.5, 5.5-6.6, 6.6-7.7, 7.7-8.8, 8.8-9.9, 9.9-11. - * The y value of the first bucket will be calculated as follows: - * the actual y value of the first position + the actual second position y*0.1, and the remaining y*0.9 is reserved for the next bucket. - * The next bucket will use the last y*0.9 + the actual third position y*0.2, and the remaining y*0.8 will be reserved for the next bucket. And so on. */ void render(ColumnString & to_column, const AggregateFunctionSparkbarData & data) const { size_t sz = 0; auto & values = to_column.getChars(); auto & offsets = to_column.getOffsets(); - auto update_column = [&] () + + if (data.points.empty()) { values.push_back('\0'); offsets.push_back(offsets.empty() ? 
sz + 1 : offsets.back() + sz + 1); - }; + } - if (data.points.empty() || !width) - return update_column(); + X from_x; + X to_x; - size_t diff_x; - X min_x_local; - if (specified_min_max_x) + if (is_specified_range_x) { - diff_x = max_x - min_x; - min_x_local = min_x; + from_x = begin_x; + to_x = end_x; } else { - diff_x = data.max_x - data.min_x; - min_x_local = data.min_x; + from_x = data.min_x; + to_x = data.max_x; } - if ((diff_x + 1) <= width) + if (from_x >= to_x) { - Y min_y = data.min_y; - Y max_y = data.max_y; - Float64 diff_y = max_y - min_y; - - if (diff_y != 0.0) - { - for (size_t i = 0; i <= diff_x; ++i) - { - auto it = data.points.find(static_cast(min_x_local + i)); - bool found = it != data.points.end(); - sz += updateFrame(values, found ? std::round(((it->getMapped() - min_y) / diff_y) * 7) + 1 : 0.0); - } - } - else - { - for (size_t i = 0; i <= diff_x; ++i) - sz += updateFrame(values, data.points.has(min_x_local + static_cast(i)) ? 1 : 0); - } + sz += updateFrame(values, 1); + values.push_back('\0'); + offsets.push_back(offsets.empty() ? 
sz + 1 : offsets.back() + sz + 1); + return; } - else + + PaddedPODArray histogram(width, 0); + + if (data.points.size() < width) + histogram.resize(data.points.size()); + + for (const auto & point : data.points) { - // begin reshapes to width buckets - Float64 multiple_d = (diff_x + 1) / static_cast(width); + if (point.getKey() < from_x || to_x < point.getKey()) + continue; - std::optional min_y; - std::optional max_y; + size_t index = (point.getKey() - from_x) * (histogram.size() - 1) / (to_x - from_x); - std::optional new_y; - std::vector> new_points; - new_points.reserve(width); - - std::pair bound{0, 0.0}; - size_t cur_bucket_num = 0; - // upper bound for bucket - auto upper_bound = [&](size_t bucket_num) - { - bound.second = (bucket_num + 1) * multiple_d; - bound.first = static_cast(std::floor(bound.second)); - }; - upper_bound(cur_bucket_num); - for (size_t i = 0; i <= (diff_x + 1); ++i) - { - if (i == bound.first) // is bound - { - Float64 proportion = bound.second - bound.first; - auto it = data.points.find(min_x_local + static_cast(i)); - bool found = (it != data.points.end()); - if (found && proportion > 0) - new_y = new_y.value_or(0) + it->getMapped() * proportion; - - if (new_y) - { - Float64 avg_y = new_y.value() / multiple_d; - - new_points.emplace_back(avg_y); - // If min_y has no value, or if the avg_y of the current bucket is less than min_y, update it. - if (!min_y || avg_y < min_y) - min_y = avg_y; - if (!max_y || avg_y > max_y) - max_y = avg_y; - } - else - { - new_points.emplace_back(); - } - - // next bucket - new_y = found ? 
((1 - proportion) * it->getMapped()) : std::optional(); - upper_bound(++cur_bucket_num); - } - else - { - auto it = data.points.find(min_x_local + static_cast(i)); - if (it != data.points.end()) - new_y = new_y.value_or(0) + it->getMapped(); - } - } - - if (!min_y || !max_y) // No value is set - return update_column(); - - Float64 diff_y = max_y.value() - min_y.value(); - - auto update_frame = [&] (const std::optional & point_y) - { - sz += updateFrame(values, point_y ? std::round(((point_y.value() - min_y.value()) / diff_y) * 7) + 1 : 0); - }; - auto update_frame_for_constant = [&] (const std::optional & point_y) - { - sz += updateFrame(values, point_y ? 1 : 0); - }; - - if (diff_y != 0.0) - std::for_each(new_points.begin(), new_points.end(), update_frame); + /// In case of overflow, just saturate + if (std::numeric_limits::max() - histogram[index] < point.getMapped()) + histogram[index] = std::numeric_limits::max(); else - std::for_each(new_points.begin(), new_points.end(), update_frame_for_constant); + histogram[index] += point.getMapped(); } - update_column(); + + Y y_min = std::numeric_limits::max(); + Y y_max = std::numeric_limits::min(); + for (auto & y : histogram) + { + if (isNaN(y) || y <= 0) + continue; + y_min = std::min(y_min, y); + y_max = std::max(y_max, y); + } + + if (y_min >= y_max) + y_min = 0; + + if (y_min == y_max) + y_max = y_min + 1; + + for (auto & y : histogram) + { + if (isNaN(y) || y <= 0) + y = 0; + else + y = (y - y_min) * 7 / (y_max - y_min) + 1; + } + + for (const auto & y : histogram) + sz += updateFrame(values, y); + + values.push_back('\0'); + offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1); } - public: AggregateFunctionSparkbar(const DataTypes & arguments, const Array & params) - : IAggregateFunctionDataHelper, AggregateFunctionSparkbar>( - arguments, params, std::make_shared()) + : IAggregateFunctionDataHelper, AggregateFunctionSparkbar>(arguments, params, std::make_shared()) + , width(params.empty() ? 
0 : params.at(0).safeGet()) + , is_specified_range_x(params.size() >= 3) + , begin_x(is_specified_range_x ? static_cast(params.at(1).safeGet()) : std::numeric_limits::min()) + , end_x(is_specified_range_x ? static_cast(params.at(2).safeGet()) : std::numeric_limits::max()) { - width = params.at(0).safeGet(); - if (params.size() == 3) - { - specified_min_max_x = true; - min_x = static_cast(params.at(1).safeGet()); - max_x = static_cast(params.at(2).safeGet()); - } - else - { - specified_min_max_x = false; - min_x = std::numeric_limits::min(); - max_x = std::numeric_limits::max(); - } + if (width < 2 || 1024 < width) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter width must be in range [2, 1024]"); + + if (begin_x >= end_x) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter `min_x` must be less than `max_x`"); } String getName() const override @@ -287,7 +242,7 @@ public: void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * /*arena*/) const override { X x = assert_cast *>(columns[0])->getData()[row_num]; - if (min_x <= x && x <= max_x) + if (begin_x <= x && x <= end_x) { Y y = assert_cast *>(columns[1])->getData()[row_num]; this->data(place).add(x, y); diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference index 118d42a62d4..7ff91c55676 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference @@ -1,4 +1,3 @@ -▁ ▁█ ▃█▁ ▄▅█▁ diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql index 5237f832d25..70b22220fb7 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql @@ -4,7 +4,11 @@ CREATE TABLE spark_bar_test (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree INSERT INTO spark_bar_test 
VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); -SELECT sparkbar(1)(event_date,cnt) FROM spark_bar_test; +SELECT sparkbar(1)(event_date,cnt) FROM spark_bar_test; -- { serverError BAD_ARGUMENTS } +SELECT sparkbar(1025)(event_date,cnt) FROM spark_bar_test; -- { serverError BAD_ARGUMENTS } +SELECT sparkbar(2, 0)(event_date,cnt) FROM spark_bar_test; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT sparkbar(2, 0, 1, 3)(event_date,cnt) FROM spark_bar_test; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + SELECT sparkbar(2)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(3)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(4)(event_date,cnt) FROM spark_bar_test; @@ -20,7 +24,7 @@ SELECT sparkbar(11,2,5)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,3,7)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,4,11)(event_date,cnt) FROM spark_bar_test; -SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-02'))(event_date,cnt) FROM spark_bar_test; +SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-02'))(event_date,cnt) FROM spark_bar_test; -- { serverError BAD_ARGUMENTS } SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-05'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,toDate('2020-01-03'),toDate('2020-01-07'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,toDate('2020-01-04'),toDate('2020-01-11'))(event_date,cnt) FROM spark_bar_test; @@ -37,8 +41,8 @@ DROP TABLE IF EXISTS spark_bar_test; WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; -- OOM guard -DROP TABLE IF EXISTS spark_bar_oom; -CREATE TABLE spark_bar_oom (x UInt64, y UInt8) Engine=MergeTree ORDER BY tuple(); -INSERT INTO spark_bar_oom VALUES (18446744073709551615,255),(0,0),(0,0),(4036797895307271799,163); -SELECT sparkbar(9)(x,y) FROM spark_bar_oom 
SETTINGS max_memory_usage = 100000000; -- { serverError 241 } -DROP TABLE IF EXISTS spark_bar_oom; +-- DROP TABLE IF EXISTS spark_bar_oom; +-- CREATE TABLE spark_bar_oom (x UInt64, y UInt8) Engine=MergeTree ORDER BY tuple(); +-- INSERT INTO spark_bar_oom VALUES (18446744073709551615,255),(0,0),(0,0),(4036797895307271799,163); +-- SELECT sparkbar(9)(x,y) FROM spark_bar_oom SETTINGS max_memory_usage = 100000000; -- { serverError 241 } +-- DROP TABLE IF EXISTS spark_bar_oom; From 212b800771122c2413b212104bc145fd325d6846 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 2 Feb 2023 15:44:21 +0000 Subject: [PATCH 2/8] wip AggregateFunctionSparkbar --- .../AggregateFunctionSparkbar.h | 60 ++++++++++--------- .../02016_aggregation_spark_bar.reference | 27 ++++++++- .../02016_aggregation_spark_bar.sql | 13 +--- 3 files changed, 59 insertions(+), 41 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index c27326baff6..e2cd2f11040 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -18,8 +18,6 @@ namespace DB { -constexpr size_t NUM_BAR_LEVELS = 8; - template struct AggregateFunctionSparkbarData { @@ -121,8 +119,7 @@ private: const X begin_x = std::numeric_limits::min(); const X end_x = std::numeric_limits::max(); - template - size_t updateFrame(ColumnString::Chars & frame, const T value) const + size_t updateFrame(ColumnString::Chars & frame, Y value) const { static constexpr std::array bars{" ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"}; const auto & bar = (isNaN(value) || value < 1 || 8 < value) ? 
bars[0] : bars[static_cast(value)]; @@ -137,72 +134,78 @@ private: */ void render(ColumnString & to_column, const AggregateFunctionSparkbarData & data) const { - size_t sz = 0; auto & values = to_column.getChars(); auto & offsets = to_column.getOffsets(); if (data.points.empty()) { values.push_back('\0'); - offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1); + offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1); + return; } - X from_x; - X to_x; - - if (is_specified_range_x) - { - from_x = begin_x; - to_x = end_x; - } - else - { - from_x = data.min_x; - to_x = data.max_x; - } + auto from_x = is_specified_range_x ? begin_x : data.min_x; + auto to_x = is_specified_range_x ? end_x : data.max_x; if (from_x >= to_x) { - sz += updateFrame(values, 1); + size_t sz = updateFrame(values, 1); values.push_back('\0'); offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1); return; } PaddedPODArray histogram(width, 0); - - if (data.points.size() < width) - histogram.resize(data.points.size()); + PaddedPODArray fhistogram(width, 0); for (const auto & point : data.points) { if (point.getKey() < from_x || to_x < point.getKey()) continue; - size_t index = (point.getKey() - from_x) * (histogram.size() - 1) / (to_x - from_x); + size_t index = (point.getKey() - from_x) * histogram.size() / (to_x - from_x + 1); /// In case of overflow, just saturate if (std::numeric_limits::max() - histogram[index] < point.getMapped()) histogram[index] = std::numeric_limits::max(); else histogram[index] += point.getMapped(); + + fhistogram[index] += 1; + } + + for (size_t i = 0; i < histogram.size(); ++i) + { + if (fhistogram[i] > 0) + histogram[i] /= fhistogram[i]; } Y y_min = std::numeric_limits::max(); Y y_max = std::numeric_limits::min(); + bool has_valid_y = false; for (auto & y : histogram) { if (isNaN(y) || y <= 0) continue; + has_valid_y = true; y_min = std::min(y_min, y); y_max = std::max(y_max, y); } - if (y_min >= y_max) - y_min = 0; + if 
(!has_valid_y) + { + values.push_back('\0'); + offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1); + return; + } - if (y_min == y_max) - y_max = y_min + 1; + if (y_min >= y_max) + { + size_t sz = updateFrame(values, 1); + values.push_back('\0'); + offsets.push_back(offsets.empty() ? sz + 1 : offsets.back() + sz + 1); + return; + } for (auto & y : histogram) { @@ -212,6 +215,7 @@ private: y = (y - y_min) * 7 / (y_max - y_min) + 1; } + size_t sz = 0; for (const auto & y : histogram) sz += updateFrame(values, y); diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference index 7ff91c55676..a97c675a93e 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference @@ -1,25 +1,50 @@ +-- { echoOn } + +SELECT sparkbar(2)(event_date,cnt) FROM spark_bar_test; ▁█ +SELECT sparkbar(3)(event_date,cnt) FROM spark_bar_test; ▃█▁ +SELECT sparkbar(4)(event_date,cnt) FROM spark_bar_test; ▄▅█▁ +SELECT sparkbar(5)(event_date,cnt) FROM spark_bar_test; ▄▄█▇▁ +SELECT sparkbar(6)(event_date,cnt) FROM spark_bar_test; ▃▄▅█▃▁ +SELECT sparkbar(7)(event_date,cnt) FROM spark_bar_test; ▂▅▃▇█▁▂ +SELECT sparkbar(8)(event_date,cnt) FROM spark_bar_test; ▂▅▃▅██ ▁ +SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_test; ▁▅▄▃██▅ ▁ +SELECT sparkbar(10)(event_date,cnt) FROM spark_bar_test; ▁▄▄▂▅▇█▂ ▂ +SELECT sparkbar(11)(event_date,cnt) FROM spark_bar_test; ▁▄▅▂▃▇▆█ ▂ +SELECT sparkbar(11,2,5)(event_date,cnt) FROM spark_bar_test; +SELECT sparkbar(11,3,7)(event_date,cnt) FROM spark_bar_test; +SELECT sparkbar(11,4,11)(event_date,cnt) FROM spark_bar_test; -▁ +SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-05'))(event_date,cnt) FROM spark_bar_test; ▆█▁▃ +SELECT sparkbar(11,toDate('2020-01-03'),toDate('2020-01-07'))(event_date,cnt) FROM spark_bar_test; ▅▁▂█▇ +SELECT 
sparkbar(11,toDate('2020-01-04'),toDate('2020-01-11'))(event_date,cnt) FROM spark_bar_test; ▁▂▇▆█ ▁ +SELECT sparkbar(2,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FROM spark_bar_test; ▁█ +SELECT sparkbar(2,toDate('2020-01-02'),toDate('2020-01-09'))(event_date,cnt) FROM spark_bar_test; ▁█ +SELECT sparkbar(3,toDate('2020-01-01'),toDate('2020-01-09'))(event_date,cnt) FROM spark_bar_test; ▁▄█ +SELECT sparkbar(3,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; ▂█▁ +SELECT sparkbar(4,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FROM spark_bar_test; ▁▃▅█ +SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; ▁▃▅█ +DROP TABLE IF EXISTS spark_bar_test; +WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; 0 ▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██ 1 ▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██ diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql index 70b22220fb7..86c5d6977a5 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql @@ -4,10 +4,7 @@ CREATE TABLE spark_bar_test (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree INSERT INTO spark_bar_test VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); -SELECT sparkbar(1)(event_date,cnt) FROM spark_bar_test; -- { serverError BAD_ARGUMENTS } -SELECT sparkbar(1025)(event_date,cnt) FROM spark_bar_test; -- { serverError BAD_ARGUMENTS } -SELECT sparkbar(2, 0)(event_date,cnt) FROM spark_bar_test; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT sparkbar(2, 0, 1, 3)(event_date,cnt) FROM spark_bar_test; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +-- { echoOn } SELECT sparkbar(2)(event_date,cnt) FROM 
spark_bar_test; SELECT sparkbar(3)(event_date,cnt) FROM spark_bar_test; @@ -24,7 +21,6 @@ SELECT sparkbar(11,2,5)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,3,7)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,4,11)(event_date,cnt) FROM spark_bar_test; -SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-02'))(event_date,cnt) FROM spark_bar_test; -- { serverError BAD_ARGUMENTS } SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-05'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,toDate('2020-01-03'),toDate('2020-01-07'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,toDate('2020-01-04'),toDate('2020-01-11'))(event_date,cnt) FROM spark_bar_test; @@ -39,10 +35,3 @@ SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FRO DROP TABLE IF EXISTS spark_bar_test; WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; - --- OOM guard --- DROP TABLE IF EXISTS spark_bar_oom; --- CREATE TABLE spark_bar_oom (x UInt64, y UInt8) Engine=MergeTree ORDER BY tuple(); --- INSERT INTO spark_bar_oom VALUES (18446744073709551615,255),(0,0),(0,0),(4036797895307271799,163); --- SELECT sparkbar(9)(x,y) FROM spark_bar_oom SETTINGS max_memory_usage = 100000000; -- { serverError 241 } --- DROP TABLE IF EXISTS spark_bar_oom; From ccc96ab9cce512e68d4847f91cffd8b39f1c9c8d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 2 Feb 2023 21:50:13 +0100 Subject: [PATCH 3/8] Added BAD_ARGUMENTS --- src/AggregateFunctions/AggregateFunctionSparkbar.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index e2cd2f11040..3c57ffdde5b 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -18,6 +18,11 @@ namespace DB { +namespace 
ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + template struct AggregateFunctionSparkbarData { From c6e473a66ae581fe0622aa50716ac36a9758b72a Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Feb 2023 16:47:13 +0000 Subject: [PATCH 4/8] Canonize 02016_aggregation_spark_bar --- .../02016_aggregation_spark_bar.reference | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference index a97c675a93e..5a38f08a106 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference @@ -1,23 +1,23 @@ -- { echoOn } SELECT sparkbar(2)(event_date,cnt) FROM spark_bar_test; -▁█ +▅█ SELECT sparkbar(3)(event_date,cnt) FROM spark_bar_test; -▃█▁ +▄█▃ SELECT sparkbar(4)(event_date,cnt) FROM spark_bar_test; -▄▅█▁ +▄▅█▃ SELECT sparkbar(5)(event_date,cnt) FROM spark_bar_test; -▄▄█▇▁ +▃▂▆█▂ SELECT sparkbar(6)(event_date,cnt) FROM spark_bar_test; -▃▄▅█▃▁ +▃▄▆█ ▃ SELECT sparkbar(7)(event_date,cnt) FROM spark_bar_test; -▂▅▃▇█▁▂ +▂▃▃▆█ ▂ SELECT sparkbar(8)(event_date,cnt) FROM spark_bar_test; -▂▅▃▅██ ▁ +▂▅▂▇▆█ ▂ SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_test; -▁▅▄▃██▅ ▁ +▂▅▂▃▆█ ▂ SELECT sparkbar(10)(event_date,cnt) FROM spark_bar_test; -▁▄▄▂▅▇█▂ ▂ +▂▅▂▃▇▆█ ▂ SELECT sparkbar(11)(event_date,cnt) FROM spark_bar_test; ▁▄▅▂▃▇▆█ ▂ SELECT sparkbar(11,2,5)(event_date,cnt) FROM spark_bar_test; @@ -27,24 +27,24 @@ SELECT sparkbar(11,3,7)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,4,11)(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(11,toDate('2020-01-02'),toDate('2020-01-05'))(event_date,cnt) FROM spark_bar_test; -▆█▁▃ +▆ █ ▃ ▅ SELECT sparkbar(11,toDate('2020-01-03'),toDate('2020-01-07'))(event_date,cnt) FROM spark_bar_test; -▅▁▂█▇ +▆ ▃ ▄ █ ▇ SELECT sparkbar(11,toDate('2020-01-04'),toDate('2020-01-11'))(event_date,cnt) FROM 
spark_bar_test; -▁▂▇▆█ ▁ +▂▃▇ ▆█ ▂ SELECT sparkbar(2,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FROM spark_bar_test; -▁█ +▄█ SELECT sparkbar(2,toDate('2020-01-02'),toDate('2020-01-09'))(event_date,cnt) FROM spark_bar_test; -▁█ +▄█ SELECT sparkbar(3,toDate('2020-01-01'),toDate('2020-01-09'))(event_date,cnt) FROM spark_bar_test; -▁▄█ +▄▅█ SELECT sparkbar(3,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; -▂█▁ +▃▅█ SELECT sparkbar(4,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FROM spark_bar_test; -▁▃▅█ +▃▄▆█ SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; -▁▃▅█ +▃▄▆█ DROP TABLE IF EXISTS spark_bar_test; WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; -0 ▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██ -1 ▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██ +0 ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█ +1 ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█ From 1e45033531ce70e2bfa09246806b23e64cb515ef Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Feb 2023 17:22:08 +0000 Subject: [PATCH 5/8] Update AggregateFunctionSparkbar --- .../AggregateFunctionSparkbar.h | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h index 3c57ffdde5b..919b59448a1 100644 --- a/src/AggregateFunctions/AggregateFunctionSparkbar.h +++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h @@ -154,7 +154,7 @@ private: if (from_x >= to_x) { - size_t sz = updateFrame(values, 1); + size_t sz = updateFrame(values, 8); values.push_back('\0'); offsets.push_back(offsets.empty() ? 
sz + 1 : offsets.back() + sz + 1); return; @@ -168,15 +168,24 @@ private: if (point.getKey() < from_x || to_x < point.getKey()) continue; - size_t index = (point.getKey() - from_x) * histogram.size() / (to_x - from_x + 1); + X delta = to_x - from_x; + if (delta < std::numeric_limits::max()) + delta = delta + 1; - /// In case of overflow, just saturate - if (std::numeric_limits::max() - histogram[index] < point.getMapped()) - histogram[index] = std::numeric_limits::max(); - else + X value = point.getKey() - from_x; + Float64 w = histogram.size(); + size_t index = std::min(static_cast(w / delta * value), histogram.size() - 1); + + if (std::numeric_limits::max() - histogram[index] > point.getMapped()) + { histogram[index] += point.getMapped(); - - fhistogram[index] += 1; + fhistogram[index] += 1; + } + else + { + /// In case of overflow, just saturate + histogram[index] = std::numeric_limits::max(); + } } for (size_t i = 0; i < histogram.size(); ++i) @@ -185,39 +194,27 @@ private: histogram[i] /= fhistogram[i]; } - Y y_min = std::numeric_limits::max(); - Y y_max = std::numeric_limits::min(); - bool has_valid_y = false; + Y y_max = 0; for (auto & y : histogram) { if (isNaN(y) || y <= 0) continue; - has_valid_y = true; - y_min = std::min(y_min, y); y_max = std::max(y_max, y); } - if (!has_valid_y) + if (y_max == 0) { values.push_back('\0'); offsets.push_back(offsets.empty() ? 1 : offsets.back() + 1); return; } - if (y_min >= y_max) - { - size_t sz = updateFrame(values, 1); - values.push_back('\0'); - offsets.push_back(offsets.empty() ? 
sz + 1 : offsets.back() + sz + 1); - return; - } - for (auto & y : histogram) { if (isNaN(y) || y <= 0) y = 0; else - y = (y - y_min) * 7 / (y_max - y_min) + 1; + y = y * 7 / y_max + 1; } size_t sz = 0; From 18e699f459ae60e9246401bc68311dad76c74ef6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Feb 2023 17:22:32 +0000 Subject: [PATCH 6/8] Add testcases to 02016_aggregation_spark_bar --- .../02016_aggregation_spark_bar.reference | 21 +++++++++++++- .../02016_aggregation_spark_bar.sql | 29 +++++++++++++++++-- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference index 5a38f08a106..534942fc1d5 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.reference +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.reference @@ -44,7 +44,26 @@ SELECT sparkbar(4,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FRO ▃▄▆█ SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; ▃▄▆█ -DROP TABLE IF EXISTS spark_bar_test; +SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; +▂▅▂▃▇▆█ WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; 0 ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█ 1 ▁▁▁▁▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇█ +SELECT sparkbar(128, 0, 9223372036854775806)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); + █ +SELECT sparkbar(128)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); +█ +SELECT sparkbar(9)(x, y) FROM (SELECT * FROM Values('x UInt64, y UInt8', (18446744073709551615,255), (0,0), (0,0), (4036797895307271799,254))); + ▇ █ +SELECT sparkbar(8, 0, 7)((number + 1) % 8, 1), sparkbar(8, 0, 7)((number + 2) % 8, 1), sparkbar(8, 0, 7)((number + 3) % 8, 1) FROM numbers(7); + ███████ █ ██████ ██ █████ +SELECT sparkbar(2)(number, -number) 
FROM numbers(10); + +SELECT sparkbar(10)(number, number - 7) FROM numbers(10); + ▄█ +SELECT sparkbar(1024)(number, number) FROM numbers(1024); + ▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█ +SELECT sparkbar(1024)(number, 1) FROM numbers(1024); 
+████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████ +SELECT sparkbar(1024)(number, 0) FROM numbers(1024); + diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql index 86c5d6977a5..2100a3dd4a6 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql +++ b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql @@ -31,7 +31,32 @@ SELECT sparkbar(3,toDate('2020-01-01'),toDate('2020-01-09'))(event_date,cnt) FRO SELECT sparkbar(3,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(4,toDate('2020-01-01'),toDate('2020-01-08'))(event_date,cnt) FROM spark_bar_test; SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; - -DROP TABLE IF EXISTS spark_bar_test; +SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_test; WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, 
value) FROM numbers(100) GROUP BY k ORDER BY k; + +SELECT sparkbar(128, 0, 9223372036854775806)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); +SELECT sparkbar(128)(toUInt64(9223372036854775806), number % 65535) FROM numbers(100); +SELECT sparkbar(9)(x, y) FROM (SELECT * FROM Values('x UInt64, y UInt8', (18446744073709551615,255), (0,0), (0,0), (4036797895307271799,254))); + +SELECT sparkbar(8, 0, 7)((number + 1) % 8, 1), sparkbar(8, 0, 7)((number + 2) % 8, 1), sparkbar(8, 0, 7)((number + 3) % 8, 1) FROM numbers(7); + +SELECT sparkbar(2)(number, -number) FROM numbers(10); +SELECT sparkbar(10)(number, number - 7) FROM numbers(10); +SELECT sparkbar(1024)(number, number) FROM numbers(1024); +SELECT sparkbar(1024)(number, 1) FROM numbers(1024); +SELECT sparkbar(1024)(number, 0) FROM numbers(1024); + +-- { echoOff } + +SELECT sparkbar(0)(number, number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT sparkbar(1)(number, number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT sparkbar(1025)(number, number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT sparkbar(2, 10, 9)(number, number) FROM numbers(10); -- { serverError BAD_ARGUMENTS } +SELECT sparkbar(2, -5, -1)(number, number) FROM numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sparkbar(2, -5, 1)(number, number) FROM numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sparkbar(2)(toInt32(number), number) FROM numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT sparkbar(2, 0)(number, number) FROM numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT sparkbar(2, 0, 5, 8)(number, number) FROM numbers(10); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +DROP TABLE IF EXISTS spark_bar_test; From 6e0d5e41508c1a6ddbe75f6a76e461a32cb07ffb Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Feb 2023 17:23:10 +0000 Subject: [PATCH 7/8] Update doc for sparkbar function --- 
.../aggregate-functions/reference/sparkbar.md | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 2026d086375..12da9be2847 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,5 +1,5 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/sparkbar +slug: /en/sql-reference/aggregate-functions/reference/sparkbar sidebar_position: 311 sidebar_label: sparkbar --- @@ -7,9 +7,11 @@ sidebar_label: sparkbar # sparkbar The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. +Repetitions for all `x` falling into the same bucket are averaged, so data should be pre-aggregated. +Negative repetitions are ignored. - -If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end. +If no interval is specified, then the minimum `x` is used as the interval start and the maximum `x` is used as the interval end. +Otherwise, values of `x` outside the specified interval are ignored.
**Syntax** @@ -37,29 +39,24 @@ sparkbar(width[, min_x, max_x])(x, y) Query: ``` sql -CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192; - -INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); +CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date; -SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data; +INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11'); -SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data; +SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); + +SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); ``` Result: ``` text - ┌─sparkbar(9)(event_date, cnt)─┐ -│ │ -│ ▁▅▄▃██▅ ▁ │ -│ │ +│ ▂▅▂▃▆█ ▂ │ └──────────────────────────────┘ ┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ -│ │ -│▁▄▄▂▅▇█▁ │ -│ │ +│ ▂▅▂▃▇▆█ │ └──────────────────────────────────────────────────────────────────────────┘ ``` From e175b72d79e4cfb275c20f69aad33dbb59837545 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 3 Feb 2023 17:25:28 +0000 Subject: [PATCH 8/8] Update ru doc for sparkbar function --- .../aggregate-functions/reference/sparkbar.md | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md index 7a9fc033542..958a4bd3504 100644 
--- a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -1,14 +1,15 @@ --- -slug: /ru/sql-reference/aggregate-functions/reference/sparkbar +slug: /ru/sql-reference/aggregate-functions/reference/sparkbar sidebar_position: 311 sidebar_label: sparkbar --- # sparkbar {#sparkbar} -Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. +Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. Повторения для всех `x`, попавших в один бакет, усредняются, поэтому данные должны быть предварительно агрегированы. Отрицательные повторения игнорируются. Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`. +Значения `x` вне указанного интервала игнорируются. 
**Синтаксис** @@ -39,29 +40,23 @@ sparkbar(width[, min_x, max_x])(x, y) Запрос: ``` sql -CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192; - -INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); +CREATE TABLE spark_bar_data (`value` Int64, `event_date` Date) ENGINE = MergeTree ORDER BY event_date; -SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data; +INSERT INTO spark_bar_data VALUES (1,'2020-01-01'), (3,'2020-01-02'), (4,'2020-01-02'), (-3,'2020-01-02'), (5,'2020-01-03'), (2,'2020-01-04'), (3,'2020-01-05'), (7,'2020-01-06'), (6,'2020-01-07'), (8,'2020-01-08'), (2,'2020-01-11'); -SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data; +SELECT sparkbar(9)(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); + +SELECT sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date,cnt) FROM (SELECT sum(value) as cnt, event_date FROM spark_bar_data GROUP BY event_date); ``` Результат: ``` text - ┌─sparkbar(9)(event_date, cnt)─┐ -│ │ -│ ▁▅▄▃██▅ ▁ │ -│ │ +│ ▂▅▂▃▆█ ▂ │ └──────────────────────────────┘ ┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ -│ │ -│▁▄▄▂▅▇█▁ │ -│ │ +│ ▂▅▂▃▇▆█ │ └──────────────────────────────────────────────────────────────────────────┘ ``` -