Merge pull request #23204 from RedClusive/feature-quantileBfloat16

Feature quantile bfloat16
2024-11-21 15:12:02 +00:00 · 2021-05-21 23:46:05 +03:00 · 2021-05-21 23:46:05 +03:00 · 7b38ad3a85
commit 7b38ad3a85
parent 89d4d0d5e6 2b01b46b25
11 changed files with 322 additions and 10 deletions
--- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp
@ -52,6 +52,9 @@ template <typename Value, bool float_return> using FuncQuantilesTDigest = Aggreg
 template <typename Value, bool float_return> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, false>;
 template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, true>;

+template <typename Value, bool float_return> using FuncQuantileBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantileBFloat16, false, std::conditional_t<float_return, Float64, void>, false>;
+template <typename Value, bool float_return> using FuncQuantilesBFloat16 = AggregateFunctionQuantile<Value, QuantileBFloat16Histogram<Value>, NameQuantilesBFloat16, false, std::conditional_t<float_return, Float64, void>, true>;
+

 template <template <typename, bool> class Function>
 static constexpr bool supportDecimal()
@ -156,6 +159,9 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
    factory.registerFunction(NameQuantileTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantileTDigestWeighted>);
    factory.registerFunction(NameQuantilesTDigestWeighted::name, createAggregateFunctionQuantile<FuncQuantilesTDigestWeighted>);

+    factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile<FuncQuantileBFloat16>);
+    factory.registerFunction(NameQuantilesBFloat16::name, createAggregateFunctionQuantile<FuncQuantilesBFloat16>);
+
    /// 'median' is an alias for 'quantile'
    factory.registerAlias("median", NameQuantile::name);
    factory.registerAlias("medianDeterministic", NameQuantileDeterministic::name);
@ -167,6 +173,7 @@ void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory)
    factory.registerAlias("medianTimingWeighted", NameQuantileTimingWeighted::name);
    factory.registerAlias("medianTDigest", NameQuantileTDigest::name);
    factory.registerAlias("medianTDigestWeighted", NameQuantileTDigestWeighted::name);
+    factory.registerAlias("medianBFloat16", NameQuantileBFloat16::name);
 }

 }
--- a/src/AggregateFunctions/AggregateFunctionQuantile.h
+++ b/src/AggregateFunctions/AggregateFunctionQuantile.h
@ -9,6 +9,7 @@
 #include <AggregateFunctions/QuantileExactWeighted.h>
 #include <AggregateFunctions/QuantileTiming.h>
 #include <AggregateFunctions/QuantileTDigest.h>
+#include <AggregateFunctions/QuantileBFloat16Histogram.h>

 #include <AggregateFunctions/IAggregateFunction.h>
 #include <AggregateFunctions/QuantilesCommon.h>
@ -228,4 +229,7 @@ struct NameQuantileTDigestWeighted { static constexpr auto name = "quantileTDige
 struct NameQuantilesTDigest { static constexpr auto name = "quantilesTDigest"; };
 struct NameQuantilesTDigestWeighted { static constexpr auto name = "quantilesTDigestWeighted"; };

+struct NameQuantileBFloat16 { static constexpr auto name = "quantileBFloat16"; };
+struct NameQuantilesBFloat16 { static constexpr auto name = "quantilesBFloat16"; };
+
 }
--- a/src/AggregateFunctions/QuantileBFloat16Histogram.h
+++ b/src/AggregateFunctions/QuantileBFloat16Histogram.h
@ -0,0 +1,207 @@
+#pragma once
+
+#include <IO/ReadBuffer.h>
+#include <IO/WriteBuffer.h>
+#include <Common/HashTable/HashMap.h>
+#include <common/types.h>
+#include <ext/bit_cast.h>
+
+
+namespace DB
+{
+
+/** `bfloat16` is a 16-bit floating point data type that is the same as the corresponding most significant 16 bits of the `float`.
+  * https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
+  *
+  * To calculate quantile, simply convert input value to 16 bit (convert to float, then take the most significant 16 bits),
+  * and calculate the histogram of these values.
+  *
+  * Hash table is the preferred way to store histogram, because the number of distinct values is small:
+  * ```
+  * SELECT uniq(bfloat)
+  * FROM
+  * (
+  *     SELECT
+  *         number,
+  *         toFloat32(number) AS f,
+  *         bitShiftRight(bitAnd(reinterpretAsUInt32(reinterpretAsFixedString(f)), 4294901760) AS cut, 16),
+  *         reinterpretAsFloat32(reinterpretAsFixedString(cut)) AS bfloat
+  *     FROM numbers(100000000)
+  * )
+  *
+  * ┌─uniq(bfloat)─┐
+  * │         2623 │
+  * └──────────────┘
+  * ```
+  * (when increasing the range of values 1000 times, the number of distinct bfloat16 values increases just by 1280).
+  *
+  * Then calculate quantile from the histogram.
+  *
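+  * This sketch is very simple and rough. Values are truncated (not rounded) to the 8 significant bits of bfloat16, so the relative error is below 1 / 128 = 0.78125% (the tighter bound 1 / 256 = 0.390625% would only hold with rounding to nearest).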
+  */
+template <typename Value>
+struct QuantileBFloat16Histogram
+{
+    using BFloat16 = UInt16;
+    using Weight = UInt64;
+
+    /// Make automatic memory for 16 elements to avoid allocations for small states.
+    /// The usage of trivial hash is ok, because we effectively take logarithm of the values and pathological cases are unlikely.
+    using Data = HashMapWithStackMemory<BFloat16, Weight, TrivialHash, 4>;
+
+    /// The histogram: bfloat16 bit pattern -> accumulated weight of all values that were truncated to it.
+    Data data;
+
+    /// Add one value with weight 1.
+    void add(const Value & x)
+    {
+        add(x, 1);
+    }
+
+    /// Add value `x` with weight `w`. NaN values are skipped and do not affect the histogram.
+    void add(const Value & x, Weight w)
+    {
+        if (!isNaN(x))
+            data[toBFloat16(x)] += w;
+    }
+
+    /// Add the weights of every bucket of `rhs` to the matching bucket of this histogram.
+    void merge(const QuantileBFloat16Histogram & rhs)
+    {
+        for (const auto & pair : rhs.data)
+            data[pair.getKey()] += pair.getMapped();
+    }
+
+    /// Write the state to `buf`; the format is whatever the underlying hash map's `write` produces.
+    void serialize(WriteBuffer & buf) const
+    {
+        data.write(buf);
+    }
+
+    /// Read the state from `buf` using the underlying hash map's `read`.
+    void deserialize(ReadBuffer & buf)
+    {
+        data.read(buf);
+    }
+
+    /// Quantile of the given `level`, converted to `Value`.
+    Value get(Float64 level) const
+    {
+        return getImpl<Value>(level);
+    }
+
+    /// Quantiles for `size` levels at once, converted to `Value`. See getManyImpl for the meaning of `indices`.
+    void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
+    {
+        getManyImpl(levels, indices, size, result);
+    }
+
+    /// Quantile of the given `level` as Float64.
+    Float64 getFloat(Float64 level) const
+    {
+        return getImpl<Float64>(level);
+    }
+
+    /// Quantiles for `size` levels at once as Float64. See getManyImpl for the meaning of `indices`.
+    void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const
+    {
+        getManyImpl(levels, indices, size, result);
+    }
+
+private:
+    /// Take the most significant 16 bits of the floating point number.
+    /// The low 16 bits are simply dropped (truncation, no rounding).
+    BFloat16 toBFloat16(const Value & x) const
+    {
+        return ext::bit_cast<UInt32>(static_cast<Float32>(x)) >> 16;
+    }
+
+    /// Put the bits into most significant 16 bits of the floating point number and fill other bits with zeros.
+    Float32 toFloat32(const BFloat16 & x) const
+    {
+        /// Widen to UInt32 before shifting: a bare `x << 16` is evaluated in (signed) int after integral promotion
+        /// and would shift a one into the sign bit for every key that has its top bit set (i.e. negative values).
+        return ext::bit_cast<Float32>(static_cast<UInt32>(x) << 16);
+    }
+
+    /// (value restored from the bfloat16 key, weight of the bucket)
+    using Pair = PairNoInit<Float32, Weight>;
+
+    /** Calculate one quantile.
+      * The histogram is copied into an array sorted by value, then the first value whose
+      * cumulative weight reaches ceil(total_weight * level) is returned, converted to T.
+      * For an empty histogram returns std::numeric_limits<T>::quiet_NaN()
+      * (the standard library defines that as T() for types without a NaN, e.g. integers).
+      */
+    template <typename T>
+    T getImpl(Float64 level) const
+    {
+        size_t size = data.size();
+
+        if (0 == size)
+            return std::numeric_limits<T>::quiet_NaN();
+
+        std::unique_ptr<Pair[]> array_holder(new Pair[size]);
+        Pair * array = array_holder.get();
+
+        /// Flatten the hash table and compute the total weight in the same pass.
+        Float64 sum_weight = 0;
+        Pair * arr_it = array;
+        for (const auto & pair : data)
+        {
+            sum_weight += pair.getMapped();
+            *arr_it = {toFloat32(pair.getKey()), pair.getMapped()};
+            ++arr_it;
+        }
+
+        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
+
+        Float64 threshold = std::ceil(sum_weight * level);
+        Float64 accumulated = 0;
+
+        for (const Pair * p = array; p != (array + size); ++p)
+        {
+            accumulated += p->second;
+
+            if (accumulated >= threshold)
+                return p->first;
+        }
+
+        /// The threshold was never reached (e.g. level above 1): fall back to the largest value.
+        return array[size - 1].first;
+    }
+
+    /** Calculate several quantiles in a single pass over the sorted histogram.
+      * Levels are processed in the order levels[indices[0]], levels[indices[1]], ... and each answer
+      * is written to result[indices[i]], so the results stay aligned with `levels`.
+      * NOTE: the single pass never moves backwards, so this presumably expects `indices` to order
+      * the levels ascending - confirm against the caller.
+      * For an empty histogram every result is std::numeric_limits<T>::quiet_NaN().
+      */
+    template <typename T>
+    void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, T * result) const
+    {
+        /// Nothing was requested; also keeps the code below from reading indices[0] of an empty array.
+        if (0 == num_levels)
+            return;
+
+        size_t size = data.size();
+
+        if (0 == size)
+        {
+            for (size_t i = 0; i < num_levels; ++i)
+                result[i] = std::numeric_limits<T>::quiet_NaN();
+
+            return;
+        }
+
+        std::unique_ptr<Pair[]> array_holder(new Pair[size]);
+        Pair * array = array_holder.get();
+
+        /// Flatten the hash table and compute the total weight in the same pass.
+        Float64 sum_weight = 0;
+        Pair * arr_it = array;
+        for (const auto & pair : data)
+        {
+            sum_weight += pair.getMapped();
+            *arr_it = {toFloat32(pair.getKey()), pair.getMapped()};
+            ++arr_it;
+        }
+
+        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
+
+        size_t level_index = 0;
+        Float64 accumulated = 0;
+        Float64 threshold = std::ceil(sum_weight * levels[indices[level_index]]);
+
+        for (const Pair * p = array; p != (array + size); ++p)
+        {
+            accumulated += p->second;
+
+            /// One bucket may satisfy several consecutive levels, hence `while` rather than `if`.
+            while (accumulated >= threshold)
+            {
+                result[indices[level_index]] = p->first;
+                ++level_index;
+
+                if (level_index == num_levels)
+                    return;
+
+                threshold = std::ceil(sum_weight * levels[indices[level_index]]);
+            }
+        }
+
+        /// Levels whose threshold was never reached get the largest value.
+        while (level_index < num_levels)
+        {
+            result[indices[level_index]] = array[size - 1].first;
+            ++level_index;
+        }
+    }
+};
+
+}
--- a/tests/performance/quantile.xml
+++ b/tests/performance/quantile.xml
@ -0,0 +1,29 @@
+<test>
+    <preconditions>
+        <table_exists>hits_100m_single</table_exists>
+    </preconditions>
+
+    <substitutions>
+        <substitution>
+           <name>key</name>
+           <values>
+               <value>SearchEngineID</value>
+               <value>RegionID</value>
+               <value>SearchPhrase</value>
+               <value>ClientIP</value>
+           </values>
+        </substitution>
+        <substitution>
+           <name>func</name>
+           <values>
+               <value>quantile</value>
+               <value>quantileExact</value>
+               <value>quantileTDigest</value>
+               <value>quantileTiming</value>
+               <value>quantileBFloat16</value>
+           </values>
+       </substitution>
+    </substitutions>
+
+    <query>SELECT {key} AS k, {func}(ResolutionWidth) FROM hits_100m_single GROUP BY k FORMAT Null</query>
+</test>
--- a/tests/performance/uniq.xml
+++ b/tests/performance/uniq.xml
@ -1,12 +1,9 @@
 <test>
-
    <preconditions>
        <table_exists>hits_100m_single</table_exists>
        <ram_size>30000000000</ram_size>
    </preconditions>

-
-
    <settings>
        <max_memory_usage>30000000000</max_memory_usage>
        <!--
@ -36,7 +33,7 @@
               <value>SearchPhrase</value>
               <value>ClientIP</value>
           </values>
-       </substitution>
+        </substitution>
        <substitution>
           <name>func</name>
           <values>
--- a/tests/queries/0_stateless/00753_quantile_format.reference
+++ b/tests/queries/0_stateless/00753_quantile_format.reference
@ -10,7 +10,9 @@
 [30000]
 30000
 [30000]
-2016-06-15 23:01:04
-['2016-06-15 23:01:04']
-2016-06-15 23:01:04
-['2016-06-15 23:01:04']
+2016-06-15 23:00:16
+['2016-06-15 23:00:16']
+2016-06-15 23:00:16
+['2016-06-15 23:00:16']
+2016-04-02 17:23:12
+['2016-04-02 17:23:12']
--- a/tests/queries/0_stateless/00753_quantile_format.sql
+++ b/tests/queries/0_stateless/00753_quantile_format.sql
@ -1,7 +1,7 @@
 DROP TABLE IF EXISTS datetime;

-CREATE TABLE datetime (d DateTime) ENGINE = Memory;
-INSERT INTO datetime(d) VALUES(toDateTime('2016-06-15 23:00:00'));
+CREATE TABLE datetime (d DateTime('UTC')) ENGINE = Memory;
+INSERT INTO datetime(d) VALUES(toDateTime('2016-06-15 23:00:00', 'UTC'));

 SELECT quantile(0.2)(d) FROM datetime;
 SELECT quantiles(0.2)(d) FROM datetime;
@ -27,4 +27,7 @@ SELECT quantilesTDigest(0.2)(d) FROM datetime;
 SELECT quantileTDigestWeighted(0.2)(d, 1) FROM datetime;
 SELECT quantilesTDigestWeighted(0.2)(d, 1) FROM datetime;

+SELECT quantileBFloat16(0.2)(d) FROM datetime;
+SELECT quantilesBFloat16(0.2)(d) FROM datetime;
+
 DROP TABLE datetime;
--- a/tests/queries/0_stateless/01813_quantileBfloat16_nans.reference
+++ b/tests/queries/0_stateless/01813_quantileBfloat16_nans.reference
@ -0,0 +1 @@
+1
--- a/tests/queries/0_stateless/01813_quantileBfloat16_nans.sql
+++ b/tests/queries/0_stateless/01813_quantileBfloat16_nans.sql
@ -0,0 +1,16 @@
+SELECT DISTINCT
+    eq
+FROM
+    (
+        WITH
+            range(2 + number % 10) AS arr, -- minimum two elements, to avoid nan result --
+            arrayMap(x -> x = intDiv(number, 10) ? nan : x, arr) AS arr_with_nan,
+            arrayFilter(x -> x != intDiv(number, 10), arr) AS arr_filtered
+        SELECT
+            number,
+            arrayReduce('quantileBFloat16', arr_with_nan) AS q1,
+            arrayReduce('quantileBFloat16', arr_filtered) AS q2,
+            q1 = q2 AS eq
+        FROM
+            numbers(100)
+    );
--- a/tests/queries/1_stateful/00164_quantileBfloat16.reference
+++ b/tests/queries/1_stateful/00164_quantileBfloat16.reference
@ -0,0 +1,40 @@
+1704509	1384
+732797	1336
+598875	1384
+792887	1336
+3807842	1336
+25703952	1336
+716829	1384
+59183	1336
+33010362	1336
+800784	1336
+1704509	[1296,1384,1840,1960,3696]
+732797	[1232,1336,1840,1944,3664]
+598875	[1232,1384,1840,1944,3536]
+792887	[1296,1336,1840,1888,3696]
+3807842	[1232,1336,1840,1936,2032]
+25703952	[1012,1336,1840,1944,3696]
+716829	[1232,1384,1840,1944,3696]
+59183	[316,1336,1840,2008,2032]
+33010362	[1232,1336,1840,1936,2032]
+800784	[1232,1336,1840,1928,2032]
+1704509	1384
+732797	1336
+598875	1384
+792887	1336
+3807842	1336
+25703952	1336
+716829	1384
+59183	1336
+33010362	1336
+800784	1336
+1704509	[1296,1384,1840,1960,3696]
+732797	[1232,1336,1840,1944,3664]
+598875	[1232,1384,1840,1944,3536]
+792887	[1296,1336,1840,1888,3696]
+3807842	[1232,1336,1840,1936,2032]
+25703952	[1012,1336,1840,1944,3696]
+716829	[1232,1384,1840,1944,3696]
+59183	[316,1336,1840,2008,2032]
+33010362	[1232,1336,1840,1936,2032]
+800784	[1232,1336,1840,1928,2032]
--- a/tests/queries/1_stateful/00164_quantileBfloat16.sql
+++ b/tests/queries/1_stateful/00164_quantileBfloat16.sql
@ -0,0 +1,6 @@
+SELECT CounterID AS k, quantileBFloat16(0.5)(ResolutionWidth) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10;
+SELECT CounterID AS k, quantilesBFloat16(0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth) FROM test.hits GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10;
+
+
+SELECT CounterID AS k, quantileBFloat16(0.5)(ResolutionWidth) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10;
+SELECT CounterID AS k, quantilesBFloat16(0.1, 0.5, 0.9, 0.99, 0.999)(ResolutionWidth) FROM remote('127.0.0.{1,2}', test.hits) GROUP BY k ORDER BY count() DESC, CounterID LIMIT 10;