ClickHouse/src/AggregateFunctions/QuantileBFloat16Histogram.h

#pragma once

#include <base/types.h>
#include <base/bit_cast.h>
#include <Common/HashTable/HashMap.h>

#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>


namespace DB
{

/** `bfloat16` is a 16-bit floating point data type that is the same as the corresponding most significant 16 bits of the `float`.
  * https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
  *
  * To calculate quantile, simply convert input value to 16 bit (convert to float, then take the most significant 16 bits),
  * and calculate the histogram of these values.
  *
  * Hash table is the preferred way to store histogram, because the number of distinct values is small:
  * ```
  * SELECT uniq(bfloat)
  * FROM
  * (
  *     SELECT
  *         number,
  *         toFloat32(number) AS f,
  *         bitShiftRight(bitAnd(reinterpretAsUInt32(reinterpretAsFixedString(f)), 4294901760) AS cut, 16),
  *         reinterpretAsFloat32(reinterpretAsFixedString(cut)) AS bfloat
  *     FROM numbers(100000000)
  * )
  *
  * ┌─uniq(bfloat)─┐
  * │         2623 │
  * └──────────────┘
  * ```
  * (when increasing the range of values 1000 times, the number of distinct bfloat16 values increases just by 1280).
  *
  * Then calculate quantile from the histogram.
  *
  * This sketch is very simple and rough. Its relative precision is constant 1 / 256 = 0.390625%.
  */
template <typename Value>
struct QuantileBFloat16Histogram
{
    using BFloat16 = UInt16;
    using Weight = UInt64;

    /// Make automatic memory for 16 elements to avoid allocations for small states.
    /// The usage of trivial hash is ok, because we effectively take logarithm of the values and pathological cases are unlikely.
    using Data = HashMapWithStackMemory<BFloat16, Weight, TrivialHash, 4>;

    Data data;

    void add(const Value & x)
    {
        add(x, 1);
    }

    void add(const Value & x, Weight w)
    {
        if (!isNaN(x))
            data[toBFloat16(x)] += w;
    }

    void merge(const QuantileBFloat16Histogram & rhs)
    {
        for (const auto & pair : rhs.data)
            data[pair.getKey()] += pair.getMapped();
    }

    void serialize(WriteBuffer & buf) const
    {
        data.write(buf);
    }

    void deserialize(ReadBuffer & buf)
    {
        data.read(buf);
    }

    Value get(Float64 level) const
    {
        return getImpl<Value>(level);
    }

    void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
    {
        getManyImpl(levels, indices, size, result);
    }

    Float64 getFloat(Float64 level) const
    {
        return getImpl<Float64>(level);
    }

    void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const
    {
        getManyImpl(levels, indices, size, result);
    }

private:
    /// Take the most significant 16 bits of the floating point number.
    BFloat16 toBFloat16(const Value & x) const
    {
        return bit_cast<UInt32>(static_cast<Float32>(x)) >> 16;
    }

    /// Put the bits into most significant 16 bits of the floating point number and fill other bits with zeros.
    Float32 toFloat32(const BFloat16 & x) const
    {
        return bit_cast<Float32>(x << 16);
    }

    using Pair = PairNoInit<Float32, Weight>;

    template <typename T>
    T getImpl(Float64 level) const
    {
        size_t size = data.size();

        if (0 == size)
            return std::numeric_limits<T>::quiet_NaN();

        std::unique_ptr<Pair[]> array_holder(new Pair[size]);
        Pair * array = array_holder.get();

        Float64 sum_weight = 0;
        Pair * arr_it = array;
        for (const auto & pair : data)
        {
            sum_weight += pair.getMapped();
            *arr_it = {toFloat32(pair.getKey()), pair.getMapped()};
            ++arr_it;
        }

        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });

        Float64 threshold = std::ceil(sum_weight * level);
        Float64 accumulated = 0;

        for (const Pair * p = array; p != (array + size); ++p)
        {
            accumulated += p->second;

            if (accumulated >= threshold)
                return p->first;
        }

        return array[size - 1].first;
    }

    template <typename T>
    void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, T * result) const
    {
        size_t size = data.size();

        if (0 == size)
        {
            for (size_t i = 0; i < num_levels; ++i)
                result[i] = std::numeric_limits<T>::quiet_NaN();

            return;
        }

        std::unique_ptr<Pair[]> array_holder(new Pair[size]);
        Pair * array = array_holder.get();

        Float64 sum_weight = 0;
        Pair * arr_it = array;
        for (const auto & pair : data)
        {
            sum_weight += pair.getMapped();
            *arr_it = {toFloat32(pair.getKey()), pair.getMapped()};
            ++arr_it;
        }

        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });

        size_t level_index = 0;
        Float64 accumulated = 0;
        Float64 threshold = std::ceil(sum_weight * levels[indices[level_index]]);

        for (const Pair * p = array; p != (array + size); ++p)
        {
            accumulated += p->second;

            while (accumulated >= threshold)
            {
                result[indices[level_index]] = p->first;
                ++level_index;

                if (level_index == num_levels)
                    return;

                threshold = std::ceil(sum_weight * levels[indices[level_index]]);
            }
        }

        while (level_index < num_levels)
        {
            result[indices[level_index]] = array[size - 1].first;
            ++level_index;
        }
    }
};

}
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`#pragma once`

Rename "common" to "base" 2021-10-02 07:13:14 +00:00			`#include <base/types.h>`
			`#include <base/bit_cast.h>`
Merge ext into common 2021-06-15 19:55:21 +00:00			`#include <Common/HashTable/HashMap.h>`

add weight and change logic 2021-04-28 14:54:10 +00:00			`#include <IO/ReadBuffer.h>`
			`#include <IO/WriteBuffer.h>`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
Refinements 2021-05-21 06:30:13 +00:00
fix style 2021-04-24 19:11:56 +00:00			`namespace DB`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`{`
Refinements 2021-05-21 06:30:13 +00:00
			/** `bfloat16` is a 16-bit floating point data type that is the same as the corresponding most significant 16 bits of the `float`.
			`* https://en.wikipedia.org/wiki/Bfloat16_floating-point_format`
			`*`
			`* To calculate quantile, simply convert input value to 16 bit (convert to float, then take the most significant 16 bits),`
			`* and calculate the histogram of these values.`
			`*`
			`* Hash table is the preferred way to store histogram, because the number of distinct values is small:`
			* ```
			`* SELECT uniq(bfloat)`
			`* FROM`
			`* (`
			`* SELECT`
			`* number,`
			`* toFloat32(number) AS f,`
			`* bitShiftRight(bitAnd(reinterpretAsUInt32(reinterpretAsFixedString(f)), 4294901760) AS cut, 16),`
			`* reinterpretAsFloat32(reinterpretAsFixedString(cut)) AS bfloat`
			`* FROM numbers(100000000)`
			`* )`
			`*`
			`* ┌─uniq(bfloat)─┐`
			`* │ 2623 │`
			`* └──────────────┘`
			* ```
			`* (when increasing the range of values 1000 times, the number of distinct bfloat16 values increases just by 1280).`
			`*`
			`* Then calculate quantile from the histogram.`
			`*`
			`* This sketch is very simple and rough. Its relative precision is constant 1 / 256 = 0.390625%.`
			`*/`
fix style 2021-04-24 19:11:56 +00:00			`template <typename Value>`
refactored 2021-04-26 09:39:08 +00:00			`struct QuantileBFloat16Histogram`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`{`
Refinements 2021-05-21 06:30:13 +00:00			`using BFloat16 = UInt16;`
add weight and change logic 2021-04-28 14:54:10 +00:00			`using Weight = UInt64;`
Add comment 2021-05-21 06:33:00 +00:00
			`/// Make automatic memory for 16 elements to avoid allocations for small states.`
			`/// The usage of trivial hash is ok, because we effectively take logarithm of the values and pathological cases are unlikely.`
Refinements 2021-05-21 06:30:13 +00:00			`using Data = HashMapWithStackMemory<BFloat16, Weight, TrivialHash, 4>;`

add weight and change logic 2021-04-28 14:54:10 +00:00			`Data data;`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
Refinements 2021-05-21 06:30:13 +00:00			`void add(const Value & x)`
			`{`
			`add(x, 1);`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
add weight and change logic 2021-04-28 14:54:10 +00:00			`void add(const Value & x, Weight w)`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`{`
add weight and change logic 2021-04-28 14:54:10 +00:00			`if (!isNaN(x))`
Refinements 2021-05-21 06:30:13 +00:00			`data[toBFloat16(x)] += w;`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`}`

add weight and change logic 2021-04-28 14:54:10 +00:00			`void merge(const QuantileBFloat16Histogram & rhs)`
			`{`
			`for (const auto & pair : rhs.data)`
			`data[pair.getKey()] += pair.getMapped();`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
Refinements 2021-05-21 06:30:13 +00:00			`void serialize(WriteBuffer & buf) const`
			`{`
			`data.write(buf);`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
Refinements 2021-05-21 06:30:13 +00:00			`void deserialize(ReadBuffer & buf)`
			`{`
			`data.read(buf);`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
Refinements 2021-05-21 06:30:13 +00:00			`Value get(Float64 level) const`
			`{`
			`return getImpl<Value>(level);`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
add weight and change logic 2021-04-28 14:54:10 +00:00			`void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`{`
add weight and change logic 2021-04-28 14:54:10 +00:00			`getManyImpl(levels, indices, size, result);`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`}`

Refinements 2021-05-21 06:30:13 +00:00			`Float64 getFloat(Float64 level) const`
			`{`
			`return getImpl<Float64>(level);`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00
add weight and change logic 2021-04-28 14:54:10 +00:00			`void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`{`
add weight and change logic 2021-04-28 14:54:10 +00:00			`getManyImpl(levels, indices, size, result);`
			`}`

			`private:`
Refinements 2021-05-21 06:30:13 +00:00			`/// Take the most significant 16 bits of the floating point number.`
			`BFloat16 toBFloat16(const Value & x) const`
			`{`
Merge ext into common 2021-06-15 19:55:21 +00:00			`return bit_cast<UInt32>(static_cast<Float32>(x)) >> 16;`
Refinements 2021-05-21 06:30:13 +00:00			`}`
add weight and change logic 2021-04-28 14:54:10 +00:00
Refinements 2021-05-21 06:30:13 +00:00			`/// Put the bits into most significant 16 bits of the floating point number and fill other bits with zeros.`
			`Float32 toFloat32(const BFloat16 & x) const`
			`{`
Merge ext into common 2021-06-15 19:55:21 +00:00			`return bit_cast<Float32>(x << 16);`
Refinements 2021-05-21 06:30:13 +00:00			`}`
add weight and change logic 2021-04-28 14:54:10 +00:00
			`using Pair = PairNoInit<Float32, Weight>;`

			`template <typename T>`
			`T getImpl(Float64 level) const`
			`{`
			`size_t size = data.size();`

			`if (0 == size)`
			`return std::numeric_limits<T>::quiet_NaN();`

			`std::unique_ptr<Pair[]> array_holder(new Pair[size]);`
			`Pair * array = array_holder.get();`

			`Float64 sum_weight = 0;`
			`Pair * arr_it = array;`
			`for (const auto & pair : data)`
			`{`
			`sum_weight += pair.getMapped();`
Refinements 2021-05-21 06:30:13 +00:00			`*arr_it = {toFloat32(pair.getKey()), pair.getMapped()};`
add weight and change logic 2021-04-28 14:54:10 +00:00			`++arr_it;`
			`}`

			`std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });`

			`Float64 threshold = std::ceil(sum_weight * level);`
			`Float64 accumulated = 0;`

			`for (const Pair * p = array; p != (array + size); ++p)`
			`{`
			`accumulated += p->second;`

			`if (accumulated >= threshold)`
			`return p->first;`
			`}`

			`return array[size - 1].first;`
			`}`

			`template <typename T>`
			`void getManyImpl(const Float64 * levels, const size_t * indices, size_t num_levels, T * result) const`
			`{`
			`size_t size = data.size();`

			`if (0 == size)`
			`{`
			`for (size_t i = 0; i < num_levels; ++i)`
			`result[i] = std::numeric_limits<T>::quiet_NaN();`

			`return;`
			`}`

			`std::unique_ptr<Pair[]> array_holder(new Pair[size]);`
			`Pair * array = array_holder.get();`

			`Float64 sum_weight = 0;`
			`Pair * arr_it = array;`
			`for (const auto & pair : data)`
			`{`
			`sum_weight += pair.getMapped();`
Refinements 2021-05-21 06:30:13 +00:00			`*arr_it = {toFloat32(pair.getKey()), pair.getMapped()};`
add weight and change logic 2021-04-28 14:54:10 +00:00			`++arr_it;`
			`}`

			`std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });`

			`size_t level_index = 0;`
			`Float64 accumulated = 0;`
			`Float64 threshold = std::ceil(sum_weight * levels[indices[level_index]]);`

			`for (const Pair * p = array; p != (array + size); ++p)`
			`{`
			`accumulated += p->second;`

			`while (accumulated >= threshold)`
			`{`
			`result[indices[level_index]] = p->first;`
			`++level_index;`

			`if (level_index == num_levels)`
			`return;`

			`threshold = std::ceil(sum_weight * levels[indices[level_index]]);`
			`}`
			`}`

			`while (level_index < num_levels)`
			`{`
			`result[indices[level_index]] = array[size - 1].first;`
			`++level_index;`
			`}`
add quantileBfloat16 2021-04-14 20:38:56 +00:00			`}`
			`};`

fix styles 2021-04-14 21:06:22 +00:00			`}`