ClickHouse/dbms/src/AggregateFunctions/QuantileExact.h

#pragma once

#include <algorithm>
#include <limits>

#include <Common/PODArray.h>
#include <Common/NaNUtils.h>
#include <Core/Types.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/VarInt.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int NOT_IMPLEMENTED;
}

/** Calculates quantile by collecting all values into array
  *  and applying n-th element (introselect) algorithm for the resulting array.
  *
  * It uses O(N) memory and it is very inefficient in case of high amount of identical values.
  * But it is very CPU efficient for not large datasets.
  *
  * Values are stored as is; `add` skips NaNs. The array is reordered in place
  *  by `get` and `getMany`, that's why these methods are not const.
  */
template <typename Value>
struct QuantileExact
{
    /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
    static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Value>);
    using Array = PODArrayWithStackMemory<Value, bytes_in_arena>;
    Array array;

    /// Remember one more value.
    void add(const Value & x)
    {
        /// We must skip NaNs as they are not compatible with comparison sorting.
        if (!isNaN(x))
            array.push_back(x);
    }

    /// Weighted values are not supported by this data structure: always throws NOT_IMPLEMENTED.
    template <typename Weight>
    void add(const Value &, const Weight &)
    {
        throw Exception("Method add with weight is not implemented for QuantileExact", ErrorCodes::NOT_IMPLEMENTED);
    }

    /// Append all values collected by `rhs` to this state.
    void merge(const QuantileExact & rhs)
    {
        array.insert(rhs.array.begin(), rhs.array.end());
    }

    /// Write the state: number of values as VarUInt, then the raw bytes of the values.
    void serialize(WriteBuffer & buf) const
    {
        const size_t size = array.size();
        writeVarUInt(size, buf);
        buf.write(reinterpret_cast<const char *>(array.data()), size * sizeof(array[0]));
    }

    /// Read the state in the format written by `serialize`.
    void deserialize(ReadBuffer & buf)
    {
        size_t size = 0;
        readVarUInt(size, buf);
        array.resize(size);
        buf.read(reinterpret_cast<char *>(array.data()), size * sizeof(array[0]));
    }

    /// Get the value of the `level` quantile. The level must be between 0 and 1.
    /// Out of range levels are clamped to the first / last element instead of reading outside of the array.
    /// For empty state the result is std::numeric_limits<Value>::quiet_NaN().
    Value get(Float64 level)
    {
        if (array.empty())
            return std::numeric_limits<Value>::quiet_NaN();

        const size_t n = indexForLevel(level);

        std::nth_element(array.begin(), array.begin() + n, array.end());    /// NOTE You can think of the radix-select algorithm.
        return array[n];
    }

    /// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
    /// indices - an array of index levels such that the corresponding elements will go in ascending order.
    /// NOTE Unlike `get`, for empty state the results are default-constructed Values, not NaNs.
    void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)
    {
        if (array.empty())
        {
            for (size_t i = 0; i < size; ++i)
                result[i] = Value();
            return;
        }

        size_t prev_n = 0;
        for (size_t i = 0; i < size; ++i)
        {
            const size_t pos = indices[i];
            const size_t n = indexForLevel(levels[pos]);

            /// Levels are visited in ascending order, so everything before prev_n was already moved
            ///  to its side by the previous step and only the tail has to be partitioned.
            std::nth_element(array.begin() + prev_n, array.begin() + n, array.end());

            result[pos] = array[n];
            prev_n = n;
        }
    }

    /// Float64 variant of `get` is not supported by this data structure: always throws NOT_IMPLEMENTED.
    Float64 getFloat(Float64) const
    {
        throw Exception("Method getFloat is not implemented for QuantileExact", ErrorCodes::NOT_IMPLEMENTED);
    }

    /// Float64 variant of `getMany` is not supported by this data structure: always throws NOT_IMPLEMENTED.
    void getManyFloat(const Float64 *, const size_t *, size_t, Float64 *) const
    {
        throw Exception("Method getManyFloat is not implemented for QuantileExact", ErrorCodes::NOT_IMPLEMENTED);
    }

private:
    /// Position of the `level` quantile in sorted order. The array must be non-empty.
    /// The result is always a valid index:
    ///  - level >= 1 (and NaN level) gives the last element;
    ///  - level <= 0 gives the first element (conversion of negative floating point value to size_t is undefined behaviour);
    ///  - otherwise level * size is truncated and additionally clamped, because the product
    ///    may round up to `size` for levels very close to 1 on huge arrays.
    size_t indexForLevel(Float64 level) const
    {
        const size_t last = array.size() - 1;

        if (!(level < 1))
            return last;

        if (level <= 0)
            return 0;

        return std::min(static_cast<size_t>(level * array.size()), last);
    }
};

}
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`#pragma once`

Fix missing includes 2017-12-26 19:00:20 +00:00			`#include <Common/PODArray.h>`
Fixed handling of NaNs in aggregate functions that use comparison based sorting #2012 2018-03-14 05:03:51 +00:00			`#include <Common/NaNUtils.h>`
Grouped includes (40 of 182) 2019-02-10 17:40:52 +00:00			`#include <Core/Types.h>`
Fix missing includes 2017-12-26 19:00:20 +00:00			`#include <IO/WriteBuffer.h>`
			`#include <IO/ReadBuffer.h>`
			`#include <IO/VarInt.h>`

Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00
			`namespace DB`
			`{`

			`namespace ErrorCodes`
			`{`
			`extern const int NOT_IMPLEMENTED;`
			`}`

Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 20:25:22 +00:00			`/** Calculates quantile by collecting all values into array`
			`* and applying n-th element (introselect) algorithm for the resulting array.`
			`*`
Aggregate function for entropy 2019-02-02 14:27:43 +00:00			`* It uses O(N) memory and it is very inefficient in case of high amount of identical values.`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 20:25:22 +00:00			`* But it is very CPU efficient for not large datasets.`
			`*/`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`template <typename Value>`
			`struct QuantileExact`
			`{`
			`/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 08:39:21 +00:00			`static constexpr size_t bytes_in_arena = 64 - sizeof(PODArray<Value>);`
Fix initial size of some inline PODArray's. A template parameter of PODArray named INITIAL_SIZE didn't make its units clear, which made some callers to erroneously assume that it specifies the number of elements and not the number of bytes. Rename it, fix the wrong usages and, where possible, use the PODArrayWithStackMemory typedef for arrays with inline memory. 2019-06-28 12:51:01 +00:00			`using Array = PODArrayWithStackMemory<Value, bytes_in_arena>;`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`Array array;`

			`void add(const Value & x)`
			`{`
Fixed handling of NaNs in aggregate functions that use comparison based sorting #2012 2018-03-14 05:03:51 +00:00			`/// We must skip NaNs as they are not compatible with comparison sorting.`
			`if (!isNaN(x))`
			`array.push_back(x);`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`}`

			`template <typename Weight>`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 08:39:21 +00:00			`void add(const Value &, const Weight &)`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`{`
			`throw Exception("Method add with weight is not implemented for QuantileExact", ErrorCodes::NOT_IMPLEMENTED);`
			`}`

			`void merge(const QuantileExact & rhs)`
			`{`
			`array.insert(rhs.array.begin(), rhs.array.end());`
			`}`

			`void serialize(WriteBuffer & buf) const`
			`{`
			`size_t size = array.size();`
			`writeVarUInt(size, buf);`
Avoid temporary nullptr references [#CLICKHOUSE-2] 2018-09-02 03:00:04 +00:00			`buf.write(reinterpret_cast<const char *>(array.data()), size * sizeof(array[0]));`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`}`

			`void deserialize(ReadBuffer & buf)`
			`{`
			`size_t size = 0;`
			`readVarUInt(size, buf);`
			`array.resize(size);`
Avoid temporary nullptr references [#CLICKHOUSE-2] 2018-09-02 03:00:04 +00:00			`buf.read(reinterpret_cast<char *>(array.data()), size * sizeof(array[0]));`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`}`

			/// Get the value of the `level` quantile. The level must be between 0 and 1.
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 08:49:56 +00:00			`Value get(Float64 level)`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`{`
			`if (!array.empty())`
			`{`
			`size_t n = level < 1`
			`? level * array.size()`
			`: (array.size() - 1);`

			`std::nth_element(array.begin(), array.begin() + n, array.end()); /// NOTE You can think of the radix-select algorithm.`
			`return array[n];`
			`}`

return nan when quantileExact with empty float column 2018-08-13 08:33:51 +00:00			`return std::numeric_limits<Value>::quiet_NaN();`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`}`

			/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
			`/// indices - an array of index levels such that the corresponding elements will go in ascending order.`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 08:49:56 +00:00			`void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result)`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`{`
			`if (!array.empty())`
			`{`
			`size_t prev_n = 0;`
			`for (size_t i = 0; i < size; ++i)`
			`{`
			`auto level = levels[indices[i]];`

			`size_t n = level < 1`
			`? level * array.size()`
			`: (array.size() - 1);`

			`std::nth_element(array.begin() + prev_n, array.begin() + n, array.end());`

			`result[indices[i]] = array[n];`
			`prev_n = n;`
			`}`
			`}`
			`else`
			`{`
			`for (size_t i = 0; i < size; ++i)`
			`result[i] = Value();`
			`}`
			`}`

			`/// The same, but in the case of an empty state, NaN is returned.`
Simplification of aggregate functions: compatibility details [#CLICKHOUSE-2]. 2017-12-21 01:19:25 +00:00			`Float64 getFloat(Float64) const`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`{`
			`throw Exception("Method getFloat is not implemented for QuantileExact", ErrorCodes::NOT_IMPLEMENTED);`
			`}`

Simplification of aggregate functions: compatibility details [#CLICKHOUSE-2]. 2017-12-21 01:19:25 +00:00			`void getManyFloat(const Float64 *, const size_t *, size_t, Float64 *) const`
Simplification of aggregate functions: development [#CLICKHOUSE-2]. 2017-12-20 07:36:30 +00:00			`{`
			`throw Exception("Method getManyFloat is not implemented for QuantileExact", ErrorCodes::NOT_IMPLEMENTED);`
			`}`
			`};`

			`}`