mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Fixed handling of NaNs in aggregate functions that use comparison-based sorting #2012
This commit is contained in:
parent
c55bbe4917
commit
f08940c04c
@ -9,6 +9,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
@ -90,8 +91,11 @@ public:
|
||||
PointType left = static_cast<const ColumnVector<PointType> &>(*columns[0]).getData()[row_num];
|
||||
PointType right = static_cast<const ColumnVector<PointType> &>(*columns[1]).getData()[row_num];
|
||||
|
||||
this->data(place).value.push_back(std::make_pair(left, Int64(1)), arena);
|
||||
this->data(place).value.push_back(std::make_pair(right, Int64(-1)), arena);
|
||||
if (!isNaN(left))
|
||||
this->data(place).value.push_back(std::make_pair(left, Int64(1)), arena);
|
||||
|
||||
if (!isNaN(right))
|
||||
this->data(place).value.push_back(std::make_pair(right, Int64(-1)), arena);
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
@ -133,7 +137,6 @@ public:
|
||||
/// const_cast because we will sort the array
|
||||
auto & array = const_cast<typename MaxIntersectionsData<PointType>::Array &>(this->data(place).value);
|
||||
|
||||
/// TODO NaNs?
|
||||
std::sort(array.begin(), array.end(), [](const auto & a, const auto & b) { return a.first < b.first; });
|
||||
|
||||
for (const auto & point_weight : array)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Core/Types.h>
|
||||
@ -32,7 +33,9 @@ struct QuantileExact
|
||||
|
||||
/// Adds one observed value to `array`, the buffer of collected values.
/// NOTE(review): this is a diff rendering without +/- markers. The hunk header (7 lines before, 9 after)
/// implies that the bare push_back below is the pre-change line and that the comment plus the isNaN-guarded
/// push_back are its replacement; in the resulting code only the guarded push_back remains.
void add(const Value & x)
|
||||
{
|
||||
array.push_back(x);
|
||||
/// We must skip NaNs as they are not compatible with comparison sorting.
|
||||
/// (Any ordered comparison involving a NaN is false, so NaNs cannot be ordered consistently by operator<.)
if (!isNaN(x))
|
||||
array.push_back(x);
|
||||
}
|
||||
|
||||
template <typename Weight>
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -33,12 +34,15 @@ struct QuantileExactWeighted
|
||||
|
||||
/// Counts one occurrence of `x` by incrementing its entry in `map`.
/// NOTE(review): this is a diff rendering without +/- markers. The hunk header (12 lines before, 15 after)
/// implies that the bare increment below is the pre-change line and that the comment plus the isNaN-guarded
/// increment are its replacement; in the resulting code NaN values are not counted.
void add(const Value & x)
|
||||
{
|
||||
++map[x];
|
||||
/// We must skip NaNs as they are not compatible with comparison sorting.
|
||||
if (!isNaN(x))
|
||||
++map[x];
|
||||
}
|
||||
|
||||
/// Adds `weight` to the entry for `x` in `map`.
/// NOTE(review): this is a diff rendering without +/- markers. The hunk header (12 lines before, 15 after)
/// implies that the bare `map[x] += weight;` is the pre-change line and that the isNaN-guarded statement is
/// its replacement; in the resulting code a NaN `x` contributes no weight.
void add(const Value & x, const Weight & weight)
|
||||
{
|
||||
map[x] += weight;
|
||||
/// NaN keys are skipped, for the same reason as in the single-argument overload.
if (!isNaN(x))
|
||||
map[x] += weight;
|
||||
}
|
||||
|
||||
void merge(const QuantileExactWeighted & rhs)
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <Core/Field.h>
|
||||
#include <Common/FieldVisitors.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -12,6 +13,7 @@ namespace DB
|
||||
/// Error code constants used by this file. They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
/// Used by the "Quantile level is out of range [0..1]" exception thrown further down in this diff.
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
|
||||
@ -55,6 +57,10 @@ struct QuantileLevels
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
levels[i] = applyVisitor(FieldVisitorConvertToNumber<Float64>(), params[i]);
|
||||
|
||||
if (isNaN(levels[i]) || levels[i] < 0 || levels[i] > 1)
|
||||
throw Exception("Quantile level is out of range [0..1]", ErrorCodes::PARAMETER_OUT_OF_BOUND);
|
||||
|
||||
permutation[i] = i;
|
||||
}
|
||||
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Poco/Exception.h>
|
||||
#include <pcg_random.hpp>
|
||||
|
||||
@ -67,6 +68,9 @@ public:
|
||||
|
||||
void insert(const T & v)
|
||||
{
|
||||
if (isNaN(v))
|
||||
return;
|
||||
|
||||
sorted = false;
|
||||
++total_values;
|
||||
if (samples.size() < sample_count)
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/NaNUtils.h>
|
||||
#include <Poco/Exception.h>
|
||||
|
||||
|
||||
@ -66,6 +67,9 @@ public:
|
||||
|
||||
void insert(const T & v, const UInt64 determinator)
|
||||
{
|
||||
if (isNaN(v))
|
||||
return;
|
||||
|
||||
const UInt32 hash = intHash64(determinator);
|
||||
if (!good(hash))
|
||||
return;
|
||||
|
@ -0,0 +1 @@
|
||||
1
|
@ -0,0 +1 @@
|
||||
SELECT DISTINCT eq FROM (WITH range(number % 10) AS arr, arrayMap(x -> x = intDiv(number, 10) ? nan : x, arr) AS arr_with_nan, arrayFilter(x -> x != intDiv(number, 10), arr) AS arr_filtered SELECT number, arrayReduce('quantileExact', arr_with_nan) AS q1, arrayReduce('quantileExact', arr_filtered) AS q2, q1 = q2 AS eq FROM numbers(100));
|
Loading…
Reference in New Issue
Block a user