Fixed handling of NaNs in aggregate functions that use comparison-based sorting #2012

This commit is contained in:
Alexey Milovidov 2018-03-14 08:03:51 +03:00
parent c55bbe4917
commit f08940c04c
8 changed files with 32 additions and 6 deletions

View File

@ -9,6 +9,7 @@
#include <IO/WriteHelpers.h>
#include <Common/ArenaAllocator.h>
#include <Common/NaNUtils.h>
#include <AggregateFunctions/IAggregateFunction.h>
@ -90,8 +91,11 @@ public:
PointType left = static_cast<const ColumnVector<PointType> &>(*columns[0]).getData()[row_num];
PointType right = static_cast<const ColumnVector<PointType> &>(*columns[1]).getData()[row_num];
this->data(place).value.push_back(std::make_pair(left, Int64(1)), arena);
this->data(place).value.push_back(std::make_pair(right, Int64(-1)), arena);
if (!isNaN(left))
this->data(place).value.push_back(std::make_pair(left, Int64(1)), arena);
if (!isNaN(right))
this->data(place).value.push_back(std::make_pair(right, Int64(-1)), arena);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
@ -133,7 +137,6 @@ public:
/// const_cast because we will sort the array
auto & array = const_cast<typename MaxIntersectionsData<PointType>::Array &>(this->data(place).value);
/// TODO NaNs?
std::sort(array.begin(), array.end(), [](const auto & a, const auto & b) { return a.first < b.first; });
for (const auto & point_weight : array)

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/PODArray.h>
#include <Common/NaNUtils.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <Core/Types.h>
@ -32,7 +33,9 @@ struct QuantileExact
/// Appends one value to the sample array.
/// We must skip NaNs as they are not compatible with comparison sorting.
/// The guarded push is the only insertion: an additional unconditional
/// push_back would store NaNs anyway and store every other value twice.
void add(const Value & x)
{
    if (!isNaN(x))
        array.push_back(x);
}
template <typename Weight>

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/HashTable/HashMap.h>
#include <Common/NaNUtils.h>
namespace DB
@ -33,12 +34,15 @@ struct QuantileExactWeighted
/// Counts one occurrence of x in the map.
/// We must skip NaNs as they are not compatible with comparison sorting.
/// The guarded increment is the only update: an additional unconditional
/// ++map[x] would insert NaN keys anyway and count every other value twice.
void add(const Value & x)
{
    if (!isNaN(x))
        ++map[x];
}
/// Adds `weight` to the value accumulated for x in the map.
/// NaN keys are skipped, the same as in the single-argument overload.
/// The guarded update is the only one: an additional unconditional
/// map[x] += weight would insert NaN keys anyway and apply every weight twice.
void add(const Value & x, const Weight & weight)
{
    if (!isNaN(x))
        map[x] += weight;
}
void merge(const QuantileExactWeighted & rhs)

View File

@ -4,6 +4,7 @@
#include <Core/Field.h>
#include <Common/FieldVisitors.h>
#include <Common/NaNUtils.h>
namespace DB
@ -12,6 +13,7 @@ namespace DB
/// Error codes referenced from this file. They are only declared here (extern);
/// the definitions live elsewhere.
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
/// Used when a quantile level is NaN or lies outside the range [0..1].
extern const int PARAMETER_OUT_OF_BOUND;
}
@ -55,6 +57,10 @@ struct QuantileLevels
for (size_t i = 0; i < size; ++i)
{
levels[i] = applyVisitor(FieldVisitorConvertToNumber<Float64>(), params[i]);
if (isNaN(levels[i]) || levels[i] < 0 || levels[i] > 1)
throw Exception("Quantile level is out of range [0..1]", ErrorCodes::PARAMETER_OUT_OF_BOUND);
permutation[i] = i;
}

View File

@ -9,6 +9,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/PODArray.h>
#include <Common/NaNUtils.h>
#include <Poco/Exception.h>
#include <pcg_random.hpp>
@ -67,6 +68,9 @@ public:
void insert(const T & v)
{
if (isNaN(v))
return;
sorted = false;
++total_values;
if (samples.size() < sample_count)

View File

@ -11,6 +11,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/PODArray.h>
#include <Common/NaNUtils.h>
#include <Poco/Exception.h>
@ -66,6 +67,9 @@ public:
void insert(const T & v, const UInt64 determinator)
{
if (isNaN(v))
return;
const UInt32 hash = intHash64(determinator);
if (!good(hash))
return;

View File

@ -0,0 +1 @@
-- For each of 100 rows, builds arr = range(number % 10) and two variants of it:
--   arr_with_nan: the element equal to intDiv(number, 10) is replaced by nan;
--   arr_filtered: the same element is removed instead.
-- quantileExact is computed over both variants and the results are compared (eq).
-- DISTINCT collapses the comparison results across all rows.
-- NOTE(review): presumably the expected output is a single row meaning "always equal" - confirm against the reference file.
SELECT DISTINCT eq FROM (WITH range(number % 10) AS arr, arrayMap(x -> x = intDiv(number, 10) ? nan : x, arr) AS arr_with_nan, arrayFilter(x -> x != intDiv(number, 10), arr) AS arr_filtered SELECT number, arrayReduce('quantileExact', arr_with_nan) AS q1, arrayReduce('quantileExact', arr_filtered) AS q2, q1 = q2 AS eq FROM numbers(100));