Fixed off-by-one errors in aggregate functions quantileTiming and quantileExactWeighted [#METR-20381].

This commit is contained in:
Alexey Milovidov 2016-03-13 17:40:27 +03:00
parent 1292dc3841
commit bcd6875d6f
4 changed files with 87 additions and 63 deletions

View File

@ -139,9 +139,13 @@ public:
const Pair * it = array;
const Pair * end = array + size;
while (it < end && accumulated < threshold)
while (it < end)
{
accumulated += it->second;
if (accumulated >= threshold)
break;
++it;
}
@ -262,17 +266,31 @@ public:
const Pair * it = array;
const Pair * end = array + size;
for (auto level_index : levels.permutation)
{
size_t level_index = 0;
UInt64 threshold = sum_weight * levels.levels[level_index];
while (it < end && accumulated < threshold)
while (it < end)
{
accumulated += it->second;
while (accumulated >= threshold)
{
data_to[old_size + level_index] = it->first;
++level_index;
if (level_index == num_levels)
return;
threshold = sum_weight * levels.levels[level_index];
}
++it;
}
data_to[old_size + level_index] = it < end ? it->first : it[-1].first;
while (level_index < num_levels)
{
data_to[old_size + level_index] = array[size - 1].first;
++level_index;
}
}
};

View File

@ -264,6 +264,45 @@ namespace detail
+ (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// Небольшая рандомизация, чтобы не было заметно, что все значения чётные.
}
/// Walks over the histogram counters in order, stopping only on non-zero ones.
/// NOTE(review): the scanned range starts at parent.count_small and ends at
/// &parent.count_big[BIG_SIZE], so it presumably relies on the two arrays
/// being laid out back to back in the parent object — confirm.
class Iterator
{
private:
    const UInt64 * begin;
    const UInt64 * pos;
    const UInt64 * end;

    /// Advance pos until it points at a non-zero counter or reaches end.
    void adjust()
    {
        for (; isValid(); ++pos)
            if (*pos != 0)
                return;
    }

public:
    Iterator(const QuantileTimingLarge & parent)
        : begin(parent.count_small), pos(begin), end(&parent.count_big[BIG_SIZE])
    {
        /// Position on the first non-zero counter right away.
        adjust();
    }

    /// True while the iterator still points inside the scanned range.
    bool isValid() const { return pos < end; }

    /// Step past the current counter and on to the next non-zero one.
    void next()
    {
        ++pos;
        adjust();
    }

    /// Counter stored at the current position.
    UInt64 count() const { return *pos; }

    /// Value that corresponds to the current position: the offset itself while it is
    /// below SMALL_THRESHOLD, otherwise the result of indexInBigToValue for the
    /// offset counted from SMALL_THRESHOLD.
    UInt16 key() const
    {
        const auto offset = pos - begin;

        if (offset < SMALL_THRESHOLD)
            return offset;

        return indexInBigToValue(offset - SMALL_THRESHOLD);
    }
};
public:
QuantileTimingLarge()
{
@ -367,28 +406,19 @@ namespace detail
UInt64 pos = count * level;
UInt64 accumulated = 0;
Iterator it(*this);
size_t i = 0;
while (i < SMALL_THRESHOLD && accumulated < pos)
while (it.isValid())
{
accumulated += count_small[i];
++i;
accumulated += it.count();
if (accumulated >= pos)
break;
it.next();
}
if (i < SMALL_THRESHOLD)
return i;
i = 0;
while (i < BIG_SIZE && accumulated < pos)
{
accumulated += count_big[i];
++i;
}
if (i < BIG_SIZE)
return indexInBigToValue(i);
return BIG_THRESHOLD;
return it.isValid() ? it.key() : BIG_THRESHOLD;
}
/// Получить значения size квантилей уровней levels. Записать size результатов начиная с адреса result.
@ -402,20 +432,15 @@ namespace detail
UInt64 pos = count * levels[*index];
UInt64 accumulated = 0;
Iterator it(*this);
size_t i = 0;
while (i < SMALL_THRESHOLD)
while (it.isValid())
{
while (i < SMALL_THRESHOLD && accumulated < pos)
{
accumulated += count_small[i];
++i;
}
accumulated += it.count();
if (i < SMALL_THRESHOLD)
while (accumulated >= pos)
{
result[*index] = i;
result[*index] = it.key();
++index;
if (index == indices_end)
@ -423,36 +448,11 @@ namespace detail
pos = count * levels[*index];
}
it.next();
}
i = 0;
while (i < BIG_SIZE)
{
while (i < BIG_SIZE && accumulated < pos)
{
accumulated += count_big[i];
++i;
}
if (i < BIG_SIZE)
{
result[*index] = indexInBigToValue(i);
++index;
if (index == indices_end)
return;
pos = count * levels[*index];
}
}
while (index < indices_end)
{
result[*index] = BIG_THRESHOLD;
++index;
}
}
/// То же самое, но в случае пустого состояния возвращается NaN.

View File

@ -0,0 +1,3 @@
10 [1,1,1,1,10,10,10,10,100,100,100]
100 100
61 61

View File

@ -0,0 +1,3 @@
SELECT quantileExactWeighted(0.5)(x, 1) AS q5, quantilesExactWeighted(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)(x, 1) AS qs FROM (SELECT arrayJoin([1, 1, 1, 10, 10, 10, 10, 100, 100, 100]) AS x);
SELECT quantileExact(0)(x), quantileTiming(0)(x) FROM (SELECT number + 100 AS x FROM system.numbers LIMIT 10000);
SELECT quantileExact(x), quantileTiming(x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 10000);