mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Fixed off-by-one errors in aggregate functions quantileTiming and quantileExactWeighted [#METR-20381].
This commit is contained in:
parent
1292dc3841
commit
bcd6875d6f
@ -139,9 +139,13 @@ public:
|
||||
|
||||
const Pair * it = array;
|
||||
const Pair * end = array + size;
|
||||
while (it < end && accumulated < threshold)
|
||||
while (it < end)
|
||||
{
|
||||
accumulated += it->second;
|
||||
|
||||
if (accumulated >= threshold)
|
||||
break;
|
||||
|
||||
++it;
|
||||
}
|
||||
|
||||
@ -262,17 +266,31 @@ public:
|
||||
const Pair * it = array;
|
||||
const Pair * end = array + size;
|
||||
|
||||
for (auto level_index : levels.permutation)
|
||||
{
|
||||
UInt64 threshold = sum_weight * levels.levels[level_index];
|
||||
size_t level_index = 0;
|
||||
UInt64 threshold = sum_weight * levels.levels[level_index];
|
||||
|
||||
while (it < end && accumulated < threshold)
|
||||
while (it < end)
|
||||
{
|
||||
accumulated += it->second;
|
||||
|
||||
while (accumulated >= threshold)
|
||||
{
|
||||
accumulated += it->second;
|
||||
++it;
|
||||
data_to[old_size + level_index] = it->first;
|
||||
++level_index;
|
||||
|
||||
if (level_index == num_levels)
|
||||
return;
|
||||
|
||||
threshold = sum_weight * levels.levels[level_index];
|
||||
}
|
||||
|
||||
data_to[old_size + level_index] = it < end ? it->first : it[-1].first;
|
||||
++it;
|
||||
}
|
||||
|
||||
while (level_index < num_levels)
|
||||
{
|
||||
data_to[old_size + level_index] = array[size - 1].first;
|
||||
++level_index;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -264,6 +264,45 @@ namespace detail
|
||||
+ (intHash32<0>(i) % BIG_PRECISION - (BIG_PRECISION / 2)); /// Небольшая рандомизация, чтобы не было заметно, что все значения чётные.
|
||||
}
|
||||
|
||||
/// Позволяет перебрать значения гистограммы, пропуская нули.
|
||||
class Iterator
|
||||
{
|
||||
private:
|
||||
const UInt64 * begin;
|
||||
const UInt64 * pos;
|
||||
const UInt64 * end;
|
||||
|
||||
void adjust()
|
||||
{
|
||||
while (isValid() && 0 == *pos)
|
||||
++pos;
|
||||
}
|
||||
|
||||
public:
|
||||
Iterator(const QuantileTimingLarge & parent)
|
||||
: begin(parent.count_small), pos(begin), end(&parent.count_big[BIG_SIZE])
|
||||
{
|
||||
adjust();
|
||||
}
|
||||
|
||||
bool isValid() const { return pos < end; }
|
||||
|
||||
void next()
|
||||
{
|
||||
++pos;
|
||||
adjust();
|
||||
}
|
||||
|
||||
UInt64 count() const { return *pos; }
|
||||
|
||||
UInt16 key() const
|
||||
{
|
||||
return pos - begin < SMALL_THRESHOLD
|
||||
? pos - begin
|
||||
: indexInBigToValue(pos - begin - SMALL_THRESHOLD);
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
QuantileTimingLarge()
|
||||
{
|
||||
@ -367,28 +406,19 @@ namespace detail
|
||||
UInt64 pos = count * level;
|
||||
|
||||
UInt64 accumulated = 0;
|
||||
Iterator it(*this);
|
||||
|
||||
size_t i = 0;
|
||||
while (i < SMALL_THRESHOLD && accumulated < pos)
|
||||
while (it.isValid())
|
||||
{
|
||||
accumulated += count_small[i];
|
||||
++i;
|
||||
accumulated += it.count();
|
||||
|
||||
if (accumulated >= pos)
|
||||
break;
|
||||
|
||||
it.next();
|
||||
}
|
||||
|
||||
if (i < SMALL_THRESHOLD)
|
||||
return i;
|
||||
|
||||
i = 0;
|
||||
while (i < BIG_SIZE && accumulated < pos)
|
||||
{
|
||||
accumulated += count_big[i];
|
||||
++i;
|
||||
}
|
||||
|
||||
if (i < BIG_SIZE)
|
||||
return indexInBigToValue(i);
|
||||
|
||||
return BIG_THRESHOLD;
|
||||
return it.isValid() ? it.key() : BIG_THRESHOLD;
|
||||
}
|
||||
|
||||
/// Получить значения size квантилей уровней levels. Записать size результатов начиная с адреса result.
|
||||
@ -402,20 +432,15 @@ namespace detail
|
||||
UInt64 pos = count * levels[*index];
|
||||
|
||||
UInt64 accumulated = 0;
|
||||
Iterator it(*this);
|
||||
|
||||
size_t i = 0;
|
||||
while (i < SMALL_THRESHOLD)
|
||||
while (it.isValid())
|
||||
{
|
||||
while (i < SMALL_THRESHOLD && accumulated < pos)
|
||||
{
|
||||
accumulated += count_small[i];
|
||||
++i;
|
||||
}
|
||||
accumulated += it.count();
|
||||
|
||||
if (i < SMALL_THRESHOLD)
|
||||
while (accumulated >= pos)
|
||||
{
|
||||
result[*index] = i;
|
||||
|
||||
result[*index] = it.key();
|
||||
++index;
|
||||
|
||||
if (index == indices_end)
|
||||
@ -423,36 +448,11 @@ namespace detail
|
||||
|
||||
pos = count * levels[*index];
|
||||
}
|
||||
|
||||
it.next();
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (i < BIG_SIZE)
|
||||
{
|
||||
while (i < BIG_SIZE && accumulated < pos)
|
||||
{
|
||||
accumulated += count_big[i];
|
||||
++i;
|
||||
}
|
||||
|
||||
if (i < BIG_SIZE)
|
||||
{
|
||||
result[*index] = indexInBigToValue(i);
|
||||
|
||||
++index;
|
||||
|
||||
if (index == indices_end)
|
||||
return;
|
||||
|
||||
pos = count * levels[*index];
|
||||
}
|
||||
}
|
||||
|
||||
while (index < indices_end)
|
||||
{
|
||||
result[*index] = BIG_THRESHOLD;
|
||||
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
/// То же самое, но в случае пустого состояния возвращается NaN.
|
||||
|
@ -0,0 +1,3 @@
|
||||
10 [1,1,1,1,10,10,10,10,100,100,100]
|
||||
100 100
|
||||
61 61
|
@ -0,0 +1,3 @@
|
||||
SELECT quantileExactWeighted(0.5)(x, 1) AS q5, quantilesExactWeighted(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)(x, 1) AS qs FROM (SELECT arrayJoin([1, 1, 1, 10, 10, 10, 10, 100, 100, 100]) AS x);
|
||||
SELECT quantileExact(0)(x), quantileTiming(0)(x) FROM (SELECT number + 100 AS x FROM system.numbers LIMIT 10000);
|
||||
SELECT quantileExact(x), quantileTiming(x) FROM (SELECT number % 123 AS x FROM system.numbers LIMIT 10000);
|
Loading…
Reference in New Issue
Block a user