mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
Improve performance of quantileMerge #16640
This commit is contained in:
parent
2fae1c3c31
commit
5fe679324e
@ -158,12 +158,25 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
randomShuffle(samples);
|
||||
/// Replace every element in our reservoir with the corresponding element of b's reservoir
|
||||
/// with the probability of b.total_values / (a.total_values + b.total_values)
|
||||
/// Do it more roughly than true random sampling to save performance.
|
||||
|
||||
total_values += b.total_values;
|
||||
for (size_t i = 0; i < sample_count; ++i)
|
||||
|
||||
/// Will replace every frequency'th element in a with the corresponding element from b.
|
||||
double frequency = static_cast<double>(total_values) / b.total_values;
|
||||
|
||||
/// When frequency is too low, replace just one random element with the corresponding probability.
|
||||
if (frequency * 2 >= sample_count)
|
||||
{
|
||||
UInt64 rnd = genRandom(total_values);
|
||||
if (rnd < b.total_values)
|
||||
UInt64 rnd = genRandom(frequency);
|
||||
if (rnd < sample_count)
|
||||
samples[rnd] = b.samples[rnd];
|
||||
}
|
||||
else
|
||||
{
|
||||
for (double i = 0; i < sample_count; i += frequency)
|
||||
samples[i] = b.samples[i];
|
||||
}
|
||||
}
|
||||
@ -222,15 +235,6 @@ private:
|
||||
return (static_cast<UInt64>(rng()) * (static_cast<UInt64>(rng.max()) + 1ULL) + static_cast<UInt64>(rng())) % lim;
|
||||
}
|
||||
|
||||
/// Shuffles the elements of `v` in place.
/// Walks the array from the second element onwards and swaps each position
/// with a partner index obtained from genRandom(position + 1); since genRandom
/// reduces its result modulo the limit, the partner lies in [0, position]
/// (it may be the position itself, in which case the swap is a no-op).
/// Arrays with fewer than two elements are left untouched.
void randomShuffle(Array & v)
{
    const size_t count = v.size();

    for (size_t pos = 1; pos < count; ++pos)
        std::swap(v[pos], v[genRandom(pos + 1)]);
}
|
||||
|
||||
void sortIfNeeded()
|
||||
{
|
||||
if (sorted)
|
||||
|
Loading…
Reference in New Issue
Block a user