improve performance

This commit is contained in:
taiyang-li 2023-02-15 21:19:50 +08:00
parent a478e57905
commit ddba07b9c3

View File

@ -1,7 +1,8 @@
#pragma once #pragma once
#include <cmath> #include <cmath>
#include <base/sort.h>
#include <Common/RadixSort.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@ -53,6 +54,8 @@ public:
, compressed(compressed_) , compressed(compressed_)
, sampled(sampled_) , sampled(sampled_)
{ {
sampled.reserve(compress_threshold);
backup_sampled.reserve(compress_threshold);
} }
bool isCompressed() const { return compressed; } bool isCompressed() const { return compressed; }
@ -113,9 +116,8 @@ public:
void compress() void compress()
{ {
withHeadBufferInserted(); withHeadBufferInserted();
auto compressed_samples = doCompress(sampled, 2 * relative_error * count);
std::swap(sampled, compressed_samples); doCompress(2 * relative_error * count);
compressed = true; compressed = true;
} }
@ -165,8 +167,8 @@ public:
// `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since // `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since
// `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)` // `max(g_ab + delta_ab) <= floor(2 * eps_a * n_a) + floor(2 * eps_b * n_b)`
// Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0])` // Finally, one can see how the `insert(x)` operation can be expressed as `merge([(x, 1, 0])`
std::vector<Stats> merged_sampled; backup_sampled.clear();
merged_sampled.reserve(sampled.size() + other.sampled.size()); backup_sampled.reserve(sampled.size() + other.sampled.size());
double merged_relative_error = std::max(relative_error, other.relative_error); double merged_relative_error = std::max(relative_error, other.relative_error);
size_t merged_count = count + other.count; size_t merged_count = count + other.count;
Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count)); Int64 additional_self_delta = static_cast<Int64>(std::floor(2 * other.relative_error * other.count));
@ -198,27 +200,28 @@ public:
// Insert it // Insert it
next_sample.delta += additional_delta; next_sample.delta += additional_delta;
merged_sampled.push_back(next_sample); backup_sampled.emplace_back(std::move(next_sample));
} }
// Copy the remaining samples from the other list // Copy the remaining samples from the other list
// (by construction, at most one `while` loop will run) // (by construction, at most one `while` loop will run)
while (self_idx < sampled.size()) while (self_idx < sampled.size())
{ {
merged_sampled.push_back(sampled[self_idx]); backup_sampled.emplace_back(sampled[self_idx]);
++self_idx; ++self_idx;
} }
while (other_idx < other.sampled.size()) while (other_idx < other.sampled.size())
{ {
merged_sampled.emplace_back(other.sampled[other_idx]); backup_sampled.emplace_back(other.sampled[other_idx]);
++other_idx; ++other_idx;
} }
auto compressed_sampled = doCompress(merged_sampled, 2 * merged_relative_error * merged_count); std::swap(sampled, backup_sampled);
compress_threshold = other.compress_threshold;
relative_error = merged_relative_error; relative_error = merged_relative_error;
std::swap(compressed_sampled, sampled);
count = merged_count; count = merged_count;
compress_threshold = other.compress_threshold;
doCompress(2 * merged_relative_error * merged_count);
compressed = true; compressed = true;
} }
} }
@ -287,13 +290,18 @@ private:
if (head_sampled.empty()) if (head_sampled.empty())
return; return;
size_t current_count = count; bool use_radix_sort = head_sampled.size() >= 256 && (is_arithmetic_v<T> && !is_big_int_v<T>);
std::sort(head_sampled.begin(), head_sampled.end()); if (use_radix_sort)
std::vector<Stats> new_samples; RadixSort<RadixSortNumTraits<T>>::executeLSD(head_sampled.data(), head_sampled.size());
new_samples.reserve(sampled.size() + head_sampled.size()); else
::sort(head_sampled.begin(), head_sampled.end());
backup_sampled.clear();
backup_sampled.reserve(sampled.size() + head_sampled.size());
size_t sample_idx = 0; size_t sample_idx = 0;
size_t ops_idx = 0; size_t ops_idx = 0;
size_t current_count = count;
for (; ops_idx < head_sampled.size(); ++ops_idx) for (; ops_idx < head_sampled.size(); ++ops_idx)
{ {
T current_sample = head_sampled[ops_idx]; T current_sample = head_sampled[ops_idx];
@ -301,47 +309,47 @@ private:
// Add all the samples before the next observation. // Add all the samples before the next observation.
while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample) while (sample_idx < sampled.size() && sampled[sample_idx].value <= current_sample)
{ {
new_samples.push_back(sampled[sample_idx]); backup_sampled.emplace_back(sampled[sample_idx]);
++sample_idx; ++sample_idx;
} }
// If it is the first one to insert, of if it is the last one // If it is the first one to insert, of if it is the last one
++current_count; ++current_count;
Int64 delta; Int64 delta;
if (new_samples.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1))) if (backup_sampled.empty() || (sample_idx == sampled.size() && ops_idx == (head_sampled.size() - 1)))
delta = 0; delta = 0;
else else
delta = static_cast<Int64>(std::floor(2 * relative_error * current_count)); delta = static_cast<Int64>(std::floor(2 * relative_error * current_count));
new_samples.emplace_back(current_sample, 1, delta); backup_sampled.emplace_back(current_sample, 1, delta);
} }
// Add all the remaining existing samples // Add all the remaining existing samples
for (; sample_idx < sampled.size(); ++sample_idx) for (; sample_idx < sampled.size(); ++sample_idx)
new_samples.push_back(sampled[sample_idx]); backup_sampled.emplace_back(sampled[sample_idx]);
std::swap(sampled, new_samples); std::swap(sampled, backup_sampled);
head_sampled.clear(); head_sampled.clear();
count = current_count; count = current_count;
} }
static std::vector<Stats> doCompress(const std::vector<Stats> & current_samples, double merge_threshold) void doCompress(double merge_threshold)
{ {
if (current_samples.empty()) if (sampled.empty())
return {}; return;
std::vector<Stats> res; backup_sampled.clear();
// Start for the last element, which is always part of the set. // Start for the last element, which is always part of the set.
// The head contains the current new head, that may be merged with the current element. // The head contains the current new head, that may be merged with the current element.
auto head = current_samples.back(); Stats head = sampled.back();
ssize_t i = current_samples.size() - 2; ssize_t i = sampled.size() - 2;
// Do not compress the last element // Do not compress the last element
while (i >= 1) while (i >= 1)
{ {
// The current sample: // The current sample:
const auto & sample1 = current_samples[i]; const auto & sample1 = sampled[i];
// Do we need to compress? // Do we need to compress?
if (sample1.g + head.g + head.delta < merge_threshold) if (sample1.g + head.g + head.delta < merge_threshold)
{ {
@ -351,22 +359,23 @@ private:
else else
{ {
// Prepend the current head, and keep the current sample as target for merging. // Prepend the current head, and keep the current sample as target for merging.
res.insert(res.begin(), head); backup_sampled.push_back(head);
head = sample1; head = sample1;
} }
--i; --i;
} }
res.insert(res.begin(), head); backup_sampled.push_back(head);
// If necessary, add the minimum element: // If necessary, add the minimum element:
auto curr_head = current_samples.front(); auto curr_head = sampled.front();
// don't add the minimum element if `currentSamples` has only one element (both `currHead` and // don't add the minimum element if `currentSamples` has only one element (both `currHead` and
// `head` point to the same element) // `head` point to the same element)
if (curr_head.value <= head.value && current_samples.size() > 1) if (curr_head.value <= head.value && sampled.size() > 1)
res.insert(res.begin(), current_samples.front()); backup_sampled.emplace_back(sampled.front());
return res; std::reverse(backup_sampled.begin(), backup_sampled.end());
std::swap(sampled, backup_sampled);
} }
double relative_error; double relative_error;
@ -375,6 +384,8 @@ private:
bool compressed; bool compressed;
std::vector<Stats> sampled; std::vector<Stats> sampled;
std::vector<Stats> backup_sampled;
std::vector<T> head_sampled; std::vector<T> head_sampled;
static constexpr size_t default_compress_threshold = 10000; static constexpr size_t default_compress_threshold = 10000;