// ClickHouse/src/Common/tests/space_saving.cpp

#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>

#include <Common/SpaceSaving.h>
#include <common/StringRef.h>

int main(int, char **)
{
{
using Cont = DB::SpaceSaving<int>;
Cont first(10);
/* Test biased insertion */
2017-06-28 12:30:23 +00:00
for (int i = 0; i < 200; ++i)
{
first.insert(i);
int k = i % 5; // Bias towards 0-4
first.insert(k);
}
/* Test whether the biased elements are retained */
std::map<int, UInt64> expect;
2017-06-28 12:30:23 +00:00
for (int i = 0; i < 5; ++i)
{
2017-06-28 12:22:07 +00:00
expect[i] = 41;
}
2017-06-28 12:30:23 +00:00
for (auto x : first.topK(5))
{
if (expect[x.key] != x.count)
{
std::cerr << "key: " << x.key << " value: " << x.count << " expected: " << expect[x.key] << std::endl;
2017-06-28 12:30:23 +00:00
}
else
{
std::cout << "key: " << x.key << " value: " << x.count << std::endl;
}
expect.erase(x.key);
}
2017-06-28 12:30:23 +00:00
if (!expect.empty())
{
std::cerr << "expected to find all heavy hitters" << std::endl;
}
/* Create another table and test merging */
Cont second(10);
2017-06-28 12:30:23 +00:00
for (int i = 0; i < 200; ++i)
{
first.insert(i);
}
2017-06-28 12:30:23 +00:00
for (int i = 0; i < 5; ++i)
{
2017-06-28 12:22:07 +00:00
expect[i] = 42;
}
first.merge(second);
2017-06-28 12:30:23 +00:00
for (auto x : first.topK(5))
{
if (expect[x.key] != x.count)
{
std::cerr << "key: " << x.key << " value: " << x.count << " expected: " << expect[x.key] << std::endl;
2017-06-28 12:30:23 +00:00
}
else
{
std::cout << "key: " << x.key << " value: " << x.count << std::endl;
}
expect.erase(x.key);
}
}
{
/* Same test for string keys */
2017-06-28 12:22:07 +00:00
using Cont = DB::SpaceSaving<StringRef, StringRefHash>;
Cont cont(10);
2017-06-28 12:30:23 +00:00
for (int i = 0; i < 400; ++i)
{
cont.insert(std::to_string(i));
cont.insert(std::to_string(i % 5)); // Bias towards 0-4
}
// The hashing is going to be more lossy
// Expect at least ~ 10% count
std::map<std::string, UInt64> expect;
2017-06-28 12:30:23 +00:00
for (int i = 0; i < 5; ++i)
{
expect[std::to_string(i)] = 38;
}
2017-06-28 12:30:23 +00:00
for (auto x : cont.topK(5))
{
2017-06-28 12:24:49 +00:00
auto key = x.key.toString();
2017-06-28 12:30:23 +00:00
if (x.count < expect[key])
{
std::cerr << "key: " << key << " value: " << x.count << " expected: " << expect[key] << std::endl;
2017-06-28 12:30:23 +00:00
}
else
{
std::cout << "key: " << key << " value: " << x.count << std::endl;
}
expect.erase(key);
}
2017-06-28 12:30:23 +00:00
if (!expect.empty())
{
std::cerr << "expected to find all heavy hitters" << std::endl;
abort();
}
}
return 0;
}