Updated LRUHashMap benchmarks

This commit is contained in:
Maksim Kita 2021-05-17 11:08:22 +03:00
parent 98e6c27a95
commit cf43a3efe2
3 changed files with 187 additions and 137 deletions

View File

@ -220,6 +220,12 @@ public:
return find(key) != nullptr;
}
Value & ALWAYS_INLINE operator[](const Key & key)
{
auto [it, _] = emplace(key);
return it->getMapped();
}
bool ALWAYS_INLINE erase(const Key & key)
{
auto key_hash = Base::hash(key);

View File

@ -36,7 +36,7 @@ add_executable (arena_with_free_lists arena_with_free_lists.cpp)
target_link_libraries (arena_with_free_lists PRIVATE dbms)
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
target_link_libraries (lru_hash_map_perf PRIVATE dbms)
add_executable (thread_creation_latency thread_creation_latency.cpp)
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)

View File

@ -7,23 +7,26 @@
#include <Common/Stopwatch.h>
#include <Common/HashTable/LRUHashMap.h>
#include <IO/ReadBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
template<class Key, class Value>
class LRUHashMapBasic
{
public:
using key_type = Key;
using value_type = Value;
using list_type = std::list<key_type>;
using node = std::pair<value_type, typename list_type::iterator>;
using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
using list_type = std::list<std::pair<key_type, value_type>>;
using map_type = std::unordered_map<key_type, typename list_type::iterator>;
LRUHashMapBasic(size_t max_size_, bool preallocated)
LRUHashMapBasic(size_t max_size_, bool preallocated = false)
: hash_map(preallocated ? max_size_ : 32)
, max_size(max_size_)
{
}
void insert(const Key &key, const Value &value)
template<typename ...Args>
std::pair<Value *, bool> emplace(const Key &key, Args &&... args)
{
auto it = hash_map.find(key);
@ -33,40 +36,39 @@ public:
{
auto iterator_to_remove = list.begin();
hash_map.erase(*iterator_to_remove);
auto & key_to_remove = iterator_to_remove->first;
hash_map.erase(key_to_remove);
list.erase(iterator_to_remove);
}
list.push_back(key);
hash_map[key] = std::make_pair(value, --list.end());
Value value(std::forward<Args>(args)...);
auto node = std::make_pair(key, std::move(value));
list.push_back(std::move(node));
auto inserted_iterator = --list.end();
hash_map[key] = inserted_iterator;
return std::make_pair(&inserted_iterator->second, true);
}
else
{
auto & [value_to_update, iterator_in_list_to_update] = it->second;
auto & iterator_in_list_to_update = it->second;
list.splice(list.end(), list, iterator_in_list_to_update);
iterator_in_list_to_update = --list.end();
iterator_in_list_to_update = list.end();
value_to_update = value;
return std::make_pair(&iterator_in_list_to_update->second, false);
}
}
value_type & get(const key_type &key)
value_type & operator[](const key_type & key)
{
auto iterator_in_map = hash_map.find(key);
assert(iterator_in_map != hash_map.end());
auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
list.splice(list.end(), list, iterator_in_list_to_update);
iterator_in_list_to_update = list.end();
return value_to_return;
}
const value_type & get(const key_type & key) const
{
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
auto [it, _] = emplace(key);
return *it;
}
size_t getMaxSize() const
@ -101,110 +103,123 @@ private:
size_t max_size;
};
std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
// std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
// {
// std::vector<UInt64> numbers;
// numbers.reserve(numbers_to_insert_size);
// std::random_device rd;
// pcg64 gen(rd());
// UInt64 min = std::numeric_limits<UInt64>::min();
// UInt64 max = std::numeric_limits<UInt64>::max();
// auto distribution = std::uniform_int_distribution<>(min, max);
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
// {
// UInt64 number = distribution(gen);
// numbers.emplace_back(number);
// }
// return numbers;
// }
// void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
// {
// size_t numbers_to_insert_size = numbers_to_insert.size();
// std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
// std::cout << std::endl;
// HashMap<int, int> hash_map(preallocated ? map_size : 32);
// Stopwatch watch;
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
// hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
// std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
// UInt64 summ = 0;
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
// {
// auto * it = hash_map.find(numbers_to_insert[i]);
// if (it)
// summ += it->getMapped();
// }
// std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
// }
// void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
// {
// size_t numbers_to_insert_size = numbers_to_insert.size();
// std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
// std::cout << std::endl;
// std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
// Stopwatch watch;
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
// hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
// std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
// UInt64 summ = 0;
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
// {
// auto it = hash_map.find(numbers_to_insert[i]);
// if (it != hash_map.end())
// summ += it->second;
// }
// std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
// }
template <typename Key, typename Map>
static void NO_INLINE test(const Key * data, size_t size, const std::string & name)
{
std::vector<UInt64> numbers;
numbers.reserve(numbers_to_insert_size);
std::random_device rd;
pcg64 gen(rd());
UInt64 min = std::numeric_limits<UInt64>::min();
UInt64 max = std::numeric_limits<UInt64>::max();
auto distribution = std::uniform_int_distribution<>(min, max);
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
UInt64 number = distribution(gen);
numbers.emplace_back(number);
}
return numbers;
}
void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
HashMap<int, int> hash_map(preallocated ? map_size : 32);
size_t cache_size = size / 10;
Map cache(cache_size);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
for (size_t i = 0; i < size; ++i)
++cache[data[i]];
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
watch.stop();
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
auto * it = hash_map.find(numbers_to_insert[i]);
if (it)
summ += it->getMapped();
}
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
std::cerr << name
<< ":\nElapsed: " << watch.elapsedSeconds()
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
<< ", map size: " << cache.size() << "\n";
}
void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
template <typename Key>
static void NO_INLINE testForType(size_t method, size_t rows_size)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
std::cerr << std::fixed << std::setprecision(3);
std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
std::vector<Key> data(rows_size);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
{
auto it = hash_map.find(numbers_to_insert[i]);
if (it != hash_map.end())
summ += it->second;
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::CompressedReadBuffer in2(in1);
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * rows_size);
}
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
}
template<typename LRUCache>
UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
{
size_t numbers_to_insert_size = numbers_to_insert.size();
std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
std::cout << std::endl;
LRUCache cache(map_size, preallocated);
Stopwatch watch;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
if (method == 0)
{
cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
test<Key, LRUHashMap<Key, UInt64>>(data.data(), data.size(), "CH HashMap");
}
else if (method == 1)
{
test<Key, LRUHashMapBasic<Key, UInt64>>(data.data(), data.size(), "BasicLRU");
}
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
UInt64 summ = 0;
for (size_t i = 0; i < numbers_to_insert_size; ++i)
if (cache.contains(numbers_to_insert[i]))
summ += cache.get(numbers_to_insert[i]);
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
return summ;
}
int main(int argc, char ** argv)
@ -212,33 +227,62 @@ int main(int argc, char ** argv)
(void)(argc);
(void)(argv);
size_t hash_map_size = 1200000;
size_t numbers_to_insert_size = 12000000;
std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
// size_t hash_map_size = 1200000;
// size_t numbers_to_insert_size = 12000000;
// std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
std::cout << "Test insert into HashMap preallocated=0" << std::endl;
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
std::cout << std::endl;
// std::cout << "Test insert into HashMap preallocated=0" << std::endl;
// testInsertElementsIntoHashMap(hash_map_size, numbers, true);
// std::cout << std::endl;
std::cout << "Test insert into HashMap preallocated=1" << std::endl;
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
std::cout << std::endl;
// std::cout << "Test insert into HashMap preallocated=1" << std::endl;
// testInsertElementsIntoHashMap(hash_map_size, numbers, true);
// std::cout << std::endl;
std::cout << "Test LRUHashMap preallocated=0" << std::endl;
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
std::cout << std::endl;
// std::cout << "Test LRUHashMap preallocated=0" << std::endl;
// testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
// std::cout << std::endl;
std::cout << "Test LRUHashMap preallocated=1" << std::endl;
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
std::cout << std::endl;
// std::cout << "Test LRUHashMap preallocated=1" << std::endl;
// testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
// std::cout << std::endl;
std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
std::cout << std::endl;
// std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
// testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
// std::cout << std::endl;
std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
std::cout << std::endl;
// std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
// testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
// std::cout << std::endl;
if (argc < 4)
{
std::cerr << "Usage: program method column_type_name rows_count < input_column.bin \n";
return 1;
}
size_t method = std::stoull(argv[1]);
std::string type_name = std::string(argv[2]);
size_t n = std::stoull(argv[3]);
if (type_name == "UInt8")
testForType<UInt8>(method, n);
else if (type_name == "UInt16")
testForType<UInt16>(method, n);
else if (type_name == "UInt32")
testForType<UInt32>(method, n);
else if (type_name == "UInt64")
testForType<UInt64>(method, n);
else if (type_name == "Int8")
testForType<Int8>(method, n);
else if (type_name == "Int16")
testForType<Int16>(method, n);
else if (type_name == "Int32")
testForType<Int32>(method, n);
else if (type_name == "Int64")
testForType<Int64>(method, n);
else
std::cerr << "Unexpected type passed " << type_name << std::endl;
return 0;
}