mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 00:52:02 +00:00
Updated LRUHashMap benchmarks
This commit is contained in:
parent
98e6c27a95
commit
cf43a3efe2
@ -220,6 +220,12 @@ public:
|
||||
return find(key) != nullptr;
|
||||
}
|
||||
|
||||
Value & ALWAYS_INLINE operator[](const Key & key)
|
||||
{
|
||||
auto [it, _] = emplace(key);
|
||||
return it->getMapped();
|
||||
}
|
||||
|
||||
bool ALWAYS_INLINE erase(const Key & key)
|
||||
{
|
||||
auto key_hash = Base::hash(key);
|
||||
|
@ -36,7 +36,7 @@ add_executable (arena_with_free_lists arena_with_free_lists.cpp)
|
||||
target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
||||
|
||||
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
|
||||
target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
|
||||
target_link_libraries (lru_hash_map_perf PRIVATE dbms)
|
||||
|
||||
add_executable (thread_creation_latency thread_creation_latency.cpp)
|
||||
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
|
||||
|
@ -7,23 +7,26 @@
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/HashTable/LRUHashMap.h>
|
||||
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
|
||||
template<class Key, class Value>
|
||||
class LRUHashMapBasic
|
||||
{
|
||||
public:
|
||||
using key_type = Key;
|
||||
using value_type = Value;
|
||||
using list_type = std::list<key_type>;
|
||||
using node = std::pair<value_type, typename list_type::iterator>;
|
||||
using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
|
||||
using list_type = std::list<std::pair<key_type, value_type>>;
|
||||
using map_type = std::unordered_map<key_type, typename list_type::iterator>;
|
||||
|
||||
LRUHashMapBasic(size_t max_size_, bool preallocated)
|
||||
LRUHashMapBasic(size_t max_size_, bool preallocated = false)
|
||||
: hash_map(preallocated ? max_size_ : 32)
|
||||
, max_size(max_size_)
|
||||
{
|
||||
}
|
||||
|
||||
void insert(const Key &key, const Value &value)
|
||||
template<typename ...Args>
|
||||
std::pair<Value *, bool> emplace(const Key &key, Args &&... args)
|
||||
{
|
||||
auto it = hash_map.find(key);
|
||||
|
||||
@ -33,40 +36,39 @@ public:
|
||||
{
|
||||
auto iterator_to_remove = list.begin();
|
||||
|
||||
hash_map.erase(*iterator_to_remove);
|
||||
auto & key_to_remove = iterator_to_remove->first;
|
||||
hash_map.erase(key_to_remove);
|
||||
|
||||
list.erase(iterator_to_remove);
|
||||
}
|
||||
|
||||
list.push_back(key);
|
||||
hash_map[key] = std::make_pair(value, --list.end());
|
||||
|
||||
Value value(std::forward<Args>(args)...);
|
||||
auto node = std::make_pair(key, std::move(value));
|
||||
|
||||
list.push_back(std::move(node));
|
||||
|
||||
auto inserted_iterator = --list.end();
|
||||
|
||||
hash_map[key] = inserted_iterator;
|
||||
|
||||
return std::make_pair(&inserted_iterator->second, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & [value_to_update, iterator_in_list_to_update] = it->second;
|
||||
auto & iterator_in_list_to_update = it->second;
|
||||
|
||||
list.splice(list.end(), list, iterator_in_list_to_update);
|
||||
iterator_in_list_to_update = --list.end();
|
||||
|
||||
iterator_in_list_to_update = list.end();
|
||||
value_to_update = value;
|
||||
return std::make_pair(&iterator_in_list_to_update->second, false);
|
||||
}
|
||||
}
|
||||
|
||||
value_type & get(const key_type &key)
|
||||
value_type & operator[](const key_type & key)
|
||||
{
|
||||
auto iterator_in_map = hash_map.find(key);
|
||||
assert(iterator_in_map != hash_map.end());
|
||||
|
||||
auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
|
||||
|
||||
list.splice(list.end(), list, iterator_in_list_to_update);
|
||||
iterator_in_list_to_update = list.end();
|
||||
|
||||
return value_to_return;
|
||||
}
|
||||
|
||||
const value_type & get(const key_type & key) const
|
||||
{
|
||||
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
|
||||
auto [it, _] = emplace(key);
|
||||
return *it;
|
||||
}
|
||||
|
||||
size_t getMaxSize() const
|
||||
@ -101,110 +103,123 @@ private:
|
||||
size_t max_size;
|
||||
};
|
||||
|
||||
std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
|
||||
// std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
|
||||
// {
|
||||
// std::vector<UInt64> numbers;
|
||||
// numbers.reserve(numbers_to_insert_size);
|
||||
|
||||
// std::random_device rd;
|
||||
// pcg64 gen(rd());
|
||||
|
||||
// UInt64 min = std::numeric_limits<UInt64>::min();
|
||||
// UInt64 max = std::numeric_limits<UInt64>::max();
|
||||
|
||||
// auto distribution = std::uniform_int_distribution<>(min, max);
|
||||
|
||||
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
// {
|
||||
// UInt64 number = distribution(gen);
|
||||
// numbers.emplace_back(number);
|
||||
// }
|
||||
|
||||
// return numbers;
|
||||
// }
|
||||
|
||||
// void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
// {
|
||||
// size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
// std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
// std::cout << std::endl;
|
||||
|
||||
// HashMap<int, int> hash_map(preallocated ? map_size : 32);
|
||||
|
||||
// Stopwatch watch;
|
||||
|
||||
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
// hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||
|
||||
// std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
// UInt64 summ = 0;
|
||||
|
||||
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
// {
|
||||
// auto * it = hash_map.find(numbers_to_insert[i]);
|
||||
|
||||
// if (it)
|
||||
// summ += it->getMapped();
|
||||
// }
|
||||
|
||||
// std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
// }
|
||||
|
||||
// void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
// {
|
||||
// size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
// std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
// std::cout << std::endl;
|
||||
|
||||
// std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
|
||||
|
||||
// Stopwatch watch;
|
||||
|
||||
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
// hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||
|
||||
// std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
// UInt64 summ = 0;
|
||||
|
||||
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
// {
|
||||
// auto it = hash_map.find(numbers_to_insert[i]);
|
||||
|
||||
// if (it != hash_map.end())
|
||||
// summ += it->second;
|
||||
// }
|
||||
|
||||
// std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
// }
|
||||
|
||||
template <typename Key, typename Map>
|
||||
static void NO_INLINE test(const Key * data, size_t size, const std::string & name)
|
||||
{
|
||||
std::vector<UInt64> numbers;
|
||||
numbers.reserve(numbers_to_insert_size);
|
||||
|
||||
std::random_device rd;
|
||||
pcg64 gen(rd());
|
||||
|
||||
UInt64 min = std::numeric_limits<UInt64>::min();
|
||||
UInt64 max = std::numeric_limits<UInt64>::max();
|
||||
|
||||
auto distribution = std::uniform_int_distribution<>(min, max);
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
UInt64 number = distribution(gen);
|
||||
numbers.emplace_back(number);
|
||||
}
|
||||
|
||||
return numbers;
|
||||
}
|
||||
|
||||
void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
{
|
||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
std::cout << std::endl;
|
||||
|
||||
HashMap<int, int> hash_map(preallocated ? map_size : 32);
|
||||
|
||||
size_t cache_size = size / 10;
|
||||
Map cache(cache_size);
|
||||
Stopwatch watch;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
++cache[data[i]];
|
||||
|
||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
watch.stop();
|
||||
|
||||
UInt64 summ = 0;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
auto * it = hash_map.find(numbers_to_insert[i]);
|
||||
|
||||
if (it)
|
||||
summ += it->getMapped();
|
||||
}
|
||||
|
||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
std::cerr << name
|
||||
<< ":\nElapsed: " << watch.elapsedSeconds()
|
||||
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
|
||||
<< ", map size: " << cache.size() << "\n";
|
||||
}
|
||||
|
||||
void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
template <typename Key>
|
||||
static void NO_INLINE testForType(size_t method, size_t rows_size)
|
||||
{
|
||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
std::cout << std::endl;
|
||||
std::cerr << std::fixed << std::setprecision(3);
|
||||
|
||||
std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
|
||||
std::vector<Key> data(rows_size);
|
||||
|
||||
Stopwatch watch;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||
|
||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
UInt64 summ = 0;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
{
|
||||
auto it = hash_map.find(numbers_to_insert[i]);
|
||||
|
||||
if (it != hash_map.end())
|
||||
summ += it->second;
|
||||
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
|
||||
DB::CompressedReadBuffer in2(in1);
|
||||
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * rows_size);
|
||||
}
|
||||
|
||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
}
|
||||
|
||||
template<typename LRUCache>
|
||||
UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||
{
|
||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||
std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||
std::cout << std::endl;
|
||||
|
||||
LRUCache cache(map_size, preallocated);
|
||||
Stopwatch watch;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
if (method == 0)
|
||||
{
|
||||
cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
|
||||
test<Key, LRUHashMap<Key, UInt64>>(data.data(), data.size(), "CH HashMap");
|
||||
}
|
||||
else if (method == 1)
|
||||
{
|
||||
test<Key, LRUHashMapBasic<Key, UInt64>>(data.data(), data.size(), "BasicLRU");
|
||||
}
|
||||
|
||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
UInt64 summ = 0;
|
||||
|
||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||
if (cache.contains(numbers_to_insert[i]))
|
||||
summ += cache.get(numbers_to_insert[i]);
|
||||
|
||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||
|
||||
return summ;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
@ -212,33 +227,62 @@ int main(int argc, char ** argv)
|
||||
(void)(argc);
|
||||
(void)(argv);
|
||||
|
||||
size_t hash_map_size = 1200000;
|
||||
size_t numbers_to_insert_size = 12000000;
|
||||
std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
|
||||
// size_t hash_map_size = 1200000;
|
||||
// size_t numbers_to_insert_size = 12000000;
|
||||
// std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
|
||||
|
||||
std::cout << "Test insert into HashMap preallocated=0" << std::endl;
|
||||
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
// std::cout << "Test insert into HashMap preallocated=0" << std::endl;
|
||||
// testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||
// std::cout << std::endl;
|
||||
|
||||
std::cout << "Test insert into HashMap preallocated=1" << std::endl;
|
||||
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
// std::cout << "Test insert into HashMap preallocated=1" << std::endl;
|
||||
// testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||
// std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMap preallocated=0" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||
std::cout << std::endl;
|
||||
// std::cout << "Test LRUHashMap preallocated=0" << std::endl;
|
||||
// testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||
// std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMap preallocated=1" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
// std::cout << "Test LRUHashMap preallocated=1" << std::endl;
|
||||
// testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||
// std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||
std::cout << std::endl;
|
||||
// std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
|
||||
// testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||
// std::cout << std::endl;
|
||||
|
||||
std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
|
||||
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||
std::cout << std::endl;
|
||||
// std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
|
||||
// testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||
// std::cout << std::endl;
|
||||
|
||||
if (argc < 4)
|
||||
{
|
||||
std::cerr << "Usage: program method column_type_name rows_count < input_column.bin \n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t method = std::stoull(argv[1]);
|
||||
std::string type_name = std::string(argv[2]);
|
||||
size_t n = std::stoull(argv[3]);
|
||||
|
||||
if (type_name == "UInt8")
|
||||
testForType<UInt8>(method, n);
|
||||
else if (type_name == "UInt16")
|
||||
testForType<UInt16>(method, n);
|
||||
else if (type_name == "UInt32")
|
||||
testForType<UInt32>(method, n);
|
||||
else if (type_name == "UInt64")
|
||||
testForType<UInt64>(method, n);
|
||||
else if (type_name == "Int8")
|
||||
testForType<Int8>(method, n);
|
||||
else if (type_name == "Int16")
|
||||
testForType<Int16>(method, n);
|
||||
else if (type_name == "Int32")
|
||||
testForType<Int32>(method, n);
|
||||
else if (type_name == "Int64")
|
||||
testForType<Int64>(method, n);
|
||||
else
|
||||
std::cerr << "Unexpected type passed " << type_name << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user