mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-28 18:42:26 +00:00
Updated LRUHashMap benchmarks
This commit is contained in:
parent
98e6c27a95
commit
cf43a3efe2
@ -220,6 +220,12 @@ public:
|
|||||||
return find(key) != nullptr;
|
return find(key) != nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Value & ALWAYS_INLINE operator[](const Key & key)
|
||||||
|
{
|
||||||
|
auto [it, _] = emplace(key);
|
||||||
|
return it->getMapped();
|
||||||
|
}
|
||||||
|
|
||||||
bool ALWAYS_INLINE erase(const Key & key)
|
bool ALWAYS_INLINE erase(const Key & key)
|
||||||
{
|
{
|
||||||
auto key_hash = Base::hash(key);
|
auto key_hash = Base::hash(key);
|
||||||
|
@ -36,7 +36,7 @@ add_executable (arena_with_free_lists arena_with_free_lists.cpp)
|
|||||||
target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
target_link_libraries (arena_with_free_lists PRIVATE dbms)
|
||||||
|
|
||||||
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
|
add_executable (lru_hash_map_perf lru_hash_map_perf.cpp)
|
||||||
target_link_libraries (lru_hash_map_perf PRIVATE clickhouse_common_io)
|
target_link_libraries (lru_hash_map_perf PRIVATE dbms)
|
||||||
|
|
||||||
add_executable (thread_creation_latency thread_creation_latency.cpp)
|
add_executable (thread_creation_latency thread_creation_latency.cpp)
|
||||||
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
|
target_link_libraries (thread_creation_latency PRIVATE clickhouse_common_io)
|
||||||
|
@ -7,23 +7,26 @@
|
|||||||
#include <Common/Stopwatch.h>
|
#include <Common/Stopwatch.h>
|
||||||
#include <Common/HashTable/LRUHashMap.h>
|
#include <Common/HashTable/LRUHashMap.h>
|
||||||
|
|
||||||
|
#include <IO/ReadBufferFromFile.h>
|
||||||
|
#include <Compression/CompressedReadBuffer.h>
|
||||||
|
|
||||||
template<class Key, class Value>
|
template<class Key, class Value>
|
||||||
class LRUHashMapBasic
|
class LRUHashMapBasic
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using key_type = Key;
|
using key_type = Key;
|
||||||
using value_type = Value;
|
using value_type = Value;
|
||||||
using list_type = std::list<key_type>;
|
using list_type = std::list<std::pair<key_type, value_type>>;
|
||||||
using node = std::pair<value_type, typename list_type::iterator>;
|
using map_type = std::unordered_map<key_type, typename list_type::iterator>;
|
||||||
using map_type = std::unordered_map<key_type, node, DefaultHash<Key>>;
|
|
||||||
|
|
||||||
LRUHashMapBasic(size_t max_size_, bool preallocated)
|
LRUHashMapBasic(size_t max_size_, bool preallocated = false)
|
||||||
: hash_map(preallocated ? max_size_ : 32)
|
: hash_map(preallocated ? max_size_ : 32)
|
||||||
, max_size(max_size_)
|
, max_size(max_size_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void insert(const Key &key, const Value &value)
|
template<typename ...Args>
|
||||||
|
std::pair<Value *, bool> emplace(const Key &key, Args &&... args)
|
||||||
{
|
{
|
||||||
auto it = hash_map.find(key);
|
auto it = hash_map.find(key);
|
||||||
|
|
||||||
@ -33,40 +36,39 @@ public:
|
|||||||
{
|
{
|
||||||
auto iterator_to_remove = list.begin();
|
auto iterator_to_remove = list.begin();
|
||||||
|
|
||||||
hash_map.erase(*iterator_to_remove);
|
auto & key_to_remove = iterator_to_remove->first;
|
||||||
|
hash_map.erase(key_to_remove);
|
||||||
|
|
||||||
list.erase(iterator_to_remove);
|
list.erase(iterator_to_remove);
|
||||||
}
|
}
|
||||||
|
|
||||||
list.push_back(key);
|
|
||||||
hash_map[key] = std::make_pair(value, --list.end());
|
Value value(std::forward<Args>(args)...);
|
||||||
|
auto node = std::make_pair(key, std::move(value));
|
||||||
|
|
||||||
|
list.push_back(std::move(node));
|
||||||
|
|
||||||
|
auto inserted_iterator = --list.end();
|
||||||
|
|
||||||
|
hash_map[key] = inserted_iterator;
|
||||||
|
|
||||||
|
return std::make_pair(&inserted_iterator->second, true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
auto & [value_to_update, iterator_in_list_to_update] = it->second;
|
auto & iterator_in_list_to_update = it->second;
|
||||||
|
|
||||||
list.splice(list.end(), list, iterator_in_list_to_update);
|
list.splice(list.end(), list, iterator_in_list_to_update);
|
||||||
|
iterator_in_list_to_update = --list.end();
|
||||||
|
|
||||||
iterator_in_list_to_update = list.end();
|
return std::make_pair(&iterator_in_list_to_update->second, false);
|
||||||
value_to_update = value;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
value_type & get(const key_type &key)
|
value_type & operator[](const key_type & key)
|
||||||
{
|
{
|
||||||
auto iterator_in_map = hash_map.find(key);
|
auto [it, _] = emplace(key);
|
||||||
assert(iterator_in_map != hash_map.end());
|
return *it;
|
||||||
|
|
||||||
auto & [value_to_return, iterator_in_list_to_update] = iterator_in_map->second;
|
|
||||||
|
|
||||||
list.splice(list.end(), list, iterator_in_list_to_update);
|
|
||||||
iterator_in_list_to_update = list.end();
|
|
||||||
|
|
||||||
return value_to_return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const value_type & get(const key_type & key) const
|
|
||||||
{
|
|
||||||
return const_cast<std::decay_t<decltype(*this)> *>(this)->get(key);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getMaxSize() const
|
size_t getMaxSize() const
|
||||||
@ -101,110 +103,123 @@ private:
|
|||||||
size_t max_size;
|
size_t max_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
|
// std::vector<UInt64> generateNumbersToInsert(size_t numbers_to_insert_size)
|
||||||
|
// {
|
||||||
|
// std::vector<UInt64> numbers;
|
||||||
|
// numbers.reserve(numbers_to_insert_size);
|
||||||
|
|
||||||
|
// std::random_device rd;
|
||||||
|
// pcg64 gen(rd());
|
||||||
|
|
||||||
|
// UInt64 min = std::numeric_limits<UInt64>::min();
|
||||||
|
// UInt64 max = std::numeric_limits<UInt64>::max();
|
||||||
|
|
||||||
|
// auto distribution = std::uniform_int_distribution<>(min, max);
|
||||||
|
|
||||||
|
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||||
|
// {
|
||||||
|
// UInt64 number = distribution(gen);
|
||||||
|
// numbers.emplace_back(number);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// return numbers;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||||
|
// {
|
||||||
|
// size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||||
|
// std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||||
|
// std::cout << std::endl;
|
||||||
|
|
||||||
|
// HashMap<int, int> hash_map(preallocated ? map_size : 32);
|
||||||
|
|
||||||
|
// Stopwatch watch;
|
||||||
|
|
||||||
|
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||||
|
// hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||||
|
|
||||||
|
// std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||||
|
|
||||||
|
// UInt64 summ = 0;
|
||||||
|
|
||||||
|
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||||
|
// {
|
||||||
|
// auto * it = hash_map.find(numbers_to_insert[i]);
|
||||||
|
|
||||||
|
// if (it)
|
||||||
|
// summ += it->getMapped();
|
||||||
|
// }
|
||||||
|
|
||||||
|
// std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
||||||
|
// {
|
||||||
|
// size_t numbers_to_insert_size = numbers_to_insert.size();
|
||||||
|
// std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
||||||
|
// std::cout << std::endl;
|
||||||
|
|
||||||
|
// std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
|
||||||
|
|
||||||
|
// Stopwatch watch;
|
||||||
|
|
||||||
|
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||||
|
// hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
||||||
|
|
||||||
|
// std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||||
|
|
||||||
|
// UInt64 summ = 0;
|
||||||
|
|
||||||
|
// for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
||||||
|
// {
|
||||||
|
// auto it = hash_map.find(numbers_to_insert[i]);
|
||||||
|
|
||||||
|
// if (it != hash_map.end())
|
||||||
|
// summ += it->second;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
||||||
|
// }
|
||||||
|
|
||||||
|
template <typename Key, typename Map>
|
||||||
|
static void NO_INLINE test(const Key * data, size_t size, const std::string & name)
|
||||||
{
|
{
|
||||||
std::vector<UInt64> numbers;
|
size_t cache_size = size / 10;
|
||||||
numbers.reserve(numbers_to_insert_size);
|
Map cache(cache_size);
|
||||||
|
|
||||||
std::random_device rd;
|
|
||||||
pcg64 gen(rd());
|
|
||||||
|
|
||||||
UInt64 min = std::numeric_limits<UInt64>::min();
|
|
||||||
UInt64 max = std::numeric_limits<UInt64>::max();
|
|
||||||
|
|
||||||
auto distribution = std::uniform_int_distribution<>(min, max);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
|
||||||
{
|
|
||||||
UInt64 number = distribution(gen);
|
|
||||||
numbers.emplace_back(number);
|
|
||||||
}
|
|
||||||
|
|
||||||
return numbers;
|
|
||||||
}
|
|
||||||
|
|
||||||
void testInsertElementsIntoHashMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
|
||||||
{
|
|
||||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
|
||||||
std::cout << "TestInsertElementsIntoHashMap preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
HashMap<int, int> hash_map(preallocated ? map_size : 32);
|
|
||||||
|
|
||||||
Stopwatch watch;
|
Stopwatch watch;
|
||||||
|
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
++cache[data[i]];
|
||||||
|
|
||||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
watch.stop();
|
||||||
|
|
||||||
UInt64 summ = 0;
|
std::cerr << name
|
||||||
|
<< ":\nElapsed: " << watch.elapsedSeconds()
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
{
|
<< ", map size: " << cache.size() << "\n";
|
||||||
auto * it = hash_map.find(numbers_to_insert[i]);
|
|
||||||
|
|
||||||
if (it)
|
|
||||||
summ += it->getMapped();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void testInsertElementsIntoStandardMap(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
template <typename Key>
|
||||||
|
static void NO_INLINE testForType(size_t method, size_t rows_size)
|
||||||
{
|
{
|
||||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
std::cerr << std::fixed << std::setprecision(3);
|
||||||
std::cout << "TestInsertElementsIntoStandardMap map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
std::unordered_map<int, int> hash_map(preallocated ? map_size : 32);
|
std::vector<Key> data(rows_size);
|
||||||
|
|
||||||
Stopwatch watch;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
|
||||||
hash_map.insert({ numbers_to_insert[i], numbers_to_insert[i] });
|
|
||||||
|
|
||||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
|
||||||
|
|
||||||
UInt64 summ = 0;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
|
||||||
{
|
{
|
||||||
auto it = hash_map.find(numbers_to_insert[i]);
|
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
|
||||||
|
DB::CompressedReadBuffer in2(in1);
|
||||||
if (it != hash_map.end())
|
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * rows_size);
|
||||||
summ += it->second;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
if (method == 0)
|
||||||
}
|
|
||||||
|
|
||||||
template<typename LRUCache>
|
|
||||||
UInt64 testInsertIntoEmptyCache(size_t map_size, const std::vector<UInt64> & numbers_to_insert, bool preallocated)
|
|
||||||
{
|
|
||||||
size_t numbers_to_insert_size = numbers_to_insert.size();
|
|
||||||
std::cout << "Test testInsertPreallocated preallocated map size: " << map_size << " numbers to insert size: " << numbers_to_insert_size;
|
|
||||||
std::cout << std::endl;
|
|
||||||
|
|
||||||
LRUCache cache(map_size, preallocated);
|
|
||||||
Stopwatch watch;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
|
||||||
{
|
{
|
||||||
cache.insert(numbers_to_insert[i], numbers_to_insert[i]);
|
test<Key, LRUHashMap<Key, UInt64>>(data.data(), data.size(), "CH HashMap");
|
||||||
|
}
|
||||||
|
else if (method == 1)
|
||||||
|
{
|
||||||
|
test<Key, LRUHashMapBasic<Key, UInt64>>(data.data(), data.size(), "BasicLRU");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::cout << "Inserted in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
|
||||||
|
|
||||||
UInt64 summ = 0;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < numbers_to_insert_size; ++i)
|
|
||||||
if (cache.contains(numbers_to_insert[i]))
|
|
||||||
summ += cache.get(numbers_to_insert[i]);
|
|
||||||
|
|
||||||
std::cout << "Calculated summ: " << summ << " in " << watch.elapsedMilliseconds() << " milliseconds" << std::endl;
|
|
||||||
|
|
||||||
return summ;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
int main(int argc, char ** argv)
|
||||||
@ -212,33 +227,62 @@ int main(int argc, char ** argv)
|
|||||||
(void)(argc);
|
(void)(argc);
|
||||||
(void)(argv);
|
(void)(argv);
|
||||||
|
|
||||||
size_t hash_map_size = 1200000;
|
// size_t hash_map_size = 1200000;
|
||||||
size_t numbers_to_insert_size = 12000000;
|
// size_t numbers_to_insert_size = 12000000;
|
||||||
std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
|
// std::vector<UInt64> numbers = generateNumbersToInsert(numbers_to_insert_size);
|
||||||
|
|
||||||
std::cout << "Test insert into HashMap preallocated=0" << std::endl;
|
// std::cout << "Test insert into HashMap preallocated=0" << std::endl;
|
||||||
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
// testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
std::cout << "Test insert into HashMap preallocated=1" << std::endl;
|
// std::cout << "Test insert into HashMap preallocated=1" << std::endl;
|
||||||
testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
// testInsertElementsIntoHashMap(hash_map_size, numbers, true);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
std::cout << "Test LRUHashMap preallocated=0" << std::endl;
|
// std::cout << "Test LRUHashMap preallocated=0" << std::endl;
|
||||||
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
|
// testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
std::cout << "Test LRUHashMap preallocated=1" << std::endl;
|
// std::cout << "Test LRUHashMap preallocated=1" << std::endl;
|
||||||
testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
|
// testInsertIntoEmptyCache<LRUHashMap<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
|
// std::cout << "Test LRUHashMapBasic preallocated=0" << std::endl;
|
||||||
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
|
// testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, false);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
|
// std::cout << "Test LRUHashMapBasic preallocated=1" << std::endl;
|
||||||
testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
|
// testInsertIntoEmptyCache<LRUHashMapBasic<UInt64, UInt64>>(hash_map_size, numbers, true);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
|
if (argc < 4)
|
||||||
|
{
|
||||||
|
std::cerr << "Usage: program method column_type_name rows_count < input_column.bin \n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t method = std::stoull(argv[1]);
|
||||||
|
std::string type_name = std::string(argv[2]);
|
||||||
|
size_t n = std::stoull(argv[3]);
|
||||||
|
|
||||||
|
if (type_name == "UInt8")
|
||||||
|
testForType<UInt8>(method, n);
|
||||||
|
else if (type_name == "UInt16")
|
||||||
|
testForType<UInt16>(method, n);
|
||||||
|
else if (type_name == "UInt32")
|
||||||
|
testForType<UInt32>(method, n);
|
||||||
|
else if (type_name == "UInt64")
|
||||||
|
testForType<UInt64>(method, n);
|
||||||
|
else if (type_name == "Int8")
|
||||||
|
testForType<Int8>(method, n);
|
||||||
|
else if (type_name == "Int16")
|
||||||
|
testForType<Int16>(method, n);
|
||||||
|
else if (type_name == "Int32")
|
||||||
|
testForType<Int32>(method, n);
|
||||||
|
else if (type_name == "Int64")
|
||||||
|
testForType<Int64>(method, n);
|
||||||
|
else
|
||||||
|
std::cerr << "Unexpected type passed " << type_name << std::endl;
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user