ClickHouse/dbms/src/Interpreters/tests/two_level_hash_map.cpp
Amos Bird 26ab5dd7a7 A Proper lookup table that uses HashTable's API
This is the first step of allowing heterogeneous cells in hash tables.

performance test results are

```

1. HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>;
2. NewLookupMap<UInt16, UInt8>

ResolutionWidth 30000 1 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................223550276.46
ResolutionWidth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................248772721.24
Best: 2 - 24877272124

ResolutionWidth 100000 1 ..........................................................................................................................................................................................................................................................238498413.99
ResolutionWidth 100000 2 ..........................................................................................................................................................................................................................................................261808889.98
Best: 2 - 26180888998

ResolutionWidth 300000 1 ...................................................................................239307348.81
ResolutionWidth 300000 2 ...................................................................................257592761.30
Best: 2 - 25759276130

ResolutionWidth 1000000 1 .........................240144759.26
ResolutionWidth 1000000 2 .........................257093531.91
Best: 2 - 25709353191

ResolutionWidth 5000000 1 .....241573260.35
ResolutionWidth 5000000 2 .....259314162.79
Best: 2 - 25931416279

ResolutionDepth 30000 1 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................217108119.84
ResolutionDepth 30000 2 .................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................249459504.41
Best: 2 - 24945950441

ResolutionDepth 100000 1 ..........................................................................................................................................................................................................................................................229065162.17
ResolutionDepth 100000 2 ..........................................................................................................................................................................................................................................................253769105.64
Best: 2 - 25376910564

ResolutionDepth 300000 1 ...................................................................................233079225.18
ResolutionDepth 300000 2 ...................................................................................256316273.78
Best: 2 - 25631627378

ResolutionDepth 1000000 1 .........................234184633.51
ResolutionDepth 1000000 2 .........................261100491.57
Best: 2 - 26110049157

ResolutionDepth 5000000 1 .....233118795.66
ResolutionDepth 5000000 2 .....252436160.41
Best: 2 - 25243616041

```
2019-03-01 16:47:13 +08:00

136 lines
3.6 KiB
C++

#include <iostream>
#include <iomanip>
#include <vector>
#include <unordered_map>
#include <sparsehash/dense_hash_map>
#include <sparsehash/sparse_hash_map>
//#define DBMS_HASH_MAP_DEBUG_RESIZES
#include <Common/Stopwatch.h>
#include <AggregateFunctions/UniquesHashSet.h>
#include <Core/Types.h>
#include <IO/ReadBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <Common/HashTable/TwoLevelHashTable.h>
#include <Common/HashTable/HashMap.h>
using Key = UInt64;
using Value = UInt64;
int main(int argc, char ** argv)
{
if (argc < 2)
{
std::cerr << "Usage: program n\n";
return 1;
}
size_t n = atoi(argv[1]);
std::vector<Key> data(n);
std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
{
Stopwatch watch;
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::CompressedReadBuffer in2(in1);
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * n);
watch.stop();
std::cerr << std::fixed << std::setprecision(2)
<< "Vector. Size: " << n
<< ", elapsed: " << watch.elapsedSeconds()
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
<< std::endl;
}
{
Stopwatch watch;
std::cerr << sizeof(HashMapCell<Key, Value, DefaultHash<Key>>) << std::endl;
using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key>>, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
Map map;
Map::iterator it;
bool inserted;
for (size_t i = 0; i < n; ++i)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
std::cerr << std::fixed << std::setprecision(2)
<< "HashMap. Size: " << map.size()
<< ", elapsed: " << watch.elapsedSeconds()
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
<< std::endl;
size_t sum_counts = 0;
size_t elems = 0;
for (const auto & kv : map)
{
sum_counts += kv.getSecond();
++elems;
}
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
}
{
Stopwatch watch;
using Map = TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key>>, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator>;
//using Map = HashMap<Key, Value, UniquesHashSetDefaultHash>;
Map map;
Map::iterator it;
bool inserted;
for (size_t i = 0; i < n; ++i)
{
map.emplace(i, it, inserted);
if (inserted)
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
std::cerr << std::fixed << std::setprecision(2)
<< "HashMap. Size: " << map.size()
<< ", elapsed: " << watch.elapsedSeconds()
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
<< std::endl;
size_t sum_counts = 0;
size_t elems = 0;
for (const auto & kv : map)
{
sum_counts += kv.getSecond();
++elems;
if (kv.getFirst() > n)
std::cerr << kv.getFirst() << std::endl;
}
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
if (sum_counts != n)
std::cerr << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl;
}
return 0;
}