mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Added hash table benchmarks
This commit is contained in:
parent
6f92b8e949
commit
1f9c9eda45
@ -49,7 +49,11 @@ target_link_libraries (space_saving PRIVATE clickhouse_common_io)
|
||||
|
||||
add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
|
||||
target_include_directories (integer_hash_tables_and_hashes SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
|
||||
target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms)
|
||||
target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms abseil_swiss_tables)
|
||||
|
||||
add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp)
|
||||
target_include_directories (integer_hash_tables_benchmark SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR})
|
||||
target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms abseil_swiss_tables)
|
||||
|
||||
add_executable (cow_columns cow_columns.cpp)
|
||||
target_link_libraries (cow_columns PRIVATE clickhouse_common_io)
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <sparsehash/dense_hash_map>
|
||||
#include <sparsehash/sparse_hash_map>
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
@ -332,6 +333,7 @@ static void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
|
||||
testForEachHash<std::unordered_map>(data, size, nothing);
|
||||
testForEachHash<::google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
|
||||
testForEachHash<::google::sparse_hash_map>(data, size, nothing);
|
||||
testForEachHash<::absl::flat_hash_map>(data, size, nothing);
|
||||
}
|
||||
|
||||
|
||||
|
162
src/Common/examples/integer_hash_tables_benchmark.cpp
Normal file
162
src/Common/examples/integer_hash_tables_benchmark.cpp
Normal file
@ -0,0 +1,162 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include <sparsehash/dense_hash_map>
|
||||
#include <sparsehash/sparse_hash_map>
|
||||
#include <absl/container/flat_hash_map.h>
|
||||
|
||||
#include <Common/Stopwatch.h>
|
||||
|
||||
#include <common/types.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Compression/CompressedReadBuffer.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
template <typename Key, typename Map>
|
||||
void NO_INLINE test(const Key * data, size_t size, const std::string & name, std::function<void(Map &)> init = {})
|
||||
{
|
||||
Stopwatch watch;
|
||||
|
||||
Map map;
|
||||
|
||||
if (init)
|
||||
init(map);
|
||||
|
||||
for (const auto * end = data + size; data < end; ++data)
|
||||
++map[*data];
|
||||
|
||||
watch.stop();
|
||||
std::cerr << name
|
||||
<< ":\nElapsed: " << watch.elapsedSeconds()
|
||||
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
|
||||
<< ", map size: " << map.size() << "\n";
|
||||
}
|
||||
|
||||
template <typename Key>
|
||||
static void NO_INLINE testForType(size_t method, size_t rows_size)
|
||||
{
|
||||
std::cerr << std::fixed << std::setprecision(3);
|
||||
|
||||
std::vector<Key> data(rows_size);
|
||||
|
||||
{
|
||||
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
|
||||
DB::CompressedReadBuffer in2(in1);
|
||||
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * rows_size);
|
||||
}
|
||||
|
||||
if (method == 0)
|
||||
{
|
||||
test<Key, HashMap<Key, UInt64, DefaultHash<Key>>>(data.data(), data.size(), "CH HashMap");
|
||||
}
|
||||
else if (method == 1)
|
||||
{
|
||||
test<Key, ::google::dense_hash_map<Key, UInt64, absl::Hash<Key>>>(data.data(), data.size(), "Google DenseMap", [](auto & map){ map.set_empty_key(0); });
|
||||
}
|
||||
else if (method == 2)
|
||||
{
|
||||
test<Key, ::absl::flat_hash_map<Key, UInt64>>(data.data(), data.size(), "Abseil HashMap");
|
||||
}
|
||||
else if (method == 3)
|
||||
{
|
||||
test<Key, std::unordered_map<Key, UInt64>>(data.data(), data.size(), "std::unordered_map");
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cerr << "Unexpected method passed " << method << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
/** This benchmark does not test which hash table is fastest.
|
||||
* It tests simple aggregation scenario that is important for ClickHouse.
|
||||
*
|
||||
* Support bash script it is important to rerun program for each method:
|
||||
* benchmark.sh
|
||||
* # Usage benchmark.sh column_file_name.bin column_type
|
||||
* echo File $1
|
||||
* ./integer_hash_tables_benchmark 0 $2 100000000 < $1
|
||||
* ./integer_hash_tables_benchmark 1 $2 100000000 < $1
|
||||
* ./integer_hash_tables_benchmark 2 $2 100000000 < $1
|
||||
* ./integer_hash_tables_benchmark 3 $2 100000000 < $1
|
||||
*
|
||||
* Results of this benchmark on hits_100m_obfuscated
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/WatchID.bin
|
||||
* CH HashMap: Elapsed: 7.366 (13575745.933 elem/sec.), map size: 99997493
|
||||
* Google DenseMap: Elapsed: 10.089 (9911817.125 elem/sec.), map size: 99997493
|
||||
* Abseil HashMap: Elapsed: 9.011 (11097794.073 elem/sec.), map size: 99997493
|
||||
* std::unordered_map: Elapsed: 44.758 (2234223.189 elem/sec.), map size: 99997493
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/URLHash.bin
|
||||
* CH HashMap: Elapsed: 2.672 (37421588.347 elem/sec.), map size: 20714865
|
||||
* Google DenseMap: Elapsed: 3.409 (29333308.209 elem/sec.), map size: 20714865
|
||||
* Abseil HashMap: Elapsed: 2.778 (36000540.035 elem/sec.), map size: 20714865
|
||||
* std::unordered_map: Elapsed: 8.643 (11570012.207 elem/sec.), map size: 20714865
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/UserID.bin
|
||||
* CH HashMap: Elapsed: 2.116 (47267659.076 elem/sec.), map size: 17630976
|
||||
* Google DenseMap: Elapsed: 2.722 (36740693.786 elem/sec.), map size: 17630976
|
||||
* Abseil HashMap: Elapsed: 2.597 (38509988.663 elem/sec.), map size: 17630976
|
||||
* std::unordered_map: Elapsed: 7.327 (13647271.471 elem/sec.), map size: 17630976
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/RegionID.bin
|
||||
* CH HashMap: Elapsed: 0.201 (498144193.695 elem/sec.), map size: 9040
|
||||
* Google DenseMap: Elapsed: 0.261 (382656387.016 elem/sec.), map size: 9046
|
||||
* Abseil HashMap: Elapsed: 0.307 (325874545.117 elem/sec.), map size: 9040
|
||||
* std::unordered_map: Elapsed: 0.466 (214379083.420 elem/sec.), map size: 9040
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/CounterID.bin
|
||||
* CH HashMap: Elapsed: 0.220 (455344735.648 elem/sec.), map size: 6506
|
||||
* Google DenseMap: Elapsed: 0.297 (336187522.818 elem/sec.), map size: 6506
|
||||
* Abseil HashMap: Elapsed: 0.307 (325264214.480 elem/sec.), map size: 6506
|
||||
* std::unordered_map: Elapsed: 0.389 (257195996.114 elem/sec.), map size: 6506
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/TraficSourceID.bin
|
||||
* CH HashMap: Elapsed: 0.274 (365196673.729 elem/sec.), map size: 10
|
||||
* Google DenseMap: Elapsed: 0.782 (127845746.927 elem/sec.), map size: 1565609 /// Broken because there is 0 key in dataset
|
||||
* Abseil HashMap: Elapsed: 0.303 (330461565.053 elem/sec.), map size: 10
|
||||
* std::unordered_map: Elapsed: 0.843 (118596530.649 elem/sec.), map size: 10
|
||||
*
|
||||
* File hits_100m_obfuscated/201307_1_96_4/AdvEngineID.bin
|
||||
* CH HashMap: Elapsed: 0.160 (623399865.019 elem/sec.), map size: 19
|
||||
* Google DenseMap: Elapsed: 1.673 (59757144.027 elem/sec.), map size: 32260732 /// Broken because there is 0 key in dataset
|
||||
* Abseil HashMap: Elapsed: 0.297 (336589258.845 elem/sec.), map size: 19
|
||||
* std::unordered_map: Elapsed: 0.332 (301114451.384 elem/sec.), map size: 19
|
||||
*/
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
if (argc < 4)
|
||||
{
|
||||
std::cerr << "Usage: program method column_type_name rows_count < input_column.bin \n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t method = std::stoull(argv[1]);
|
||||
std::string type_name = std::string(argv[2]);
|
||||
size_t n = std::stoull(argv[3]);
|
||||
|
||||
if (type_name == "UInt8")
|
||||
testForType<UInt8>(method, n);
|
||||
else if (type_name == "UInt16")
|
||||
testForType<UInt16>(method, n);
|
||||
else if (type_name == "UInt32")
|
||||
testForType<UInt32>(method, n);
|
||||
else if (type_name == "UInt64")
|
||||
testForType<UInt64>(method, n);
|
||||
else if (type_name == "Int8")
|
||||
testForType<Int8>(method, n);
|
||||
else if (type_name == "Int16")
|
||||
testForType<Int16>(method, n);
|
||||
else if (type_name == "Int32")
|
||||
testForType<Int32>(method, n);
|
||||
else if (type_name == "Int64")
|
||||
testForType<Int64>(method, n);
|
||||
else
|
||||
std::cerr << "Unexpected type passed " << type_name << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user