Add google benchmark to contrib (#43779)

* add google benchmark to contrib

* rework integer_hash_tables_and_hashes

* update readme

* keep benchmarks near the benchmarked code

* fix fasttests build

* rm old target

* fix
This commit is contained in:
Nikita Taranov 2022-12-08 13:38:08 +01:00 committed by GitHub
parent eba6a79afa
commit b81ad6aaf7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 138 additions and 79 deletions

3
.gitmodules vendored
View File

@ -287,3 +287,6 @@
[submodule "contrib/xxHash"] [submodule "contrib/xxHash"]
path = contrib/xxHash path = contrib/xxHash
url = https://github.com/Cyan4973/xxHash.git url = https://github.com/Cyan4973/xxHash.git
[submodule "contrib/google-benchmark"]
path = contrib/google-benchmark
url = https://github.com/google/benchmark.git

View File

@ -111,6 +111,7 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0) set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1) set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF) set (GLIBC_COMPATIBILITY OFF)
set (ENABLE_BENCHMARKS 0)
# For codegen_select_fuzzer # For codegen_select_fuzzer
set (ENABLE_PROTOBUF 1) set (ENABLE_PROTOBUF 1)
@ -168,6 +169,7 @@ endif ()
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON) option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF) option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF)
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL) if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
# Only for Linux, x86_64 or aarch64. # Only for Linux, x86_64 or aarch64.

View File

@ -171,6 +171,8 @@ add_contrib (annoy-cmake annoy)
add_contrib (xxHash-cmake xxHash) add_contrib (xxHash-cmake xxHash)
add_contrib (google-benchmark-cmake google-benchmark)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs. # Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, # in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,

1
contrib/google-benchmark vendored Submodule

@ -0,0 +1 @@
Subproject commit 2257fa4d6afb8e5a2ccd510a70f38fe7fcdf1edf

View File

@ -0,0 +1,34 @@
# Build rules for the Google Benchmark sources checked out under contrib/google-benchmark.
set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-benchmark/src")

# Core library. benchmark_main.cc is deliberately not listed here: it is built
# as its own library below.
add_library(google_benchmark
    "${SRC_DIR}/benchmark.cc"
    "${SRC_DIR}/benchmark_api_internal.cc"
    "${SRC_DIR}/benchmark_name.cc"
    "${SRC_DIR}/benchmark_register.cc"
    "${SRC_DIR}/benchmark_runner.cc"
    "${SRC_DIR}/check.cc"
    "${SRC_DIR}/colorprint.cc"
    "${SRC_DIR}/commandlineflags.cc"
    "${SRC_DIR}/complexity.cc"
    "${SRC_DIR}/console_reporter.cc"
    "${SRC_DIR}/counter.cc"
    "${SRC_DIR}/csv_reporter.cc"
    "${SRC_DIR}/json_reporter.cc"
    "${SRC_DIR}/perf_counters.cc"
    "${SRC_DIR}/reporter.cc"
    "${SRC_DIR}/sleep.cc"
    "${SRC_DIR}/statistics.cc"
    "${SRC_DIR}/string_util.cc"
    "${SRC_DIR}/sysinfo.cc"
    "${SRC_DIR}/timers.cc"
)
# Public headers live next to src/; SYSTEM marks them as system includes for consumers.
target_include_directories(google_benchmark
    SYSTEM PUBLIC
        "${SRC_DIR}/../include"
)

# Separate library built from benchmark_main.cc; it carries the core library along.
add_library(google_benchmark_main
    "${SRC_DIR}/benchmark_main.cc"
)
target_link_libraries(google_benchmark_main
    PUBLIC
        google_benchmark
)

# Umbrella target so that a benchmark executable needs a single dependency.
add_library(google_benchmark_all INTERFACE)
target_link_libraries(google_benchmark_all
    INTERFACE
        google_benchmark
        google_benchmark_main
)
add_library(ch_contrib::gbenchmark_all ALIAS google_benchmark_all)

View File

@ -1,5 +1,9 @@
add_subdirectory(StringUtils) add_subdirectory(StringUtils)
if (ENABLE_BENCHMARKS)
add_subdirectory(benchmarks)
endif()
if (ENABLE_EXAMPLES) if (ENABLE_EXAMPLES)
add_subdirectory(examples) add_subdirectory(examples)
endif() endif()

View File

@ -0,0 +1,9 @@
# Benchmark executable built from integer_hash_tables_and_hashes.cpp.
clickhouse_add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)

# Benchmark framework (umbrella target from contrib) and the ClickHouse library itself.
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
    ch_contrib::gbenchmark_all
    dbms)

# Third-party hash table implementations.
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
    ch_contrib::abseil_swiss_tables
    ch_contrib::sparsehash)

# Third-party hash functions.
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
    ch_contrib::wyhash
    ch_contrib::farmhash
    ch_contrib::xxHash)

View File

@ -1,5 +1,8 @@
#include <iostream> #include <benchmark/benchmark.h>
#include <iomanip> #include <iomanip>
#include <iostream>
#include <random>
#include <vector> #include <vector>
#include <unordered_map> #include <unordered_map>
@ -13,12 +16,23 @@
//#define DBMS_HASH_MAP_COUNT_COLLISIONS //#define DBMS_HASH_MAP_COUNT_COLLISIONS
//#define DBMS_HASH_MAP_DEBUG_RESIZES //#define DBMS_HASH_MAP_DEBUG_RESIZES
#include <base/types.h> #include <farmhash.h>
#include <IO/ReadBufferFromFile.h> #include <wyhash.h>
#include <Compression/CompressedReadBuffer.h> #include <Compression/CompressedReadBuffer.h>
#include <IO/ReadBufferFromFile.h>
#include <base/types.h>
#include <Common/HashTable/HashMap.h> #include <Common/HashTable/HashMap.h>
#include <Common/SipHash.h> #include <Common/SipHash.h>
#include <pcg-random/pcg_random.hpp>
#include <Common/randomSeed.h>
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wused-but-marked-unused"
#endif
#include <xxhash.h>
using Key = UInt64; using Key = UInt64;
using Value = UInt64; using Value = UInt64;
@ -282,98 +296,91 @@ namespace Hashes
return res; return res;
} }
}; };
/// Hash functor over the raw bytes of the key, delegating to Hash64 from the
/// namespace named by NAMESPACE_FOR_HASH_FUNCTIONS.
struct FarmHash
{
    size_t operator()(Key x) const
    {
        const char * key_bytes = reinterpret_cast<const char *>(&x);
        return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(key_bytes, sizeof(x));
    }
};
/// Hash functor over the raw bytes of the key, delegating to wyhash with seed 0
/// and the `_wyp` secret.
struct WyHash
{
    size_t operator()(Key x) const
    {
        const char * key_bytes = reinterpret_cast<const char *>(&x);
        return wyhash(key_bytes, sizeof(x), 0, _wyp);
    }
};
/// Hash functor over the raw bytes of the key, delegating to XXH_INLINE_XXH3_64bits.
struct XXH3Hash
{
    size_t operator()(Key x) const
    {
        const char * key_bytes = reinterpret_cast<const char *>(&x);
        return XXH_INLINE_XXH3_64bits(key_bytes, sizeof(x));
    }
};
} }
template <template <typename...> class Map, typename Hash> template <template <typename...> class Map, typename Hash>
void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash> &)> init = {}) void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash> &)> init = {})
{ {
Stopwatch watch;
Map<Key, Value, Hash> map; Map<Key, Value, Hash> map;
if (init) if (init)
init(map); init(map);
for (const auto * end = data + size; data < end; ++data) for (const auto * end = data + size; data < end; ++data)
++map[*data]; ++map[*data];
watch.stop();
std::cerr << __PRETTY_FUNCTION__
<< ":\nElapsed: " << watch.elapsedSeconds()
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
<< ", map size: " << map.size() << "\n";
} }
template <template <typename...> class Map, typename Init> template <template <typename...> typename Map, typename Hash>
void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init) struct TestRndInput : public benchmark::Fixture
{ {
test<Map, Hashes::IdentityHash>(data, size, init); void SetUp(const ::benchmark::State & state) override
test<Map, Hashes::SimpleMultiplyHash>(data, size, init);
test<Map, Hashes::MultiplyAndMixHash>(data, size, init);
test<Map, Hashes::MixMultiplyMixHash>(data, size, init);
test<Map, Hashes::MurMurMixHash>(data, size, init);
test<Map, Hashes::MixAllBitsHash>(data, size, init);
test<Map, Hashes::IntHash32>(data, size, init);
test<Map, Hashes::ArcadiaNumericHash>(data, size, init);
test<Map, Hashes::MurMurButDifferentHash>(data, size, init);
test<Map, Hashes::TwoRoundsTwoVarsHash>(data, size, init);
test<Map, Hashes::TwoRoundsLessOpsHash>(data, size, init);
test<Map, Hashes::CRC32Hash>(data, size, init);
test<Map, Hashes::MulShiftHash>(data, size, init);
test<Map, Hashes::TabulationHash>(data, size, init);
test<Map, Hashes::CityHash>(data, size, init);
test<Map, Hashes::SipHash>(data, size, init);
}
static void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
{
auto nothing = [](auto &){};
testForEachHash<HashMap>(data, size, nothing);
testForEachHash<std::unordered_map>(data, size, nothing);
testForEachHash<::google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
testForEachHash<::google::sparse_hash_map>(data, size, nothing);
testForEachHash<::absl::flat_hash_map>(data, size, nothing);
}
int main(int argc, char ** argv)
{
if (argc < 2)
{ {
std::cerr << "Usage: program n\n"; pcg64_fast rng(randomSeed());
return 1; std::normal_distribution<double> dist(0, 10);
const size_t elements = state.range(0);
data.resize(elements);
for (auto & elem : data)
elem = static_cast<Key>(dist(rng)) % elements;
} }
size_t n = std::stol(argv[1]); void test(benchmark::State & st)
// size_t m = std::stol(argv[2]);
std::cerr << std::fixed << std::setprecision(3);
std::vector<Key> data(n);
std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
{ {
Stopwatch watch; for (auto _ : st)
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO); ::test<HashMap, Hash>(data.data(), data.size());
DB::CompressedReadBuffer in2(in1);
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * n);
watch.stop();
std::cerr
<< "Vector. Size: " << n
<< ", elapsed: " << watch.elapsedSeconds()
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
<< std::endl;
} }
/** Actually we should not run multiple test within same invocation of binary, std::vector<Key> data;
* because order of test could alter test results (due to state of allocator and various minor reasons), };
* but in this case it's Ok.
*/
testForEachMapAndHash(data.data(), data.size()); #define OK_GOOGLE(Fixture, Map, Hash, N) \
return 0; BENCHMARK_TEMPLATE_DEFINE_F(Fixture, Test##Map##Hash, Map, Hashes::Hash)(benchmark::State & st) \
} { \
test(st); \
} \
BENCHMARK_REGISTER_F(Fixture, Test##Map##Hash)->Arg(N);
/// Number of keys per benchmark; passed as the single Arg of every registration below.
constexpr size_t elements_to_insert = 10'000'000;
/// TL;DR: CRC32 has almost the same speed as the identity hash if the corresponding intrinsics are available.
/// TODO: extend the benchmark with larger key sizes, up to, say, 24 bytes.
/// Each line below defines and registers one benchmark of HashMap with the named hash from `Hashes` (see OK_GOOGLE).
OK_GOOGLE(TestRndInput, HashMap, ArcadiaNumericHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, CRC32Hash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, CityHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, FarmHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, IdentityHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, IntHash32, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MixAllBitsHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MixMultiplyMixHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MulShiftHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MultiplyAndMixHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MurMurButDifferentHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, MurMurMixHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, SimpleMultiplyHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, SipHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, TabulationHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, TwoRoundsLessOpsHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, TwoRoundsTwoVarsHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, WyHash, elements_to_insert)
OK_GOOGLE(TestRndInput, HashMap, XXH3Hash, elements_to_insert)
#ifdef __clang__
# pragma clang diagnostic pop
#endif

View File

@ -40,9 +40,6 @@ target_link_libraries (array_cache PRIVATE clickhouse_common_io)
clickhouse_add_executable (space_saving space_saving.cpp) clickhouse_add_executable (space_saving space_saving.cpp)
target_link_libraries (space_saving PRIVATE clickhouse_common_io) target_link_libraries (space_saving PRIVATE clickhouse_common_io)
clickhouse_add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)
clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp) clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp)
target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash) target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)