mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-19 16:20:50 +00:00
Add google benchmark to contrib (#43779)
* add google benchmark to contrib * rework integer_hash_tables_and_hashes * update readme * keep benchmarks near the benchmarked code * fix fasttests build * rm old target * fix
This commit is contained in:
parent
eba6a79afa
commit
b81ad6aaf7
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -287,3 +287,6 @@
|
|||||||
[submodule "contrib/xxHash"]
|
[submodule "contrib/xxHash"]
|
||||||
path = contrib/xxHash
|
path = contrib/xxHash
|
||||||
url = https://github.com/Cyan4973/xxHash.git
|
url = https://github.com/Cyan4973/xxHash.git
|
||||||
|
[submodule "contrib/google-benchmark"]
|
||||||
|
path = contrib/google-benchmark
|
||||||
|
url = https://github.com/google/benchmark.git
|
||||||
|
@ -111,6 +111,7 @@ if (ENABLE_FUZZING)
|
|||||||
set (ENABLE_JEMALLOC 0)
|
set (ENABLE_JEMALLOC 0)
|
||||||
set (ENABLE_CHECK_HEAVY_BUILDS 1)
|
set (ENABLE_CHECK_HEAVY_BUILDS 1)
|
||||||
set (GLIBC_COMPATIBILITY OFF)
|
set (GLIBC_COMPATIBILITY OFF)
|
||||||
|
set (ENABLE_BENCHMARKS 0)
|
||||||
|
|
||||||
# For codegen_select_fuzzer
|
# For codegen_select_fuzzer
|
||||||
set (ENABLE_PROTOBUF 1)
|
set (ENABLE_PROTOBUF 1)
|
||||||
@ -168,6 +169,7 @@ endif ()
|
|||||||
|
|
||||||
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
|
option(ENABLE_TESTS "Provide unit_test_dbms target with Google.Test unit tests" ON)
|
||||||
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
|
option(ENABLE_EXAMPLES "Build all example programs in 'examples' subdirectories" OFF)
|
||||||
|
option(ENABLE_BENCHMARKS "Build all benchmark programs in 'benchmarks' subdirectories" OFF)
|
||||||
|
|
||||||
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
|
if (OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64) AND USE_STATIC_LIBRARIES AND NOT SPLIT_SHARED_LIBRARIES AND NOT USE_MUSL)
|
||||||
# Only for Linux, x86_64 or aarch64.
|
# Only for Linux, x86_64 or aarch64.
|
||||||
|
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -171,6 +171,8 @@ add_contrib (annoy-cmake annoy)
|
|||||||
|
|
||||||
add_contrib (xxHash-cmake xxHash)
|
add_contrib (xxHash-cmake xxHash)
|
||||||
|
|
||||||
|
add_contrib (google-benchmark-cmake google-benchmark)
|
||||||
|
|
||||||
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
|
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
|
||||||
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
|
# Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
|
||||||
# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
|
# in "contrib/..." as originally planned, so we workaround this by fixing FOLDER properties of all targets manually,
|
||||||
|
1
contrib/google-benchmark
vendored
Submodule
1
contrib/google-benchmark
vendored
Submodule
@ -0,0 +1 @@
|
|||||||
|
Subproject commit 2257fa4d6afb8e5a2ccd510a70f38fe7fcdf1edf
|
34
contrib/google-benchmark-cmake/CMakeLists.txt
Normal file
34
contrib/google-benchmark-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
set (SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-benchmark/src")
|
||||||
|
|
||||||
|
set (SRCS
|
||||||
|
"${SRC_DIR}/benchmark.cc"
|
||||||
|
"${SRC_DIR}/benchmark_api_internal.cc"
|
||||||
|
"${SRC_DIR}/benchmark_name.cc"
|
||||||
|
"${SRC_DIR}/benchmark_register.cc"
|
||||||
|
"${SRC_DIR}/benchmark_runner.cc"
|
||||||
|
"${SRC_DIR}/check.cc"
|
||||||
|
"${SRC_DIR}/colorprint.cc"
|
||||||
|
"${SRC_DIR}/commandlineflags.cc"
|
||||||
|
"${SRC_DIR}/complexity.cc"
|
||||||
|
"${SRC_DIR}/console_reporter.cc"
|
||||||
|
"${SRC_DIR}/counter.cc"
|
||||||
|
"${SRC_DIR}/csv_reporter.cc"
|
||||||
|
"${SRC_DIR}/json_reporter.cc"
|
||||||
|
"${SRC_DIR}/perf_counters.cc"
|
||||||
|
"${SRC_DIR}/reporter.cc"
|
||||||
|
"${SRC_DIR}/sleep.cc"
|
||||||
|
"${SRC_DIR}/statistics.cc"
|
||||||
|
"${SRC_DIR}/string_util.cc"
|
||||||
|
"${SRC_DIR}/sysinfo.cc"
|
||||||
|
"${SRC_DIR}/timers.cc")
|
||||||
|
|
||||||
|
add_library(google_benchmark "${SRCS}")
|
||||||
|
target_include_directories(google_benchmark SYSTEM PUBLIC "${SRC_DIR}/../include")
|
||||||
|
|
||||||
|
add_library(google_benchmark_main "${SRC_DIR}/benchmark_main.cc")
|
||||||
|
target_link_libraries(google_benchmark_main PUBLIC google_benchmark)
|
||||||
|
|
||||||
|
add_library(google_benchmark_all INTERFACE)
|
||||||
|
target_link_libraries(google_benchmark_all INTERFACE google_benchmark google_benchmark_main)
|
||||||
|
|
||||||
|
add_library(ch_contrib::gbenchmark_all ALIAS google_benchmark_all)
|
@ -1,5 +1,9 @@
|
|||||||
add_subdirectory(StringUtils)
|
add_subdirectory(StringUtils)
|
||||||
|
|
||||||
|
if (ENABLE_BENCHMARKS)
|
||||||
|
add_subdirectory(benchmarks)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (ENABLE_EXAMPLES)
|
if (ENABLE_EXAMPLES)
|
||||||
add_subdirectory(examples)
|
add_subdirectory(examples)
|
||||||
endif()
|
endif()
|
||||||
|
9
src/Common/benchmarks/CMakeLists.txt
Normal file
9
src/Common/benchmarks/CMakeLists.txt
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
clickhouse_add_executable(integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
|
||||||
|
target_link_libraries (integer_hash_tables_and_hashes PRIVATE
|
||||||
|
ch_contrib::gbenchmark_all
|
||||||
|
dbms
|
||||||
|
ch_contrib::abseil_swiss_tables
|
||||||
|
ch_contrib::sparsehash
|
||||||
|
ch_contrib::wyhash
|
||||||
|
ch_contrib::farmhash
|
||||||
|
ch_contrib::xxHash)
|
@ -1,5 +1,8 @@
|
|||||||
#include <iostream>
|
#include <benchmark/benchmark.h>
|
||||||
|
|
||||||
#include <iomanip>
|
#include <iomanip>
|
||||||
|
#include <iostream>
|
||||||
|
#include <random>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
@ -13,12 +16,23 @@
|
|||||||
//#define DBMS_HASH_MAP_COUNT_COLLISIONS
|
//#define DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||||
//#define DBMS_HASH_MAP_DEBUG_RESIZES
|
//#define DBMS_HASH_MAP_DEBUG_RESIZES
|
||||||
|
|
||||||
#include <base/types.h>
|
#include <farmhash.h>
|
||||||
#include <IO/ReadBufferFromFile.h>
|
#include <wyhash.h>
|
||||||
#include <Compression/CompressedReadBuffer.h>
|
#include <Compression/CompressedReadBuffer.h>
|
||||||
|
#include <IO/ReadBufferFromFile.h>
|
||||||
|
#include <base/types.h>
|
||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
#include <Common/SipHash.h>
|
#include <Common/SipHash.h>
|
||||||
|
|
||||||
|
#include <pcg-random/pcg_random.hpp>
|
||||||
|
#include <Common/randomSeed.h>
|
||||||
|
|
||||||
|
#ifdef __clang__
|
||||||
|
# pragma clang diagnostic push
|
||||||
|
# pragma clang diagnostic ignored "-Wused-but-marked-unused"
|
||||||
|
#endif
|
||||||
|
#include <xxhash.h>
|
||||||
|
|
||||||
using Key = UInt64;
|
using Key = UInt64;
|
||||||
using Value = UInt64;
|
using Value = UInt64;
|
||||||
|
|
||||||
@ -282,98 +296,91 @@ namespace Hashes
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct FarmHash
|
||||||
|
{
|
||||||
|
size_t operator()(Key x) const { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(reinterpret_cast<const char *>(&x), sizeof(x)); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct WyHash
|
||||||
|
{
|
||||||
|
size_t operator()(Key x) const { return wyhash(reinterpret_cast<const char *>(&x), sizeof(x), 0, _wyp); }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct XXH3Hash
|
||||||
|
{
|
||||||
|
size_t operator()(Key x) const { return XXH_INLINE_XXH3_64bits(reinterpret_cast<const char *>(&x), sizeof(x)); }
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <template <typename...> class Map, typename Hash>
|
template <template <typename...> class Map, typename Hash>
|
||||||
void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash> &)> init = {})
|
void NO_INLINE test(const Key * data, size_t size, std::function<void(Map<Key, Value, Hash> &)> init = {})
|
||||||
{
|
{
|
||||||
Stopwatch watch;
|
|
||||||
|
|
||||||
Map<Key, Value, Hash> map;
|
Map<Key, Value, Hash> map;
|
||||||
if (init)
|
if (init)
|
||||||
init(map);
|
init(map);
|
||||||
|
|
||||||
for (const auto * end = data + size; data < end; ++data)
|
for (const auto * end = data + size; data < end; ++data)
|
||||||
++map[*data];
|
++map[*data];
|
||||||
|
|
||||||
watch.stop();
|
|
||||||
std::cerr << __PRETTY_FUNCTION__
|
|
||||||
<< ":\nElapsed: " << watch.elapsedSeconds()
|
|
||||||
<< " (" << size / watch.elapsedSeconds() << " elem/sec.)"
|
|
||||||
<< ", map size: " << map.size() << "\n";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <template <typename...> class Map, typename Init>
|
template <template <typename...> typename Map, typename Hash>
|
||||||
void NO_INLINE testForEachHash(const Key * data, size_t size, Init && init)
|
struct TestRndInput : public benchmark::Fixture
|
||||||
{
|
{
|
||||||
test<Map, Hashes::IdentityHash>(data, size, init);
|
void SetUp(const ::benchmark::State & state) override
|
||||||
test<Map, Hashes::SimpleMultiplyHash>(data, size, init);
|
|
||||||
test<Map, Hashes::MultiplyAndMixHash>(data, size, init);
|
|
||||||
test<Map, Hashes::MixMultiplyMixHash>(data, size, init);
|
|
||||||
test<Map, Hashes::MurMurMixHash>(data, size, init);
|
|
||||||
test<Map, Hashes::MixAllBitsHash>(data, size, init);
|
|
||||||
test<Map, Hashes::IntHash32>(data, size, init);
|
|
||||||
test<Map, Hashes::ArcadiaNumericHash>(data, size, init);
|
|
||||||
test<Map, Hashes::MurMurButDifferentHash>(data, size, init);
|
|
||||||
test<Map, Hashes::TwoRoundsTwoVarsHash>(data, size, init);
|
|
||||||
test<Map, Hashes::TwoRoundsLessOpsHash>(data, size, init);
|
|
||||||
test<Map, Hashes::CRC32Hash>(data, size, init);
|
|
||||||
test<Map, Hashes::MulShiftHash>(data, size, init);
|
|
||||||
test<Map, Hashes::TabulationHash>(data, size, init);
|
|
||||||
test<Map, Hashes::CityHash>(data, size, init);
|
|
||||||
test<Map, Hashes::SipHash>(data, size, init);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void NO_INLINE testForEachMapAndHash(const Key * data, size_t size)
|
|
||||||
{
|
|
||||||
auto nothing = [](auto &){};
|
|
||||||
|
|
||||||
testForEachHash<HashMap>(data, size, nothing);
|
|
||||||
testForEachHash<std::unordered_map>(data, size, nothing);
|
|
||||||
testForEachHash<::google::dense_hash_map>(data, size, [](auto & map){ map.set_empty_key(-1); });
|
|
||||||
testForEachHash<::google::sparse_hash_map>(data, size, nothing);
|
|
||||||
testForEachHash<::absl::flat_hash_map>(data, size, nothing);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
|
||||||
{
|
|
||||||
if (argc < 2)
|
|
||||||
{
|
{
|
||||||
std::cerr << "Usage: program n\n";
|
pcg64_fast rng(randomSeed());
|
||||||
return 1;
|
std::normal_distribution<double> dist(0, 10);
|
||||||
|
|
||||||
|
const size_t elements = state.range(0);
|
||||||
|
data.resize(elements);
|
||||||
|
for (auto & elem : data)
|
||||||
|
elem = static_cast<Key>(dist(rng)) % elements;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t n = std::stol(argv[1]);
|
void test(benchmark::State & st)
|
||||||
// size_t m = std::stol(argv[2]);
|
|
||||||
|
|
||||||
std::cerr << std::fixed << std::setprecision(3);
|
|
||||||
|
|
||||||
std::vector<Key> data(n);
|
|
||||||
|
|
||||||
std::cerr << "sizeof(Key) = " << sizeof(Key) << ", sizeof(Value) = " << sizeof(Value) << std::endl;
|
|
||||||
|
|
||||||
{
|
{
|
||||||
Stopwatch watch;
|
for (auto _ : st)
|
||||||
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
|
::test<HashMap, Hash>(data.data(), data.size());
|
||||||
DB::CompressedReadBuffer in2(in1);
|
|
||||||
|
|
||||||
in2.readStrict(reinterpret_cast<char*>(data.data()), sizeof(data[0]) * n);
|
|
||||||
|
|
||||||
watch.stop();
|
|
||||||
std::cerr
|
|
||||||
<< "Vector. Size: " << n
|
|
||||||
<< ", elapsed: " << watch.elapsedSeconds()
|
|
||||||
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
|
||||||
<< std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Actually we should not run multiple test within same invocation of binary,
|
std::vector<Key> data;
|
||||||
* because order of test could alter test results (due to state of allocator and various minor reasons),
|
};
|
||||||
* but in this case it's Ok.
|
|
||||||
*/
|
|
||||||
|
|
||||||
testForEachMapAndHash(data.data(), data.size());
|
#define OK_GOOGLE(Fixture, Map, Hash, N) \
|
||||||
return 0;
|
BENCHMARK_TEMPLATE_DEFINE_F(Fixture, Test##Map##Hash, Map, Hashes::Hash)(benchmark::State & st) \
|
||||||
}
|
{ \
|
||||||
|
test(st); \
|
||||||
|
} \
|
||||||
|
BENCHMARK_REGISTER_F(Fixture, Test##Map##Hash)->Arg(N);
|
||||||
|
|
||||||
|
|
||||||
|
constexpr size_t elements_to_insert = 10'000'000;
|
||||||
|
|
||||||
|
/// tldr: crc32 has almost the same speed as identity hash if the corresponding intrinsics are available
|
||||||
|
/// todo: extend benchmark with larger key sizes up to say 24 bytes
|
||||||
|
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, ArcadiaNumericHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, CRC32Hash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, CityHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, FarmHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, IdentityHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, IntHash32, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, MixAllBitsHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, MixMultiplyMixHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, MulShiftHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, MultiplyAndMixHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, MurMurButDifferentHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, MurMurMixHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, SimpleMultiplyHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, SipHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, TabulationHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, TwoRoundsLessOpsHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, TwoRoundsTwoVarsHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, WyHash, elements_to_insert)
|
||||||
|
OK_GOOGLE(TestRndInput, HashMap, XXH3Hash, elements_to_insert)
|
||||||
|
|
||||||
|
#ifdef __clang__
|
||||||
|
# pragma clang diagnostic pop
|
||||||
|
#endif
|
@ -40,9 +40,6 @@ target_link_libraries (array_cache PRIVATE clickhouse_common_io)
|
|||||||
clickhouse_add_executable (space_saving space_saving.cpp)
|
clickhouse_add_executable (space_saving space_saving.cpp)
|
||||||
target_link_libraries (space_saving PRIVATE clickhouse_common_io)
|
target_link_libraries (space_saving PRIVATE clickhouse_common_io)
|
||||||
|
|
||||||
clickhouse_add_executable (integer_hash_tables_and_hashes integer_hash_tables_and_hashes.cpp)
|
|
||||||
target_link_libraries (integer_hash_tables_and_hashes PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)
|
|
||||||
|
|
||||||
clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp)
|
clickhouse_add_executable (integer_hash_tables_benchmark integer_hash_tables_benchmark.cpp)
|
||||||
target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)
|
target_link_libraries (integer_hash_tables_benchmark PRIVATE dbms ch_contrib::abseil_swiss_tables ch_contrib::sparsehash)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user