From 03534fa3869042162c326b70e12963bd3db15f85 Mon Sep 17 00:00:00 2001
From: ricky
Date: Fri, 18 Oct 2024 18:01:42 +0800
Subject: [PATCH] functions: vectorize bitHammingDistance

---
 contrib/CMakeLists.txt               |  2 +-
 src/Functions/bitHammingDistance.cpp | 41 ++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 9 deletions(-)

diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index c36ace61396..d583b22081a 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -211,10 +211,10 @@ if (ARCH_S390X)
     add_contrib(crc32-s390x-cmake crc32-s390x)
 endif()
 
+add_contrib (SimSIMD-cmake SimSIMD)
 option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
 if (ENABLE_USEARCH)
     add_contrib (FP16-cmake FP16)
-    add_contrib (SimSIMD-cmake SimSIMD)
     add_contrib (usearch-cmake usearch) # requires: FP16, SimdSIMD
 else ()
     message(STATUS "Not using USearch")
diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp
index f8a1a95ae14..a38c27a5888 100644
--- a/src/Functions/bitHammingDistance.cpp
+++ b/src/Functions/bitHammingDistance.cpp
@@ -1,6 +1,8 @@
+#include
 #include
 #include
-#include
+#pragma clang diagnostic ignored "-Wused-but-marked-unused"
+#include
 
 
 namespace DB
@@ -26,17 +28,40 @@ struct BitHammingDistanceImpl
 
         if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64))
         {
-            UInt64 res = static_cast(a) ^ static_cast(b);
-            return std::popcount(res);
+            if constexpr (std::is_same_v)
+            {
+                simsimd_distance_t distance;
+                simsimd_hamming_b8(
+                    reinterpret_cast(&a), reinterpret_cast(&b), sizeof(A), &distance);
+                return static_cast(distance);
+            }
+            else
+            {
+                UInt64 res = static_cast(a) ^ static_cast(b);
+                return std::popcount(res);
+            }
         }
         else if constexpr (is_big_int_v && is_big_int_v)
         {
-            auto xored = a ^ b;
+            if constexpr (std::is_same_v)
+            {
+                simsimd_distance_t distance;
+                simsimd_hamming_b8(
+                    reinterpret_cast(&a.items),
+                    reinterpret_cast(&b.items),
+                    sizeof(a.items),
+                    &distance);
+                return static_cast(distance);
+            }
+            else
+            {
+                auto xored = a ^ b;
 
-            ResultType res = 0;
-            for (auto item : xored.items)
-                res += std::popcount(item);
-            return res;
+                ResultType res = 0;
+                for (auto item : xored.items)
+                    res += std::popcount(item);
+                return res;
+            }
         }
         else
             throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'");