This commit is contained in:
ricky 2024-11-20 16:25:57 -08:00 committed by GitHub
commit 7405c7e527
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 38 additions and 9 deletions

View File

@ -195,10 +195,10 @@ if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x) add_contrib(crc32-s390x-cmake crc32-s390x)
endif() endif()
add_contrib (SimSIMD-cmake SimSIMD)
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES}) option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
if (ENABLE_USEARCH) if (ENABLE_USEARCH)
add_contrib (FP16-cmake FP16) add_contrib (FP16-cmake FP16)
add_contrib (SimSIMD-cmake SimSIMD)
add_contrib (usearch-cmake usearch) # requires: FP16, SimSIMD add_contrib (usearch-cmake usearch) # requires: FP16, SimSIMD
else () else ()
message(STATUS "Not using USearch") message(STATUS "Not using USearch")

View File

@ -134,6 +134,10 @@ list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_array>)
add_subdirectory(Kusto) add_subdirectory(Kusto)
list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_kusto>) list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_kusto>)
# Append ch_contrib::simsimd to PRIVATE_LIBS only when that target has been defined.
# NOTE(review): the C++ hunk in this same commit includes <simsimd/simsimd.h> with no
# visible preprocessor guard — confirm the target always exists in supported
# configurations, or that the include is guarded outside the lines shown here.
if (TARGET ch_contrib::simsimd)
list (APPEND PRIVATE_LIBS ch_contrib::simsimd)
endif()
if (TARGET ch_contrib::datasketches) if (TARGET ch_contrib::datasketches)
add_subdirectory(UniqTheta) add_subdirectory(UniqTheta)
list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_uniqtheta>) list (APPEND OBJECT_LIBS $<TARGET_OBJECTS:clickhouse_functions_uniqtheta>)

View File

@ -1,6 +1,8 @@
#include <bit>
#include <Functions/FunctionBinaryArithmetic.h> #include <Functions/FunctionBinaryArithmetic.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <bit> #pragma clang diagnostic ignored "-Wused-but-marked-unused"
#include <simsimd/simsimd.h>
namespace DB namespace DB
@ -26,17 +28,40 @@ struct BitHammingDistanceImpl
if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64)) if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64))
{ {
UInt64 res = static_cast<UInt64>(a) ^ static_cast<UInt64>(b); if constexpr (std::is_same_v<A, B>)
return std::popcount(res); {
simsimd_distance_t distance;
simsimd_hamming_b8(
reinterpret_cast<const simsimd_b8_t *>(&a), reinterpret_cast<const simsimd_b8_t *>(&b), sizeof(A), &distance);
return static_cast<UInt64>(distance);
}
else
{
UInt64 res = static_cast<UInt64>(a) ^ static_cast<UInt64>(b);
return std::popcount(res);
}
} }
else if constexpr (is_big_int_v<A> && is_big_int_v<B>) else if constexpr (is_big_int_v<A> && is_big_int_v<B>)
{ {
auto xored = a ^ b; if constexpr (std::is_same_v<A, B>)
{
simsimd_distance_t distance;
simsimd_hamming_b8(
reinterpret_cast<const simsimd_b8_t *>(&a.items),
reinterpret_cast<const simsimd_b8_t *>(&b.items),
sizeof(a.items),
&distance);
return static_cast<ResultType>(distance);
}
else
{
auto xored = a ^ b;
ResultType res = 0; ResultType res = 0;
for (auto item : xored.items) for (auto item : xored.items)
res += std::popcount(item); res += std::popcount(item);
return res; return res;
}
} }
else else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'"); throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'");