mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
functions: vectorize bitHammingDistance
This commit is contained in:
parent
7fd2207626
commit
03534fa386
2
contrib/CMakeLists.txt
vendored
2
contrib/CMakeLists.txt
vendored
@ -211,10 +211,10 @@ if (ARCH_S390X)
|
|||||||
add_contrib(crc32-s390x-cmake crc32-s390x)
|
add_contrib(crc32-s390x-cmake crc32-s390x)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
add_contrib (SimSIMD-cmake SimSIMD)
|
||||||
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
|
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
|
||||||
if (ENABLE_USEARCH)
|
if (ENABLE_USEARCH)
|
||||||
add_contrib (FP16-cmake FP16)
|
add_contrib (FP16-cmake FP16)
|
||||||
add_contrib (SimSIMD-cmake SimSIMD)
|
|
||||||
add_contrib (usearch-cmake usearch) # requires: FP16, SimSIMD
|
add_contrib (usearch-cmake usearch) # requires: FP16, SimSIMD
|
||||||
else ()
|
else ()
|
||||||
message(STATUS "Not using USearch")
|
message(STATUS "Not using USearch")
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
|
#include <bit>
|
||||||
#include <Functions/FunctionBinaryArithmetic.h>
|
#include <Functions/FunctionBinaryArithmetic.h>
|
||||||
#include <Functions/FunctionFactory.h>
|
#include <Functions/FunctionFactory.h>
|
||||||
#include <bit>
|
#pragma clang diagnostic ignored "-Wused-but-marked-unused"
|
||||||
|
#include <simsimd/simsimd.h>
|
||||||
|
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
@ -26,17 +28,40 @@ struct BitHammingDistanceImpl
|
|||||||
|
|
||||||
if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64))
|
if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64))
|
||||||
{
|
{
|
||||||
UInt64 res = static_cast<UInt64>(a) ^ static_cast<UInt64>(b);
|
if constexpr (std::is_same_v<A, B>)
|
||||||
return std::popcount(res);
|
{
|
||||||
|
simsimd_distance_t distance;
|
||||||
|
simsimd_hamming_b8(
|
||||||
|
reinterpret_cast<const simsimd_b8_t *>(&a), reinterpret_cast<const simsimd_b8_t *>(&b), sizeof(A), &distance);
|
||||||
|
return static_cast<UInt64>(distance);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
UInt64 res = static_cast<UInt64>(a) ^ static_cast<UInt64>(b);
|
||||||
|
return std::popcount(res);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if constexpr (is_big_int_v<A> && is_big_int_v<B>)
|
else if constexpr (is_big_int_v<A> && is_big_int_v<B>)
|
||||||
{
|
{
|
||||||
auto xored = a ^ b;
|
if constexpr (std::is_same_v<A, B>)
|
||||||
|
{
|
||||||
|
simsimd_distance_t distance;
|
||||||
|
simsimd_hamming_b8(
|
||||||
|
reinterpret_cast<const simsimd_b8_t *>(&a.items),
|
||||||
|
reinterpret_cast<const simsimd_b8_t *>(&b.items),
|
||||||
|
sizeof(a.items),
|
||||||
|
&distance);
|
||||||
|
return static_cast<ResultType>(distance);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto xored = a ^ b;
|
||||||
|
|
||||||
ResultType res = 0;
|
ResultType res = 0;
|
||||||
for (auto item : xored.items)
|
for (auto item : xored.items)
|
||||||
res += std::popcount(item);
|
res += std::popcount(item);
|
||||||
return res;
|
return res;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'");
|
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'");
|
||||||
|
Loading…
Reference in New Issue
Block a user