Merge pull request #57073 from ClickHouse/implement-bit-hamming-distance-for-big-integers

Implement `bitHammingDistance` for big integers
This commit is contained in:
Alexey Milovidov 2023-11-22 11:56:33 +01:00 committed by GitHub
commit d1015aae8e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 9 deletions

View File

@ -1,6 +1,7 @@
#include <base/bit_cast.h> #include <base/bit_cast.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Functions/FunctionUnaryArithmetic.h> #include <Functions/FunctionUnaryArithmetic.h>
#include <bit>
namespace DB namespace DB
@ -21,19 +22,19 @@ struct BitCountImpl
{ {
ResultType res = 0; ResultType res = 0;
for (auto item : a.items) for (auto item : a.items)
res += __builtin_popcountll(item); res += std::popcount(item);
return res; return res;
} }
if constexpr (std::is_same_v<A, UInt64> || std::is_same_v<A, Int64>) if constexpr (std::is_same_v<A, UInt64> || std::is_same_v<A, Int64>)
return __builtin_popcountll(a); return std::popcount(static_cast<UInt64>(a));
if constexpr (std::is_same_v<A, UInt32> || std::is_same_v<A, Int32> || std::is_unsigned_v<A>) if constexpr (std::is_same_v<A, UInt32> || std::is_same_v<A, Int32> || std::is_unsigned_v<A>)
return __builtin_popcount(a); return std::popcount(static_cast<UInt32>(a));
if constexpr (std::is_same_v<A, Int16>) if constexpr (std::is_same_v<A, Int16>)
return __builtin_popcount(static_cast<UInt16>(a)); return std::popcount(static_cast<UInt16>(a));
if constexpr (std::is_same_v<A, Int8>) if constexpr (std::is_same_v<A, Int8>)
return __builtin_popcount(static_cast<UInt8>(a)); return std::popcount(static_cast<uint8_t>(a));
else else
return __builtin_popcountll(bit_cast<uint64_t>(a)); return std::popcount(bit_cast<uint64_t>(a));
} }
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER

View File

@ -2,20 +2,44 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <bit> #include <bit>
namespace DB namespace DB
{ {
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
}
template <typename A, typename B> template <typename A, typename B>
struct BitHammingDistanceImpl struct BitHammingDistanceImpl
{ {
using ResultType = UInt8; using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>;
static constexpr bool allow_fixed_string = true; static constexpr bool allow_fixed_string = true;
static constexpr bool allow_string_integer = false; static constexpr bool allow_string_integer = false;
template <typename Result = ResultType> template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b)
{ {
UInt64 res = static_cast<UInt64>(a) ^ static_cast<UInt64>(b); /// Note: it's unspecified if signed integers should be promoted with sign-extension or with zero-fill.
return std::popcount(res); /// This behavior can change in the future.
if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64))
{
UInt64 res = static_cast<UInt64>(a) ^ static_cast<UInt64>(b);
return std::popcount(res);
}
else if constexpr (is_big_int_v<A> && is_big_int_v<B>)
{
auto xored = a ^ b;
ResultType res = 0;
for (auto item : xored.items)
res += std::popcount(item);
return res;
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'");
} }
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER

View File

@ -0,0 +1,9 @@
314776434768051644139306697240981192872 0 74 74
14776434768051644139306697240981192872314776434768051644139306697240981192872 0 141 141
314776434768051644139306697240981192872 14776434768051644139306697240981192872314776434768051644139306697240981192872 115 115
-25505932152886819324067910190787018584 0 74 74
14776434768051644139306697240981192872314776434768051644139306697240981192872 0 141 141
-25505932152886819324067910190787018584 14776434768051644139306697240981192872314776434768051644139306697240981192872 99 99
314776434768051644139306697240981192872 0 74 74
14776434768051644139306697240981192872314776434768051644139306697240981192872 0 141 141
314776434768051644139306697240981192872 14776434768051644139306697240981192872314776434768051644139306697240981192872 115 115

View File

@ -0,0 +1,12 @@
SELECT 314776434768051644139306697240981192872::UInt128 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 314776434768051644139306697240981192872::UInt128 AS x, 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 314776434768051644139306697240981192872::Int128 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 14776434768051644139306697240981192872314776434768051644139306697240981192872::Int256 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 314776434768051644139306697240981192872::Int128 AS x, 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 314776434768051644139306697240981192872::UInt128 AS x, 0::Int128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS x, 0::Int128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;
SELECT 314776434768051644139306697240981192872::UInt128 AS x, 14776434768051644139306697240981192872314776434768051644139306697240981192872::Int256 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b;