From fd8714ac297ea884e5889d370312ab9e8f03c739 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 21 Nov 2023 18:38:00 +0100 Subject: [PATCH 1/3] Implement `bitHammingDistance` for big integers --- src/Functions/bitCount.cpp | 13 +++++++------ src/Functions/bitHammingDistance.cpp | 21 ++++++++++++++++++--- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/Functions/bitCount.cpp b/src/Functions/bitCount.cpp index 566a11481be..f1a3ac897c1 100644 --- a/src/Functions/bitCount.cpp +++ b/src/Functions/bitCount.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -21,19 +22,19 @@ struct BitCountImpl { ResultType res = 0; for (auto item : a.items) - res += __builtin_popcountll(item); + res += std::popcount(item); return res; } if constexpr (std::is_same_v || std::is_same_v) - return __builtin_popcountll(a); + return std::popcount(static_cast(a)); if constexpr (std::is_same_v || std::is_same_v || std::is_unsigned_v) - return __builtin_popcount(a); + return std::popcount(static_cast(a)); if constexpr (std::is_same_v) - return __builtin_popcount(static_cast(a)); + return std::popcount(static_cast(a)); if constexpr (std::is_same_v) - return __builtin_popcount(static_cast(a)); + return std::popcount(static_cast(a)); else - return __builtin_popcountll(bit_cast(a)); + return std::popcount(bit_cast(a)); } #if USE_EMBEDDED_COMPILER diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 2eaa397dd04..4f36f731eda 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -2,20 +2,35 @@ #include #include + namespace DB { template struct BitHammingDistanceImpl { - using ResultType = UInt8; + using ResultType = std::conditional_t<(sizeof(A) * 8 >= 256), UInt16, UInt8>; static constexpr bool allow_fixed_string = true; static constexpr bool allow_string_integer = false; template static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) { - UInt64 res = static_cast(a) ^ static_cast(b); - return std::popcount(res); + if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64)) + { + UInt64 res = static_cast(a) ^ static_cast(b); + return std::popcount(res); + } + else if constexpr (is_big_int_v && is_big_int_v) + { + auto xored = a ^ b; + + ResultType res = 0; + for (auto item : xored.items) + res += std::popcount(item); + return res; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported data type combination in function 'bitHammingDistance'"); } #if USE_EMBEDDED_COMPILER From de5556b458786d8d620cf3b1727aec99b454b4da Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Nov 2023 00:29:43 +0100 Subject: [PATCH 2/3] Add a test --- src/Functions/bitHammingDistance.cpp | 3 +++ .../02921_bit_hamming_distance_big_int.reference | 9 +++++++++ .../02921_bit_hamming_distance_big_int.sql | 12 ++++++++++++ 3 files changed, 24 insertions(+) create mode 100644 tests/queries/0_stateless/02921_bit_hamming_distance_big_int.reference create mode 100644 tests/queries/0_stateless/02921_bit_hamming_distance_big_int.sql diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index 4f36f731eda..ef5ee16e553 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -15,6 +15,9 @@ struct BitHammingDistanceImpl template static inline NO_SANITIZE_UNDEFINED Result apply(A a, B b) { + /// Note: it's unspecified if signed integers should be promoted with sign-extension or with zero-fill. + /// This behavior can change in the future. + if constexpr (sizeof(A) <= sizeof(UInt64) && sizeof(B) <= sizeof(UInt64)) { UInt64 res = static_cast(a) ^ static_cast(b); diff --git a/tests/queries/0_stateless/02921_bit_hamming_distance_big_int.reference b/tests/queries/0_stateless/02921_bit_hamming_distance_big_int.reference new file mode 100644 index 00000000000..62245f5d176 --- /dev/null +++ b/tests/queries/0_stateless/02921_bit_hamming_distance_big_int.reference @@ -0,0 +1,9 @@ +314776434768051644139306697240981192872 0 74 74 +14776434768051644139306697240981192872314776434768051644139306697240981192872 0 141 141 +314776434768051644139306697240981192872 14776434768051644139306697240981192872314776434768051644139306697240981192872 115 115 +-25505932152886819324067910190787018584 0 74 74 +14776434768051644139306697240981192872314776434768051644139306697240981192872 0 141 141 +-25505932152886819324067910190787018584 14776434768051644139306697240981192872314776434768051644139306697240981192872 99 99 +314776434768051644139306697240981192872 0 74 74 +14776434768051644139306697240981192872314776434768051644139306697240981192872 0 141 141 +314776434768051644139306697240981192872 14776434768051644139306697240981192872314776434768051644139306697240981192872 115 115 diff --git a/tests/queries/0_stateless/02921_bit_hamming_distance_big_int.sql b/tests/queries/0_stateless/02921_bit_hamming_distance_big_int.sql new file mode 100644 index 00000000000..6f241e104b6 --- /dev/null +++ b/tests/queries/0_stateless/02921_bit_hamming_distance_big_int.sql @@ -0,0 +1,12 @@ +SELECT 314776434768051644139306697240981192872::UInt128 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; +SELECT 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; +SELECT 314776434768051644139306697240981192872::UInt128 AS x, 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; + +SELECT 314776434768051644139306697240981192872::Int128 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; +SELECT 14776434768051644139306697240981192872314776434768051644139306697240981192872::Int256 AS x, 0::UInt128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; +SELECT 314776434768051644139306697240981192872::Int128 AS x, 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; + +SELECT 314776434768051644139306697240981192872::UInt128 AS x, 0::Int128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; +SELECT 14776434768051644139306697240981192872314776434768051644139306697240981192872::UInt256 AS x, 0::Int128 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; +SELECT 314776434768051644139306697240981192872::UInt128 AS x, 14776434768051644139306697240981192872314776434768051644139306697240981192872::Int256 AS y, bitCount(bitXor(x, y)) AS a, bitHammingDistance(x, y) AS b; + From 5cb55d3f8cc767300c95446bac0c74a39bafc401 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Nov 2023 01:24:58 +0100 Subject: [PATCH 3/3] Fix style --- src/Functions/bitHammingDistance.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp index ef5ee16e553..f00f38b61af 100644 --- a/src/Functions/bitHammingDistance.cpp +++ b/src/Functions/bitHammingDistance.cpp @@ -5,6 +5,12 @@ namespace DB { + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + template struct BitHammingDistanceImpl {