add function javaIntHash

This commit is contained in:
JackyWoo 2022-09-09 18:27:29 +08:00
parent 8ae7fb14b3
commit 2399d8b5ae
4 changed files with 50 additions and 0 deletions

View File

@ -28,6 +28,7 @@ REGISTER_FUNCTION(Hashing)
factory.registerFunction<FunctionIntHash64>(); factory.registerFunction<FunctionIntHash64>();
factory.registerFunction<FunctionURLHash>(); factory.registerFunction<FunctionURLHash>();
factory.registerFunction<FunctionJavaHash>(); factory.registerFunction<FunctionJavaHash>();
factory.registerFunction<FunctionJavaIntHash>();
factory.registerFunction<FunctionJavaHashUTF16LE>(); factory.registerFunction<FunctionJavaHashUTF16LE>();
factory.registerFunction<FunctionHiveHash>(); factory.registerFunction<FunctionHiveHash>();
factory.registerFunction<FunctionMurmurHash2_32>(); factory.registerFunction<FunctionMurmurHash2_32>();

View File

@ -80,8 +80,47 @@ namespace ErrorCodes
* intHash32: number -> UInt32 * intHash32: number -> UInt32
* intHash64: number -> UInt64 * intHash64: number -> UInt64
* *
* Non-cryptographic hash function matching Java's hashCode() for integer types (Byte, Short, Integer, Long)
* javaIntHash: number -> Int32
*/ */
/** Implementation of javaIntHash: number -> Int32.
  *
  * - std::int64_t: the upper 32 bits are XOR-folded into the lower 32 bits,
  *   i.e. the Java expression (int)(x ^ (x >>> 32)).
  * - Any integral type not wider than 32 bits: the value itself, converted to int32_t.
  * - Every other type: throws Exception with ErrorCodes::NOT_IMPLEMENTED.
  */
struct JavaIntHashImpl
{
    using ReturnType = Int32;

    /// Dispatches on T at compile time; see the struct comment for the per-type result.
    template <class T>
    static int32_t apply([[maybe_unused]] T x)
    {
        if constexpr (std::is_same_v<T, std::int64_t>)
        {
            /// Java's >>> is a logical (zero-filling) shift. Shifting the unsigned representation
            /// yields exactly that, independent of byte order and without type punning.
            const auto bits = static_cast<uint64_t>(x);
            const auto folded = static_cast<uint32_t>(bits ^ (bits >> 32));
            return static_cast<int32_t>(folded);
        }
        else if constexpr (std::is_integral_v<T> && sizeof(T) <= sizeof(int32_t))
        {
            return static_cast<int32_t>(x);
        }
        else
        {
            throw Exception("Not implemented type for Java int hash ", ErrorCodes::NOT_IMPLEMENTED);
        }
    }
};
struct IntHash32Impl struct IntHash32Impl
{ {
using ReturnType = UInt32; using ReturnType = UInt32;
@ -1408,10 +1447,12 @@ struct ImplWyHash64
struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash32 { static constexpr auto name = "intHash32"; };
struct NameIntHash64 { static constexpr auto name = "intHash64"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; };
/// Name tag carrying the string "javaIntHash"; passed as the second template argument of FunctionJavaIntHash.
struct NameJavaIntHash { static constexpr auto name = "javaIntHash"; };
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>; using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>; using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>; using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
/// javaIntHash: FunctionIntHash instantiated with JavaIntHashImpl and the NameJavaIntHash tag.
using FunctionJavaIntHash = FunctionIntHash<JavaIntHashImpl, NameJavaIntHash>;
#if USE_SSL #if USE_SSL
using FunctionMD4 = FunctionStringHashFixedString<MD4Impl>; using FunctionMD4 = FunctionStringHashFixedString<MD4Impl>;
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>; using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;

View File

@ -6,3 +6,7 @@
1258255525 1258255525
96354 96354
1470786104 1470786104
12345
-12345
12345
12344

View File

@ -8,3 +8,7 @@ select javaHashUTF16LE(convertCharset('FJKLDSJFIOLD_389159837589429', 'utf-8', '
select javaHashUTF16LE(convertCharset('𐐀𐐁𐐂𐐃𐐄', 'utf-8', 'utf-16le')); select javaHashUTF16LE(convertCharset('𐐀𐐁𐐂𐐃𐐄', 'utf-8', 'utf-16le'));
select hiveHash('abc'); select hiveHash('abc');
select hiveHash('874293087'); select hiveHash('874293087');
-- javaIntHash: plain integer literals, then the same values passed explicitly as Int64.
select javaIntHash(12345);
select javaIntHash(-12345);
select javaIntHash(toInt64(12345));
select javaIntHash(toInt64(-12345));