Add kafkaMurmurHash function

This commit is contained in:
Nikolay Degterinsky 2023-03-29 18:05:25 +00:00
parent 44531e5f85
commit 4f360e76e9
6 changed files with 75 additions and 0 deletions

View File

@ -660,6 +660,46 @@ Result:
└──────────────────────┴─────────────────────┘
```
## kafkaMurmurHash
Calculates a 32-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [Kafka](https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L482).
**Syntax**
```sql
kafkaMurmurHash(par1, ...)
```
**Arguments**
- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types).
**Returned value**
- Calculated hash value.
Type: [Int32](/docs/en/sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
SELECT
kafkaMurmurHash('foobar') AS res1,
kafkaMurmurHash(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS res2,
toTypeName(res1) AS type;
```
Result:
```response
┌───────res1─┬─────res2─┬─type──┐
│ -790332482 │ 16984959 │ Int32 │
└────────────┴──────────┴───────┘
```
## murmurHash3_32, murmurHash3_64
Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value.

View File

@ -465,6 +465,26 @@ struct GccMurmurHashImpl
static constexpr bool use_int_hash_for_pods = false;
};
/// To be compatible with Kafka: https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L480
struct KafkaMurmurHashImpl
{
static constexpr auto name = "kafkaMurmurHash";
using ReturnType = Int32;
static Int32 apply(const char * data, const size_t size)
{
return static_cast<ReturnType>(MurmurHash2(data, size, 0x9747b28cU));
}
static Int32 combineHashes(Int32 h1, Int32 h2)
{
return static_cast<ReturnType>(IntHash32Impl::apply(static_cast<UInt32>(h1)) ^ static_cast<UInt32>(h2));
}
static constexpr bool use_int_hash_for_pods = false;
};
struct MurmurHash3Impl32
{
static constexpr auto name = "murmurHash3_32";
@ -1698,6 +1718,7 @@ using FunctionMetroHash64 = FunctionAnyHash<ImplMetroHash64>;
/// Each alias below instantiates FunctionAnyHash with one hash implementation struct;
/// FunctionKafkaMurmurHash uses KafkaMurmurHashImpl.
using FunctionMurmurHash2_32 = FunctionAnyHash<MurmurHash2Impl32>;
using FunctionMurmurHash2_64 = FunctionAnyHash<MurmurHash2Impl64>;
using FunctionGccMurmurHash = FunctionAnyHash<GccMurmurHashImpl>;
using FunctionKafkaMurmurHash = FunctionAnyHash<KafkaMurmurHashImpl>;
using FunctionMurmurHash3_32 = FunctionAnyHash<MurmurHash3Impl32>;
using FunctionMurmurHash3_64 = FunctionAnyHash<MurmurHash3Impl64>;
using FunctionMurmurHash3_128 = FunctionAnyHash<MurmurHash3Impl128>;

View File

@ -17,5 +17,6 @@ REGISTER_FUNCTION(HashingMurmur)
factory.registerFunction<FunctionMurmurHash3_64>();
factory.registerFunction<FunctionMurmurHash3_128>();
factory.registerFunction<FunctionGccMurmurHash>();
factory.registerFunction<FunctionKafkaMurmurHash>();
}
}

View File

@ -389,6 +389,7 @@ javaHashUTF16LE
joinGet
joinGetOrNull
jumpConsistentHash
kafkaMurmurHash
kostikConsistentHash
lcm
least

View File

@ -0,0 +1,5 @@
-973932308
-790332482
-985981536
-1486304829
-58897971

View File

@ -0,0 +1,7 @@
-- Checks kafkaMurmurHash on single string arguments, one result per line.
-- NOTE(review): the input strings appear to be taken from the Kafka unit test linked below - confirm when updating.
-- https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/test/java/org/apache/kafka/common/utils/UtilsTest.java#L93
SELECT kafkaMurmurHash('21');
SELECT kafkaMurmurHash('foobar');
SELECT kafkaMurmurHash('a-little-bit-long-string');
SELECT kafkaMurmurHash('a-little-bit-longer-string');
SELECT kafkaMurmurHash('lkjh234lh9fiuh90y23oiuhsafujhadof229phr9h19h89h8');