mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #28965 from nicelulu/issues_28774
128bit hash-functions accepting arbitrary list of arguments
This commit is contained in:
commit
406bb4d997
@ -18,6 +18,7 @@
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <Core/Defines.h>
|
||||
#include <base/extended_types.h>
|
||||
|
||||
|
||||
/// Rotates `x` left by `b` bits over a 64-bit width and casts the result to UInt64.
/// `b` must be non-zero and below 64: with b == 0 the right shift count becomes 64,
/// which is undefined for a 64-bit operand.
/// NOTE(review): assumes `x` is an unsigned 64-bit value — confirm at the call sites.
#define ROTL(x, b) static_cast<UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
|
||||
@ -191,6 +192,15 @@ inline void sipHash128(const char * data, const size_t size, char * out)
|
||||
hash.get128(out);
|
||||
}
|
||||
|
||||
/// Hashes `size` bytes starting at `data` with a default-constructed SipHash
/// and returns the 128-bit digest by value.
inline UInt128 sipHash128(const char * data, const size_t size)
{
    SipHash hasher;
    hasher.update(data, size);

    UInt128 digest;
    hasher.get128(digest);
    return digest;
}
|
||||
|
||||
inline UInt64 sipHash64(const char * data, const size_t size)
|
||||
{
|
||||
SipHash hash;
|
||||
|
@ -103,6 +103,14 @@ struct IntHash64Impl
|
||||
}
|
||||
};
|
||||
|
||||
/// Combines two hash values into one by hashing their concatenated bytes.
///
/// `t1` and `t2` are placed back to back in a temporary array, and the raw bytes
/// of that array (2 * sizeof(T) of them) are passed to
/// `HashFunction::apply(const char *, size_t)`. Its result is returned as `T`.
template <typename T, typename HashFunction>
T combineHashesFunc(T t1, T t2)
{
    const T pair[2] = {t1, t2};
    const char * raw = reinterpret_cast<const char *>(pair);
    return HashFunction::apply(raw, sizeof(pair));
}
|
||||
|
||||
|
||||
#if USE_SSL
|
||||
struct HalfMD5Impl
|
||||
{
|
||||
@ -248,8 +256,7 @@ struct SipHash64Impl
|
||||
|
||||
/// Combines two 64-bit hashes into one.
/// Delegates to combineHashesFunc, which hashes the bytes of the pair {h1, h2}
/// with this implementation's own `apply`.
static UInt64 combineHashes(UInt64 h1, UInt64 h2)
{
    return combineHashesFunc<UInt64, SipHash64Impl>(h1, h2);
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
@ -258,12 +265,20 @@ struct SipHash64Impl
|
||||
struct SipHash128Impl
|
||||
{
|
||||
static constexpr auto name = "sipHash128";
|
||||
enum { length = 16 };
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
using ReturnType = UInt128;
|
||||
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2)
|
||||
{
|
||||
sipHash128(begin, size, reinterpret_cast<char*>(out_char_data));
|
||||
return combineHashesFunc<UInt128, SipHash128Impl>(h1, h2);
|
||||
}
|
||||
|
||||
static UInt128 apply(const char * data, const size_t size)
|
||||
{
|
||||
return sipHash128(data, size);
|
||||
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
/** Why we need MurmurHash2?
|
||||
@ -380,12 +395,22 @@ struct MurmurHash3Impl64
|
||||
struct MurmurHash3Impl128
|
||||
{
|
||||
static constexpr auto name = "murmurHash3_128";
|
||||
enum { length = 16 };
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
using ReturnType = UInt128;
|
||||
|
||||
static UInt128 apply(const char * data, const size_t size)
|
||||
{
|
||||
MurmurHash3_x64_128(begin, size, 0, out_char_data);
|
||||
char bytes[16];
|
||||
MurmurHash3_x64_128(data, size, 0, bytes);
|
||||
return *reinterpret_cast<UInt128 *>(bytes);
|
||||
}
|
||||
|
||||
static UInt128 combineHashes(UInt128 h1, UInt128 h2)
|
||||
{
|
||||
return combineHashesFunc<UInt128, MurmurHash3Impl128>(h1, h2);
|
||||
}
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
/// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452
|
||||
@ -1093,7 +1118,12 @@ public:
|
||||
|
||||
/// Returns the result data type of the function; the arguments are not inspected.
/// When `ToType` is UInt128 the result is declared as a fixed string of
/// sizeof(UInt128) bytes (kept for backward compatibility); every other `ToType`
/// is declared as the matching numeric type.
DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override
{
    if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
        return std::make_shared<DataTypeFixedString>(sizeof(UInt128));
    else
        return std::make_shared<DataTypeNumber<ToType>>();
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
@ -1115,6 +1145,13 @@ public:
|
||||
for (const auto & col : arguments)
|
||||
executeForArgument(col.type.get(), col.column.get(), vec_to, is_first_argument);
|
||||
|
||||
if constexpr (std::is_same_v<ToType, UInt128>) /// backward-compatible
|
||||
{
|
||||
auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128));
|
||||
col_to_fixed_string->getChars() = std::move(*reinterpret_cast<ColumnFixedString::Chars *>(&col_to->getData()));
|
||||
return col_to_fixed_string;
|
||||
}
|
||||
|
||||
return col_to;
|
||||
}
|
||||
};
|
||||
@ -1359,7 +1396,7 @@ using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
|
||||
using FunctionSHA384 = FunctionStringHashFixedString<SHA384Impl>;
|
||||
using FunctionSHA512 = FunctionStringHashFixedString<SHA512Impl>;
|
||||
#endif
|
||||
using FunctionSipHash128 = FunctionStringHashFixedString<SipHash128Impl>;
|
||||
using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
|
||||
using FunctionCityHash64 = FunctionAnyHash<ImplCityHash64>;
|
||||
using FunctionFarmFingerprint64 = FunctionAnyHash<ImplFarmFingerprint64>;
|
||||
using FunctionFarmHash64 = FunctionAnyHash<ImplFarmHash64>;
|
||||
@ -1370,7 +1407,7 @@ using FunctionMurmurHash2_64 = FunctionAnyHash<MurmurHash2Impl64>;
|
||||
using FunctionGccMurmurHash = FunctionAnyHash<GccMurmurHashImpl>;
|
||||
using FunctionMurmurHash3_32 = FunctionAnyHash<MurmurHash3Impl32>;
|
||||
using FunctionMurmurHash3_64 = FunctionAnyHash<MurmurHash3Impl64>;
|
||||
using FunctionMurmurHash3_128 = FunctionStringHashFixedString<MurmurHash3Impl128>;
|
||||
using FunctionMurmurHash3_128 = FunctionAnyHash<MurmurHash3Impl128>;
|
||||
|
||||
using FunctionJavaHash = FunctionAnyHash<JavaHashImpl>;
|
||||
using FunctionJavaHashUTF16LE = FunctionAnyHash<JavaHashUTF16LEImpl>;
|
||||
|
@ -34,4 +34,3 @@ SELECT gccMurmurHash(1);
|
||||
|
||||
SELECT hex(murmurHash3_128('foo'));
|
||||
SELECT hex(murmurHash3_128('\x01'));
|
||||
|
||||
|
@ -1,6 +1,11 @@
|
||||
12940785793559895259
|
||||
17926972817233444501
|
||||
7456555839952096623
|
||||
CC45107CC4B79F62D831BEF2103C7CBF
|
||||
DF2EC2F0669B000EDFF6ADEE264E7D68
|
||||
4CD1C30C38AB935D418B5269EF197B9E
|
||||
9D78134EE48654D753CCA1B76185CF8E
|
||||
389D16428D2AADEC9713905572F42864
|
||||
955237314186186656
|
||||
8175794665478042155
|
||||
9325786087413524176
|
||||
@ -13,6 +18,8 @@
|
||||
8163029322371165472
|
||||
8788309436660676487
|
||||
236561483980029756
|
||||
8DD5527CC43D76F4760D26BE0F641F7E
|
||||
F8F7AD9B6CD4CF117A71E277E2EC2931
|
||||
12384823029245979431
|
||||
4507350192761038840
|
||||
1188926775431157506
|
||||
|
@ -4,6 +4,12 @@ SELECT sipHash64(1, 2, 3);
|
||||
SELECT sipHash64(1, 3, 2);
|
||||
SELECT sipHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2))));
|
||||
|
||||
SELECT hex(sipHash128('foo'));
|
||||
SELECT hex(sipHash128('\x01'));
|
||||
SELECT hex(sipHash128('foo', 'foo'));
|
||||
SELECT hex(sipHash128('foo', 'foo', 'foo'));
|
||||
SELECT hex(sipHash128(1, 2, 3));
|
||||
|
||||
SELECT halfMD5(1, 2, 3);
|
||||
SELECT halfMD5(1, 3, 2);
|
||||
SELECT halfMD5(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2))));
|
||||
@ -20,6 +26,9 @@ SELECT murmurHash3_64(1, 2, 3);
|
||||
SELECT murmurHash3_64(1, 3, 2);
|
||||
SELECT murmurHash3_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2))));
|
||||
|
||||
SELECT hex(murmurHash3_128('foo', 'foo'));
|
||||
SELECT hex(murmurHash3_128('foo', 'foo', 'foo'));
|
||||
|
||||
SELECT gccMurmurHash(1, 2, 3);
|
||||
SELECT gccMurmurHash(1, 3, 2);
|
||||
SELECT gccMurmurHash(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2))));
|
Loading…
Reference in New Issue
Block a user