diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 559744abe72..e0d0b5ed631 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -18,6 +18,7 @@ #include #include #include +#include #define ROTL(x, b) static_cast(((x) << (b)) | ((x) >> (64 - (b)))) @@ -191,6 +192,15 @@ inline void sipHash128(const char * data, const size_t size, char * out) hash.get128(out); } +inline UInt128 sipHash128(const char * data, const size_t size) /* Value-returning overload: feeds `size` bytes at `data` into a default-constructed SipHash and returns the result of get128. */ +{ + SipHash hash; + hash.update(data, size); + UInt128 res; + hash.get128(res); + return res; +} + inline UInt64 sipHash64(const char * data, const size_t size) { SipHash hash; diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 6b39f2ed5cf..a42e6b0bf65 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -103,6 +103,14 @@ struct IntHash64Impl } }; +template +T combineHashesFunc(T t1, T t2) /* Combines two hashes by placing them in a two-element array and hashing its 2 * sizeof(T) bytes with HashFunction::apply. */ +{ + T hashes[] = {t1, t2}; + return HashFunction::apply(reinterpret_cast(hashes), 2 * sizeof(T)); +} + + #if USE_SSL struct HalfMD5Impl { @@ -248,8 +256,7 @@ struct SipHash64Impl static UInt64 combineHashes(UInt64 h1, UInt64 h2) { - UInt64 hashes[] = {h1, h2}; - return apply(reinterpret_cast(hashes), 16); + return combineHashesFunc(h1, h2); /* replaces the removed inline array-and-apply code with a call to combineHashesFunc */ } static constexpr bool use_int_hash_for_pods = false; @@ -258,12 +265,20 @@ struct SipHash64Impl struct SipHash128Impl { static constexpr auto name = "sipHash128"; - enum { length = 16 }; - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + using ReturnType = UInt128; + + static UInt128 combineHashes(UInt128 h1, UInt128 h2) /* Combines two 128-bit hashes through combineHashesFunc. */ { - sipHash128(begin, size, reinterpret_cast(out_char_data)); + return combineHashesFunc(h1, h2); } + + static UInt128 apply(const char * data, const size_t size) /* Returns the hash by value (the removed version wrote into an output buffer) by forwarding to sipHash128(data, size). */ + { + return sipHash128(data, size); + } + + static constexpr bool use_int_hash_for_pods = false; }; /** Why we need MurmurHash2? 
@@ -380,12 +395,22 @@ struct MurmurHash3Impl64 struct MurmurHash3Impl128 { static constexpr auto name = "murmurHash3_128"; - enum { length = 16 }; - static void apply(const char * begin, const size_t size, unsigned char * out_char_data) + using ReturnType = UInt128; + + static UInt128 apply(const char * data, const size_t size) /* Returns the MurmurHash3_x64_128 hash of `size` bytes at `data` (third argument 0) by value. */ { - MurmurHash3_x64_128(begin, size, 0, out_char_data); + UInt128 res; /* hash straight into the result: reading a char[16] buffer through a UInt128 pointer is an unaligned, aliasing-violating access */ + MurmurHash3_x64_128(data, size, 0, &res); + return res; } + + static UInt128 combineHashes(UInt128 h1, UInt128 h2) /* Combines two 128-bit hashes through combineHashesFunc. */ + { + return combineHashesFunc(h1, h2); + } + + static constexpr bool use_int_hash_for_pods = false; }; /// http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452 @@ -1093,7 +1118,12 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { - return std::make_shared>(); + if constexpr (std::is_same_v) /// backward-compatible + { + return std::make_shared(sizeof(UInt128)); + } + else + return std::make_shared>(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override @@ -1115,6 +1145,13 @@ public: for (const auto & col : arguments) executeForArgument(col.type.get(), col.column.get(), vec_to, is_first_argument); + if constexpr (std::is_same_v) /// backward-compatible + { + auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128)); + col_to_fixed_string->getChars() = std::move(*reinterpret_cast(&col_to->getData())); /* NOTE(review): moves the result column's buffer into the fixed-string column via a pointer cast; presumably both containers share a layout - confirm */ + return col_to_fixed_string; + } + return col_to; } }; @@ -1359,7 +1396,7 @@ using FunctionSHA256 = FunctionStringHashFixedString; using FunctionSHA384 = FunctionStringHashFixedString; using FunctionSHA512 = FunctionStringHashFixedString; #endif -using FunctionSipHash128 = FunctionStringHashFixedString; +using FunctionSipHash128 = FunctionAnyHash; using FunctionCityHash64 = FunctionAnyHash; using FunctionFarmFingerprint64 = FunctionAnyHash; using 
FunctionFarmHash64 = FunctionAnyHash; @@ -1370,7 +1407,7 @@ using FunctionMurmurHash2_64 = FunctionAnyHash; using FunctionGccMurmurHash = FunctionAnyHash; using FunctionMurmurHash3_32 = FunctionAnyHash; using FunctionMurmurHash3_64 = FunctionAnyHash; -using FunctionMurmurHash3_128 = FunctionStringHashFixedString; +using FunctionMurmurHash3_128 = FunctionAnyHash; using FunctionJavaHash = FunctionAnyHash; using FunctionJavaHashUTF16LE = FunctionAnyHash; diff --git a/tests/queries/0_stateless/00678_murmurhash.sql b/tests/queries/0_stateless/00678_murmurhash.sql index 91b4deef9b3..705c62480a0 100644 --- a/tests/queries/0_stateless/00678_murmurhash.sql +++ b/tests/queries/0_stateless/00678_murmurhash.sql @@ -34,4 +34,3 @@ SELECT gccMurmurHash(1); SELECT hex(murmurHash3_128('foo')); SELECT hex(murmurHash3_128('\x01')); - diff --git a/tests/queries/0_stateless/00746_hashing_tuples.reference b/tests/queries/0_stateless/00746_hashing_tuples.reference index ebb03034add..71d45be5a54 100644 --- a/tests/queries/0_stateless/00746_hashing_tuples.reference +++ b/tests/queries/0_stateless/00746_hashing_tuples.reference @@ -1,6 +1,11 @@ 12940785793559895259 17926972817233444501 7456555839952096623 +CC45107CC4B79F62D831BEF2103C7CBF +DF2EC2F0669B000EDFF6ADEE264E7D68 +4CD1C30C38AB935D418B5269EF197B9E +9D78134EE48654D753CCA1B76185CF8E +389D16428D2AADEC9713905572F42864 955237314186186656 8175794665478042155 9325786087413524176 @@ -13,6 +18,8 @@ 8163029322371165472 8788309436660676487 236561483980029756 +8DD5527CC43D76F4760D26BE0F641F7E +F8F7AD9B6CD4CF117A71E277E2EC2931 12384823029245979431 4507350192761038840 1188926775431157506 diff --git a/tests/queries/0_stateless/00746_hashing_tuples.sql b/tests/queries/0_stateless/00746_hashing_tuples.sql index fe6c7e373b4..466a2184c65 100644 --- a/tests/queries/0_stateless/00746_hashing_tuples.sql +++ b/tests/queries/0_stateless/00746_hashing_tuples.sql @@ -4,6 +4,12 @@ SELECT sipHash64(1, 2, 3); SELECT sipHash64(1, 3, 2); SELECT sipHash64(('a', 
[1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); +SELECT hex(sipHash128('foo')); +SELECT hex(sipHash128('\x01')); +SELECT hex(sipHash128('foo', 'foo')); +SELECT hex(sipHash128('foo', 'foo', 'foo')); +SELECT hex(sipHash128(1, 2, 3)); + SELECT halfMD5(1, 2, 3); SELECT halfMD5(1, 3, 2); SELECT halfMD5(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); @@ -20,6 +26,9 @@ SELECT murmurHash3_64(1, 2, 3); SELECT murmurHash3_64(1, 3, 2); SELECT murmurHash3_64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); +SELECT hex(murmurHash3_128('foo', 'foo')); +SELECT hex(murmurHash3_128('foo', 'foo', 'foo')); + SELECT gccMurmurHash(1, 2, 3); SELECT gccMurmurHash(1, 3, 2); SELECT gccMurmurHash(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))); \ No newline at end of file