mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Adding xxHash64 and xxHash32 functions
This commit is contained in:
parent
6b536b2bc7
commit
3757007118
@ -12,7 +12,7 @@ add_library(clickhouse_functions ${LINK_MODE} ${clickhouse_functions_sources})
|
||||
|
||||
target_link_libraries(clickhouse_functions
|
||||
PUBLIC
|
||||
dbms
|
||||
dbms
|
||||
PRIVATE
|
||||
clickhouse_dictionaries
|
||||
${CONSISTENT_HASHING_LIBRARY}
|
||||
@ -21,7 +21,8 @@ target_link_libraries(clickhouse_functions
|
||||
${METROHASH_LIBRARIES}
|
||||
murmurhash
|
||||
${BASE64_LIBRARY}
|
||||
${OPENSSL_CRYPTO_LIBRARY})
|
||||
${OPENSSL_CRYPTO_LIBRARY}
|
||||
${LZ4_LIBRARY})
|
||||
|
||||
target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR})
|
||||
|
||||
|
@ -25,5 +25,7 @@ void registerFunctionsHashing(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionMurmurHash3_32>();
|
||||
factory.registerFunction<FunctionMurmurHash3_64>();
|
||||
factory.registerFunction<FunctionMurmurHash3_128>();
|
||||
factory.registerFunction<FunctionXxHash32>();
|
||||
factory.registerFunction<FunctionXxHash64>();
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <metrohash.h>
|
||||
#include <murmurhash2.h>
|
||||
#include <murmurhash3.h>
|
||||
#include <xxhash.h>
|
||||
|
||||
#include <Poco/ByteOrder.h>
|
||||
|
||||
@ -116,6 +117,7 @@ struct HalfMD5Impl
|
||||
|
||||
/// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions.
|
||||
/// Otherwise it will hash bytes in memory as a string using corresponding hash function.
|
||||
|
||||
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
@ -355,6 +357,44 @@ struct ImplMetroHash64
|
||||
static constexpr bool use_int_hash_for_pods = true;
|
||||
};
|
||||
|
||||
/// Implementation traits for the hash function whose `name` is "xxHash32".
/// Provides the result type, the hash of a single byte buffer, and the rule used to
/// fold the hash of an additional argument into an already computed hash.
struct ImplXxHash32
|
||||
{
|
||||
static constexpr auto name = "xxHash32";
|
||||
using ReturnType = UInt32;
|
||||
|
||||
/// Hashes the `len` bytes starting at `s` with XXH32, always using seed 0.
static auto apply(const char * s, const size_t len) { return XXH32(s, len, 0); }
|
||||
/**
 * Combines two hashes as IntHash32Impl::apply(h1) ^ h2.
 *
 * With the current implementation, calling the function with more than one argument
 * gives results that are not reproducible outside of CH (ClickHouse).
 *
 * The proper way of combining several inputs is to use the streaming mode of the hash function:
 * https://github.com/Cyan4973/xxHash/issues/114#issuecomment-334908566
 *
 * In the common case this is doable with init_state / update_state / finalize_state.
 */
static auto combineHashes(UInt32 h1, UInt32 h2) { return IntHash32Impl::apply(h1) ^ h2; }
|
||||
|
||||
/// NOTE(review): presumably `false` makes POD values get hashed as their raw in-memory bytes
/// via apply() instead of via intHash32/intHash64 - confirm against the consuming template.
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
|
||||
/// Implementation traits for the hash function whose `name` is "xxHash64".
/// Provides the result type, the hash of a single byte buffer, and the rule used to
/// fold the hash of an additional argument into an already computed hash.
struct ImplXxHash64
|
||||
{
|
||||
static constexpr auto name = "xxHash64";
|
||||
using ReturnType = UInt64;
|
||||
/// Pair type accepted by CityHash_v1_0_2::Hash128to64; used only by combineHashes().
using uint128_t = CityHash_v1_0_2::uint128;
|
||||
|
||||
/// Hashes the `len` bytes starting at `s` with XXH64, always using seed 0.
static auto apply(const char * s, const size_t len) { return XXH64(s, len, 0); }
|
||||
|
||||
/*
   Combines two hashes by packing (h1, h2) into a uint128_t and reducing it
   with CityHash_v1_0_2::Hash128to64.

   With the current implementation, calling the function with more than one argument
   gives results that are not reproducible outside of CH (ClickHouse)
   (see comment on ImplXxHash32).
*/
static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); }
|
||||
|
||||
/// NOTE(review): presumably `false` makes POD values get hashed as their raw in-memory bytes
/// via apply() instead of via intHash32/intHash64 - confirm against the consuming template.
static constexpr bool use_int_hash_for_pods = false;
|
||||
};
|
||||
|
||||
|
||||
template <typename Impl>
|
||||
class FunctionStringHashFixedString : public IFunction
|
||||
@ -978,4 +1018,7 @@ using FunctionMurmurHash2_64 = FunctionAnyHash<MurmurHash2Impl64>;
|
||||
using FunctionMurmurHash3_32 = FunctionAnyHash<MurmurHash3Impl32>;
|
||||
using FunctionMurmurHash3_64 = FunctionAnyHash<MurmurHash3Impl64>;
|
||||
using FunctionMurmurHash3_128 = FunctionStringHashFixedString<MurmurHash3Impl128>;
|
||||
using FunctionXxHash32 = FunctionAnyHash<ImplXxHash32>;
|
||||
using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
|
||||
|
||||
}
|
||||
|
54
dbms/tests/queries/0_stateless/00803_xxhash.reference
Normal file
54
dbms/tests/queries/0_stateless/00803_xxhash.reference
Normal file
@ -0,0 +1,54 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
77
dbms/tests/queries/0_stateless/00803_xxhash.sql
Normal file
77
dbms/tests/queries/0_stateless/00803_xxhash.sql
Normal file
@ -0,0 +1,77 @@
|
||||
SELECT hex(xxHash64('')) = upper('ef46db3751d8e999');
|
||||
SELECT hex(xxHash32('')) = upper('02cc5d05');
|
||||
|
||||
SELECT hex(xxHash64('ABC')) = upper('e66ae7354fcfee98');
|
||||
SELECT hex(xxHash32('ABC')) = upper('80712ed5');
|
||||
|
||||
SELECT hex(xxHash64('xxhash')) = upper('32dd38952c4bc720');
|
||||
|
||||
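-- xxHash64: NULL, empty argument list, integers, floats, strings, FixedString, Enum8, multiple arguments and nested tuples.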
|
||||
|
||||
SELECT xxHash64(NULL) is NULL;
|
||||
SELECT xxHash64() = toUInt64(16324913028386710556);
|
||||
|
||||
SELECT xxHash64(0) = toUInt64(16804241149081757544);
|
||||
SELECT xxHash64(123456) = toUInt64(9049736899514479480);
|
||||
|
||||
select xxHash64(toUInt8(0)) = xxHash64('\0');
|
||||
select xxHash64(toUInt16(0)) = xxHash64('\0\0');
|
||||
select xxHash64(toUInt32(0)) = xxHash64('\0\0\0\0');
|
||||
select xxHash64(toUInt64(0)) = xxHash64('\0\0\0\0\0\0\0\0');
|
||||
|
||||
SELECT xxHash64(CAST(3 AS UInt8)) = toUInt64(2244420788148980662);
|
||||
SELECT xxHash64(CAST(1.2684 AS Float32)) = toUInt64(6662491266811474554);
|
||||
SELECT xxHash64(CAST(-154477 AS Int64)) = toUInt64(1162348840373071858);
|
||||
|
||||
SELECT xxHash64('') = toUInt64(17241709254077376921);
|
||||
SELECT xxHash64('foo') = toUInt64(3728699739546630719);
|
||||
SELECT xxHash64(CAST('foo' AS FixedString(3))) = xxHash64('foo');
|
||||
SELECT xxHash64(CAST('bar' AS FixedString(3))) = toUInt64(5234164152756840025);
|
||||
SELECT xxHash64(x) = toUInt64(9962287286179718960) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x);
|
||||
|
||||
SELECT xxHash64('\x01') = toUInt64(9962287286179718960);
|
||||
SELECT xxHash64('\x02\0') = toUInt64(6482051057365497128);
|
||||
SELECT xxHash64('\x03\0\0\0') = toUInt64(13361037350151369407);
|
||||
|
||||
SELECT xxHash64(1) = toUInt64(9962287286179718960);
|
||||
SELECT xxHash64(toUInt16(2)) = toUInt64(6482051057365497128);
|
||||
SELECT xxHash64(toUInt32(3)) = toUInt64(13361037350151369407);
|
||||
|
||||
SELECT xxHash64(1, 2, 3) = toUInt64(13728743482242651702);
|
||||
SELECT xxHash64(1, 3, 2) = toUInt64(10226792638577471533);
|
||||
SELECT xxHash64(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt64(3521288460171939489);
|
||||
|
||||
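-- xxHash32: NULL, empty argument list, integers, floats, strings, FixedString, Enum8, multiple arguments and nested tuples.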
|
||||
|
||||
SELECT xxHash32(NULL) is NULL;
|
||||
SELECT xxHash32() = toUInt32(4263699484);
|
||||
|
||||
SELECT xxHash32(0) = toUInt32(3479547966);
|
||||
SELECT xxHash32(123456) = toUInt32(1434661961);
|
||||
|
||||
select xxHash32(toUInt8(0)) = xxHash32('\0');
|
||||
select xxHash32(toUInt16(0)) = xxHash32('\0\0');
|
||||
select xxHash32(toUInt32(0)) = xxHash32('\0\0\0\0');
|
||||
|
||||
SELECT xxHash32(CAST(3 AS UInt8)) = toUInt32(565077562);
|
||||
SELECT xxHash32(CAST(1.2684 AS Float32)) = toUInt32(3120514536);
|
||||
SELECT xxHash32(CAST(-154477 AS Int32)) = toUInt32(3279223048);
|
||||
|
||||
SELECT xxHash32('') = toUInt32(46947589);
|
||||
SELECT xxHash32('foo') = toUInt32(3792637401);
|
||||
SELECT xxHash32(CAST('foo' AS FixedString(3))) = xxHash32('foo');
|
||||
SELECT xxHash32(CAST('bar' AS FixedString(3))) = toUInt32(1101146924);
|
||||
SELECT xxHash32(x) = toUInt32(949155633) FROM (SELECT CAST(1 AS Enum8('a' = 1, 'b' = 2)) as x);
|
||||
|
||||
SELECT xxHash32('\x01') = toUInt32(949155633);
|
||||
SELECT xxHash32('\x02\0') = toUInt32(332955956);
|
||||
SELECT xxHash32('\x03\0\0\0') = toUInt32(2158931063);
|
||||
|
||||
SELECT xxHash32(1) = toUInt32(949155633);
|
||||
SELECT xxHash32(toUInt16(2)) = toUInt32(332955956);
|
||||
SELECT xxHash32(toUInt32(3)) = toUInt32(2158931063);
|
||||
|
||||
SELECT xxHash32(1, 2, 3) = toUInt32(441104368);
|
||||
SELECT xxHash32(1, 3, 2) = toUInt32(912264289);
|
||||
SELECT xxHash32(('a', [1, 2, 3], 4, (4, ['foo', 'bar'], 1, (1, 2)))) = toUInt32(1930126291);
|
||||
|
Loading…
Reference in New Issue
Block a user