mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Fixed idiosyncrasy introduced in #3451
This commit is contained in:
parent
82933e9c31
commit
719efbe60a
@ -113,6 +113,10 @@ struct HalfMD5Impl
|
|||||||
UInt64 hashes[] = {h1, h2};
|
UInt64 hashes[] = {h1, h2};
|
||||||
return apply(reinterpret_cast<const char *>(hashes), 16);
|
return apply(reinterpret_cast<const char *>(hashes), 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If true, it will use intHash32 or intHash64 to hash POD types. This behaviour is intended for better performance of some functions.
|
||||||
|
/// Otherwise it will hash bytes in memory as a string using corresponding hash function.
|
||||||
|
static constexpr bool use_int_hash_for_pods = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MD5Impl
|
struct MD5Impl
|
||||||
@ -186,8 +190,9 @@ struct SipHash64Impl
|
|||||||
UInt64 hashes[] = {h1, h2};
|
UInt64 hashes[] = {h1, h2};
|
||||||
return apply(reinterpret_cast<const char *>(hashes), 16);
|
return apply(reinterpret_cast<const char *>(hashes), 16);
|
||||||
}
|
}
|
||||||
};
|
|
||||||
|
|
||||||
|
static constexpr bool use_int_hash_for_pods = false;
|
||||||
|
};
|
||||||
|
|
||||||
struct SipHash128Impl
|
struct SipHash128Impl
|
||||||
{
|
{
|
||||||
@ -201,6 +206,156 @@ struct SipHash128Impl
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/** Why we need MurmurHash2?
|
||||||
|
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
|
||||||
|
* Usually there is no reason to use MurmurHash.
|
||||||
|
* It is needed for the cases when you already have MurmurHash in some applications and you want to reproduce it
|
||||||
|
* in ClickHouse as is. For example, it is needed to reproduce the behaviour
|
||||||
|
* for NGINX a/b testing module: https://nginx.ru/en/docs/http/ngx_http_split_clients_module.html
|
||||||
|
*/
|
||||||
|
struct MurmurHash2Impl32
|
||||||
|
{
|
||||||
|
static constexpr auto name = "murmurHash2_32";
|
||||||
|
|
||||||
|
using ReturnType = UInt32;
|
||||||
|
|
||||||
|
static UInt32 apply(const char * data, const size_t size)
|
||||||
|
{
|
||||||
|
return MurmurHash2(data, size, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static UInt32 combineHashes(UInt32 h1, UInt32 h2)
|
||||||
|
{
|
||||||
|
return IntHash32Impl::apply(h1) ^ h2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool use_int_hash_for_pods = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MurmurHash2Impl64
|
||||||
|
{
|
||||||
|
static constexpr auto name = "murmurHash2_64";
|
||||||
|
using ReturnType = UInt64;
|
||||||
|
|
||||||
|
static UInt64 apply(const char * data, const size_t size)
|
||||||
|
{
|
||||||
|
return MurmurHash64A(data, size, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static UInt64 combineHashes(UInt64 h1, UInt64 h2)
|
||||||
|
{
|
||||||
|
return IntHash64Impl::apply(h1) ^ h2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool use_int_hash_for_pods = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MurmurHash3Impl32
|
||||||
|
{
|
||||||
|
static constexpr auto name = "murmurHash3_32";
|
||||||
|
using ReturnType = UInt32;
|
||||||
|
|
||||||
|
static UInt32 apply(const char * data, const size_t size)
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
UInt32 h;
|
||||||
|
char bytes[sizeof(h)];
|
||||||
|
};
|
||||||
|
MurmurHash3_x86_32(data, size, 0, bytes);
|
||||||
|
return h;
|
||||||
|
}
|
||||||
|
|
||||||
|
static UInt32 combineHashes(UInt32 h1, UInt32 h2)
|
||||||
|
{
|
||||||
|
return IntHash32Impl::apply(h1) ^ h2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool use_int_hash_for_pods = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MurmurHash3Impl64
|
||||||
|
{
|
||||||
|
static constexpr auto name = "murmurHash3_64";
|
||||||
|
using ReturnType = UInt64;
|
||||||
|
|
||||||
|
static UInt64 apply(const char * data, const size_t size)
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
UInt64 h[2];
|
||||||
|
char bytes[16];
|
||||||
|
};
|
||||||
|
MurmurHash3_x64_128(data, size, 0, bytes);
|
||||||
|
return h[0] ^ h[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
static UInt64 combineHashes(UInt64 h1, UInt64 h2)
|
||||||
|
{
|
||||||
|
return IntHash64Impl::apply(h1) ^ h2;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool use_int_hash_for_pods = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct MurmurHash3Impl128
|
||||||
|
{
|
||||||
|
static constexpr auto name = "murmurHash3_128";
|
||||||
|
enum { length = 16 };
|
||||||
|
|
||||||
|
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||||
|
{
|
||||||
|
MurmurHash3_x64_128(begin, size, 0, out_char_data);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ImplCityHash64
|
||||||
|
{
|
||||||
|
static constexpr auto name = "cityHash64";
|
||||||
|
using ReturnType = UInt64;
|
||||||
|
using uint128_t = CityHash_v1_0_2::uint128;
|
||||||
|
|
||||||
|
static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); }
|
||||||
|
static auto apply(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); }
|
||||||
|
static constexpr bool use_int_hash_for_pods = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS
|
||||||
|
struct ImplFarmHash64
|
||||||
|
{
|
||||||
|
static constexpr auto name = "farmHash64";
|
||||||
|
using ReturnType = UInt64;
|
||||||
|
using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t;
|
||||||
|
|
||||||
|
static auto combineHashes(UInt64 h1, UInt64 h2) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); }
|
||||||
|
static auto apply(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); }
|
||||||
|
static constexpr bool use_int_hash_for_pods = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ImplMetroHash64
|
||||||
|
{
|
||||||
|
static constexpr auto name = "metroHash64";
|
||||||
|
using ReturnType = UInt64;
|
||||||
|
using uint128_t = CityHash_v1_0_2::uint128;
|
||||||
|
|
||||||
|
static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); }
|
||||||
|
static auto apply(const char * s, const size_t len)
|
||||||
|
{
|
||||||
|
union
|
||||||
|
{
|
||||||
|
UInt64 u64;
|
||||||
|
UInt8 u8[sizeof(u64)];
|
||||||
|
};
|
||||||
|
|
||||||
|
metrohash64_1(reinterpret_cast<const UInt8 *>(s), len, 0, u8);
|
||||||
|
|
||||||
|
return u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr bool use_int_hash_for_pods = true;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
template <typename Impl>
|
template <typename Impl>
|
||||||
class FunctionStringHashFixedString : public IFunction
|
class FunctionStringHashFixedString : public IFunction
|
||||||
{
|
{
|
||||||
@ -259,12 +414,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
inline bool allowIntHash(const IDataType * data_type)
|
|
||||||
{
|
|
||||||
return data_type->isValueRepresentedByNumber();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename Impl, typename Name>
|
template <typename Impl, typename Name>
|
||||||
class FunctionIntHash : public IFunction
|
class FunctionIntHash : public IFunction
|
||||||
{
|
{
|
||||||
@ -308,7 +457,7 @@ public:
|
|||||||
|
|
||||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||||
{
|
{
|
||||||
if (!allowIntHash(arguments[0].get()))
|
if (!arguments[0]->isValueRepresentedByNumber())
|
||||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||||
|
|
||||||
@ -359,10 +508,19 @@ private:
|
|||||||
for (size_t i = 0; i < size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
{
|
{
|
||||||
ToType h;
|
ToType h;
|
||||||
|
|
||||||
|
if constexpr (Impl::use_int_hash_for_pods)
|
||||||
|
{
|
||||||
if constexpr (std::is_same_v<ToType, UInt64>)
|
if constexpr (std::is_same_v<ToType, UInt64>)
|
||||||
h = IntHash64Impl::apply(ext::bit_cast<UInt64>(vec_from[i]));
|
h = IntHash64Impl::apply(ext::bit_cast<UInt64>(vec_from[i]));
|
||||||
else
|
else
|
||||||
h = IntHash32Impl::apply(ext::bit_cast<UInt32>(vec_from[i]));
|
h = IntHash32Impl::apply(ext::bit_cast<UInt32>(vec_from[i]));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
h = Impl::apply(reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||||
|
}
|
||||||
|
|
||||||
if (first)
|
if (first)
|
||||||
vec_to[i] = h;
|
vec_to[i] = h;
|
||||||
else
|
else
|
||||||
@ -610,102 +768,6 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
/** Why we need MurmurHash2?
|
|
||||||
* MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash.
|
|
||||||
* Usually there is no reason to use MurmurHash.
|
|
||||||
* It is needed for the cases when you already have MurmurHash in some applications and you want to reproduce it
|
|
||||||
* in ClickHouse as is. For example, it is needed to reproduce the behaviour
|
|
||||||
* for NGINX a/b testing module: https://nginx.ru/en/docs/http/ngx_http_split_clients_module.html
|
|
||||||
*/
|
|
||||||
struct MurmurHash2Impl32
|
|
||||||
{
|
|
||||||
static constexpr auto name = "murmurHash2_32";
|
|
||||||
|
|
||||||
using ReturnType = UInt32;
|
|
||||||
|
|
||||||
static UInt32 apply(const char * data, const size_t size)
|
|
||||||
{
|
|
||||||
return MurmurHash2(data, size, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static UInt32 combineHashes(UInt32 h1, UInt32 h2)
|
|
||||||
{
|
|
||||||
return IntHash32Impl::apply(h1) ^ h2;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct MurmurHash2Impl64
|
|
||||||
{
|
|
||||||
static constexpr auto name = "murmurHash2_64";
|
|
||||||
using ReturnType = UInt64;
|
|
||||||
|
|
||||||
static UInt64 apply(const char * data, const size_t size)
|
|
||||||
{
|
|
||||||
return MurmurHash64A(data, size, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static UInt64 combineHashes(UInt64 h1, UInt64 h2)
|
|
||||||
{
|
|
||||||
return IntHash64Impl::apply(h1) ^ h2;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct MurmurHash3Impl32
|
|
||||||
{
|
|
||||||
static constexpr auto name = "murmurHash3_32";
|
|
||||||
using ReturnType = UInt32;
|
|
||||||
|
|
||||||
static UInt32 apply(const char * data, const size_t size)
|
|
||||||
{
|
|
||||||
union
|
|
||||||
{
|
|
||||||
UInt32 h;
|
|
||||||
char bytes[sizeof(h)];
|
|
||||||
};
|
|
||||||
MurmurHash3_x86_32(data, size, 0, bytes);
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
|
|
||||||
static UInt32 combineHashes(UInt32 h1, UInt32 h2)
|
|
||||||
{
|
|
||||||
return IntHash32Impl::apply(h1) ^ h2;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct MurmurHash3Impl64
|
|
||||||
{
|
|
||||||
static constexpr auto name = "murmurHash3_64";
|
|
||||||
using ReturnType = UInt64;
|
|
||||||
|
|
||||||
static UInt64 apply(const char * data, const size_t size)
|
|
||||||
{
|
|
||||||
union
|
|
||||||
{
|
|
||||||
UInt64 h[2];
|
|
||||||
char bytes[16];
|
|
||||||
};
|
|
||||||
MurmurHash3_x64_128(data, size, 0, bytes);
|
|
||||||
return h[0] ^ h[1];
|
|
||||||
}
|
|
||||||
|
|
||||||
static UInt64 combineHashes(UInt64 h1, UInt64 h2)
|
|
||||||
{
|
|
||||||
return IntHash64Impl::apply(h1) ^ h2;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct MurmurHash3Impl128
|
|
||||||
{
|
|
||||||
static constexpr auto name = "murmurHash3_128";
|
|
||||||
enum { length = 16 };
|
|
||||||
|
|
||||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
|
||||||
{
|
|
||||||
MurmurHash3_x64_128(begin, size, 0, out_char_data);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
struct URLHashImpl
|
struct URLHashImpl
|
||||||
{
|
{
|
||||||
static UInt64 apply(const char * data, const size_t size)
|
static UInt64 apply(const char * data, const size_t size)
|
||||||
@ -899,48 +961,6 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; };
|
|||||||
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
struct NameIntHash64 { static constexpr auto name = "intHash64"; };
|
||||||
|
|
||||||
|
|
||||||
struct ImplCityHash64
|
|
||||||
{
|
|
||||||
static constexpr auto name = "cityHash64";
|
|
||||||
using ReturnType = UInt64;
|
|
||||||
using uint128_t = CityHash_v1_0_2::uint128;
|
|
||||||
|
|
||||||
static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); }
|
|
||||||
static auto apply(const char * s, const size_t len) { return CityHash_v1_0_2::CityHash64(s, len); }
|
|
||||||
};
|
|
||||||
|
|
||||||
// see farmhash.h for definition of NAMESPACE_FOR_HASH_FUNCTIONS
|
|
||||||
struct ImplFarmHash64
|
|
||||||
{
|
|
||||||
static constexpr auto name = "farmHash64";
|
|
||||||
using ReturnType = UInt64;
|
|
||||||
using uint128_t = NAMESPACE_FOR_HASH_FUNCTIONS::uint128_t;
|
|
||||||
|
|
||||||
static auto combineHashes(UInt64 h1, UInt64 h2) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash128to64(uint128_t(h1, h2)); }
|
|
||||||
static auto apply(const char * s, const size_t len) { return NAMESPACE_FOR_HASH_FUNCTIONS::Hash64(s, len); }
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ImplMetroHash64
|
|
||||||
{
|
|
||||||
static constexpr auto name = "metroHash64";
|
|
||||||
using ReturnType = UInt64;
|
|
||||||
using uint128_t = CityHash_v1_0_2::uint128;
|
|
||||||
|
|
||||||
static auto combineHashes(UInt64 h1, UInt64 h2) { return CityHash_v1_0_2::Hash128to64(uint128_t(h1, h2)); }
|
|
||||||
static auto apply(const char * s, const size_t len)
|
|
||||||
{
|
|
||||||
union
|
|
||||||
{
|
|
||||||
UInt64 u64;
|
|
||||||
UInt8 u8[sizeof(u64)];
|
|
||||||
};
|
|
||||||
|
|
||||||
metrohash64_1(reinterpret_cast<const UInt8 *>(s), len, 0, u8);
|
|
||||||
|
|
||||||
return u64;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;
|
using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;
|
||||||
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
|
using FunctionSipHash64 = FunctionAnyHash<SipHash64Impl>;
|
||||||
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
|
using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
|
||||||
|
@ -1,25 +1,26 @@
|
|||||||
3012058918
|
623211862
|
||||||
1298551497
|
3533626746
|
||||||
864444010
|
2388617433
|
||||||
367840556
|
2708309598
|
||||||
2414502773
|
2414502773
|
||||||
670491991
|
670491991
|
||||||
1343103100
|
|
||||||
0
|
0
|
||||||
0
|
0
|
||||||
0
|
0
|
||||||
0
|
0
|
||||||
1343103100
|
0
|
||||||
1996614413
|
0
|
||||||
|
0
|
||||||
|
1
|
||||||
1
|
1
|
||||||
14834356025302342401
|
14834356025302342401
|
||||||
12725806677685968135
|
12725806677685968135
|
||||||
10577349846663553072
|
12725806677685968135
|
||||||
4138058784
|
4138058784
|
||||||
3831157163
|
3831157163
|
||||||
1343103100
|
3831157163
|
||||||
11303473983767132390
|
11303473983767132390
|
||||||
956517343494314387
|
956517343494314387
|
||||||
10577349846663553072
|
956517343494314387
|
||||||
6145F501578671E2877DBA2BE487AF7E
|
6145F501578671E2877DBA2BE487AF7E
|
||||||
16FE7483905CCE7A85670E43E4678877
|
16FE7483905CCE7A85670E43E4678877
|
||||||
|
@ -13,6 +13,7 @@ SELECT murmurHash2_32('\x03\0\0');
|
|||||||
SELECT murmurHash2_32(1);
|
SELECT murmurHash2_32(1);
|
||||||
SELECT murmurHash2_32(toUInt16(2));
|
SELECT murmurHash2_32(toUInt16(2));
|
||||||
|
|
||||||
|
SELECT murmurHash2_32(2) = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) AS b, bitShiftRight(b, 15));
|
||||||
SELECT murmurHash2_32('\x02') = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) AS b, bitShiftRight(b, 15));
|
SELECT murmurHash2_32('\x02') = bitXor(toUInt32(0x5bd1e995 * bitXor(toUInt32(3 * 0x5bd1e995) AS a, bitShiftRight(a, 13))) AS b, bitShiftRight(b, 15));
|
||||||
|
|
||||||
SELECT murmurHash2_64('foo');
|
SELECT murmurHash2_64('foo');
|
||||||
|
@ -1,15 +1,15 @@
|
|||||||
8732148587615156034
|
12940785793559895259
|
||||||
3856459458360415155
|
17926972817233444501
|
||||||
1993857991550209231
|
7456555839952096623
|
||||||
5465424717626995012
|
955237314186186656
|
||||||
15495040516566687427
|
8175794665478042155
|
||||||
13266110974878256384
|
9325786087413524176
|
||||||
617416965
|
2822869866
|
||||||
3293554683
|
1460833561
|
||||||
4210800467
|
222444531
|
||||||
6847376565456338547
|
13951512892560982617
|
||||||
15499510486101262177
|
4952008279444388047
|
||||||
13552202417419166072
|
15509665835504406222
|
||||||
6847376565456338547
|
8163029322371165472
|
||||||
15499510486101262177
|
8788309436660676487
|
||||||
14474638290107799038
|
236561483980029756
|
||||||
|
Loading…
Reference in New Issue
Block a user