2012-10-07 06:30:10 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/** SipHash is a fast cryptographic hash function for short strings.
|
|
|
|
* Taken from here: https://www.131002.net/siphash/
|
2012-10-07 06:30:10 +00:00
|
|
|
*
|
2017-05-10 04:00:19 +00:00
|
|
|
* This is SipHash 2-4 variant.
|
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* Two changes are made:
|
2017-05-10 04:00:19 +00:00
|
|
|
* - it can also return a 128-bit result, not only a 64-bit one;
|
2017-05-07 20:25:26 +00:00
|
|
|
* - it works in a streaming fashion (the hash can be calculated in parts).
|
2012-10-07 06:30:10 +00:00
|
|
|
*
|
2017-05-07 20:25:26 +00:00
|
|
|
* On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
|
|
|
|
* (~ 700 MB/sec, 15 million strings per second)
|
2012-10-07 06:30:10 +00:00
|
|
|
*/
|
|
|
|
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/types.h>
|
|
|
|
#include <base/unaligned.h>
|
2018-08-30 16:31:20 +00:00
|
|
|
#include <string>
|
2018-03-03 15:36:20 +00:00
|
|
|
#include <type_traits>
|
2019-02-01 10:14:17 +00:00
|
|
|
#include <Core/Defines.h>
|
2021-09-13 08:20:20 +00:00
|
|
|
#include <base/extended_types.h>
|
2021-01-26 18:22:40 +00:00
|
|
|
|
2012-10-07 06:30:10 +00:00
|
|
|
|
2017-05-10 06:36:15 +00:00
|
|
|
/// Rotates the 64-bit value `value` left by `shift` bits.
/// SIPROUND below only uses shifts strictly between 0 and 64.
#define ROTL(value, shift) static_cast<UInt64>(((value) << (shift)) | ((value) >> (64 - (shift))))

/// One round of mixing over the four state words v0, v1, v2, v3.
/// These must be modifiable UInt64 variables visible at the point of expansion.
/// Wrapped in do { ... } while (0) so that the macro behaves as a single statement.
#define SIPROUND \
    do \
    { \
        v0 += v1; \
        v1 = ROTL(v1, 13); \
        v1 ^= v0; \
        v0 = ROTL(v0, 32); \
        \
        v2 += v3; \
        v3 = ROTL(v3, 16); \
        v3 ^= v2; \
        \
        v0 += v3; \
        v3 = ROTL(v3, 21); \
        v3 ^= v0; \
        \
        v2 += v1; \
        v1 = ROTL(v1, 17); \
        v1 ^= v2; \
        v2 = ROTL(v2, 32); \
    } while (0)
|
2012-10-07 06:30:10 +00:00
|
|
|
|
2022-09-18 02:48:08 +00:00
|
|
|
/// CURRENT_BYTES_IDX(i) maps the logical position `i` (0..7) of a byte inside the
/// current 8-byte word to its index in the current_bytes array, so that the bytes
/// end up in the correct order on machines of either endianness.
/// The parameter is parenthesized in both variants so that an expression argument
/// (for example `cnt & 7`) is evaluated as a whole before it is used.
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define CURRENT_BYTES_IDX(i) (7 - (i))
#else
#define CURRENT_BYTES_IDX(i) (i)
#endif
|
2012-10-07 06:30:10 +00:00
|
|
|
|
|
|
|
class SipHash
|
|
|
|
{
|
|
|
|
private:
|
2017-05-10 04:00:19 +00:00
|
|
|
/// State.
|
|
|
|
UInt64 v0;
|
|
|
|
UInt64 v1;
|
|
|
|
UInt64 v2;
|
|
|
|
UInt64 v3;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// How many bytes have been processed.
|
2017-05-10 04:00:19 +00:00
|
|
|
UInt64 cnt;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// The current 8 bytes of input data.
|
2012-10-07 06:30:10 +00:00
|
|
|
union
|
|
|
|
{
|
2017-05-10 04:00:19 +00:00
|
|
|
UInt64 current_word;
|
|
|
|
UInt8 current_bytes[8];
|
2012-10-07 06:30:10 +00:00
|
|
|
};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-02-01 10:14:17 +00:00
|
|
|
ALWAYS_INLINE void finalize()
|
2013-08-28 17:13:43 +00:00
|
|
|
{
|
2017-05-07 20:25:26 +00:00
|
|
|
/// In the last free byte, we write the remainder of the division by 256.
|
2022-09-15 13:25:23 +00:00
|
|
|
current_bytes[CURRENT_BYTES_IDX(7)] = static_cast<UInt8>(cnt);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2013-08-28 17:13:43 +00:00
|
|
|
v3 ^= current_word;
|
|
|
|
SIPROUND;
|
|
|
|
SIPROUND;
|
|
|
|
v0 ^= current_word;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2013-08-28 17:13:43 +00:00
|
|
|
v2 ^= 0xff;
|
|
|
|
SIPROUND;
|
|
|
|
SIPROUND;
|
|
|
|
SIPROUND;
|
|
|
|
SIPROUND;
|
|
|
|
}
|
2012-10-07 06:30:10 +00:00
|
|
|
|
|
|
|
public:
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Arguments - seed.
|
2022-03-14 14:11:19 +00:00
|
|
|
SipHash(UInt64 k0 = 0, UInt64 k1 = 0) /// NOLINT
|
2012-10-07 06:30:10 +00:00
|
|
|
{
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Initialize the state with some random bytes and seed.
|
2012-10-07 06:30:10 +00:00
|
|
|
v0 = 0x736f6d6570736575ULL ^ k0;
|
|
|
|
v1 = 0x646f72616e646f6dULL ^ k1;
|
|
|
|
v2 = 0x6c7967656e657261ULL ^ k0;
|
|
|
|
v3 = 0x7465646279746573ULL ^ k1;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2012-10-07 06:30:10 +00:00
|
|
|
cnt = 0;
|
2015-06-10 19:56:57 +00:00
|
|
|
current_word = 0;
|
2012-10-07 06:30:10 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2022-02-13 11:08:06 +00:00
|
|
|
ALWAYS_INLINE void update(const char * data, UInt64 size)
|
2012-10-07 06:30:10 +00:00
|
|
|
{
|
|
|
|
const char * end = data + size;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// We'll finish to process the remainder of the previous update, if any.
|
2012-10-07 06:30:10 +00:00
|
|
|
if (cnt & 7)
|
|
|
|
{
|
|
|
|
while (cnt & 7 && data < end)
|
|
|
|
{
|
2022-09-15 13:25:23 +00:00
|
|
|
current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data;
|
2012-10-07 06:30:10 +00:00
|
|
|
++data;
|
|
|
|
++cnt;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
/// If we still do not have enough bytes to an 8-byte word.
|
2012-10-07 06:30:10 +00:00
|
|
|
if (cnt & 7)
|
|
|
|
return;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2012-10-07 06:30:10 +00:00
|
|
|
v3 ^= current_word;
|
|
|
|
SIPROUND;
|
|
|
|
SIPROUND;
|
|
|
|
v0 ^= current_word;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2012-10-07 06:30:10 +00:00
|
|
|
cnt += end - data;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2014-11-27 21:31:13 +00:00
|
|
|
while (data + 8 <= end)
|
2012-10-07 06:30:10 +00:00
|
|
|
{
|
2022-07-29 20:21:50 +00:00
|
|
|
current_word = unalignedLoadLE<UInt64>(data);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2012-10-07 06:30:10 +00:00
|
|
|
v3 ^= current_word;
|
|
|
|
SIPROUND;
|
|
|
|
SIPROUND;
|
|
|
|
v0 ^= current_word;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2012-10-07 06:30:10 +00:00
|
|
|
data += 8;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Pad the remainder, which is missing up to an 8-byte word.
|
2012-10-07 06:30:10 +00:00
|
|
|
current_word = 0;
|
|
|
|
switch (end - data)
|
|
|
|
{
|
2022-09-15 13:25:23 +00:00
|
|
|
case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]];
|
|
|
|
case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]];
|
|
|
|
case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]];
|
|
|
|
case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]];
|
|
|
|
case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]];
|
|
|
|
case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]];
|
|
|
|
case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]];
|
2012-10-07 06:30:10 +00:00
|
|
|
case 0: break;
|
|
|
|
}
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-03-03 15:36:20 +00:00
|
|
|
template <typename T>
|
2022-02-13 11:08:06 +00:00
|
|
|
ALWAYS_INLINE void update(const T & x)
|
2020-08-19 11:52:17 +00:00
|
|
|
{
|
2022-03-11 21:47:28 +00:00
|
|
|
update(reinterpret_cast<const char *>(&x), sizeof(x)); /// NOLINT
|
2020-08-19 11:52:17 +00:00
|
|
|
}
|
|
|
|
|
2022-02-13 11:08:06 +00:00
|
|
|
ALWAYS_INLINE void update(const std::string & x)
|
2018-08-30 16:31:20 +00:00
|
|
|
{
|
2018-09-03 10:14:05 +00:00
|
|
|
update(x.data(), x.length());
|
2018-08-30 16:31:20 +00:00
|
|
|
}
|
|
|
|
|
2022-05-13 13:43:42 +00:00
|
|
|
ALWAYS_INLINE void update(const std::string_view x)
|
|
|
|
{
|
|
|
|
update(x.data(), x.size());
|
|
|
|
}
|
|
|
|
|
2017-05-07 20:25:26 +00:00
|
|
|
/// Get the result in some form. This can only be done once!
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2013-08-28 17:13:43 +00:00
|
|
|
void get128(char * out)
|
2012-10-07 06:30:10 +00:00
|
|
|
{
|
2013-08-28 17:13:43 +00:00
|
|
|
finalize();
|
2022-09-15 13:25:23 +00:00
|
|
|
unalignedStore<UInt64>(out, v0 ^ v1);
|
|
|
|
unalignedStore<UInt64>(out + 8, v2 ^ v3);
|
2012-10-07 06:30:10 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2018-06-19 18:09:09 +00:00
|
|
|
template <typename T>
|
2019-02-01 10:14:17 +00:00
|
|
|
ALWAYS_INLINE void get128(T & lo, T & hi)
|
2013-08-28 17:13:43 +00:00
|
|
|
{
|
2018-06-19 18:09:09 +00:00
|
|
|
static_assert(sizeof(T) == 8);
|
2013-08-28 17:13:43 +00:00
|
|
|
finalize();
|
|
|
|
lo = v0 ^ v1;
|
|
|
|
hi = v2 ^ v3;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-01-27 00:54:57 +00:00
|
|
|
template <typename T>
|
|
|
|
ALWAYS_INLINE void get128(T & dst)
|
|
|
|
{
|
|
|
|
static_assert(sizeof(T) == 16);
|
|
|
|
get128(reinterpret_cast<char *>(&dst));
|
|
|
|
}
|
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
UInt64 get64()
|
2013-08-28 17:13:43 +00:00
|
|
|
{
|
|
|
|
finalize();
|
|
|
|
return v0 ^ v1 ^ v2 ^ v3;
|
|
|
|
}
|
2012-10-07 06:30:10 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
#undef ROTL
|
|
|
|
#undef SIPROUND
|
2013-10-21 16:32:49 +00:00
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
#include <cstddef>
|
2013-10-21 16:32:49 +00:00
|
|
|
|
2014-10-29 12:25:33 +00:00
|
|
|
inline void sipHash128(const char * data, const size_t size, char * out)
|
|
|
|
{
|
|
|
|
SipHash hash;
|
|
|
|
hash.update(data, size);
|
|
|
|
hash.get128(out);
|
|
|
|
}
|
|
|
|
|
2021-09-13 08:20:20 +00:00
|
|
|
/// One-shot helper: hashes `size` bytes at `data` with a default-seeded SipHash
/// and returns the 128-bit result by value.
inline UInt128 sipHash128(const char * data, const size_t size)
{
    SipHash hasher;
    hasher.update(data, size);

    UInt128 digest;
    hasher.get128(digest);
    return digest;
}
|
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
/// One-shot helper: hashes `size` bytes at `data` with a default-seeded SipHash
/// and returns the 64-bit result.
inline UInt64 sipHash64(const char * data, const size_t size)
{
    SipHash hasher;
    hasher.update(data, size);
    return hasher.get64();
}
|
|
|
|
|
2018-03-03 15:36:20 +00:00
|
|
|
template <typename T>
|
2021-01-26 18:22:40 +00:00
|
|
|
UInt64 sipHash64(const T & x)
|
2020-08-19 11:52:17 +00:00
|
|
|
{
|
|
|
|
SipHash hash;
|
|
|
|
hash.update(x);
|
|
|
|
return hash.get64();
|
|
|
|
}
|
|
|
|
|
2017-05-10 04:00:19 +00:00
|
|
|
/// One-shot helper: returns the 64-bit SipHash of the characters of `s`.
inline UInt64 sipHash64(const std::string & s)
{
    const char * const bytes = s.data();
    const size_t length = s.size();
    return sipHash64(bytes, length);
}
|
2022-09-18 02:48:08 +00:00
|
|
|
|
|
|
|
#undef CURRENT_BYTES_IDX
|