#pragma once /** SipHash is a fast cryptographic hash function for short strings. * Taken from here: https://www.131002.net/siphash/ * * This is SipHash 2-4 variant. * * Two changes are made: * - returns also 128 bits, not only 64; * - done streaming (can be calculated in parts). * * On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL. * (~ 700 MB/sec, 15 million strings per second) */ #include #include #include #include #include #include #define ROTL(x, b) static_cast(((x) << (b)) | ((x) >> (64 - (b)))) #define SIPROUND \ do \ { \ v0 += v1; v1 = ROTL(v1, 13); v1 ^= v0; v0 = ROTL(v0, 32); \ v2 += v3; v3 = ROTL(v3, 16); v3 ^= v2; \ v0 += v3; v3 = ROTL(v3, 21); v3 ^= v0; \ v2 += v1; v1 = ROTL(v1, 17); v1 ^= v2; v2 = ROTL(v2, 32); \ } while(0) /// Define macro CURRENT_BYTES_IDX for building index used in current_bytes array /// to ensure correct byte order on different endian machines #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ #define CURRENT_BYTES_IDX(i) (7 - i) #else #define CURRENT_BYTES_IDX(i) (i) #endif class SipHash { private: /// State. UInt64 v0; UInt64 v1; UInt64 v2; UInt64 v3; /// How many bytes have been processed. UInt64 cnt; /// The current 8 bytes of input data. union { UInt64 current_word; UInt8 current_bytes[8]; }; ALWAYS_INLINE void finalize() { /// In the last free byte, we write the remainder of the division by 256. current_bytes[CURRENT_BYTES_IDX(7)] = static_cast(cnt); v3 ^= current_word; SIPROUND; SIPROUND; v0 ^= current_word; v2 ^= 0xff; SIPROUND; SIPROUND; SIPROUND; SIPROUND; } public: /// Arguments - seed. SipHash(UInt64 k0 = 0, UInt64 k1 = 0) /// NOLINT { /// Initialize the state with some random bytes and seed. v0 = 0x736f6d6570736575ULL ^ k0; v1 = 0x646f72616e646f6dULL ^ k1; v2 = 0x6c7967656e657261ULL ^ k0; v3 = 0x7465646279746573ULL ^ k1; cnt = 0; current_word = 0; } ALWAYS_INLINE void update(const char * data, UInt64 size) { const char * end = data + size; /// We'll finish to process the remainder of the previous update, if any. if (cnt & 7) { while (cnt & 7 && data < end) { current_bytes[CURRENT_BYTES_IDX(cnt & 7)] = *data; ++data; ++cnt; } /// If we still do not have enough bytes to an 8-byte word. if (cnt & 7) return; v3 ^= current_word; SIPROUND; SIPROUND; v0 ^= current_word; } cnt += end - data; while (data + 8 <= end) { current_word = unalignedLoadLE(data); v3 ^= current_word; SIPROUND; SIPROUND; v0 ^= current_word; data += 8; } /// Pad the remainder, which is missing up to an 8-byte word. current_word = 0; switch (end - data) { case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; case 5: current_bytes[CURRENT_BYTES_IDX(4)] = data[4]; [[fallthrough]]; case 4: current_bytes[CURRENT_BYTES_IDX(3)] = data[3]; [[fallthrough]]; case 3: current_bytes[CURRENT_BYTES_IDX(2)] = data[2]; [[fallthrough]]; case 2: current_bytes[CURRENT_BYTES_IDX(1)] = data[1]; [[fallthrough]]; case 1: current_bytes[CURRENT_BYTES_IDX(0)] = data[0]; [[fallthrough]]; case 0: break; } } template ALWAYS_INLINE void update(const T & x) { update(reinterpret_cast(&x), sizeof(x)); /// NOLINT } ALWAYS_INLINE void update(const std::string & x) { update(x.data(), x.length()); } ALWAYS_INLINE void update(const std::string_view x) { update(x.data(), x.size()); } /// Get the result in some form. This can only be done once! void get128(char * out) { finalize(); #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ unalignedStore(out + 8, v0 ^ v1); unalignedStore(out, v2 ^ v3); #else unalignedStore(out, v0 ^ v1); unalignedStore(out + 8, v2 ^ v3); #endif } template ALWAYS_INLINE void get128(T & lo, T & hi) { static_assert(sizeof(T) == 8); finalize(); lo = v0 ^ v1; hi = v2 ^ v3; } template ALWAYS_INLINE void get128(T & dst) { static_assert(sizeof(T) == 16); get128(reinterpret_cast(&dst)); } UInt64 get64() { finalize(); return v0 ^ v1 ^ v2 ^ v3; } UInt128 get128() { UInt128 res; get128(res); return res; } }; #undef ROTL #undef SIPROUND #include inline void sipHash128(const char * data, const size_t size, char * out) { SipHash hash; hash.update(data, size); hash.get128(out); } inline UInt128 sipHash128(const char * data, const size_t size) { SipHash hash; hash.update(data, size); return hash.get128(); } inline UInt64 sipHash64(const char * data, const size_t size) { SipHash hash; hash.update(data, size); return hash.get64(); } template UInt64 sipHash64(const T & x) { SipHash hash; hash.update(x); return hash.get64(); } inline UInt64 sipHash64(const std::string & s) { return sipHash64(s.data(), s.size()); } #undef CURRENT_BYTES_IDX