diff --git a/libs/libmetrohash/LICENSE b/libs/libmetrohash/LICENSE new file mode 100644 index 00000000000..0765a504e62 --- /dev/null +++ b/libs/libmetrohash/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 J. Andrew Rogers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/libs/libmetrohash/README.md b/libs/libmetrohash/README.md new file mode 100644 index 00000000000..a8851cdb2d8 --- /dev/null +++ b/libs/libmetrohash/README.md @@ -0,0 +1,24 @@ +## MetroHash: Faster, Better Hash Functions + +MetroHash is a set of state-of-the-art hash functions for *non-cryptographic* use cases. They are notable for being algorithmically generated in addition to their exceptional performance. The set of published hash functions may be expanded in the future, having been selected from a very large set of hash functions that have been constructed this way. + +* Fastest general-purpose functions for bulk hashing. +* Fastest general-purpose functions for small, variable length keys. +* Robust statistical bias profile, similar to the MD5 cryptographic hash. +* 64-bit, 128-bit, and 128-bit CRC variants currently available. +* Optimized for modern x86-64 microarchitectures. +* Elegant, compact, readable functions. + +You can read more about the design and history [here](http://www.jandrewrogers.com/2015/05/27/metrohash/). + +Six hash functions have been included in the initial release: + +* 64-bit hash functions, "metrohash64_1" and "metrohash64_2" +* 128-bit hash functions, "metrohash128_1" and "metrohash128_2" +* 128-bit hash functions using CRC instructions, "metrohash128crc_1" and "metrohash128crc_2" + +Hash functions in the same family are effectively statistically unique. In other words, if you need two hash functions for a bloom filter, you can use "metrohash64_1" and "metrohash64_2" in the same implementation without issue. An unbounded set of statistically unique functions can be generated in each family. The functions in this repo were generated specifically for public release. + +The hash function generation software made no effort toward portability. While these hash functions should be easily portable to big-endian microarchitectures, they have not been tested on them and the performance optimization algorithms were not targeted at them. ARM64 microarchitectures might be a worthwhile hash function generation targets if I had the hardware. + + diff --git a/libs/libmetrohash/VERSION b/libs/libmetrohash/VERSION new file mode 100644 index 00000000000..211ea847416 --- /dev/null +++ b/libs/libmetrohash/VERSION @@ -0,0 +1,7 @@ +origin: git@github.com:jandrewrogers/MetroHash.git +commit d9dee18a54a8a6766e24c1950b814ac7ca9d1a89 +Merge: 761e8a4 3d06b24 +Author: J. Andrew Rogers +Date: Sat Jun 6 16:12:06 2015 -0700 + + modified README diff --git a/libs/libmetrohash/src/metrohash.h b/libs/libmetrohash/src/metrohash.h new file mode 100644 index 00000000000..0d9b76c99cf --- /dev/null +++ b/libs/libmetrohash/src/metrohash.h @@ -0,0 +1,73 @@ +// metrohash.h +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef METROHASH_METROHASH_H +#define METROHASH_METROHASH_H + +#include +#include + +// MetroHash 64-bit hash functions +void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + +// MetroHash 128-bit hash functions +void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + +// MetroHash 128-bit hash functions using CRC instruction +void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +/* rotate right idiom recognized by compiler*/ +inline static uint64_t rotate_right(uint64_t v, unsigned k) +{ + return (v >> k) | (v << (64 - k)); +} + +// unaligned reads, fast and safe on Nehalem and later microarchitectures +inline static uint64_t read_u64(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u32(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u16(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u8 (const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + + +#endif // #ifndef METROHASH_METROHASH_H diff --git a/libs/libmetrohash/src/metrohash128.cpp b/libs/libmetrohash/src/metrohash128.cpp new file mode 100644 index 00000000000..6370412046e --- /dev/null +++ b/libs/libmetrohash/src/metrohash128.cpp @@ -0,0 +1,178 @@ +// metrohash128.cpp +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "metrohash.h" + +void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + const uint8_t * ptr = reinterpret_cast(key); + const uint8_t * const end = ptr + len; + + uint64_t v[4]; + + v[0] = ((static_cast(seed) - k0) * k3) + len; + v[1] = ((static_cast(seed) + k1) * k2) + len; + + if (len >= 32) + { + v[2] = ((static_cast(seed) + k0) * k2) + len; + v[3] = ((static_cast(seed) - k1) * k3) + len; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 26) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 26) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 26) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0; + } + + if ((end - ptr) >= 16) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; + v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 17) * k1; + v[1] ^= rotate_right((v[1] * k3) + v[0], 17) * k0; + } + + if ((end - ptr) >= 8) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 20) * k1; + } + + if ((end - ptr) >= 4) + { + v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0; + } + + if ((end - ptr) >= 2) + { + v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 24) * k1; + } + + if ((end - ptr) >= 1) + { + v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 24) * k0; + } + + v[0] += rotate_right((v[0] * k0) + v[1], 13); + v[1] += rotate_right((v[1] * k1) + v[0], 37); + v[0] += rotate_right((v[0] * k2) + v[1], 13); + v[1] += rotate_right((v[1] * k3) + v[0], 37); + + memcpy(out, v, 16); +} + + +void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xD6D018F5; + static const uint64_t k1 = 0xA2AA033B; + static const uint64_t k2 = 0x62992FC1; + static const uint64_t k3 = 0x30BC5B29; + + const uint8_t * ptr = reinterpret_cast(key); + const uint8_t * const end = ptr + len; + + uint64_t v[4]; + + v[0] = ((static_cast(seed) - k0) * k3) + len; + v[1] = ((static_cast(seed) + k1) * k2) + len; + + if (len >= 32) + { + v[2] = ((static_cast(seed) + k0) * k2) + len; + v[3] = ((static_cast(seed) - k1) * k3) + len; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; + } + + if ((end - ptr) >= 16) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],29) * k3; + v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],29) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 29) * k1; + v[1] ^= rotate_right((v[1] * k3) + v[0], 29) * k0; + } + + if ((end - ptr) >= 8) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],29) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 29) * k1; + } + + if ((end - ptr) >= 4) + { + v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],29) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 25) * k0; + } + + if ((end - ptr) >= 2) + { + v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],29) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 30) * k1; + } + + if ((end - ptr) >= 1) + { + v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],29) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0; + } + + v[0] += rotate_right((v[0] * k0) + v[1], 33); + v[1] += rotate_right((v[1] * k1) + v[0], 33); + v[0] += rotate_right((v[0] * k2) + v[1], 33); + v[1] += rotate_right((v[1] * k3) + v[0], 33); + + memcpy(out, v, 16); +} + diff --git a/libs/libmetrohash/src/metrohash128crc.cpp b/libs/libmetrohash/src/metrohash128crc.cpp new file mode 100644 index 00000000000..c04cf5a6b23 --- /dev/null +++ b/libs/libmetrohash/src/metrohash128crc.cpp @@ -0,0 +1,180 @@ +// metrohash128crc.cpp +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + + +#include "metrohash.h" +#include + + +void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + const uint8_t * ptr = reinterpret_cast(key); + const uint8_t * const end = ptr + len; + + uint64_t v[4]; + + v[0] = ((static_cast(seed) - k0) * k3) + len; + v[1] = ((static_cast(seed) + k1) * k2) + len; + + if (len >= 32) + { + v[2] = ((static_cast(seed) + k0) * k2) + len; + v[3] = ((static_cast(seed) - k1) * k3) + len; + + do + { + v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr)); ptr += 8; + v[1] ^= _mm_crc32_u64(v[1], read_u64(ptr)); ptr += 8; + v[2] ^= _mm_crc32_u64(v[2], read_u64(ptr)); ptr += 8; + v[3] ^= _mm_crc32_u64(v[3], read_u64(ptr)); ptr += 8; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 34) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 37) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 34) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 37) * k0; + } + + if ((end - ptr) >= 16) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],34) * k3; + v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],34) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 30) * k1; + v[1] ^= rotate_right((v[1] * k3) + v[0], 30) * k0; + } + + if ((end - ptr) >= 8) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],36) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 23) * k1; + } + + if ((end - ptr) >= 4) + { + v[1] ^= _mm_crc32_u64(v[0], read_u32(ptr)); ptr += 4; + v[1] ^= rotate_right((v[1] * k3) + v[0], 19) * k0; + } + + if ((end - ptr) >= 2) + { + v[0] ^= _mm_crc32_u64(v[1], read_u16(ptr)); ptr += 2; + v[0] ^= rotate_right((v[0] * k2) + v[1], 13) * k1; + } + + if ((end - ptr) >= 1) + { + v[1] ^= _mm_crc32_u64(v[0], read_u8 (ptr)); + v[1] ^= rotate_right((v[1] * k3) + v[0], 17) * k0; + } + + v[0] += rotate_right((v[0] * k0) + v[1], 11); + v[1] += rotate_right((v[1] * k1) + v[0], 26); + v[0] += rotate_right((v[0] * k0) + v[1], 11); + v[1] += rotate_right((v[1] * k1) + v[0], 26); + + memcpy(out, v, 16); +} + + +void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xEE783E2F; + static const uint64_t k1 = 0xAD07C493; + static const uint64_t k2 = 0x797A90BB; + static const uint64_t k3 = 0x2E4B2E1B; + + const uint8_t * ptr = reinterpret_cast(key); + const uint8_t * const end = ptr + len; + + uint64_t v[4]; + + v[0] = ((static_cast(seed) - k0) * k3) + len; + v[1] = ((static_cast(seed) + k1) * k2) + len; + + if (len >= 32) + { + v[2] = ((static_cast(seed) + k0) * k2) + len; + v[3] = ((static_cast(seed) - k1) * k3) + len; + + do + { + v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr)); ptr += 8; + v[1] ^= _mm_crc32_u64(v[1], read_u64(ptr)); ptr += 8; + v[2] ^= _mm_crc32_u64(v[2], read_u64(ptr)); ptr += 8; + v[3] ^= _mm_crc32_u64(v[3], read_u64(ptr)); ptr += 8; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 12) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 19) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 12) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 19) * k0; + } + + if ((end - ptr) >= 16) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],41) * k3; + v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],41) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 10) * k1; + v[1] ^= rotate_right((v[1] * k3) + v[0], 10) * k0; + } + + if ((end - ptr) >= 8) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],34) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 22) * k1; + } + + if ((end - ptr) >= 4) + { + v[1] ^= _mm_crc32_u64(v[0], read_u32(ptr)); ptr += 4; + v[1] ^= rotate_right((v[1] * k3) + v[0], 14) * k0; + } + + if ((end - ptr) >= 2) + { + v[0] ^= _mm_crc32_u64(v[1], read_u16(ptr)); ptr += 2; + v[0] ^= rotate_right((v[0] * k2) + v[1], 15) * k1; + } + + if ((end - ptr) >= 1) + { + v[1] ^= _mm_crc32_u64(v[0], read_u8 (ptr)); + v[1] ^= rotate_right((v[1] * k3) + v[0], 18) * k0; + } + + v[0] += rotate_right((v[0] * k0) + v[1], 15); + v[1] += rotate_right((v[1] * k1) + v[0], 27); + v[0] += rotate_right((v[0] * k0) + v[1], 15); + v[1] += rotate_right((v[1] * k1) + v[0], 27); + + memcpy(out, v, 16); +} diff --git a/libs/libmetrohash/src/metrohash64.cpp b/libs/libmetrohash/src/metrohash64.cpp new file mode 100644 index 00000000000..bc4b41eb8f2 --- /dev/null +++ b/libs/libmetrohash/src/metrohash64.cpp @@ -0,0 +1,182 @@ +// metrohash64.cpp +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include "metrohash.h" + +void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + const uint8_t * ptr = reinterpret_cast(key); + const uint8_t * const end = ptr + len; + + uint64_t hash = ((static_cast(seed) + k2) * k0) + len; + + if (len >= 32) + { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1; + uint64_t v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2; + v0 ^= rotate_right(v0 * k0, 35) + v1; + v1 ^= rotate_right(v1 * k3, 35) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) + { + hash += read_u64(ptr) * k3; ptr += 8; + hash ^= rotate_right(hash, 33) * k1; + + } + + if ((end - ptr) >= 4) + { + hash += read_u32(ptr) * k3; ptr += 4; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) + { + hash += read_u16(ptr) * k3; ptr += 2; + hash ^= rotate_right(hash, 13) * k1; + } + + if ((end - ptr) >= 1) + { + hash += read_u8 (ptr) * k3; + hash ^= rotate_right(hash, 25) * k1; + } + + hash ^= rotate_right(hash, 33); + hash *= k0; + hash ^= rotate_right(hash, 33); + + memcpy(out, &hash, 8); +} + + +void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) +{ + static const uint64_t k0 = 0xD6D018F5; + static const uint64_t k1 = 0xA2AA033B; + static const uint64_t k2 = 0x62992FC1; + static const uint64_t k3 = 0x30BC5B29; + + const uint8_t * ptr = reinterpret_cast(key); + const uint8_t * const end = ptr + len; + + uint64_t hash = ((static_cast(seed) + k2) * k0) + len; + + if (len >= 32) + { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 30) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 30) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 30) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0 = hash + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3; + uint64_t v1 = hash + (read_u64(ptr) * k2); ptr += 8; v1 = rotate_right(v1,29) * k3; + v0 ^= rotate_right(v0 * k0, 34) + v1; + v1 ^= rotate_right(v1 * k3, 34) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) + { + hash += read_u64(ptr) * k3; ptr += 8; + hash ^= rotate_right(hash, 36) * k1; + } + + if ((end - ptr) >= 4) + { + hash += read_u32(ptr) * k3; ptr += 4; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) + { + hash += read_u16(ptr) * k3; ptr += 2; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 1) + { + hash += read_u8 (ptr) * k3; + hash ^= rotate_right(hash, 23) * k1; + } + + hash ^= rotate_right(hash, 28); + hash *= k0; + hash ^= rotate_right(hash, 29); + + memcpy(out, &hash, 8); +} + + diff --git a/libs/libmetrohash/src/testvector.h b/libs/libmetrohash/src/testvector.h new file mode 100644 index 00000000000..8c7967453e9 --- /dev/null +++ b/libs/libmetrohash/src/testvector.h @@ -0,0 +1,70 @@ +// testvector.h +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#ifndef METROHASH_TESTVECTOR_H +#define METROHASH_TESTVECTOR_H + +#include "metrohash.h" + + +typedef void (*HashFunction) (const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * hash); + +struct TestVectorData +{ + HashFunction function; + uint32_t bits; + const char * key; + uint32_t seed; + uint8_t hash[64]; +}; + +// The test vector string is selected such that it will properly exercise every +// internal branch of the hash function. Currently that requires a string with +// a length of (at least) 63 bytes. + +static const char * test_key_63 = "012345678901234567890123456789012345678901234567890123456789012"; + +const TestVectorData TestVector [] = +{ + // seed = 0 + { metrohash64_1, 64, test_key_63, 0, "658F044F5C730E40" }, + { metrohash64_2, 64, test_key_63, 0, "073CAAB960623211" }, + { metrohash128_1, 128, test_key_63, 0, "ED9997ED9D0A8B0FF3F266399477788F" }, + { metrohash128_2, 128, test_key_63, 0, "7BBA6FE119CF35D45507EDF3505359AB" }, + { metrohash128crc_1, 128, test_key_63, 0, "B329ED67831604D3DFAC4E4876D8262F" }, + { metrohash128crc_2, 128, test_key_63, 0, "0502A67E257BBD77206BBCA6BBEF2653" }, + + // seed = 1 + { metrohash64_1, 64, test_key_63, 1, "AE49EBB0A856537B" }, + { metrohash64_2, 64, test_key_63, 1, "CF518E9CF58402C0" }, + { metrohash128_1, 128, test_key_63, 1, "DDA6BA67F7DE755EFDF6BEABECCFD1F4" }, + { metrohash128_2, 128, test_key_63, 1, "2DA6AF149A5CDBC12B09DB0846D69EF0" }, + { metrohash128crc_1, 128, test_key_63, 1, "E8FAB51AF19F18A7B10D0A57D4276DF2" }, + { metrohash128crc_2, 128, test_key_63, 1, "2D54F87181A0CF64B02C50D95692BC19" }, +}; + + + +#endif // #ifndef METROHASH_TESTVECTOR_H