From 2eee2d97df04bc310df4c3979748e0ce9c0c6c4d Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 28 Jan 2019 17:00:50 +0300 Subject: [PATCH] Update contrib/libmetrohash --- cmake/Modules/Findmetrohash.cmake | 2 +- contrib/libmetrohash/CMakeLists.txt | 2 +- contrib/libmetrohash/LICENSE | 213 ++++++++++++-- contrib/libmetrohash/README.md | 32 ++ contrib/libmetrohash/VERSION | 9 +- contrib/libmetrohash/src/metrohash.h | 75 +---- contrib/libmetrohash/src/metrohash128.cpp | 275 ++++++++++++++++-- contrib/libmetrohash/src/metrohash128.h | 72 +++++ contrib/libmetrohash/src/metrohash128crc.cpp | 33 +-- contrib/libmetrohash/src/metrohash128crc.h | 27 ++ contrib/libmetrohash/src/metrohash64.cpp | 268 +++++++++++++++-- contrib/libmetrohash/src/metrohash64.h | 73 +++++ contrib/libmetrohash/src/platform.h | 50 ++++ contrib/libmetrohash/src/testvector.h | 31 +- dbms/src/Functions/CMakeLists.txt | 2 +- dbms/src/Interpreters/tests/CMakeLists.txt | 2 + .../Interpreters/tests/hash_map_string_3.cpp | 6 +- 17 files changed, 1002 insertions(+), 170 deletions(-) create mode 100644 contrib/libmetrohash/src/metrohash128.h create mode 100644 contrib/libmetrohash/src/metrohash128crc.h create mode 100644 contrib/libmetrohash/src/metrohash64.h create mode 100644 contrib/libmetrohash/src/platform.h diff --git a/cmake/Modules/Findmetrohash.cmake b/cmake/Modules/Findmetrohash.cmake index 9efc1ed2db8..c51665795bd 100644 --- a/cmake/Modules/Findmetrohash.cmake +++ b/cmake/Modules/Findmetrohash.cmake @@ -28,7 +28,7 @@ find_library(METROHASH_LIBRARIES find_path(METROHASH_INCLUDE_DIR NAMES metrohash.h - PATHS ${METROHASH_ROOT_DIR}/include ${METROHASH_INCLUDE_PATHS} + PATHS ${METROHASH_ROOT_DIR}/include ${METROHASH_INCLUDE_PATHS} PATH_SUFFIXES metrohash ) include(FindPackageHandleStandardArgs) diff --git a/contrib/libmetrohash/CMakeLists.txt b/contrib/libmetrohash/CMakeLists.txt index 2bd5628d0f8..d71a5432715 100644 --- a/contrib/libmetrohash/CMakeLists.txt +++ b/contrib/libmetrohash/CMakeLists.txt @@ 
-1,5 +1,5 @@ if (HAVE_SSE42) # Not used. Pretty easy to port. - set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp) + set (SOURCES_SSE42_ONLY src/metrohash128crc.cpp src/metrohash128crc.h) endif () add_library(metrohash diff --git a/contrib/libmetrohash/LICENSE b/contrib/libmetrohash/LICENSE index 0765a504e62..261eeb9e9f8 100644 --- a/contrib/libmetrohash/LICENSE +++ b/contrib/libmetrohash/LICENSE @@ -1,22 +1,201 @@ -The MIT License (MIT) + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Copyright (c) 2015 J. Andrew Rogers + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: + 1. Definitions. -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/libmetrohash/README.md b/contrib/libmetrohash/README.md index a8851cdb2d8..2ac16b1437c 100644 --- a/contrib/libmetrohash/README.md +++ b/contrib/libmetrohash/README.md @@ -5,12 +5,44 @@ MetroHash is a set of state-of-the-art hash functions for *non-cryptographic* us * Fastest general-purpose functions for bulk hashing. * Fastest general-purpose functions for small, variable length keys. * Robust statistical bias profile, similar to the MD5 cryptographic hash. +* Hashes can be constructed incrementally (**new**) * 64-bit, 128-bit, and 128-bit CRC variants currently available. * Optimized for modern x86-64 microarchitectures. * Elegant, compact, readable functions. You can read more about the design and history [here](http://www.jandrewrogers.com/2015/05/27/metrohash/). 
+## News + +### 23 October 2018 + +The project has been re-licensed under Apache License v2.0. The purpose of this license change is consistency with the imminent release of MetroHash v2.0, which is also licensed under the Apache license. + +### 27 July 2015 + +Two new 64-bit and 128-bit algorithms add the ability to construct hashes incrementally. In addition to supporting incremental construction, the algorithms are slightly superior to the prior versions. + +A big change is that these new algorithms are implemented as C++ classes that support both incremental and stateless hashing. These classes also have a static method for verifying the implementation against the test vectors built into the classes. Implementations are now fully contained by their respective headers e.g. "metrohash128.h". + +*Note: an incremental version of the 128-bit CRC version is on its way but is not included in this push.* + +**Usage Example For Stateless Hashing** + +`MetroHash128::Hash(key, key_length, hash_ptr, seed)` + +**Usage Example For Incremental Hashing** + +`MetroHash128 hasher;` +`hasher.Update(partial_key, partial_key_length);` +`...` +`hasher.Update(partial_key, partial_key_length);` +`hasher.Finalize(hash_ptr);` + +An `Initialize(seed)` method allows the hasher objects to be reused. + + +### 27 May 2015 + Six hash functions have been included in the initial release: * 64-bit hash functions, "metrohash64_1" and "metrohash64_2" diff --git a/contrib/libmetrohash/VERSION b/contrib/libmetrohash/VERSION index 211ea847416..43012d2e31c 100644 --- a/contrib/libmetrohash/VERSION +++ b/contrib/libmetrohash/VERSION @@ -1,7 +1,4 @@ -origin: git@github.com:jandrewrogers/MetroHash.git -commit d9dee18a54a8a6766e24c1950b814ac7ca9d1a89 -Merge: 761e8a4 3d06b24 +origin: https://github.com/jandrewrogers/MetroHash.git +commit 690a521d9beb2e1050cc8f273fdabc13b31bf8f6 tag: v1.1.3 Author: J. 
Andrew Rogers -Date: Sat Jun 6 16:12:06 2015 -0700 - - modified README +Date: Tue Oct 23 09:49:53 2018 -0700 diff --git a/contrib/libmetrohash/src/metrohash.h b/contrib/libmetrohash/src/metrohash.h index 0d9b76c99cf..ffab03216b7 100644 --- a/contrib/libmetrohash/src/metrohash.h +++ b/contrib/libmetrohash/src/metrohash.h @@ -1,73 +1,24 @@ // metrohash.h // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. 
+// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef METROHASH_METROHASH_H #define METROHASH_METROHASH_H -#include -#include - -// MetroHash 64-bit hash functions -void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); -void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); - -// MetroHash 128-bit hash functions -void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); -void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); - -// MetroHash 128-bit hash functions using CRC instruction -void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); -void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); - - -/* rotate right idiom recognized by compiler*/ -inline static uint64_t rotate_right(uint64_t v, unsigned k) -{ - return (v >> k) | (v << (64 - k)); -} - -// unaligned reads, fast and safe on Nehalem and later microarchitectures -inline static uint64_t read_u64(const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - -inline static uint64_t read_u32(const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - -inline static uint64_t read_u16(const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - -inline static uint64_t read_u8 (const void * const ptr) -{ - return static_cast(*reinterpret_cast(ptr)); -} - +#include "metrohash64.h" +#include "metrohash128.h" +#include "metrohash128crc.h" #endif // #ifndef METROHASH_METROHASH_H diff --git a/contrib/libmetrohash/src/metrohash128.cpp 
b/contrib/libmetrohash/src/metrohash128.cpp index 6370412046e..5c143db9cbe 100644 --- a/contrib/libmetrohash/src/metrohash128.cpp +++ b/contrib/libmetrohash/src/metrohash128.cpp @@ -1,29 +1,260 @@ // metrohash128.cpp // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include "platform.h" +#include "metrohash128.h" + +const char * MetroHash128::test_string = "012345678901234567890123456789012345678901234567890123456789012"; + +const uint8_t MetroHash128::test_seed_0[16] = { + 0xC7, 0x7C, 0xE2, 0xBF, 0xA4, 0xED, 0x9F, 0x9B, + 0x05, 0x48, 0xB2, 0xAC, 0x50, 0x74, 0xA2, 0x97 + }; + +const uint8_t MetroHash128::test_seed_1[16] = { + 0x45, 0xA3, 0xCD, 0xB8, 0x38, 0x19, 0x9D, 0x7F, + 0xBD, 0xD6, 0x8D, 0x86, 0x7A, 0x14, 0xEC, 0xEF + }; + + + +MetroHash128::MetroHash128(const uint64_t seed) +{ + Initialize(seed); +} + + +void MetroHash128::Initialize(const uint64_t seed) +{ + // initialize internal hash registers + state.v[0] = (static_cast(seed) - k0) * k3; + state.v[1] = (static_cast(seed) + k1) * k2; + state.v[2] = (static_cast(seed) + k0) * k2; + state.v[3] = (static_cast(seed) - k1) * k3; + + // initialize total length of input + bytes = 0; +} + + +void MetroHash128::Update(const uint8_t * const buffer, const uint64_t length) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + // input buffer may be partially filled + if (bytes % 32) + { + uint64_t fill = 32 - (bytes % 32); + if (fill > length) + fill = length; + + memcpy(input.b + (bytes % 32), ptr, static_cast(fill)); + ptr += fill; + bytes += fill; + + // input buffer is still partially filled + if ((bytes % 32) != 0) return; + + // process full input buffer + state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // bulk update + bytes += (end - ptr); + while (ptr <= (end - 32)) + { + // process directly from the source, bypassing the input buffer + state.v[0] += 
read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // store remaining bytes in input buffer + if (ptr < end) + memcpy(input.b, ptr, end - ptr); +} + + +void MetroHash128::Finalize(uint8_t * const hash) +{ + // finalize bulk loop, if used + if (bytes >= 32) + { + state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 21) * k1; + state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 21) * k0; + state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 21) * k1; + state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 21) * k0; + } + + // process any bytes remaining in the input buffer + const uint8_t * ptr = reinterpret_cast(input.b); + const uint8_t * const end = ptr + (bytes % 32); + + if ((end - ptr) >= 16) + { + state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3; + state.v[1] += read_u64(ptr) * k2; ptr += 8; state.v[1] = rotate_right(state.v[1],33) * k3; + state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 45) * k1; + state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 45) * k0; + } + + if ((end - ptr) >= 8) + { + state.v[0] += read_u64(ptr) * k2; ptr += 8; state.v[0] = rotate_right(state.v[0],33) * k3; + state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 27) * k1; + } + + if ((end - ptr) >= 4) + { + state.v[1] += read_u32(ptr) * k2; ptr += 4; state.v[1] = rotate_right(state.v[1],33) * k3; + state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 46) * k0; + } + + if ((end - ptr) >= 2) + { + state.v[0] += read_u16(ptr) * k2; ptr += 2; state.v[0] = rotate_right(state.v[0],33) * k3; + 
state.v[0] ^= rotate_right((state.v[0] * k2) + state.v[1], 22) * k1; + } + + if ((end - ptr) >= 1) + { + state.v[1] += read_u8 (ptr) * k2; state.v[1] = rotate_right(state.v[1],33) * k3; + state.v[1] ^= rotate_right((state.v[1] * k3) + state.v[0], 58) * k0; + } + + state.v[0] += rotate_right((state.v[0] * k0) + state.v[1], 13); + state.v[1] += rotate_right((state.v[1] * k1) + state.v[0], 37); + state.v[0] += rotate_right((state.v[0] * k2) + state.v[1], 13); + state.v[1] += rotate_right((state.v[1] * k3) + state.v[0], 37); + + bytes = 0; + + // do any endian conversion here + + memcpy(hash, state.v, 16); +} + + +void MetroHash128::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + uint64_t v[4]; + + v[0] = (static_cast(seed) - k0) * k3; + v[1] = (static_cast(seed) + k1) * k2; + + if (length >= 32) + { + v[2] = (static_cast(seed) + k0) * k2; + v[3] = (static_cast(seed) - k1) * k3; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 21) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 21) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 21) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 21) * k0; + } + + if ((end - ptr) >= 16) + { + v[0] += read_u64(ptr) * k2; ptr += 8; v[0] = rotate_right(v[0],33) * k3; + v[1] += read_u64(ptr) * k2; ptr += 8; v[1] = rotate_right(v[1],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 45) * k1; + v[1] ^= rotate_right((v[1] * k3) + v[0], 45) * k0; + } + + if ((end - ptr) >= 8) + { + v[0] += read_u64(ptr) * k2; ptr += 
8; v[0] = rotate_right(v[0],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 27) * k1; + } + + if ((end - ptr) >= 4) + { + v[1] += read_u32(ptr) * k2; ptr += 4; v[1] = rotate_right(v[1],33) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 46) * k0; + } + + if ((end - ptr) >= 2) + { + v[0] += read_u16(ptr) * k2; ptr += 2; v[0] = rotate_right(v[0],33) * k3; + v[0] ^= rotate_right((v[0] * k2) + v[1], 22) * k1; + } + + if ((end - ptr) >= 1) + { + v[1] += read_u8 (ptr) * k2; v[1] = rotate_right(v[1],33) * k3; + v[1] ^= rotate_right((v[1] * k3) + v[0], 58) * k0; + } + + v[0] += rotate_right((v[0] * k0) + v[1], 13); + v[1] += rotate_right((v[1] * k1) + v[0], 37); + v[0] += rotate_right((v[0] * k2) + v[1], 13); + v[1] += rotate_right((v[1] * k3) + v[0], 37); + + // do any endian conversion here + + memcpy(hash, v, 16); +} + + +bool MetroHash128::ImplementationVerified() +{ + uint8_t hash[16]; + const uint8_t * key = reinterpret_cast(MetroHash128::test_string); + + // verify one-shot implementation + MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 0); + if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false; + + MetroHash128::Hash(key, strlen(MetroHash128::test_string), hash, 1); + if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false; + + // verify incremental implementation + MetroHash128 metro; + + metro.Initialize(0); + metro.Update(reinterpret_cast(MetroHash128::test_string), strlen(MetroHash128::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash128::test_seed_0, 16) != 0) return false; + + metro.Initialize(1); + metro.Update(reinterpret_cast(MetroHash128::test_string), strlen(MetroHash128::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash128::test_seed_1, 16) != 0) return false; + + return true; +} -#include "metrohash.h" void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) { @@ -97,6 +328,8 @@ void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t 
seed, uint8_t * v[0] += rotate_right((v[0] * k2) + v[1], 13); v[1] += rotate_right((v[1] * k3) + v[0], 37); + // do any endian conversion here + memcpy(out, v, 16); } @@ -173,6 +406,8 @@ void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * v[0] += rotate_right((v[0] * k2) + v[1], 33); v[1] += rotate_right((v[1] * k3) + v[0], 33); + // do any endian conversion here + memcpy(out, v, 16); } diff --git a/contrib/libmetrohash/src/metrohash128.h b/contrib/libmetrohash/src/metrohash128.h new file mode 100644 index 00000000000..639a4fa97e3 --- /dev/null +++ b/contrib/libmetrohash/src/metrohash128.h @@ -0,0 +1,72 @@ +// metrohash128.h +// +// Copyright 2015-2018 J. Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef METROHASH_METROHASH_128_H +#define METROHASH_METROHASH_128_H + +#include + +class MetroHash128 +{ +public: + static const uint32_t bits = 128; + + // Constructor initializes the same as Initialize() + MetroHash128(const uint64_t seed=0); + + // Initializes internal state for new hash with optional seed + void Initialize(const uint64_t seed=0); + + // Update the hash state with a string of bytes. If the length + // is sufficiently long, the implementation switches to a bulk + // hashing algorithm directly on the argument buffer for speed. + void Update(const uint8_t * buffer, const uint64_t length); + + // Constructs the final hash and writes it to the argument buffer. 
+ // After a hash is finalized, this instance must be Initialized()-ed + // again or the behavior of Update() and Finalize() is undefined. + void Finalize(uint8_t * const hash); + + // A non-incremental function implementation. This can be significantly + // faster than the incremental implementation for some usage patterns. + static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); + + // Does implementation correctly execute test vectors? + static bool ImplementationVerified(); + + // test vectors -- Hash(test_string, seed=0) => test_seed_0 + static const char * test_string; + static const uint8_t test_seed_0[16]; + static const uint8_t test_seed_1[16]; + +private: + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + struct { uint64_t v[4]; } state; + struct { uint8_t b[32]; } input; + uint64_t bytes; +}; + + +// Legacy 128-bit hash functions -- do not use +void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +#endif // #ifndef METROHASH_METROHASH_128_H diff --git a/contrib/libmetrohash/src/metrohash128crc.cpp b/contrib/libmetrohash/src/metrohash128crc.cpp index c04cf5a6b23..775a9a944bf 100644 --- a/contrib/libmetrohash/src/metrohash128crc.cpp +++ b/contrib/libmetrohash/src/metrohash128crc.cpp @@ -1,31 +1,24 @@ // metrohash128crc.cpp // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -#include "metrohash.h" #include +#include +#include "metrohash.h" +#include "platform.h" void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) diff --git a/contrib/libmetrohash/src/metrohash128crc.h b/contrib/libmetrohash/src/metrohash128crc.h new file mode 100644 index 00000000000..f151fd4200d --- /dev/null +++ b/contrib/libmetrohash/src/metrohash128crc.h @@ -0,0 +1,27 @@ +// metrohash128crc.h +// +// Copyright 2015-2018 J. 
Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef METROHASH_METROHASH_128_CRC_H +#define METROHASH_METROHASH_128_CRC_H + +#include + +// Legacy 128-bit hash functions +void metrohash128crc_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash128crc_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +#endif // #ifndef METROHASH_METROHASH_128_CRC_H diff --git a/contrib/libmetrohash/src/metrohash64.cpp b/contrib/libmetrohash/src/metrohash64.cpp index bc4b41eb8f2..7b5ec7f1a42 100644 --- a/contrib/libmetrohash/src/metrohash64.cpp +++ b/contrib/libmetrohash/src/metrohash64.cpp @@ -1,29 +1,257 @@ // metrohash64.cpp // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "platform.h" +#include "metrohash64.h" + +#include + +const char * MetroHash64::test_string = "012345678901234567890123456789012345678901234567890123456789012"; + +const uint8_t MetroHash64::test_seed_0[8] = { 0x6B, 0x75, 0x3D, 0xAE, 0x06, 0x70, 0x4B, 0xAD }; +const uint8_t MetroHash64::test_seed_1[8] = { 0x3B, 0x0D, 0x48, 0x1C, 0xF4, 0xB9, 0xB8, 0xDF }; + + + +MetroHash64::MetroHash64(const uint64_t seed) +{ + Initialize(seed); +} + + +void MetroHash64::Initialize(const uint64_t seed) +{ + vseed = (static_cast(seed) + k2) * k0; + + // initialize internal hash registers + state.v[0] = vseed; + state.v[1] = vseed; + state.v[2] = vseed; + state.v[3] = vseed; + + // initialize total length of input + bytes = 0; +} + + +void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + // input buffer may be partially filled + if (bytes % 32) + { + uint64_t fill = 32 - (bytes % 32); + if (fill > length) + fill = length; + + memcpy(input.b + (bytes % 32), ptr, static_cast(fill)); + ptr += fill; + bytes += fill; + + // input buffer is still partially filled + if ((bytes % 32) != 0) return; + + // process full input buffer + state.v[0] += read_u64(&input.b[ 0]) * k0; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(&input.b[ 8]) * k1; state.v[1] = rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // bulk update + bytes += static_cast(end - ptr); + while (ptr <= (end - 32)) + { + // process directly from the source, bypassing the input buffer + state.v[0] += read_u64(ptr) * k0; ptr += 8; state.v[0] = rotate_right(state.v[0],29) + state.v[2]; + state.v[1] += read_u64(ptr) * k1; ptr += 8; state.v[1] = 
rotate_right(state.v[1],29) + state.v[3]; + state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0]; + state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1]; + } + + // store remaining bytes in input buffer + if (ptr < end) + memcpy(input.b, ptr, static_cast(end - ptr)); +} + + +void MetroHash64::Finalize(uint8_t * const hash) +{ + // finalize bulk loop, if used + if (bytes >= 32) + { + state.v[2] ^= rotate_right(((state.v[0] + state.v[3]) * k0) + state.v[1], 37) * k1; + state.v[3] ^= rotate_right(((state.v[1] + state.v[2]) * k1) + state.v[0], 37) * k0; + state.v[0] ^= rotate_right(((state.v[0] + state.v[2]) * k0) + state.v[3], 37) * k1; + state.v[1] ^= rotate_right(((state.v[1] + state.v[3]) * k1) + state.v[2], 37) * k0; + + state.v[0] = vseed + (state.v[0] ^ state.v[1]); + } + + // process any bytes remaining in the input buffer + const uint8_t * ptr = reinterpret_cast(input.b); + const uint8_t * const end = ptr + (bytes % 32); + + if ((end - ptr) >= 16) + { + state.v[1] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[1] = rotate_right(state.v[1],29) * k3; + state.v[2] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[2] = rotate_right(state.v[2],29) * k3; + state.v[1] ^= rotate_right(state.v[1] * k0, 21) + state.v[2]; + state.v[2] ^= rotate_right(state.v[2] * k3, 21) + state.v[1]; + state.v[0] += state.v[2]; + } + + if ((end - ptr) >= 8) + { + state.v[0] += read_u64(ptr) * k3; ptr += 8; + state.v[0] ^= rotate_right(state.v[0], 55) * k1; + } + + if ((end - ptr) >= 4) + { + state.v[0] += read_u32(ptr) * k3; ptr += 4; + state.v[0] ^= rotate_right(state.v[0], 26) * k1; + } + + if ((end - ptr) >= 2) + { + state.v[0] += read_u16(ptr) * k3; ptr += 2; + state.v[0] ^= rotate_right(state.v[0], 48) * k1; + } + + if ((end - ptr) >= 1) + { + state.v[0] += read_u8 (ptr) * k3; + state.v[0] ^= rotate_right(state.v[0], 37) * k1; + } + + state.v[0] ^= rotate_right(state.v[0], 28); + 
state.v[0] *= k0; + state.v[0] ^= rotate_right(state.v[0], 29); + + bytes = 0; + + // do any endian conversion here + + memcpy(hash, state.v, 8); +} + + +void MetroHash64::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed) +{ + const uint8_t * ptr = reinterpret_cast(buffer); + const uint8_t * const end = ptr + length; + + uint64_t h = (static_cast(seed) + k2) * k0; + + if (length >= 32) + { + uint64_t v[4]; + v[0] = h; + v[1] = h; + v[2] = h; + v[3] = h; + + do + { + v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; + v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; + v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; + v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; + } + while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 37) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 37) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 37) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 37) * k0; + h += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) + { + uint64_t v0 = h + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3; + uint64_t v1 = h + (read_u64(ptr) * k2); ptr += 8; v1 = rotate_right(v1,29) * k3; + v0 ^= rotate_right(v0 * k0, 21) + v1; + v1 ^= rotate_right(v1 * k3, 21) + v0; + h += v1; + } + + if ((end - ptr) >= 8) + { + h += read_u64(ptr) * k3; ptr += 8; + h ^= rotate_right(h, 55) * k1; + } + + if ((end - ptr) >= 4) + { + h += read_u32(ptr) * k3; ptr += 4; + h ^= rotate_right(h, 26) * k1; + } + + if ((end - ptr) >= 2) + { + h += read_u16(ptr) * k3; ptr += 2; + h ^= rotate_right(h, 48) * k1; + } + + if ((end - ptr) >= 1) + { + h += read_u8 (ptr) * k3; + h ^= rotate_right(h, 37) * k1; + } + + h ^= rotate_right(h, 28); + h *= k0; + h ^= rotate_right(h, 29); + + memcpy(hash, &h, 8); +} + + +bool MetroHash64::ImplementationVerified() +{ + uint8_t hash[8]; + 
const uint8_t * key = reinterpret_cast(MetroHash64::test_string); + + // verify one-shot implementation + MetroHash64::Hash(key, strlen(MetroHash64::test_string), hash, 0); + if (memcmp(hash, MetroHash64::test_seed_0, 8) != 0) return false; + + MetroHash64::Hash(key, strlen(MetroHash64::test_string), hash, 1); + if (memcmp(hash, MetroHash64::test_seed_1, 8) != 0) return false; + + // verify incremental implementation + MetroHash64 metro; + + metro.Initialize(0); + metro.Update(reinterpret_cast(MetroHash64::test_string), strlen(MetroHash64::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash64::test_seed_0, 8) != 0) return false; + + metro.Initialize(1); + metro.Update(reinterpret_cast(MetroHash64::test_string), strlen(MetroHash64::test_string)); + metro.Finalize(hash); + if (memcmp(hash, MetroHash64::test_seed_1, 8) != 0) return false; + + return true; +} -#include "metrohash.h" void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out) { diff --git a/contrib/libmetrohash/src/metrohash64.h b/contrib/libmetrohash/src/metrohash64.h new file mode 100644 index 00000000000..d58898b117d --- /dev/null +++ b/contrib/libmetrohash/src/metrohash64.h @@ -0,0 +1,73 @@ +// metrohash64.h +// +// Copyright 2015-2018 J. Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef METROHASH_METROHASH_64_H +#define METROHASH_METROHASH_64_H + +#include + +class MetroHash64 +{ +public: + static const uint32_t bits = 64; + + // Constructor initializes the same as Initialize() + MetroHash64(const uint64_t seed=0); + + // Initializes internal state for new hash with optional seed + void Initialize(const uint64_t seed=0); + + // Update the hash state with a string of bytes. If the length + // is sufficiently long, the implementation switches to a bulk + // hashing algorithm directly on the argument buffer for speed. + void Update(const uint8_t * buffer, const uint64_t length); + + // Constructs the final hash and writes it to the argument buffer. + // After a hash is finalized, this instance must be Initialized()-ed + // again or the behavior of Update() and Finalize() is undefined. + void Finalize(uint8_t * const hash); + + // A non-incremental function implementation. This can be significantly + // faster than the incremental implementation for some usage patterns. + static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0); + + // Does implementation correctly execute test vectors? 
+ static bool ImplementationVerified(); + + // test vectors -- Hash(test_string, seed=0) => test_seed_0 + static const char * test_string; + static const uint8_t test_seed_0[8]; + static const uint8_t test_seed_1[8]; + +private: + static const uint64_t k0 = 0xD6D018F5; + static const uint64_t k1 = 0xA2AA033B; + static const uint64_t k2 = 0x62992FC1; + static const uint64_t k3 = 0x30BC5B29; + + struct { uint64_t v[4]; } state; + struct { uint8_t b[32]; } input; + uint64_t bytes; + uint64_t vseed; +}; + + +// Legacy 64-bit hash functions -- do not use +void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); + + +#endif // #ifndef METROHASH_METROHASH_64_H diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h new file mode 100644 index 00000000000..31291b94b33 --- /dev/null +++ b/contrib/libmetrohash/src/platform.h @@ -0,0 +1,50 @@ +// platform.h +// +// Copyright 2015-2018 J. Andrew Rogers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef METROHASH_PLATFORM_H +#define METROHASH_PLATFORM_H + +#include + +// rotate right idiom recognized by most compilers +inline static uint64_t rotate_right(uint64_t v, unsigned k) +{ + return (v >> k) | (v << (64 - k)); +} + +// unaligned reads, fast and safe on Nehalem and later microarchitectures +inline static uint64_t read_u64(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u32(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u16(const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + +inline static uint64_t read_u8 (const void * const ptr) +{ + return static_cast(*reinterpret_cast(ptr)); +} + + +#endif // #ifndef METROHASH_PLATFORM_H diff --git a/contrib/libmetrohash/src/testvector.h b/contrib/libmetrohash/src/testvector.h index 8c7967453e9..e4006182e4f 100644 --- a/contrib/libmetrohash/src/testvector.h +++ b/contrib/libmetrohash/src/testvector.h @@ -1,27 +1,18 @@ // testvector.h // -// The MIT License (MIT) +// Copyright 2015-2018 J. Andrew Rogers // -// Copyright (c) 2015 J. Andrew Rogers +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in all -// copies or substantial portions of the Software. 
-// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -// SOFTWARE. +// http://www.apache.org/licenses/LICENSE-2.0 // +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. #ifndef METROHASH_TESTVECTOR_H #define METROHASH_TESTVECTOR_H @@ -46,6 +37,8 @@ struct TestVectorData static const char * test_key_63 = "012345678901234567890123456789012345678901234567890123456789012"; +// The hash assumes a little-endian architecture. Treating the hash results +// as an array of uint64_t should enable conversion for big-endian implementations. 
const TestVectorData TestVector [] = { // seed = 0 diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index f210800d279..47e059ba93a 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -23,7 +23,7 @@ target_link_libraries(clickhouse_functions ${OPENSSL_CRYPTO_LIBRARY} ${LZ4_LIBRARY}) -target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) +target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR} ${METROHASH_INCLUDE_DIR}) if (CONSISTENT_HASHING_INCLUDE_DIR) target_include_directories (clickhouse_functions PRIVATE ${CONSISTENT_HASHING_INCLUDE_DIR}) diff --git a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index 2f814c5a6a0..fa98c3529a1 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -15,6 +15,7 @@ target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_ target_link_libraries (hash_map PRIVATE dbms clickhouse_compression) add_executable (hash_map3 hash_map3.cpp) +target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR}) target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) add_executable (hash_map_string hash_map_string.cpp) @@ -25,6 +26,7 @@ add_executable (hash_map_string_2 hash_map_string_2.cpp) target_link_libraries (hash_map_string_2 PRIVATE dbms clickhouse_compression) add_executable (hash_map_string_3 hash_map_string_3.cpp) +target_include_directories(hash_map_string_3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR}) target_link_libraries (hash_map_string_3 PRIVATE dbms clickhouse_compression ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) add_executable (hash_map_string_small hash_map_string_small.cpp) diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index 
5d8d8e4f7c8..f58d79d0db7 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -325,7 +325,7 @@ struct FarmHash64 template -struct MetroHash64 +struct SMetroHash64 { size_t operator() (StringRef x) const { @@ -507,8 +507,8 @@ int main(int argc, char ** argv) if (!m || m == 8) bench (data, "StringRef_VerySimpleHash"); if (!m || m == 9) bench (data, "StringRef_FarmHash64"); - if (!m || m == 10) bench>(data, "StringRef_MetroHash64_1"); - if (!m || m == 11) bench>(data, "StringRef_MetroHash64_2"); + if (!m || m == 10) bench>(data, "StringRef_MetroHash64_1"); + if (!m || m == 11) bench>(data, "StringRef_MetroHash64_2"); return 0; }