diff --git a/contrib/murmurhash/src/MurmurHash2.cpp b/contrib/murmurhash/src/MurmurHash2.cpp
index 1c4469b0a02..0bd0a352dc4 100644
--- a/contrib/murmurhash/src/MurmurHash2.cpp
+++ b/contrib/murmurhash/src/MurmurHash2.cpp
@@ -31,6 +31,36 @@
 #define BIG_CONSTANT(x) (x##LLU)
 
 #endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - the value is assembled from individual bytes in little-endian
+// order, so the result is the same on every host and p does not have to be
+// aligned for its pointee type. Compilers are expected to fold the byte
+// accesses into the most efficient instruction (a single load where possible).
+
+static inline uint32_t getblock ( const uint32_t * p )
+{
+  const uint8_t * c = (const uint8_t *)p;
+
+  return  (uint32_t)c[0]        |
+         ((uint32_t)c[1] <<  8) |
+         ((uint32_t)c[2] << 16) |
+         ((uint32_t)c[3] << 24);
+}
+
+static inline uint64_t getblock ( const uint64_t * p )
+{
+  const uint8_t * c = (const uint8_t *)p;
+
+  return  (uint64_t)c[0]        |
+         ((uint64_t)c[1] <<  8) |
+         ((uint64_t)c[2] << 16) |
+         ((uint64_t)c[3] << 24) |
+         ((uint64_t)c[4] << 32) |
+         ((uint64_t)c[5] << 40) |
+         ((uint64_t)c[6] << 48) |
+         ((uint64_t)c[7] << 56);
+}
 
 //-----------------------------------------------------------------------------
 
@@ -52,7 +82,7 @@ uint32_t MurmurHash2 ( const void * key, size_t len, uint32_t seed )
 
   while(len >= 4)
   {
-    uint32_t k = *(uint32_t*)data;
+    uint32_t k = getblock((const uint32_t *)data);
 
     k *= m;
     k ^= k >> r;
@@ -105,7 +135,7 @@ uint64_t MurmurHash64A ( const void * key, size_t len, uint64_t seed )
 
   while(data != end)
   {
-    uint64_t k = *data++;
+    uint64_t k = getblock(data++);
 
     k *= m;
     k ^= k >> r;
@@ -151,12 +181,12 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
 
   while(len >= 8)
   {
-    uint32_t k1 = *data++;
+    uint32_t k1 = getblock(data++);
     k1 *= m; k1 ^= k1 >> r; k1 *= m;
     h1 *= m; h1 ^= k1;
     len -= 4;
 
-    uint32_t k2 = *data++;
+    uint32_t k2 = getblock(data++);
     k2 *= m; k2 ^= k2 >> r; k2 *= m;
     h2 *= m; h2 ^= k2;
     len -= 4;
@@ -164,7 +194,7 @@ uint64_t MurmurHash64B ( const void * key, size_t len, uint64_t seed )
 
   if(len >= 4)
   {
-    uint32_t k1 = *data++;
+    uint32_t k1 = getblock(data++);
     k1 *= m; k1 ^= k1 >> r; k1 *= m;
     h1 *= m; h1 ^= k1;
     len -= 4;
@@ -215,7 +245,7 @@ uint32_t MurmurHash2A ( const void * key, size_t len, uint32_t seed )
 
   while(len >= 4)
   {
-    uint32_t k = *(uint32_t*)data;
+    uint32_t k = getblock((const uint32_t *)data);
 
     mmix(h,k);
 
@@ -278,7 +308,7 @@ public:
 
     while(len >= 4)
     {
-      uint32_t k = *(uint32_t*)data;
+      uint32_t k = getblock((const uint32_t *)data);
 
       mmix(m_hash,k);
 
@@ -427,7 +457,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
 
     while(len >= 4)
     {
-      d = *(uint32_t *)data;
+      d = getblock((const uint32_t *)data);
       t = (t >> sr) | (d << sl);
 
       uint32_t k = t;
@@ -492,7 +522,7 @@ uint32_t MurmurHashAligned2 ( const void * key, size_t len, uint32_t seed )
   {
     while(len >= 4)
     {
-      uint32_t k = *(uint32_t *)data;
+      uint32_t k = getblock((const uint32_t *)data);
 
       MIX(h,k,m);
 
diff --git a/contrib/murmurhash/src/MurmurHash3.cpp b/contrib/murmurhash/src/MurmurHash3.cpp
index cf5158e97ad..6573c470be3 100644
--- a/contrib/murmurhash/src/MurmurHash3.cpp
+++ b/contrib/murmurhash/src/MurmurHash3.cpp
@@ -55,14 +55,29 @@ inline uint64_t rotl64 ( uint64_t x, int8_t r )
 
 FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
 {
-  uint32_t res;
-  memcpy(&res, p + i, sizeof(res));
-  return res;
+  // Assemble the block byte by byte in little-endian order: the result does
+  // not depend on host endianness and p + i does not have to be aligned.
+  const uint8_t * c = (const uint8_t *)(p + i);
+
+  return  (uint32_t)c[0]        |
+         ((uint32_t)c[1] <<  8) |
+         ((uint32_t)c[2] << 16) |
+         ((uint32_t)c[3] << 24);
 }
 
 FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
 {
-  return p[i];
+  // Little-endian, alignment-agnostic read of the i-th 64-bit block.
+  const uint8_t * c = (const uint8_t *)(p + i);
+
+  return  (uint64_t)c[0]        |
+         ((uint64_t)c[1] <<  8) |
+         ((uint64_t)c[2] << 16) |
+         ((uint64_t)c[3] << 24) |
+         ((uint64_t)c[4] << 32) |
+         ((uint64_t)c[5] << 40) |
+         ((uint64_t)c[6] << 48) |
+         ((uint64_t)c[7] << 56);
 }
 
 //-----------------------------------------------------------------------------
@@ -329,9 +344,18 @@ void MurmurHash3_x64_128 ( const void * key, const size_t len,
 
   h1 += h2;
   h2 += h1;
 
+  // Swap the halves only on hosts known to be big-endian, so that the 16
+  // output bytes are the exact byte reversal of the little-endian digest.
+  // Every other host, including one whose compiler does not define
+  // __BYTE_ORDER__, keeps the canonical h1, h2 order.
+#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+  ((uint64_t*)out)[0] = h2;
+  ((uint64_t*)out)[1] = h1;
+#else
   ((uint64_t*)out)[0] = h1;
   ((uint64_t*)out)[1] = h2;
+#endif
 }
 
 //-----------------------------------------------------------------------------
diff --git a/tests/queries/0_stateless/00678_murmurhash.reference b/tests/queries/0_stateless/00678_murmurhash.reference
index 988c022f1bf..fb4a00ba046 100644
--- a/tests/queries/0_stateless/00678_murmurhash.reference
+++ b/tests/queries/0_stateless/00678_murmurhash.reference
@@ -25,5 +25,5 @@
 9631199822919835226
 4334672815104069193
 4334672815104069193
-6145F501578671E2877DBA2BE487AF7E
-16FE7483905CCE7A85670E43E4678877
+1
+1
diff --git a/tests/queries/0_stateless/00678_murmurhash.sql b/tests/queries/0_stateless/00678_murmurhash.sql
index 705c62480a0..eda29fd17cd 100644
--- a/tests/queries/0_stateless/00678_murmurhash.sql
+++ b/tests/queries/0_stateless/00678_murmurhash.sql
@@ -32,5 +32,7 @@ SELECT gccMurmurHash('foo');
 SELECT gccMurmurHash('\x01');
 SELECT gccMurmurHash(1);
 
-SELECT hex(murmurHash3_128('foo'));
-SELECT hex(murmurHash3_128('\x01'));
+-- On big-endian hosts the 128-bit digest is byte-reversed, so accept either byte order
+SELECT hex(murmurHash3_128('foo')) = hex(reverse(unhex('6145F501578671E2877DBA2BE487AF7E'))) or hex(murmurHash3_128('foo')) = '6145F501578671E2877DBA2BE487AF7E';
+-- On big-endian hosts the 128-bit digest is byte-reversed, so accept either byte order
+SELECT hex(murmurHash3_128('\x01')) = hex(reverse(unhex('16FE7483905CCE7A85670E43E4678877'))) or hex(murmurHash3_128('\x01')) = '16FE7483905CCE7A85670E43E4678877';