From 28d5c3cf7f0bc6b340791bdd08cea4b2987002cd Mon Sep 17 00:00:00 2001 From: MeenaRenganathan22 Date: Wed, 11 Jan 2023 17:00:10 -0800 Subject: [PATCH] Addressed the review comments --- contrib/crc32-vpmsum-cmake/CMakeLists.txt | 4 +++- contrib/crc32-vpmsum-cmake/README.md | 3 ++- contrib/crc32-vpmsum-cmake/vec_crc32.h | 17 +++++++---------- src/Common/HashTable/Hash.h | 8 ++++---- src/Functions/FunctionsStringHash.cpp | 16 ++++++++-------- src/Functions/FunctionsStringSimilarity.cpp | 4 ++-- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/contrib/crc32-vpmsum-cmake/CMakeLists.txt b/contrib/crc32-vpmsum-cmake/CMakeLists.txt index bb7d5618410..66fe6690e01 100644 --- a/contrib/crc32-vpmsum-cmake/CMakeLists.txt +++ b/contrib/crc32-vpmsum-cmake/CMakeLists.txt @@ -1,5 +1,7 @@ +# module crc32-vpmsum gets build along with the files vec_crc32.h and crc32_constants.h in crc32-vpmsum-cmake +# Please see README.md for information about how to generate crc32_constants.h if (NOT ARCH_PPC64LE) - message(STATUS "crc32-vpmsum library is only supported on ppc64le") + message (STATUS "crc32-vpmsum library is only supported on ppc64le") return() endif() diff --git a/contrib/crc32-vpmsum-cmake/README.md b/contrib/crc32-vpmsum-cmake/README.md index 9ea8133e331..cab4cdc5501 100644 --- a/contrib/crc32-vpmsum-cmake/README.md +++ b/contrib/crc32-vpmsum-cmake/README.md @@ -1,8 +1,9 @@ # To Generate crc32_constants.h -- Run make file in `../crc32-vpmsum` diretory using folling options and CRC polynomial. These options should use the same polynomial and order used by intel intrinisic functions +- Run make file in `../crc32-vpmsum` directory using following options and CRC polynomial. These options should use the same polynomial and order used by intel intrinisic functions ```bash make crc32_constants.h CRC="0x11EDC6F41" OPTIONS="-x -r -c" ``` - move the generated `crc32_constants.h` into this directory - To understand more about this go here: https://masterchef2209.wordpress.com/2020/06/17/guide-to-intel-sse4-2-crc-intrinisics-implementation-for-simde/ +- Here is the link to information about intel intrinsic functions: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u64&ig_expand=1492,1493,1559 diff --git a/contrib/crc32-vpmsum-cmake/vec_crc32.h b/contrib/crc32-vpmsum-cmake/vec_crc32.h index 0ef13616b34..2280acec48c 100644 --- a/contrib/crc32-vpmsum-cmake/vec_crc32.h +++ b/contrib/crc32-vpmsum-cmake/vec_crc32.h @@ -1,6 +1,9 @@ #ifndef VEC_CRC32 #define VEC_CRC32 +#if ! ((defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# error PowerPC architecture is expected +#endif #ifdef __cplusplus extern "C" { @@ -10,16 +13,10 @@ unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned lon static inline uint32_t crc32_ppc(uint64_t crc, unsigned char const *buffer, size_t len) { - unsigned char *emptybuffer; - if (!buffer) { - emptybuffer = (unsigned char *)malloc(len); - bzero(emptybuffer, len); - crc = crc32_vpmsum(crc, emptybuffer, len); - free(emptybuffer); - } else { - crc = crc32_vpmsum(crc, buffer, (unsigned long)len); - } - return crc; + assert(buffer); + crc = crc32_vpmsum(crc, buffer, (unsigned long)len); + + return crc; } #ifdef __cplusplus diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index c7342d061d8..acac8eeccb2 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -91,10 +91,10 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x) return _mm_crc32_u64(-1ULL, x); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(-1U, x); -#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return s390x_crc32(-1U, x) #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(-1U, reinterpret_cast(&x), sizeof(x)); +#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return s390x_crc32(-1U, x) #else /// On other platforms we do not have CRC32. NOTE This can be confusing. /// NOTE: consider using intHash32() @@ -107,10 +107,10 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) return _mm_crc32_u64(updated_value, x); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(static_cast(updated_value), x); -#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ - return s390x_crc32(updated_value, x); #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(updated_value, reinterpret_cast(&x), sizeof(x)); +#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ + return s390x_crc32(updated_value, x); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. return intHash64(x) ^ updated_value; diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index bf0b7463a5d..ea861b7e657 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -40,10 +40,10 @@ struct Hash return _mm_crc32_u64(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(static_cast(crc), val); -#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ - return s390x_crc32(crc, val); #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); +#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ + return s390x_crc32(crc, val); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -55,10 +55,10 @@ struct Hash return _mm_crc32_u32(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cw(crc, val); -#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ - return s390x_crc32_u32(crc, val); #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); +#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ + return s390x_crc32_u32(crc, val); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -70,10 +70,10 @@ struct Hash return _mm_crc32_u16(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32ch(crc, val); -#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ - return s390x_crc32_u16(crc, val); #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); +#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ + return s390x_crc32_u16(crc, val); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -85,10 +85,10 @@ struct Hash return _mm_crc32_u8(crc, val); #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cb(crc, val); -#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ - return s390x_crc32_u8(crc, val); #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val)); +#elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ + return s390x_crc32_u8(crc, val); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 87aa0f4b3f7..0cc0248baf4 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -74,10 +74,10 @@ struct NgramDistanceImpl return _mm_crc32_u64(code_points[2], combined) & 0xFFFFu; #elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) return __crc32cd(code_points[2], combined) & 0xFFFFu; -#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return s390x_crc32(code_points[2], combined) & 0xFFFFu; #elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ return crc32_ppc(code_points[2], reinterpret_cast(&combined), sizeof(combined)) & 0xFFFFu; +#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return s390x_crc32(code_points[2], combined) & 0xFFFFu; #else return (intHashCRC32(combined) ^ intHashCRC32(code_points[2])) & 0xFFFFu; #endif