From d6fb682f711c9e60d3346f8525fc66772e7dff03 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Mon, 25 Jul 2022 15:55:39 +0800 Subject: [PATCH 1/2] Columns: remove static declaration to zero registers A static declaration forces an extra memory load, whereas zeroing a register needs none (e.g. vpxor zmm, zmm, zmm). --- src/Columns/ColumnsCommon.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Columns/ColumnsCommon.h b/src/Columns/ColumnsCommon.h index 1e5849e2b88..20ec5872b93 100644 --- a/src/Columns/ColumnsCommon.h +++ b/src/Columns/ColumnsCommon.h @@ -27,17 +27,17 @@ namespace ErrorCodes inline UInt64 bytes64MaskToBits64Mask(const UInt8 * bytes64) { #if defined(__AVX512F__) && defined(__AVX512BW__) - static const __m512i zero64 = _mm512_setzero_epi32(); + const __m512i zero64 = _mm512_setzero_epi32(); UInt64 res = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(bytes64)), zero64, _MM_CMPINT_EQ); #elif defined(__AVX__) && defined(__AVX2__) - static const __m256i zero32 = _mm256_setzero_si256(); + const __m256i zero32 = _mm256_setzero_si256(); UInt64 res = (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8( _mm256_loadu_si256(reinterpret_cast(bytes64)), zero32))) & 0xffffffff) | (static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8( _mm256_loadu_si256(reinterpret_cast(bytes64+32)), zero32))) << 32); #elif defined(__SSE2__) && defined(__POPCNT__) - static const __m128i zero16 = _mm_setzero_si128(); + const __m128i zero16 = _mm_setzero_si128(); UInt64 res = (static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast(bytes64)), zero16))) & 0xffff) From be54d0b954cdc974b43ab4e284647cfc9046566f Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Mon, 25 Jul 2022 16:12:16 +0800 Subject: [PATCH 2/2] Columns: use AVX512BW vptestnmb to get mask --- src/Columns/ColumnsCommon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnsCommon.h b/src/Columns/ColumnsCommon.h index 
20ec5872b93..8a1baa1430a 100644 --- a/src/Columns/ColumnsCommon.h +++ b/src/Columns/ColumnsCommon.h @@ -27,8 +27,8 @@ namespace ErrorCodes inline UInt64 bytes64MaskToBits64Mask(const UInt8 * bytes64) { #if defined(__AVX512F__) && defined(__AVX512BW__) - const __m512i zero64 = _mm512_setzero_epi32(); - UInt64 res = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(bytes64)), zero64, _MM_CMPINT_EQ); + const __m512i vbytes = _mm512_loadu_si512(reinterpret_cast(bytes64)); + UInt64 res = _mm512_testn_epi8_mask(vbytes, vbytes); #elif defined(__AVX__) && defined(__AVX2__) const __m256i zero32 = _mm256_setzero_si256(); UInt64 res =