From a2857491c42d560e882debe6aa2f5563079a7841 Mon Sep 17 00:00:00 2001 From: yaqi-zhao Date: Fri, 27 May 2022 13:54:11 -0400 Subject: [PATCH] add avx512 support for mergetreereader --- src/Common/TargetSpecific.cpp | 3 ++ src/Common/TargetSpecific.h | 1 + .../MergeTree/MergeTreeRangeReader.cpp | 29 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/src/Common/TargetSpecific.cpp b/src/Common/TargetSpecific.cpp index 43319eff44b..369c21490d4 100644 --- a/src/Common/TargetSpecific.cpp +++ b/src/Common/TargetSpecific.cpp @@ -16,6 +16,8 @@ UInt32 getSupportedArchs() result |= static_cast(TargetArch::AVX2); if (Cpu::CpuFlagsCache::have_AVX512F) result |= static_cast(TargetArch::AVX512F); + if (Cpu::CpuFlagsCache::have_AVX512BW) + result |= static_cast(TargetArch::AVX512BW); return result; } @@ -34,6 +36,7 @@ String toString(TargetArch arch) case TargetArch::AVX: return "avx"; case TargetArch::AVX2: return "avx2"; case TargetArch::AVX512F: return "avx512f"; + case TargetArch::AVX512BW: return "avx512bw"; } __builtin_unreachable(); diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h index d7fa55fbb08..522dd6e43c3 100644 --- a/src/Common/TargetSpecific.h +++ b/src/Common/TargetSpecific.h @@ -80,6 +80,7 @@ enum class TargetArch : UInt32 AVX = (1 << 1), AVX2 = (1 << 2), AVX512F = (1 << 3), + AVX512BW = (1 << 4), }; /// Runtime detection. diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index d8dba458203..84a1ab91906 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -10,6 +11,7 @@ #include #endif + namespace DB { namespace ErrorCodes @@ -449,6 +451,33 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con { size_t count = 0; +#if defined(__AVX512F__) && defined(__AVX512BW__) /// check if avx512 instructions are compiled + if (isArchSupported(TargetArch::AVX512BW)) { + /// check if cpu support avx512 dynamically, haveAVX512BW contains check of haveAVX512F + const __m512i zero64 = _mm512_setzero_epi32(); + while (end - begin >= 64) + { + end -= 64; + const auto * pos = end; + UInt64 val = static_cast(_mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast(pos)), zero64, _MM_CMPINT_EQ)); + val = ~val; + if (val == 0) + { + count += 64; + } else + { + count += __builtin_clzll(val); + return count; + } + } + while (end > begin && *(--end) == 0) + { + ++count; + } + return count; + } +#endif + #if defined(__SSE2__) && defined(__POPCNT__) const __m128i zero16 = _mm_setzero_si128(); while (end - begin >= 64)