add avx512 support for mergetreereader

This commit is contained in:
yaqi-zhao 2022-05-27 13:54:11 -04:00
parent eb1953f91e
commit a2857491c4
3 changed files with 33 additions and 0 deletions

View File

@ -16,6 +16,8 @@ UInt32 getSupportedArchs()
result |= static_cast<UInt32>(TargetArch::AVX2);
if (Cpu::CpuFlagsCache::have_AVX512F)
result |= static_cast<UInt32>(TargetArch::AVX512F);
if (Cpu::CpuFlagsCache::have_AVX512BW)
result |= static_cast<UInt32>(TargetArch::AVX512BW);
return result;
}
@ -34,6 +36,7 @@ String toString(TargetArch arch)
case TargetArch::AVX: return "avx";
case TargetArch::AVX2: return "avx2";
case TargetArch::AVX512F: return "avx512f";
case TargetArch::AVX512BW: return "avx512bw";
}
__builtin_unreachable();

View File

@ -80,6 +80,7 @@ enum class TargetArch : UInt32
AVX = (1 << 1),
AVX2 = (1 << 2),
AVX512F = (1 << 3),
AVX512BW = (1 << 4),
};
/// Runtime detection.

View File

@ -2,6 +2,7 @@
#include <Columns/FilterDescription.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Common/TargetSpecific.h>
#include <base/range.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/DataTypeNothing.h>
@ -10,6 +11,7 @@
#include <emmintrin.h>
#endif
namespace DB
{
namespace ErrorCodes
@ -449,6 +451,33 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
{
size_t count = 0;
#if defined(__AVX512F__) && defined(__AVX512BW__) /// check if avx512 instructions are compiled
if (isArchSupported(TargetArch::AVX512BW)) {
/// Check at runtime whether the CPU supports AVX-512; the haveAVX512BW check already includes the haveAVX512F check.
const __m512i zero64 = _mm512_setzero_epi32();
while (end - begin >= 64)
{
end -= 64;
const auto * pos = end;
UInt64 val = static_cast<UInt64>(_mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(pos)), zero64, _MM_CMPINT_EQ));
val = ~val;
if (val == 0)
{
count += 64;
} else
{
count += __builtin_clzll(val);
return count;
}
}
while (end > begin && *(--end) == 0)
{
++count;
}
return count;
}
#endif
#if defined(__SSE2__) && defined(__POPCNT__)
const __m128i zero16 = _mm_setzero_si128();
while (end - begin >= 64)