mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-19 06:01:57 +00:00
add avx512 support for mergetreereader
This commit is contained in:
parent
eb1953f91e
commit
a2857491c4
@ -16,6 +16,8 @@ UInt32 getSupportedArchs()
|
||||
result |= static_cast<UInt32>(TargetArch::AVX2);
|
||||
if (Cpu::CpuFlagsCache::have_AVX512F)
|
||||
result |= static_cast<UInt32>(TargetArch::AVX512F);
|
||||
if (Cpu::CpuFlagsCache::have_AVX512BW)
|
||||
result |= static_cast<UInt32>(TargetArch::AVX512BW);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -34,6 +36,7 @@ String toString(TargetArch arch)
|
||||
case TargetArch::AVX: return "avx";
|
||||
case TargetArch::AVX2: return "avx2";
|
||||
case TargetArch::AVX512F: return "avx512f";
|
||||
case TargetArch::AVX512BW: return "avx512bw";
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
|
@ -80,6 +80,7 @@ enum class TargetArch : UInt32
|
||||
AVX = (1 << 1),
|
||||
AVX2 = (1 << 2),
|
||||
AVX512F = (1 << 3),
|
||||
AVX512BW = (1 << 4),
|
||||
};
|
||||
|
||||
/// Runtime detection.
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Columns/FilterDescription.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/range.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
@ -10,6 +11,7 @@
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
@ -449,6 +451,33 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con
|
||||
{
|
||||
size_t count = 0;
|
||||
|
||||
#if defined(__AVX512F__) && defined(__AVX512BW__) /// check if avx512 instructions are compiled
|
||||
if (isArchSupported(TargetArch::AVX512BW)) {
|
||||
/// Check at runtime whether the CPU supports AVX512; haveAVX512BW already includes the haveAVX512F check.
|
||||
const __m512i zero64 = _mm512_setzero_epi32();
|
||||
while (end - begin >= 64)
|
||||
{
|
||||
end -= 64;
|
||||
const auto * pos = end;
|
||||
UInt64 val = static_cast<UInt64>(_mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(pos)), zero64, _MM_CMPINT_EQ));
|
||||
val = ~val;
|
||||
if (val == 0)
|
||||
{
|
||||
count += 64;
|
||||
} else
|
||||
{
|
||||
count += __builtin_clzll(val);
|
||||
return count;
|
||||
}
|
||||
}
|
||||
while (end > begin && *(--end) == 0)
|
||||
{
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) && defined(__POPCNT__)
|
||||
const __m128i zero16 = _mm_setzero_si128();
|
||||
while (end - begin >= 64)
|
||||
|
Loading…
Reference in New Issue
Block a user