mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Revert "ColumnVector: optimize filter with AVX512VBMI2 compress store"
This commit is contained in:
parent
5ab1eca788
commit
5524706b78
@ -12,14 +12,12 @@
|
|||||||
#include <Common/RadixSort.h>
|
#include <Common/RadixSort.h>
|
||||||
#include <Common/SipHash.h>
|
#include <Common/SipHash.h>
|
||||||
#include <Common/WeakHash.h>
|
#include <Common/WeakHash.h>
|
||||||
#include <Common/TargetSpecific.h>
|
|
||||||
#include <Common/assert_cast.h>
|
#include <Common/assert_cast.h>
|
||||||
#include <base/sort.h>
|
#include <base/sort.h>
|
||||||
#include <base/unaligned.h>
|
#include <base/unaligned.h>
|
||||||
#include <base/bit_cast.h>
|
#include <base/bit_cast.h>
|
||||||
#include <base/scope_guard.h>
|
#include <base/scope_guard.h>
|
||||||
|
|
||||||
#include <bit>
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
@ -27,10 +25,6 @@
|
|||||||
# include <emmintrin.h>
|
# include <emmintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if USE_MULTITARGET_CODE
|
|
||||||
# include <immintrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if USE_EMBEDDED_COMPILER
|
#if USE_EMBEDDED_COMPILER
|
||||||
#include <DataTypes/Native.h>
|
#include <DataTypes/Native.h>
|
||||||
#include <llvm/IR/IRBuilder.h>
|
#include <llvm/IR/IRBuilder.h>
|
||||||
@ -477,115 +471,6 @@ void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t
|
|||||||
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
|
memcpy(data.data() + old_size, &src_vec.data[start], length * sizeof(data[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline UInt64 blsr(UInt64 mask)
|
|
||||||
{
|
|
||||||
#ifdef __BMI__
|
|
||||||
return _blsr_u64(mask);
|
|
||||||
#else
|
|
||||||
return mask & (mask-1);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
DECLARE_DEFAULT_CODE(
|
|
||||||
template <typename T, typename Container, size_t SIMD_BYTES>
|
|
||||||
inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data)
|
|
||||||
{
|
|
||||||
while (filt_pos < filt_end_aligned)
|
|
||||||
{
|
|
||||||
UInt64 mask = bytes64MaskToBits64Mask(filt_pos);
|
|
||||||
|
|
||||||
if (0xffffffffffffffff == mask)
|
|
||||||
{
|
|
||||||
res_data.insert(data_pos, data_pos + SIMD_BYTES);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
while (mask)
|
|
||||||
{
|
|
||||||
size_t index = std::countr_zero(mask);
|
|
||||||
res_data.push_back(data_pos[index]);
|
|
||||||
mask = blsr(mask);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
filt_pos += SIMD_BYTES;
|
|
||||||
data_pos += SIMD_BYTES;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
DECLARE_AVX512VBMI2_SPECIFIC_CODE(
|
|
||||||
template <size_t ELEMENT_WIDTH>
|
|
||||||
inline void compressStoreAVX512(const void *src, void *dst, const UInt64 mask)
|
|
||||||
{
|
|
||||||
__m512i vsrc = _mm512_loadu_si512(src);
|
|
||||||
if constexpr (ELEMENT_WIDTH == 1)
|
|
||||||
_mm512_mask_compressstoreu_epi8(dst, static_cast<__mmask64>(mask), vsrc);
|
|
||||||
else if constexpr (ELEMENT_WIDTH == 2)
|
|
||||||
_mm512_mask_compressstoreu_epi16(dst, static_cast<__mmask32>(mask), vsrc);
|
|
||||||
else if constexpr (ELEMENT_WIDTH == 4)
|
|
||||||
_mm512_mask_compressstoreu_epi32(dst, static_cast<__mmask16>(mask), vsrc);
|
|
||||||
else if constexpr (ELEMENT_WIDTH == 8)
|
|
||||||
_mm512_mask_compressstoreu_epi64(dst, static_cast<__mmask8>(mask), vsrc);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T, typename Container, size_t SIMD_BYTES>
|
|
||||||
inline void doFilterAligned(const UInt8 *& filt_pos, const UInt8 *& filt_end_aligned, const T *& data_pos, Container & res_data)
|
|
||||||
{
|
|
||||||
static constexpr size_t VEC_LEN = 64; /// AVX512 vector length - 64 bytes
|
|
||||||
static constexpr size_t ELEMENT_WIDTH = sizeof(T);
|
|
||||||
static constexpr size_t ELEMENTS_PER_VEC = VEC_LEN / ELEMENT_WIDTH;
|
|
||||||
static constexpr UInt64 KMASK = 0xffffffffffffffff >> (64 - ELEMENTS_PER_VEC);
|
|
||||||
|
|
||||||
size_t current_offset = res_data.size();
|
|
||||||
size_t reserve_size = res_data.size();
|
|
||||||
size_t alloc_size = SIMD_BYTES * 2;
|
|
||||||
|
|
||||||
while (filt_pos < filt_end_aligned)
|
|
||||||
{
|
|
||||||
/// to avoid calling resize too frequently, resize to reserve buffer.
|
|
||||||
if (reserve_size - current_offset < SIMD_BYTES)
|
|
||||||
{
|
|
||||||
reserve_size += alloc_size;
|
|
||||||
res_data.resize(reserve_size);
|
|
||||||
alloc_size *= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
UInt64 mask = bytes64MaskToBits64Mask(filt_pos);
|
|
||||||
|
|
||||||
if (0xffffffffffffffff == mask)
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < SIMD_BYTES; i += ELEMENTS_PER_VEC)
|
|
||||||
_mm512_storeu_si512(reinterpret_cast<void *>(&res_data[current_offset + i]),
|
|
||||||
_mm512_loadu_si512(reinterpret_cast<const void *>(data_pos + i)));
|
|
||||||
current_offset += SIMD_BYTES;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (mask)
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < SIMD_BYTES; i += ELEMENTS_PER_VEC)
|
|
||||||
{
|
|
||||||
compressStoreAVX512<ELEMENT_WIDTH>(reinterpret_cast<const void *>(data_pos + i),
|
|
||||||
reinterpret_cast<void *>(&res_data[current_offset]), mask & KMASK);
|
|
||||||
current_offset += std::popcount(mask & KMASK);
|
|
||||||
/// prepare mask for next iter, if ELEMENTS_PER_VEC = 64, no next iter
|
|
||||||
if (ELEMENTS_PER_VEC < 64)
|
|
||||||
{
|
|
||||||
mask >>= ELEMENTS_PER_VEC;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
filt_pos += SIMD_BYTES;
|
|
||||||
data_pos += SIMD_BYTES;
|
|
||||||
}
|
|
||||||
/// resize to the real size.
|
|
||||||
res_data.resize(current_offset);
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
|
ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_size_hint) const
|
||||||
{
|
{
|
||||||
@ -611,13 +496,31 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
|
|||||||
static constexpr size_t SIMD_BYTES = 64;
|
static constexpr size_t SIMD_BYTES = 64;
|
||||||
const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
|
const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
|
||||||
|
|
||||||
#if USE_MULTITARGET_CODE
|
while (filt_pos < filt_end_aligned)
|
||||||
static constexpr bool VBMI2_CAPABLE = sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8;
|
{
|
||||||
if (VBMI2_CAPABLE && isArchSupported(TargetArch::AVX512VBMI2))
|
UInt64 mask = bytes64MaskToBits64Mask(filt_pos);
|
||||||
TargetSpecific::AVX512VBMI2::doFilterAligned<T, Container, SIMD_BYTES>(filt_pos, filt_end_aligned, data_pos, res_data);
|
|
||||||
else
|
if (0xffffffffffffffff == mask)
|
||||||
#endif
|
{
|
||||||
TargetSpecific::Default::doFilterAligned<T, Container, SIMD_BYTES>(filt_pos, filt_end_aligned, data_pos, res_data);
|
res_data.insert(data_pos, data_pos + SIMD_BYTES);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
while (mask)
|
||||||
|
{
|
||||||
|
size_t index = std::countr_zero(mask);
|
||||||
|
res_data.push_back(data_pos[index]);
|
||||||
|
#ifdef __BMI__
|
||||||
|
mask = _blsr_u64(mask);
|
||||||
|
#else
|
||||||
|
mask = mask & (mask-1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
filt_pos += SIMD_BYTES;
|
||||||
|
data_pos += SIMD_BYTES;
|
||||||
|
}
|
||||||
|
|
||||||
while (filt_pos < filt_end)
|
while (filt_pos < filt_end)
|
||||||
{
|
{
|
||||||
|
@ -1,91 +0,0 @@
|
|||||||
#include <typeinfo>
|
|
||||||
#include <vector>
|
|
||||||
#include <Columns/ColumnsNumber.h>
|
|
||||||
#include <Common/randomSeed.h>
|
|
||||||
#include <gtest/gtest.h>
|
|
||||||
|
|
||||||
|
|
||||||
using namespace DB;
|
|
||||||
|
|
||||||
static pcg64 rng(randomSeed());
|
|
||||||
static constexpr int error_code = 12345;
|
|
||||||
static constexpr size_t TEST_RUNS = 500;
|
|
||||||
static constexpr size_t MAX_ROWS = 10000;
|
|
||||||
static const std::vector<size_t> filter_ratios = {1, 2, 5, 11, 32, 64, 100, 1000};
|
|
||||||
static const size_t K = filter_ratios.size();
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static MutableColumnPtr createColumn(size_t n)
|
|
||||||
{
|
|
||||||
auto column = ColumnVector<T>::create();
|
|
||||||
auto & values = column->getData();
|
|
||||||
|
|
||||||
for (size_t i = 0; i < n; ++i)
|
|
||||||
{
|
|
||||||
values.push_back(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
return column;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool checkFilter(const PaddedPODArray<UInt8> &flit, const IColumn & src, const IColumn & dst)
|
|
||||||
{
|
|
||||||
size_t n = flit.size();
|
|
||||||
size_t dst_size = dst.size();
|
|
||||||
size_t j = 0; /// index of dest
|
|
||||||
for (size_t i = 0; i < n; ++i)
|
|
||||||
{
|
|
||||||
if (flit[i] != 0)
|
|
||||||
{
|
|
||||||
if ((dst_size <= j) || (src.compareAt(i, j, dst, 0) != 0))
|
|
||||||
return false;
|
|
||||||
j++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return dst_size == j; /// filtered size check
|
|
||||||
}
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
static void testFilter()
|
|
||||||
{
|
|
||||||
auto test_case = [&](size_t rows, size_t filter_ratio)
|
|
||||||
{
|
|
||||||
auto vector_column = createColumn<T>(rows);
|
|
||||||
PaddedPODArray<UInt8> flit(rows);
|
|
||||||
for (size_t i = 0; i < rows; ++i)
|
|
||||||
flit[i] = rng() % filter_ratio == 0;
|
|
||||||
auto res_column = vector_column->filter(flit, -1);
|
|
||||||
|
|
||||||
if (!checkFilter(flit, *vector_column, *res_column))
|
|
||||||
throw Exception(error_code, "VectorColumn filter failure, type: {}", typeid(T).name());
|
|
||||||
};
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < TEST_RUNS; ++i)
|
|
||||||
{
|
|
||||||
size_t rows = rng() % MAX_ROWS + 1;
|
|
||||||
size_t filter_ratio = filter_ratios[rng() % K];
|
|
||||||
|
|
||||||
test_case(rows, filter_ratio);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (const Exception & e)
|
|
||||||
{
|
|
||||||
FAIL() << e.displayText();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
TEST(ColumnVector, Filter)
|
|
||||||
{
|
|
||||||
testFilter<UInt8>();
|
|
||||||
testFilter<Int16>();
|
|
||||||
testFilter<UInt32>();
|
|
||||||
testFilter<Int64>();
|
|
||||||
testFilter<UInt128>();
|
|
||||||
testFilter<Int256>();
|
|
||||||
testFilter<Float32>();
|
|
||||||
testFilter<Float64>();
|
|
||||||
testFilter<UUID>();
|
|
||||||
}
|
|
@ -82,7 +82,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
|
|||||||
OP(AVX512BW) \
|
OP(AVX512BW) \
|
||||||
OP(AVX512VL) \
|
OP(AVX512VL) \
|
||||||
OP(AVX512VBMI) \
|
OP(AVX512VBMI) \
|
||||||
OP(AVX512VBMI2) \
|
|
||||||
OP(PREFETCHWT1) \
|
OP(PREFETCHWT1) \
|
||||||
OP(SHA) \
|
OP(SHA) \
|
||||||
OP(ADX) \
|
OP(ADX) \
|
||||||
@ -303,11 +302,6 @@ bool haveAVX512VBMI() noexcept
|
|||||||
return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 1) & 1u);
|
return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 1) & 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool haveAVX512VBMI2() noexcept
|
|
||||||
{
|
|
||||||
return haveAVX512F() && ((CpuInfo(0x7, 0).registers.ecx >> 6) & 1u);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool haveRDRAND() noexcept
|
bool haveRDRAND() noexcept
|
||||||
{
|
{
|
||||||
return CpuInfo(0x0).registers.eax >= 0x7 && ((CpuInfo(0x1).registers.ecx >> 30) & 1u);
|
return CpuInfo(0x0).registers.eax >= 0x7 && ((CpuInfo(0x1).registers.ecx >> 30) & 1u);
|
||||||
|
@ -20,8 +20,6 @@ UInt32 getSupportedArchs()
|
|||||||
result |= static_cast<UInt32>(TargetArch::AVX512BW);
|
result |= static_cast<UInt32>(TargetArch::AVX512BW);
|
||||||
if (Cpu::CpuFlagsCache::have_AVX512VBMI)
|
if (Cpu::CpuFlagsCache::have_AVX512VBMI)
|
||||||
result |= static_cast<UInt32>(TargetArch::AVX512VBMI);
|
result |= static_cast<UInt32>(TargetArch::AVX512VBMI);
|
||||||
if (Cpu::CpuFlagsCache::have_AVX512VBMI2)
|
|
||||||
result |= static_cast<UInt32>(TargetArch::AVX512VBMI2);
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,9 +38,8 @@ String toString(TargetArch arch)
|
|||||||
case TargetArch::AVX: return "avx";
|
case TargetArch::AVX: return "avx";
|
||||||
case TargetArch::AVX2: return "avx2";
|
case TargetArch::AVX2: return "avx2";
|
||||||
case TargetArch::AVX512F: return "avx512f";
|
case TargetArch::AVX512F: return "avx512f";
|
||||||
case TargetArch::AVX512BW: return "avx512bw";
|
case TargetArch::AVX512BW: return "avx512bw";
|
||||||
case TargetArch::AVX512VBMI: return "avx512vbmi";
|
case TargetArch::AVX512VBMI: return "avx512vbmi";
|
||||||
case TargetArch::AVX512VBMI2: return "avx512vbmi";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
|
@ -31,7 +31,7 @@
|
|||||||
* int funcImpl() {
|
* int funcImpl() {
|
||||||
* return 2;
|
* return 2;
|
||||||
* }
|
* }
|
||||||
* ) // DECLARE_AVX2_SPECIFIC_CODE
|
* ) // DECLARE_DEFAULT_CODE
|
||||||
*
|
*
|
||||||
* int func() {
|
* int func() {
|
||||||
* #if USE_MULTITARGET_CODE
|
* #if USE_MULTITARGET_CODE
|
||||||
@ -80,9 +80,8 @@ enum class TargetArch : UInt32
|
|||||||
AVX = (1 << 1),
|
AVX = (1 << 1),
|
||||||
AVX2 = (1 << 2),
|
AVX2 = (1 << 2),
|
||||||
AVX512F = (1 << 3),
|
AVX512F = (1 << 3),
|
||||||
AVX512BW = (1 << 4),
|
AVX512BW = (1 << 4),
|
||||||
AVX512VBMI = (1 << 5),
|
AVX512VBMI = (1 << 5),
|
||||||
AVX512VBMI2 = (1 << 6),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Runtime detection.
|
/// Runtime detection.
|
||||||
@ -101,7 +100,6 @@ String toString(TargetArch arch);
|
|||||||
|
|
||||||
#if defined(__clang__)
|
#if defined(__clang__)
|
||||||
|
|
||||||
#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2")))
|
|
||||||
#define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi")))
|
#define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi")))
|
||||||
#define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw")))
|
#define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw")))
|
||||||
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f")))
|
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f")))
|
||||||
@ -110,8 +108,6 @@ String toString(TargetArch arch);
|
|||||||
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt")))
|
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt")))
|
||||||
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
|
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
|
||||||
|
|
||||||
# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \
|
|
||||||
_Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2\"))),apply_to=function)")
|
|
||||||
# define BEGIN_AVX512VBMI_SPECIFIC_CODE \
|
# define BEGIN_AVX512VBMI_SPECIFIC_CODE \
|
||||||
_Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi\"))),apply_to=function)")
|
_Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi\"))),apply_to=function)")
|
||||||
# define BEGIN_AVX512BW_SPECIFIC_CODE \
|
# define BEGIN_AVX512BW_SPECIFIC_CODE \
|
||||||
@ -133,7 +129,6 @@ String toString(TargetArch arch);
|
|||||||
# define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition();
|
# define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition();
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define AVX512VBMI2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native")))
|
|
||||||
#define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native")))
|
#define AVX512VBMI_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native")))
|
||||||
#define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native")))
|
#define AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native")))
|
||||||
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native")))
|
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native")))
|
||||||
@ -142,9 +137,6 @@ String toString(TargetArch arch);
|
|||||||
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native)))
|
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native)))
|
||||||
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
|
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
|
||||||
|
|
||||||
# define BEGIN_AVX512VBMI2_SPECIFIC_CODE \
|
|
||||||
_Pragma("GCC push_options") \
|
|
||||||
_Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,avx512vbmi2,tune=native\")")
|
|
||||||
# define BEGIN_AVX512VBMI_SPECIFIC_CODE \
|
# define BEGIN_AVX512VBMI_SPECIFIC_CODE \
|
||||||
_Pragma("GCC push_options") \
|
_Pragma("GCC push_options") \
|
||||||
_Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")")
|
_Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,avx512vbmi,tune=native\")")
|
||||||
@ -225,16 +217,6 @@ namespace TargetSpecific::AVX512VBMI { \
|
|||||||
} \
|
} \
|
||||||
END_TARGET_SPECIFIC_CODE
|
END_TARGET_SPECIFIC_CODE
|
||||||
|
|
||||||
#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...) \
|
|
||||||
BEGIN_AVX512VBMI2_SPECIFIC_CODE \
|
|
||||||
namespace TargetSpecific::AVX512VBMI2 { \
|
|
||||||
DUMMY_FUNCTION_DEFINITION \
|
|
||||||
using namespace DB::TargetSpecific::AVX512VBMI2; \
|
|
||||||
__VA_ARGS__ \
|
|
||||||
} \
|
|
||||||
END_TARGET_SPECIFIC_CODE
|
|
||||||
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define USE_MULTITARGET_CODE 0
|
#define USE_MULTITARGET_CODE 0
|
||||||
@ -247,7 +229,6 @@ END_TARGET_SPECIFIC_CODE
|
|||||||
#define DECLARE_AVX512F_SPECIFIC_CODE(...)
|
#define DECLARE_AVX512F_SPECIFIC_CODE(...)
|
||||||
#define DECLARE_AVX512BW_SPECIFIC_CODE(...)
|
#define DECLARE_AVX512BW_SPECIFIC_CODE(...)
|
||||||
#define DECLARE_AVX512VBMI_SPECIFIC_CODE(...)
|
#define DECLARE_AVX512VBMI_SPECIFIC_CODE(...)
|
||||||
#define DECLARE_AVX512VBMI2_SPECIFIC_CODE(...)
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -264,9 +245,8 @@ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \
|
|||||||
DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \
|
DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \
|
||||||
DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \
|
DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \
|
||||||
DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) \
|
DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) \
|
||||||
DECLARE_AVX512BW_SPECIFIC_CODE (__VA_ARGS__) \
|
DECLARE_AVX512BW_SPECIFIC_CODE(__VA_ARGS__) \
|
||||||
DECLARE_AVX512VBMI_SPECIFIC_CODE (__VA_ARGS__) \
|
DECLARE_AVX512VBMI_SPECIFIC_CODE(__VA_ARGS__)
|
||||||
DECLARE_AVX512VBMI2_SPECIFIC_CODE (__VA_ARGS__)
|
|
||||||
|
|
||||||
DECLARE_DEFAULT_CODE(
|
DECLARE_DEFAULT_CODE(
|
||||||
constexpr auto BuildArch = TargetArch::Default; /// NOLINT
|
constexpr auto BuildArch = TargetArch::Default; /// NOLINT
|
||||||
@ -296,9 +276,6 @@ DECLARE_AVX512VBMI_SPECIFIC_CODE(
|
|||||||
constexpr auto BuildArch = TargetArch::AVX512VBMI; /// NOLINT
|
constexpr auto BuildArch = TargetArch::AVX512VBMI; /// NOLINT
|
||||||
) // DECLARE_AVX512VBMI_SPECIFIC_CODE
|
) // DECLARE_AVX512VBMI_SPECIFIC_CODE
|
||||||
|
|
||||||
DECLARE_AVX512VBMI2_SPECIFIC_CODE(
|
|
||||||
constexpr auto BuildArch = TargetArch::AVX512VBMI2; /// NOLINT
|
|
||||||
) // DECLARE_AVX512VBMI2_SPECIFIC_CODE
|
|
||||||
|
|
||||||
/** Runtime Dispatch helpers for class members.
|
/** Runtime Dispatch helpers for class members.
|
||||||
*
|
*
|
||||||
|
Loading…
Reference in New Issue
Block a user