Add AVX-512 support for the Sum aggregate function, unary arithmetic functions, and comparison functions

This commit is contained in:
zhao zhou 2022-06-06 09:29:47 +08:00
parent a403f1cd1b
commit cd2911d635
4 changed files with 114 additions and 16 deletions

View File

@ -59,7 +59,7 @@ struct AggregateFunctionSumData
}
/// Vectorized version
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(
template <typename Value>
void NO_SANITIZE_UNDEFINED NO_INLINE
@ -107,12 +107,25 @@ struct AggregateFunctionSumData
void NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
addManyImplAVX512BW(ptr, start, end);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
addManyImplAVX512F(ptr, start, end);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
addManyImplAVX2(ptr, start, end);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
addManyImplSSE42(ptr, start, end);
return;
@ -122,7 +135,7 @@ struct AggregateFunctionSumData
addManyImpl(ptr, start, end);
}
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(
template <typename Value, bool add_if_zero>
void NO_SANITIZE_UNDEFINED NO_INLINE
@ -198,12 +211,25 @@ struct AggregateFunctionSumData
void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
addManyConditionalInternalImplAVX512BW<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
addManyConditionalInternalImplAVX512F<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
addManyConditionalInternalImplAVX2<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
addManyConditionalInternalImplSSE42<Value, add_if_zero>(ptr, condition_map, start, end);
return;

View File

@ -256,7 +256,7 @@ DECLARE_AVX512BW_SPECIFIC_CODE(
* class TestClass
* {
* public:
* MULTITARGET_FUNCTION_AVX2_SSE42(
* MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
* MULTITARGET_FUNCTION_HEADER(int), testFunctionImpl, MULTITARGET_FUNCTION_BODY((int value)
* {
* return value;
@ -264,7 +264,15 @@ DECLARE_AVX512BW_SPECIFIC_CODE(
* )
*
* void testFunction(int value) {
* if (isArchSupported(TargetArch::AVX2))
* if (isArchSupported(TargetArch::AVX512BW))
* {
* testFunctionImplAVX512BW(value);
* }
* else if (isArchSupported(TargetArch::AVX512F))
* {
* testFunctionImplAVX512F(value);
* }
* else if (isArchSupported(TargetArch::AVX2))
* {
* testFunctionImplAVX2(value);
* }
@ -290,7 +298,19 @@ DECLARE_AVX512BW_SPECIFIC_CODE(
#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__)
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
#define MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
FUNCTION_HEADER \
\
AVX512BW_FUNCTION_SPECIFIC_ATTRIBUTE \
name##AVX512BW \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
AVX512_FUNCTION_SPECIFIC_ATTRIBUTE \
name##AVX512F \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
@ -311,7 +331,7 @@ DECLARE_AVX512BW_SPECIFIC_CODE(
#else
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
#define MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(FUNCTION_HEADER, name, FUNCTION_BODY) \
FUNCTION_HEADER \
\
name \

View File

@ -42,7 +42,7 @@ struct UnaryOperationImpl
using ArrayA = typename ColVecA::Container;
using ArrayC = typename ColVecC::Container;
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(static void NO_INLINE), vectorImpl, MULTITARGET_FUNCTION_BODY((const ArrayA & a, ArrayC & c) /// NOLINT
{
size_t size = a.size();
@ -53,12 +53,25 @@ struct UnaryOperationImpl
static void NO_INLINE vector(const ArrayA & a, ArrayC & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
vectorImplAVX512BW(a, c);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
vectorImplAVX512F(a, c);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
vectorImplAVX2(a, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
vectorImplSSE42(a, c);
return;
@ -78,7 +91,7 @@ struct UnaryOperationImpl
template <typename Op>
struct FixedStringUnaryOperationImpl
{
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(static void NO_INLINE), vectorImpl, MULTITARGET_FUNCTION_BODY((const ColumnFixedString::Chars & a, /// NOLINT
ColumnFixedString::Chars & c)
{
@ -90,12 +103,25 @@ struct FixedStringUnaryOperationImpl
static void NO_INLINE vector(const ColumnFixedString::Chars & a, ColumnFixedString::Chars & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
vectorImplAVX512BW(a, c);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
vectorImplAVX512F(a, c);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
vectorImplAVX2(a, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
vectorImplSSE42(a, c);
return;

View File

@ -85,7 +85,7 @@ struct NumComparisonImpl
using ContainerA = PaddedPODArray<A>;
using ContainerB = PaddedPODArray<B>;
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(static void), vectorVectorImpl, MULTITARGET_FUNCTION_BODY(( /// NOLINT
const ContainerA & a, const ContainerB & b, PaddedPODArray<UInt8> & c)
{
@ -112,12 +112,25 @@ struct NumComparisonImpl
static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray<UInt8> & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
vectorVectorImplAVX512BW(a, b, c);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
vectorVectorImplAVX512F(a, b, c);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
vectorVectorImplAVX2(a, b, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
vectorVectorImplSSE42(a, b, c);
return;
@ -128,7 +141,7 @@ struct NumComparisonImpl
}
MULTITARGET_FUNCTION_AVX2_SSE42(
MULTITARGET_FUNCTION_AVX512BW_AVX512F_AVX2_SSE42(
MULTITARGET_FUNCTION_HEADER(static void), vectorConstantImpl, MULTITARGET_FUNCTION_BODY(( /// NOLINT
const ContainerA & a, B b, PaddedPODArray<UInt8> & c)
{
@ -148,12 +161,25 @@ struct NumComparisonImpl
static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray<UInt8> & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX512BW))
{
vectorConstantImplAVX512BW(a, b, c);
return;
}
if (isArchSupported(TargetArch::AVX512F))
{
vectorConstantImplAVX512F(a, b, c);
return;
}
if (isArchSupported(TargetArch::AVX2))
{
vectorConstantImplAVX2(a, b, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
if (isArchSupported(TargetArch::SSE42))
{
vectorConstantImplSSE42(a, b, c);
return;