Dynamic dispatch infrastructure for class member functions

This commit is contained in:
Maksim Kita 2022-05-16 17:05:16 +02:00
parent 7006ef6ebb
commit 032b5d3fc3
2 changed files with 134 additions and 2 deletions

View File

@ -14,6 +14,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/config.h>
#include <Common/TargetSpecific.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>
@ -58,8 +59,11 @@ struct AggregateFunctionSumData
}
/// Vectorized version
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyImpl,
MULTITARGET_FH(
template <typename Value>
void NO_SANITIZE_UNDEFINED NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
void NO_SANITIZE_UNDEFINED NO_INLINE
), /*addManyImpl*/ MULTITARGET_FB((const Value * __restrict ptr, size_t start, size_t end) /// NOLINT
{
ptr += start;
size_t count = end - start;
@ -95,11 +99,34 @@ struct AggregateFunctionSumData
++ptr;
}
Impl::add(sum, local_sum);
})
)
/// Vectorized version
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX2))
{
addManyImplAVX2(ptr, start, end);
return;
}
else if (isArchSupported(TargetArch::SSE42))
{
addManyImplSSE42(ptr, start, end);
return;
}
#endif
addManyImpl(ptr, start, end);
}
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyConditionalInternalImpl,
MULTITARGET_FH(
template <typename Value, bool add_if_zero>
void NO_SANITIZE_UNDEFINED NO_INLINE
addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
), /*addManyConditionalInternalImpl*/ MULTITARGET_FB((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
{
ptr += start;
size_t count = end - start;
@ -163,6 +190,27 @@ struct AggregateFunctionSumData
++condition_map;
}
Impl::add(sum, local_sum);
})
)
/// Vectorized version
template <typename Value, bool add_if_zero>
void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX2))
{
addManyConditionalInternalImplAVX2<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
else if (isArchSupported(TargetArch::SSE42))
{
addManyConditionalInternalImplSSE42<Value, add_if_zero>(ptr, condition_map, start, end);
return;
}
#endif
addManyConditionalInternalImpl<Value, add_if_zero>(ptr, condition_map, start, end);
}
template <typename Value>

View File

@ -93,6 +93,13 @@ String toString(TargetArch arch);
#define USE_MULTITARGET_CODE 1
#if defined(__clang__)
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f")))
#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2")))
#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx"))
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt")))
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
# define BEGIN_AVX512F_SPECIFIC_CODE \
_Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f\"))),apply_to=function)")
# define BEGIN_AVX2_SPECIFIC_CODE \
@ -109,6 +116,13 @@ String toString(TargetArch arch);
*/
# define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition();
#else
#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native")))
#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native")))
#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native")))
#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))))
#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
# define BEGIN_AVX512F_SPECIFIC_CODE \
_Pragma("GCC push_options") \
_Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")")
@ -212,4 +226,74 @@ DECLARE_AVX512F_SPECIFIC_CODE(
constexpr auto BuildArch = TargetArch::AVX512F; /// NOLINT
) // DECLARE_AVX512F_SPECIFIC_CODE
/** Runtime Dispatch helpers for class members.
*
* Example of usage:
*
* class TestClass
* {
* public:
* MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(testFunctionImpl,
* MULTITARGET_FH(int), /\*testFunction*\/ MULTITARGET_FB((int value)
* {
* return value;
* })
* )
*
* void testFunction(int value) {
* if (isArchSupported(TargetArch::AVX2))
* {
* testFunctionImplAVX2(value);
* }
* else if (isArchSupported(TargetArch::SSE42))
* {
* testFunctionImplSSE42(value);
* }
* else
* {
* testFunction(value);
* }
* }
*};
*
*/
/// Function header
#define MULTITARGET_FH(...) __VA_ARGS__
/// Function body
#define MULTITARGET_FB(...) __VA_ARGS__
#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__)
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
FUNCTION_HEADER \
\
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
name##AVX2 \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
name##SSE42 \
FUNCTION_BODY \
\
FUNCTION_HEADER \
\
name \
FUNCTION_BODY \
#else
/// NOLINTNEXTLINE
#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
FUNCTION_HEADER \
\
name \
FUNCTION_BODY \
#endif
}