Runtime CPU dispatch for AggregateFunctionSumData::addMany and
AggregateFunctionSumData::addManyConditionalInternal.

src/Common/TargetSpecific.h gains per-function target attributes
(*_FUNCTION_SPECIFIC_ATTRIBUTE) and MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42,
which emits one function body three times: <name>AVX2, <name>SSE42 and the
plain <name>. In AggregateFunctionSum.h the two vectorized loops become
*Impl functions generated through that wrapper, and the original entry points
turn into thin dispatchers that select a variant with isArchSupported().

NOTE(review): in this copy of the patch the #include targets and the template
parameter lists are missing (the text between angle brackets was lost). They
are kept exactly as found here; restore them from the real tree before applying.

diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h
index 6aa6d6b9501..acff8e7b90f 100644
--- a/src/AggregateFunctions/AggregateFunctionSum.h
+++ b/src/AggregateFunctions/AggregateFunctionSum.h
@@ -14,6 +14,7 @@
 
 #include
 #include
+#include
 
 #if USE_EMBEDDED_COMPILER
 #    include
@@ -58,8 +59,11 @@ struct AggregateFunctionSumData
     }
 
     /// Vectorized version
+    MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyImpl,
+    MULTITARGET_FH(
     template
-    void NO_SANITIZE_UNDEFINED NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
+    void NO_SANITIZE_UNDEFINED NO_INLINE
+    ), /*addManyImpl*/ MULTITARGET_FB((const Value * __restrict ptr, size_t start, size_t end) /// NOLINT
     {
         ptr += start;
         size_t count = end - start;
@@ -95,11 +99,34 @@ struct AggregateFunctionSumData
             ++ptr;
         }
         Impl::add(sum, local_sum);
+    })
+    )
+
+    /// Runtime dispatch: AVX2 if supported, else SSE4.2, else the default-target implementation.
+    template
+    void NO_INLINE addMany(const Value * __restrict ptr, size_t start, size_t end)
+    {
+#if USE_MULTITARGET_CODE
+        if (isArchSupported(TargetArch::AVX2))
+        {
+            addManyImplAVX2(ptr, start, end);
+            return;
+        }
+        else if (isArchSupported(TargetArch::SSE42))
+        {
+            addManyImplSSE42(ptr, start, end);
+            return;
+        }
+#endif
+
+        addManyImpl(ptr, start, end);
     }
 
+    MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(addManyConditionalInternalImpl,
+    MULTITARGET_FH(
     template
     void NO_SANITIZE_UNDEFINED NO_INLINE
-    addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+    ), /*addManyConditionalInternalImpl*/ MULTITARGET_FB((const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end) /// NOLINT
     {
         ptr += start;
         size_t count = end - start;
@@ -163,6 +190,27 @@ struct AggregateFunctionSumData
             ++condition_map;
         }
         Impl::add(sum, local_sum);
+    })
+    )
+
+    /// Runtime dispatch: AVX2 if supported, else SSE4.2, else the default-target implementation.
+    template
+    void NO_INLINE addManyConditionalInternal(const Value * __restrict ptr, const UInt8 * __restrict condition_map, size_t start, size_t end)
+    {
+#if USE_MULTITARGET_CODE
+        if (isArchSupported(TargetArch::AVX2))
+        {
+            addManyConditionalInternalImplAVX2(ptr, condition_map, start, end);
+            return;
+        }
+        else if (isArchSupported(TargetArch::SSE42))
+        {
+            addManyConditionalInternalImplSSE42(ptr, condition_map, start, end);
+            return;
+        }
+#endif
+
+        addManyConditionalInternalImpl(ptr, condition_map, start, end);
     }
 
     template
diff --git a/src/Common/TargetSpecific.h b/src/Common/TargetSpecific.h
index d7fa55fbb08..2b81ee2fcb3 100644
--- a/src/Common/TargetSpecific.h
+++ b/src/Common/TargetSpecific.h
@@ -93,6 +93,13 @@ String toString(TargetArch arch);
 #define USE_MULTITARGET_CODE 1
 
 #if defined(__clang__)
+
+#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f")))
+#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2")))
+#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx")))
+#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt")))
+#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
+
 #   define BEGIN_AVX512F_SPECIFIC_CODE \
         _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f\"))),apply_to=function)")
 #   define BEGIN_AVX2_SPECIFIC_CODE \
@@ -109,6 +116,13 @@ String toString(TargetArch arch);
  */
 #   define DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition();
 #else
+
+#define AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native")))
+#define AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native")))
+#define AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native")))
+#define SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,tune=native")))
+#define DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE
+
 #   define BEGIN_AVX512F_SPECIFIC_CODE \
         _Pragma("GCC push_options") \
         _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,tune=native\")")
@@ -212,4 +226,74 @@ DECLARE_AVX512F_SPECIFIC_CODE(
     constexpr auto BuildArch = TargetArch::AVX512F; /// NOLINT
 ) // DECLARE_AVX512F_SPECIFIC_CODE
 
+/** Runtime Dispatch helpers for class members.
+  *
+  * Example of usage:
+  *
+  * class TestClass
+  * {
+  * public:
+  *     MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(testFunctionImpl,
+  *     MULTITARGET_FH(int), /\*testFunctionImpl*\/ MULTITARGET_FB((int value)
+  *     {
+  *          return value;
+  *     })
+  *     )
+  *
+  *     int testFunction(int value) {
+  *         if (isArchSupported(TargetArch::AVX2))
+  *         {
+  *             return testFunctionImplAVX2(value);
+  *         }
+  *         else if (isArchSupported(TargetArch::SSE42))
+  *         {
+  *             return testFunctionImplSSE42(value);
+  *         }
+  *         else
+  *         {
+  *             return testFunctionImpl(value);
+  *         }
+  *     }
+  *};
+  *
+  */
+
+/// Function header
+#define MULTITARGET_FH(...) __VA_ARGS__
+
+/// Function body
+#define MULTITARGET_FB(...) __VA_ARGS__
+
+#if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__)
+
+/// NOLINTNEXTLINE
+#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
+    FUNCTION_HEADER \
+    \
+    AVX2_FUNCTION_SPECIFIC_ATTRIBUTE \
+    name##AVX2 \
+    FUNCTION_BODY \
+    \
+    FUNCTION_HEADER \
+    \
+    SSE42_FUNCTION_SPECIFIC_ATTRIBUTE \
+    name##SSE42 \
+    FUNCTION_BODY \
+    \
+    FUNCTION_HEADER \
+    \
+    name \
+    FUNCTION_BODY \
+
+#else
+
+/// NOLINTNEXTLINE
+#define MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(name, FUNCTION_HEADER, FUNCTION_BODY) \
+    FUNCTION_HEADER \
+    \
+    name \
+    FUNCTION_BODY \
+
+#endif
+
 }