diff --git a/src/Functions/FunctionStartsEndsWith.h b/src/Functions/FunctionStartsEndsWith.h index 730f0b9efbb..29c81796d0e 100644 --- a/src/Functions/FunctionStartsEndsWith.h +++ b/src/Functions/FunctionStartsEndsWith.h @@ -145,11 +145,12 @@ public: FunctionStartsEndsWith(const Context &) : FunctionPerformanceAdaptor>() { - registerImplementation>(TargetArch::SSE4); - registerImplementation>(TargetArch::AVX); - registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512); + registerImplementation> (TargetArch::SSE4); + registerImplementation> (TargetArch::AVX); + registerImplementation> (TargetArch::AVX2); + registerImplementation>(TargetArch::AVX512f); } + static FunctionPtr create(const Context & context) { return std::make_shared>(context); diff --git a/src/Functions/FunctionsRandom.h b/src/Functions/FunctionsRandom.h index 990c3a5f466..98d04d61ad1 100644 --- a/src/Functions/FunctionsRandom.h +++ b/src/Functions/FunctionsRandom.h @@ -95,7 +95,7 @@ public: registerImplementation>(TargetArch::SSE4); registerImplementation>(TargetArch::AVX); registerImplementation>(TargetArch::AVX2); - registerImplementation>(TargetArch::AVX512); + registerImplementation>(TargetArch::AVX512f); } static FunctionPtr create(const Context &) { diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 12f4b84dab9..a97fdbce0b0 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -104,6 +104,11 @@ struct PerformanceStatistics PerformanceStatistics(ssize_t choose_method_) : choose_method(choose_method_) {} }; +struct PerformanceAdaptorOptions +{ + +}; + /// Combine several IExecutableFunctionImpl into one. /// All the implementations should be equivalent. /// Implementation to execute will be selected based on performance on previous runs. @@ -152,6 +157,7 @@ public: private: std::vector impls; // Alternative implementations. PerformanceStatistics statistics; + PerformanceAdaptorOptions options; }; // The same as ExecutableFunctionPerformanceAdaptor, but combine via IFunction interface. @@ -197,24 +203,7 @@ public: private: std::vector impls; // Alternative implementations. PerformanceStatistics statistics; -}; - -// TODO(dakovalkov): May be it's better to delete this macros and write every function explicitly for better readability. -#define DECLARE_STANDART_TARGET_ADAPTOR(Function) \ -class Function : public FunctionDynamicAdaptor \ -{ \ -public: \ - Function(const Context &) : FunctionDynamicAdaptor() \ - { \ - registerImplementation(TargetArch::SSE4); \ - registerImplementation(TargetArch::AVX); \ - registerImplementation(TargetArch::AVX2); \ - registerImplementation(TargetArch::AVX512); \ - } \ - static FunctionPtr create(const Context & context) \ - { \ - return std::make_shared(context); \ - } \ + PerformanceAdaptorOptions options; }; } // namespace DB diff --git a/src/Functions/TargetSpecific.cpp b/src/Functions/TargetSpecific.cpp index f22a586c333..aa017823e54 100644 --- a/src/Functions/TargetSpecific.cpp +++ b/src/Functions/TargetSpecific.cpp @@ -2,6 +2,7 @@ #if defined(__GNUC__) # include +# include #else # error "Only CLANG and GCC compilers are supported for dynamic dispatch" #endif @@ -9,6 +10,11 @@ namespace DB { +__attribute__ ((target("xsave"))) +uint64_t xgetbv(uint32_t ecx) { + return _xgetbv(ecx); +} + int GetSupportedArches() { unsigned int eax, ebx, ecx, edx; if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { @@ -17,13 +23,15 @@ int GetSupportedArches() { int res = 0; if (ecx & bit_SSE4_2) res |= static_cast(TargetArch::SSE4); - if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX)) { - // TODO(dakovalkov): check XGETBV. + // (xgetbv(0) & 0x6) == 0x6 checks that XMM state and YMM state are enabled. + if ((ecx & bit_OSXSAVE) && (ecx & bit_AVX) && (xgetbv(0) & 0x6) == 0x6) { res |= static_cast(TargetArch::AVX); if (__get_cpuid(7, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) { res |= static_cast(TargetArch::AVX2); + if (ebx & bit_AVX512F) { + res |= static_cast(TargetArch::AVX512f); + } } - // TODO(dakovalkov): check AVX512 support. } return res; } @@ -34,4 +42,17 @@ bool IsArchSupported(TargetArch arch) return arch == TargetArch::Default || (arches & static_cast(arch)); } +String ToString(TargetArch arch) +{ + switch (arch) { + case TargetArch::Default: return "default"; + case TargetArch::SSE4: return "sse4"; + case TargetArch::AVX: return "avx"; + case TargetArch::AVX2: return "avx2"; + case TargetArch::AVX512f: return "avx512f"; + } + + __builtin_unreachable(); +} + } // namespace DB diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index e5818632843..accb1dd7fab 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -1,5 +1,7 @@ #pragma once +#include + /// This file contains macros and helpers for writing platform-dependent code. /// /// Macroses DECLARE__SPECIFIC_CODE will wrap code inside them into the namespace TargetSpecific:: and enable @@ -62,16 +64,17 @@ enum class TargetArch : int { SSE4 = (1 << 0), AVX = (1 << 1), AVX2 = (1 << 2), - AVX512 = (1 << 3), + AVX512f = (1 << 3), }; // Runtime detection. bool IsArchSupported(TargetArch arch); +String ToString(TargetArch arch); + #if defined(__clang__) -// TODO: There are lots of different AVX512 :( -# define BEGIN_AVX512_SPECIFIC_CODE \ - _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") +# define BEGIN_AVX512f_SPECIFIC_CODE \ + _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f\"))))") # define BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("clang attribute push (__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2\"))))") # define BEGIN_AVX_SPECIFIC_CODE \ @@ -81,8 +84,7 @@ bool IsArchSupported(TargetArch arch); # define END_TARGET_SPECIFIC_CODE \ _Pragma("clang attribute pop") #elif defined(__GNUC__) -// TODO: There are lots of different AVX512 :( -# define BEGIN_AVX512_SPECIFIC_CODE \ +# define BEGIN_AVX512f_SPECIFIC_CODE \ _Pragma("GCC push_options") \ _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,avx2,avx512f,tune=native\")") # define BEGIN_AVX2_SPECIFIC_CODE \ @@ -130,10 +132,10 @@ namespace TargetSpecific::AVX2 { \ } \ END_TARGET_SPECIFIC_CODE -#define DECLARE_AVX512_SPECIFIC_CODE(...) \ -BEGIN_AVX512_SPECIFIC_CODE \ -namespace TargetSpecific::AVX512 { \ - using namespace DB::TargetSpecific::AVX512; \ +#define DECLARE_AVX512f_SPECIFIC_CODE(...) \ +BEGIN_AVX512f_SPECIFIC_CODE \ +namespace TargetSpecific::AVX512f { \ + using namespace DB::TargetSpecific::AVX512f; \ __VA_ARGS__ \ } \ END_TARGET_SPECIFIC_CODE @@ -143,7 +145,7 @@ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_SSE4_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ -DECLARE_AVX512_SPECIFIC_CODE(__VA_ARGS__) +DECLARE_AVX512f_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( constexpr auto BuildArch = TargetArch::Default; @@ -161,8 +163,8 @@ DECLARE_AVX2_SPECIFIC_CODE( constexpr auto BuildArch = TargetArch::AVX2; ) // DECLARE_AVX2_SPECIFIC_CODE -DECLARE_AVX512_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512; +DECLARE_AVX512f_SPECIFIC_CODE( + constexpr auto BuildArch = TargetArch::AVX512f; ) // DECLARE_AVX512_SPECIFIC_CODE } // namespace DB