From f2091ac6cf90bb87b1d6370bc6cd0b4d4c0daa29 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 19 Feb 2024 19:33:14 +0000 Subject: [PATCH] Mini cleanup of CPUID.h --- src/Common/CPUID.h | 490 ++++++++++++++++++++++----------------------- 1 file changed, 243 insertions(+), 247 deletions(-) diff --git a/src/Common/CPUID.h b/src/Common/CPUID.h index b47e7e808d7..d7a714ec5af 100644 --- a/src/Common/CPUID.h +++ b/src/Common/CPUID.h @@ -57,6 +57,249 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT #endif } +union CPUInfo +{ + UInt32 info[4]; + + struct Registers + { + UInt32 eax; + UInt32 ebx; + UInt32 ecx; + UInt32 edx; + } registers; + + inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } + + inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } +}; + +inline bool haveRDTSCP() noexcept +{ + return (CPUInfo(0x80000001).registers.edx >> 27) & 1u; +} + +inline bool haveSSE() noexcept +{ + return (CPUInfo(0x1).registers.edx >> 25) & 1u; +} + +inline bool haveSSE2() noexcept +{ + return (CPUInfo(0x1).registers.edx >> 26) & 1u; +} + +inline bool haveSSE3() noexcept +{ + return CPUInfo(0x1).registers.ecx & 1u; +} + +inline bool havePCLMUL() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 1) & 1u; +} + +inline bool haveSSSE3() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 9) & 1u; +} + +inline bool haveSSE41() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 19) & 1u; +} + +inline bool haveSSE42() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 20) & 1u; +} + +inline bool haveF16C() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 29) & 1u; +} + +inline bool havePOPCNT() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 23) & 1u; +} + +inline bool haveAES() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 25) & 1u; +} + +inline bool haveXSAVE() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 26) & 1u; +} + +inline bool haveOSXSAVE() noexcept +{ + return (CPUInfo(0x1).registers.ecx >> 27) & 1u; +} + +inline bool haveAVX() noexcept +{ +#if defined(__x86_64__) + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf + // https://bugs.chromium.org/p/chromium/issues/detail?id=375968 + return haveOSXSAVE() // implies haveXSAVE() + && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS + && ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit +#else + return false; +#endif +} + +inline bool haveFMA() noexcept +{ + return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u); +} + +inline bool haveAVX2() noexcept +{ + return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u); +} + +inline bool haveBMI1() noexcept +{ + return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u; +} + +inline bool haveBMI2() noexcept +{ + return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u; +} + +inline bool haveAVX512F() noexcept +{ +#if defined(__x86_64__) + // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support + return haveOSXSAVE() // implies haveXSAVE() + && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS + && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS + && CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present + && ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit +#else + return false; +#endif +} + +inline bool haveAVX512DQ() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u); +} + +inline bool haveRDSEED() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u); +} + +inline bool haveADX() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u); +} + +inline bool haveAVX512IFMA() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u); +} + +inline bool havePCOMMIT() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u); +} + +inline bool haveCLFLUSHOPT() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u); +} + +inline bool haveCLWB() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u); +} + +inline bool haveAVX512PF() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u); +} + +inline bool haveAVX512ER() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u); +} + +inline bool haveAVX512CD() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u); +} + +inline bool haveSHA() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u); +} + +inline bool haveAVX512BW() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u); +} + +inline bool haveAVX512VL() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u); +} + +inline bool havePREFETCHWT1() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u); +} + +inline bool haveAVX512VBMI() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u); +} + +inline bool haveAVX512VBMI2() noexcept +{ + return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u); +} + +inline bool haveRDRAND() noexcept +{ + return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u); +} + +inline bool haveAMX() noexcept +{ +#if defined(__x86_64__) + // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf + return haveOSXSAVE() // implies haveXSAVE() + && ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS +#else + return false; +#endif +} + +inline bool haveAMXBF16() noexcept +{ + return haveAMX() + && ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit +} + +inline bool haveAMXTILE() noexcept +{ + return haveAMX() + && ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit +} + +inline bool haveAMXINT8() noexcept +{ + return haveAMX() + && ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit +} + #define CPU_ID_ENUMERATE(OP) \ OP(SSE) \ OP(SSE2) \ @@ -98,253 +341,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT OP(AMXTILE) \ OP(AMXINT8) -union CPUInfo -{ - UInt32 info[4]; - - struct Registers - { - UInt32 eax; - UInt32 ebx; - UInt32 ecx; - UInt32 edx; - } registers; - - inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); } - - inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); } -}; - -#define DEF_NAME(X) inline bool have##X() noexcept; - CPU_ID_ENUMERATE(DEF_NAME) -#undef DEF_NAME - -bool haveRDTSCP() noexcept -{ - return (CPUInfo(0x80000001).registers.edx >> 27) & 1u; -} - -bool haveSSE() noexcept -{ - return (CPUInfo(0x1).registers.edx >> 25) & 1u; -} - -bool haveSSE2() noexcept -{ - return (CPUInfo(0x1).registers.edx >> 26) & 1u; -} - -bool haveSSE3() noexcept -{ - return CPUInfo(0x1).registers.ecx & 1u; -} - -bool havePCLMUL() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 1) & 1u; -} - -bool haveSSSE3() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 9) & 1u; -} - -bool haveSSE41() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 19) & 1u; -} - -bool haveSSE42() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 20) & 1u; -} - -bool haveF16C() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 29) & 1u; -} - -bool havePOPCNT() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 23) & 1u; -} - -bool haveAES() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 25) & 1u; -} - -bool haveXSAVE() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 26) & 1u; -} - -bool haveOSXSAVE() noexcept -{ - return (CPUInfo(0x1).registers.ecx >> 27) & 1u; -} - -bool haveAVX() noexcept -{ -#if defined(__x86_64__) - // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf - // https://bugs.chromium.org/p/chromium/issues/detail?id=375968 - return haveOSXSAVE() // implies haveXSAVE() - && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS - && ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit -#else - return false; -#endif -} - -bool haveFMA() noexcept -{ - return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u); -} - -bool haveAVX2() noexcept -{ - return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u); -} - -bool haveBMI1() noexcept -{ - return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u; -} - -bool haveBMI2() noexcept -{ - return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u; -} - -bool haveAVX512F() noexcept -{ -#if defined(__x86_64__) - // https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support - return haveOSXSAVE() // implies haveXSAVE() - && (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS - && ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS - && CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present - && ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit -#else - return false; -#endif -} - -bool haveAVX512DQ() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u); -} - -bool haveRDSEED() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u); -} - -bool haveADX() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u); -} - -bool haveAVX512IFMA() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u); -} - -bool havePCOMMIT() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u); -} - -bool haveCLFLUSHOPT() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u); -} - -bool haveCLWB() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u); -} - -bool haveAVX512PF() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u); -} - -bool haveAVX512ER() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u); -} - -bool haveAVX512CD() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u); -} - -bool haveSHA() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u); -} - -bool haveAVX512BW() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u); -} - -bool haveAVX512VL() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u); -} - -bool havePREFETCHWT1() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u); -} - -bool haveAVX512VBMI() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u); -} - -bool haveAVX512VBMI2() noexcept -{ - return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u); -} - -bool haveRDRAND() noexcept -{ - return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u); -} - -inline bool haveAMX() noexcept -{ -#if defined(__x86_64__) - // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf - return haveOSXSAVE() // implies haveXSAVE() - && ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS -#else - return false; -#endif -} - -bool haveAMXBF16() noexcept -{ - return haveAMX() - && ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit -} - -bool haveAMXTILE() noexcept -{ - return haveAMX() - && ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit -} - -bool haveAMXINT8() noexcept -{ - return haveAMX() - && ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit -} - struct CPUFlagsCache { #define DEF_NAME(X) static inline bool have_##X = have##X();