Mini cleanup of CPUID.h

This commit is contained in:
Robert Schulze 2024-02-19 19:33:14 +00:00
parent df48106cd5
commit f2091ac6cf
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A

View File

@ -57,6 +57,249 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
#endif
}
union CPUInfo
{
UInt32 info[4];
struct Registers
{
UInt32 eax;
UInt32 ebx;
UInt32 ecx;
UInt32 edx;
} registers;
inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
};
inline bool haveRDTSCP() noexcept
{
return (CPUInfo(0x80000001).registers.edx >> 27) & 1u;
}
inline bool haveSSE() noexcept
{
return (CPUInfo(0x1).registers.edx >> 25) & 1u;
}
inline bool haveSSE2() noexcept
{
return (CPUInfo(0x1).registers.edx >> 26) & 1u;
}
inline bool haveSSE3() noexcept
{
return CPUInfo(0x1).registers.ecx & 1u;
}
inline bool havePCLMUL() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 1) & 1u;
}
inline bool haveSSSE3() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 9) & 1u;
}
inline bool haveSSE41() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 19) & 1u;
}
inline bool haveSSE42() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 20) & 1u;
}
inline bool haveF16C() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 29) & 1u;
}
inline bool havePOPCNT() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 23) & 1u;
}
inline bool haveAES() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 25) & 1u;
}
inline bool haveXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 26) & 1u;
}
inline bool haveOSXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 27) & 1u;
}
inline bool haveAVX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
// https://bugs.chromium.org/p/chromium/issues/detail?id=375968
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit
#else
return false;
#endif
}
inline bool haveFMA() noexcept
{
return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u);
}
inline bool haveAVX2() noexcept
{
return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u);
}
inline bool haveBMI1() noexcept
{
return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u;
}
inline bool haveBMI2() noexcept
{
return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u;
}
inline bool haveAVX512F() noexcept
{
#if defined(__x86_64__)
// https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS
&& CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present
&& ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit
#else
return false;
#endif
}
inline bool haveAVX512DQ() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u);
}
inline bool haveRDSEED() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u);
}
inline bool haveADX() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u);
}
inline bool haveAVX512IFMA() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u);
}
inline bool havePCOMMIT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u);
}
inline bool haveCLFLUSHOPT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u);
}
inline bool haveCLWB() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u);
}
inline bool haveAVX512PF() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u);
}
inline bool haveAVX512ER() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u);
}
inline bool haveAVX512CD() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u);
}
inline bool haveSHA() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u);
}
inline bool haveAVX512BW() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u);
}
inline bool haveAVX512VL() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u);
}
inline bool havePREFETCHWT1() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u);
}
inline bool haveAVX512VBMI() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u);
}
inline bool haveAVX512VBMI2() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u);
}
inline bool haveRDRAND() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u);
}
inline bool haveAMX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
return haveOSXSAVE() // implies haveXSAVE()
&& ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS
#else
return false;
#endif
}
inline bool haveAMXBF16() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit
}
inline bool haveAMXTILE() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit
}
inline bool haveAMXINT8() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit
}
#define CPU_ID_ENUMERATE(OP) \
OP(SSE) \
OP(SSE2) \
@ -98,253 +341,6 @@ inline bool cpuid(UInt32 op, UInt32 * res) noexcept /// NOLINT
OP(AMXTILE) \
OP(AMXINT8)
union CPUInfo
{
UInt32 info[4];
struct Registers
{
UInt32 eax;
UInt32 ebx;
UInt32 ecx;
UInt32 edx;
} registers;
inline explicit CPUInfo(UInt32 op) noexcept { cpuid(op, info); }
inline CPUInfo(UInt32 op, UInt32 sub_op) noexcept { cpuid(op, sub_op, info); }
};
#define DEF_NAME(X) inline bool have##X() noexcept;
CPU_ID_ENUMERATE(DEF_NAME)
#undef DEF_NAME
bool haveRDTSCP() noexcept
{
return (CPUInfo(0x80000001).registers.edx >> 27) & 1u;
}
bool haveSSE() noexcept
{
return (CPUInfo(0x1).registers.edx >> 25) & 1u;
}
bool haveSSE2() noexcept
{
return (CPUInfo(0x1).registers.edx >> 26) & 1u;
}
bool haveSSE3() noexcept
{
return CPUInfo(0x1).registers.ecx & 1u;
}
bool havePCLMUL() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 1) & 1u;
}
bool haveSSSE3() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 9) & 1u;
}
bool haveSSE41() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 19) & 1u;
}
bool haveSSE42() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 20) & 1u;
}
bool haveF16C() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 29) & 1u;
}
bool havePOPCNT() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 23) & 1u;
}
bool haveAES() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 25) & 1u;
}
bool haveXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 26) & 1u;
}
bool haveOSXSAVE() noexcept
{
return (CPUInfo(0x1).registers.ecx >> 27) & 1u;
}
bool haveAVX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
// https://bugs.chromium.org/p/chromium/issues/detail?id=375968
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((CPUInfo(0x1).registers.ecx >> 28) & 1u); // AVX bit
#else
return false;
#endif
}
bool haveFMA() noexcept
{
return haveAVX() && ((CPUInfo(0x1).registers.ecx >> 12) & 1u);
}
bool haveAVX2() noexcept
{
return haveAVX() && ((CPUInfo(0x7, 0).registers.ebx >> 5) & 1u);
}
bool haveBMI1() noexcept
{
return (CPUInfo(0x7, 0).registers.ebx >> 3) & 1u;
}
bool haveBMI2() noexcept
{
return (CPUInfo(0x7, 0).registers.ebx >> 8) & 1u;
}
bool haveAVX512F() noexcept
{
#if defined(__x86_64__)
// https://software.intel.com/en-us/articles/how-to-detect-knl-instruction-support
return haveOSXSAVE() // implies haveXSAVE()
&& (our_xgetbv(0) & 6u) == 6u // XMM state and YMM state are enabled by OS
&& ((our_xgetbv(0) >> 5) & 7u) == 7u // ZMM state is enabled by OS
&& CPUInfo(0x0).registers.eax >= 0x7 // leaf 7 is present
&& ((CPUInfo(0x7, 0).registers.ebx >> 16) & 1u); // AVX512F bit
#else
return false;
#endif
}
bool haveAVX512DQ() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 17) & 1u);
}
bool haveRDSEED() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 18) & 1u);
}
bool haveADX() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 19) & 1u);
}
bool haveAVX512IFMA() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 21) & 1u);
}
bool havePCOMMIT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 22) & 1u);
}
bool haveCLFLUSHOPT() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 23) & 1u);
}
bool haveCLWB() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 24) & 1u);
}
bool haveAVX512PF() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 26) & 1u);
}
bool haveAVX512ER() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 27) & 1u);
}
bool haveAVX512CD() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 28) & 1u);
}
bool haveSHA() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ebx >> 29) & 1u);
}
bool haveAVX512BW() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 30) & 1u);
}
bool haveAVX512VL() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ebx >> 31) & 1u);
}
bool havePREFETCHWT1() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x7, 0).registers.ecx >> 0) & 1u);
}
bool haveAVX512VBMI() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 1) & 1u);
}
bool haveAVX512VBMI2() noexcept
{
return haveAVX512F() && ((CPUInfo(0x7, 0).registers.ecx >> 6) & 1u);
}
bool haveRDRAND() noexcept
{
return CPUInfo(0x0).registers.eax >= 0x7 && ((CPUInfo(0x1).registers.ecx >> 30) & 1u);
}
inline bool haveAMX() noexcept
{
#if defined(__x86_64__)
// http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf
return haveOSXSAVE() // implies haveXSAVE()
&& ((our_xgetbv(0) >> 17) & 0x3) == 0x3; // AMX state are enabled by OS
#else
return false;
#endif
}
bool haveAMXBF16() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 22) & 1u); // AMX-BF16 bit
}
bool haveAMXTILE() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 24) & 1u); // AMX-TILE bit
}
bool haveAMXINT8() noexcept
{
return haveAMX()
&& ((CPUInfo(0x7, 0).registers.edx >> 25) & 1u); // AMX-INT8 bit
}
struct CPUFlagsCache
{
#define DEF_NAME(X) static inline bool have_##X = have##X();