/* * Copyright 2008 Veselin Georgiev, * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef __LIBCPUID_H__ #define __LIBCPUID_H__ /** * @File libcpuid.h * @Author Veselin Georgiev * @Date Oct 2008 * @Version 0.2.1 * * Version history: * * 0.1.0 (2008-10-15): initial adaptation from wxfractgui sources * 0.1.1 (2009-07-06): Added intel_fn11 fields to cpu_raw_data_t to handle * new processor topology enumeration required on Core i7 * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting * kernel driver on Win32. * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock * measurements with cpu_clock_by_ic() * 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size * checking. A backwards-incompatible change, since the * sizeof cpu_id_t is now different. * 0.2.1 (2012-05-26): Support for Ivy Bridge, and detecting the presence of * the RdRand instruction. */ /** @mainpage A simple libcpuid introduction * * LibCPUID provides CPU identification and access to the CPUID and RDTSC * instructions on the x86. *

* To execute CPUID, use \ref cpu_exec_cpuid
* To execute RDTSC, use \ref cpu_rdtsc
* To fetch the CPUID info needed for CPU identification, use * \ref cpuid_get_raw_data
* To make sense of that data (decode, extract features), use \ref cpu_identify
*

*/ /** @defgroup libcpuid LibCPUID @{ */ /* Include some integer type specifications: */ #include "libcpuid_types.h" /* Some limits and other constants */ #include "libcpuid_constants.h" #ifdef __cplusplus extern "C" { #endif /** * @brief CPU vendor, as guessed from the Vendor String. */ typedef enum { VENDOR_INTEL = 0, /*!< Intel CPU */ VENDOR_AMD, /*!< AMD CPU */ VENDOR_CYRIX, /*!< Cyrix CPU */ VENDOR_NEXGEN, /*!< NexGen CPU */ VENDOR_TRANSMETA, /*!< Transmeta CPU */ VENDOR_UMC, /*!< x86 CPU by UMC */ VENDOR_CENTAUR, /*!< x86 CPU by IDT */ VENDOR_RISE, /*!< x86 CPU by Rise Technology */ VENDOR_SIS, /*!< x86 CPU by SiS */ VENDOR_NSC, /*!< x86 CPU by National Semiconductor */ NUM_CPU_VENDORS, /*!< Valid CPU vendor ids: 0..NUM_CPU_VENDORS - 1 */ VENDOR_UNKNOWN = -1, } cpu_vendor_t; #define NUM_CPU_VENDORS NUM_CPU_VENDORS /** * @brief Contains just the raw CPUID data. * * This contains only the most basic CPU data, required to do identification * and feature recognition. Every processor should be identifiable using this * data only. */ struct cpu_raw_data_t { /** contains results of CPUID for eax = 0, 1, ...*/ uint32_t basic_cpuid[MAX_CPUID_LEVEL][4]; /** contains results of CPUID for eax = 0x80000000, 0x80000001, ...*/ uint32_t ext_cpuid[MAX_EXT_CPUID_LEVEL][4]; /** when the CPU is intel and it supports deterministic cache information: this contains the results of CPUID for eax = 4 and ecx = 0, 1, ... */ uint32_t intel_fn4[MAX_INTELFN4_LEVEL][4]; /** when the CPU is intel and it supports leaf 0Bh (Extended Topology enumeration leaf), this stores the result of CPUID with eax = 11 and ecx = 0, 1, 2... */ uint32_t intel_fn11[MAX_INTELFN11_LEVEL][4]; }; /** * @brief This contains the recognized CPU features/info */ struct cpu_id_t { /** contains the CPU vendor string, e.g. "GenuineIntel" */ char vendor_str[VENDOR_STR_MAX]; /** contains the brand string, e.g. "Intel(R) Xeon(TM) CPU 2.40GHz" */ char brand_str[BRAND_STR_MAX]; /** contains the recognized CPU vendor */ cpu_vendor_t vendor; /** * contain CPU flags. Used to test for features. See * the CPU_FEATURE_* macros below. @see Features */ uint8_t flags[CPU_FLAGS_MAX]; /** CPU family */ int32_t family; /** CPU model */ int32_t model; /** CPU stepping */ int32_t stepping; /** CPU extended family */ int32_t ext_family; /** CPU extended model */ int32_t ext_model; /** Number of CPU cores on the current processor */ int32_t num_cores; /** * Number of logical processors on the current processor. * Could be more than the number of physical cores, * e.g. when the processor has HyperThreading. */ int32_t num_logical_cpus; /** * The total number of logical processors. * * This is num_logical_cpus * {total physical processors in the system} * * If you're writing a multithreaded program and you want to run it on * all CPUs, this is the number of threads you need. */ int32_t total_logical_cpus; /** * L1 data cache size in KB. Could be zero, if the CPU lacks cache. * If the size cannot be determined, it will be -1. */ int32_t l1_data_cache; /** * L1 instruction cache size in KB. Could be zero, if the CPU lacks * cache. If the size cannot be determined, it will be -1. * @note On some Intel CPUs, whose instruction cache is in fact * a trace cache, the size will be expressed in K uOps. */ int32_t l1_instruction_cache; /** * L2 cache size in KB. Could be zero, if the CPU lacks L2 cache. * If the size of the cache could not be determined, it will be -1 */ int32_t l2_cache; /** L3 cache size in KB. Zero on most systems */ int32_t l3_cache; /** Cache associativity for the L1 data cache. -1 if undetermined */ int32_t l1_assoc; /** Cache associativity for the L2 cache. -1 if undetermined */ int32_t l2_assoc; /** Cache associativity for the L3 cache. -1 if undetermined */ int32_t l3_assoc; /** Cache-line size for L1 data cache. -1 if undetermined */ int32_t l1_cacheline; /** Cache-line size for L2 cache. -1 if undetermined */ int32_t l2_cacheline; /** Cache-line size for L3 cache. -1 if undetermined */ int32_t l3_cacheline; /** * The brief and human-friendly CPU codename, which was recognized.
* Examples: * @code * +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+ * | Vendor | Family | Model | Step. | Cache | Brand String | cpu_id_t.cpu_codename | * +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+ * | AMD | 6 | 8 | 0 | 256 | (not available - will be ignored) | "K6-2" | * | Intel | 15 | 2 | 5 | 512 | "Intel(R) Xeon(TM) CPU 2.40GHz" | "Xeon (Prestonia)" | * | Intel | 6 | 15 | 11 | 4096 | "Intel(R) Core(TM)2 Duo CPU E6550..." | "Conroe (Core 2 Duo)" | * | AMD | 15 | 35 | 2 | 1024 | "Dual Core AMD Opteron(tm) Proces..." | "Opteron (Dual Core)" | * +--------+--------+-------+-------+-------+---------------------------------------+-----------------------+ * @endcode */ char cpu_codename[64]; /** SSE execution unit size (64 or 128; -1 if N/A) */ int32_t sse_size; /** * contain miscellaneous detection information. Used to test about specifics of * certain detected features. See CPU_HINT_* macros below. @see Hints */ uint8_t detection_hints[CPU_HINTS_MAX]; }; /** * @brief CPU feature identifiers * * Usage: * @code * ... * struct cpu_raw_data_t raw; * struct cpu_id_t id; * if (cpuid_get_raw_data(&raw) == 0 && cpu_identify(&raw, &id) == 0) { * if (id.flags[CPU_FEATURE_SSE2]) { * // The CPU has SSE2... * ... * } else { * // no SSE2 * } * } else { * // processor cannot be determined. * } * @endcode */ typedef enum { CPU_FEATURE_FPU = 0, /*!< Floating point unit */ CPU_FEATURE_VME, /*!< Virtual mode extension */ CPU_FEATURE_DE, /*!< Debugging extension */ CPU_FEATURE_PSE, /*!< Page size extension */ CPU_FEATURE_TSC, /*!< Time-stamp counter */ CPU_FEATURE_MSR, /*!< Model-specific regsisters, RDMSR/WRMSR supported */ CPU_FEATURE_PAE, /*!< Physical address extension */ CPU_FEATURE_MCE, /*!< Machine check exception */ CPU_FEATURE_CX8, /*!< CMPXCHG8B instruction supported */ CPU_FEATURE_APIC, /*!< APIC support */ CPU_FEATURE_MTRR, /*!< Memory type range registers */ CPU_FEATURE_SEP, /*!< SYSENTER / SYSEXIT instructions supported */ CPU_FEATURE_PGE, /*!< Page global enable */ CPU_FEATURE_MCA, /*!< Machine check architecture */ CPU_FEATURE_CMOV, /*!< CMOVxx instructions supported */ CPU_FEATURE_PAT, /*!< Page attribute table */ CPU_FEATURE_PSE36, /*!< 36-bit page address extension */ CPU_FEATURE_PN, /*!< Processor serial # implemented (Intel P3 only) */ CPU_FEATURE_CLFLUSH, /*!< CLFLUSH instruction supported */ CPU_FEATURE_DTS, /*!< Debug store supported */ CPU_FEATURE_ACPI, /*!< ACPI support (power states) */ CPU_FEATURE_MMX, /*!< MMX instruction set supported */ CPU_FEATURE_FXSR, /*!< FXSAVE / FXRSTOR supported */ CPU_FEATURE_SSE, /*!< Streaming-SIMD Extensions (SSE) supported */ CPU_FEATURE_SSE2, /*!< SSE2 instructions supported */ CPU_FEATURE_SS, /*!< Self-snoop */ CPU_FEATURE_HT, /*!< Hyper-threading supported (but might be disabled) */ CPU_FEATURE_TM, /*!< Thermal monitor */ CPU_FEATURE_IA64, /*!< IA64 supported (Itanium only) */ CPU_FEATURE_PBE, /*!< Pending-break enable */ CPU_FEATURE_PNI, /*!< PNI (SSE3) instructions supported */ CPU_FEATURE_PCLMUL, /*!< PCLMULQDQ instruction supported */ CPU_FEATURE_DTS64, /*!< 64-bit Debug store supported */ CPU_FEATURE_MONITOR, /*!< MONITOR / MWAIT supported */ CPU_FEATURE_DS_CPL, /*!< CPL Qualified Debug Store */ CPU_FEATURE_VMX, /*!< Virtualization technology supported */ CPU_FEATURE_SMX, /*!< Safer mode exceptions */ CPU_FEATURE_EST, /*!< Enhanced SpeedStep */ CPU_FEATURE_TM2, /*!< Thermal monitor 2 */ CPU_FEATURE_SSSE3, /*!< SSSE3 instructionss supported (this is different from SSE3!) */ CPU_FEATURE_CID, /*!< Context ID supported */ CPU_FEATURE_CX16, /*!< CMPXCHG16B instruction supported */ CPU_FEATURE_XTPR, /*!< Send Task Priority Messages disable */ CPU_FEATURE_PDCM, /*!< Performance capabilities MSR supported */ CPU_FEATURE_DCA, /*!< Direct cache access supported */ CPU_FEATURE_SSE4_1, /*!< SSE 4.1 instructions supported */ CPU_FEATURE_SSE4_2, /*!< SSE 4.2 instructions supported */ CPU_FEATURE_SYSCALL, /*!< SYSCALL / SYSRET instructions supported */ CPU_FEATURE_XD, /*!< Execute disable bit supported */ CPU_FEATURE_MOVBE, /*!< MOVBE instruction supported */ CPU_FEATURE_POPCNT, /*!< POPCNT instruction supported */ CPU_FEATURE_AES, /*!< AES* instructions supported */ CPU_FEATURE_XSAVE, /*!< XSAVE/XRSTOR/etc instructions supported */ CPU_FEATURE_OSXSAVE, /*!< non-privileged copy of OSXSAVE supported */ CPU_FEATURE_AVX, /*!< Advanced vector extensions supported */ CPU_FEATURE_MMXEXT, /*!< AMD MMX-extended instructions supported */ CPU_FEATURE_3DNOW, /*!< AMD 3DNow! instructions supported */ CPU_FEATURE_3DNOWEXT, /*!< AMD 3DNow! extended instructions supported */ CPU_FEATURE_NX, /*!< No-execute bit supported */ CPU_FEATURE_FXSR_OPT, /*!< FFXSR: FXSAVE and FXRSTOR optimizations */ CPU_FEATURE_RDTSCP, /*!< RDTSCP instruction supported (AMD-only) */ CPU_FEATURE_LM, /*!< Long mode (x86_64/EM64T) supported */ CPU_FEATURE_LAHF_LM, /*!< LAHF/SAHF supported in 64-bit mode */ CPU_FEATURE_CMP_LEGACY, /*!< core multi-processing legacy mode */ CPU_FEATURE_SVM, /*!< AMD Secure virtual machine */ CPU_FEATURE_ABM, /*!< LZCNT instruction support */ CPU_FEATURE_MISALIGNSSE,/*!< Misaligned SSE supported */ CPU_FEATURE_SSE4A, /*!< SSE 4a from AMD */ CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */ CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */ CPU_FEATURE_IBS, /*!< Instruction-based sampling */ CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported (deprecated, will never be 1) */ CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */ CPU_FEATURE_WDT, /*!< Watchdog timer support */ CPU_FEATURE_TS, /*!< Temperature sensor */ CPU_FEATURE_FID, /*!< Frequency ID control */ CPU_FEATURE_VID, /*!< Voltage ID control */ CPU_FEATURE_TTP, /*!< THERMTRIP */ CPU_FEATURE_TM_AMD, /*!< AMD-specified hardware thermal control */ CPU_FEATURE_STC, /*!< Software thermal control */ CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */ CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */ CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */ CPU_FEATURE_XOP, /*!< The XOP instruction set (same as the old CPU_FEATURE_SSE5) */ CPU_FEATURE_FMA3, /*!< The FMA3 instruction set */ CPU_FEATURE_FMA4, /*!< The FMA4 instruction set */ CPU_FEATURE_TBM, /*!< Trailing bit manipulation instruction support */ CPU_FEATURE_F16C, /*!< 16-bit FP convert instruction support */ CPU_FEATURE_RDRAND, /*!< RdRand instruction */ CPU_FEATURE_X2APIC, /*!< x2APIC, APIC_BASE.EXTD, MSRs 0000_0800h...0000_0BFFh 64-bit ICR (+030h but not +031h), no DFR (+00Eh), SELF_IPI (+040h) also see standard level 0000_000Bh */ CPU_FEATURE_CPB, /*!< Core performance boost */ CPU_FEATURE_APERFMPERF, /*!< MPERF/APERF MSRs support */ CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */ CPU_FEATURE_PA, /*!< Processor accumulator */ CPU_FEATURE_AVX2, /*!< AVX2 instructions */ /* termination: */ NUM_CPU_FEATURES, } cpu_feature_t; /** * @brief CPU detection hints identifiers * * Usage: similar to the flags usage */ typedef enum { CPU_HINT_SSE_SIZE_AUTH = 0, /*!< SSE unit size is authoritative (not only a Family/Model guesswork, but based on an actual CPUID bit) */ /* termination */ NUM_CPU_HINTS, } cpu_hint_t; /** * @brief Describes common library error codes */ typedef enum { ERR_OK = 0, /*!< "No error" */ ERR_NO_CPUID = -1, /*!< "CPUID instruction is not supported" */ ERR_NO_RDTSC = -2, /*!< "RDTSC instruction is not supported" */ ERR_NO_MEM = -3, /*!< "Memory allocation failed" */ ERR_OPEN = -4, /*!< "File open operation failed" */ ERR_BADFMT = -5, /*!< "Bad file format" */ ERR_NOT_IMP = -6, /*!< "Not implemented" */ ERR_CPU_UNKN = -7, /*!< "Unsupported processor" */ ERR_NO_RDMSR = -8, /*!< "RDMSR instruction is not supported" */ ERR_NO_DRIVER= -9, /*!< "RDMSR driver error (generic)" */ ERR_NO_PERMS = -10, /*!< "No permissions to install RDMSR driver" */ ERR_EXTRACT = -11, /*!< "Cannot extract RDMSR driver (read only media?)" */ ERR_HANDLE = -12, /*!< "Bad handle" */ ERR_INVMSR = -13, /*!< "Invalid MSR" */ } cpu_error_t; /** * @brief Internal structure, used in cpu_tsc_mark, cpu_tsc_unmark and * cpu_clock_by_mark */ struct cpu_mark_t { uint64_t tsc; /*!< Time-stamp from RDTSC */ uint64_t sys_clock; /*!< In microsecond resolution */ }; /** * @brief Returns the total number of CPUs even if CPUID is not present * @retval Number of CPUs available */ int cpuid_get_total_cpus(void); /** * @brief Checks if the CPUID instruction is supported * @retval 1 if CPUID is present * @retval 0 the CPU doesn't have CPUID. */ int cpuid_present(void); /** * @brief Executes the CPUID instruction * @param eax - the value of the EAX register when executing CPUID * @param regs - the results will be stored here. regs[0] = EAX, regs[1] = EBX, ... * @note CPUID will be executed with EAX set to the given value and EBX, ECX, * EDX set to zero. */ void cpu_exec_cpuid(uint32_t eax, uint32_t* regs); /** * @brief Executes the CPUID instruction with the given input registers * @note This is just a bit more generic version of cpu_exec_cpuid - it allows * you to control all the registers. * @param regs - Input/output. Prior to executing CPUID, EAX, EBX, ECX and * EDX will be set to regs[0], regs[1], regs[2] and regs[3]. * After CPUID, this array will contain the results. */ void cpu_exec_cpuid_ext(uint32_t* regs); /** * @brief Obtains the raw CPUID data from the current CPU * @param data - a pointer to cpu_raw_data_t structure * @returns zero if successful, and some negative number on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ int cpuid_get_raw_data(struct cpu_raw_data_t* data); /** * @brief Writes the raw CPUID data to a text file * @param data - a pointer to cpu_raw_data_t structure * @param filename - the path of the file, where the serialized data should be * written. If empty, stdout will be used. * @note This is intended primarily for debugging. On some processor, which is * not currently supported or not completely recognized by cpu_identify, * one can still successfully get the raw data and write it to a file. * libcpuid developers can later import this file and debug the detection * code as if running on the actual hardware. * The file is simple text format of "something=value" pairs. Version info * is also written, but the format is not intended to be neither backward- * nor forward compatible. * @returns zero if successful, and some negative number on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ int cpuid_serialize_raw_data(struct cpu_raw_data_t* data, const char* filename); /** * @brief Reads raw CPUID data from file * @param data - a pointer to cpu_raw_data_t structure. The deserialized data will * be written here. * @param filename - the path of the file, containing the serialized raw data. * If empty, stdin will be used. * @note This function may fail, if the file is created by different version of * the library. Also, see the notes on cpuid_serialize_raw_data. * @returns zero if successful, and some negative number on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ int cpuid_deserialize_raw_data(struct cpu_raw_data_t* data, const char* filename); /** * @brief Identifies the CPU * @param raw - Input - a pointer to the raw CPUID data, which is obtained * either by cpuid_get_raw_data or cpuid_deserialize_raw_data. * Can also be NULL, in which case the functions calls * cpuid_get_raw_data itself. * @param data - Output - the decoded CPU features/info is written here. * @note The function will not fail, even if some of the information * cannot be obtained. Even when the CPU is new and thus unknown to * libcpuid, some generic info, such as "AMD K9 family CPU" will be * written to data.cpu_codename, and most other things, such as the * CPU flags, cache sizes, etc. should be detected correctly anyway. * However, the function CAN fail, if the CPU is completely alien to * libcpuid. * @note While cpu_identify() and cpuid_get_raw_data() are fast for most * purposes, running them several thousand times per second can hamper * performance significantly. Specifically, avoid writing "cpu feature * checker" wrapping function, which calls cpu_identify and returns the * value of some flag, if that function is going to be called frequently. * @returns zero if successful, and some negative number on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ int cpu_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data); /** * @brief Returns the short textual representation of a CPU flag * @param feature - the feature, whose textual representation is wanted. * @returns a constant string like "fpu", "tsc", "sse2", etc. * @note the names of the returned flags are compatible with those from * /proc/cpuinfo in Linux, with the exception of `tm_amd' */ const char* cpu_feature_str(cpu_feature_t feature); /** * @brief Returns textual description of the last error * * libcpuid stores an `errno'-style error status, whose description * can be obtained with this function. * @note This function is not thread-safe * @see cpu_error_t */ const char* cpuid_error(void); /** * @brief Executes RDTSC * * The RDTSC (ReaD Time Stamp Counter) instruction gives access to an * internal 64-bit counter, which usually increments at each clock cycle. * This can be used for various timing routines, and as a very precise * clock source. It is set to zero on system startup. Beware that may not * increment at the same frequency as the CPU. Consecutive calls of RDTSC * are, however, guaranteed to return monotonically-increasing values. * * @param result - a pointer to a 64-bit unsigned integer, where the TSC value * will be stored * * @note If 100% compatibility is a concern, you must first check if the * RDTSC instruction is present (if it is not, your program will crash * with "invalid opcode" exception). Only some very old processors (i486, * early AMD K5 and some Cyrix CPUs) lack that instruction - they should * have become exceedingly rare these days. To verify RDTSC presence, * run cpu_identify() and check flags[CPU_FEATURE_TSC]. * * @note The monotonically increasing nature of the TSC may be violated * on SMP systems, if their TSC clocks run at different rate. If the OS * doesn't account for that, the TSC drift may become arbitrary large. */ void cpu_rdtsc(uint64_t* result); /** * @brief Store TSC and timing info * * This function stores the current TSC value and current * time info from a precise OS-specific clock source in the cpu_mark_t * structure. The sys_clock field contains time with microsecond resolution. * The values can later be used to measure time intervals, number of clocks, * FPU frequency, etc. * @see cpu_rdtsc * * @param mark [out] - a pointer to a cpu_mark_t structure */ void cpu_tsc_mark(struct cpu_mark_t* mark); /** * @brief Calculate TSC and timing difference * * @param mark - input/output: a pointer to a cpu_mark_t sturcture, which has * already been initialized by cpu_tsc_mark. The difference in * TSC and time will be written here. * * This function calculates the TSC and time difference, by obtaining the * current TSC and timing values and subtracting the contents of the `mark' * structure from them. Results are written in the same structure. * * Example: * @code * ... * struct cpu_mark_t mark; * cpu_tsc_mark(&mark); * foo(); * cpu_tsc_unmark(&mark); * printf("Foo finished. Executed in %llu cycles and %llu usecs\n", * mark.tsc, mark.sys_clock); * ... * @endcode */ void cpu_tsc_unmark(struct cpu_mark_t* mark); /** * @brief Calculates the CPU clock * * @param mark - pointer to a cpu_mark_t structure, which has been initialized * with cpu_tsc_mark and later `stopped' with cpu_tsc_unmark. * * @note For reliable results, the marked time interval should be at least about * 10 ms. * * @returns the CPU clock frequency, in MHz. Due to measurement error, it will * differ from the true value in a few least-significant bits. Accuracy depends * on the timing interval - the more, the better. If the timing interval is * insufficient, the result is -1. Also, see the comment on cpu_clock_measure * for additional issues and pitfalls in using RDTSC for CPU frequency * measurements. */ int cpu_clock_by_mark(struct cpu_mark_t* mark); /** * @brief Returns the CPU clock, as reported by the OS * * This function uses OS-specific functions to obtain the CPU clock. It may * differ from the true clock for several reasons:

* * i) The CPU might be in some power saving state, while the OS reports its * full-power frequency, or vice-versa.
* ii) In some cases you can raise or lower the CPU frequency with overclocking * utilities and the OS will not notice. * * @returns the CPU clock frequency in MHz. If the OS is not (yet) supported * or lacks the necessary reporting machinery, the return value is -1 */ int cpu_clock_by_os(void); /** * @brief Measure the CPU clock frequency * * @param millis - How much time to waste in the busy-wait cycle. In millisecs. * Useful values 10 - 1000 * @param quad_check - Do a more thorough measurement if nonzero * (see the explanation). * * The function performs a busy-wait cycle for the given time and calculates * the CPU frequency by the difference of the TSC values. The accuracy of the * calculation depends on the length of the busy-wait cycle: more is better, * but 100ms should be enough for most purposes. * * While this will calculate the CPU frequency correctly in most cases, there are * several reasons why it might be incorrect:
* * i) RDTSC doesn't guarantee it will run at the same clock as the CPU. * Apparently there aren't CPUs at the moment, but still, there's no * guarantee.
* ii) The CPU might be in a low-frequency power saving mode, and the CPU * might be switched to higher frequency at any time. If this happens * during the measurement, the result can be anywhere between the * low and high frequencies. Also, if you're interested in the * high frequency value only, this function might return the low one * instead.
* iii) On SMP systems exhibiting TSC drift (see \ref cpu_rdtsc) * * the quad_check option will run four consecutive measurements and * then return the average of the two most-consistent results. The total * runtime of the function will still be `millis' - consider using * a bit more time for the timing interval. * * Finally, for benchmarking / CPU intensive applications, the best strategy is * to use the cpu_tsc_mark() / cpu_tsc_unmark() / cpu_clock_by_mark() method. * Begin by mark()-ing about one second after application startup (allowing the * power-saving manager to kick in and rise the frequency during that time), * then unmark() just before application finishing. The result will most * acurately represent at what frequency your app was running. * * @returns the CPU clock frequency in MHz (within some measurement error * margin). If RDTSC is not supported, the result is -1. */ int cpu_clock_measure(int millis, int quad_check); /** * @brief Measure the CPU clock frequency using instruction-counting * * @param millis - how much time to allocate for each run, in milliseconds * @param runs - how many runs to perform * * The function performs a busy-wait cycle using a known number of "heavy" (SSE) * instructions. These instructions run at (more or less guaranteed) 1 IPC rate, * so by running a busy loop for a fixed amount of time, and measuring the * amount of instructions done, the CPU clock is accurately measured. * * Of course, this function is still affected by the power-saving schemes, so * the warnings as of cpu_clock_measure() still apply. However, this function is * immune to problems with detection, related to the Intel Nehalem's "Turbo" * mode, where the internal clock is raised, but the RDTSC rate is unaffected. * * The function will run for about (millis * runs) milliseconds. * You can make only a single busy-wait run (runs == 1); however, this can * be affected by task scheduling (which will break the counting), so allowing * more than one run is recommended. As run length is not imperative for * accurate readings (e.g., 50ms is sufficient), you can afford a lot of short * runs, e.g. 10 runs of 50ms or 20 runs of 25ms. * * Recommended values - millis = 50, runs = 4. For more robustness, * increase the number of runs. * * NOTE: on Bulldozer and later CPUs, the busy-wait cycle runs at 1.4 IPC, thus * the results are skewed. This is corrected internally by dividing the resulting * value by 1.4. * However, this only occurs if the thread is executed on a single CMT * module - if there are other threads competing for resources, the results are * unpredictable. Make sure you run cpu_clock_by_ic() on a CPU that is free from * competing threads, or if there are such threads, they shouldn't exceed the * number of modules. On a Bulldozer X8, that means 4 threads. * * @returns the CPU clock frequency in MHz (within some measurement error * margin). If SSE is not supported, the result is -1. If the input parameters * are incorrect, or some other internal fault is detected, the result is -2. */ int cpu_clock_by_ic(int millis, int runs); /** * @brief Get the CPU clock frequency (all-in-one method) * * This is an all-in-one method for getting the CPU clock frequency. * It tries to use the OS for that. If the OS doesn't have this info, it * uses cpu_clock_measure with 200ms time interval and quadruple checking. * * @returns the CPU clock frequency in MHz. If every possible method fails, * the result is -1. */ int cpu_clock(void); /** * @brief Returns the libcpuid version * * @returns the string representation of the libcpuid version, like "0.1.1" */ const char* cpuid_lib_version(void); typedef void (*libcpuid_warn_fn_t) (const char *msg); /** * @brief Sets the warning print function * * In some cases, the internal libcpuid machinery would like to emit useful * debug warnings. By default, these warnings are written to stderr. However, * you can set a custom function that will receive those warnings. * * @param warn_fun - the warning function you want to set. If NULL, warnings * are disabled. The function takes const char* argument. * * @returns the current warning function. You can use the return value to * keep the previous warning function and restore it at your discretion. */ libcpuid_warn_fn_t cpuid_set_warn_function(libcpuid_warn_fn_t warn_fun); /** * @brief Sets the verbosiness level * * When the verbosiness level is above zero, some functions might print * diagnostic information about what are they doing. The higher the level is, * the more detail is printed. Level zero is guaranteed to omit all such * output. The output is written using the same machinery as the warnings, * @see cpuid_set_warn_function() * * @param level the desired verbosiness level. Useful values 0..2 inclusive */ void cpuid_set_verbosiness_level(int level); /** * @brief a structure that holds a list of processor names */ struct cpu_list_t { /** Number of entries in the list */ int num_entries; /** Pointers to names. There will be num_entries of them */ char **names; }; /** * @brief Gets a list of all known CPU names from a specific vendor. * * This function compiles a list of all known CPU (code)names * (i.e. the possible values of cpu_id_t::cpu_codename) for the given vendor. * * There are about 100 entries for Intel and AMD, and a few for the other * vendors. The list is written out in approximate chronological introduction * order of the parts. * * @param vendor the vendor to be queried * @param list [out] the resulting list will be written here. * NOTE: As the memory is dynamically allocated, be sure to call * cpuid_free_cpu_list() after you're done with the data * @see cpu_list_t */ void cpuid_get_cpu_list(cpu_vendor_t vendor, struct cpu_list_t* list); /** * @brief Frees a CPU list * * This function deletes all the memory associated with a CPU list, as obtained * by cpuid_get_cpu_list() * * @param list - the list to be free()'d. */ void cpuid_free_cpu_list(struct cpu_list_t* list); /** * @brief Starts/opens a driver, needed to read MSRs (Model Specific Registers) * * On systems that support it, this function will create a temporary * system driver, that has privileges to execute the RDMSR instruction. * After the driver is created, you can read MSRs by calling \ref cpu_rdmsr * * @returns a handle to the driver on success, and NULL on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ struct msr_driver_t; struct msr_driver_t* cpu_msr_driver_open(void); /** * @brief Reads a Model-Specific Register (MSR) * * If the CPU has MSRs (as indicated by the CPU_FEATURE_MSR flag), you can * read a MSR with the given index by calling this function. * * There are several prerequisites you must do before reading MSRs: * 1) You must ensure the CPU has RDMSR. Check the CPU_FEATURE_MSR flag * in cpu_id_t::flags * 2) You must ensure that the CPU implements the specific MSR you intend to * read. * 3) You must open a MSR-reader driver. RDMSR is a privileged instruction and * needs ring-0 access in order to work. This temporary driver is created * by calling \ref cpu_msr_driver_open * * @param handle - a handle to the MSR reader driver, as created by * cpu_msr_driver_open * @param msr_index - the numeric ID of the MSR you want to read * @param result - a pointer to a 64-bit integer, where the MSR value is stored * * @returns zero if successful, and some negative number on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ int cpu_rdmsr(struct msr_driver_t* handle, int msr_index, uint64_t* result); typedef enum { INFO_MPERF, /*!< Maximum performance frequency clock. This is a counter, which increments as a proportion of the actual processor speed */ INFO_APERF, /*!< Actual performance frequency clock. This accumulates the core clock counts when the core is active. */ INFO_CUR_MULTIPLIER, /*!< Current CPU:FSB ratio, multiplied by 100. e.g., a CPU:FSB value of 18.5 reads as 1850. */ INFO_MAX_MULTIPLIER, /*!< Maxumum CPU:FSB ratio for this CPU, multiplied by 100 */ INFO_TEMPERATURE, /*!< The current core temperature in Celsius */ INFO_THROTTLING, /*!< 1 if the current logical processor is throttling. 0 if it is running normally. */ } cpu_msrinfo_request_t; /** * @brief Reads extended CPU information from Model-Specific Registers. * @param handle - a handle to an open MSR driver, @see cpu_msr_driver_open * @param which - which info field should be returned. A list of * available information entities is listed in the * cpu_msrinfo_request_t enum. * @retval - if the requested information is available for the current * processor model, the respective value is returned. * if no information is available, or the CPU doesn't support * the query, the special value CPU_INVALID_VALUE is returned */ int cpu_msrinfo(struct msr_driver_t* handle, cpu_msrinfo_request_t which); #define CPU_INVALID_VALUE 0x3fffffff /** * @brief Closes an open MSR driver * * This function unloads the MSR driver opened by cpu_msr_driver_open and * frees any resources associated with it. * * @param handle - a handle to the MSR reader driver, as created by * cpu_msr_driver_open * * @returns zero if successful, and some negative number on error. * The error message can be obtained by calling \ref cpuid_error. * @see cpu_error_t */ int cpu_msr_driver_close(struct msr_driver_t* handle); #ifdef __cplusplus }; /* extern "C" */ #endif /** @} */ #endif /* __LIBCPUID_H__ */