cpuid.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184
  1. // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
  2. // Package cpuid provides information about the CPU running the current program.
  3. //
  4. // CPU features are detected on startup, and kept for fast access through the life of the application.
  5. // Currently x86 / x64 (AMD64) is supported.
  6. //
  7. // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
  8. //
  9. // Package home: https://github.com/klauspost/cpuid
  10. package cpuid
  11. import "strings"
// Vendor is a representation of a CPU vendor.
// It is a small integer enumeration so vendors can be compared directly;
// the zero value is Other.
type Vendor int
// Known CPU (and hypervisor) vendors.
// Other is the zero value and is what vendorID returns when the vendor string
// reported by the CPU has no entry in vendorMapping.
const (
	Other Vendor = iota
	Intel
	AMD
	VIA
	Transmeta
	NSC // National Semiconductor
	KVM // Kernel-based Virtual Machine
	MSVM // Microsoft Hyper-V or Windows Virtual PC
	VMware
	XenHVM
	Bhyve
	Hygon
)
// CPU feature flags.
//
// Every constant is a distinct single bit (1 << iota) so that any combination
// can be stored in a Flags value and tested with a bitwise AND.
// NOTE: 63 constants are declared here, so bits 0-62 of the 64-bit Flags type
// are in use; only one more flag can be added before the type must grow.
// The order matters: inserting a constant shifts the value of all that follow.
const (
	CMOV = 1 << iota // i686 CMOV
	NX // NX (No-Execute) bit
	AMD3DNOW // AMD 3DNOW
	AMD3DNOWEXT // AMD 3DNowExt
	MMX // standard MMX
	MMXEXT // SSE integer functions or AMD MMX ext
	SSE // SSE functions
	SSE2 // P4 SSE2 functions
	SSE3 // Prescott SSE3 functions
	SSSE3 // Conroe SSSE3 functions
	SSE4 // Penryn SSE4.1 functions
	SSE4A // AMD Barcelona microarchitecture SSE4a instructions
	SSE42 // Nehalem SSE4.2 functions
	AVX // AVX functions
	AVX2 // AVX2 functions
	FMA3 // Intel FMA 3
	FMA4 // Bulldozer FMA4 functions
	XOP // Bulldozer XOP functions
	F16C // Half-precision floating-point conversion
	BMI1 // Bit Manipulation Instruction Set 1
	BMI2 // Bit Manipulation Instruction Set 2
	TBM // AMD Trailing Bit Manipulation
	LZCNT // LZCNT instruction
	POPCNT // POPCNT instruction
	AESNI // Advanced Encryption Standard New Instructions
	CLMUL // Carry-less Multiplication
	HTT // Hyperthreading (enabled)
	HLE // Hardware Lock Elision
	RTM // Restricted Transactional Memory
	RDRAND // RDRAND instruction is available
	RDSEED // RDSEED instruction is available
	ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA // Intel SHA Extensions
	AVX512F // AVX-512 Foundation
	AVX512DQ // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF // AVX-512 Prefetch Instructions
	AVX512ER // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD // AVX-512 Conflict Detection Instructions
	AVX512BW // AVX-512 Byte and Word Instructions
	AVX512VL // AVX-512 Vector Length Extensions
	AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VNNI // AVX-512 Vector Neural Network Instructions
	AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
	GFNI // Galois Field New Instructions
	VAES // Vector AES
	AVX512BITALG // AVX-512 Bit Algorithms
	VPCLMULQDQ // Carry-Less Multiplication Quadword
	AVX512BF16 // AVX-512 BFLOAT16 Instructions
	AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
	MPX // Intel MPX (Memory Protection Extensions)
	ERMS // Enhanced REP MOVSB/STOSB
	RDTSCP // RDTSCP Instruction
	CX16 // CMPXCHG16B Instruction
	SGX // Software Guard Extensions
	SGXLC // Software Guard Extensions Launch Control
	IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
	STIBP // Single Thread Indirect Branch Predictors
	VMX // Virtual Machine Extensions
	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM // Atom processor, some SSSE3 instructions are slower
)
// flagNames maps every single-bit feature flag to the name used for it in
// Flags.Strings / Flags.String. A flag without an entry here yields an empty
// name. Note that SSE4 and SSE42 are printed as "SSE4.1" and "SSE4.2".
var flagNames = map[Flags]string{
	CMOV: "CMOV", // i686 CMOV
	NX: "NX", // NX (No-Execute) bit
	AMD3DNOW: "AMD3DNOW", // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX: "MMX", // Standard MMX
	MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext
	SSE: "SSE", // SSE functions
	SSE2: "SSE2", // P4 SSE2 functions
	SSE3: "SSE3", // Prescott SSE3 functions
	SSSE3: "SSSE3", // Conroe SSSE3 functions
	SSE4: "SSE4.1", // Penryn SSE4.1 functions
	SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
	SSE42: "SSE4.2", // Nehalem SSE4.2 functions
	AVX: "AVX", // AVX functions
	AVX2: "AVX2", // AVX2 functions
	FMA3: "FMA3", // Intel FMA 3
	FMA4: "FMA4", // Bulldozer FMA4 functions
	XOP: "XOP", // Bulldozer XOP functions
	F16C: "F16C", // Half-precision floating-point conversion
	BMI1: "BMI1", // Bit Manipulation Instruction Set 1
	BMI2: "BMI2", // Bit Manipulation Instruction Set 2
	TBM: "TBM", // AMD Trailing Bit Manipulation
	LZCNT: "LZCNT", // LZCNT instruction
	POPCNT: "POPCNT", // POPCNT instruction
	AESNI: "AESNI", // Advanced Encryption Standard New Instructions
	CLMUL: "CLMUL", // Carry-less Multiplication
	HTT: "HTT", // Hyperthreading (enabled)
	HLE: "HLE", // Hardware Lock Elision
	RTM: "RTM", // Restricted Transactional Memory
	RDRAND: "RDRAND", // RDRAND instruction is available
	RDSEED: "RDSEED", // RDSEED instruction is available
	ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA: "SHA", // Intel SHA Extensions
	AVX512F: "AVX512F", // AVX-512 Foundation
	AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions
	AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions
	AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions
	AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions
	AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
	AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2
	AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions
	AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword
	GFNI: "GFNI", // Galois Field New Instructions
	VAES: "VAES", // Vector AES
	AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms
	VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword
	AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instructions
	AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
	MPX: "MPX", // Intel MPX (Memory Protection Extensions)
	ERMS: "ERMS", // Enhanced REP MOVSB/STOSB
	RDTSCP: "RDTSCP", // RDTSCP Instruction
	CX16: "CX16", // CMPXCHG16B Instruction
	SGX: "SGX", // Software Guard Extensions
	SGXLC: "SGXLC", // Software Guard Extensions Launch Control
	IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
	STIBP: "STIBP", // Single Thread Indirect Branch Predictors
	VMX: "VMX", // Virtual Machine Extensions
	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower
}
// CPUInfo contains information about the detected system CPU.
// All fields are filled in by Detect.
type CPUInfo struct {
	BrandName string // Brand name reported by the CPU
	VendorID Vendor // Comparable CPU vendor ID
	Features Flags // Features of the CPU
	PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family int // CPU family number
	Model int // CPU model number
	CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
	// Cache holds the detected cache sizes in bytes.
	Cache struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2 int // L2 Cache (per core or shared). Will be -1 if undetected
		L3 int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX SGXSupport // Software Guard Extensions details, as returned by hasSGX
	maxFunc uint32 // Highest basic CPUID function (EAX of CPUID leaf 0)
	maxExFunc uint32 // Highest extended CPUID function (EAX of CPUID leaf 0x80000000)
}
// Low-level instruction hooks. They are function variables rather than
// functions; nothing in this part of the file assigns them.
// NOTE(review): presumably initCPU installs the platform-specific
// implementations before Detect runs — confirm in the platform files.

// cpuid executes CPUID for leaf op and returns the four result registers.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex executes CPUID for leaf op and sub-leaf op2.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv reads the extended control register selected by index.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm executes RDTSCP. Callers in this file use EAX/EDX as the low/high
// halves of the time-stamp counter and ECX as IA32_TSC_AUX.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data.
var CPU CPUInfo
// init runs once at package load: it calls initCPU and then Detect, so the
// exported CPU variable is populated before any user code runs.
// NOTE(review): initCPU is defined outside this part of the file; presumably it
// installs the cpuid/cpuidex/xgetbv/rdtscpAsm hooks and must therefore run
// before Detect — confirm.
func init() {
	initCPU()
	Detect()
}
  195. // Detect will re-detect current CPU info.
  196. // This will replace the content of the exported CPU variable.
  197. //
  198. // Unless you expect the CPU to change while you are running your program
  199. // you should not need to call this function.
  200. // If you call this, you must ensure that no other goroutine is accessing the
  201. // exported CPU variable.
  202. func Detect() {
  203. CPU.maxFunc = maxFunctionID()
  204. CPU.maxExFunc = maxExtendedFunction()
  205. CPU.BrandName = brandName()
  206. CPU.CacheLine = cacheLine()
  207. CPU.Family, CPU.Model = familyModel()
  208. CPU.Features = support()
  209. CPU.SGX = hasSGX(CPU.Features&SGX != 0, CPU.Features&SGXLC != 0)
  210. CPU.ThreadsPerCore = threadsPerCore()
  211. CPU.LogicalCores = logicalCores()
  212. CPU.PhysicalCores = physicalCores()
  213. CPU.VendorID = vendorID()
  214. CPU.cacheSize()
  215. }
  216. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  217. // Cmov indicates support of CMOV instructions
  218. func (c CPUInfo) Cmov() bool {
  219. return c.Features&CMOV != 0
  220. }
  221. // Amd3dnow indicates support of AMD 3DNOW! instructions
  222. func (c CPUInfo) Amd3dnow() bool {
  223. return c.Features&AMD3DNOW != 0
  224. }
  225. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  226. func (c CPUInfo) Amd3dnowExt() bool {
  227. return c.Features&AMD3DNOWEXT != 0
  228. }
  229. // VMX indicates support of VMX
  230. func (c CPUInfo) VMX() bool {
  231. return c.Features&VMX != 0
  232. }
  233. // MMX indicates support of MMX instructions
  234. func (c CPUInfo) MMX() bool {
  235. return c.Features&MMX != 0
  236. }
  237. // MMXExt indicates support of MMXEXT instructions
  238. // (SSE integer functions or AMD MMX ext)
  239. func (c CPUInfo) MMXExt() bool {
  240. return c.Features&MMXEXT != 0
  241. }
  242. // SSE indicates support of SSE instructions
  243. func (c CPUInfo) SSE() bool {
  244. return c.Features&SSE != 0
  245. }
  246. // SSE2 indicates support of SSE 2 instructions
  247. func (c CPUInfo) SSE2() bool {
  248. return c.Features&SSE2 != 0
  249. }
  250. // SSE3 indicates support of SSE 3 instructions
  251. func (c CPUInfo) SSE3() bool {
  252. return c.Features&SSE3 != 0
  253. }
  254. // SSSE3 indicates support of SSSE 3 instructions
  255. func (c CPUInfo) SSSE3() bool {
  256. return c.Features&SSSE3 != 0
  257. }
  258. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  259. func (c CPUInfo) SSE4() bool {
  260. return c.Features&SSE4 != 0
  261. }
  262. // SSE42 indicates support of SSE4.2 instructions
  263. func (c CPUInfo) SSE42() bool {
  264. return c.Features&SSE42 != 0
  265. }
  266. // AVX indicates support of AVX instructions
  267. // and operating system support of AVX instructions
  268. func (c CPUInfo) AVX() bool {
  269. return c.Features&AVX != 0
  270. }
  271. // AVX2 indicates support of AVX2 instructions
  272. func (c CPUInfo) AVX2() bool {
  273. return c.Features&AVX2 != 0
  274. }
  275. // FMA3 indicates support of FMA3 instructions
  276. func (c CPUInfo) FMA3() bool {
  277. return c.Features&FMA3 != 0
  278. }
  279. // FMA4 indicates support of FMA4 instructions
  280. func (c CPUInfo) FMA4() bool {
  281. return c.Features&FMA4 != 0
  282. }
  283. // XOP indicates support of XOP instructions
  284. func (c CPUInfo) XOP() bool {
  285. return c.Features&XOP != 0
  286. }
  287. // F16C indicates support of F16C instructions
  288. func (c CPUInfo) F16C() bool {
  289. return c.Features&F16C != 0
  290. }
  291. // BMI1 indicates support of BMI1 instructions
  292. func (c CPUInfo) BMI1() bool {
  293. return c.Features&BMI1 != 0
  294. }
  295. // BMI2 indicates support of BMI2 instructions
  296. func (c CPUInfo) BMI2() bool {
  297. return c.Features&BMI2 != 0
  298. }
  299. // TBM indicates support of TBM instructions
  300. // (AMD Trailing Bit Manipulation)
  301. func (c CPUInfo) TBM() bool {
  302. return c.Features&TBM != 0
  303. }
  304. // Lzcnt indicates support of LZCNT instruction
  305. func (c CPUInfo) Lzcnt() bool {
  306. return c.Features&LZCNT != 0
  307. }
  308. // Popcnt indicates support of POPCNT instruction
  309. func (c CPUInfo) Popcnt() bool {
  310. return c.Features&POPCNT != 0
  311. }
  312. // HTT indicates the processor has Hyperthreading enabled
  313. func (c CPUInfo) HTT() bool {
  314. return c.Features&HTT != 0
  315. }
  316. // SSE2Slow indicates that SSE2 may be slow on this processor
  317. func (c CPUInfo) SSE2Slow() bool {
  318. return c.Features&SSE2SLOW != 0
  319. }
  320. // SSE3Slow indicates that SSE3 may be slow on this processor
  321. func (c CPUInfo) SSE3Slow() bool {
  322. return c.Features&SSE3SLOW != 0
  323. }
  324. // AesNi indicates support of AES-NI instructions
  325. // (Advanced Encryption Standard New Instructions)
  326. func (c CPUInfo) AesNi() bool {
  327. return c.Features&AESNI != 0
  328. }
  329. // Clmul indicates support of CLMUL instructions
  330. // (Carry-less Multiplication)
  331. func (c CPUInfo) Clmul() bool {
  332. return c.Features&CLMUL != 0
  333. }
  334. // NX indicates support of NX (No-Execute) bit
  335. func (c CPUInfo) NX() bool {
  336. return c.Features&NX != 0
  337. }
  338. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  339. func (c CPUInfo) SSE4A() bool {
  340. return c.Features&SSE4A != 0
  341. }
  342. // HLE indicates support of Hardware Lock Elision
  343. func (c CPUInfo) HLE() bool {
  344. return c.Features&HLE != 0
  345. }
  346. // RTM indicates support of Restricted Transactional Memory
  347. func (c CPUInfo) RTM() bool {
  348. return c.Features&RTM != 0
  349. }
  350. // Rdrand indicates support of RDRAND instruction is available
  351. func (c CPUInfo) Rdrand() bool {
  352. return c.Features&RDRAND != 0
  353. }
  354. // Rdseed indicates support of RDSEED instruction is available
  355. func (c CPUInfo) Rdseed() bool {
  356. return c.Features&RDSEED != 0
  357. }
  358. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  359. func (c CPUInfo) ADX() bool {
  360. return c.Features&ADX != 0
  361. }
  362. // SHA indicates support of Intel SHA Extensions
  363. func (c CPUInfo) SHA() bool {
  364. return c.Features&SHA != 0
  365. }
  366. // AVX512F indicates support of AVX-512 Foundation
  367. func (c CPUInfo) AVX512F() bool {
  368. return c.Features&AVX512F != 0
  369. }
  370. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  371. func (c CPUInfo) AVX512DQ() bool {
  372. return c.Features&AVX512DQ != 0
  373. }
  374. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  375. func (c CPUInfo) AVX512IFMA() bool {
  376. return c.Features&AVX512IFMA != 0
  377. }
  378. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  379. func (c CPUInfo) AVX512PF() bool {
  380. return c.Features&AVX512PF != 0
  381. }
  382. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  383. func (c CPUInfo) AVX512ER() bool {
  384. return c.Features&AVX512ER != 0
  385. }
  386. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  387. func (c CPUInfo) AVX512CD() bool {
  388. return c.Features&AVX512CD != 0
  389. }
  390. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  391. func (c CPUInfo) AVX512BW() bool {
  392. return c.Features&AVX512BW != 0
  393. }
  394. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  395. func (c CPUInfo) AVX512VL() bool {
  396. return c.Features&AVX512VL != 0
  397. }
  398. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  399. func (c CPUInfo) AVX512VBMI() bool {
  400. return c.Features&AVX512VBMI != 0
  401. }
  402. // AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
  403. func (c CPUInfo) AVX512VBMI2() bool {
  404. return c.Features&AVX512VBMI2 != 0
  405. }
  406. // AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
  407. func (c CPUInfo) AVX512VNNI() bool {
  408. return c.Features&AVX512VNNI != 0
  409. }
  410. // AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
  411. func (c CPUInfo) AVX512VPOPCNTDQ() bool {
  412. return c.Features&AVX512VPOPCNTDQ != 0
  413. }
  414. // GFNI indicates support of Galois Field New Instructions
  415. func (c CPUInfo) GFNI() bool {
  416. return c.Features&GFNI != 0
  417. }
  418. // VAES indicates support of Vector AES
  419. func (c CPUInfo) VAES() bool {
  420. return c.Features&VAES != 0
  421. }
  422. // AVX512BITALG indicates support of AVX-512 Bit Algorithms
  423. func (c CPUInfo) AVX512BITALG() bool {
  424. return c.Features&AVX512BITALG != 0
  425. }
  426. // VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
  427. func (c CPUInfo) VPCLMULQDQ() bool {
  428. return c.Features&VPCLMULQDQ != 0
  429. }
  430. // AVX512BF16 indicates support of
  431. func (c CPUInfo) AVX512BF16() bool {
  432. return c.Features&AVX512BF16 != 0
  433. }
  434. // AVX512VP2INTERSECT indicates support of
  435. func (c CPUInfo) AVX512VP2INTERSECT() bool {
  436. return c.Features&AVX512VP2INTERSECT != 0
  437. }
  438. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  439. func (c CPUInfo) MPX() bool {
  440. return c.Features&MPX != 0
  441. }
  442. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  443. func (c CPUInfo) ERMS() bool {
  444. return c.Features&ERMS != 0
  445. }
  446. // RDTSCP Instruction is available.
  447. func (c CPUInfo) RDTSCP() bool {
  448. return c.Features&RDTSCP != 0
  449. }
  450. // CX16 indicates if CMPXCHG16B instruction is available.
  451. func (c CPUInfo) CX16() bool {
  452. return c.Features&CX16 != 0
  453. }
  454. // TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
  455. // So TSX simply checks that.
  456. func (c CPUInfo) TSX() bool {
  457. return c.Features&(HLE|RTM) == HLE|RTM
  458. }
  459. // Atom indicates an Atom processor
  460. func (c CPUInfo) Atom() bool {
  461. return c.Features&ATOM != 0
  462. }
  463. // Intel returns true if vendor is recognized as Intel
  464. func (c CPUInfo) Intel() bool {
  465. return c.VendorID == Intel
  466. }
  467. // AMD returns true if vendor is recognized as AMD
  468. func (c CPUInfo) AMD() bool {
  469. return c.VendorID == AMD
  470. }
  471. // Hygon returns true if vendor is recognized as Hygon
  472. func (c CPUInfo) Hygon() bool {
  473. return c.VendorID == Hygon
  474. }
  475. // Transmeta returns true if vendor is recognized as Transmeta
  476. func (c CPUInfo) Transmeta() bool {
  477. return c.VendorID == Transmeta
  478. }
  479. // NSC returns true if vendor is recognized as National Semiconductor
  480. func (c CPUInfo) NSC() bool {
  481. return c.VendorID == NSC
  482. }
  483. // VIA returns true if vendor is recognized as VIA
  484. func (c CPUInfo) VIA() bool {
  485. return c.VendorID == VIA
  486. }
  487. // RTCounter returns the 64-bit time-stamp counter
  488. // Uses the RDTSCP instruction. The value 0 is returned
  489. // if the CPU does not support the instruction.
  490. func (c CPUInfo) RTCounter() uint64 {
  491. if !c.RDTSCP() {
  492. return 0
  493. }
  494. a, _, _, d := rdtscpAsm()
  495. return uint64(a) | (uint64(d) << 32)
  496. }
  497. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  498. // This variable is OS dependent, but on Linux contains information
  499. // about the current cpu/core the code is running on.
  500. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  501. func (c CPUInfo) Ia32TscAux() uint32 {
  502. if !c.RDTSCP() {
  503. return 0
  504. }
  505. _, _, ecx, _ := rdtscpAsm()
  506. return ecx
  507. }
  508. // LogicalCPU will return the Logical CPU the code is currently executing on.
  509. // This is likely to change when the OS re-schedules the running thread
  510. // to another CPU.
  511. // If the current core cannot be detected, -1 will be returned.
  512. func (c CPUInfo) LogicalCPU() int {
  513. if c.maxFunc < 1 {
  514. return -1
  515. }
  516. _, ebx, _, _ := cpuid(1)
  517. return int(ebx >> 24)
  518. }
  519. // VM Will return true if the cpu id indicates we are in
  520. // a virtual machine. This is only a hint, and will very likely
  521. // have many false negatives.
  522. func (c CPUInfo) VM() bool {
  523. switch c.VendorID {
  524. case MSVM, KVM, VMware, XenHVM, Bhyve:
  525. return true
  526. }
  527. return false
  528. }
// Flags contains detected CPU features and characteristics.
// It is a bit set: each feature constant (CMOV, NX, ...) occupies one bit.
type Flags uint64
  531. // String returns a string representation of the detected
  532. // CPU features.
  533. func (f Flags) String() string {
  534. return strings.Join(f.Strings(), ",")
  535. }
  536. // Strings returns and array of the detected features.
  537. func (f Flags) Strings() []string {
  538. s := support()
  539. r := make([]string, 0, 20)
  540. for i := uint(0); i < 64; i++ {
  541. key := Flags(1 << i)
  542. val := flagNames[key]
  543. if s&key != 0 {
  544. r = append(r, val)
  545. }
  546. }
  547. return r
  548. }
  549. func maxExtendedFunction() uint32 {
  550. eax, _, _, _ := cpuid(0x80000000)
  551. return eax
  552. }
  553. func maxFunctionID() uint32 {
  554. a, _, _, _ := cpuid(0)
  555. return a
  556. }
  557. func brandName() string {
  558. if maxExtendedFunction() >= 0x80000004 {
  559. v := make([]uint32, 0, 48)
  560. for i := uint32(0); i < 3; i++ {
  561. a, b, c, d := cpuid(0x80000002 + i)
  562. v = append(v, a, b, c, d)
  563. }
  564. return strings.Trim(string(valAsString(v...)), " ")
  565. }
  566. return "unknown"
  567. }
  568. func threadsPerCore() int {
  569. mfi := maxFunctionID()
  570. if mfi < 0x4 || vendorID() != Intel {
  571. return 1
  572. }
  573. if mfi < 0xb {
  574. _, b, _, d := cpuid(1)
  575. if (d & (1 << 28)) != 0 {
  576. // v will contain logical core count
  577. v := (b >> 16) & 255
  578. if v > 1 {
  579. a4, _, _, _ := cpuid(4)
  580. // physical cores
  581. v2 := (a4 >> 26) + 1
  582. if v2 > 0 {
  583. return int(v) / int(v2)
  584. }
  585. }
  586. }
  587. return 1
  588. }
  589. _, b, _, _ := cpuidex(0xb, 0)
  590. if b&0xffff == 0 {
  591. return 1
  592. }
  593. return int(b & 0xffff)
  594. }
  595. func logicalCores() int {
  596. mfi := maxFunctionID()
  597. switch vendorID() {
  598. case Intel:
  599. // Use this on old Intel processors
  600. if mfi < 0xb {
  601. if mfi < 1 {
  602. return 0
  603. }
  604. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  605. // that can be assigned to logical processors in a physical package.
  606. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  607. _, ebx, _, _ := cpuid(1)
  608. logical := (ebx >> 16) & 0xff
  609. return int(logical)
  610. }
  611. _, b, _, _ := cpuidex(0xb, 1)
  612. return int(b & 0xffff)
  613. case AMD, Hygon:
  614. _, b, _, _ := cpuid(1)
  615. return int((b >> 16) & 0xff)
  616. default:
  617. return 0
  618. }
  619. }
  620. func familyModel() (int, int) {
  621. if maxFunctionID() < 0x1 {
  622. return 0, 0
  623. }
  624. eax, _, _, _ := cpuid(1)
  625. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  626. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  627. return int(family), int(model)
  628. }
  629. func physicalCores() int {
  630. switch vendorID() {
  631. case Intel:
  632. return logicalCores() / threadsPerCore()
  633. case AMD, Hygon:
  634. if maxExtendedFunction() >= 0x80000008 {
  635. _, _, c, _ := cpuid(0x80000008)
  636. return int(c&0xff) + 1
  637. }
  638. }
  639. return 0
  640. }
// vendorMapping translates the 12-character vendor string returned by CPUID
// function 0 into a Vendor value. Several strings may map to the same vendor.
// Strings that are missing here are reported as Other by vendorID.
//
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
	"bhyve bhyve ": Bhyve,
	"HygonGenuine": Hygon,
}
  658. func vendorID() Vendor {
  659. _, b, c, d := cpuid(0)
  660. v := valAsString(b, d, c)
  661. vend, ok := vendorMapping[string(v)]
  662. if !ok {
  663. return Other
  664. }
  665. return vend
  666. }
  667. func cacheLine() int {
  668. if maxFunctionID() < 0x1 {
  669. return 0
  670. }
  671. _, ebx, _, _ := cpuid(1)
  672. cache := (ebx & 0xff00) >> 5 // cflush size
  673. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  674. _, _, ecx, _ := cpuid(0x80000006)
  675. cache = ecx & 0xff // cacheline size
  676. }
  677. // TODO: Read from Cache and TLB Information
  678. return int(cache)
  679. }
  680. func (c *CPUInfo) cacheSize() {
  681. c.Cache.L1D = -1
  682. c.Cache.L1I = -1
  683. c.Cache.L2 = -1
  684. c.Cache.L3 = -1
  685. vendor := vendorID()
  686. switch vendor {
  687. case Intel:
  688. if maxFunctionID() < 4 {
  689. return
  690. }
  691. for i := uint32(0); ; i++ {
  692. eax, ebx, ecx, _ := cpuidex(4, i)
  693. cacheType := eax & 15
  694. if cacheType == 0 {
  695. break
  696. }
  697. cacheLevel := (eax >> 5) & 7
  698. coherency := int(ebx&0xfff) + 1
  699. partitions := int((ebx>>12)&0x3ff) + 1
  700. associativity := int((ebx>>22)&0x3ff) + 1
  701. sets := int(ecx) + 1
  702. size := associativity * partitions * coherency * sets
  703. switch cacheLevel {
  704. case 1:
  705. if cacheType == 1 {
  706. // 1 = Data Cache
  707. c.Cache.L1D = size
  708. } else if cacheType == 2 {
  709. // 2 = Instruction Cache
  710. c.Cache.L1I = size
  711. } else {
  712. if c.Cache.L1D < 0 {
  713. c.Cache.L1I = size
  714. }
  715. if c.Cache.L1I < 0 {
  716. c.Cache.L1I = size
  717. }
  718. }
  719. case 2:
  720. c.Cache.L2 = size
  721. case 3:
  722. c.Cache.L3 = size
  723. }
  724. }
  725. case AMD, Hygon:
  726. // Untested.
  727. if maxExtendedFunction() < 0x80000005 {
  728. return
  729. }
  730. _, _, ecx, edx := cpuid(0x80000005)
  731. c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
  732. c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
  733. if maxExtendedFunction() < 0x80000006 {
  734. return
  735. }
  736. _, _, ecx, _ = cpuid(0x80000006)
  737. c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  738. }
  739. return
  740. }
// SGXEPCSection describes one EPC section as enumerated by hasSGX from the
// sub-leaves of CPUID function 0x12.
type SGXEPCSection struct {
	BaseAddress uint64 // Base address of the section (low 12 bits are always zero)
	EPCSize uint64 // Size of the section (low 12 bits are always zero)
}
// SGXSupport holds the Software Guard Extensions details gathered by hasSGX.
// When Available is false all other fields keep their zero value.
type SGXSupport struct {
	Available bool // SGX feature flag is set
	LaunchControl bool // SGXLC (launch control) feature flag is set
	SGX1Supported bool // Bit 0 of EAX of CPUID function 0x12, sub-leaf 0
	SGX2Supported bool // Bit 1 of EAX of CPUID function 0x12, sub-leaf 0
	MaxEnclaveSizeNot64 int64 // 2^EDX[7:0] of the same sub-leaf; presumably the limit outside 64-bit mode — confirm
	MaxEnclaveSize64 int64 // 2^EDX[15:8] of the same sub-leaf; presumably the limit in 64-bit mode — confirm
	EPCSections []SGXEPCSection // EPC sections found in sub-leaves 2 and up
}
  754. func hasSGX(available, lc bool) (rval SGXSupport) {
  755. rval.Available = available
  756. if !available {
  757. return
  758. }
  759. rval.LaunchControl = lc
  760. a, _, _, d := cpuidex(0x12, 0)
  761. rval.SGX1Supported = a&0x01 != 0
  762. rval.SGX2Supported = a&0x02 != 0
  763. rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
  764. rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  765. rval.EPCSections = make([]SGXEPCSection, 0)
  766. for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
  767. eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
  768. leafType := eax & 0xf
  769. if leafType == 0 {
  770. // Invalid subleaf, stop iterating
  771. break
  772. } else if leafType == 1 {
  773. // EPC Section subleaf
  774. baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
  775. size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
  776. section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
  777. rval.EPCSections = append(rval.EPCSections, section)
  778. }
  779. }
  780. return
  781. }
  782. func support() Flags {
  783. mfi := maxFunctionID()
  784. vend := vendorID()
  785. if mfi < 0x1 {
  786. return 0
  787. }
  788. rval := uint64(0)
  789. _, _, c, d := cpuid(1)
  790. if (d & (1 << 15)) != 0 {
  791. rval |= CMOV
  792. }
  793. if (d & (1 << 23)) != 0 {
  794. rval |= MMX
  795. }
  796. if (d & (1 << 25)) != 0 {
  797. rval |= MMXEXT
  798. }
  799. if (d & (1 << 25)) != 0 {
  800. rval |= SSE
  801. }
  802. if (d & (1 << 26)) != 0 {
  803. rval |= SSE2
  804. }
  805. if (c & 1) != 0 {
  806. rval |= SSE3
  807. }
  808. if (c & (1 << 5)) != 0 {
  809. rval |= VMX
  810. }
  811. if (c & 0x00000200) != 0 {
  812. rval |= SSSE3
  813. }
  814. if (c & 0x00080000) != 0 {
  815. rval |= SSE4
  816. }
  817. if (c & 0x00100000) != 0 {
  818. rval |= SSE42
  819. }
  820. if (c & (1 << 25)) != 0 {
  821. rval |= AESNI
  822. }
  823. if (c & (1 << 1)) != 0 {
  824. rval |= CLMUL
  825. }
  826. if c&(1<<23) != 0 {
  827. rval |= POPCNT
  828. }
  829. if c&(1<<30) != 0 {
  830. rval |= RDRAND
  831. }
  832. if c&(1<<29) != 0 {
  833. rval |= F16C
  834. }
  835. if c&(1<<13) != 0 {
  836. rval |= CX16
  837. }
  838. if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
  839. if threadsPerCore() > 1 {
  840. rval |= HTT
  841. }
  842. }
  843. // Check XGETBV, OXSAVE and AVX bits
  844. if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
  845. // Check for OS support
  846. eax, _ := xgetbv(0)
  847. if (eax & 0x6) == 0x6 {
  848. rval |= AVX
  849. if (c & 0x00001000) != 0 {
  850. rval |= FMA3
  851. }
  852. }
  853. }
  854. // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  855. if mfi >= 7 {
  856. _, ebx, ecx, edx := cpuidex(7, 0)
  857. eax1, _, _, _ := cpuidex(7, 1)
  858. if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
  859. rval |= AVX2
  860. }
  861. if (ebx & 0x00000008) != 0 {
  862. rval |= BMI1
  863. if (ebx & 0x00000100) != 0 {
  864. rval |= BMI2
  865. }
  866. }
  867. if ebx&(1<<2) != 0 {
  868. rval |= SGX
  869. }
  870. if ebx&(1<<4) != 0 {
  871. rval |= HLE
  872. }
  873. if ebx&(1<<9) != 0 {
  874. rval |= ERMS
  875. }
  876. if ebx&(1<<11) != 0 {
  877. rval |= RTM
  878. }
  879. if ebx&(1<<14) != 0 {
  880. rval |= MPX
  881. }
  882. if ebx&(1<<18) != 0 {
  883. rval |= RDSEED
  884. }
  885. if ebx&(1<<19) != 0 {
  886. rval |= ADX
  887. }
  888. if ebx&(1<<29) != 0 {
  889. rval |= SHA
  890. }
  891. if edx&(1<<26) != 0 {
  892. rval |= IBPB
  893. }
  894. if ecx&(1<<30) != 0 {
  895. rval |= SGXLC
  896. }
  897. if edx&(1<<27) != 0 {
  898. rval |= STIBP
  899. }
  900. // Only detect AVX-512 features if XGETBV is supported
  901. if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  902. // Check for OS support
  903. eax, _ := xgetbv(0)
  904. // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  905. // ZMM16-ZMM31 state are enabled by OS)
  906. /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  907. if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
  908. if ebx&(1<<16) != 0 {
  909. rval |= AVX512F
  910. }
  911. if ebx&(1<<17) != 0 {
  912. rval |= AVX512DQ
  913. }
  914. if ebx&(1<<21) != 0 {
  915. rval |= AVX512IFMA
  916. }
  917. if ebx&(1<<26) != 0 {
  918. rval |= AVX512PF
  919. }
  920. if ebx&(1<<27) != 0 {
  921. rval |= AVX512ER
  922. }
  923. if ebx&(1<<28) != 0 {
  924. rval |= AVX512CD
  925. }
  926. if ebx&(1<<30) != 0 {
  927. rval |= AVX512BW
  928. }
  929. if ebx&(1<<31) != 0 {
  930. rval |= AVX512VL
  931. }
  932. // ecx
  933. if ecx&(1<<1) != 0 {
  934. rval |= AVX512VBMI
  935. }
  936. if ecx&(1<<6) != 0 {
  937. rval |= AVX512VBMI2
  938. }
  939. if ecx&(1<<8) != 0 {
  940. rval |= GFNI
  941. }
  942. if ecx&(1<<9) != 0 {
  943. rval |= VAES
  944. }
  945. if ecx&(1<<10) != 0 {
  946. rval |= VPCLMULQDQ
  947. }
  948. if ecx&(1<<11) != 0 {
  949. rval |= AVX512VNNI
  950. }
  951. if ecx&(1<<12) != 0 {
  952. rval |= AVX512BITALG
  953. }
  954. if ecx&(1<<14) != 0 {
  955. rval |= AVX512VPOPCNTDQ
  956. }
  957. // edx
  958. if edx&(1<<8) != 0 {
  959. rval |= AVX512VP2INTERSECT
  960. }
  961. // cpuid eax 07h,ecx=1
  962. if eax1&(1<<5) != 0 {
  963. rval |= AVX512BF16
  964. }
  965. }
  966. }
  967. }
  968. if maxExtendedFunction() >= 0x80000001 {
  969. _, _, c, d := cpuid(0x80000001)
  970. if (c & (1 << 5)) != 0 {
  971. rval |= LZCNT
  972. rval |= POPCNT
  973. }
  974. if (d & (1 << 31)) != 0 {
  975. rval |= AMD3DNOW
  976. }
  977. if (d & (1 << 30)) != 0 {
  978. rval |= AMD3DNOWEXT
  979. }
  980. if (d & (1 << 23)) != 0 {
  981. rval |= MMX
  982. }
  983. if (d & (1 << 22)) != 0 {
  984. rval |= MMXEXT
  985. }
  986. if (c & (1 << 6)) != 0 {
  987. rval |= SSE4A
  988. }
  989. if d&(1<<20) != 0 {
  990. rval |= NX
  991. }
  992. if d&(1<<27) != 0 {
  993. rval |= RDTSCP
  994. }
  995. /* Allow for selectively disabling SSE2 functions on AMD processors
  996. with SSE2 support but not SSE4a. This includes Athlon64, some
  997. Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
  998. than SSE2 often enough to utilize this special-case flag.
  999. AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
  1000. so that SSE2 is used unless explicitly disabled by checking
  1001. AV_CPU_FLAG_SSE2SLOW. */
  1002. if vendorID() != Intel &&
  1003. rval&SSE2 != 0 && (c&0x00000040) == 0 {
  1004. rval |= SSE2SLOW
  1005. }
  1006. /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  1007. * used unless the OS has AVX support. */
  1008. if (rval & AVX) != 0 {
  1009. if (c & 0x00000800) != 0 {
  1010. rval |= XOP
  1011. }
  1012. if (c & 0x00010000) != 0 {
  1013. rval |= FMA4
  1014. }
  1015. }
  1016. if vendorID() == Intel {
  1017. family, model := familyModel()
  1018. if family == 6 && (model == 9 || model == 13 || model == 14) {
  1019. /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
  1020. * 6/14 (core1 "yonah") theoretically support sse2, but it's
  1021. * usually slower than mmx. */
  1022. if (rval & SSE2) != 0 {
  1023. rval |= SSE2SLOW
  1024. }
  1025. if (rval & SSE3) != 0 {
  1026. rval |= SSE3SLOW
  1027. }
  1028. }
  1029. /* The Atom processor has SSSE3 support, which is useful in many cases,
  1030. * but sometimes the SSSE3 version is slower than the SSE2 equivalent
  1031. * on the Atom, but is generally faster on other processors supporting
  1032. * SSSE3. This flag allows for selectively disabling certain SSSE3
  1033. * functions on the Atom. */
  1034. if family == 6 && model == 28 {
  1035. rval |= ATOM
  1036. }
  1037. }
  1038. }
  1039. return Flags(rval)
  1040. }
  // valAsString turns the given 32-bit words into a byte string, emitting each
  // word least-significant byte first. The output stops just before the first
  // zero byte encountered; if there is none, all 4*len(values) bytes are
  // returned.
  func valAsString(values ...uint32) []byte {
  	out := make([]byte, 0, 4*len(values))
  	for _, word := range values {
  		for shift := uint(0); shift < 32; shift += 8 {
  			b := byte(word >> shift)
  			if b == 0 {
  				// A zero byte terminates the string.
  				return out
  			}
  			out = append(out, b)
  		}
  	}
  	return out
  }