xref: /netbsd-src/sys/arch/x86/x86/identcpu.c (revision f3cfa6f6ce31685c6c4a758bc430e69eb99f50a4)
1 /*	$NetBSD: identcpu.c,v 1.91 2019/05/24 14:28:48 nonaka Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Frank van der Linden,  and by Jason R. Thorpe.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 __KERNEL_RCSID(0, "$NetBSD: identcpu.c,v 1.91 2019/05/24 14:28:48 nonaka Exp $");
34 
35 #include "opt_xen.h"
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/device.h>
40 #include <sys/cpu.h>
41 
42 #include <uvm/uvm_extern.h>
43 
44 #include <machine/specialreg.h>
45 #include <machine/pio.h>
46 #include <machine/cpu.h>
47 
48 #include <x86/cputypes.h>
49 #include <x86/cacheinfo.h>
50 #include <x86/cpuvar.h>
51 #include <x86/fpu.h>
52 
53 #include <x86/x86/vmtreg.h>	/* for vmt_hvcall() */
54 #include <x86/x86/vmtvar.h>	/* for vmt_hvcall() */
55 
56 #ifndef XEN
57 #include "hyperv.h"
58 #if NHYPERV > 0
59 #include <x86/x86/hypervvar.h>
60 #endif
61 #endif
62 
63 static const struct x86_cache_info intel_cpuid_cache_info[] = INTEL_CACHE_INFO;
64 
65 static const struct x86_cache_info amd_cpuid_l2cache_assoc_info[] =
66 	AMD_L2CACHE_INFO;
67 
68 static const struct x86_cache_info amd_cpuid_l3cache_assoc_info[] =
69 	AMD_L3CACHE_INFO;
70 
71 int cpu_vendor;
72 char cpu_brand_string[49];
73 
74 int x86_fpu_save __read_mostly;
75 unsigned int x86_fpu_save_size __read_mostly = sizeof(struct save87);
76 uint64_t x86_xsave_features __read_mostly = 0;
77 
78 /*
79  * Note: these are just the ones that may not have a cpuid instruction.
80  * We deal with the rest in a different way.
81  */
82 const int i386_nocpuid_cpus[] = {
83 	CPUVENDOR_INTEL, CPUCLASS_386,	/* CPU_386SX */
84 	CPUVENDOR_INTEL, CPUCLASS_386,	/* CPU_386   */
85 	CPUVENDOR_INTEL, CPUCLASS_486,	/* CPU_486SX */
86 	CPUVENDOR_INTEL, CPUCLASS_486, 	/* CPU_486   */
87 	CPUVENDOR_CYRIX, CPUCLASS_486,	/* CPU_486DLC */
88 	CPUVENDOR_CYRIX, CPUCLASS_486,	/* CPU_6x86 */
89 	CPUVENDOR_NEXGEN, CPUCLASS_386,	/* CPU_NX586 */
90 };
91 
92 static const char cpu_vendor_names[][10] = {
93 	"Unknown", "Intel", "NS/Cyrix", "NexGen", "AMD", "IDT/VIA", "Transmeta",
94 	"Vortex86"
95 };
96 
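/*
 * Find the entry with the given descriptor in a cache-info table
 * terminated by a zero cai_desc.
 */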
97 static const struct x86_cache_info *
98 cache_info_lookup(const struct x86_cache_info *cai, uint8_t desc)
99 {
100 	int i;
101 
102 	for (i = 0; cai[i].cai_desc != 0; i++) {
103 		if (cai[i].cai_desc == desc)
104 			return (&cai[i]);
105 	}
106 
107 	return (NULL);
108 }
109 
110 static void
111 cpu_probe_intel_cache(struct cpu_info *ci)
112 {
113 	const struct x86_cache_info *cai;
114 	u_int descs[4];
115 	int iterations, i, j;
116 	uint8_t desc;
117 
118 	if (cpuid_level >= 2) {
119 		/* Parse the cache info from `cpuid leaf 2', if we have it. */
120 		x86_cpuid(2, descs);
121 		iterations = descs[0] & 0xff;
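		/*
		 * The low byte of %eax is the number of times leaf 2
		 * must be queried to obtain all descriptors (in practice
		 * always 1).  A register with bit 31 set holds no valid
		 * descriptors, and the low byte of %eax itself is not a
		 * descriptor, hence the two tests below.
		 */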
122 		while (iterations-- > 0) {
123 			for (i = 0; i < 4; i++) {
124 				if (descs[i] & 0x80000000)
125 					continue;
126 				for (j = 0; j < 4; j++) {
127 					if (i == 0 && j == 0)
128 						continue;
129 					desc = (descs[i] >> (j * 8)) & 0xff;
130 					if (desc == 0)
131 						continue;
132 					cai = cache_info_lookup(
133 					    intel_cpuid_cache_info, desc);
134 					if (cai != NULL) {
135 						ci->ci_cinfo[cai->cai_index] =
136 						    *cai;
137 					}
138 				}
139 			}
140 		}
141 	}
142 
143 	if (cpuid_level >= 4) {
144 		int type, level;
145 		int ways, partitions, linesize, sets;
146 		int caitype = -1;
147 		int totalsize;
148 
149 		/* Parse the cache info from `cpuid leaf 4', if we have it. */
150 		for (i = 0; ; i++) {
151 			x86_cpuid2(4, i, descs);
152 			type = __SHIFTOUT(descs[0], CPUID_DCP_CACHETYPE);
153 			if (type == CPUID_DCP_CACHETYPE_N)
154 				break;
155 			level = __SHIFTOUT(descs[0], CPUID_DCP_CACHELEVEL);
156 			switch (level) {
157 			case 1:
158 				if (type == CPUID_DCP_CACHETYPE_I)
159 					caitype = CAI_ICACHE;
160 				else if (type == CPUID_DCP_CACHETYPE_D)
161 					caitype = CAI_DCACHE;
162 				else
163 					caitype = -1;
164 				break;
165 			case 2:
166 				if (type == CPUID_DCP_CACHETYPE_U)
167 					caitype = CAI_L2CACHE;
168 				else
169 					caitype = -1;
170 				break;
171 			case 3:
172 				if (type == CPUID_DCP_CACHETYPE_U)
173 					caitype = CAI_L3CACHE;
174 				else
175 					caitype = -1;
176 				break;
177 			default:
178 				caitype = -1;
179 				break;
180 			}
181 			if (caitype == -1)
182 				continue;
183 
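			/* The DCP fields are encoded as (value - 1). */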
184 			ways = __SHIFTOUT(descs[1], CPUID_DCP_WAYS) + 1;
185 			partitions = __SHIFTOUT(descs[1], CPUID_DCP_PARTITIONS)
186 			    + 1;
187 			linesize = __SHIFTOUT(descs[1], CPUID_DCP_LINESIZE)
188 			    + 1;
189 			sets = descs[2] + 1;
190 			totalsize = ways * partitions * linesize * sets;
191 			ci->ci_cinfo[caitype].cai_totalsize = totalsize;
192 			ci->ci_cinfo[caitype].cai_associativity = ways;
193 			ci->ci_cinfo[caitype].cai_linesize = linesize;
194 		}
195 	}
196 }
197 
198 static void
199 cpu_probe_intel_errata(struct cpu_info *ci)
200 {
201 	u_int family, model, stepping;
202 
203 	family = CPUID_TO_FAMILY(ci->ci_signature);
204 	model = CPUID_TO_MODEL(ci->ci_signature);
205 	stepping = CPUID_TO_STEPPING(ci->ci_signature);
206 
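	/*
	 * The Apollo Lake stepping below has a MONITOR/MWAIT erratum,
	 * so turn MWAIT off in the MSR and hide the feature from
	 * cpuid consumers.
	 */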
207 	if (family == 0x6 && model == 0x5C && stepping == 0x9) { /* Apollo Lake */
208 		wrmsr(MSR_MISC_ENABLE,
209 		    rdmsr(MSR_MISC_ENABLE) & ~IA32_MISC_MWAIT_EN);
210 
211 		cpu_feature[1] &= ~CPUID2_MONITOR;
212 		ci->ci_feat_val[1] &= ~CPUID2_MONITOR;
213 	}
214 }
215 
216 static void
217 cpu_probe_intel(struct cpu_info *ci)
218 {
219 
220 	if (cpu_vendor != CPUVENDOR_INTEL)
221 		return;
222 
223 	cpu_probe_intel_cache(ci);
224 	cpu_probe_intel_errata(ci);
225 }
226 
227 static void
228 cpu_probe_amd_cache(struct cpu_info *ci)
229 {
230 	const struct x86_cache_info *cp;
231 	struct x86_cache_info *cai;
232 	int family, model;
233 	u_int descs[4];
234 	u_int lfunc;
235 
236 	family = CPUID_TO_FAMILY(ci->ci_signature);
237 	model = CPUID_TO_MODEL(ci->ci_signature);
238 
239 	/*
240 	 * K5 model 0 has none of this info.
241 	 */
242 	if (family == 5 && model == 0)
243 		return;
244 
245 	/*
246 	 * Determine the largest extended function value.
247 	 */
248 	x86_cpuid(0x80000000, descs);
249 	lfunc = descs[0];
250 
251 	/*
252 	 * Determine L1 cache/TLB info.
253 	 */
254 	if (lfunc < 0x80000005) {
255 		/* No L1 cache info available. */
256 		return;
257 	}
258 
259 	x86_cpuid(0x80000005, descs);
260 
261 	/*
262 	 * K6-III and higher have large page TLBs.
263 	 */
264 	if ((family == 5 && model >= 9) || family >= 6) {
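		/*
		 * For TLBs, cai_totalsize holds the number of entries
		 * and cai_linesize the page size each entry covers.
		 */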
265 		cai = &ci->ci_cinfo[CAI_ITLB2];
266 		cai->cai_totalsize = AMD_L1_EAX_ITLB_ENTRIES(descs[0]);
267 		cai->cai_associativity = AMD_L1_EAX_ITLB_ASSOC(descs[0]);
268 		cai->cai_linesize = (4 * 1024 * 1024);
269 
270 		cai = &ci->ci_cinfo[CAI_DTLB2];
271 		cai->cai_totalsize = AMD_L1_EAX_DTLB_ENTRIES(descs[0]);
272 		cai->cai_associativity = AMD_L1_EAX_DTLB_ASSOC(descs[0]);
273 		cai->cai_linesize = (4 * 1024 * 1024);
274 	}
275 
276 	cai = &ci->ci_cinfo[CAI_ITLB];
277 	cai->cai_totalsize = AMD_L1_EBX_ITLB_ENTRIES(descs[1]);
278 	cai->cai_associativity = AMD_L1_EBX_ITLB_ASSOC(descs[1]);
279 	cai->cai_linesize = (4 * 1024);
280 
281 	cai = &ci->ci_cinfo[CAI_DTLB];
282 	cai->cai_totalsize = AMD_L1_EBX_DTLB_ENTRIES(descs[1]);
283 	cai->cai_associativity = AMD_L1_EBX_DTLB_ASSOC(descs[1]);
284 	cai->cai_linesize = (4 * 1024);
285 
286 	cai = &ci->ci_cinfo[CAI_DCACHE];
287 	cai->cai_totalsize = AMD_L1_ECX_DC_SIZE(descs[2]);
288 	cai->cai_associativity = AMD_L1_ECX_DC_ASSOC(descs[2]);
289 	cai->cai_linesize = AMD_L1_ECX_DC_LS(descs[2]);
290 
291 	cai = &ci->ci_cinfo[CAI_ICACHE];
292 	cai->cai_totalsize = AMD_L1_EDX_IC_SIZE(descs[3]);
293 	cai->cai_associativity = AMD_L1_EDX_IC_ASSOC(descs[3]);
294 	cai->cai_linesize = AMD_L1_EDX_IC_LS(descs[3]);
295 
296 	/*
297 	 * Determine L2 cache/TLB info.
298 	 */
299 	if (lfunc < 0x80000006) {
300 		/* No L2 cache info available. */
301 		return;
302 	}
303 
304 	x86_cpuid(0x80000006, descs);
305 
306 	cai = &ci->ci_cinfo[CAI_L2CACHE];
307 	cai->cai_totalsize = AMD_L2_ECX_C_SIZE(descs[2]);
308 	cai->cai_associativity = AMD_L2_ECX_C_ASSOC(descs[2]);
309 	cai->cai_linesize = AMD_L2_ECX_C_LS(descs[2]);
310 
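	/*
	 * The AMD associativity field is an encoding, not a count;
	 * map it through the table (0 means unknown/reserved).
	 */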
311 	cp = cache_info_lookup(amd_cpuid_l2cache_assoc_info,
312 	    cai->cai_associativity);
313 	if (cp != NULL)
314 		cai->cai_associativity = cp->cai_associativity;
315 	else
316 		cai->cai_associativity = 0;	/* XXX Unknown/reserved */
317 
318 	if (family < 0xf) {
319 		/* No L3 cache info available. */
320 		return;
321 	}
322 
323 	cai = &ci->ci_cinfo[CAI_L3CACHE];
324 	cai->cai_totalsize = AMD_L3_EDX_C_SIZE(descs[3]);
325 	cai->cai_associativity = AMD_L3_EDX_C_ASSOC(descs[3]);
326 	cai->cai_linesize = AMD_L3_EDX_C_LS(descs[3]);
327 
328 	cp = cache_info_lookup(amd_cpuid_l3cache_assoc_info,
329 	    cai->cai_associativity);
330 	if (cp != NULL)
331 		cai->cai_associativity = cp->cai_associativity;
332 	else
333 		cai->cai_associativity = 0;	/* XXX Unknown/reserved */
334 
335 	if (lfunc < 0x80000019) {
336 		/* No 1GB Page TLB */
337 		return;
338 	}
339 
340 	x86_cpuid(0x80000019, descs);
341 
342 	cai = &ci->ci_cinfo[CAI_L1_1GBDTLB];
343 	cai->cai_totalsize = AMD_L1_1GB_EAX_DTLB_ENTRIES(descs[0]);
344 	cai->cai_associativity = AMD_L1_1GB_EAX_DTLB_ASSOC(descs[0]);
345 	cai->cai_linesize = (1024 * 1024 * 1024);
346 
347 	cai = &ci->ci_cinfo[CAI_L1_1GBITLB];
348 	cai->cai_totalsize = AMD_L1_1GB_EAX_IUTLB_ENTRIES(descs[0]);
349 	cai->cai_associativity = AMD_L1_1GB_EAX_IUTLB_ASSOC(descs[0]);
350 	cai->cai_linesize = (1024 * 1024 * 1024);
351 
352 	cai = &ci->ci_cinfo[CAI_L2_1GBDTLB];
353 	cai->cai_totalsize = AMD_L2_1GB_EBX_DUTLB_ENTRIES(descs[1]);
354 	cai->cai_associativity = AMD_L2_1GB_EBX_DUTLB_ASSOC(descs[1]);
355 	cai->cai_linesize = (1024 * 1024 * 1024);
356 
357 	cai = &ci->ci_cinfo[CAI_L2_1GBITLB];
358 	cai->cai_totalsize = AMD_L2_1GB_EBX_IUTLB_ENTRIES(descs[1]);
359 	cai->cai_associativity = AMD_L2_1GB_EBX_IUTLB_ASSOC(descs[1]);
360 	cai->cai_linesize = (1024 * 1024 * 1024);
361 }
362 
363 static void
364 cpu_probe_amd(struct cpu_info *ci)
365 {
366 	uint64_t val;
367 	int flag;
368 
369 	if (cpu_vendor != CPUVENDOR_AMD)
370 		return;
371 	if (CPUID_TO_FAMILY(ci->ci_signature) < 5)
372 		return;
373 
374 	switch (CPUID_TO_FAMILY(ci->ci_signature)) {
375 	case 0x05: /* K5 */
376 		if (CPUID_TO_MODEL(ci->ci_signature) == 0) {
377 			/*
378 			 * According to the AMD Processor Recognition App Note,
379 			 * the AMD-K5 Model 0 uses the wrong bit to indicate
380 			 * support for global PTEs, instead using bit 9 (APIC)
381 			 * rather than bit 13 (i.e. "0x200" vs. "0x2000").
382 			 */
383 			flag = ci->ci_feat_val[0];
384 			if ((flag & CPUID_APIC) != 0)
385 				flag = (flag & ~CPUID_APIC) | CPUID_PGE;
386 			ci->ci_feat_val[0] = flag;
387 		}
388 		break;
389 
390 	case 0x10: /* Family 10h */
391 		/*
392 		 * On Family 10h, certain BIOSes do not enable WC+ support.
393 		 * This causes WC+ to become CD, and degrades guest
394 		 * performance at the NPT level.
395 		 *
396 		 * Explicitly enable WC+ if we're not a guest.
397 		 */
398 		if (!ISSET(ci->ci_feat_val[1], CPUID2_RAZ)) {
399 			val = rdmsr(MSR_BU_CFG2);
400 			val &= ~BU_CFG2_CWPLUS_DIS;
401 			wrmsr(MSR_BU_CFG2, val);
402 		}
403 		break;
404 	}
405 
406 	cpu_probe_amd_cache(ci);
407 }
408 
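/*
 * Cyrix configuration registers are reached through an index/data
 * register pair at I/O ports 0x22 and 0x23.
 */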
409 static inline uint8_t
410 cyrix_read_reg(uint8_t reg)
411 {
412 
413 	outb(0x22, reg);
414 	return inb(0x23);
415 }
416 
417 static inline void
418 cyrix_write_reg(uint8_t reg, uint8_t data)
419 {
420 
421 	outb(0x22, reg);
422 	outb(0x23, data);
423 }
424 
425 static void
426 cpu_probe_cyrix_cmn(struct cpu_info *ci)
427 {
428 	/*
429 	 * i8254 latch check routine:
430 	 *     National Geode (formerly Cyrix MediaGX) has a serious bug in
431 	 *     its built-in i8254-compatible clock module (cs5510 and cs5520).
432 	 *     Set the variable 'clock_broken_latch' to indicate it.
433 	 *
434 	 * This bug is not present in the cs5530, and the flag
435 	 * is disabled again in sys/arch/i386/pci/pcib.c if this later
436 	 * model device is detected. Ideally, this work-around should not
437 	 * even be in here; it should be in there. XXX
438 	 */
439 	uint8_t c3;
440 #ifndef XEN
441 	extern int clock_broken_latch;
442 
443 	switch (ci->ci_signature) {
444 	case 0x440:     /* Cyrix MediaGX */
445 	case 0x540:     /* GXm */
446 		clock_broken_latch = 1;
447 		break;
448 	}
449 #endif
450 
451 	/* set up various cyrix registers */
452 	/*
453 	 * Enable suspend on halt (powersave mode).
454 	 * When powersave mode is enabled, the TSC stops counting
455 	 * while the CPU is halted in idle() waiting for an interrupt.
456 	 * This means we can't use the TSC for interval time in
457 	 * microtime(9), and thus it is disabled here.
458 	 *
459 	 * It still makes a perfectly good cycle counter
460 	 * for program profiling, so long as you remember you're
461 	 * counting cycles, and not time. Further, if you don't
462 	 * mind not using powersave mode, the TSC works just fine,
463 	 * so this should really be optional. XXX
464 	 */
465 	cyrix_write_reg(0xc2, cyrix_read_reg(0xc2) | 0x08);
466 
467 	/*
468 	 * Do not disable the TSC on the Geode GX, it's reported to
469 	 * work fine.
470 	 */
471 	if (ci->ci_signature != 0x552)
472 		ci->ci_feat_val[0] &= ~CPUID_TSC;
473 
474 	/* enable access to ccr4/ccr5 */
475 	c3 = cyrix_read_reg(0xC3);
476 	cyrix_write_reg(0xC3, c3 | 0x10);
477 	/* cyrix's workaround  for the "coma bug" */
478 	cyrix_write_reg(0x31, cyrix_read_reg(0x31) | 0xf8);
479 	cyrix_write_reg(0x32, cyrix_read_reg(0x32) | 0x7f);
480 	cyrix_write_reg(0x33, cyrix_read_reg(0x33) & ~0xff);
481 	cyrix_write_reg(0x3c, cyrix_read_reg(0x3c) | 0x87);
482 	/* disable access to ccr4/ccr5 */
483 	cyrix_write_reg(0xC3, c3);
484 }
485 
486 static void
487 cpu_probe_cyrix(struct cpu_info *ci)
488 {
489 
490 	if (cpu_vendor != CPUVENDOR_CYRIX ||
491 	    CPUID_TO_FAMILY(ci->ci_signature) < 4 ||
492 	    CPUID_TO_FAMILY(ci->ci_signature) > 6)
493 		return;
494 
495 	cpu_probe_cyrix_cmn(ci);
496 }
497 
498 static void
499 cpu_probe_winchip(struct cpu_info *ci)
500 {
501 
502 	if (cpu_vendor != CPUVENDOR_IDT ||
503 	    CPUID_TO_FAMILY(ci->ci_signature) != 5)
504 	    	return;
505 
506 	/* WinChip C6 */
507 	if (CPUID_TO_MODEL(ci->ci_signature) == 4)
508 		ci->ci_feat_val[0] &= ~CPUID_TSC;
509 }
510 
511 static void
512 cpu_probe_c3(struct cpu_info *ci)
513 {
514 	u_int family, model, stepping, descs[4], lfunc;
	uint64_t msr;	/* rdmsr() returns 64 bits; don't truncate them */
515 	struct x86_cache_info *cai;
516 
517 	if (cpu_vendor != CPUVENDOR_IDT ||
518 	    CPUID_TO_FAMILY(ci->ci_signature) < 6)
519 	    	return;
520 
521 	family = CPUID_TO_FAMILY(ci->ci_signature);
522 	model = CPUID_TO_MODEL(ci->ci_signature);
523 	stepping = CPUID_TO_STEPPING(ci->ci_signature);
524 
525 	/* Determine the largest extended function value. */
526 	x86_cpuid(0x80000000, descs);
527 	lfunc = descs[0];
528 
529 	if (family == 6) {
530 		/*
531 		 * VIA Eden ESP.
532 		 *
533 		 * Quoting from page 3-4 of: "VIA Eden ESP Processor Datasheet"
534 		 * http://www.via.com.tw/download/mainboards/6/14/Eden20v115.pdf
535 		 *
536 		 * 1. The CMPXCHG8B instruction is provided and always enabled;
537 		 *    however, it appears disabled in the corresponding CPUID
538 		 *    function bit 0 to avoid a bug in an early version of
539 		 *    Windows NT. This default can be changed via a
540 		 *    bit in the FCR MSR.
541 		 */
542 		ci->ci_feat_val[0] |= CPUID_CX8;
543 		wrmsr(MSR_VIA_FCR, rdmsr(MSR_VIA_FCR) | VIA_ACE_ECX8);
544 	}
545 
546 	if (family > 6 || model > 0x9 || (model == 0x9 && stepping >= 3)) {
547 		/* VIA Nehemiah or Esther. */
548 		x86_cpuid(0xc0000000, descs);
549 		lfunc = descs[0];
550 		if (lfunc >= 0xc0000001) {	/* has ACE, RNG */
551 		    int rng_enable = 0, ace_enable = 0;
552 		    x86_cpuid(0xc0000001, descs);
553 		    lfunc = descs[3];
554 		    ci->ci_feat_val[4] = lfunc;
555 		    /* Check for and enable RNG */
556 		    if (lfunc & CPUID_VIA_HAS_RNG) {
557 		    	if (!(lfunc & CPUID_VIA_DO_RNG)) {
558 			    rng_enable++;
559 			    ci->ci_feat_val[4] |= CPUID_VIA_DO_RNG;
560 			}
561 		    }
562 		    /* Check for and enable ACE (AES-CBC) */
563 		    if (lfunc & CPUID_VIA_HAS_ACE) {
564 			if (!(lfunc & CPUID_VIA_DO_ACE)) {
565 			    ace_enable++;
566 			    ci->ci_feat_val[4] |= CPUID_VIA_DO_ACE;
567 			}
568 		    }
569 		    /* Check for and enable SHA */
570 		    if (lfunc & CPUID_VIA_HAS_PHE) {
571 			if (!(lfunc & CPUID_VIA_DO_PHE)) {
572 			    ace_enable++;
573 			    ci->ci_feat_val[4] |= CPUID_VIA_DO_PHE;
574 			}
575 		    }
576 		    /* Check for and enable ACE2 (AES-CTR) */
577 		    if (lfunc & CPUID_VIA_HAS_ACE2) {
578 			if (!(lfunc & CPUID_VIA_DO_ACE2)) {
579 			    ace_enable++;
580 			    ci->ci_feat_val[4] |= CPUID_VIA_DO_ACE2;
581 			}
582 		    }
583 		    /* Check for and enable PMM (modmult engine) */
584 		    if (lfunc & CPUID_VIA_HAS_PMM) {
585 			if (!(lfunc & CPUID_VIA_DO_PMM)) {
586 			    ace_enable++;
587 			    ci->ci_feat_val[4] |= CPUID_VIA_DO_PMM;
588 			}
589 		    }
590 
591 		    /*
592 		     * Actually do the enables.  It's a little gross,
593 		     * but per the PadLock programming guide, "Enabling
594 		     * PadLock", condition 3, we must enable SSE too or
595 		     * else the first use of RNG or ACE instructions
596 		     * will generate a trap.
597 		     *
598 		     * We must do this early because of kernel RNG
599 		     * initialization but it is safe without the full
600 		     * FPU-detect as all these CPUs have SSE.
601 		     */
602 		    lcr4(rcr4() | CR4_OSFXSR);
603 
604 		    if (rng_enable) {
605 			msr = rdmsr(MSR_VIA_RNG);
606 			msr |= MSR_VIA_RNG_ENABLE;
607 			/* C7 stepping 8 and subsequent CPUs have dual RNG */
608 			if (model > 0xA || (model == 0xA && stepping > 0x7)) {
609 				msr |= MSR_VIA_RNG_2NOISE;
610 			}
611 			wrmsr(MSR_VIA_RNG, msr);
612 		    }
613 
614 		    if (ace_enable) {
615 			msr = rdmsr(MSR_VIA_ACE);
616 			wrmsr(MSR_VIA_ACE, msr | VIA_ACE_ENABLE);
617 		    }
618 		}
619 	}
620 
621 	/* Explicitly disable unsafe ALTINST mode. */
622 	if (ci->ci_feat_val[4] & CPUID_VIA_DO_ACE) {
623 		msr = rdmsr(MSR_VIA_ACE);
624 		wrmsr(MSR_VIA_ACE, msr & ~VIA_ACE_ALTINST);
625 	}
626 
627 	/*
628 	 * Determine L1 cache/TLB info.  lfunc may have been reused above
629 	 * for the VIA 0xc0000000 leaves, so re-read the largest extended
630 	 * function value first.
	 */
	x86_cpuid(0x80000000, descs);
	lfunc = descs[0];
	if (lfunc < 0x80000005) {
631 		/* No L1 cache info available. */
632 		return;
633 	}
634 
635 	x86_cpuid(0x80000005, descs);
636 
637 	cai = &ci->ci_cinfo[CAI_ITLB];
638 	cai->cai_totalsize = VIA_L1_EBX_ITLB_ENTRIES(descs[1]);
639 	cai->cai_associativity = VIA_L1_EBX_ITLB_ASSOC(descs[1]);
640 	cai->cai_linesize = (4 * 1024);
641 
642 	cai = &ci->ci_cinfo[CAI_DTLB];
643 	cai->cai_totalsize = VIA_L1_EBX_DTLB_ENTRIES(descs[1]);
644 	cai->cai_associativity = VIA_L1_EBX_DTLB_ASSOC(descs[1]);
645 	cai->cai_linesize = (4 * 1024);
646 
647 	cai = &ci->ci_cinfo[CAI_DCACHE];
648 	cai->cai_totalsize = VIA_L1_ECX_DC_SIZE(descs[2]);
649 	cai->cai_associativity = VIA_L1_ECX_DC_ASSOC(descs[2]);
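	/* XXX the EDX IC macro is applied to %ecx here; the line-size
	 * field sits in the same bits for both caches. */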
650 	cai->cai_linesize = VIA_L1_EDX_IC_LS(descs[2]);
651 	if (family == 6 && model == 9 && stepping == 8) {
652 		/* Erratum: stepping 8 reports 4 when it should be 2 */
653 		cai->cai_associativity = 2;
654 	}
655 
656 	cai = &ci->ci_cinfo[CAI_ICACHE];
657 	cai->cai_totalsize = VIA_L1_EDX_IC_SIZE(descs[3]);
658 	cai->cai_associativity = VIA_L1_EDX_IC_ASSOC(descs[3]);
659 	cai->cai_linesize = VIA_L1_EDX_IC_LS(descs[3]);
660 	if (family == 6 && model == 9 && stepping == 8) {
661 		/* Erratum: stepping 8 reports 4 when it should be 2 */
662 		cai->cai_associativity = 2;
663 	}
664 
665 	/*
666 	 * Determine L2 cache/TLB info.
667 	 */
668 	if (lfunc < 0x80000006) {
669 		/* No L2 cache info available. */
670 		return;
671 	}
672 
673 	x86_cpuid(0x80000006, descs);
674 
675 	cai = &ci->ci_cinfo[CAI_L2CACHE];
676 	if (family > 6 || model >= 9) {
677 		cai->cai_totalsize = VIA_L2N_ECX_C_SIZE(descs[2]);
678 		cai->cai_associativity = VIA_L2N_ECX_C_ASSOC(descs[2]);
679 		cai->cai_linesize = VIA_L2N_ECX_C_LS(descs[2]);
680 	} else {
681 		cai->cai_totalsize = VIA_L2_ECX_C_SIZE(descs[2]);
682 		cai->cai_associativity = VIA_L2_ECX_C_ASSOC(descs[2]);
683 		cai->cai_linesize = VIA_L2_ECX_C_LS(descs[2]);
684 	}
685 }
686 
687 static void
688 cpu_probe_geode(struct cpu_info *ci)
689 {
690 
691 	if (memcmp("Geode by NSC", ci->ci_vendor, 12) != 0 ||
692 	    CPUID_TO_FAMILY(ci->ci_signature) != 5)
693 	    	return;
694 
695 	cpu_probe_cyrix_cmn(ci);
696 	cpu_probe_amd_cache(ci);
697 }
698 
699 static void
700 cpu_probe_vortex86(struct cpu_info *ci)
701 {
702 #define PCI_MODE1_ADDRESS_REG	0x0cf8
703 #define PCI_MODE1_DATA_REG	0x0cfc
704 #define PCI_MODE1_ENABLE	0x80000000UL
705 
706 	uint32_t reg;
707 
708 	if (cpu_vendor != CPUVENDOR_VORTEX86)
709 		return;
710 	/*
711 	 * The CPU model is available from the "Customer ID register" in
712 	 * North Bridge Function 0 PCI space.  We can't use pci_conf_read()
713 	 * because the PCI subsystem is not initialised early enough.
715 	 */
716 
717 	outl(PCI_MODE1_ADDRESS_REG, PCI_MODE1_ENABLE | 0x90);
718 	reg = inl(PCI_MODE1_DATA_REG);
719 
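	/*
	 * The Customer ID reads as ASCII "DMPn" (0x30504d44 masked),
	 * where the digit n encodes the model in the low three bits
	 * of the top byte.
	 */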
720 	if ((reg & 0xf8ffffff) != 0x30504d44) {
721 		reg = 0;
722 	} else {
723 		reg = (reg >> 24) & 7;
724 	}
725 
726 	static const char *cpu_vortex86_flavor[] = {
727 	    "??", "SX", "DX", "MX", "DX2", "MX+", "DX3", "EX",
728 	};
729 	snprintf(cpu_brand_string, sizeof(cpu_brand_string), "Vortex86%s",
730 	    cpu_vortex86_flavor[reg]);
731 
732 #undef PCI_MODE1_ENABLE
733 #undef PCI_MODE1_ADDRESS_REG
734 #undef PCI_MODE1_DATA_REG
735 }
736 
737 static void
738 cpu_probe_old_fpu(struct cpu_info *ci)
739 {
740 #if defined(__i386__) && !defined(XENPV)
741 
742 	clts();
743 	fninit();
744 
745 	/* Check for 'FDIV' bug on the original Pentium */
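	/*
	 * npx586bug1(x, y) computes x - (x/y)*y in the FPU; a correct
	 * divider returns 0, the flawed one a non-zero residue.
	 */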
746 	if (npx586bug1(4195835, 3145727) != 0)
747 		/* NB 120+MHz cpus are not affected */
748 		i386_fpu_fdivbug = 1;
749 
750 	stts();
751 #endif
752 }
753 
754 static void
755 cpu_probe_fpu(struct cpu_info *ci)
756 {
757 	u_int descs[4];
758 
759 	x86_fpu_eager = true;
760 	x86_fpu_save = FPU_SAVE_FSAVE;
761 
762 #ifdef i386
763 	/* If we have FXSAVE/FXRSTOR, use them. */
764 	if ((ci->ci_feat_val[0] & CPUID_FXSR) == 0) {
765 		i386_use_fxsave = 0;
766 		/* Allow for no fpu even if cpuid is supported */
767 		cpu_probe_old_fpu(ci);
768 		return;
769 	}
770 
771 	i386_use_fxsave = 1;
772 	/*
773 	 * If we have SSE/SSE2, enable XMM exceptions, and
774 	 * notify userland.
775 	 */
776 	if (ci->ci_feat_val[0] & CPUID_SSE)
777 		i386_has_sse = 1;
778 	if (ci->ci_feat_val[0] & CPUID_SSE2)
779 		i386_has_sse2 = 1;
780 #else
781 	/*
782 	 * For amd64 i386_use_fxsave, i386_has_sse and i386_has_sse2 are
783 	 * #defined to 1, because fxsave/sse/sse2 are always present.
784 	 */
785 #endif
786 
787 	x86_fpu_save = FPU_SAVE_FXSAVE;
788 	x86_fpu_save_size = sizeof(struct fxsave);
789 
790 	/* See if xsave (for AVX) is supported */
791 	if ((ci->ci_feat_val[1] & CPUID2_XSAVE) == 0)
792 		return;
793 
794 #ifdef XENPV
795 	/*
796 	 * The Xen kernel can disable XSAVE via the "no-xsave" option; in that
797 	 * case XSAVE instructions like xrstor become privileged and trigger a
798 	 * supervisor trap. The OSXSAVE flag seems to be reliably set according
799 	 * to whether XSAVE is actually available.
800 	 */
801 	if ((ci->ci_feat_val[1] & CPUID2_OSXSAVE) == 0)
802 		return;
803 #endif
804 
805 	x86_fpu_save = FPU_SAVE_XSAVE;
806 
807 #if 0 /* XXX PR 52966 */
808 	x86_cpuid2(0xd, 1, descs);
809 	if (descs[0] & CPUID_PES1_XSAVEOPT)
810 		x86_fpu_save = FPU_SAVE_XSAVEOPT;
811 #endif
812 
813 	/* Get features and maximum size of the save area */
814 	x86_cpuid(0xd, descs);
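	/*
	 * Leaf 0xd, subleaf 0: %ecx is the size of the save area for
	 * all supported features; %edx:%eax is the supported feature
	 * mask (XCR0 bits).
	 */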
815 	if (descs[2] > sizeof(struct fxsave))
816 		x86_fpu_save_size = descs[2];
817 
818 	x86_xsave_features = (uint64_t)descs[3] << 32 | descs[0];
819 }
820 
821 void
822 cpu_probe(struct cpu_info *ci)
823 {
824 	u_int descs[4];
825 	int i;
826 	uint32_t miscbytes;
827 	uint32_t brand[12];
828 
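	/*
	 * Start from the (vendor, class) pair recorded for CPUs that
	 * lack the cpuid instruction; cpuid data overrides it below.
	 */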
829 	cpu_vendor = i386_nocpuid_cpus[cputype << 1];
830 	cpu_class = i386_nocpuid_cpus[(cputype << 1) + 1];
831 
832 	if (cpuid_level < 0) {
833 		/* cpuid instruction not supported */
834 		cpu_probe_old_fpu(ci);
835 		return;
836 	}
837 
838 	for (i = 0; i < __arraycount(ci->ci_feat_val); i++) {
839 		ci->ci_feat_val[i] = 0;
840 	}
841 
842 	x86_cpuid(0, descs);
843 	cpuid_level = descs[0];
844 	ci->ci_max_cpuid = descs[0];
845 
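	/* The vendor string comes back in %ebx, %edx, %ecx order. */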
846 	ci->ci_vendor[0] = descs[1];
847 	ci->ci_vendor[2] = descs[2];
848 	ci->ci_vendor[1] = descs[3];
849 	ci->ci_vendor[3] = 0;
850 
851 	if (memcmp(ci->ci_vendor, "GenuineIntel", 12) == 0)
852 		cpu_vendor = CPUVENDOR_INTEL;
853 	else if (memcmp(ci->ci_vendor,  "AuthenticAMD", 12) == 0)
854 		cpu_vendor = CPUVENDOR_AMD;
855 	else if (memcmp(ci->ci_vendor,  "CyrixInstead", 12) == 0)
856 		cpu_vendor = CPUVENDOR_CYRIX;
857 	else if (memcmp(ci->ci_vendor,  "Geode by NSC", 12) == 0)
858 		cpu_vendor = CPUVENDOR_CYRIX;
859 	else if (memcmp(ci->ci_vendor, "CentaurHauls", 12) == 0)
860 		cpu_vendor = CPUVENDOR_IDT;
861 	else if (memcmp(ci->ci_vendor, "GenuineTMx86", 12) == 0)
862 		cpu_vendor = CPUVENDOR_TRANSMETA;
863 	else if (memcmp(ci->ci_vendor, "Vortex86 SoC", 12) == 0)
864 		cpu_vendor = CPUVENDOR_VORTEX86;
865 	else
866 		cpu_vendor = CPUVENDOR_UNKNOWN;
867 
868 	if (cpuid_level >= 1) {
869 		x86_cpuid(1, descs);
870 		ci->ci_signature = descs[0];
871 		miscbytes = descs[1];
872 		ci->ci_feat_val[1] = descs[2];
873 		ci->ci_feat_val[0] = descs[3];
874 
875 		/* Determine family + class. */
876 		cpu_class = CPUID_TO_FAMILY(ci->ci_signature)
877 		    + (CPUCLASS_386 - 3);
878 		if (cpu_class > CPUCLASS_686)
879 			cpu_class = CPUCLASS_686;
880 
881 		/* CLFLUSH line size is next 8 bits */
882 		if (ci->ci_feat_val[0] & CPUID_CFLUSH)
883 			ci->ci_cflush_lsize
884 			    = __SHIFTOUT(miscbytes, CPUID_CLFLUSH_SIZE) << 3;
885 		ci->ci_initapicid = __SHIFTOUT(miscbytes, CPUID_LOCAL_APIC_ID);
886 	}
887 
888 	/*
889 	 * Get the basic information from the extended cpuid leaves.
890 	 * These were first implemented by AMD, but most of the values
891 	 * match those generated by modern Intel CPUs.
892 	 */
893 	x86_cpuid(0x80000000, descs);
894 	if (descs[0] >= 0x80000000)
895 		ci->ci_max_ext_cpuid = descs[0];
896 	else
897 		ci->ci_max_ext_cpuid = 0;
898 
899 	if (ci->ci_max_ext_cpuid >= 0x80000001) {
900 		/* Determine the extended feature flags. */
901 		x86_cpuid(0x80000001, descs);
902 		ci->ci_feat_val[3] = descs[2]; /* %ecx */
903 		ci->ci_feat_val[2] = descs[3]; /* %edx */
904 	}
905 
906 	if (ci->ci_max_ext_cpuid >= 0x80000004) {
907 		x86_cpuid(0x80000002, brand);
908 		x86_cpuid(0x80000003, brand + 4);
909 		x86_cpuid(0x80000004, brand + 8);
910 		/* Skip leading spaces on brand */
911 		for (i = 0; i < 48; i++) {
912 			if (((char *) brand)[i] != ' ')
913 				break;
914 		}
915 		memcpy(cpu_brand_string, ((char *) brand) + i, 48 - i);
916 	}
917 
918 	/*
919 	 * Get the structured extended features.
920 	 */
921 	if (cpuid_level >= 7) {
922 		x86_cpuid(7, descs);
923 		ci->ci_feat_val[5] = descs[1]; /* %ebx */
924 		ci->ci_feat_val[6] = descs[2]; /* %ecx */
925 		ci->ci_feat_val[7] = descs[3]; /* %edx */
926 	}
927 
928 	cpu_probe_intel(ci);
929 	cpu_probe_amd(ci);
930 	cpu_probe_cyrix(ci);
931 	cpu_probe_winchip(ci);
932 	cpu_probe_c3(ci);
933 	cpu_probe_geode(ci);
934 	cpu_probe_vortex86(ci);
935 
936 	cpu_probe_fpu(ci);
937 
938 	x86_cpu_topology(ci);
939 
940 	if (cpu_vendor != CPUVENDOR_AMD && (ci->ci_feat_val[0] & CPUID_TM) &&
941 	    (rdmsr(MSR_MISC_ENABLE) & (1 << 3)) == 0) {
942 		/* Enable thermal monitor 1. */
943 		wrmsr(MSR_MISC_ENABLE, rdmsr(MSR_MISC_ENABLE) | (1<<3));
944 	}
945 
946 	ci->ci_feat_val[0] &= ~CPUID_FEAT_BLACKLIST;
947 	if (ci == &cpu_info_primary) {
948 		/* If first: the Boot Processor is the cpu_feature reference. */
949 		for (i = 0; i < __arraycount(cpu_feature); i++) {
950 			cpu_feature[i] = ci->ci_feat_val[i];
951 		}
952 		identify_hypervisor();
953 #ifndef XEN
954 		/* Early patch of text segment. */
955 		x86_patch(true);
956 #endif
957 	} else {
958 		/*
959 		 * If not first, warn about cpu_feature mismatches on
960 		 * secondary CPUs.
961 		 */
962 		for (i = 0; i < __arraycount(cpu_feature); i++) {
963 			if (cpu_feature[i] != ci->ci_feat_val[i])
964 				aprint_error_dev(ci->ci_dev,
965 				    "feature mismatch: cpu_feature[%d] is "
966 				    "%#x, but CPU reported %#x\n",
967 				    i, cpu_feature[i], ci->ci_feat_val[i]);
968 		}
969 	}
970 }
971 
972 /* Write what we know about the CPU to the console... */
973 void
974 cpu_identify(struct cpu_info *ci)
975 {
976 
977 	cpu_setmodel("%s %d86-class",
978 	    cpu_vendor_names[cpu_vendor], cpu_class + 3);
979 	if (cpu_brand_string[0] != '\0') {
980 		aprint_normal_dev(ci->ci_dev, "%s", cpu_brand_string);
981 	} else {
982 		aprint_normal_dev(ci->ci_dev, "%s", cpu_getmodel());
983 		if (ci->ci_data.cpu_cc_freq != 0)
984 			aprint_normal(", %dMHz",
985 			    (int)(ci->ci_data.cpu_cc_freq / 1000000));
986 	}
987 	if (ci->ci_signature != 0)
988 		aprint_normal(", id 0x%x", ci->ci_signature);
989 	aprint_normal("\n");
990 	aprint_normal_dev(ci->ci_dev, "package %lu, core %lu, smt %lu\n",
991 	    ci->ci_package_id, ci->ci_core_id, ci->ci_smt_id);
992 	if (cpu_brand_string[0] == '\0') {
993 		strlcpy(cpu_brand_string, cpu_getmodel(),
994 		    sizeof(cpu_brand_string));
995 	}
996 	if (cpu_class == CPUCLASS_386) {
997 		panic("NetBSD requires an 80486DX or later processor");
998 	}
999 	if (cputype == CPU_486DLC) {
1000 		aprint_error("WARNING: BUGGY CYRIX CACHE\n");
1001 	}
1002 
1003 #if !defined(XENPV) || defined(DOM0OPS)       /* on Xen PV rdmsr is for Dom0 only */
1004 	if (cpu_vendor == CPUVENDOR_AMD     /* check enablement of an */
1005 	    && device_unit(ci->ci_dev) == 0 /* AMD feature only once */
1006 	    && ((cpu_feature[3] & CPUID_SVM) == CPUID_SVM)) {
1007 		uint64_t val;
1008 
1009 		val = rdmsr(MSR_VMCR);
1010 		if (((val & VMCR_SVMED) == VMCR_SVMED)
1011 		    && ((val & VMCR_LOCK) == VMCR_LOCK)) {
1012 			aprint_normal_dev(ci->ci_dev,
1013 				"SVM disabled by the BIOS\n");
1014 		}
1015 	}
1016 #endif
1017 
1018 #ifdef i386
1019 	if (i386_fpu_fdivbug == 1)
1020 		aprint_normal_dev(ci->ci_dev,
1021 		    "WARNING: Pentium FDIV bug detected!\n");
1022 
1023 	if (cpu_vendor == CPUVENDOR_TRANSMETA) {
1024 		u_int descs[4];
1025 		x86_cpuid(0x80860000, descs);
1026 		if (descs[0] >= 0x80860007)
1027 			/* Create longrun sysctls */
1028 			tmx86_init_longrun();
1029 	}
1030 #endif	/* i386 */
1031 
1032 }
1033 
1034 /*
1035  * Hypervisor
1036  */
1037 vm_guest_t vm_guest = VM_GUEST_NO;
1038 
1039 static const char * const vm_bios_vendors[] = {
1040 	"QEMU",				/* QEMU */
1041 	"Plex86",			/* Plex86 */
1042 	"Bochs",			/* Bochs */
1043 	"Xen",				/* Xen */
1044 	"BHYVE",			/* bhyve */
1045 	"Seabios",			/* KVM */
1046 };
1047 
1048 static const char * const vm_system_products[] = {
1049 	"VMware Virtual Platform",	/* VMWare VM */
1050 	"Virtual Machine",		/* Microsoft VirtualPC */
1051 	"VirtualBox",			/* Sun xVM VirtualBox */
1052 	"Parallels Virtual Platform",	/* Parallels VM */
1053 	"KVM",				/* KVM */
1054 };
1055 
1056 void
1057 identify_hypervisor(void)
1058 {
1059 	u_int regs[6];
1060 	char hv_vendor[12];
1061 	const char *p;
1062 	int i;
1063 
1064 	if (vm_guest != VM_GUEST_NO)
1065 		return;
1066 
1067 	/*
1068 	 * [RFC] CPUID usage for interaction between Hypervisors and Linux.
1069 	 * http://lkml.org/lkml/2008/10/1/246
1070 	 *
1071 	 * KB1009458: Mechanisms to determine if software is running in
1072 	 * a VMware virtual machine
1073 	 * http://kb.vmware.com/kb/1009458
1074 	 */
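	/*
	 * CPUID leaf 1 %ecx bit 31 is reserved-as-zero on real hardware
	 * and set by hypervisors.
	 */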
1075 	if (ISSET(cpu_feature[1], CPUID2_RAZ)) {
1076 		vm_guest = VM_GUEST_VM;
1077 		x86_cpuid(0x40000000, regs);
1078 		if (regs[0] >= 0x40000000) {
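			/*
			 * Leaf 0x40000000: %ebx, %ecx, %edx spell out the
			 * hypervisor's signature string.
			 */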
1079 			memcpy(&hv_vendor[0], &regs[1], sizeof(*regs));
1080 			memcpy(&hv_vendor[4], &regs[2], sizeof(*regs));
1081 			memcpy(&hv_vendor[8], &regs[3], sizeof(*regs));
1082 			if (memcmp(hv_vendor, "VMwareVMware", 12) == 0)
1083 				vm_guest = VM_GUEST_VMWARE;
1084 			else if (memcmp(hv_vendor, "Microsoft Hv", 12) == 0) {
1085 				vm_guest = VM_GUEST_HV;
1086 #if NHYPERV > 0
1087 				hyperv_early_init();
1088 #endif
1089 			} else if (memcmp(hv_vendor, "KVMKVMKVM\0\0\0", 12) == 0)
1090 				vm_guest = VM_GUEST_KVM;
1091 			else if (memcmp(hv_vendor, "XenVMMXenVMM", 12) == 0)
1092 				vm_guest = VM_GUEST_XEN;
1093 			/* FreeBSD bhyve: "bhyve bhyve " */
1094 			/* OpenBSD vmm:   "OpenBSDVMM58" */
1095 			/* NetBSD nvmm:   "___ NVMM ___" */
1096 		}
1097 		return;
1098 	}
1099 
1100 	/*
1101 	 * Examine SMBIOS strings for older hypervisors.
1102 	 */
1103 	p = pmf_get_platform("system-serial");
1104 	if (p != NULL) {
1105 		if (strncmp(p, "VMware-", 7) == 0 || strncmp(p, "VMW", 3) == 0) {
1106 			vmt_hvcall(VM_CMD_GET_VERSION, regs);
1107 			if (regs[1] == VM_MAGIC) {
1108 				vm_guest = VM_GUEST_VMWARE;
1109 				return;
1110 			}
1111 		}
1112 	}
1113 	p = pmf_get_platform("bios-vendor");
1114 	if (p != NULL) {
1115 		for (i = 0; i < __arraycount(vm_bios_vendors); i++) {
1116 			if (strcmp(p, vm_bios_vendors[i]) == 0) {
1117 				vm_guest = VM_GUEST_VM;
1118 				return;
1119 			}
1120 		}
1121 	}
1122 	p = pmf_get_platform("system-product");
1123 	if (p != NULL) {
1124 		for (i = 0; i < __arraycount(vm_system_products); i++) {
1125 			if (strcmp(p, vm_system_products[i]) == 0) {
1126 				vm_guest = VM_GUEST_VM;
1127 				return;
1128 			}
1129 		}
1130 	}
1131 }
1132