/*	$OpenBSD: identcpu.c,v 1.148 2024/10/07 20:30:17 dv Exp $	*/
/*	$NetBSD: identcpu.c,v 1.1 2003/04/26 18:39:28 fvdl Exp $	*/

/*
 * Copyright (c) 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Frank van der Linden for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include "vmm.h"
#include "pvbus.h"

#include <machine/cpu.h>
#include <machine/cpufunc.h>

#if NPVBUS > 0
#include <dev/pv/pvvar.h>
#endif

void	replacesmap(void);
void	replacemeltdown(void);
uint64_t cpu_freq(struct cpu_info *);
void	tsc_identify(struct cpu_info *);
void	tsc_timecounter_init(struct cpu_info *, uint64_t);
#if NVMM > 0
void	cpu_check_vmm_cap(struct cpu_info *);
#endif /* NVMM > 0 */

/* sysctl wants this. */
char cpu_model[48];
int cpuspeed;

int amd64_has_xcrypt;
int amd64_pos_cbit;	/* C bit position for SEV */
int has_rdrand;
int has_rdseed;

int
cpu_amd64speed(int *freq)
{
	*freq = cpuspeed;
	return (0);
}

#ifndef SMALL_KERNEL
void	intelcore_update_sensor(void *);
void	cpu_hz_update_sensor(void *);

/*
 * Temperature read on the CPU is relative to the maximum
 * temperature supported by the CPU, Tj(Max).
 * Refer to:
 * 64-ia-32-architectures-software-developer-vol-3c-part-3-manual.pdf
 * Section 35 and
 * http://www.intel.com/content/dam/www/public/us/en/documents/
 * white-papers/cpu-monitoring-dts-peci-paper.pdf
 *
 * The temperature on Intel CPUs can be between 70 and 105 degC, since
 * Westmere we can read the TJmax from the die. For older CPUs we have
 * to guess or use undocumented MSRs. Then we subtract the temperature
 * portion of thermal status from max to get current temperature.
 */
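/*
 * Worked example (values illustrative): with Tj(Max) = 100 degC and a
 * DTS readout of 38, the die is at 100 - 38 = 62 degC, stored as
 * 62 * 1000000 + 273150000 = 335150000 microkelvin.
 */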
void
intelcore_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;
	int max = 100;

	/* Only some Core family chips have MSR_TEMPERATURE_TARGET. */
	if (ci->ci_model == 0x0e &&
	    (rdmsr(MSR_TEMPERATURE_TARGET_UNDOCUMENTED) &
	     MSR_TEMPERATURE_TARGET_LOW_BIT_UNDOCUMENTED))
		max = 85;

	/*
	 * Newer CPUs can tell you what their max temperature is.
	 * See: '64-ia-32-architectures-software-developer-
	 * vol-3c-part-3-manual.pdf'
	 */
	if (ci->ci_model > 0x17 && ci->ci_model != 0x1c &&
	    ci->ci_model != 0x26 && ci->ci_model != 0x27 &&
	    ci->ci_model != 0x35 && ci->ci_model != 0x36)
		max = MSR_TEMPERATURE_TARGET_TJMAX(
		    rdmsr(MSR_TEMPERATURE_TARGET));

	msr = rdmsr(MSR_THERM_STATUS);
	if (msr & MSR_THERM_STATUS_VALID_BIT) {
		ci->ci_sensor.value = max - MSR_THERM_STATUS_TEMP(msr);
		/* micro degrees */
		ci->ci_sensor.value *= 1000000;
		/* kelvin */
		ci->ci_sensor.value += 273150000;
		ci->ci_sensor.flags &= ~SENSOR_FINVALID;
	} else {
		ci->ci_sensor.value = 0;
		ci->ci_sensor.flags |= SENSOR_FINVALID;
	}
}

/*
 * Effective CPU frequency measurement
 *
 * Refer to:
 *   64-ia-32-architectures-software-developer-vol-3b-part-2-manual.pdf
 *   Section 14.2 and
 *   OSRR for AMD Family 17h processors Section 2.1.2
 * Round to 50MHz which is the accuracy of this measurement.
 */
#define FREQ_50MHZ	(50ULL * 1000000ULL * 1000000ULL)
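/*
 * Worked example (values illustrative): an APERF/MPERF delta ratio of
 * 1.37 on a 2.4 GHz TSC gives 1370000 * 2400000000 = 3.288 GHz in
 * micro-Hz (FREQ_50MHZ carries the same micro-Hz scaling); rounding to
 * the nearest FREQ_50MHZ step stores 3.30 GHz in the sensor.
 */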
void
cpu_hz_update_sensor(void *args)
{
	extern uint64_t	 tsc_frequency;
	struct cpu_info	*ci = args;
	uint64_t	 mperf, aperf, mdelta, adelta, val;
	unsigned long	 s;

	sched_peg_curproc(ci);

	s = intr_disable();
	mperf = rdmsr(MSR_MPERF);
	aperf = rdmsr(MSR_APERF);
	intr_restore(s);

	mdelta = mperf - ci->ci_hz_mperf;
	adelta = aperf - ci->ci_hz_aperf;
	ci->ci_hz_mperf = mperf;
	ci->ci_hz_aperf = aperf;

	if (mdelta > 0) {
		val = (adelta * 1000000) / mdelta * tsc_frequency;
		val = ((val + FREQ_50MHZ / 2) / FREQ_50MHZ) * FREQ_50MHZ;
		ci->ci_hz_sensor.value = val;
	}

	sched_unpeg_curproc();
}
#endif

void (*setperf_setup)(struct cpu_info *);

void via_nano_setup(struct cpu_info *ci);

void cpu_topology(struct cpu_info *ci);

void
via_nano_setup(struct cpu_info *ci)
{
	u_int32_t regs[4], val;
	u_int64_t msreg;
	int model = (ci->ci_signature >> 4) & 15;

	if (model >= 9) {
		CPUID(0xC0000000, regs[0], regs[1], regs[2], regs[3]);
		val = regs[0];
		if (val >= 0xC0000001) {
			CPUID(0xC0000001, regs[0], regs[1], regs[2], regs[3]);
			val = regs[3];
		} else
			val = 0;

		if (val & (C3_CPUID_HAS_RNG | C3_CPUID_HAS_ACE))
			printf("%s:", ci->ci_dev->dv_xname);

		/* Enable RNG if present and disabled */
		if (val & C3_CPUID_HAS_RNG) {
			extern int viac3_rnd_present;

			if (!(val & C3_CPUID_DO_RNG)) {
				msreg = rdmsr(0x110B);
				msreg |= 0x40;
				wrmsr(0x110B, msreg);
			}
			viac3_rnd_present = 1;
			printf(" RNG");
		}

		/* Enable AES engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_AES;
#endif /* CRYPTO */
			printf(" AES");
		}

		/* Enable ACE2 engine if present and disabled */
		if (val & C3_CPUID_HAS_ACE2) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_ACE2)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_AESCTR;
#endif /* CRYPTO */
			printf(" AES-CTR");
		}

		/* Enable SHA engine if present and disabled */
		if (val & C3_CPUID_HAS_PHE) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PHE)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_SHA;
#endif /* CRYPTO */
			printf(" SHA1 SHA256");
		}

		/* Enable MM engine if present and disabled */
		if (val & C3_CPUID_HAS_PMM) {
#ifdef CRYPTO
			if (!(val & C3_CPUID_DO_PMM)) {
				msreg = rdmsr(0x1107);
				msreg |= (0x01 << 28);
				wrmsr(0x1107, msreg);
			}
			amd64_has_xcrypt |= C3_HAS_MM;
#endif /* CRYPTO */
			printf(" RSA");
		}

		printf("\n");
	}
}

#ifndef SMALL_KERNEL
void via_update_sensor(void *args);
void
via_update_sensor(void *args)
{
	struct cpu_info *ci = (struct cpu_info *) args;
	u_int64_t msr;

	msr = rdmsr(MSR_CENT_TMTEMPERATURE);
	ci->ci_sensor.value = (msr & 0xffffff);
	/* micro degrees */
	ci->ci_sensor.value *= 1000000;
	ci->ci_sensor.value += 273150000;
	ci->ci_sensor.flags &= ~SENSOR_FINVALID;
}
#endif

uint64_t
cpu_freq_ctr(struct cpu_info *ci, uint32_t cpu_perf_eax,
    uint32_t cpu_perf_edx)
{
	uint64_t count, last_count, msr;

	if ((ci->ci_flags & CPUF_CONST_TSC) == 0 ||
	    (cpu_perf_eax & CPUIDEAX_VERID) <= 1 ||
	    CPUIDEDX_NUM_FC(cpu_perf_edx) <= 1)
		return (0);

	msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
	if (msr & MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK)) {
		/* some hypervisor is dicking us around */
		return (0);
	}

	msr |= MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_1);
	wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);

	msr = rdmsr(MSR_PERF_GLOBAL_CTRL) | MSR_PERF_GLOBAL_CTR1_EN;
	wrmsr(MSR_PERF_GLOBAL_CTRL, msr);

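	/*
	 * Fixed-function counter 1 counts unhalted core cycles; sampled
	 * across the 100ms delay below, the delta times 10 approximates
	 * the current core clock in Hz.
	 */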
	last_count = rdmsr(MSR_PERF_FIXED_CTR1);
	delay(100000);
	count = rdmsr(MSR_PERF_FIXED_CTR1);

	msr = rdmsr(MSR_PERF_FIXED_CTR_CTRL);
	msr &= ~MSR_PERF_FIXED_CTR_FC(1, MSR_PERF_FIXED_CTR_FC_MASK);
	wrmsr(MSR_PERF_FIXED_CTR_CTRL, msr);

	msr = rdmsr(MSR_PERF_GLOBAL_CTRL);
	msr &= ~MSR_PERF_GLOBAL_CTR1_EN;
	wrmsr(MSR_PERF_GLOBAL_CTRL, msr);

	return ((count - last_count) * 10);
}

uint64_t
cpu_freq(struct cpu_info *ci)
{
	uint64_t last_count, count;

	last_count = rdtsc();
	delay(100000);
	count = rdtsc();

	return ((count - last_count) * 10);
}

/* print flags from one cpuid for cpu0 */
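/*
 * The kernel's %b printf format (see printf(9)) decodes a bit mask with
 * a description string: the first character gives the output radix and
 * each bit is named by its 1-based position, so for example
 * printf("%b", 3, "\20\2TWO\1ONE") prints "3<TWO,ONE>".
 */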
static inline void
pcpu0id3(const char *id, char reg1, uint32_t val1, const char *bits1,
    char reg2, uint32_t val2, const char *bits2,
    char reg3, uint32_t val3, const char *bits3)
{
	if (val1 || val2 || val3) {
		printf("\ncpu0: cpuid %s", id);
		if (val1)
			printf(" e%cx=%b", reg1, val1, bits1);
		if (val2)
			printf(" e%cx=%b", reg2, val2, bits2);
		if (val3)
			printf(" e%cx=%b", reg3, val3, bits3);
	}
}

/* print flags from one, 32-bit MSR for cpu0 */
static inline void
pmsr032(uint32_t msr, uint32_t value, const char *bits)
{
	if (value)
		printf("\ncpu0: msr %x=%b", msr, value, bits);
}

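/*
 * Print the bits by which value differs from base_value: bits present
 * only in base_value are printed with a "-" prefix, bits present only
 * in value with a "+" prefix. For example, base_value 0x3 vs value 0x5
 * prints the name of bit 0x2 as lost and the name of bit 0x4 as gained.
 */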
static void
pbitdiff(uint32_t value, uint32_t base_value, const char *bits)
{
	uint32_t minus;
	if (value == base_value)
		return;
	minus = base_value & ~value;
	value &= ~base_value;
	if (minus)
		printf("-%b", minus, bits);
	if (value)
		printf("+%b", value, bits);
}

static inline void
pcpuid(struct cpu_info *ci, const char *id, char reg, uint32_t val,
    uint32_t prev_val, const char *bits)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg, val, bits, 0, 0, NULL, 0, 0, NULL);
	else if (val != prev_val) {
		printf("\n%s: cpuid %s e%cx=", ci->ci_dev->dv_xname, id, reg);
		pbitdiff(val, prev_val, bits);
	}
}

static inline void
pcpuid2(struct cpu_info *ci, const char *id,
    char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
    char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, 0, 0,
		    NULL);
	else if (val1 != prev_val1 || val2 != prev_val2) {
		printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
		if (val1 != prev_val1) {
			printf(" e%cx=", reg1);
			pbitdiff(val1, prev_val1, bits1);
		}
		if (val2 != prev_val2) {
			printf(" e%cx=", reg2);
			pbitdiff(val2, prev_val2, bits2);
		}
	}
}

static inline void
pcpuid3(struct cpu_info *ci, const char *id,
    char reg1, uint32_t val1, uint32_t prev_val1, const char *bits1,
    char reg2, uint32_t val2, uint32_t prev_val2, const char *bits2,
    char reg3, uint32_t val3, uint32_t prev_val3, const char *bits3)
{
	if (CPU_IS_PRIMARY(ci))
		pcpu0id3(id, reg1, val1, bits1, reg2, val2, bits2, reg3, val3,
		    bits3);
	else if (val1 != prev_val1 || val2 != prev_val2 || val3 != prev_val3) {
		printf("\n%s: cpuid %s", ci->ci_dev->dv_xname, id);
		if (val1 != prev_val1) {
			printf(" e%cx=", reg1);
			pbitdiff(val1, prev_val1, bits1);
		}
		if (val2 != prev_val2) {
			printf(" e%cx=", reg2);
			pbitdiff(val2, prev_val2, bits2);
		}
		if (val3 != prev_val3) {
			printf(" e%cx=", reg3);
			pbitdiff(val3, prev_val3, bits3);
		}
	}
}

static inline void
pmsr32(struct cpu_info *ci, uint32_t msr, uint32_t value, uint32_t prev_value,
    const char *bits)
{
	if (CPU_IS_PRIMARY(ci))
		pmsr032(msr, value, bits);
	else if (value != prev_value) {
		printf("\n%s: msr %x=", ci->ci_dev->dv_xname, msr);
		pbitdiff(value, prev_value, bits);
	}
}

#ifdef MULTIPROCESSOR
static uint32_t prevcpu_perf_eax;
static uint32_t prevcpu_perf_edx;
#endif

static inline void
print_perf_cpuid(struct cpu_info *ci, uint32_t cpu_perf_eax,
    uint32_t cpu_perf_edx)
{
	uint32_t version;

	if (CPU_IS_PRIMARY(ci)) {
		version = cpu_perf_eax & CPUIDEAX_VERID;
		if (version == 0)
			return;
	}
#ifdef MULTIPROCESSOR
	else {
		/* if no difference on the bits we care about, say nothing */
		if (((cpu_perf_eax ^ prevcpu_perf_eax) & 0x00ffffff) == 0 &&
		    ((cpu_perf_edx ^ prevcpu_perf_edx) & 0x00001fff) == 0)
			return;
		version = cpu_perf_eax & CPUIDEAX_VERID;
	}
	prevcpu_perf_eax = cpu_perf_eax;
	prevcpu_perf_edx = cpu_perf_edx;
#endif

	printf("\n%s: cpuid a vers=%d", ci->ci_dev->dv_xname, version);
	if (version) {
		printf(", gp=%d, gpwidth=%d", CPUIDEAX_NUM_GC(cpu_perf_eax),
		    CPUIDEAX_BIT_GC(cpu_perf_eax));
		if (version > 1) {
			printf(", ff=%d, ffwidth=%d",
			    CPUIDEDX_NUM_FC(cpu_perf_edx),
			    CPUIDEDX_BIT_FC(cpu_perf_edx));
		}
	}
}

void
identifycpu(struct cpu_info *ci)
{
	static uint32_t prevcpu_1_ecx, prevcpu_tpm_ecxflags, prevcpu_d_1_eax;
	static uint32_t prevcpu_apmi_edx, prevcpu_arch_capa;
	static struct cpu_info *prevci = &cpu_info_primary;
#define CPUID_MEMBER(member)	ci->member, prevci->member
	uint32_t cflushsz, curcpu_1_ecx, curcpu_apmi_edx = 0;
	uint32_t curcpu_perf_eax = 0, curcpu_perf_edx = 0;
	uint32_t curcpu_tpm_ecxflags = 0, curcpu_d_1_eax = 0;
	uint64_t freq = 0;
	u_int32_t dummy;
	char mycpu_model[48];
	char *brandstr_from, *brandstr_to;
	int skipspace;

	CPUID(0x80000000, ci->ci_pnfeatset, dummy, dummy, dummy);
	CPUID(0x80000001, ci->ci_efeature_eax, dummy, ci->ci_efeature_ecx,
	    ci->ci_feature_eflags);

	if (CPU_IS_PRIMARY(ci)) {
		ci->ci_signature = cpu_id;
		ci->ci_feature_flags = cpu_feature & ~CPUID_NXE;
		cflushsz = cpu_ebxfeature;
		curcpu_1_ecx = cpu_ecxfeature;
		ecpu_ecxfeature = ci->ci_efeature_ecx;
	} else {
		CPUID(1, ci->ci_signature, cflushsz, curcpu_1_ecx,
		    ci->ci_feature_flags);
		/* Let cpu_feature be the common bits */
		cpu_feature &= ci->ci_feature_flags |
		    (ci->ci_feature_eflags & CPUID_NXE);
		cpu_ecxfeature &= curcpu_1_ecx;
	}
	/* cflush cacheline size is equal to bits 15-8 of ebx * 8 */
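	/* e.g. a value of 8 in bits 15-8 means 8 * 8 = 64 byte cache lines */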
	ci->ci_cflushsz = ((cflushsz >> 8) & 0xff) * 8;

	CPUID(0x80000002, ci->ci_brand[0],
	    ci->ci_brand[1], ci->ci_brand[2], ci->ci_brand[3]);
	CPUID(0x80000003, ci->ci_brand[4],
	    ci->ci_brand[5], ci->ci_brand[6], ci->ci_brand[7]);
	CPUID(0x80000004, ci->ci_brand[8],
	    ci->ci_brand[9], ci->ci_brand[10], ci->ci_brand[11]);
	strlcpy(mycpu_model, (char *)ci->ci_brand, sizeof(mycpu_model));

	/* Remove leading, trailing and duplicated spaces from mycpu_model */
	brandstr_from = brandstr_to = mycpu_model;
	skipspace = 1;
	while (*brandstr_from != '\0') {
		if (!skipspace || *brandstr_from != ' ') {
			skipspace = 0;
			*(brandstr_to++) = *brandstr_from;
		}
		if (*brandstr_from == ' ')
			skipspace = 1;
		brandstr_from++;
	}
	if (skipspace && brandstr_to > mycpu_model)
		brandstr_to--;
	*brandstr_to = '\0';
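	/* e.g. "  Intel(R)  Xeon(R)  CPU " becomes "Intel(R) Xeon(R) CPU" */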

	if (mycpu_model[0] == 0)
		strlcpy(mycpu_model, "Opteron or Athlon 64",
		    sizeof(mycpu_model));

	/* If primary cpu, fill in the global cpu_model used by sysctl */
	if (CPU_IS_PRIMARY(ci))
		strlcpy(cpu_model, mycpu_model, sizeof(cpu_model));

	ci->ci_family = (ci->ci_signature >> 8) & 0x0f;
	ci->ci_model = (ci->ci_signature >> 4) & 0x0f;
	if (ci->ci_family == 0x6 || ci->ci_family == 0xf) {
		ci->ci_family += (ci->ci_signature >> 20) & 0xff;
		ci->ci_model += ((ci->ci_signature >> 16) & 0x0f) << 4;
	}
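	/*
	 * e.g. signature 0x906ea decodes to family 0x6 + extended family
	 * 0x0 = 0x06 and model 0xe + (extended model 0x9 << 4) = 0x9e.
	 */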

#if NPVBUS > 0
	/* Detect hypervisors early, attach the paravirtual bus later */
	if (CPU_IS_PRIMARY(ci) && cpu_ecxfeature & CPUIDECX_HV)
		pvbus_identify();
#endif

	if (ci->ci_pnfeatset >= 0x80000007)
		CPUID(0x80000007, dummy, dummy, dummy, curcpu_apmi_edx);

	if (ci->ci_feature_flags && ci->ci_feature_flags & CPUID_TSC) {
		/* Has TSC, check if it's constant */
		if (ci->ci_vendor == CPUV_INTEL) {
			if ((ci->ci_family == 0x0f && ci->ci_model >= 0x03) ||
			    (ci->ci_family == 0x06 && ci->ci_model >= 0x0e)) {
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		} else if (ci->ci_vendor == CPUV_VIA) {
			/* VIA */
			if (ci->ci_model >= 0x0f) {
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		} else if (ci->ci_vendor == CPUV_AMD) {
			if (curcpu_apmi_edx & CPUIDEDX_ITSC) {
				/* Invariant TSC indicates constant TSC on AMD */
				atomic_setbits_int(&ci->ci_flags, CPUF_CONST_TSC);
			}
		}

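		/*
		 * A constant TSC ticks at a fixed rate regardless of
		 * P-state changes; an invariant TSC additionally keeps
		 * counting in deep C-states, so it can back a timecounter.
		 */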
		/* Check if it's an invariant TSC */
		if (curcpu_apmi_edx & CPUIDEDX_ITSC)
			atomic_setbits_int(&ci->ci_flags, CPUF_INVAR_TSC);

		tsc_identify(ci);
	}

	if (ci->ci_cpuid_level >= 0xa) {
		CPUID(0xa, curcpu_perf_eax, dummy, dummy, curcpu_perf_edx);

		freq = cpu_freq_ctr(ci, curcpu_perf_eax, curcpu_perf_edx);
	}
	if (freq == 0)
		freq = cpu_freq(ci);

	if (ci->ci_cpuid_level >= 0x07) {
		/* "Structured Extended Feature Flags" */
		CPUID_LEAF(0x7, 0, dummy, ci->ci_feature_sefflags_ebx,
		    ci->ci_feature_sefflags_ecx, ci->ci_feature_sefflags_edx);
		/* SEFF0ECX_OSPKE is set late on AP */
		ci->ci_feature_sefflags_ecx &= ~SEFF0ECX_OSPKE;
	}

	printf("%s: %s", ci->ci_dev->dv_xname, mycpu_model);

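	/*
	 * freq is rounded to 0.01 MHz for display: e.g. 2394567890 Hz
	 * prints as "2394.57 MHz".
	 */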
	if (freq != 0)
		printf(", %llu.%02llu MHz", (freq + 4999) / 1000000,
		    ((freq + 4999) / 10000) % 100);

	if (CPU_IS_PRIMARY(ci)) {
		cpuspeed = (freq + 4999) / 1000000;
		cpu_cpuspeed = cpu_amd64speed;
	}

	printf(", %02x-%02x-%02x", ci->ci_family, ci->ci_model,
	    ci->ci_signature & 0x0f);

	if ((cpu_ecxfeature & CPUIDECX_HV) == 0) {
		uint64_t level = 0;
		uint32_t dummy;

		if (ci->ci_vendor == CPUV_AMD) {
			level = rdmsr(MSR_PATCH_LEVEL);
		} else if (ci->ci_vendor == CPUV_INTEL) {
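			/*
			 * SDM procedure for reading the microcode
			 * revision: clear IA32_BIOS_SIGN_ID, execute
			 * CPUID leaf 1, then read the revision from
			 * the MSR's upper 32 bits.
			 */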
			wrmsr(MSR_BIOS_SIGN, 0);
			CPUID(1, dummy, dummy, dummy, dummy);
			level = rdmsr(MSR_BIOS_SIGN) >> 32;
		}
		if (level != 0)
			printf(", patch %08llx", level);
	}

	if (ci->ci_cpuid_level >= 0x06)
		CPUID(0x06, ci->ci_feature_tpmflags, dummy,
		    curcpu_tpm_ecxflags, dummy);
	if (ci->ci_vendor == CPUV_AMD && ci->ci_family >= 0x12)
		ci->ci_feature_tpmflags |= TPM_ARAT;

	/* xsave subfeatures */
	if (ci->ci_cpuid_level >= 0xd)
		CPUID_LEAF(0xd, 1, curcpu_d_1_eax, dummy, dummy, dummy);

	pcpuid2(ci, "1", 'd', CPUID_MEMBER(ci_feature_flags), CPUID_EDX_BITS,
	    'c', curcpu_1_ecx, prevcpu_1_ecx, CPUID_ECX_BITS);
	pcpuid2(ci, "6", 'a', CPUID_MEMBER(ci_feature_tpmflags), TPM_EAX_BITS,
	    'c', curcpu_tpm_ecxflags, prevcpu_tpm_ecxflags, TPM_ECX_BITS);
	pcpuid3(ci, "7.0",
	    'b', CPUID_MEMBER(ci_feature_sefflags_ebx), SEFF0_EBX_BITS,
	    'c', CPUID_MEMBER(ci_feature_sefflags_ecx), SEFF0_ECX_BITS,
	    'd', CPUID_MEMBER(ci_feature_sefflags_edx), SEFF0_EDX_BITS);
	print_perf_cpuid(ci, curcpu_perf_eax, curcpu_perf_edx);
	pcpuid(ci, "d.1", 'a', curcpu_d_1_eax, prevcpu_d_1_eax, XSAVE_BITS);
	pcpuid2(ci, "80000001",
	    'd', CPUID_MEMBER(ci_feature_eflags), CPUIDE_EDX_BITS,
	    'c', CPUID_MEMBER(ci_efeature_ecx), CPUIDE_ECX_BITS);
	pcpuid(ci, "80000007", 'd', curcpu_apmi_edx, prevcpu_apmi_edx,
	    CPUID_APMI_EDX_BITS);
#ifdef MULTIPROCESSOR
	prevcpu_1_ecx = curcpu_1_ecx;
	prevcpu_tpm_ecxflags = curcpu_tpm_ecxflags;
	prevcpu_d_1_eax = curcpu_d_1_eax;
	prevcpu_apmi_edx = curcpu_apmi_edx;
#endif

	/* speculation control features */
	if (ci->ci_vendor == CPUV_AMD) {
		if (ci->ci_pnfeatset >= 0x80000008) {
			CPUID(0x80000008, dummy, ci->ci_feature_amdspec_ebx,
			    dummy, dummy);
			pcpuid(ci, "80000008", 'b',
			    CPUID_MEMBER(ci_feature_amdspec_ebx),
			    CPUID_AMDSPEC_EBX_BITS);
		}
	} else if (ci->ci_vendor == CPUV_INTEL) {
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
			uint32_t msr = rdmsr(MSR_ARCH_CAPABILITIES);

			pmsr32(ci, MSR_ARCH_CAPABILITIES, msr,
			    prevcpu_arch_capa, ARCH_CAP_MSR_BITS);
			prevcpu_arch_capa = msr;
			if (!CPU_IS_PRIMARY(ci) && cpu_meltdown &&
			    (msr & ARCH_CAP_RDCL_NO))
				printf("\n%s: -MELTDOWN", ci->ci_dev->dv_xname);
		}
		if (cpu_meltdown && CPU_IS_PRIMARY(ci))
			printf("\n%s: MELTDOWN", ci->ci_dev->dv_xname);
	}

	/* AMD secure memory encryption and encrypted virtualization features */
	if (ci->ci_vendor == CPUV_AMD &&
	    ci->ci_pnfeatset >= CPUID_AMD_SEV_CAP) {
		CPUID(CPUID_AMD_SEV_CAP, ci->ci_feature_amdsev_eax,
		    ci->ci_feature_amdsev_ebx, ci->ci_feature_amdsev_ecx,
		    ci->ci_feature_amdsev_edx);
		pcpuid3(ci, "8000001F",
		    'a', CPUID_MEMBER(ci_feature_amdsev_eax),
		    CPUID_AMDSEV_EAX_BITS,
		    'c', CPUID_MEMBER(ci_feature_amdsev_ecx),
		    CPUID_AMDSEV_ECX_BITS,
		    'd', CPUID_MEMBER(ci_feature_amdsev_edx),
		    CPUID_AMDSEV_EDX_BITS);
		amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f);
	}

	printf("\n");

	replacemeltdown();
	x86_print_cacheinfo(ci);

	if (CPU_IS_PRIMARY(ci)) {
#ifndef SMALL_KERNEL
		if (ci->ci_vendor == CPUV_AMD &&
		    ci->ci_pnfeatset >= 0x80000007) {
			if (curcpu_apmi_edx & 0x06) {
				if ((ci->ci_signature & 0xF00) == 0xF00)
					setperf_setup = k8_powernow_init;
			}
			if (ci->ci_family >= 0x10)
				setperf_setup = k1x_init;
		}

		if (cpu_ecxfeature & CPUIDECX_EST)
			setperf_setup = est_init;
#endif

		if (cpu_ecxfeature & CPUIDECX_RDRAND)
			has_rdrand = 1;

		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_RDSEED)
			has_rdseed = 1;

		if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
			replacesmap();
	}

#ifndef SMALL_KERNEL
	if (CPU_IS_PRIMARY(ci) && (ci->ci_feature_tpmflags & TPM_SENSOR) &&
	    ci->ci_vendor == CPUV_INTEL) {
		ci->ci_sensor.type = SENSOR_TEMP;
		sensor_task_register(ci, intelcore_update_sensor, 5);
		sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
	}
#endif

	if (CPU_IS_PRIMARY(ci) && ci->ci_vendor == CPUV_VIA) {
		ci->cpu_setup = via_nano_setup;
#ifndef SMALL_KERNEL
		ci->ci_sensor.type = SENSOR_TEMP;
		sensor_task_register(ci, via_update_sensor, 5);
		sensor_attach(&ci->ci_sensordev, &ci->ci_sensor);
#endif
	}

	tsc_timecounter_init(ci, freq);

	cpu_topology(ci);
#if NVMM > 0
	cpu_check_vmm_cap(ci);
#endif /* NVMM > 0 */

	/* Check for effective frequency via MPERF, APERF */
	if ((curcpu_tpm_ecxflags & TPM_EFFFREQ) && ci->ci_smt_id == 0) {
#ifndef SMALL_KERNEL
		ci->ci_hz_sensor.type = SENSOR_FREQ;
		sensor_task_register(ci, cpu_hz_update_sensor, 1);
		sensor_attach(&ci->ci_sensordev, &ci->ci_hz_sensor);
#endif
	}
	prevci = ci;
}

#ifndef SMALL_KERNEL
/*
 * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

static int
mask_width(u_int x)
{
	int bit;
	int mask;
	int powerof2;

	powerof2 = ((x - 1) & x) == 0;
	mask = (x << (1 - powerof2)) - 1;

	/* fls */
	if (mask == 0)
		return (0);
	for (bit = 1; mask != 1; bit++)
		mask = (unsigned int)mask >> 1;

	return (bit);
}
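
/*
 * log2() is a floor: log2(6) == 2. mask_width() is the number of bits
 * needed to hold x distinct values, i.e. a ceiling: mask_width(6) == 3
 * and mask_width(8) == 3.
 */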
#endif

/*
 * Build up cpu topology for given cpu, must run on the core itself.
 */
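/*
 * Illustrative Intel example: with max_apicid 16 and max_coreid 8,
 * smt_bits = 1 and core_bits = 3, so APIC ID 0xb (binary 1011) splits
 * into smt 1, core 5, package 0.
 */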
void
cpu_topology(struct cpu_info *ci)
{
#ifndef SMALL_KERNEL
	u_int32_t eax, ebx, ecx, edx;
	u_int32_t apicid, max_apicid = 0, max_coreid = 0;
	u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
	u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;

	/* We need at least apicid at CPUID 1 */
	if (ci->ci_cpuid_level < 1)
		goto no_topology;

	/* Initial apicid */
	CPUID(1, eax, ebx, ecx, edx);
	apicid = (ebx >> 24) & 0xff;

	if (ci->ci_vendor == CPUV_AMD) {
		uint32_t nthreads = 1; /* per core */
		uint32_t thread_id; /* within a package */

		/* We need at least apicid at CPUID 0x80000008 */
		if (ci->ci_pnfeatset < 0x80000008)
			goto no_topology;

		CPUID(0x80000008, eax, ebx, ecx, edx);
		core_bits = (ecx >> 12) & 0xf;

		if (ci->ci_pnfeatset >= 0x8000001e) {
			CPUID(0x8000001e, eax, ebx, ecx, edx);
			nthreads = ((ebx >> 8) & 0xf) + 1;
		}

		/* Shift the core_bits off to get at the pkg bits */
		ci->ci_pkg_id = apicid >> core_bits;

		/* Get rid of the package bits */
		core_mask = (1U << core_bits) - 1;
		thread_id = apicid & core_mask;

		/* Cut logical thread_id into core id, and smt id in a core */
		ci->ci_core_id = thread_id / nthreads;
		ci->ci_smt_id = thread_id % nthreads;
	} else if (ci->ci_vendor == CPUV_INTEL) {
		/* We only support leaf 1/4 detection */
		if (ci->ci_cpuid_level < 4)
			goto no_topology;
		/* Get max_apicid */
		CPUID(1, eax, ebx, ecx, edx);
		max_apicid = (ebx >> 16) & 0xff;
		/* Get max_coreid */
		CPUID_LEAF(4, 0, eax, ebx, ecx, edx);
		max_coreid = ((eax >> 26) & 0x3f) + 1;
		/* SMT */
		smt_bits = mask_width(max_apicid / max_coreid);
		smt_mask = (1U << smt_bits) - 1;
		/* Core */
		core_bits = log2(max_coreid);
		core_mask = (1U << (core_bits + smt_bits)) - 1;
		core_mask ^= smt_mask;
		/* Pkg */
		pkg_bits = core_bits + smt_bits;
		pkg_mask = ~0U << pkg_bits;

		ci->ci_smt_id = apicid & smt_mask;
		ci->ci_core_id = (apicid & core_mask) >> smt_bits;
		ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
	} else
		goto no_topology;
#ifdef DEBUG
	printf("cpu%d: smt %u, core %u, pkg %u "
		"(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, "
		"core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
		ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
		apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
		core_mask, pkg_bits, pkg_mask);
#else
	printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
		ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);

#endif
	return;
	/* We can't map, so consider ci_core_id as ci_cpuid */
no_topology:
#endif
	ci->ci_smt_id  = 0;
	ci->ci_core_id = ci->ci_cpuid;
	ci->ci_pkg_id  = 0;
}

#if NVMM > 0
/*
 * cpu_check_vmm_cap
 *
 * Checks for VMM capabilities for 'ci'. Initializes certain per-cpu VMM
 * state in 'ci' if virtualization extensions are found.
 *
 * Parameters:
 *  ci: the cpu being checked
 */
void
cpu_check_vmm_cap(struct cpu_info *ci)
{
	uint64_t msr;
	uint32_t cap, dummy, edx;

	/*
	 * Check for workable VMX
	 */
	if (cpu_ecxfeature & CPUIDECX_VMX) {
		msr = rdmsr(MSR_IA32_FEATURE_CONTROL);

		if (!(msr & IA32_FEATURE_CONTROL_LOCK))
			ci->ci_vmm_flags |= CI_VMM_VMX;
		else {
			if (msr & IA32_FEATURE_CONTROL_VMX_EN)
				ci->ci_vmm_flags |= CI_VMM_VMX;
			else
				ci->ci_vmm_flags |= CI_VMM_DIS;
		}
	}

	/*
	 * Check for EPT (Intel Nested Paging) and other secondary
	 * controls
	 */
	if (ci->ci_vmm_flags & CI_VMM_VMX) {
		/* Secondary controls available? */
		/* XXX should we check true procbased ctls here if avail? */
		msr = rdmsr(IA32_VMX_PROCBASED_CTLS);
		if (msr & (IA32_VMX_ACTIVATE_SECONDARY_CONTROLS) << 32) {
			msr = rdmsr(IA32_VMX_PROCBASED2_CTLS);
			/* EPT available? */
			if (msr & (IA32_VMX_ENABLE_EPT) << 32)
				ci->ci_vmm_flags |= CI_VMM_EPT;
		}
	}

	/*
	 * Check startup config (VMX)
	 */
	if (ci->ci_vmm_flags & CI_VMM_VMX) {
		/* CR0 fixed and flexible bits */
		msr = rdmsr(IA32_VMX_CR0_FIXED0);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0 = msr;
		msr = rdmsr(IA32_VMX_CR0_FIXED1);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1 = msr;
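		/*
		 * A bit set in FIXED0 must be 1 in the control register
		 * while in VMX operation; a bit clear in FIXED1 must be
		 * 0; bits that differ between the two are flexible.
		 */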

		/* CR4 fixed and flexible bits */
		msr = rdmsr(IA32_VMX_CR4_FIXED0);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0 = msr;
		msr = rdmsr(IA32_VMX_CR4_FIXED1);
		ci->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1 = msr;

		/* VMXON region revision ID (bits 30:0 of IA32_VMX_BASIC) */
		msr = rdmsr(IA32_VMX_BASIC);
		ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision =
			(uint32_t)(msr & 0x7FFFFFFF);

		/* MSR save / load table size */
		msr = rdmsr(IA32_VMX_MISC);
		ci->ci_vmm_cap.vcc_vmx.vmx_msr_table_size =
			(uint32_t)(msr & IA32_VMX_MSR_LIST_SIZE_MASK) >> 25;

		/* CR3 target count size */
		ci->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count =
			(uint32_t)(msr & IA32_VMX_CR3_TGT_SIZE_MASK) >> 16;
	}

	/*
	 * Check for workable SVM
	 */
	if (ecpu_ecxfeature & CPUIDECX_SVM) {
		msr = rdmsr(MSR_AMD_VM_CR);

		if (!(msr & AMD_SVMDIS))
			ci->ci_vmm_flags |= CI_VMM_SVM;

		CPUID(CPUID_AMD_SVM_CAP, dummy,
		    ci->ci_vmm_cap.vcc_svm.svm_max_asid, dummy, edx);

		if (ci->ci_vmm_cap.vcc_svm.svm_max_asid > 0xFFF)
			ci->ci_vmm_cap.vcc_svm.svm_max_asid = 0xFFF;

		if (edx & AMD_SVM_FLUSH_BY_ASID_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid = 1;

		if (edx & AMD_SVM_VMCB_CLEAN_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_vmcb_clean = 1;

		if (edx & AMD_SVM_DECODE_ASSIST_CAP)
			ci->ci_vmm_cap.vcc_svm.svm_decode_assist = 1;
	}

	/*
	 * Check for SVM Nested Paging
	 */
	if ((ci->ci_vmm_flags & CI_VMM_SVM) &&
	    ci->ci_pnfeatset >= CPUID_AMD_SVM_CAP) {
		CPUID(CPUID_AMD_SVM_CAP, dummy, dummy, dummy, cap);
		if (cap & AMD_SVM_NESTED_PAGING_CAP)
			ci->ci_vmm_flags |= CI_VMM_RVI;
	}

	/*
	 * Check "L1 flush on VM entry" (Intel L1TF vuln) semantics
	 * Full details can be found here:
	 * https://software.intel.com/security-software-guidance/insights/deep-dive-intel-analysis-l1-terminal-fault
	 */
	if (ci->ci_vendor == CPUV_INTEL) {
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_L1DF)
			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 1;
		else
			ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr = 0;

		/*
		 * Certain CPUs may have the vulnerability remedied in
		 * hardware (RDCL_NO), or we may be nested in a VMM that
		 * is doing flushes (SKIP_L1DFL_VMENTRY) using the MSR.
		 * In either case no mitigation at all is necessary.
		 */
		if (ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) {
			msr = rdmsr(MSR_ARCH_CAPABILITIES);
			if ((msr & ARCH_CAP_RDCL_NO) ||
			    ((msr & ARCH_CAP_SKIP_L1DFL_VMENTRY) &&
			    ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr))
				ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr =
				    VMX_SKIP_L1D_FLUSH;
		}
	}
}
#endif /* NVMM > 0 */
1052