1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright (c) 2010, Intel Corporation.
26  * All rights reserved.
27  */
28 /*
29  * Portions Copyright 2009 Advanced Micro Devices, Inc.
30  */
31 
32 /*
33  * Various routines to handle identification
34  * and classification of x86 processors.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/archsystm.h>
39 #include <sys/x86_archext.h>
40 #include <sys/kmem.h>
41 #include <sys/systm.h>
42 #include <sys/cmn_err.h>
43 #include <sys/sunddi.h>
44 #include <sys/sunndi.h>
45 #include <sys/cpuvar.h>
46 #include <sys/processor.h>
47 #include <sys/sysmacros.h>
48 #include <sys/pg.h>
49 #include <sys/fp.h>
50 #include <sys/controlregs.h>
51 #include <sys/auxv_386.h>
52 #include <sys/bitmap.h>
53 #include <sys/memnode.h>
54 #include <sys/pci_cfgspace.h>
55 
56 #ifdef __xpv
57 #include <sys/hypervisor.h>
58 #else
59 #include <sys/ontrap.h>
60 #endif
61 
62 /*
63  * Pass 0 of cpuid feature analysis happens in locore. It contains special code
64  * to recognize Cyrix processors that are not cpuid-compliant, and to deal with
65  * them accordingly. For most modern processors, feature detection occurs here
66  * in pass 1.
67  *
68  * Pass 1 of cpuid feature analysis happens just at the beginning of mlsetup()
69  * for the boot CPU and does the basic analysis that the early kernel needs.
70  * x86_featureset is set based on the return value of cpuid_pass1() of the boot
71  * CPU.
72  *
73  * Pass 1 includes:
74  *
75  *	o Determining vendor/model/family/stepping and setting x86_type and
76  *	  x86_vendor accordingly.
77  *	o Processing the feature flags returned by the cpuid instruction while
78  *	  applying any workarounds or tricks for the specific processor.
79  *	o Mapping the feature flags into Solaris feature bits (X86_*).
80  *	o Processing extended feature flags if supported by the processor,
81  *	  again while applying specific processor knowledge.
82  *	o Determining the CMT characteristics of the system.
83  *
84  * Pass 1 is done on non-boot CPUs during their initialization and the results
85  * are used only as a meager attempt at ensuring that all processors within the
86  * system support the same features.
87  *
88  * Pass 2 of cpuid feature analysis happens just at the beginning
89  * of startup().  It just copies in and corrects the remainder
90  * of the cpuid data we depend on: standard cpuid functions that we didn't
91  * need for pass1 feature analysis, and extended cpuid functions beyond the
92  * simple feature processing done in pass1.
93  *
94  * Pass 3 of cpuid analysis is invoked after basic kernel services; in
95  * particular kernel memory allocation has been made available. It creates a
96  * readable brand string based on the data collected in the first two passes.
97  *
98  * Pass 4 of cpuid analysis is invoked after post_startup() when all
99  * the support infrastructure for various hardware features has been
100  * initialized. It determines which processor features will be reported
101  * to userland via the aux vector.
102  *
103  * All passes are executed on all CPUs, but only the boot CPU determines what
104  * features the kernel will use.
105  *
106  * Much of the worst junk in this file is for the support of processors
107  * that didn't really implement the cpuid instruction properly.
108  *
109  * NOTE: The accessor functions (cpuid_get*) are aware of, and ASSERT upon,
110  * the pass numbers.  Accordingly, changes to the pass code may require changes
111  * to the accessor code.
112  */
113 
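/*
 * Illustrative sketch (excluded from the build): how code elsewhere in the
 * kernel is expected to consume the results of the passes described above.
 * Feature tests against x86_featureset are only meaningful once
 * cpuid_pass1() has run on the boot CPU; the function name
 * example_use_sse2() is invented for illustration.
 */
#if 0
static void
example_use_sse2(void)
{
	/* Gate any SSE2 code path on the boot CPU's feature analysis. */
	if (is_x86_feature(x86_featureset, X86FSET_SSE2)) {
		/* ... safe to execute SSE2 instructions here ... */
	}
}
#endif
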
114 uint_t x86_vendor = X86_VENDOR_IntelClone;
115 uint_t x86_type = X86_TYPE_OTHER;
116 uint_t x86_clflush_size = 0;
117 
118 uint_t pentiumpro_bug4046376;
119 uint_t pentiumpro_bug4064495;
120 
121 #define	NUM_X86_FEATURES	35
122 void    *x86_featureset;
123 ulong_t x86_featureset0[BT_SIZEOFMAP(NUM_X86_FEATURES)];
124 
125 char *x86_feature_names[NUM_X86_FEATURES] = {
126 	"lgpg",
127 	"tsc",
128 	"msr",
129 	"mtrr",
130 	"pge",
131 	"de",
132 	"cmov",
133 	"mmx",
134 	"mca",
135 	"pae",
136 	"cv8",
137 	"pat",
138 	"sep",
139 	"sse",
140 	"sse2",
141 	"htt",
142 	"asysc",
143 	"nx",
144 	"sse3",
145 	"cx16",
146 	"cmp",
147 	"tscp",
148 	"mwait",
149 	"sse4a",
150 	"cpuid",
151 	"ssse3",
152 	"sse4_1",
153 	"sse4_2",
154 	"1gpg",
155 	"clfsh",
156 	"64",
157 	"aes",
158 	"pclmulqdq",
159 	"xsave",
160 	"avx" };
161 
162 static void *
163 init_x86_featureset(void)
164 {
165 	return (kmem_zalloc(BT_SIZEOFMAP(NUM_X86_FEATURES), KM_SLEEP));
166 }
167 
168 void
169 free_x86_featureset(void *featureset)
170 {
171 	kmem_free(featureset, BT_SIZEOFMAP(NUM_X86_FEATURES));
172 }
173 
174 boolean_t
175 is_x86_feature(void *featureset, uint_t feature)
176 {
177 	ASSERT(feature < NUM_X86_FEATURES);
178 	return (BT_TEST((ulong_t *)featureset, feature));
179 }
180 
181 void
182 add_x86_feature(void *featureset, uint_t feature)
183 {
184 	ASSERT(feature < NUM_X86_FEATURES);
185 	BT_SET((ulong_t *)featureset, feature);
186 }
187 
188 void
189 remove_x86_feature(void *featureset, uint_t feature)
190 {
191 	ASSERT(feature < NUM_X86_FEATURES);
192 	BT_CLEAR((ulong_t *)featureset, feature);
193 }
194 
195 boolean_t
196 compare_x86_featureset(void *setA, void *setB)
197 {
198 	/*
199 	 * We assume that the unused bits of the bitmap are always zero.
200 	 */
201 	if (memcmp(setA, setB, BT_SIZEOFMAP(NUM_X86_FEATURES)) == 0) {
202 		return (B_TRUE);
203 	} else {
204 		return (B_FALSE);
205 	}
206 }
207 
208 void
209 print_x86_featureset(void *featureset)
210 {
211 	uint_t i;
212 
213 	for (i = 0; i < NUM_X86_FEATURES; i++) {
214 		if (is_x86_feature(featureset, i)) {
215 			cmn_err(CE_CONT, "?x86_feature: %s\n",
216 			    x86_feature_names[i]);
217 		}
218 	}
219 }
220 
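/*
 * Illustrative sketch (excluded from the build): typical use of the
 * featureset helpers above when a non-boot CPU finishes pass 1.  The
 * function name and message text are invented for illustration.
 */
#if 0
static void
example_check_featureset(void *new_set)
{
	/* Non-boot CPUs are expected to match the boot CPU's features. */
	if (!compare_x86_featureset(x86_featureset, new_set)) {
		cmn_err(CE_WARN, "cpu features differ from boot cpu");
		print_x86_featureset(new_set);
	}
	free_x86_featureset(new_set);
}
#endif
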
221 uint_t enable486;
222 
223 static size_t xsave_state_size = 0;
224 uint64_t xsave_bv_all = (XFEATURE_LEGACY_FP | XFEATURE_SSE);
225 boolean_t xsave_force_disable = B_FALSE;
226 
227 /*
228  * This is set to the platform type that Solaris is running on.
229  */
230 static int platform_type = -1;
231 
232 #if !defined(__xpv)
233 /*
234  * Variable to patch if hypervisor platform detection needs to be
235  * disabled (e.g. platform_type will always be HW_NATIVE if this is 0).
236  */
237 int enable_platform_detection = 1;
238 #endif
239 
240 /*
241  * monitor/mwait info.
242  *
243  * size_actual and buf_actual are the real address and size allocated to get
244  * proper mwait_buf alignment.  buf_actual and size_actual should be passed
245  * to kmem_free().  Currently kmem_alloc() and mwait happen to both use
246  * processor cache-line alignment, but this is not guaranteed in the future.
247  */
248 struct mwait_info {
249 	size_t		mon_min;	/* min size to avoid missed wakeups */
250 	size_t		mon_max;	/* size to avoid false wakeups */
251 	size_t		size_actual;	/* size actually allocated */
252 	void		*buf_actual;	/* memory actually allocated */
253 	uint32_t	support;	/* processor support of monitor/mwait */
254 };
255 
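/*
 * Illustrative sketch (excluded from the build): one way a caller might
 * honor the alignment contract described above, over-allocating and then
 * rounding up to a mon_max boundary.  mon_max is a power of two (verified
 * in cpuid_pass2()); the helper name is invented.
 */
#if 0
static void *
example_mwait_alloc(struct mwait_info *mi)
{
	/* Over-allocate so an aligned line fits regardless of placement. */
	mi->size_actual = mi->mon_max * 2;
	mi->buf_actual = kmem_zalloc(mi->size_actual, KM_SLEEP);

	/* Round the buffer address up to the next mon_max boundary. */
	return ((void *)(((uintptr_t)mi->buf_actual + mi->mon_max - 1) &
	    ~((uintptr_t)mi->mon_max - 1)));
}
#endif
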
256 /*
257  * xsave/xrestor info.
258  *
259  * This structure contains HW feature bits and size of the xsave save area.
260  * Note: the kernel will use the maximum size required for all hardware
261  * features. It is not optimized for potential memory savings if features at
262  * the end of the save area are not enabled.
263  */
264 struct xsave_info {
265 	uint32_t	xsav_hw_features_low;   /* Supported HW features */
266 	uint32_t	xsav_hw_features_high;  /* Supported HW features */
267 	size_t		xsav_max_size;  /* max size save area for HW features */
268 	size_t		ymm_size;	/* AVX: size of ymm save area */
269 	size_t		ymm_offset;	/* AVX: offset for ymm save area */
270 };
271 
272 
273 /*
274  * These constants determine how many of the elements of the
275  * cpuid we cache in the cpuid_info data structure; the
276  * remaining elements are accessible via the cpuid instruction.
277  */
278 
279 #define	NMAX_CPI_STD	6		/* eax = 0 .. 5 */
280 #define	NMAX_CPI_EXTD	0x1c		/* eax = 0x80000000 .. 0x8000001b */
281 
282 /*
283  * Some terminology needs to be explained:
284  *  - Socket: Something that can be plugged into a motherboard.
285  *  - Package: Same as socket
286  *  - Chip: Same as socket. Note that AMD's documentation uses the term
287  *    "chip" differently: there, a chip is the same as a processor node (below)
288  *  - Processor node: Some AMD processors have more than one
289  *    "subprocessor" embedded in a package. These subprocessors (nodes)
290  *    are fully-functional processors themselves with cores, caches,
291  *    memory controllers, PCI configuration spaces. They are connected
292  *    inside the package with Hypertransport links. On single-node
293  *    processors, processor node is equivalent to chip/socket/package.
294  */
295 
296 struct cpuid_info {
297 	uint_t cpi_pass;		/* last pass completed */
298 	/*
299 	 * standard function information
300 	 */
301 	uint_t cpi_maxeax;		/* fn 0: %eax */
302 	char cpi_vendorstr[13];		/* fn 0: %ebx:%ecx:%edx */
303 	uint_t cpi_vendor;		/* enum of cpi_vendorstr */
304 
305 	uint_t cpi_family;		/* fn 1: extended family */
306 	uint_t cpi_model;		/* fn 1: extended model */
307 	uint_t cpi_step;		/* fn 1: stepping */
308 	chipid_t cpi_chipid;		/* fn 1: %ebx:  Intel: chip # */
309 					/*		AMD: package/socket # */
310 	uint_t cpi_brandid;		/* fn 1: %ebx: brand ID */
311 	int cpi_clogid;			/* fn 1: %ebx: thread # */
312 	uint_t cpi_ncpu_per_chip;	/* fn 1: %ebx: logical cpu count */
313 	uint8_t cpi_cacheinfo[16];	/* fn 2: intel-style cache desc */
314 	uint_t cpi_ncache;		/* fn 2: number of elements */
315 	uint_t cpi_ncpu_shr_last_cache;	/* fn 4: %eax: ncpus sharing cache */
316 	id_t cpi_last_lvl_cacheid;	/* fn 4: %eax: derived cache id */
317 	uint_t cpi_std_4_size;		/* fn 4: number of fn 4 elements */
318 	struct cpuid_regs **cpi_std_4;	/* fn 4: %ecx == 0 .. fn4_size */
319 	struct cpuid_regs cpi_std[NMAX_CPI_STD];	/* 0 .. 5 */
320 	/*
321 	 * extended function information
322 	 */
323 	uint_t cpi_xmaxeax;		/* fn 0x80000000: %eax */
324 	char cpi_brandstr[49];		/* fn 0x8000000[234] */
325 	uint8_t cpi_pabits;		/* fn 0x80000006: %eax */
326 	uint8_t	cpi_vabits;		/* fn 0x80000006: %eax */
327 	struct	cpuid_regs cpi_extd[NMAX_CPI_EXTD];	/* 0x800000XX */
328 
329 	id_t cpi_coreid;		/* same coreid => strands share core */
330 	int cpi_pkgcoreid;		/* core number within single package */
331 	uint_t cpi_ncore_per_chip;	/* AMD: fn 0x80000008: %ecx[7-0] */
332 					/* Intel: fn 4: %eax[31-26] */
333 	/*
334 	 * supported feature information
335 	 */
336 	uint32_t cpi_support[5];
337 #define	STD_EDX_FEATURES	0
338 #define	AMD_EDX_FEATURES	1
339 #define	TM_EDX_FEATURES		2
340 #define	STD_ECX_FEATURES	3
341 #define	AMD_ECX_FEATURES	4
342 	/*
343 	 * Synthesized information, where known.
344 	 */
345 	uint32_t cpi_chiprev;		/* See X86_CHIPREV_* in x86_archext.h */
346 	const char *cpi_chiprevstr;	/* May be NULL if chiprev unknown */
347 	uint32_t cpi_socket;		/* Chip package/socket type */
348 
349 	struct mwait_info cpi_mwait;	/* fn 5: monitor/mwait info */
350 	uint32_t cpi_apicid;
351 	uint_t cpi_procnodeid;		/* AMD: nodeID on HT, Intel: chipid */
352 	uint_t cpi_procnodes_per_pkg;	/* AMD: # of nodes in the package */
353 					/* Intel: 1 */
354 
355 	struct xsave_info cpi_xsave;	/* fn D: xsave/xrestor info */
356 };
357 
358 
359 static struct cpuid_info cpuid_info0;
360 
361 /*
362  * These bit fields are defined by the Intel Application Note AP-485
363  * "Intel Processor Identification and the CPUID Instruction"
364  */
365 #define	CPI_FAMILY_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 27, 20)
366 #define	CPI_MODEL_XTD(cpi)	BITX((cpi)->cpi_std[1].cp_eax, 19, 16)
367 #define	CPI_TYPE(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 13, 12)
368 #define	CPI_FAMILY(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 11, 8)
369 #define	CPI_STEP(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 3, 0)
370 #define	CPI_MODEL(cpi)		BITX((cpi)->cpi_std[1].cp_eax, 7, 4)
371 
372 #define	CPI_FEATURES_EDX(cpi)		((cpi)->cpi_std[1].cp_edx)
373 #define	CPI_FEATURES_ECX(cpi)		((cpi)->cpi_std[1].cp_ecx)
374 #define	CPI_FEATURES_XTD_EDX(cpi)	((cpi)->cpi_extd[1].cp_edx)
375 #define	CPI_FEATURES_XTD_ECX(cpi)	((cpi)->cpi_extd[1].cp_ecx)
376 
377 #define	CPI_BRANDID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 7, 0)
378 #define	CPI_CHUNKS(cpi)		BITX((cpi)->cpi_std[1].cp_ebx, 15, 7)
379 #define	CPI_CPU_COUNT(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 23, 16)
380 #define	CPI_APIC_ID(cpi)	BITX((cpi)->cpi_std[1].cp_ebx, 31, 24)
381 
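/*
 * Illustrative sketch (excluded from the build): assembling the displayed
 * family and model from the raw fn 1 fields above, using Intel's convention
 * for the extended model field (the per-vendor differences are handled in
 * cpuid_pass1() below).  The function name is invented.
 */
#if 0
static void
example_fn1_decode(struct cpuid_info *cpi)
{
	uint_t family = CPI_FAMILY(cpi);
	uint_t model = CPI_MODEL(cpi);

	/* Family 0xf means the extended family field must be added in. */
	if (family == 0xf)
		family += CPI_FAMILY_XTD(cpi);

	/* The extended model field supplies the high 4 bits of the model. */
	if (family == 0x6 || family >= 0xf)
		model += CPI_MODEL_XTD(cpi) << 4;
}
#endif
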
382 #define	CPI_MAXEAX_MAX		0x100		/* sanity control */
383 #define	CPI_XMAXEAX_MAX		0x80000100
384 #define	CPI_FN4_ECX_MAX		0x20		/* sanity: max fn 4 levels */
385 #define	CPI_FNB_ECX_MAX		0x20		/* sanity: max fn B levels */
386 
387 /*
388  * Function 4 (Deterministic Cache Parameters) macros
389  * Defined by Intel Application Note AP-485
390  */
391 #define	CPI_NUM_CORES(regs)		BITX((regs)->cp_eax, 31, 26)
392 #define	CPI_NTHR_SHR_CACHE(regs)	BITX((regs)->cp_eax, 25, 14)
393 #define	CPI_FULL_ASSOC_CACHE(regs)	BITX((regs)->cp_eax, 9, 9)
394 #define	CPI_SELF_INIT_CACHE(regs)	BITX((regs)->cp_eax, 8, 8)
395 #define	CPI_CACHE_LVL(regs)		BITX((regs)->cp_eax, 7, 5)
396 #define	CPI_CACHE_TYPE(regs)		BITX((regs)->cp_eax, 4, 0)
397 #define	CPI_CPU_LEVEL_TYPE(regs)	BITX((regs)->cp_ecx, 15, 8)
398 
399 #define	CPI_CACHE_WAYS(regs)		BITX((regs)->cp_ebx, 31, 22)
400 #define	CPI_CACHE_PARTS(regs)		BITX((regs)->cp_ebx, 21, 12)
401 #define	CPI_CACHE_COH_LN_SZ(regs)	BITX((regs)->cp_ebx, 11, 0)
402 
403 #define	CPI_CACHE_SETS(regs)		BITX((regs)->cp_ecx, 31, 0)
404 
405 #define	CPI_PREFCH_STRIDE(regs)		BITX((regs)->cp_edx, 9, 0)
406 
407 
408 /*
409  * A couple of shorthand macros to identify "later" P6-family chips
410  * like the Pentium M and Core.  First, the "older" P6-based stuff
411  * (loosely defined as "pre-Pentium-4"):
412  * P6, PII, Mobile PII, PII Xeon, PIII, Mobile PIII, PIII Xeon
413  */
414 
415 #define	IS_LEGACY_P6(cpi) (			\
416 	cpi->cpi_family == 6 && 		\
417 		(cpi->cpi_model == 1 ||		\
418 		cpi->cpi_model == 3 ||		\
419 		cpi->cpi_model == 5 ||		\
420 		cpi->cpi_model == 6 ||		\
421 		cpi->cpi_model == 7 ||		\
422 		cpi->cpi_model == 8 ||		\
423 		cpi->cpi_model == 0xA ||	\
424 		cpi->cpi_model == 0xB)		\
425 )
426 
427 /* A "new F6" is everything with family 6 that's not the above */
428 #define	IS_NEW_F6(cpi) ((cpi->cpi_family == 6) && !IS_LEGACY_P6(cpi))
429 
430 /* Extended family/model support */
431 #define	IS_EXTENDED_MODEL_INTEL(cpi) (cpi->cpi_family == 0x6 || \
432 	cpi->cpi_family >= 0xf)
433 
434 /*
435  * Info for monitor/mwait idle loop.
436  *
437  * See cpuid section of "Intel 64 and IA-32 Architectures Software Developer's
438  * Manual Volume 2A: Instruction Set Reference, A-M" #25366-022US, November
439  * 2006.
440  * See MONITOR/MWAIT section of "AMD64 Architecture Programmer's Manual
441  * Documentation Updates" #33633, Rev 2.05, December 2006.
442  */
443 #define	MWAIT_SUPPORT		(0x00000001)	/* mwait supported */
444 #define	MWAIT_EXTENSIONS	(0x00000002)	/* extension supported */
445 #define	MWAIT_ECX_INT_ENABLE	(0x00000004)	/* ecx 1 extension supported */
446 #define	MWAIT_SUPPORTED(cpi)	((cpi)->cpi_std[1].cp_ecx & CPUID_INTC_ECX_MON)
447 #define	MWAIT_INT_ENABLE(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x2)
448 #define	MWAIT_EXTENSION(cpi)	((cpi)->cpi_std[5].cp_ecx & 0x1)
449 #define	MWAIT_SIZE_MIN(cpi)	BITX((cpi)->cpi_std[5].cp_eax, 15, 0)
450 #define	MWAIT_SIZE_MAX(cpi)	BITX((cpi)->cpi_std[5].cp_ebx, 15, 0)
451 /*
452  * Number of sub-cstates for a given c-state.
453  */
454 #define	MWAIT_NUM_SUBC_STATES(cpi, c_state)			\
455 	BITX((cpi)->cpi_std[5].cp_edx, c_state + 3, c_state)
456 
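/*
 * Illustrative sketch (excluded from the build): decoding a previously
 * captured fn 5 with the macros above.  Each C-state's sub-C-state count
 * occupies a 4-bit field of %edx, so the macro is passed multiples of 4.
 * The function name is invented.
 */
#if 0
static void
example_mwait_decode(struct cpuid_info *cpi)
{
	uint_t c;

	if (!MWAIT_SUPPORTED(cpi))
		return;

	cmn_err(CE_CONT, "?monitor line: min %lu, max %lu bytes\n",
	    (ulong_t)MWAIT_SIZE_MIN(cpi), (ulong_t)MWAIT_SIZE_MAX(cpi));

	for (c = 0; c <= 28; c += 4)
		cmn_err(CE_CONT, "?C%u sub-states: %lu\n", c / 4,
		    (ulong_t)MWAIT_NUM_SUBC_STATES(cpi, c));
}
#endif
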
457 /*
458  * XSAVE leaf 0xD enumeration
459  */
460 #define	CPUID_LEAFD_2_YMM_OFFSET	576
461 #define	CPUID_LEAFD_2_YMM_SIZE		256
462 
463 /*
464  * Functions we consume from cpuid_subr.c;  don't publish these in a header
465  * file to try and keep people using the expected cpuid_* interfaces.
466  */
467 extern uint32_t _cpuid_skt(uint_t, uint_t, uint_t, uint_t);
468 extern const char *_cpuid_sktstr(uint_t, uint_t, uint_t, uint_t);
469 extern uint32_t _cpuid_chiprev(uint_t, uint_t, uint_t, uint_t);
470 extern const char *_cpuid_chiprevstr(uint_t, uint_t, uint_t, uint_t);
471 extern uint_t _cpuid_vendorstr_to_vendorcode(char *);
472 
473 /*
474  * Apply various platform-dependent restrictions where the
475  * underlying platform means the CPU can be marked
476  * as less capable than its cpuid instruction would imply.
477  */
478 #if defined(__xpv)
479 static void
480 platform_cpuid_mangle(uint_t vendor, uint32_t eax, struct cpuid_regs *cp)
481 {
482 	switch (eax) {
483 	case 1: {
484 		uint32_t mcamask = DOMAIN_IS_INITDOMAIN(xen_info) ?
485 		    0 : CPUID_INTC_EDX_MCA;
486 		cp->cp_edx &=
487 		    ~(mcamask |
488 		    CPUID_INTC_EDX_PSE |
489 		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
490 		    CPUID_INTC_EDX_SEP | CPUID_INTC_EDX_MTRR |
491 		    CPUID_INTC_EDX_PGE | CPUID_INTC_EDX_PAT |
492 		    CPUID_AMD_EDX_SYSC |
493 		    CPUID_INTC_EDX_PSE36 | CPUID_INTC_EDX_HTT);
494 		break;
495 	}
496 
497 	case 0x80000001:
498 		cp->cp_edx &=
499 		    ~(CPUID_AMD_EDX_PSE |
500 		    CPUID_INTC_EDX_VME | CPUID_INTC_EDX_DE |
501 		    CPUID_AMD_EDX_MTRR | CPUID_AMD_EDX_PGE |
502 		    CPUID_AMD_EDX_PAT | CPUID_AMD_EDX_PSE36 |
503 		    CPUID_AMD_EDX_SYSC | CPUID_INTC_EDX_SEP |
504 		    CPUID_AMD_EDX_TSCP);
505 		cp->cp_ecx &= ~CPUID_AMD_ECX_CMP_LGCY;
506 		break;
507 	default:
508 		break;
509 	}
510 
511 	switch (vendor) {
512 	case X86_VENDOR_Intel:
513 		switch (eax) {
514 		case 4:
515 			/*
516 			 * Zero out the (ncores-per-chip - 1) field
517 			 */
518 			cp->cp_eax &= 0x03ffffff;
519 			break;
520 		default:
521 			break;
522 		}
523 		break;
524 	case X86_VENDOR_AMD:
525 		switch (eax) {
526 
527 		case 0x80000001:
528 			cp->cp_ecx &= ~CPUID_AMD_ECX_CR8D;
529 			break;
530 
531 		case 0x80000008:
532 			/*
533 			 * Zero out the (ncores-per-chip - 1) field
534 			 */
535 			cp->cp_ecx &= 0xffffff00;
536 			break;
537 		default:
538 			break;
539 		}
540 		break;
541 	default:
542 		break;
543 	}
544 }
545 #else
546 #define	platform_cpuid_mangle(vendor, eax, cp)	/* nothing */
547 #endif
548 
549 /*
550  *  Some undocumented ways of patching the results of the cpuid
551  *  instruction to permit running Solaris 10 on future cpus that
552  *  we don't currently support.  Could be set to non-zero values
553  *  via settings in eeprom.
554  */
555 
556 uint32_t cpuid_feature_ecx_include;
557 uint32_t cpuid_feature_ecx_exclude;
558 uint32_t cpuid_feature_edx_include;
559 uint32_t cpuid_feature_edx_exclude;
560 
561 /*
562  * Allocate space for mcpu_cpi in the machcpu structure for all non-boot CPUs.
563  */
564 void
565 cpuid_alloc_space(cpu_t *cpu)
566 {
567 	/*
568 	 * By convention, cpu0 is the boot cpu, which is set up
569 	 * before memory allocation is available.  All other cpus get
570 	 * their cpuid_info struct allocated here.
571 	 */
572 	ASSERT(cpu->cpu_id != 0);
573 	ASSERT(cpu->cpu_m.mcpu_cpi == NULL);
574 	cpu->cpu_m.mcpu_cpi =
575 	    kmem_zalloc(sizeof (*cpu->cpu_m.mcpu_cpi), KM_SLEEP);
576 }
577 
578 void
579 cpuid_free_space(cpu_t *cpu)
580 {
581 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
582 	int i;
583 
584 	ASSERT(cpi != NULL);
585 	ASSERT(cpi != &cpuid_info0);
586 
587 	/*
588 	 * Free up any function 4 related dynamic storage
589 	 */
590 	for (i = 1; i < cpi->cpi_std_4_size; i++)
591 		kmem_free(cpi->cpi_std_4[i], sizeof (struct cpuid_regs));
592 	if (cpi->cpi_std_4_size > 0)
593 		kmem_free(cpi->cpi_std_4,
594 		    cpi->cpi_std_4_size * sizeof (struct cpuid_regs *));
595 
596 	kmem_free(cpi, sizeof (*cpi));
597 	cpu->cpu_m.mcpu_cpi = NULL;
598 }
599 
600 #if !defined(__xpv)
601 
602 static void
603 determine_platform()
604 {
605 	struct cpuid_regs cp;
606 	char *xen_str;
607 	uint32_t xen_signature[4], base;
608 
609 	platform_type = HW_NATIVE;
610 
611 	if (!enable_platform_detection)
612 		return;
613 
614 	/*
615 	 * In a fully virtualized domain, Xen's pseudo-cpuid function
616 	 * returns a string representing the Xen signature in %ebx, %ecx,
617 	 * and %edx. %eax contains the maximum supported cpuid function.
618 	 * We need at least a (base + 2) leaf value to do what we want
619 	 * to do. Try different base values, since the hypervisor might
620 	 * use a different one depending on whether Hyper-V emulation
621 	 * is switched on by default or not.
622 	 */
623 	for (base = 0x40000000; base < 0x40010000; base += 0x100) {
624 		cp.cp_eax = base;
625 		(void) __cpuid_insn(&cp);
626 		xen_signature[0] = cp.cp_ebx;
627 		xen_signature[1] = cp.cp_ecx;
628 		xen_signature[2] = cp.cp_edx;
629 		xen_signature[3] = 0;
630 		xen_str = (char *)xen_signature;
631 		if (strcmp("XenVMMXenVMM", xen_str) == 0 &&
632 		    cp.cp_eax >= (base + 2)) {
633 			platform_type = HW_XEN_HVM;
634 			return;
635 		}
636 	}
637 
638 	if (vmware_platform()) /* running under vmware hypervisor? */
639 		platform_type = HW_VMWARE;
640 }
641 
642 int
643 get_hwenv(void)
644 {
645 	if (platform_type == -1)
646 		determine_platform();
647 
648 	return (platform_type);
649 }
650 
651 int
652 is_controldom(void)
653 {
654 	return (0);
655 }
656 
657 #else
658 
659 int
660 get_hwenv(void)
661 {
662 	return (HW_XEN_PV);
663 }
664 
665 int
666 is_controldom(void)
667 {
668 	return (DOMAIN_IS_INITDOMAIN(xen_info));
669 }
670 
671 #endif	/* __xpv */
672 
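/*
 * Illustrative sketch (excluded from the build): a consumer branching on
 * the detected environment.  The HW_* values are the real ones used above;
 * the function name is invented.
 */
#if 0
static int
example_is_virtualized(void)
{
	switch (get_hwenv()) {
	case HW_XEN_HVM:
	case HW_VMWARE:
	case HW_XEN_PV:
		return (1);	/* e.g. prefer paravirtualized drivers */
	default:
		return (0);	/* HW_NATIVE */
	}
}
#endif
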
673 static void
674 cpuid_intel_getids(cpu_t *cpu, void *feature)
675 {
676 	uint_t i;
677 	uint_t chipid_shift = 0;
678 	uint_t coreid_shift = 0;
679 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
680 
681 	for (i = 1; i < cpi->cpi_ncpu_per_chip; i <<= 1)
682 		chipid_shift++;
683 
684 	cpi->cpi_chipid = cpi->cpi_apicid >> chipid_shift;
685 	cpi->cpi_clogid = cpi->cpi_apicid & ((1 << chipid_shift) - 1);
686 
687 	if (is_x86_feature(feature, X86FSET_CMP)) {
688 		/*
689 		 * Multi-core (and possibly multi-threaded)
690 		 * processors.
691 		 */
692 		uint_t ncpu_per_core;
693 		if (cpi->cpi_ncore_per_chip == 1)
694 			ncpu_per_core = cpi->cpi_ncpu_per_chip;
695 		else if (cpi->cpi_ncore_per_chip > 1)
696 			ncpu_per_core = cpi->cpi_ncpu_per_chip /
697 			    cpi->cpi_ncore_per_chip;
698 		/*
699 		 * 8-bit APIC IDs on dual-core Pentiums
700 		 * look like this:
701 		 *
702 		 * +-----------------------+------+------+
703 		 * | Physical Package ID   |  MC  |  HT  |
704 		 * +-----------------------+------+------+
705 		 * <------- chipid -------->
706 		 * <------- coreid --------------->
707 		 *			   <--- clogid -->
708 		 *			   <------>
709 		 *			   pkgcoreid
710 		 *
711 		 * Where the number of bits necessary to
712 		 * represent MC and HT fields together equals
713 		 * the minimum number of bits necessary to
714 		 * store the value of cpi->cpi_ncpu_per_chip.
715 		 * Of those bits, the MC part uses the number
716 		 * of bits necessary to store the value of
717 		 * cpi->cpi_ncore_per_chip.
718 		 */
719 		for (i = 1; i < ncpu_per_core; i <<= 1)
720 			coreid_shift++;
721 		cpi->cpi_coreid = cpi->cpi_apicid >> coreid_shift;
722 		cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
723 	} else if (is_x86_feature(feature, X86FSET_HTT)) {
724 		/*
725 		 * Single-core multi-threaded processors.
726 		 */
727 		cpi->cpi_coreid = cpi->cpi_chipid;
728 		cpi->cpi_pkgcoreid = 0;
729 	}
730 	cpi->cpi_procnodeid = cpi->cpi_chipid;
731 }
732 
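/*
 * Illustrative sketch (excluded from the build): a worked example of the
 * APIC ID decomposition performed above, for a hypothetical package with
 * 2 cores of 2 threads each (cpi_ncpu_per_chip == 4, ncpu_per_core == 2,
 * so chipid_shift == 2 and coreid_shift == 1).
 */
#if 0
static void
example_apicid_decode(void)
{
	uint32_t apicid = 0xd;			/* hypothetical: 0b1101 */

	uint_t chipid = apicid >> 2;			/* 0b11  = 3 */
	uint_t clogid = apicid & ((1 << 2) - 1);	/* 0b01  = 1 */
	uint_t coreid = apicid >> 1;			/* 0b110 = 6 */
	uint_t pkgcoreid = clogid >> 1;			/* 0b0   = 0 */
}
#endif
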
733 static void
734 cpuid_amd_getids(cpu_t *cpu)
735 {
736 	int i, first_half, coreidsz;
737 	uint32_t nb_caps_reg;
738 	uint_t node2_1;
739 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
740 
741 	/*
742 	 * AMD CMP chips currently have a single thread per core.
743 	 *
744 	 * Since no two cpus share a core we must assign a distinct coreid
745 	 * per cpu, and we do this by using the cpu_id.  This scheme does not,
746 	 * however, guarantee that sibling cores of a chip will have sequential
747 	 * coreids starting at a multiple of the number of cores per chip -
748 	 * that is usually the case, but if the ACPI MADT table is presented
749 	 * in a different order then we need to perform a few more gymnastics
750 	 * for the pkgcoreid.
751 	 *
752 	 * All processors in the system have the same number of enabled
753 	 * cores. Cores within a processor are always numbered sequentially
754 	 * from 0 regardless of how many or which are disabled, and there
755 	 * is no way for the operating system to discover the real core id
756 	 * when some are disabled.
757 	 */
758 
759 	cpi->cpi_coreid = cpu->cpu_id;
760 
761 	if (cpi->cpi_xmaxeax >= 0x80000008) {
762 
763 		coreidsz = BITX((cpi)->cpi_extd[8].cp_ecx, 15, 12);
764 
765 		/*
766 		 * In AMD parlance chip is really a node while Solaris
767 		 * sees chip as equivalent to socket/package.
768 		 */
769 		cpi->cpi_ncore_per_chip =
770 		    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
771 		if (coreidsz == 0) {
772 			/* Use legacy method */
773 			for (i = 1; i < cpi->cpi_ncore_per_chip; i <<= 1)
774 				coreidsz++;
775 			if (coreidsz == 0)
776 				coreidsz = 1;
777 		}
778 	} else {
779 		/* Assume single-core part */
780 		cpi->cpi_ncore_per_chip = 1;
781 		coreidsz = 1;
782 	}
783 
784 	cpi->cpi_clogid = cpi->cpi_pkgcoreid =
785 	    cpi->cpi_apicid & ((1<<coreidsz) - 1);
786 	cpi->cpi_ncpu_per_chip = cpi->cpi_ncore_per_chip;
787 
788 	/* Get nodeID */
789 	if (cpi->cpi_family == 0xf) {
790 		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
791 		cpi->cpi_chipid = cpi->cpi_procnodeid;
792 	} else if (cpi->cpi_family == 0x10) {
793 		/*
794 		 * See if we are a multi-node processor.
795 		 * All processors in the system have the same number of nodes
796 		 */
797 		nb_caps_reg =  pci_getl_func(0, 24, 3, 0xe8);
798 		if ((cpi->cpi_model < 8) || BITX(nb_caps_reg, 29, 29) == 0) {
799 			/* Single-node */
800 			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 5,
801 			    coreidsz);
802 			cpi->cpi_chipid = cpi->cpi_procnodeid;
803 		} else {
804 
805 			/*
806 			 * Multi-node revision D (2 nodes per package
807 			 * are supported)
808 			 */
809 			cpi->cpi_procnodes_per_pkg = 2;
810 
811 			first_half = (cpi->cpi_pkgcoreid <=
812 			    (cpi->cpi_ncore_per_chip/2 - 1));
813 
814 			if (cpi->cpi_apicid == cpi->cpi_pkgcoreid) {
815 				/* We are BSP */
816 				cpi->cpi_procnodeid = (first_half ? 0 : 1);
817 				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
818 			} else {
819 
820 				/* We are AP */
821 				/* NodeId[2:1] bits to use for reading F3xe8 */
822 				node2_1 = BITX(cpi->cpi_apicid, 5, 4) << 1;
823 
824 				nb_caps_reg =
825 				    pci_getl_func(0, 24 + node2_1, 3, 0xe8);
826 
827 				/*
828 				 * Check IntNodeNum bit (31:30, but bit 31 is
829 				 * always 0 on dual-node processors)
830 				 */
831 				if (BITX(nb_caps_reg, 30, 30) == 0)
832 					cpi->cpi_procnodeid = node2_1 +
833 					    !first_half;
834 				else
835 					cpi->cpi_procnodeid = node2_1 +
836 					    first_half;
837 
838 				cpi->cpi_chipid = cpi->cpi_procnodeid >> 1;
839 			}
840 		}
841 	} else if (cpi->cpi_family >= 0x11) {
842 		cpi->cpi_procnodeid = (cpi->cpi_apicid >> coreidsz) & 7;
843 		cpi->cpi_chipid = cpi->cpi_procnodeid;
844 	} else {
845 		cpi->cpi_procnodeid = 0;
846 		cpi->cpi_chipid = cpi->cpi_procnodeid;
847 	}
848 }
849 
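/*
 * Illustrative sketch (excluded from the build): a worked example of the
 * family 0xf node-ID extraction above, where the node ID occupies the
 * three APIC ID bits just above the coreidsz core-number bits.  The APIC
 * ID value is hypothetical.
 */
#if 0
static void
example_amd_nodeid(void)
{
	uint32_t apicid = 0x16;		/* hypothetical: 0b10110 */
	int coreidsz = 2;		/* APIC ID bits used for cores */

	uint_t pkgcoreid = apicid & ((1 << coreidsz) - 1);	/* 0b10  = 2 */
	uint_t procnodeid = (apicid >> coreidsz) & 7;		/* 0b101 = 5 */
}
#endif
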
850 /*
851  * Setup XFeature_Enabled_Mask register. Required by xsave feature.
852  */
853 void
854 setup_xfem(void)
855 {
856 	uint64_t flags = XFEATURE_LEGACY_FP;
857 
858 	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
859 
860 	if (is_x86_feature(x86_featureset, X86FSET_SSE))
861 		flags |= XFEATURE_SSE;
862 
863 	if (is_x86_feature(x86_featureset, X86FSET_AVX))
864 		flags |= XFEATURE_AVX;
865 
866 	set_xcr(XFEATURE_ENABLED_MASK, flags);
867 
868 	xsave_bv_all = flags;
869 }
870 
871 void *
872 cpuid_pass1(cpu_t *cpu)
873 {
874 	uint32_t mask_ecx, mask_edx;
875 	void *featureset;
876 	struct cpuid_info *cpi;
877 	struct cpuid_regs *cp;
878 	int xcpuid;
879 #if !defined(__xpv)
880 	extern int idle_cpu_prefer_mwait;
881 #endif
882 
883 #if !defined(__xpv)
884 	determine_platform();
885 #endif
886 	/*
887 	 * Space statically allocated for BSP, ensure pointer is set
888 	 */
889 	if (cpu->cpu_id == 0) {
890 		if (cpu->cpu_m.mcpu_cpi == NULL)
891 			cpu->cpu_m.mcpu_cpi = &cpuid_info0;
892 		featureset = x86_featureset0;
893 	} else {
894 		featureset = init_x86_featureset();
895 	}
896 
897 	add_x86_feature(featureset, X86FSET_CPUID);
898 
899 	cpi = cpu->cpu_m.mcpu_cpi;
900 	ASSERT(cpi != NULL);
901 	cp = &cpi->cpi_std[0];
902 	cp->cp_eax = 0;
903 	cpi->cpi_maxeax = __cpuid_insn(cp);
904 	{
905 		uint32_t *iptr = (uint32_t *)cpi->cpi_vendorstr;
906 		*iptr++ = cp->cp_ebx;
907 		*iptr++ = cp->cp_edx;
908 		*iptr++ = cp->cp_ecx;
909 		*(char *)&cpi->cpi_vendorstr[12] = '\0';
910 	}
911 
912 	cpi->cpi_vendor = _cpuid_vendorstr_to_vendorcode(cpi->cpi_vendorstr);
913 	x86_vendor = cpi->cpi_vendor; /* for compatibility */
914 
915 	/*
916 	 * Limit the range in case of weird hardware
917 	 */
918 	if (cpi->cpi_maxeax > CPI_MAXEAX_MAX)
919 		cpi->cpi_maxeax = CPI_MAXEAX_MAX;
920 	if (cpi->cpi_maxeax < 1)
921 		goto pass1_done;
922 
923 	cp = &cpi->cpi_std[1];
924 	cp->cp_eax = 1;
925 	(void) __cpuid_insn(cp);
926 
927 	/*
928 	 * Extract identifying constants for easy access.
929 	 */
930 	cpi->cpi_model = CPI_MODEL(cpi);
931 	cpi->cpi_family = CPI_FAMILY(cpi);
932 
933 	if (cpi->cpi_family == 0xf)
934 		cpi->cpi_family += CPI_FAMILY_XTD(cpi);
935 
936 	/*
937 	 * Beware: AMD uses "extended model" iff base *FAMILY* == 0xf.
938 	 * Intel, and presumably everyone else, uses model == 0xf, as
939 	 * one would expect (max value means possible overflow).  Sigh.
940 	 */
941 
942 	switch (cpi->cpi_vendor) {
943 	case X86_VENDOR_Intel:
944 		if (IS_EXTENDED_MODEL_INTEL(cpi))
945 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
946 		break;
947 	case X86_VENDOR_AMD:
948 		if (CPI_FAMILY(cpi) == 0xf)
949 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
950 		break;
951 	default:
952 		if (cpi->cpi_model == 0xf)
953 			cpi->cpi_model += CPI_MODEL_XTD(cpi) << 4;
954 		break;
955 	}
956 
957 	cpi->cpi_step = CPI_STEP(cpi);
958 	cpi->cpi_brandid = CPI_BRANDID(cpi);
959 
960 	/*
961 	 * *default* assumptions:
962 	 * - believe %edx feature word
963 	 * - ignore %ecx feature word
964 	 * - 32-bit virtual and physical addressing
965 	 */
966 	mask_edx = 0xffffffff;
967 	mask_ecx = 0;
968 
969 	cpi->cpi_pabits = cpi->cpi_vabits = 32;
970 
971 	switch (cpi->cpi_vendor) {
972 	case X86_VENDOR_Intel:
973 		if (cpi->cpi_family == 5)
974 			x86_type = X86_TYPE_P5;
975 		else if (IS_LEGACY_P6(cpi)) {
976 			x86_type = X86_TYPE_P6;
977 			pentiumpro_bug4046376 = 1;
978 			pentiumpro_bug4064495 = 1;
979 			/*
980 			 * Clear the SEP bit when it was set erroneously
981 			 */
982 			if (cpi->cpi_model < 3 && cpi->cpi_step < 3)
983 				cp->cp_edx &= ~CPUID_INTC_EDX_SEP;
984 		} else if (IS_NEW_F6(cpi) || cpi->cpi_family == 0xf) {
985 			x86_type = X86_TYPE_P4;
986 			/*
987 			 * We don't currently depend on any of the %ecx
988 			 * features until Prescott, so we'll only check
989 			 * this from P4 onwards.  We might want to revisit
990 			 * that idea later.
991 			 */
992 			mask_ecx = 0xffffffff;
993 		} else if (cpi->cpi_family > 0xf)
994 			mask_ecx = 0xffffffff;
995 		/*
996 		 * We don't support MONITOR/MWAIT if leaf 5 is not available
997 		 * to obtain the monitor linesize.
998 		 */
999 		if (cpi->cpi_maxeax < 5)
1000 			mask_ecx &= ~CPUID_INTC_ECX_MON;
1001 		break;
1002 	case X86_VENDOR_IntelClone:
1003 	default:
1004 		break;
1005 	case X86_VENDOR_AMD:
1006 #if defined(OPTERON_ERRATUM_108)
1007 		if (cpi->cpi_family == 0xf && cpi->cpi_model == 0xe) {
1008 			cp->cp_eax = (0xf0f & cp->cp_eax) | 0xc0;
1009 			cpi->cpi_model = 0xc;
1010 		} else
1011 #endif
1012 		if (cpi->cpi_family == 5) {
1013 			/*
1014 			 * AMD K5 and K6
1015 			 *
1016 			 * These CPUs have an incomplete implementation
1017 			 * of MCA/MCE which we mask away.
1018 			 */
1019 			mask_edx &= ~(CPUID_INTC_EDX_MCE | CPUID_INTC_EDX_MCA);
1020 
1021 			/*
1022 			 * Model 0 uses the wrong (APIC) bit
1023 			 * to indicate PGE.  Fix it here.
1024 			 */
1025 			if (cpi->cpi_model == 0) {
1026 				if (cp->cp_edx & 0x200) {
1027 					cp->cp_edx &= ~0x200;
1028 					cp->cp_edx |= CPUID_INTC_EDX_PGE;
1029 				}
1030 			}
1031 
1032 			/*
1033 			 * Early models had problems w/ MMX; disable.
1034 			 */
1035 			if (cpi->cpi_model < 6)
1036 				mask_edx &= ~CPUID_INTC_EDX_MMX;
1037 		}
1038 
1039 		/*
1040 		 * For newer families, SSE3 and CX16, at least, are valid;
1041 		 * enable all
1042 		 */
1043 		if (cpi->cpi_family >= 0xf)
1044 			mask_ecx = 0xffffffff;
1045 		/*
1046 		 * We don't support MONITOR/MWAIT if leaf 5 is not available
1047 		 * to obtain the monitor linesize.
1048 		 */
1049 		if (cpi->cpi_maxeax < 5)
1050 			mask_ecx &= ~CPUID_INTC_ECX_MON;
1051 
1052 #if !defined(__xpv)
1053 		/*
1054 		 * Do not use MONITOR/MWAIT to halt in the idle loop on any AMD
1055 		 * processors.  AMD does not intend MWAIT to be used in the cpu
1056 		 * idle loop on current and future processors.  10h and future
1057 		 * AMD processors use more power in MWAIT than HLT.
1058 		 * Pre-family-10h Opterons do not have the MWAIT instruction.
1059 		 */
1060 		idle_cpu_prefer_mwait = 0;
1061 #endif
1062 
1063 		break;
1064 	case X86_VENDOR_TM:
1065 		/*
1066 		 * workaround the NT workaround in CMS 4.1
1067 		 */
1068 		if (cpi->cpi_family == 5 && cpi->cpi_model == 4 &&
1069 		    (cpi->cpi_step == 2 || cpi->cpi_step == 3))
1070 			cp->cp_edx |= CPUID_INTC_EDX_CX8;
1071 		break;
1072 	case X86_VENDOR_Centaur:
1073 		/*
1074 		 * workaround the NT workarounds again
1075 		 */
1076 		if (cpi->cpi_family == 6)
1077 			cp->cp_edx |= CPUID_INTC_EDX_CX8;
1078 		break;
1079 	case X86_VENDOR_Cyrix:
1080 		/*
1081 		 * We rely heavily on the probing in locore
1082 		 * to actually figure out what parts, if any,
1083 		 * of the Cyrix cpuid instruction to believe.
1084 		 */
1085 		switch (x86_type) {
1086 		case X86_TYPE_CYRIX_486:
1087 			mask_edx = 0;
1088 			break;
1089 		case X86_TYPE_CYRIX_6x86:
1090 			mask_edx = 0;
1091 			break;
1092 		case X86_TYPE_CYRIX_6x86L:
1093 			mask_edx =
1094 			    CPUID_INTC_EDX_DE |
1095 			    CPUID_INTC_EDX_CX8;
1096 			break;
1097 		case X86_TYPE_CYRIX_6x86MX:
1098 			mask_edx =
1099 			    CPUID_INTC_EDX_DE |
1100 			    CPUID_INTC_EDX_MSR |
1101 			    CPUID_INTC_EDX_CX8 |
1102 			    CPUID_INTC_EDX_PGE |
1103 			    CPUID_INTC_EDX_CMOV |
1104 			    CPUID_INTC_EDX_MMX;
1105 			break;
1106 		case X86_TYPE_CYRIX_GXm:
1107 			mask_edx =
1108 			    CPUID_INTC_EDX_MSR |
1109 			    CPUID_INTC_EDX_CX8 |
1110 			    CPUID_INTC_EDX_CMOV |
1111 			    CPUID_INTC_EDX_MMX;
1112 			break;
1113 		case X86_TYPE_CYRIX_MediaGX:
1114 			break;
1115 		case X86_TYPE_CYRIX_MII:
1116 		case X86_TYPE_VIA_CYRIX_III:
1117 			mask_edx =
1118 			    CPUID_INTC_EDX_DE |
1119 			    CPUID_INTC_EDX_TSC |
1120 			    CPUID_INTC_EDX_MSR |
1121 			    CPUID_INTC_EDX_CX8 |
1122 			    CPUID_INTC_EDX_PGE |
1123 			    CPUID_INTC_EDX_CMOV |
1124 			    CPUID_INTC_EDX_MMX;
1125 			break;
1126 		default:
1127 			break;
1128 		}
1129 		break;
1130 	}
1131 
1132 #if defined(__xpv)
1133 	/*
1134 	 * Do not support MONITOR/MWAIT under a hypervisor
1135 	 */
1136 	mask_ecx &= ~CPUID_INTC_ECX_MON;
1137 	/*
1138 	 * Do not support XSAVE under a hypervisor for now
1139 	 */
1140 	xsave_force_disable = B_TRUE;
1141 
1142 #endif	/* __xpv */
1143 
1144 	if (xsave_force_disable) {
1145 		mask_ecx &= ~CPUID_INTC_ECX_XSAVE;
1146 		mask_ecx &= ~CPUID_INTC_ECX_AVX;
1147 	}
1148 
1149 	/*
1150 	 * Now we've figured out the masks that determine
1151 	 * which bits we choose to believe, apply the masks
1152 	 * to the feature words, then map the kernel's view
1153 	 * of these feature words into its feature word.
1154 	 */
1155 	cp->cp_edx &= mask_edx;
1156 	cp->cp_ecx &= mask_ecx;
1157 
1158 	/*
1159 	 * apply any platform restrictions (we don't call this
1160 	 * immediately after __cpuid_insn here, because we need the
1161 	 * workarounds applied above first)
1162 	 */
1163 	platform_cpuid_mangle(cpi->cpi_vendor, 1, cp);
1164 
1165 	/*
1166 	 * fold in overrides from the "eeprom" mechanism
1167 	 */
1168 	cp->cp_edx |= cpuid_feature_edx_include;
1169 	cp->cp_edx &= ~cpuid_feature_edx_exclude;
1170 
1171 	cp->cp_ecx |= cpuid_feature_ecx_include;
1172 	cp->cp_ecx &= ~cpuid_feature_ecx_exclude;
1173 
1174 	if (cp->cp_edx & CPUID_INTC_EDX_PSE) {
1175 		add_x86_feature(featureset, X86FSET_LARGEPAGE);
1176 	}
1177 	if (cp->cp_edx & CPUID_INTC_EDX_TSC) {
1178 		add_x86_feature(featureset, X86FSET_TSC);
1179 	}
1180 	if (cp->cp_edx & CPUID_INTC_EDX_MSR) {
1181 		add_x86_feature(featureset, X86FSET_MSR);
1182 	}
1183 	if (cp->cp_edx & CPUID_INTC_EDX_MTRR) {
1184 		add_x86_feature(featureset, X86FSET_MTRR);
1185 	}
1186 	if (cp->cp_edx & CPUID_INTC_EDX_PGE) {
1187 		add_x86_feature(featureset, X86FSET_PGE);
1188 	}
1189 	if (cp->cp_edx & CPUID_INTC_EDX_CMOV) {
1190 		add_x86_feature(featureset, X86FSET_CMOV);
1191 	}
1192 	if (cp->cp_edx & CPUID_INTC_EDX_MMX) {
1193 		add_x86_feature(featureset, X86FSET_MMX);
1194 	}
1195 	if ((cp->cp_edx & CPUID_INTC_EDX_MCE) != 0 &&
1196 	    (cp->cp_edx & CPUID_INTC_EDX_MCA) != 0) {
1197 		add_x86_feature(featureset, X86FSET_MCA);
1198 	}
1199 	if (cp->cp_edx & CPUID_INTC_EDX_PAE) {
1200 		add_x86_feature(featureset, X86FSET_PAE);
1201 	}
1202 	if (cp->cp_edx & CPUID_INTC_EDX_CX8) {
1203 		add_x86_feature(featureset, X86FSET_CX8);
1204 	}
1205 	if (cp->cp_ecx & CPUID_INTC_ECX_CX16) {
1206 		add_x86_feature(featureset, X86FSET_CX16);
1207 	}
1208 	if (cp->cp_edx & CPUID_INTC_EDX_PAT) {
1209 		add_x86_feature(featureset, X86FSET_PAT);
1210 	}
1211 	if (cp->cp_edx & CPUID_INTC_EDX_SEP) {
1212 		add_x86_feature(featureset, X86FSET_SEP);
1213 	}
1214 	if (cp->cp_edx & CPUID_INTC_EDX_FXSR) {
1215 		/*
1216 		 * In our implementation, fxsave/fxrstor
1217 		 * are prerequisites before we'll even
1218 		 * try and do SSE things.
1219 		 */
1220 		if (cp->cp_edx & CPUID_INTC_EDX_SSE) {
1221 			add_x86_feature(featureset, X86FSET_SSE);
1222 		}
1223 		if (cp->cp_edx & CPUID_INTC_EDX_SSE2) {
1224 			add_x86_feature(featureset, X86FSET_SSE2);
1225 		}
1226 		if (cp->cp_ecx & CPUID_INTC_ECX_SSE3) {
1227 			add_x86_feature(featureset, X86FSET_SSE3);
1228 		}
1229 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
1230 			if (cp->cp_ecx & CPUID_INTC_ECX_SSSE3) {
1231 				add_x86_feature(featureset, X86FSET_SSSE3);
1232 			}
1233 			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_1) {
1234 				add_x86_feature(featureset, X86FSET_SSE4_1);
1235 			}
1236 			if (cp->cp_ecx & CPUID_INTC_ECX_SSE4_2) {
1237 				add_x86_feature(featureset, X86FSET_SSE4_2);
1238 			}
1239 			if (cp->cp_ecx & CPUID_INTC_ECX_AES) {
1240 				add_x86_feature(featureset, X86FSET_AES);
1241 			}
1242 			if (cp->cp_ecx & CPUID_INTC_ECX_PCLMULQDQ) {
1243 				add_x86_feature(featureset, X86FSET_PCLMULQDQ);
1244 			}
1245 
1246 			if (cp->cp_ecx & CPUID_INTC_ECX_XSAVE) {
1247 				add_x86_feature(featureset, X86FSET_XSAVE);
1248 				/* We only test AVX when there is XSAVE */
1249 				if (cp->cp_ecx & CPUID_INTC_ECX_AVX) {
1250 					add_x86_feature(featureset,
1251 					    X86FSET_AVX);
1252 				}
1253 			}
1254 		}
1255 	}
1256 	if (cp->cp_edx & CPUID_INTC_EDX_DE) {
1257 		add_x86_feature(featureset, X86FSET_DE);
1258 	}
1259 #if !defined(__xpv)
1260 	if (cp->cp_ecx & CPUID_INTC_ECX_MON) {
1261 
1262 		/*
1263 		 * We require the CLFLUSH instruction for the erratum
1264 		 * workaround needed to use MONITOR/MWAIT.
1265 		 */
1266 		if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1267 			cpi->cpi_mwait.support |= MWAIT_SUPPORT;
1268 			add_x86_feature(featureset, X86FSET_MWAIT);
1269 		} else {
1270 			extern int idle_cpu_assert_cflush_monitor;
1271 
1272 			/*
1273 			 * All processors we are aware of which have
1274 			 * MONITOR/MWAIT also have CLFLUSH.
1275 			 */
1276 			if (idle_cpu_assert_cflush_monitor) {
1277 				ASSERT((cp->cp_ecx & CPUID_INTC_ECX_MON) &&
1278 				    (cp->cp_edx & CPUID_INTC_EDX_CLFSH));
1279 			}
1280 		}
1281 	}
1282 #endif	/* __xpv */
1283 
1284 	/*
1285 	 * Only need this the first time; the rest of the cpus will follow suit.
1286 	 * We only capture this for the boot cpu.
1287 	 */
1288 	if (cp->cp_edx & CPUID_INTC_EDX_CLFSH) {
1289 		add_x86_feature(featureset, X86FSET_CLFSH);
1290 		x86_clflush_size = (BITX(cp->cp_ebx, 15, 8) * 8);
1291 	}
1292 	if (is_x86_feature(featureset, X86FSET_PAE))
1293 		cpi->cpi_pabits = 36;
1294 
1295 	/*
1296 	 * Hyperthreading configuration is slightly tricky on Intel
1297 	 * and pure clones, and even trickier on AMD.
1298 	 *
1299 	 * (AMD chose to set the HTT bit on their CMP processors,
1300 	 * even though they're not actually hyperthreaded.  Thus it
1301 	 * takes a bit more work to figure out what's really going
1302 	 * on ... see the handling of the CMP_LGCY bit below)
1303 	 */
1304 	if (cp->cp_edx & CPUID_INTC_EDX_HTT) {
1305 		cpi->cpi_ncpu_per_chip = CPI_CPU_COUNT(cpi);
1306 		if (cpi->cpi_ncpu_per_chip > 1)
1307 			add_x86_feature(featureset, X86FSET_HTT);
1308 	} else {
1309 		cpi->cpi_ncpu_per_chip = 1;
1310 	}
1311 
1312 	/*
1313 	 * Work on the "extended" feature information, doing
1314 	 * some basic initialization for cpuid_pass2()
1315 	 */
1316 	xcpuid = 0;
1317 	switch (cpi->cpi_vendor) {
1318 	case X86_VENDOR_Intel:
1319 		if (IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf)
1320 			xcpuid++;
1321 		break;
1322 	case X86_VENDOR_AMD:
1323 		if (cpi->cpi_family > 5 ||
1324 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
1325 			xcpuid++;
1326 		break;
1327 	case X86_VENDOR_Cyrix:
1328 		/*
1329 		 * Only these Cyrix CPUs are -known- to support
1330 		 * extended cpuid operations.
1331 		 */
1332 		if (x86_type == X86_TYPE_VIA_CYRIX_III ||
1333 		    x86_type == X86_TYPE_CYRIX_GXm)
1334 			xcpuid++;
1335 		break;
1336 	case X86_VENDOR_Centaur:
1337 	case X86_VENDOR_TM:
1338 	default:
1339 		xcpuid++;
1340 		break;
1341 	}
1342 
1343 	if (xcpuid) {
1344 		cp = &cpi->cpi_extd[0];
1345 		cp->cp_eax = 0x80000000;
1346 		cpi->cpi_xmaxeax = __cpuid_insn(cp);
1347 	}
1348 
1349 	if (cpi->cpi_xmaxeax & 0x80000000) {
1350 
1351 		if (cpi->cpi_xmaxeax > CPI_XMAXEAX_MAX)
1352 			cpi->cpi_xmaxeax = CPI_XMAXEAX_MAX;
1353 
1354 		switch (cpi->cpi_vendor) {
1355 		case X86_VENDOR_Intel:
1356 		case X86_VENDOR_AMD:
1357 			if (cpi->cpi_xmaxeax < 0x80000001)
1358 				break;
1359 			cp = &cpi->cpi_extd[1];
1360 			cp->cp_eax = 0x80000001;
1361 			(void) __cpuid_insn(cp);
1362 
1363 			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1364 			    cpi->cpi_family == 5 &&
1365 			    cpi->cpi_model == 6 &&
1366 			    cpi->cpi_step == 6) {
1367 				/*
1368 				 * K6 model 6 uses bit 10 to indicate SYSC
1369 				 * Later models use bit 11. Fix it here.
1370 				 */
1371 				if (cp->cp_edx & 0x400) {
1372 					cp->cp_edx &= ~0x400;
1373 					cp->cp_edx |= CPUID_AMD_EDX_SYSC;
1374 				}
1375 			}
1376 
1377 			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000001, cp);
1378 
1379 			/*
1380 			 * Compute the additions to the kernel's feature word.
1381 			 */
1382 			if (cp->cp_edx & CPUID_AMD_EDX_NX) {
1383 				add_x86_feature(featureset, X86FSET_NX);
1384 			}
1385 
1386 			/*
1387 			 * Regardless of whether or not we boot 64-bit,
1388 			 * we should have a way to identify whether
1389 			 * the CPU is capable of running 64-bit.
1390 			 */
1391 			if (cp->cp_edx & CPUID_AMD_EDX_LM) {
1392 				add_x86_feature(featureset, X86FSET_64);
1393 			}
1394 
1395 #if defined(__amd64)
1396 			/* 1 GB large page - enable only for 64 bit kernel */
1397 			if (cp->cp_edx & CPUID_AMD_EDX_1GPG) {
1398 				add_x86_feature(featureset, X86FSET_1GPG);
1399 			}
1400 #endif
1401 
1402 			if ((cpi->cpi_vendor == X86_VENDOR_AMD) &&
1403 			    (cpi->cpi_std[1].cp_edx & CPUID_INTC_EDX_FXSR) &&
1404 			    (cp->cp_ecx & CPUID_AMD_ECX_SSE4A)) {
1405 				add_x86_feature(featureset, X86FSET_SSE4A);
1406 			}
1407 
1408 			/*
1409 			 * If both the HTT and CMP_LGCY bits are set,
1410 			 * then we're not actually HyperThreaded.  Read
1411 			 * "AMD CPUID Specification" for more details.
1412 			 */
1413 			if (cpi->cpi_vendor == X86_VENDOR_AMD &&
1414 			    is_x86_feature(featureset, X86FSET_HTT) &&
1415 			    (cp->cp_ecx & CPUID_AMD_ECX_CMP_LGCY)) {
1416 				remove_x86_feature(featureset, X86FSET_HTT);
1417 				add_x86_feature(featureset, X86FSET_CMP);
1418 			}
1419 #if defined(__amd64)
1420 			/*
1421 			 * It's really tricky to support syscall/sysret in
1422 			 * the i386 kernel; we rely on sysenter/sysexit
1423 			 * instead.  In the amd64 kernel, things are -way-
1424 			 * better.
1425 			 */
1426 			if (cp->cp_edx & CPUID_AMD_EDX_SYSC) {
1427 				add_x86_feature(featureset, X86FSET_ASYSC);
1428 			}
1429 
1430 			/*
1431 			 * While we're thinking about system calls, note
1432 			 * that AMD processors don't support sysenter
1433 			 * in long mode at all, so don't try to program them.
1434 			 */
1435 			if (x86_vendor == X86_VENDOR_AMD) {
1436 				remove_x86_feature(featureset, X86FSET_SEP);
1437 			}
1438 #endif
1439 			if (cp->cp_edx & CPUID_AMD_EDX_TSCP) {
1440 				add_x86_feature(featureset, X86FSET_TSCP);
1441 			}
1442 			break;
1443 		default:
1444 			break;
1445 		}
1446 
1447 		/*
1448 		 * Get CPUID data about processor cores and hyperthreads.
1449 		 */
1450 		switch (cpi->cpi_vendor) {
1451 		case X86_VENDOR_Intel:
1452 			if (cpi->cpi_maxeax >= 4) {
1453 				cp = &cpi->cpi_std[4];
1454 				cp->cp_eax = 4;
1455 				cp->cp_ecx = 0;
1456 				(void) __cpuid_insn(cp);
1457 				platform_cpuid_mangle(cpi->cpi_vendor, 4, cp);
1458 			}
1459 			/*FALLTHROUGH*/
1460 		case X86_VENDOR_AMD:
1461 			if (cpi->cpi_xmaxeax < 0x80000008)
1462 				break;
1463 			cp = &cpi->cpi_extd[8];
1464 			cp->cp_eax = 0x80000008;
1465 			(void) __cpuid_insn(cp);
1466 			platform_cpuid_mangle(cpi->cpi_vendor, 0x80000008, cp);
1467 
1468 			/*
1469 			 * Virtual and physical address limits from
1470 			 * cpuid override previously guessed values.
1471 			 */
1472 			cpi->cpi_pabits = BITX(cp->cp_eax, 7, 0);
1473 			cpi->cpi_vabits = BITX(cp->cp_eax, 15, 8);
1474 			break;
1475 		default:
1476 			break;
1477 		}
1478 
1479 		/*
1480 		 * Derive the number of cores per chip
1481 		 */
1482 		switch (cpi->cpi_vendor) {
1483 		case X86_VENDOR_Intel:
1484 			if (cpi->cpi_maxeax < 4) {
1485 				cpi->cpi_ncore_per_chip = 1;
1486 				break;
1487 			} else {
1488 				cpi->cpi_ncore_per_chip =
1489 				    BITX((cpi)->cpi_std[4].cp_eax, 31, 26) + 1;
1490 			}
1491 			break;
1492 		case X86_VENDOR_AMD:
1493 			if (cpi->cpi_xmaxeax < 0x80000008) {
1494 				cpi->cpi_ncore_per_chip = 1;
1495 				break;
1496 			} else {
1497 				/*
1498 				 * On family 0xf cpuid fn 0x80000008 ECX[7:0]
1499 				 * "NC" is
1499 				 * 1 less than the number of physical cores on
1500 				 * the chip.  In family 0x10 this value can
1501 				 * be affected by "downcoring" - it reflects
1502 				 * 1 less than the number of cores actually
1503 				 * enabled on this node.
1504 				 */
1505 				cpi->cpi_ncore_per_chip =
1506 				    BITX((cpi)->cpi_extd[8].cp_ecx, 7, 0) + 1;
1507 			}
1508 			break;
1509 		default:
1510 			cpi->cpi_ncore_per_chip = 1;
1511 			break;
1512 		}
1513 
1514 		/*
1515 		 * Get CPUID data about TSC Invariance in Deep C-State.
1516 		 */
1517 		switch (cpi->cpi_vendor) {
1518 		case X86_VENDOR_Intel:
1519 		if (cpi->cpi_xmaxeax >= 0x80000007) {
1520 				cp = &cpi->cpi_extd[7];
1521 				cp->cp_eax = 0x80000007;
1522 				cp->cp_ecx = 0;
1523 				(void) __cpuid_insn(cp);
1524 			}
1525 			break;
1526 		default:
1527 			break;
1528 		}
1529 	} else {
1530 		cpi->cpi_ncore_per_chip = 1;
1531 	}
1532 
1533 	/*
1534 	 * If more than one core, then this processor is CMP.
1535 	 */
1536 	if (cpi->cpi_ncore_per_chip > 1) {
1537 		add_x86_feature(featureset, X86FSET_CMP);
1538 	}
1539 
1540 	/*
1541 	 * If the number of cores is the same as the number
1542 	 * of CPUs, then we cannot have HyperThreading.
1543 	 */
1544 	if (cpi->cpi_ncpu_per_chip == cpi->cpi_ncore_per_chip) {
1545 		remove_x86_feature(featureset, X86FSET_HTT);
1546 	}
1547 
1548 	cpi->cpi_apicid = CPI_APIC_ID(cpi);
1549 	cpi->cpi_procnodes_per_pkg = 1;
1550 	if (is_x86_feature(featureset, X86FSET_HTT) == B_FALSE &&
1551 	    is_x86_feature(featureset, X86FSET_CMP) == B_FALSE) {
1552 		/*
1553 		 * Single-core single-threaded processors.
1554 		 */
1555 		cpi->cpi_chipid = -1;
1556 		cpi->cpi_clogid = 0;
1557 		cpi->cpi_coreid = cpu->cpu_id;
1558 		cpi->cpi_pkgcoreid = 0;
1559 		if (cpi->cpi_vendor == X86_VENDOR_AMD)
1560 			cpi->cpi_procnodeid = BITX(cpi->cpi_apicid, 3, 0);
1561 		else
1562 			cpi->cpi_procnodeid = cpi->cpi_chipid;
1563 	} else if (cpi->cpi_ncpu_per_chip > 1) {
1564 		if (cpi->cpi_vendor == X86_VENDOR_Intel)
1565 			cpuid_intel_getids(cpu, featureset);
1566 		else if (cpi->cpi_vendor == X86_VENDOR_AMD)
1567 			cpuid_amd_getids(cpu);
1568 		else {
1569 			/*
1570 			 * All other processors are currently
1571 			 * assumed to have single cores.
1572 			 */
1573 			cpi->cpi_coreid = cpi->cpi_chipid;
1574 			cpi->cpi_pkgcoreid = 0;
1575 			cpi->cpi_procnodeid = cpi->cpi_chipid;
1576 		}
1577 	}
1578 
1579 	/*
1580 	 * Synthesize chip "revision" and socket type
1581 	 */
1582 	cpi->cpi_chiprev = _cpuid_chiprev(cpi->cpi_vendor, cpi->cpi_family,
1583 	    cpi->cpi_model, cpi->cpi_step);
1584 	cpi->cpi_chiprevstr = _cpuid_chiprevstr(cpi->cpi_vendor,
1585 	    cpi->cpi_family, cpi->cpi_model, cpi->cpi_step);
1586 	cpi->cpi_socket = _cpuid_skt(cpi->cpi_vendor, cpi->cpi_family,
1587 	    cpi->cpi_model, cpi->cpi_step);
1588 
1589 pass1_done:
1590 	cpi->cpi_pass = 1;
1591 	return (featureset);
1592 }
1593 
1594 /*
1595  * Make copies of the cpuid table entries we depend on, in
1596  * part for ease of parsing now, in part so that we have only
1597  * one place to correct any of it, in part for ease of
1598  * later export to userland, and in part so we can look at
1599  * this stuff in a crash dump.
1600  */
1601 
1602 /*ARGSUSED*/
1603 void
1604 cpuid_pass2(cpu_t *cpu)
1605 {
1606 	uint_t n, nmax;
1607 	int i;
1608 	struct cpuid_regs *cp;
1609 	uint8_t *dp;
1610 	uint32_t *iptr;
1611 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
1612 
1613 	ASSERT(cpi->cpi_pass == 1);
1614 
1615 	if (cpi->cpi_maxeax < 1)
1616 		goto pass2_done;
1617 
1618 	if ((nmax = cpi->cpi_maxeax + 1) > NMAX_CPI_STD)
1619 		nmax = NMAX_CPI_STD;
1620 	/*
1621 	 * (We already handled n == 0 and n == 1 in pass 1)
1622 	 */
1623 	for (n = 2, cp = &cpi->cpi_std[2]; n < nmax; n++, cp++) {
1624 		cp->cp_eax = n;
1625 
1626 		/*
1627 		 * CPUID function 4 expects %ecx to be initialized
1628 		 * with an index which indicates which cache to return
1629 		 * information about. The OS is expected to call function 4
1630 		 * with %ecx set to 0, 1, 2, ... until it returns with
1631 		 * EAX[4:0] set to 0, which indicates there are no more
1632 		 * caches.
1633 		 *
1634 		 * Here, populate cpi_std[4] with the information returned by
1635 		 * function 4 when %ecx == 0, and do the rest in cpuid_pass3()
1636 		 * when dynamic memory allocation becomes available.
1637 		 *
1638 		 * Note: we need to explicitly initialize %ecx here, since
1639 		 * function 4 may have been previously invoked.
1640 		 */
1641 		if (n == 4)
1642 			cp->cp_ecx = 0;
1643 
1644 		(void) __cpuid_insn(cp);
1645 		platform_cpuid_mangle(cpi->cpi_vendor, n, cp);
1646 		switch (n) {
1647 		case 2:
1648 			/*
1649 			 * "the lower 8 bits of the %eax register
1650 			 * contain a value that identifies the number
1651 			 * of times the cpuid [instruction] has to be
1652 			 * executed to obtain a complete image of the
1653 			 * processor's caching systems."
1654 			 *
1655 			 * How *do* they make this stuff up?
1656 			 */
1657 			cpi->cpi_ncache = sizeof (*cp) *
1658 			    BITX(cp->cp_eax, 7, 0);
1659 			if (cpi->cpi_ncache == 0)
1660 				break;
1661 			cpi->cpi_ncache--;	/* skip count byte */
1662 
1663 			/*
1664 			 * Well, for now, rather than attempt to implement
1665 			 * this slightly dubious algorithm, we just look
1666 			 * at the first 15 ..
1667 			 */
1668 			if (cpi->cpi_ncache > (sizeof (*cp) - 1))
1669 				cpi->cpi_ncache = sizeof (*cp) - 1;
1670 
1671 			dp = cpi->cpi_cacheinfo;
1672 			if (BITX(cp->cp_eax, 31, 31) == 0) {
1673 				uint8_t *p = (void *)&cp->cp_eax;
1674 				for (i = 1; i < 4; i++)
1675 					if (p[i] != 0)
1676 						*dp++ = p[i];
1677 			}
1678 			if (BITX(cp->cp_ebx, 31, 31) == 0) {
1679 				uint8_t *p = (void *)&cp->cp_ebx;
1680 				for (i = 0; i < 4; i++)
1681 					if (p[i] != 0)
1682 						*dp++ = p[i];
1683 			}
1684 			if (BITX(cp->cp_ecx, 31, 31) == 0) {
1685 				uint8_t *p = (void *)&cp->cp_ecx;
1686 				for (i = 0; i < 4; i++)
1687 					if (p[i] != 0)
1688 						*dp++ = p[i];
1689 			}
1690 			if (BITX(cp->cp_edx, 31, 31) == 0) {
1691 				uint8_t *p = (void *)&cp->cp_edx;
1692 				for (i = 0; i < 4; i++)
1693 					if (p[i] != 0)
1694 						*dp++ = p[i];
1695 			}
1696 			break;
1697 
1698 		case 3:	/* Processor serial number, if PSN supported */
1699 			break;
1700 
1701 		case 4:	/* Deterministic cache parameters */
1702 			break;
1703 
1704 		case 5:	/* Monitor/Mwait parameters */
1705 		{
1706 			size_t mwait_size;
1707 
1708 			/*
1709 			 * check cpi_mwait.support which was set in cpuid_pass1
1710 			 */
1711 			if (!(cpi->cpi_mwait.support & MWAIT_SUPPORT))
1712 				break;
1713 
1714 			/*
1715 			 * Protect ourselves from an insane mwait line size.
1716 			 * Workaround for incomplete hardware emulator(s).
1717 			 */
1718 			mwait_size = (size_t)MWAIT_SIZE_MAX(cpi);
1719 			if (mwait_size < sizeof (uint32_t) ||
1720 			    !ISP2(mwait_size)) {
1721 #if DEBUG
1722 				cmn_err(CE_NOTE, "Cannot handle cpu %d mwait "
1723 				    "size %ld", cpu->cpu_id, (long)mwait_size);
1724 #endif
1725 				break;
1726 			}
1727 
1728 			cpi->cpi_mwait.mon_min = (size_t)MWAIT_SIZE_MIN(cpi);
1729 			cpi->cpi_mwait.mon_max = mwait_size;
1730 			if (MWAIT_EXTENSION(cpi)) {
1731 				cpi->cpi_mwait.support |= MWAIT_EXTENSIONS;
1732 				if (MWAIT_INT_ENABLE(cpi))
1733 					cpi->cpi_mwait.support |=
1734 					    MWAIT_ECX_INT_ENABLE;
1735 			}
1736 			break;
1737 		}
1738 		default:
1739 			break;
1740 		}
1741 	}
1742 
1743 	if (cpi->cpi_maxeax >= 0xB && cpi->cpi_vendor == X86_VENDOR_Intel) {
1744 		struct cpuid_regs regs;
1745 
1746 		cp = &regs;
1747 		cp->cp_eax = 0xB;
1748 		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1749 
1750 		(void) __cpuid_insn(cp);
1751 
1752 		/*
1753 		 * Check CPUID.EAX=0BH, ECX=0H:EBX is non-zero, which
1754 		 * indicates that the extended topology enumeration leaf is
1755 		 * available.
1756 		 */
1757 		if (cp->cp_ebx) {
1758 			uint32_t x2apic_id;
1759 			uint_t coreid_shift = 0;
1760 			uint_t ncpu_per_core = 1;
1761 			uint_t chipid_shift = 0;
1762 			uint_t ncpu_per_chip = 1;
1763 			uint_t i;
1764 			uint_t level;
1765 
1766 			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
1767 				cp->cp_eax = 0xB;
1768 				cp->cp_ecx = i;
1769 
1770 				(void) __cpuid_insn(cp);
1771 				level = CPI_CPU_LEVEL_TYPE(cp);
1772 
1773 				if (level == 1) {
1774 					x2apic_id = cp->cp_edx;
1775 					coreid_shift = BITX(cp->cp_eax, 4, 0);
1776 					ncpu_per_core = BITX(cp->cp_ebx, 15, 0);
1777 				} else if (level == 2) {
1778 					x2apic_id = cp->cp_edx;
1779 					chipid_shift = BITX(cp->cp_eax, 4, 0);
1780 					ncpu_per_chip = BITX(cp->cp_ebx, 15, 0);
1781 				}
1782 			}
1783 
1784 			cpi->cpi_apicid = x2apic_id;
1785 			cpi->cpi_ncpu_per_chip = ncpu_per_chip;
1786 			cpi->cpi_ncore_per_chip = ncpu_per_chip /
1787 			    ncpu_per_core;
1788 			cpi->cpi_chipid = x2apic_id >> chipid_shift;
1789 			cpi->cpi_clogid = x2apic_id & ((1 << chipid_shift) - 1);
1790 			cpi->cpi_coreid = x2apic_id >> coreid_shift;
1791 			cpi->cpi_pkgcoreid = cpi->cpi_clogid >> coreid_shift;
1792 		}
1793 
1794 		/* Make cp NULL so that we don't stumble on others */
1795 		cp = NULL;
1796 	}
1797 
1798 	/*
1799 	 * XSAVE enumeration
1800 	 */
1801 	if (cpi->cpi_maxeax >= 0xD && cpi->cpi_vendor == X86_VENDOR_Intel) {
1802 		struct cpuid_regs regs;
1803 		boolean_t cpuid_d_valid = B_TRUE;
1804 
1805 		cp = &regs;
1806 		cp->cp_eax = 0xD;
1807 		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
1808 
1809 		(void) __cpuid_insn(cp);
1810 
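		/*
		 * For leaf 0xD, subleaf 0: %eax and %edx report the
		 * supported XSAVE state-component bits (low and high 32
		 * bits), %ecx the maximum save area size needed for all
		 * supported features, and %ebx the size for the currently
		 * enabled features.
		 */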
1811 		/*
1812 		 * Sanity checks for debug
1813 		 */
1814 		if ((cp->cp_eax & XFEATURE_LEGACY_FP) == 0 ||
1815 		    (cp->cp_eax & XFEATURE_SSE) == 0) {
1816 			cpuid_d_valid = B_FALSE;
1817 		}
1818 
1819 		cpi->cpi_xsave.xsav_hw_features_low = cp->cp_eax;
1820 		cpi->cpi_xsave.xsav_hw_features_high = cp->cp_edx;
1821 		cpi->cpi_xsave.xsav_max_size = cp->cp_ecx;
1822 
1823 		/*
1824 		 * If the hw supports AVX, get the size and offset in the save
1825 		 * area for the ymm state.
1826 		 */
1827 		if (cpi->cpi_xsave.xsav_hw_features_low & XFEATURE_AVX) {
1828 			cp->cp_eax = 0xD;
1829 			cp->cp_ecx = 2;
1830 			cp->cp_edx = cp->cp_ebx = 0;
1831 
1832 			(void) __cpuid_insn(cp);
1833 
1834 			if (cp->cp_ebx != CPUID_LEAFD_2_YMM_OFFSET ||
1835 			    cp->cp_eax != CPUID_LEAFD_2_YMM_SIZE) {
1836 				cpuid_d_valid = B_FALSE;
1837 			}
1838 
1839 			cpi->cpi_xsave.ymm_size = cp->cp_eax;
1840 			cpi->cpi_xsave.ymm_offset = cp->cp_ebx;
1841 		}
1842 
1843 		if (!is_x86_feature(x86_featureset, X86FSET_XSAVE)) {
1844 			xsave_state_size = 0;
1845 		} else if (cpuid_d_valid) {
1846 			xsave_state_size = cpi->cpi_xsave.xsav_max_size;
1847 		} else {
1848 			/* Broken CPUID 0xD, probably in HVM */
1849 			cmn_err(CE_WARN, "cpu%d: CPUID.0xD returns invalid "
1850 			    "value: hw_low = %d, hw_high = %d, xsave_size = %d"
1851 			    ", ymm_size = %d, ymm_offset = %d\n",
1852 			    cpu->cpu_id, cpi->cpi_xsave.xsav_hw_features_low,
1853 			    cpi->cpi_xsave.xsav_hw_features_high,
1854 			    (int)cpi->cpi_xsave.xsav_max_size,
1855 			    (int)cpi->cpi_xsave.ymm_size,
1856 			    (int)cpi->cpi_xsave.ymm_offset);
1857 
1858 			if (xsave_state_size != 0) {
1859 				/*
1860 				 * This must be a non-boot CPU. We cannot
1861 				 * continue, because the boot CPU has already
1862 				 * enabled XSAVE.
1863 				 */
1864 				ASSERT(cpu->cpu_id != 0);
1865 				cmn_err(CE_PANIC, "cpu%d: we have already "
1866 				    "enabled XSAVE on boot cpu, cannot "
1867 				    "continue.", cpu->cpu_id);
1868 			} else {
1869 				/*
1870 				 * This must be the boot CPU; OK to disable XSAVE.
1871 				 */
1872 				ASSERT(cpu->cpu_id == 0);
1873 				remove_x86_feature(x86_featureset,
1874 				    X86FSET_XSAVE);
1875 				remove_x86_feature(x86_featureset, X86FSET_AVX);
1876 				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_XSAVE;
1877 				CPI_FEATURES_ECX(cpi) &= ~CPUID_INTC_ECX_AVX;
1878 				xsave_force_disable = B_TRUE;
1879 			}
1880 		}
1881 	}
1882 
1884 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0)
1885 		goto pass2_done;
1886 
1887 	if ((nmax = cpi->cpi_xmaxeax - 0x80000000 + 1) > NMAX_CPI_EXTD)
1888 		nmax = NMAX_CPI_EXTD;
1889 	/*
1890 	 * Copy the extended properties, fixing them as we go.
1891 	 * (We already handled n == 0 and n == 1 in pass 1)
1892 	 */
1893 	iptr = (void *)cpi->cpi_brandstr;
1894 	for (n = 2, cp = &cpi->cpi_extd[2]; n < nmax; cp++, n++) {
1895 		cp->cp_eax = 0x80000000 + n;
1896 		(void) __cpuid_insn(cp);
1897 		platform_cpuid_mangle(cpi->cpi_vendor, 0x80000000 + n, cp);
1898 		switch (n) {
1899 		case 2:
1900 		case 3:
1901 		case 4:
1902 			/*
1903 			 * Extract the brand string
1904 			 */
1905 			*iptr++ = cp->cp_eax;
1906 			*iptr++ = cp->cp_ebx;
1907 			*iptr++ = cp->cp_ecx;
1908 			*iptr++ = cp->cp_edx;
1909 			break;
1910 		case 5:
1911 			switch (cpi->cpi_vendor) {
1912 			case X86_VENDOR_AMD:
1913 				/*
1914 				 * The Athlon and Duron were the first
1915 				 * parts to report the sizes of the
1916 				 * TLB for large pages. Before then,
1917 				 * we don't trust the data.
1918 				 */
1919 				if (cpi->cpi_family < 6 ||
1920 				    (cpi->cpi_family == 6 &&
1921 				    cpi->cpi_model < 1))
1922 					cp->cp_eax = 0;
1923 				break;
1924 			default:
1925 				break;
1926 			}
1927 			break;
1928 		case 6:
1929 			switch (cpi->cpi_vendor) {
1930 			case X86_VENDOR_AMD:
1931 				/*
1932 				 * The Athlon and Duron were the first
1933 				 * AMD parts with L2 TLB's.
1934 				 * Before then, don't trust the data.
1935 				 */
1936 				if (cpi->cpi_family < 6 ||
1937 				    (cpi->cpi_family == 6 &&
1938 				    cpi->cpi_model < 1))
1939 					cp->cp_eax = cp->cp_ebx = 0;
1940 				/*
1941 				 * AMD Duron rev A0 reports L2
1942 				 * cache size incorrectly as 1K
1943 				 * when it is really 64K
1944 				 */
1945 				if (cpi->cpi_family == 6 &&
1946 				    cpi->cpi_model == 3 &&
1947 				    cpi->cpi_step == 0) {
1948 					cp->cp_ecx &= 0xffff;
1949 					cp->cp_ecx |= 0x400000;
1950 				}
1951 				break;
1952 			case X86_VENDOR_Cyrix:	/* VIA C3 */
1953 				/*
1954 				 * VIA C3 processors are a bit messed
1955 				 * up w.r.t. encoding cache sizes in %ecx
1956 				 */
1957 				if (cpi->cpi_family != 6)
1958 					break;
1959 				/*
1960 				 * models 7 and 8 were incorrectly encoded
1961 				 *
1962 				 * xxx is model 8 really broken?
1963 				 */
1964 				if (cpi->cpi_model == 7 ||
1965 				    cpi->cpi_model == 8)
1966 					cp->cp_ecx =
1967 					    BITX(cp->cp_ecx, 31, 24) << 16 |
1968 					    BITX(cp->cp_ecx, 23, 16) << 12 |
1969 					    BITX(cp->cp_ecx, 15, 8) << 8 |
1970 					    BITX(cp->cp_ecx, 7, 0);
1971 				/*
1972 				 * model 9 stepping 1 has wrong associativity
1973 				 */
1974 				if (cpi->cpi_model == 9 && cpi->cpi_step == 1)
1975 					cp->cp_ecx |= 8 << 12;
1976 				break;
1977 			case X86_VENDOR_Intel:
1978 				/*
1979 				 * Extended L2 Cache features function.
1980 				 * First appeared on Prescott.
1981 				 */
1982 			default:
1983 				break;
1984 			}
1985 			break;
1986 		default:
1987 			break;
1988 		}
1989 	}
1990 
1991 pass2_done:
1992 	cpi->cpi_pass = 2;
1993 }
1994 
1995 static const char *
1996 intel_cpubrand(const struct cpuid_info *cpi)
1997 {
1998 	int i;
1999 
2000 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2001 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2002 		return ("i486");
2003 
2004 	switch (cpi->cpi_family) {
2005 	case 5:
2006 		return ("Intel Pentium(r)");
2007 	case 6:
2008 		switch (cpi->cpi_model) {
2009 			uint_t celeron, xeon;
2010 			const struct cpuid_regs *cp;
2011 		case 0:
2012 		case 1:
2013 		case 2:
2014 			return ("Intel Pentium(r) Pro");
2015 		case 3:
2016 		case 4:
2017 			return ("Intel Pentium(r) II");
2018 		case 6:
2019 			return ("Intel Celeron(r)");
2020 		case 5:
2021 		case 7:
2022 			celeron = xeon = 0;
2023 			cp = &cpi->cpi_std[2];	/* cache info */
2024 
2025 			for (i = 1; i < 4; i++) {
2026 				uint_t tmp;
2027 
2028 				tmp = (cp->cp_eax >> (8 * i)) & 0xff;
2029 				if (tmp == 0x40)
2030 					celeron++;
2031 				else if (tmp >= 0x44 && tmp <= 0x45)
2032 					xeon++;
2033 			}
2034 
2035 			for (i = 0; i < 2; i++) {
2036 				uint_t tmp;
2037 
2038 				tmp = (cp->cp_ebx >> (8 * i)) & 0xff;
2039 				if (tmp == 0x40)
2040 					celeron++;
2041 				else if (tmp >= 0x44 && tmp <= 0x45)
2042 					xeon++;
2043 			}
2044 
2045 			for (i = 0; i < 4; i++) {
2046 				uint_t tmp;
2047 
2048 				tmp = (cp->cp_ecx >> (8 * i)) & 0xff;
2049 				if (tmp == 0x40)
2050 					celeron++;
2051 				else if (tmp >= 0x44 && tmp <= 0x45)
2052 					xeon++;
2053 			}
2054 
2055 			for (i = 0; i < 4; i++) {
2056 				uint_t tmp;
2057 
2058 				tmp = (cp->cp_edx >> (8 * i)) & 0xff;
2059 				if (tmp == 0x40)
2060 					celeron++;
2061 				else if (tmp >= 0x44 && tmp <= 0x45)
2062 					xeon++;
2063 			}
2064 
2065 			if (celeron)
2066 				return ("Intel Celeron(r)");
2067 			if (xeon)
2068 				return (cpi->cpi_model == 5 ?
2069 				    "Intel Pentium(r) II Xeon(tm)" :
2070 				    "Intel Pentium(r) III Xeon(tm)");
2071 			return (cpi->cpi_model == 5 ?
2072 			    "Intel Pentium(r) II or Pentium(r) II Xeon(tm)" :
2073 			    "Intel Pentium(r) III or Pentium(r) III Xeon(tm)");
2074 		default:
2075 			break;
2076 		}
2077 	default:
2078 		break;
2079 	}
2080 
2081 	/* BrandID is present if the field is nonzero */
2082 	if (cpi->cpi_brandid != 0) {
2083 		static const struct {
2084 			uint_t bt_bid;
2085 			const char *bt_str;
2086 		} brand_tbl[] = {
2087 			{ 0x1,	"Intel(r) Celeron(r)" },
2088 			{ 0x2,	"Intel(r) Pentium(r) III" },
2089 			{ 0x3,	"Intel(r) Pentium(r) III Xeon(tm)" },
2090 			{ 0x4,	"Intel(r) Pentium(r) III" },
2091 			{ 0x6,	"Mobile Intel(r) Pentium(r) III" },
2092 			{ 0x7,	"Mobile Intel(r) Celeron(r)" },
2093 			{ 0x8,	"Intel(r) Pentium(r) 4" },
2094 			{ 0x9,	"Intel(r) Pentium(r) 4" },
2095 			{ 0xa,	"Intel(r) Celeron(r)" },
2096 			{ 0xb,	"Intel(r) Xeon(tm)" },
2097 			{ 0xc,	"Intel(r) Xeon(tm) MP" },
2098 			{ 0xe,	"Mobile Intel(r) Pentium(r) 4" },
2099 			{ 0xf,	"Mobile Intel(r) Celeron(r)" },
2100 			{ 0x11, "Mobile Genuine Intel(r)" },
2101 			{ 0x12, "Intel(r) Celeron(r) M" },
2102 			{ 0x13, "Mobile Intel(r) Celeron(r)" },
2103 			{ 0x14, "Intel(r) Celeron(r)" },
2104 			{ 0x15, "Mobile Genuine Intel(r)" },
2105 			{ 0x16,	"Intel(r) Pentium(r) M" },
2106 			{ 0x17, "Mobile Intel(r) Celeron(r)" }
2107 		};
2108 		uint_t btblmax = sizeof (brand_tbl) / sizeof (brand_tbl[0]);
2109 		uint_t sgn;
2110 
2111 		sgn = (cpi->cpi_family << 8) |
2112 		    (cpi->cpi_model << 4) | cpi->cpi_step;
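		/*
		 * sgn packs family, model and stepping into a readable hex
		 * signature, e.g. family 6, model 0xb, stepping 1 gives
		 * 0x6b1; the magic constants below are such signatures.
		 */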
2113 
2114 		for (i = 0; i < btblmax; i++)
2115 			if (brand_tbl[i].bt_bid == cpi->cpi_brandid)
2116 				break;
2117 		if (i < btblmax) {
2118 			if (sgn == 0x6b1 && cpi->cpi_brandid == 3)
2119 				return ("Intel(r) Celeron(r)");
2120 			if (sgn < 0xf13 && cpi->cpi_brandid == 0xb)
2121 				return ("Intel(r) Xeon(tm) MP");
2122 			if (sgn < 0xf13 && cpi->cpi_brandid == 0xe)
2123 				return ("Intel(r) Xeon(tm)");
2124 			return (brand_tbl[i].bt_str);
2125 		}
2126 	}
2127 
2128 	return (NULL);
2129 }
2130 
2131 static const char *
2132 amd_cpubrand(const struct cpuid_info *cpi)
2133 {
2134 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2135 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5)
2136 		return ("i486 compatible");
2137 
2138 	switch (cpi->cpi_family) {
2139 	case 5:
2140 		switch (cpi->cpi_model) {
2141 		case 0:
2142 		case 1:
2143 		case 2:
2144 		case 3:
2145 		case 4:
2146 		case 5:
2147 			return ("AMD-K5(r)");
2148 		case 6:
2149 		case 7:
2150 			return ("AMD-K6(r)");
2151 		case 8:
2152 			return ("AMD-K6(r)-2");
2153 		case 9:
2154 			return ("AMD-K6(r)-III");
2155 		default:
2156 			return ("AMD (family 5)");
2157 		}
2158 	case 6:
2159 		switch (cpi->cpi_model) {
2160 		case 1:
2161 			return ("AMD-K7(tm)");
2162 		case 0:
2163 		case 2:
2164 		case 4:
2165 			return ("AMD Athlon(tm)");
2166 		case 3:
2167 		case 7:
2168 			return ("AMD Duron(tm)");
2169 		case 6:
2170 		case 8:
2171 		case 10:
2172 			/*
2173 			 * Use the L2 cache size to distinguish
2174 			 */
2175 			return ((cpi->cpi_extd[6].cp_ecx >> 16) >= 256 ?
2176 			    "AMD Athlon(tm)" : "AMD Duron(tm)");
2177 		default:
2178 			return ("AMD (family 6)");
2179 		}
2180 	default:
2181 		break;
2182 	}
2183 
2184 	if (cpi->cpi_family == 0xf && cpi->cpi_model == 5 &&
2185 	    cpi->cpi_brandid != 0) {
2186 		switch (BITX(cpi->cpi_brandid, 7, 5)) {
2187 		case 3:
2188 			return ("AMD Opteron(tm) UP 1xx");
2189 		case 4:
2190 			return ("AMD Opteron(tm) DP 2xx");
2191 		case 5:
2192 			return ("AMD Opteron(tm) MP 8xx");
2193 		default:
2194 			return ("AMD Opteron(tm)");
2195 		}
2196 	}
2197 
2198 	return (NULL);
2199 }
2200 
2201 static const char *
2202 cyrix_cpubrand(struct cpuid_info *cpi, uint_t type)
2203 {
2204 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID) ||
2205 	    cpi->cpi_maxeax < 1 || cpi->cpi_family < 5 ||
2206 	    type == X86_TYPE_CYRIX_486)
2207 		return ("i486 compatible");
2208 
2209 	switch (type) {
2210 	case X86_TYPE_CYRIX_6x86:
2211 		return ("Cyrix 6x86");
2212 	case X86_TYPE_CYRIX_6x86L:
2213 		return ("Cyrix 6x86L");
2214 	case X86_TYPE_CYRIX_6x86MX:
2215 		return ("Cyrix 6x86MX");
2216 	case X86_TYPE_CYRIX_GXm:
2217 		return ("Cyrix GXm");
2218 	case X86_TYPE_CYRIX_MediaGX:
2219 		return ("Cyrix MediaGX");
2220 	case X86_TYPE_CYRIX_MII:
2221 		return ("Cyrix M2");
2222 	case X86_TYPE_VIA_CYRIX_III:
2223 		return ("VIA Cyrix M3");
2224 	default:
2225 		/*
2226 		 * Have another wild guess ..
2227 		 */
2228 		if (cpi->cpi_family == 4 && cpi->cpi_model == 9)
2229 			return ("Cyrix 5x86");
2230 		else if (cpi->cpi_family == 5) {
2231 			switch (cpi->cpi_model) {
2232 			case 2:
2233 				return ("Cyrix 6x86");	/* Cyrix M1 */
2234 			case 4:
2235 				return ("Cyrix MediaGX");
2236 			default:
2237 				break;
2238 			}
2239 		} else if (cpi->cpi_family == 6) {
2240 			switch (cpi->cpi_model) {
2241 			case 0:
2242 				return ("Cyrix 6x86MX"); /* Cyrix M2? */
2243 			case 5:
2244 			case 6:
2245 			case 7:
2246 			case 8:
2247 			case 9:
2248 				return ("VIA C3");
2249 			default:
2250 				break;
2251 			}
2252 		}
2253 		break;
2254 	}
2255 	return (NULL);
2256 }
2257 
2258 /*
2259  * This only gets called when the CPU's extended brand string
2260  * functions (0x80000002, 0x80000003, 0x80000004) aren't
2261  * available, or contain null bytes for some reason.
2262  */
2263 static void
2264 fabricate_brandstr(struct cpuid_info *cpi)
2265 {
2266 	const char *brand = NULL;
2267 
2268 	switch (cpi->cpi_vendor) {
2269 	case X86_VENDOR_Intel:
2270 		brand = intel_cpubrand(cpi);
2271 		break;
2272 	case X86_VENDOR_AMD:
2273 		brand = amd_cpubrand(cpi);
2274 		break;
2275 	case X86_VENDOR_Cyrix:
2276 		brand = cyrix_cpubrand(cpi, x86_type);
2277 		break;
2278 	case X86_VENDOR_NexGen:
2279 		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2280 			brand = "NexGen Nx586";
2281 		break;
2282 	case X86_VENDOR_Centaur:
2283 		if (cpi->cpi_family == 5)
2284 			switch (cpi->cpi_model) {
2285 			case 4:
2286 				brand = "Centaur C6";
2287 				break;
2288 			case 8:
2289 				brand = "Centaur C2";
2290 				break;
2291 			case 9:
2292 				brand = "Centaur C3";
2293 				break;
2294 			default:
2295 				break;
2296 			}
2297 		break;
2298 	case X86_VENDOR_Rise:
2299 		if (cpi->cpi_family == 5 &&
2300 		    (cpi->cpi_model == 0 || cpi->cpi_model == 2))
2301 			brand = "Rise mP6";
2302 		break;
2303 	case X86_VENDOR_SiS:
2304 		if (cpi->cpi_family == 5 && cpi->cpi_model == 0)
2305 			brand = "SiS 55x";
2306 		break;
2307 	case X86_VENDOR_TM:
2308 		if (cpi->cpi_family == 5 && cpi->cpi_model == 4)
2309 			brand = "Transmeta Crusoe TM3x00 or TM5x00";
2310 		break;
2311 	case X86_VENDOR_NSC:
2312 	case X86_VENDOR_UMC:
2313 	default:
2314 		break;
2315 	}
2316 	if (brand) {
2317 		(void) strcpy((char *)cpi->cpi_brandstr, brand);
2318 		return;
2319 	}
2320 
2321 	/*
2322 	 * If all else fails ...
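	 * (synthesize a string from the vendor name and the raw
	 * family/model/stepping, e.g. "AuthenticAMD 16.4.2").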
2323 	 */
2324 	(void) snprintf(cpi->cpi_brandstr, sizeof (cpi->cpi_brandstr),
2325 	    "%s %d.%d.%d", cpi->cpi_vendorstr, cpi->cpi_family,
2326 	    cpi->cpi_model, cpi->cpi_step);
2327 }
2328 
2329 /*
2330  * This routine is called just after kernel memory allocation
2331  * becomes available on cpu0, and as part of mp_startup() on
2332  * the other cpus.
2333  *
2334  * Fix up the brand string, and collect any information from cpuid
2335  * that requires dynamically allocated storage to represent.
2336  */
2337 /*ARGSUSED*/
2338 void
2339 cpuid_pass3(cpu_t *cpu)
2340 {
2341 	int	i, max, shft, level, size;
2342 	struct cpuid_regs regs;
2343 	struct cpuid_regs *cp;
2344 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2345 
2346 	ASSERT(cpi->cpi_pass == 2);
2347 
2348 	/*
2349 	 * Function 4: Deterministic cache parameters
2350 	 *
2351 	 * Take this opportunity to detect the number of threads
2352 	 * sharing the last level cache, and construct a corresponding
2353 	 * cache id. The respective cpuid_info members are initialized
2354 	 * to the default case of "no last level cache sharing".
2355 	 */
2356 	cpi->cpi_ncpu_shr_last_cache = 1;
2357 	cpi->cpi_last_lvl_cacheid = cpu->cpu_id;
2358 
2359 	if (cpi->cpi_maxeax >= 4 && cpi->cpi_vendor == X86_VENDOR_Intel) {
2360 
2361 		/*
2362 		 * Find the # of elements (size) returned by fn 4, and along
2363 		 * the way detect last level cache sharing details.
2364 		 */
2365 		bzero(&regs, sizeof (regs));
2366 		cp = &regs;
2367 		for (i = 0, max = 0; i < CPI_FN4_ECX_MAX; i++) {
2368 			cp->cp_eax = 4;
2369 			cp->cp_ecx = i;
2370 
2371 			(void) __cpuid_insn(cp);
2372 
2373 			if (CPI_CACHE_TYPE(cp) == 0)
2374 				break;
2375 			level = CPI_CACHE_LVL(cp);
2376 			if (level > max) {
2377 				max = level;
2378 				cpi->cpi_ncpu_shr_last_cache =
2379 				    CPI_NTHR_SHR_CACHE(cp) + 1;
2380 			}
2381 		}
2382 		cpi->cpi_std_4_size = size = i;
2383 
2384 		/*
2385 		 * Allocate the cpi_std_4 array. The first element
2386 		 * references the regs for fn 4, %ecx == 0, which
2387 		 * cpuid_pass2() stashed in cpi->cpi_std[4].
2388 		 */
2389 		if (size > 0) {
2390 			cpi->cpi_std_4 =
2391 			    kmem_alloc(size * sizeof (cp), KM_SLEEP);
2392 			cpi->cpi_std_4[0] = &cpi->cpi_std[4];
2393 
2394 			/*
2395 			 * Allocate storage to hold the additional regs
2396 			 * for function 4, %ecx == 1 .. cpi_std_4_size.
2397 			 *
2398 			 * The regs for fn 4, %ecx == 0 have already
2399 			 * been allocated as indicated above.
2400 			 */
2401 			for (i = 1; i < size; i++) {
2402 				cp = cpi->cpi_std_4[i] =
2403 				    kmem_zalloc(sizeof (regs), KM_SLEEP);
2404 				cp->cp_eax = 4;
2405 				cp->cp_ecx = i;
2406 
2407 				(void) __cpuid_insn(cp);
2408 			}
2409 		}
2410 		/*
2411 		 * Determine the number of bits needed to represent
2412 		 * the number of CPUs sharing the last level cache.
2413 		 *
2414 		 * Shift off that number of bits from the APIC id to
2415 		 * derive the cache id.
2416 		 */
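		/*
		 * E.g. if four CPUs share the last level cache, shft ends
		 * up as 2, so an APIC id of 0xb yields cache id 0xb >> 2
		 * == 0x2.
		 */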
2417 		shft = 0;
2418 		for (i = 1; i < cpi->cpi_ncpu_shr_last_cache; i <<= 1)
2419 			shft++;
2420 		cpi->cpi_last_lvl_cacheid = cpi->cpi_apicid >> shft;
2421 	}
2422 
2423 	/*
2424 	 * Now fix up the brand string
2425 	 */
2426 	if ((cpi->cpi_xmaxeax & 0x80000000) == 0) {
2427 		fabricate_brandstr(cpi);
2428 	} else {
2429 
2430 		/*
2431 		 * If we successfully extracted a brand string from the cpuid
2432 		 * instruction, clean it up by removing leading spaces and
2433 		 * similar junk.
2434 		 */
2435 		if (cpi->cpi_brandstr[0]) {
2436 			size_t maxlen = sizeof (cpi->cpi_brandstr);
2437 			char *src, *dst;
2438 
2439 			dst = src = (char *)cpi->cpi_brandstr;
2440 			src[maxlen - 1] = '\0';
2441 			/*
2442 			 * strip leading spaces
2443 			 */
2444 			while (*src == ' ')
2445 				src++;
2446 			/*
2447 			 * Remove any "Genuine" or "Authentic" prefixes
2448 			 */
2449 			if (strncmp(src, "Genuine ", 8) == 0)
2450 				src += 8;
2451 			if (strncmp(src, "Authentic ", 10) == 0)
2452 				src += 10;
2453 
2454 			/*
2455 			 * Now do an in-place copy.
2456 			 * Map (R) to (r) and (TM) to (tm).
2457 			 * The era of teletypes is long gone, and there's
2458 			 * -really- no need to shout.
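			 * E.g. "Intel(R) Xeon(TM)" becomes
			 * "Intel(r) Xeon(tm)".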
2459 			 */
2460 			while (*src != '\0') {
2461 				if (src[0] == '(') {
2462 					if (strncmp(src + 1, "R)", 2) == 0) {
2463 						(void) strncpy(dst, "(r)", 3);
2464 						src += 3;
2465 						dst += 3;
2466 						continue;
2467 					}
2468 					if (strncmp(src + 1, "TM)", 3) == 0) {
2469 						(void) strncpy(dst, "(tm)", 4);
2470 						src += 4;
2471 						dst += 4;
2472 						continue;
2473 					}
2474 				}
2475 				*dst++ = *src++;
2476 			}
2477 			*dst = '\0';
2478 
2479 			/*
2480 			 * Finally, remove any trailing spaces
2481 			 */
2482 			while (--dst > cpi->cpi_brandstr)
2483 				if (*dst == ' ')
2484 					*dst = '\0';
2485 				else
2486 					break;
2487 		} else
2488 			fabricate_brandstr(cpi);
2489 	}
2490 	cpi->cpi_pass = 3;
2491 }
2492 
2493 /*
2494  * This routine is called out of bind_hwcap() much later in the life
2495  * of the kernel (post_startup()).  The job of this routine is to resolve
2496  * the hardware feature support and kernel support for those features into
2497  * what we're actually going to tell applications via the aux vector.
2498  */
2499 uint_t
2500 cpuid_pass4(cpu_t *cpu)
2501 {
2502 	struct cpuid_info *cpi;
2503 	uint_t hwcap_flags = 0;
2504 
2505 	if (cpu == NULL)
2506 		cpu = CPU;
2507 	cpi = cpu->cpu_m.mcpu_cpi;
2508 
2509 	ASSERT(cpi->cpi_pass == 3);
2510 
2511 	if (cpi->cpi_maxeax >= 1) {
2512 		uint32_t *edx = &cpi->cpi_support[STD_EDX_FEATURES];
2513 		uint32_t *ecx = &cpi->cpi_support[STD_ECX_FEATURES];
2514 
2515 		*edx = CPI_FEATURES_EDX(cpi);
2516 		*ecx = CPI_FEATURES_ECX(cpi);
2517 
2518 		/*
2519 		 * [these require explicit kernel support]
2520 		 */
2521 		if (!is_x86_feature(x86_featureset, X86FSET_SEP))
2522 			*edx &= ~CPUID_INTC_EDX_SEP;
2523 
2524 		if (!is_x86_feature(x86_featureset, X86FSET_SSE))
2525 			*edx &= ~(CPUID_INTC_EDX_FXSR|CPUID_INTC_EDX_SSE);
2526 		if (!is_x86_feature(x86_featureset, X86FSET_SSE2))
2527 			*edx &= ~CPUID_INTC_EDX_SSE2;
2528 
2529 		if (!is_x86_feature(x86_featureset, X86FSET_HTT))
2530 			*edx &= ~CPUID_INTC_EDX_HTT;
2531 
2532 		if (!is_x86_feature(x86_featureset, X86FSET_SSE3))
2533 			*ecx &= ~CPUID_INTC_ECX_SSE3;
2534 
2535 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2536 			if (!is_x86_feature(x86_featureset, X86FSET_SSSE3))
2537 				*ecx &= ~CPUID_INTC_ECX_SSSE3;
2538 			if (!is_x86_feature(x86_featureset, X86FSET_SSE4_1))
2539 				*ecx &= ~CPUID_INTC_ECX_SSE4_1;
2540 			if (!is_x86_feature(x86_featureset, X86FSET_SSE4_2))
2541 				*ecx &= ~CPUID_INTC_ECX_SSE4_2;
2542 			if (!is_x86_feature(x86_featureset, X86FSET_AES))
2543 				*ecx &= ~CPUID_INTC_ECX_AES;
2544 			if (!is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ))
2545 				*ecx &= ~CPUID_INTC_ECX_PCLMULQDQ;
2546 			if (!is_x86_feature(x86_featureset, X86FSET_XSAVE))
2547 				*ecx &= ~(CPUID_INTC_ECX_XSAVE |
2548 				    CPUID_INTC_ECX_OSXSAVE);
2549 			if (!is_x86_feature(x86_featureset, X86FSET_AVX))
2550 				*ecx &= ~CPUID_INTC_ECX_AVX;
2551 		}
2552 
2553 		/*
2554 		 * [no explicit support required beyond x87 fp context]
2555 		 */
2556 		if (!fpu_exists)
2557 			*edx &= ~(CPUID_INTC_EDX_FPU | CPUID_INTC_EDX_MMX);
2558 
2559 		/*
2560 		 * Now map the supported feature vector to things that we
2561 		 * think userland will care about.
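		 * These AV_386_* bits are exported to userland in the aux
		 * vector (AT_SUN_HWCAP), where runtime linkers and libraries
		 * use them to select optimized code paths.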
2562 		 */
2563 		if (*edx & CPUID_INTC_EDX_SEP)
2564 			hwcap_flags |= AV_386_SEP;
2565 		if (*edx & CPUID_INTC_EDX_SSE)
2566 			hwcap_flags |= AV_386_FXSR | AV_386_SSE;
2567 		if (*edx & CPUID_INTC_EDX_SSE2)
2568 			hwcap_flags |= AV_386_SSE2;
2569 		if (*ecx & CPUID_INTC_ECX_SSE3)
2570 			hwcap_flags |= AV_386_SSE3;
2571 		if (cpi->cpi_vendor == X86_VENDOR_Intel) {
2572 			if (*ecx & CPUID_INTC_ECX_SSSE3)
2573 				hwcap_flags |= AV_386_SSSE3;
2574 			if (*ecx & CPUID_INTC_ECX_SSE4_1)
2575 				hwcap_flags |= AV_386_SSE4_1;
2576 			if (*ecx & CPUID_INTC_ECX_SSE4_2)
2577 				hwcap_flags |= AV_386_SSE4_2;
2578 			if (*ecx & CPUID_INTC_ECX_MOVBE)
2579 				hwcap_flags |= AV_386_MOVBE;
2580 			if (*ecx & CPUID_INTC_ECX_AES)
2581 				hwcap_flags |= AV_386_AES;
2582 			if (*ecx & CPUID_INTC_ECX_PCLMULQDQ)
2583 				hwcap_flags |= AV_386_PCLMULQDQ;
2584 			if ((*ecx & CPUID_INTC_ECX_XSAVE) &&
2585 			    (*ecx & CPUID_INTC_ECX_OSXSAVE))
2586 				hwcap_flags |= AV_386_XSAVE;
2587 		}
2588 		if (*ecx & CPUID_INTC_ECX_POPCNT)
2589 			hwcap_flags |= AV_386_POPCNT;
2590 		if (*edx & CPUID_INTC_EDX_FPU)
2591 			hwcap_flags |= AV_386_FPU;
2592 		if (*edx & CPUID_INTC_EDX_MMX)
2593 			hwcap_flags |= AV_386_MMX;
2594 
2595 		if (*edx & CPUID_INTC_EDX_TSC)
2596 			hwcap_flags |= AV_386_TSC;
2597 		if (*edx & CPUID_INTC_EDX_CX8)
2598 			hwcap_flags |= AV_386_CX8;
2599 		if (*edx & CPUID_INTC_EDX_CMOV)
2600 			hwcap_flags |= AV_386_CMOV;
2601 		if (*ecx & CPUID_INTC_ECX_CX16)
2602 			hwcap_flags |= AV_386_CX16;
2603 	}
2604 
2605 	if (cpi->cpi_xmaxeax < 0x80000001)
2606 		goto pass4_done;
2607 
2608 	switch (cpi->cpi_vendor) {
2609 		struct cpuid_regs cp;
2610 		uint32_t *edx, *ecx;
2611 
2612 	case X86_VENDOR_Intel:
2613 		/*
2614 		 * Seems like Intel duplicated what was necessary here
2615 		 * to make the initial crop of 64-bit OSes work.
2616 		 * Hopefully, those are the only "extended" bits
2617 		 * they'll add.
2618 		 */
2619 		/*FALLTHROUGH*/
2620 
2621 	case X86_VENDOR_AMD:
2622 		edx = &cpi->cpi_support[AMD_EDX_FEATURES];
2623 		ecx = &cpi->cpi_support[AMD_ECX_FEATURES];
2624 
2625 		*edx = CPI_FEATURES_XTD_EDX(cpi);
2626 		*ecx = CPI_FEATURES_XTD_ECX(cpi);
2627 
2628 		/*
2629 		 * [these features require explicit kernel support]
2630 		 */
2631 		switch (cpi->cpi_vendor) {
2632 		case X86_VENDOR_Intel:
2633 			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2634 				*edx &= ~CPUID_AMD_EDX_TSCP;
2635 			break;
2636 
2637 		case X86_VENDOR_AMD:
2638 			if (!is_x86_feature(x86_featureset, X86FSET_TSCP))
2639 				*edx &= ~CPUID_AMD_EDX_TSCP;
2640 			if (!is_x86_feature(x86_featureset, X86FSET_SSE4A))
2641 				*ecx &= ~CPUID_AMD_ECX_SSE4A;
2642 			break;
2643 
2644 		default:
2645 			break;
2646 		}
2647 
2648 		/*
2649 		 * [no explicit support required beyond
2650 		 * x87 fp context and exception handlers]
2651 		 */
2652 		if (!fpu_exists)
2653 			*edx &= ~(CPUID_AMD_EDX_MMXamd |
2654 			    CPUID_AMD_EDX_3DNow | CPUID_AMD_EDX_3DNowx);
2655 
2656 		if (!is_x86_feature(x86_featureset, X86FSET_NX))
2657 			*edx &= ~CPUID_AMD_EDX_NX;
2658 #if !defined(__amd64)
2659 		*edx &= ~CPUID_AMD_EDX_LM;
2660 #endif
2661 		/*
2662 		 * Now map the supported feature vector to
2663 		 * things that we think userland will care about.
2664 		 */
2665 #if defined(__amd64)
2666 		if (*edx & CPUID_AMD_EDX_SYSC)
2667 			hwcap_flags |= AV_386_AMD_SYSC;
2668 #endif
2669 		if (*edx & CPUID_AMD_EDX_MMXamd)
2670 			hwcap_flags |= AV_386_AMD_MMX;
2671 		if (*edx & CPUID_AMD_EDX_3DNow)
2672 			hwcap_flags |= AV_386_AMD_3DNow;
2673 		if (*edx & CPUID_AMD_EDX_3DNowx)
2674 			hwcap_flags |= AV_386_AMD_3DNowx;
2675 
2676 		switch (cpi->cpi_vendor) {
2677 		case X86_VENDOR_AMD:
2678 			if (*edx & CPUID_AMD_EDX_TSCP)
2679 				hwcap_flags |= AV_386_TSCP;
2680 			if (*ecx & CPUID_AMD_ECX_AHF64)
2681 				hwcap_flags |= AV_386_AHF;
2682 			if (*ecx & CPUID_AMD_ECX_SSE4A)
2683 				hwcap_flags |= AV_386_AMD_SSE4A;
2684 			if (*ecx & CPUID_AMD_ECX_LZCNT)
2685 				hwcap_flags |= AV_386_AMD_LZCNT;
2686 			break;
2687 
2688 		case X86_VENDOR_Intel:
2689 			if (*edx & CPUID_AMD_EDX_TSCP)
2690 				hwcap_flags |= AV_386_TSCP;
2691 			/*
2692 			 * Aarrgh.
2693 			 * Intel uses a different bit in the same word.
2694 			 */
2695 			if (*ecx & CPUID_INTC_ECX_AHF64)
2696 				hwcap_flags |= AV_386_AHF;
2697 			break;
2698 
2699 		default:
2700 			break;
2701 		}
2702 		break;
2703 
2704 	case X86_VENDOR_TM:
2705 		cp.cp_eax = 0x80860001;
2706 		(void) __cpuid_insn(&cp);
2707 		cpi->cpi_support[TM_EDX_FEATURES] = cp.cp_edx;
2708 		break;
2709 
2710 	default:
2711 		break;
2712 	}
2713 
2714 pass4_done:
2715 	cpi->cpi_pass = 4;
2716 	return (hwcap_flags);
2717 }
2718 
2719 
2720 /*
2721  * Simulate the cpuid instruction using the data we previously
2722  * captured about this CPU.  We try our best to return the truth
2723  * about the hardware, independently of kernel support.
2724  */
2725 uint32_t
2726 cpuid_insn(cpu_t *cpu, struct cpuid_regs *cp)
2727 {
2728 	struct cpuid_info *cpi;
2729 	struct cpuid_regs *xcp;
2730 
2731 	if (cpu == NULL)
2732 		cpu = CPU;
2733 	cpi = cpu->cpu_m.mcpu_cpi;
2734 
2735 	ASSERT(cpuid_checkpass(cpu, 3));
2736 
2737 	/*
2738 	 * CPUID data is cached in two separate places: cpi_std for standard
2739 	 * CPUID functions, and cpi_extd for extended CPUID functions.
2740 	 */
2741 	if (cp->cp_eax <= cpi->cpi_maxeax && cp->cp_eax < NMAX_CPI_STD)
2742 		xcp = &cpi->cpi_std[cp->cp_eax];
2743 	else if (cp->cp_eax >= 0x80000000 && cp->cp_eax <= cpi->cpi_xmaxeax &&
2744 	    cp->cp_eax < 0x80000000 + NMAX_CPI_EXTD)
2745 		xcp = &cpi->cpi_extd[cp->cp_eax - 0x80000000];
2746 	else
2747 		/*
2748 		 * The caller is asking for data from an input parameter which
2749 		 * the kernel has not cached.  In this case we go fetch from
2750 		 * the hardware and return the data directly to the user.
2751 		 */
2752 		return (__cpuid_insn(cp));
2753 
2754 	cp->cp_eax = xcp->cp_eax;
2755 	cp->cp_ebx = xcp->cp_ebx;
2756 	cp->cp_ecx = xcp->cp_ecx;
2757 	cp->cp_edx = xcp->cp_edx;
2758 	return (cp->cp_eax);
2759 }
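
/*
 * Example (hypothetical caller): to read the kernel's cached view of
 * standard leaf 1 on the current CPU, one might do:
 *
 *	struct cpuid_regs regs = { 0 };
 *	regs.cp_eax = 1;
 *	(void) cpuid_insn(NULL, &regs);
 *
 * Requests for leaves the kernel has not cached fall through to the
 * hardware via __cpuid_insn().
 */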
2760 
2761 int
2762 cpuid_checkpass(cpu_t *cpu, int pass)
2763 {
2764 	return (cpu != NULL && cpu->cpu_m.mcpu_cpi != NULL &&
2765 	    cpu->cpu_m.mcpu_cpi->cpi_pass >= pass);
2766 }
2767 
2768 int
2769 cpuid_getbrandstr(cpu_t *cpu, char *s, size_t n)
2770 {
2771 	ASSERT(cpuid_checkpass(cpu, 3));
2772 
2773 	return (snprintf(s, n, "%s", cpu->cpu_m.mcpu_cpi->cpi_brandstr));
2774 }
2775 
2776 int
2777 cpuid_is_cmt(cpu_t *cpu)
2778 {
2779 	if (cpu == NULL)
2780 		cpu = CPU;
2781 
2782 	ASSERT(cpuid_checkpass(cpu, 1));
2783 
2784 	return (cpu->cpu_m.mcpu_cpi->cpi_chipid >= 0);
2785 }
2786 
2787 /*
2788  * AMD and Intel both implement the 64-bit variant of the syscall
2789  * instruction (syscallq), so if there's -any- support for syscall,
2790  * cpuid currently says "yes, we support this".
2791  *
2792  * However, Intel decided to -not- implement the 32-bit variant of the
2793  * syscall instruction, so we provide a predicate to allow our caller
2794  * to test that subtlety here.
2795  *
2796  * XXPV	Currently, 32-bit syscall instructions don't work via the hypervisor,
2797  *	even in the case where the hardware would in fact support it.
2798  */
2799 /*ARGSUSED*/
2800 int
2801 cpuid_syscall32_insn(cpu_t *cpu)
2802 {
2803 	ASSERT(cpuid_checkpass((cpu == NULL ? CPU : cpu), 1));
2804 
2805 #if !defined(__xpv)
2806 	if (cpu == NULL)
2807 		cpu = CPU;
2808 
2809 	/*CSTYLED*/
2810 	{
2811 		struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2812 
2813 		if (cpi->cpi_vendor == X86_VENDOR_AMD &&
2814 		    cpi->cpi_xmaxeax >= 0x80000001 &&
2815 		    (CPI_FEATURES_XTD_EDX(cpi) & CPUID_AMD_EDX_SYSC))
2816 			return (1);
2817 	}
2818 #endif
2819 	return (0);
2820 }
2821 
2822 int
2823 cpuid_getidstr(cpu_t *cpu, char *s, size_t n)
2824 {
2825 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
2826 
2827 	static const char fmt[] =
2828 	    "x86 (%s %X family %d model %d step %d clock %d MHz)";
2829 	static const char fmt_ht[] =
2830 	    "x86 (chipid 0x%x %s %X family %d model %d step %d clock %d MHz)";
2831 
2832 	ASSERT(cpuid_checkpass(cpu, 1));
2833 
2834 	if (cpuid_is_cmt(cpu))
2835 		return (snprintf(s, n, fmt_ht, cpi->cpi_chipid,
2836 		    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2837 		    cpi->cpi_family, cpi->cpi_model,
2838 		    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2839 	return (snprintf(s, n, fmt,
2840 	    cpi->cpi_vendorstr, cpi->cpi_std[1].cp_eax,
2841 	    cpi->cpi_family, cpi->cpi_model,
2842 	    cpi->cpi_step, cpu->cpu_type_info.pi_clock));
2843 }
2844 
2845 const char *
2846 cpuid_getvendorstr(cpu_t *cpu)
2847 {
2848 	ASSERT(cpuid_checkpass(cpu, 1));
2849 	return ((const char *)cpu->cpu_m.mcpu_cpi->cpi_vendorstr);
2850 }
2851 
2852 uint_t
2853 cpuid_getvendor(cpu_t *cpu)
2854 {
2855 	ASSERT(cpuid_checkpass(cpu, 1));
2856 	return (cpu->cpu_m.mcpu_cpi->cpi_vendor);
2857 }
2858 
2859 uint_t
2860 cpuid_getfamily(cpu_t *cpu)
2861 {
2862 	ASSERT(cpuid_checkpass(cpu, 1));
2863 	return (cpu->cpu_m.mcpu_cpi->cpi_family);
2864 }
2865 
2866 uint_t
2867 cpuid_getmodel(cpu_t *cpu)
2868 {
2869 	ASSERT(cpuid_checkpass(cpu, 1));
2870 	return (cpu->cpu_m.mcpu_cpi->cpi_model);
2871 }
2872 
2873 uint_t
2874 cpuid_get_ncpu_per_chip(cpu_t *cpu)
2875 {
2876 	ASSERT(cpuid_checkpass(cpu, 1));
2877 	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_per_chip);
2878 }
2879 
2880 uint_t
2881 cpuid_get_ncore_per_chip(cpu_t *cpu)
2882 {
2883 	ASSERT(cpuid_checkpass(cpu, 1));
2884 	return (cpu->cpu_m.mcpu_cpi->cpi_ncore_per_chip);
2885 }
2886 
2887 uint_t
2888 cpuid_get_ncpu_sharing_last_cache(cpu_t *cpu)
2889 {
2890 	ASSERT(cpuid_checkpass(cpu, 2));
2891 	return (cpu->cpu_m.mcpu_cpi->cpi_ncpu_shr_last_cache);
2892 }
2893 
2894 id_t
2895 cpuid_get_last_lvl_cacheid(cpu_t *cpu)
2896 {
2897 	ASSERT(cpuid_checkpass(cpu, 2));
2898 	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2899 }
2900 
2901 uint_t
2902 cpuid_getstep(cpu_t *cpu)
2903 {
2904 	ASSERT(cpuid_checkpass(cpu, 1));
2905 	return (cpu->cpu_m.mcpu_cpi->cpi_step);
2906 }
2907 
2908 uint_t
2909 cpuid_getsig(struct cpu *cpu)
2910 {
2911 	ASSERT(cpuid_checkpass(cpu, 1));
2912 	return (cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_eax);
2913 }
2914 
2915 uint32_t
2916 cpuid_getchiprev(struct cpu *cpu)
2917 {
2918 	ASSERT(cpuid_checkpass(cpu, 1));
2919 	return (cpu->cpu_m.mcpu_cpi->cpi_chiprev);
2920 }
2921 
2922 const char *
2923 cpuid_getchiprevstr(struct cpu *cpu)
2924 {
2925 	ASSERT(cpuid_checkpass(cpu, 1));
2926 	return (cpu->cpu_m.mcpu_cpi->cpi_chiprevstr);
2927 }
2928 
2929 uint32_t
2930 cpuid_getsockettype(struct cpu *cpu)
2931 {
2932 	ASSERT(cpuid_checkpass(cpu, 1));
2933 	return (cpu->cpu_m.mcpu_cpi->cpi_socket);
2934 }
2935 
2936 const char *
2937 cpuid_getsocketstr(cpu_t *cpu)
2938 {
2939 	static const char *socketstr = NULL;
2940 	struct cpuid_info *cpi;
2941 
2942 	ASSERT(cpuid_checkpass(cpu, 1));
2943 	cpi = cpu->cpu_m.mcpu_cpi;
2944 
2945 	/* Assume that socket types are the same across the system */
2946 	if (socketstr == NULL)
2947 		socketstr = _cpuid_sktstr(cpi->cpi_vendor, cpi->cpi_family,
2948 		    cpi->cpi_model, cpi->cpi_step);
2949 
2951 	return (socketstr);
2952 }
2953 
2954 int
2955 cpuid_get_chipid(cpu_t *cpu)
2956 {
2957 	ASSERT(cpuid_checkpass(cpu, 1));
2958 
2959 	if (cpuid_is_cmt(cpu))
2960 		return (cpu->cpu_m.mcpu_cpi->cpi_chipid);
2961 	return (cpu->cpu_id);
2962 }
2963 
2964 id_t
2965 cpuid_get_coreid(cpu_t *cpu)
2966 {
2967 	ASSERT(cpuid_checkpass(cpu, 1));
2968 	return (cpu->cpu_m.mcpu_cpi->cpi_coreid);
2969 }
2970 
2971 int
2972 cpuid_get_pkgcoreid(cpu_t *cpu)
2973 {
2974 	ASSERT(cpuid_checkpass(cpu, 1));
2975 	return (cpu->cpu_m.mcpu_cpi->cpi_pkgcoreid);
2976 }
2977 
2978 int
2979 cpuid_get_clogid(cpu_t *cpu)
2980 {
2981 	ASSERT(cpuid_checkpass(cpu, 1));
2982 	return (cpu->cpu_m.mcpu_cpi->cpi_clogid);
2983 }
2984 
2985 int
2986 cpuid_get_cacheid(cpu_t *cpu)
2987 {
2988 	ASSERT(cpuid_checkpass(cpu, 1));
2989 	return (cpu->cpu_m.mcpu_cpi->cpi_last_lvl_cacheid);
2990 }
2991 
2992 uint_t
2993 cpuid_get_procnodeid(cpu_t *cpu)
2994 {
2995 	ASSERT(cpuid_checkpass(cpu, 1));
2996 	return (cpu->cpu_m.mcpu_cpi->cpi_procnodeid);
2997 }
2998 
2999 uint_t
3000 cpuid_get_procnodes_per_pkg(cpu_t *cpu)
3001 {
3002 	ASSERT(cpuid_checkpass(cpu, 1));
3003 	return (cpu->cpu_m.mcpu_cpi->cpi_procnodes_per_pkg);
3004 }
3005 
3006 /*ARGSUSED*/
3007 int
3008 cpuid_have_cr8access(cpu_t *cpu)
3009 {
3010 #if defined(__amd64)
3011 	return (1);
3012 #else
3013 	struct cpuid_info *cpi;
3014 
3015 	ASSERT(cpu != NULL);
3016 	cpi = cpu->cpu_m.mcpu_cpi;
3017 	if (cpi->cpi_vendor == X86_VENDOR_AMD && cpi->cpi_maxeax >= 1 &&
3018 	    (CPI_FEATURES_XTD_ECX(cpi) & CPUID_AMD_ECX_CR8D) != 0)
3019 		return (1);
3020 	return (0);
3021 #endif
3022 }
3023 
3024 uint32_t
3025 cpuid_get_apicid(cpu_t *cpu)
3026 {
3027 	ASSERT(cpuid_checkpass(cpu, 1));
3028 	if (cpu->cpu_m.mcpu_cpi->cpi_maxeax < 1) {
3029 		return (UINT32_MAX);
3030 	} else {
3031 		return (cpu->cpu_m.mcpu_cpi->cpi_apicid);
3032 	}
3033 }
3034 
3035 void
3036 cpuid_get_addrsize(cpu_t *cpu, uint_t *pabits, uint_t *vabits)
3037 {
3038 	struct cpuid_info *cpi;
3039 
3040 	if (cpu == NULL)
3041 		cpu = CPU;
3042 	cpi = cpu->cpu_m.mcpu_cpi;
3043 
3044 	ASSERT(cpuid_checkpass(cpu, 1));
3045 
3046 	if (pabits)
3047 		*pabits = cpi->cpi_pabits;
3048 	if (vabits)
3049 		*vabits = cpi->cpi_vabits;
3050 }
3051 
3052 /*
3053  * Returns the number of data TLB entries for a corresponding
3054  * pagesize.  If it can't be computed, or isn't known, the
3055  * routine returns zero.  If you ask about an architecturally
3056  * impossible pagesize, the routine will panic (so that the
3057  * hat implementor knows that things are inconsistent).
3058  */
3059 uint_t
3060 cpuid_get_dtlb_nent(cpu_t *cpu, size_t pagesize)
3061 {
3062 	struct cpuid_info *cpi;
3063 	uint_t dtlb_nent = 0;
3064 
3065 	if (cpu == NULL)
3066 		cpu = CPU;
3067 	cpi = cpu->cpu_m.mcpu_cpi;
3068 
3069 	ASSERT(cpuid_checkpass(cpu, 1));
3070 
3071 	/*
3072 	 * Check the L2 TLB info
3073 	 */
3074 	if (cpi->cpi_xmaxeax >= 0x80000006) {
3075 		struct cpuid_regs *cp = &cpi->cpi_extd[6];
3076 
3077 		switch (pagesize) {
3078 
3079 		case 4 * 1024:
3080 			/*
3081 			 * All zero in the top 16 bits of the register
3082 			 * indicates a unified TLB. Size is in low 16 bits.
3083 			 */
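			/*
			 * Otherwise %ebx splits the L2 TLB into I and D
			 * halves; bits 27:16 hold the data-TLB entry count,
			 * which is what we want here.
			 */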
3084 			if ((cp->cp_ebx & 0xffff0000) == 0)
3085 				dtlb_nent = cp->cp_ebx & 0x0000ffff;
3086 			else
3087 				dtlb_nent = BITX(cp->cp_ebx, 27, 16);
3088 			break;
3089 
3090 		case 2 * 1024 * 1024:
3091 			if ((cp->cp_eax & 0xffff0000) == 0)
3092 				dtlb_nent = cp->cp_eax & 0x0000ffff;
3093 			else
3094 				dtlb_nent = BITX(cp->cp_eax, 27, 16);
3095 			break;
3096 
3097 		default:
3098 			panic("unknown L2 pagesize");
3099 			/*NOTREACHED*/
3100 		}
3101 	}
3102 
3103 	if (dtlb_nent != 0)
3104 		return (dtlb_nent);
3105 
3106 	/*
3107 	 * No L2 TLB support for this size, try L1.
3108 	 */
3109 	if (cpi->cpi_xmaxeax >= 0x80000005) {
3110 		struct cpuid_regs *cp = &cpi->cpi_extd[5];
3111 
3112 		switch (pagesize) {
3113 		case 4 * 1024:
3114 			dtlb_nent = BITX(cp->cp_ebx, 23, 16);
3115 			break;
3116 		case 2 * 1024 * 1024:
3117 			dtlb_nent = BITX(cp->cp_eax, 23, 16);
3118 			break;
3119 		default:
3120 			panic("unknown L1 d-TLB pagesize");
3121 			/*NOTREACHED*/
3122 		}
3123 	}
3124 
3125 	return (dtlb_nent);
3126 }
3127 
3128 /*
3129  * Return 0 if the erratum is not present or not applicable, positive
3130  * if it is, and negative if the status of the erratum is unknown.
3131  *
3132  * See "Revision Guide for AMD Athlon(tm) 64 and AMD Opteron(tm)
3133  * Processors" #25759, Rev 3.57, August 2005
3134  */
3135 int
3136 cpuid_opteron_erratum(cpu_t *cpu, uint_t erratum)
3137 {
3138 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
3139 	uint_t eax;
3140 
3141 	/*
3142 	 * Bail out if this CPU isn't an AMD CPU, or if it's
3143 	 * a legacy (32-bit) AMD CPU.
3144 	 */
3145 	if (cpi->cpi_vendor != X86_VENDOR_AMD ||
3146 	    cpi->cpi_family == 4 || cpi->cpi_family == 5 ||
3147 	    cpi->cpi_family == 6)
3149 		return (0);
3150 
3151 	eax = cpi->cpi_std[1].cp_eax;
3152 
3153 #define	SH_B0(eax)	(eax == 0xf40 || eax == 0xf50)
3154 #define	SH_B3(eax) 	(eax == 0xf51)
3155 #define	B(eax)		(SH_B0(eax) || SH_B3(eax))
3156 
3157 #define	SH_C0(eax)	(eax == 0xf48 || eax == 0xf58)
3158 
3159 #define	SH_CG(eax)	(eax == 0xf4a || eax == 0xf5a || eax == 0xf7a)
3160 #define	DH_CG(eax)	(eax == 0xfc0 || eax == 0xfe0 || eax == 0xff0)
3161 #define	CH_CG(eax)	(eax == 0xf82 || eax == 0xfb2)
3162 #define	CG(eax)		(SH_CG(eax) || DH_CG(eax) || CH_CG(eax))
3163 
3164 #define	SH_D0(eax)	(eax == 0x10f40 || eax == 0x10f50 || eax == 0x10f70)
3165 #define	DH_D0(eax)	(eax == 0x10fc0 || eax == 0x10ff0)
3166 #define	CH_D0(eax)	(eax == 0x10f80 || eax == 0x10fb0)
3167 #define	D0(eax)		(SH_D0(eax) || DH_D0(eax) || CH_D0(eax))
3168 
3169 #define	SH_E0(eax)	(eax == 0x20f50 || eax == 0x20f40 || eax == 0x20f70)
3170 #define	JH_E1(eax)	(eax == 0x20f10)	/* JH8_E0 had 0x20f30 */
3171 #define	DH_E3(eax)	(eax == 0x20fc0 || eax == 0x20ff0)
3172 #define	SH_E4(eax)	(eax == 0x20f51 || eax == 0x20f71)
3173 #define	BH_E4(eax)	(eax == 0x20fb1)
3174 #define	SH_E5(eax)	(eax == 0x20f42)
3175 #define	DH_E6(eax)	(eax == 0x20ff2 || eax == 0x20fc2)
3176 #define	JH_E6(eax)	(eax == 0x20f12 || eax == 0x20f32)
3177 #define	EX(eax)		(SH_E0(eax) || JH_E1(eax) || DH_E3(eax) || \
3178 			    SH_E4(eax) || BH_E4(eax) || SH_E5(eax) || \
3179 			    DH_E6(eax) || JH_E6(eax))
3180 
3181 #define	DR_AX(eax)	(eax == 0x100f00 || eax == 0x100f01 || eax == 0x100f02)
3182 #define	DR_B0(eax)	(eax == 0x100f20)
3183 #define	DR_B1(eax)	(eax == 0x100f21)
3184 #define	DR_BA(eax)	(eax == 0x100f2a)
3185 #define	DR_B2(eax)	(eax == 0x100f22)
3186 #define	DR_B3(eax)	(eax == 0x100f23)
3187 #define	RB_C0(eax)	(eax == 0x100f40)
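
/*
 * The eax values encoded in the macros above are raw CPUID function 1
 * signatures: stepping in bits 3:0, base model in 7:4, base family in
 * 11:8, extended model in 19:16 and extended family in 27:20. E.g.
 * 0x20f12 is family 0xf, model 0x21 (extended model 2, base model 1),
 * stepping 2.
 */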
3188 
3189 	switch (erratum) {
3190 	case 1:
3191 		return (cpi->cpi_family < 0x10);
3192 	case 51:	/* what does the asterisk mean? */
3193 		return (B(eax) || SH_C0(eax) || CG(eax));
3194 	case 52:
3195 		return (B(eax));
3196 	case 57:
3197 		return (cpi->cpi_family <= 0x11);
3198 	case 58:
3199 		return (B(eax));
3200 	case 60:
3201 		return (cpi->cpi_family <= 0x11);
3202 	case 61:
3203 	case 62:
3204 	case 63:
3205 	case 64:
3206 	case 65:
3207 	case 66:
3208 	case 68:
3209 	case 69:
3210 	case 70:
3211 	case 71:
3212 		return (B(eax));
3213 	case 72:
3214 		return (SH_B0(eax));
3215 	case 74:
3216 		return (B(eax));
3217 	case 75:
3218 		return (cpi->cpi_family < 0x10);
3219 	case 76:
3220 		return (B(eax));
3221 	case 77:
3222 		return (cpi->cpi_family <= 0x11);
3223 	case 78:
3224 		return (B(eax) || SH_C0(eax));
3225 	case 79:
3226 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3227 	case 80:
3228 	case 81:
3229 	case 82:
3230 		return (B(eax));
3231 	case 83:
3232 		return (B(eax) || SH_C0(eax) || CG(eax));
3233 	case 85:
3234 		return (cpi->cpi_family < 0x10);
3235 	case 86:
3236 		return (SH_C0(eax) || CG(eax));
3237 	case 88:
3238 #if !defined(__amd64)
3239 		return (0);
3240 #else
3241 		return (B(eax) || SH_C0(eax));
3242 #endif
3243 	case 89:
3244 		return (cpi->cpi_family < 0x10);
3245 	case 90:
3246 		return (B(eax) || SH_C0(eax) || CG(eax));
3247 	case 91:
3248 	case 92:
3249 		return (B(eax) || SH_C0(eax));
3250 	case 93:
3251 		return (SH_C0(eax));
3252 	case 94:
3253 		return (B(eax) || SH_C0(eax) || CG(eax));
3254 	case 95:
3255 #if !defined(__amd64)
3256 		return (0);
3257 #else
3258 		return (B(eax) || SH_C0(eax));
3259 #endif
3260 	case 96:
3261 		return (B(eax) || SH_C0(eax) || CG(eax));
3262 	case 97:
3263 	case 98:
3264 		return (SH_C0(eax) || CG(eax));
3265 	case 99:
3266 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3267 	case 100:
3268 		return (B(eax) || SH_C0(eax));
3269 	case 101:
3270 	case 103:
3271 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3272 	case 104:
3273 		return (SH_C0(eax) || CG(eax) || D0(eax));
3274 	case 105:
3275 	case 106:
3276 	case 107:
3277 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3278 	case 108:
3279 		return (DH_CG(eax));
3280 	case 109:
3281 		return (SH_C0(eax) || CG(eax) || D0(eax));
3282 	case 110:
3283 		return (D0(eax) || EX(eax));
3284 	case 111:
3285 		return (CG(eax));
3286 	case 112:
3287 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3288 	case 113:
3289 		return (eax == 0x20fc0);
3290 	case 114:
3291 		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3292 	case 115:
3293 		return (SH_E0(eax) || JH_E1(eax));
3294 	case 116:
3295 		return (SH_E0(eax) || JH_E1(eax) || DH_E3(eax));
3296 	case 117:
3297 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax));
3298 	case 118:
3299 		return (SH_E0(eax) || JH_E1(eax) || SH_E4(eax) || BH_E4(eax) ||
3300 		    JH_E6(eax));
3301 	case 121:
3302 		return (B(eax) || SH_C0(eax) || CG(eax) || D0(eax) || EX(eax));
3303 	case 122:
3304 		return (cpi->cpi_family < 0x10 || cpi->cpi_family == 0x11);
3305 	case 123:
3306 		return (JH_E1(eax) || BH_E4(eax) || JH_E6(eax));
3307 	case 131:
3308 		return (cpi->cpi_family < 0x10);
3309 	case 6336786:
3310 		/*
3311 		 * Test for AdvPowerMgmtInfo.TscPStateInvariant
3312 		 * if this is a K8 family or newer processor
3313 		 */
3314 		if (CPI_FAMILY(cpi) == 0xf) {
3315 			struct cpuid_regs regs;
3316 			regs.cp_eax = 0x80000007;
3317 			(void) __cpuid_insn(&regs);
3318 			return (!(regs.cp_edx & 0x100));
3319 		}
3320 		return (0);
3321 	case 6323525:
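		/*
		 * The expression below reassembles the full family (base +
		 * extended) into bits 15:8 and the full model into bits
		 * 7:0 of a signature, then checks for parts older than
		 * family 0xf, model 0x40.
		 */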
3322 		return (((((eax >> 12) & 0xff00) + (eax & 0xf00)) |
3323 		    (((eax >> 4) & 0xf) | ((eax >> 12) & 0xf0))) < 0xf40);
3324 
3325 	case 6671130:
3326 		/*
3327 		 * Check for processors (pre-Shanghai) that do not provide
3328 		 * optimal management of 1GB PTEs in their TLB.
3329 		 */
3330 		return (cpi->cpi_family == 0x10 && cpi->cpi_model < 4);
3331 
3332 	case 298:
3333 		return (DR_AX(eax) || DR_B0(eax) || DR_B1(eax) || DR_BA(eax) ||
3334 		    DR_B2(eax) || RB_C0(eax));
3335 
3336 	default:
3337 		return (-1);
3338 
3339 	}
3340 }
3341 
3342 /*
3343  * Determine if specified erratum is present via OSVW (OS Visible Workaround).
3344  * Return 1 if erratum is present, 0 if not present and -1 if indeterminate.
3345  */
3346 int
3347 osvw_opteron_erratum(cpu_t *cpu, uint_t erratum)
3348 {
3349 	struct cpuid_info	*cpi;
3350 	uint_t			osvwid;
3351 	static int		osvwfeature = -1;
3352 	uint64_t		osvwlength;
3353 
3354 
3355 	cpi = cpu->cpu_m.mcpu_cpi;
3356 
3357 	/* confirm OSVW supported */
3358 	if (osvwfeature == -1) {
3359 		osvwfeature = cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW;
3360 	} else {
3361 		/* assert that osvw feature setting is consistent on all cpus */
3362 		ASSERT(osvwfeature ==
3363 		    (cpi->cpi_extd[1].cp_ecx & CPUID_AMD_ECX_OSVW));
3364 	}
3365 	if (!osvwfeature)
3366 		return (-1);
3367 
3368 	osvwlength = rdmsr(MSR_AMD_OSVW_ID_LEN) & OSVW_ID_LEN_MASK;
3369 
3370 	switch (erratum) {
3371 	case 298:	/* osvwid is 0 */
3372 		osvwid = 0;
3373 		if (osvwlength <= (uint64_t)osvwid) {
3374 			/* osvwid 0 is unknown */
3375 			return (-1);
3376 		}
3377 
3378 		/*
3379 		 * Check the OSVW STATUS MSR to determine the state
3380 		 * of the erratum where:
3381 		 *   0 - fixed by HW
3382 		 *   1 - BIOS has applied the workaround when a BIOS
3383 		 *       workaround is available (or, for other errata, an
3384 		 *       OS workaround is required)
3385 		 * For a value of 1, the caller will confirm that the
3386 		 * erratum 298 workaround has indeed been applied by BIOS.
3387 		 *
3388 		 * A 1 may be set even on CPUs that have a HW fix, in a
3389 		 * mixed-CPU system. Regarding erratum 298:
3390 		 *   In a multiprocessor platform, the workaround above
3391 		 *   should be applied to all processors regardless of
3392 		 *   silicon revision when an affected processor is
3393 		 *   present.
3394 		 */
3395 
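		/*
		 * Each OSVW status MSR holds OSVW_ID_CNT_PER_MSR erratum
		 * status bits; select the MSR with osvwid / CNT and test
		 * bit osvwid % CNT within it.
		 */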
3396 		return (rdmsr(MSR_AMD_OSVW_STATUS +
3397 		    (osvwid / OSVW_ID_CNT_PER_MSR)) &
3398 		    (1ULL << (osvwid % OSVW_ID_CNT_PER_MSR)));
3399 
3400 	default:
3401 		return (-1);
3402 	}
3403 }
3404 
3405 static const char assoc_str[] = "associativity";
3406 static const char line_str[] = "line-size";
3407 static const char size_str[] = "size";
3408 
3409 static void
3410 add_cache_prop(dev_info_t *devi, const char *label, const char *type,
3411     uint32_t val)
3412 {
3413 	char buf[128];
3414 
3415 	/*
3416 	 * ndi_prop_update_int() is used because it is desirable for
3417 	 * DDI_PROP_HW_DEF and DDI_PROP_DONTSLEEP to be set.
3418 	 */
3419 	if (snprintf(buf, sizeof (buf), "%s-%s", label, type) < sizeof (buf))
3420 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, devi, buf, val);
3421 }
3422 
3423 /*
3424  * Intel-style cache/tlb description
3425  *
3426  * Standard cpuid level 2 gives a randomly ordered
3427  * selection of tags that index into a table that describes
3428  * cache and tlb properties.
3429  */
3430 
3431 static const char l1_icache_str[] = "l1-icache";
3432 static const char l1_dcache_str[] = "l1-dcache";
3433 static const char l2_cache_str[] = "l2-cache";
3434 static const char l3_cache_str[] = "l3-cache";
3435 static const char itlb4k_str[] = "itlb-4K";
3436 static const char dtlb4k_str[] = "dtlb-4K";
3437 static const char itlb2M_str[] = "itlb-2M";
3438 static const char itlb4M_str[] = "itlb-4M";
3439 static const char dtlb4M_str[] = "dtlb-4M";
3440 static const char dtlb24_str[] = "dtlb0-2M-4M";
3441 static const char itlb424_str[] = "itlb-4K-2M-4M";
3442 static const char itlb24_str[] = "itlb-2M-4M";
3443 static const char dtlb44_str[] = "dtlb-4K-4M";
3444 static const char sl1_dcache_str[] = "sectored-l1-dcache";
3445 static const char sl2_cache_str[] = "sectored-l2-cache";
3446 static const char itrace_str[] = "itrace-cache";
3447 static const char sl3_cache_str[] = "sectored-l3-cache";
3448 static const char sh_l2_tlb4k_str[] = "shared-l2-tlb-4k";
3449 
3450 static const struct cachetab {
3451 	uint8_t 	ct_code;
3452 	uint8_t		ct_assoc;
3453 	uint16_t 	ct_line_size;
3454 	size_t		ct_size;
3455 	const char	*ct_label;
3456 } intel_ctab[] = {
3457 	/*
3458 	 * maintain descending order!
3459 	 *
3460 	 * Codes ignored - Reason
3461 	 * ----------------------
3462 	 * 40H - intel_cpuid_4_cache_info() disambiguates l2/l3 cache
3463 	 * f0H/f1H - Currently we do not interpret prefetch size by design
3464 	 */
3465 	{ 0xe4, 16, 64, 8*1024*1024, l3_cache_str},
3466 	{ 0xe3, 16, 64, 4*1024*1024, l3_cache_str},
3467 	{ 0xe2, 16, 64, 2*1024*1024, l3_cache_str},
3468 	{ 0xde, 12, 64, 6*1024*1024, l3_cache_str},
3469 	{ 0xdd, 12, 64, 3*1024*1024, l3_cache_str},
3470 	{ 0xdc, 12, 64, ((1*1024*1024)+(512*1024)), l3_cache_str},
3471 	{ 0xd8, 8, 64, 4*1024*1024, l3_cache_str},
3472 	{ 0xd7, 8, 64, 2*1024*1024, l3_cache_str},
3473 	{ 0xd6, 8, 64, 1*1024*1024, l3_cache_str},
3474 	{ 0xd2, 4, 64, 2*1024*1024, l3_cache_str},
3475 	{ 0xd1, 4, 64, 1*1024*1024, l3_cache_str},
3476 	{ 0xd0, 4, 64, 512*1024, l3_cache_str},
3477 	{ 0xca, 4, 0, 512, sh_l2_tlb4k_str},
3478 	{ 0xc0, 4, 0, 8, dtlb44_str },
3479 	{ 0xba, 4, 0, 64, dtlb4k_str },
3480 	{ 0xb4, 4, 0, 256, dtlb4k_str },
3481 	{ 0xb3, 4, 0, 128, dtlb4k_str },
3482 	{ 0xb2, 4, 0, 64, itlb4k_str },
3483 	{ 0xb0, 4, 0, 128, itlb4k_str },
3484 	{ 0x87, 8, 64, 1024*1024, l2_cache_str},
3485 	{ 0x86, 4, 64, 512*1024, l2_cache_str},
3486 	{ 0x85, 8, 32, 2*1024*1024, l2_cache_str},
3487 	{ 0x84, 8, 32, 1024*1024, l2_cache_str},
3488 	{ 0x83, 8, 32, 512*1024, l2_cache_str},
3489 	{ 0x82, 8, 32, 256*1024, l2_cache_str},
3490 	{ 0x80, 8, 64, 512*1024, l2_cache_str},
3491 	{ 0x7f, 2, 64, 512*1024, l2_cache_str},
3492 	{ 0x7d, 8, 64, 2*1024*1024, sl2_cache_str},
3493 	{ 0x7c, 8, 64, 1024*1024, sl2_cache_str},
3494 	{ 0x7b, 8, 64, 512*1024, sl2_cache_str},
3495 	{ 0x7a, 8, 64, 256*1024, sl2_cache_str},
3496 	{ 0x79, 8, 64, 128*1024, sl2_cache_str},
3497 	{ 0x78, 8, 64, 1024*1024, l2_cache_str},
3498 	{ 0x73, 8, 0, 64*1024, itrace_str},
3499 	{ 0x72, 8, 0, 32*1024, itrace_str},
3500 	{ 0x71, 8, 0, 16*1024, itrace_str},
3501 	{ 0x70, 8, 0, 12*1024, itrace_str},
3502 	{ 0x68, 4, 64, 32*1024, sl1_dcache_str},
3503 	{ 0x67, 4, 64, 16*1024, sl1_dcache_str},
3504 	{ 0x66, 4, 64, 8*1024, sl1_dcache_str},
3505 	{ 0x60, 8, 64, 16*1024, sl1_dcache_str},
3506 	{ 0x5d, 0, 0, 256, dtlb44_str},
3507 	{ 0x5c, 0, 0, 128, dtlb44_str},
3508 	{ 0x5b, 0, 0, 64, dtlb44_str},
3509 	{ 0x5a, 4, 0, 32, dtlb24_str},
3510 	{ 0x59, 0, 0, 16, dtlb4k_str},
3511 	{ 0x57, 4, 0, 16, dtlb4k_str},
3512 	{ 0x56, 4, 0, 16, dtlb4M_str},
3513 	{ 0x55, 0, 0, 7, itlb24_str},
3514 	{ 0x52, 0, 0, 256, itlb424_str},
3515 	{ 0x51, 0, 0, 128, itlb424_str},
3516 	{ 0x50, 0, 0, 64, itlb424_str},
3517 	{ 0x4f, 0, 0, 32, itlb4k_str},
3518 	{ 0x4e, 24, 64, 6*1024*1024, l2_cache_str},
3519 	{ 0x4d, 16, 64, 16*1024*1024, l3_cache_str},
3520 	{ 0x4c, 12, 64, 12*1024*1024, l3_cache_str},
3521 	{ 0x4b, 16, 64, 8*1024*1024, l3_cache_str},
3522 	{ 0x4a, 12, 64, 6*1024*1024, l3_cache_str},
3523 	{ 0x49, 16, 64, 4*1024*1024, l3_cache_str},
3524 	{ 0x48, 12, 64, 3*1024*1024, l2_cache_str},
3525 	{ 0x47, 8, 64, 8*1024*1024, l3_cache_str},
3526 	{ 0x46, 4, 64, 4*1024*1024, l3_cache_str},
3527 	{ 0x45, 4, 32, 2*1024*1024, l2_cache_str},
3528 	{ 0x44, 4, 32, 1024*1024, l2_cache_str},
3529 	{ 0x43, 4, 32, 512*1024, l2_cache_str},
3530 	{ 0x42, 4, 32, 256*1024, l2_cache_str},
3531 	{ 0x41, 4, 32, 128*1024, l2_cache_str},
3532 	{ 0x3e, 4, 64, 512*1024, sl2_cache_str},
3533 	{ 0x3d, 6, 64, 384*1024, sl2_cache_str},
3534 	{ 0x3c, 4, 64, 256*1024, sl2_cache_str},
3535 	{ 0x3b, 2, 64, 128*1024, sl2_cache_str},
3536 	{ 0x3a, 6, 64, 192*1024, sl2_cache_str},
3537 	{ 0x39, 4, 64, 128*1024, sl2_cache_str},
3538 	{ 0x30, 8, 64, 32*1024, l1_icache_str},
3539 	{ 0x2c, 8, 64, 32*1024, l1_dcache_str},
3540 	{ 0x29, 8, 64, 4096*1024, sl3_cache_str},
3541 	{ 0x25, 8, 64, 2048*1024, sl3_cache_str},
3542 	{ 0x23, 8, 64, 1024*1024, sl3_cache_str},
3543 	{ 0x22, 4, 64, 512*1024, sl3_cache_str},
3544 	{ 0x0e, 6, 64, 24*1024, l1_dcache_str},
3545 	{ 0x0d, 4, 32, 16*1024, l1_dcache_str},
3546 	{ 0x0c, 4, 32, 16*1024, l1_dcache_str},
3547 	{ 0x0b, 4, 0, 4, itlb4M_str},
3548 	{ 0x0a, 2, 32, 8*1024, l1_dcache_str},
3549 	{ 0x08, 4, 32, 16*1024, l1_icache_str},
3550 	{ 0x06, 4, 32, 8*1024, l1_icache_str},
3551 	{ 0x05, 4, 0, 32, dtlb4M_str},
3552 	{ 0x04, 4, 0, 8, dtlb4M_str},
3553 	{ 0x03, 4, 0, 64, dtlb4k_str},
3554 	{ 0x02, 4, 0, 2, itlb4M_str},
3555 	{ 0x01, 4, 0, 32, itlb4k_str},
3556 	{ 0 }
3557 };
3558 
3559 static const struct cachetab cyrix_ctab[] = {
3560 	{ 0x70, 4, 0, 32, "tlb-4K" },
3561 	{ 0x80, 4, 16, 16*1024, "l1-cache" },
3562 	{ 0 }
3563 };
3564 
3565 /*
3566  * Search a cache table for a matching entry
3567  */
3568 static const struct cachetab *
3569 find_cacheent(const struct cachetab *ct, uint_t code)
3570 {
3571 	if (code != 0) {
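		/*
		 * The tables are sorted in descending ct_code order, so the
		 * scan stops at the first entry not greater than code; only
		 * an exact match is a hit.
		 */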
3572 		for (; ct->ct_code != 0; ct++)
3573 			if (ct->ct_code <= code)
3574 				break;
3575 		if (ct->ct_code == code)
3576 			return (ct);
3577 	}
3578 	return (NULL);
3579 }
3580 
3581 /*
3582  * Populate cachetab entry with L2 or L3 cache-information using
3583  * cpuid function 4. This function is called from intel_walk_cacheinfo()
3584  * when descriptor 0x49 is encountered. It returns 0 if no such cache
3585  * information is found.
3586  */
3587 static int
3588 intel_cpuid_4_cache_info(struct cachetab *ct, struct cpuid_info *cpi)
3589 {
3590 	uint32_t level, i;
3591 	int ret = 0;
3592 
3593 	for (i = 0; i < cpi->cpi_std_4_size; i++) {
3594 		level = CPI_CACHE_LVL(cpi->cpi_std_4[i]);
3595 
3596 		if (level == 2 || level == 3) {
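			/*
			 * Standard leaf-4 arithmetic: each field is reported
			 * minus one, so total size is ways * partitions *
			 * line size * sets, with (sets - 1) in %ecx.
			 */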
3597 			ct->ct_assoc = CPI_CACHE_WAYS(cpi->cpi_std_4[i]) + 1;
3598 			ct->ct_line_size =
3599 			    CPI_CACHE_COH_LN_SZ(cpi->cpi_std_4[i]) + 1;
3600 			ct->ct_size = ct->ct_assoc *
3601 			    (CPI_CACHE_PARTS(cpi->cpi_std_4[i]) + 1) *
3602 			    ct->ct_line_size *
3603 			    (cpi->cpi_std_4[i]->cp_ecx + 1);
3604 
3605 			if (level == 2) {
3606 				ct->ct_label = l2_cache_str;
3607 			} else if (level == 3) {
3608 				ct->ct_label = l3_cache_str;
3609 			}
3610 			ret = 1;
3611 		}
3612 	}
3613 
3614 	return (ret);
3615 }
3616 
3617 /*
3618  * Walk the cacheinfo descriptor, applying 'func' to every valid element
3619  * The walk is terminated if the walker returns non-zero.
3620  */
3621 static void
3622 intel_walk_cacheinfo(struct cpuid_info *cpi,
3623     void *arg, int (*func)(void *, const struct cachetab *))
3624 {
3625 	const struct cachetab *ct;
3626 	struct cachetab des_49_ct, des_b1_ct;
3627 	uint8_t *dp;
3628 	int i;
3629 
3630 	if ((dp = cpi->cpi_cacheinfo) == NULL)
3631 		return;
3632 	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3633 		/*
3634 		 * For overloaded descriptor 0x49 we use cpuid function 4
3635 		 * if supported by the current processor, to create
3636 		 * cache information.
3637 		 * For overloaded descriptor 0xb1 we use X86_PAE flag
3638 		 * to disambiguate the cache information.
3639 		 */
3640 		if (*dp == 0x49 && cpi->cpi_maxeax >= 0x4 &&
3641 		    intel_cpuid_4_cache_info(&des_49_ct, cpi) == 1) {
3642 			ct = &des_49_ct;
3643 		} else if (*dp == 0xb1) {
3644 			des_b1_ct.ct_code = 0xb1;
3645 			des_b1_ct.ct_assoc = 4;
3646 			des_b1_ct.ct_line_size = 0;
3647 			if (is_x86_feature(x86_featureset, X86FSET_PAE)) {
3648 				des_b1_ct.ct_size = 8;
3649 				des_b1_ct.ct_label = itlb2M_str;
3650 			} else {
3651 				des_b1_ct.ct_size = 4;
3652 				des_b1_ct.ct_label = itlb4M_str;
3653 			}
3654 			ct = &des_b1_ct;
3655 		} else {
3656 			if ((ct = find_cacheent(intel_ctab, *dp)) == NULL) {
3657 				continue;
3658 			}
3659 		}
3660 
3661 		if (func(arg, ct) != 0) {
3662 			break;
3663 		}
3664 	}
3665 }
3666 
3667 /*
3668  * (Like the Intel one, except for Cyrix CPUs)
3669  */
3670 static void
3671 cyrix_walk_cacheinfo(struct cpuid_info *cpi,
3672     void *arg, int (*func)(void *, const struct cachetab *))
3673 {
3674 	const struct cachetab *ct;
3675 	uint8_t *dp;
3676 	int i;
3677 
3678 	if ((dp = cpi->cpi_cacheinfo) == NULL)
3679 		return;
3680 	for (i = 0; i < cpi->cpi_ncache; i++, dp++) {
3681 		/*
3682 		 * Search Cyrix-specific descriptor table first ..
3683 		 */
3684 		if ((ct = find_cacheent(cyrix_ctab, *dp)) != NULL) {
3685 			if (func(arg, ct) != 0)
3686 				break;
3687 			continue;
3688 		}
3689 		/*
3690 		 * .. else fall back to the Intel one
3691 		 */
3692 		if ((ct = find_cacheent(intel_ctab, *dp)) != NULL) {
3693 			if (func(arg, ct) != 0)
3694 				break;
3695 			continue;
3696 		}
3697 	}
3698 }
3699 
3700 /*
3701  * A cacheinfo walker that adds associativity, line-size, and size properties
3702  * to the devinfo node it is passed as an argument.
3703  */
3704 static int
3705 add_cacheent_props(void *arg, const struct cachetab *ct)
3706 {
3707 	dev_info_t *devi = arg;
3708 
3709 	add_cache_prop(devi, ct->ct_label, assoc_str, ct->ct_assoc);
3710 	if (ct->ct_line_size != 0)
3711 		add_cache_prop(devi, ct->ct_label, line_str,
3712 		    ct->ct_line_size);
3713 	add_cache_prop(devi, ct->ct_label, size_str, ct->ct_size);
3714 	return (0);
3715 }
3716 
3717 
3718 static const char fully_assoc[] = "fully-associative?";
3719 
3720 /*
3721  * AMD-style cache/tlb description
3722  *
3723  * Extended functions 5 and 6 directly describe the properties of
3724  * the tlbs and the various cache levels.
3725  */
3726 static void
3727 add_amd_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3728 {
3729 	switch (assoc) {
3730 	case 0:	/* reserved; ignore */
3731 		break;
3732 	default:
3733 		add_cache_prop(devi, label, assoc_str, assoc);
3734 		break;
3735 	case 0xff:
3736 		add_cache_prop(devi, label, fully_assoc, 1);
3737 		break;
3738 	}
3739 }
3740 
3741 static void
3742 add_amd_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3743 {
3744 	if (size == 0)
3745 		return;
3746 	add_cache_prop(devi, label, size_str, size);
3747 	add_amd_assoc(devi, label, assoc);
3748 }
3749 
3750 static void
3751 add_amd_cache(dev_info_t *devi, const char *label,
3752     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3753 {
3754 	if (size == 0 || line_size == 0)
3755 		return;
3756 	add_amd_assoc(devi, label, assoc);
3757 	/*
3758 	 * Most AMD parts have a sectored cache. Multiple cache lines are
3759 	 * associated with each tag. A sector consists of all cache lines
3760 	 * associated with a tag. For example, the AMD K6-III has a sector
3761 	 * size of 2 cache lines per tag.
3762 	 */
3763 	if (lines_per_tag != 0)
3764 		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3765 	add_cache_prop(devi, label, line_str, line_size);
3766 	add_cache_prop(devi, label, size_str, size * 1024);
3767 }
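/*
 * Worked example (editor's addition): a part reporting a 64K, 2-way
 * L1 dcache with one line per tag and 64-byte lines would reach here as
 *
 *	add_amd_cache(devi, l1_dcache_str, 64, 2, 1, 64);
 *
 * publishing an associativity of 2, a line-size of 64, a lines-per-tag
 * of 1 and a size of 64 * 1024 = 65536 bytes on the devinfo node.
 */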
3768 
3769 static void
3770 add_amd_l2_assoc(dev_info_t *devi, const char *label, uint_t assoc)
3771 {
3772 	switch (assoc) {
3773 	case 0:	/* off */
3774 		break;
3775 	case 1:
3776 	case 2:
3777 	case 4:
3778 		add_cache_prop(devi, label, assoc_str, assoc);
3779 		break;
3780 	case 6:
3781 		add_cache_prop(devi, label, assoc_str, 8);
3782 		break;
3783 	case 8:
3784 		add_cache_prop(devi, label, assoc_str, 16);
3785 		break;
3786 	case 0xf:
3787 		add_cache_prop(devi, label, fully_assoc, 1);
3788 		break;
3789 	default: /* reserved; ignore */
3790 		break;
3791 	}
3792 }
3793 
3794 static void
3795 add_amd_l2_tlb(dev_info_t *devi, const char *label, uint_t assoc, uint_t size)
3796 {
3797 	if (size == 0 || assoc == 0)
3798 		return;
3799 	add_amd_l2_assoc(devi, label, assoc);
3800 	add_cache_prop(devi, label, size_str, size);
3801 }
3802 
3803 static void
3804 add_amd_l2_cache(dev_info_t *devi, const char *label,
3805     uint_t size, uint_t assoc, uint_t lines_per_tag, uint_t line_size)
3806 {
3807 	if (size == 0 || assoc == 0 || line_size == 0)
3808 		return;
3809 	add_amd_l2_assoc(devi, label, assoc);
3810 	if (lines_per_tag != 0)
3811 		add_cache_prop(devi, label, "lines-per-tag", lines_per_tag);
3812 	add_cache_prop(devi, label, line_str, line_size);
3813 	add_cache_prop(devi, label, size_str, size * 1024);
3814 }
3815 
3816 static void
3817 amd_cache_info(struct cpuid_info *cpi, dev_info_t *devi)
3818 {
3819 	struct cpuid_regs *cp;
3820 
3821 	if (cpi->cpi_xmaxeax < 0x80000005)
3822 		return;
3823 	cp = &cpi->cpi_extd[5];
3824 
3825 	/*
3826 	 * 4M/2M L1 TLB configuration
3827 	 *
3828 	 * We report the size for 2M pages because AMD uses two
3829 	 * TLB entries for one 4M page.
3830 	 */
3831 	add_amd_tlb(devi, "dtlb-2M",
3832 	    BITX(cp->cp_eax, 31, 24), BITX(cp->cp_eax, 23, 16));
3833 	add_amd_tlb(devi, "itlb-2M",
3834 	    BITX(cp->cp_eax, 15, 8), BITX(cp->cp_eax, 7, 0));
3835 
3836 	/*
3837 	 * 4K L1 TLB configuration
3838 	 */
3839 
3840 	switch (cpi->cpi_vendor) {
3841 		uint_t nentries;
3842 	case X86_VENDOR_TM:
3843 		if (cpi->cpi_family >= 5) {
3844 			/*
3845 			 * Crusoe processors have 256 TLB entries, but the
3846 			 * cpuid data format constrains them to reporting
3847 			 * only 255 of them.
3848 			 */
3849 			if ((nentries = BITX(cp->cp_ebx, 23, 16)) == 255)
3850 				nentries = 256;
3851 			/*
3852 			 * Crusoe processors also have a unified TLB
3853 			 */
3854 			add_amd_tlb(devi, "tlb-4K", BITX(cp->cp_ebx, 31, 24),
3855 			    nentries);
3856 			break;
3857 		}
3858 		/*FALLTHROUGH*/
3859 	default:
3860 		add_amd_tlb(devi, itlb4k_str,
3861 		    BITX(cp->cp_ebx, 31, 24), BITX(cp->cp_ebx, 23, 16));
3862 		add_amd_tlb(devi, dtlb4k_str,
3863 		    BITX(cp->cp_ebx, 15, 8), BITX(cp->cp_ebx, 7, 0));
3864 		break;
3865 	}
3866 
3867 	/*
3868 	 * data L1 cache configuration
3869 	 */
3870 
3871 	add_amd_cache(devi, l1_dcache_str,
3872 	    BITX(cp->cp_ecx, 31, 24), BITX(cp->cp_ecx, 23, 16),
3873 	    BITX(cp->cp_ecx, 15, 8), BITX(cp->cp_ecx, 7, 0));
3874 
3875 	/*
3876 	 * code L1 cache configuration
3877 	 */
3878 
3879 	add_amd_cache(devi, l1_icache_str,
3880 	    BITX(cp->cp_edx, 31, 24), BITX(cp->cp_edx, 23, 16),
3881 	    BITX(cp->cp_edx, 15, 8), BITX(cp->cp_edx, 7, 0));
3882 
3883 	if (cpi->cpi_xmaxeax < 0x80000006)
3884 		return;
3885 	cp = &cpi->cpi_extd[6];
3886 
3887 	/* Check for a unified L2 TLB for large pages */
3888 
3889 	if (BITX(cp->cp_eax, 31, 16) == 0)
3890 		add_amd_l2_tlb(devi, "l2-tlb-2M",
3891 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3892 	else {
3893 		add_amd_l2_tlb(devi, "l2-dtlb-2M",
3894 		    BITX(cp->cp_eax, 31, 28), BITX(cp->cp_eax, 27, 16));
3895 		add_amd_l2_tlb(devi, "l2-itlb-2M",
3896 		    BITX(cp->cp_eax, 15, 12), BITX(cp->cp_eax, 11, 0));
3897 	}
3898 
3899 	/* Check for a unified L2 TLB for 4K pages */
3900 
3901 	if (BITX(cp->cp_ebx, 31, 16) == 0) {
3902 		add_amd_l2_tlb(devi, "l2-tlb-4K",
3903 		    BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
3904 	} else {
3905 		add_amd_l2_tlb(devi, "l2-dtlb-4K",
3906 		    BITX(cp->cp_ebx, 31, 28), BITX(cp->cp_ebx, 27, 16));
3907 		add_amd_l2_tlb(devi, "l2-itlb-4K",
3908 		    BITX(cp->cp_ebx, 15, 12), BITX(cp->cp_ebx, 11, 0));
3909 	}
3910 
3911 	add_amd_l2_cache(devi, l2_cache_str,
3912 	    BITX(cp->cp_ecx, 31, 16), BITX(cp->cp_ecx, 15, 12),
3913 	    BITX(cp->cp_ecx, 11, 8), BITX(cp->cp_ecx, 7, 0));
3914 }
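/*
 * Illustrative note (editor's addition): the extended function 5
 * registers pack four 8-bit fields apiece; for the L1 dcache in %ecx
 * the layout (per AMD's cpuid documentation) is
 *
 *	bits 31:24	cache size, in KB
 *	bits 23:16	associativity
 *	bits 15:8	lines per tag
 *	bits  7:0	line size, in bytes
 *
 * so, for example, %ecx == 0x40020140 decodes as the 64K, 2-way,
 * one-line-per-tag, 64-byte-line cache used in the example above.
 */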
3915 
3916 /*
3917  * There are two basic ways that the x86 world describes its cache
3918  * and tlb architecture - Intel's way and AMD's way.
3919  *
3920  * Return which flavor of cache architecture we should use.
3921  */
3922 static int
3923 x86_which_cacheinfo(struct cpuid_info *cpi)
3924 {
3925 	switch (cpi->cpi_vendor) {
3926 	case X86_VENDOR_Intel:
3927 		if (cpi->cpi_maxeax >= 2)
3928 			return (X86_VENDOR_Intel);
3929 		break;
3930 	case X86_VENDOR_AMD:
3931 		/*
3932 		 * The K5 model 1 was the first part from AMD that reported
3933 		 * cache sizes via extended cpuid functions.
3934 		 */
3935 		if (cpi->cpi_family > 5 ||
3936 		    (cpi->cpi_family == 5 && cpi->cpi_model >= 1))
3937 			return (X86_VENDOR_AMD);
3938 		break;
3939 	case X86_VENDOR_TM:
3940 		if (cpi->cpi_family >= 5)
3941 			return (X86_VENDOR_AMD);
3942 		/*FALLTHROUGH*/
3943 	default:
3944 		/*
3945 		 * If they have extended CPU data for 0x80000005
3946 		 * then we assume they have AMD-format cache
3947 		 * information.
3948 		 *
3949 		 * If not, and the vendor happens to be Cyrix,
3950 		 * then try our Cyrix-specific handler.
3951 		 *
3952 		 * If we're not Cyrix, then assume we're using Intel's
3953 		 * table-driven format instead.
3954 		 */
3955 		if (cpi->cpi_xmaxeax >= 0x80000005)
3956 			return (X86_VENDOR_AMD);
3957 		else if (cpi->cpi_vendor == X86_VENDOR_Cyrix)
3958 			return (X86_VENDOR_Cyrix);
3959 		else if (cpi->cpi_maxeax >= 2)
3960 			return (X86_VENDOR_Intel);
3961 		break;
3962 	}
3963 	return (-1);
3964 }
3965 
3966 void
3967 cpuid_set_cpu_properties(void *dip, processorid_t cpu_id,
3968     struct cpuid_info *cpi)
3969 {
3970 	dev_info_t *cpu_devi;
3971 	int create;
3972 
3973 	cpu_devi = (dev_info_t *)dip;
3974 
3975 	/* device_type */
3976 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
3977 	    "device_type", "cpu");
3978 
3979 	/* reg */
3980 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3981 	    "reg", cpu_id);
3982 
3983 	/* cpu-mhz, and clock-frequency */
3984 	if (cpu_freq > 0) {
3985 		long long mul;
3986 
3987 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3988 		    "cpu-mhz", cpu_freq);
3989 		if ((mul = cpu_freq * 1000000LL) <= INT_MAX)
3990 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
3991 			    "clock-frequency", (int)mul);
3992 	}
3993 
3994 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID)) {
3995 		return;
3996 	}
3997 
3998 	/* vendor-id */
3999 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4000 	    "vendor-id", cpi->cpi_vendorstr);
4001 
4002 	if (cpi->cpi_maxeax == 0) {
4003 		return;
4004 	}
4005 
4006 	/*
4007 	 * family, model, and step
4008 	 */
4009 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4010 	    "family", CPI_FAMILY(cpi));
4011 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4012 	    "cpu-model", CPI_MODEL(cpi));
4013 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4014 	    "stepping-id", CPI_STEP(cpi));
4015 
4016 	/* type */
4017 	switch (cpi->cpi_vendor) {
4018 	case X86_VENDOR_Intel:
4019 		create = 1;
4020 		break;
4021 	default:
4022 		create = 0;
4023 		break;
4024 	}
4025 	if (create)
4026 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4027 		    "type", CPI_TYPE(cpi));
4028 
4029 	/* ext-family */
4030 	switch (cpi->cpi_vendor) {
4031 	case X86_VENDOR_Intel:
4032 	case X86_VENDOR_AMD:
4033 		create = cpi->cpi_family >= 0xf;
4034 		break;
4035 	default:
4036 		create = 0;
4037 		break;
4038 	}
4039 	if (create)
4040 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4041 		    "ext-family", CPI_FAMILY_XTD(cpi));
4042 
4043 	/* ext-model */
4044 	switch (cpi->cpi_vendor) {
4045 	case X86_VENDOR_Intel:
4046 		create = IS_EXTENDED_MODEL_INTEL(cpi);
4047 		break;
4048 	case X86_VENDOR_AMD:
4049 		create = CPI_FAMILY(cpi) == 0xf;
4050 		break;
4051 	default:
4052 		create = 0;
4053 		break;
4054 	}
4055 	if (create)
4056 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4057 		    "ext-model", CPI_MODEL_XTD(cpi));
4058 
4059 	/* generation */
4060 	switch (cpi->cpi_vendor) {
4061 	case X86_VENDOR_AMD:
4062 		/*
4063 		 * AMD K5 model 1 was the first part to support this
4064 		 */
4065 		create = cpi->cpi_xmaxeax >= 0x80000001;
4066 		break;
4067 	default:
4068 		create = 0;
4069 		break;
4070 	}
4071 	if (create)
4072 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4073 		    "generation", BITX((cpi)->cpi_extd[1].cp_eax, 11, 8));
4074 
4075 	/* brand-id */
4076 	switch (cpi->cpi_vendor) {
4077 	case X86_VENDOR_Intel:
4078 		/*
4079 		 * brand id first appeared on Pentium III Xeon model 8 and
4080 		 * Celeron model 8 processors; on AMD, with the Opteron (K8)
4081 		 */
4082 		create = cpi->cpi_family > 6 ||
4083 		    (cpi->cpi_family == 6 && cpi->cpi_model >= 8);
4084 		break;
4085 	case X86_VENDOR_AMD:
4086 		create = cpi->cpi_family >= 0xf;
4087 		break;
4088 	default:
4089 		create = 0;
4090 		break;
4091 	}
4092 	if (create && cpi->cpi_brandid != 0) {
4093 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4094 		    "brand-id", cpi->cpi_brandid);
4095 	}
4096 
4097 	/* chunks, and apic-id */
4098 	switch (cpi->cpi_vendor) {
4099 		/*
4100 		 * first available on Pentium IV and Opteron (K8)
4101 		 */
4102 	case X86_VENDOR_Intel:
4103 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4104 		break;
4105 	case X86_VENDOR_AMD:
4106 		create = cpi->cpi_family >= 0xf;
4107 		break;
4108 	default:
4109 		create = 0;
4110 		break;
4111 	}
4112 	if (create) {
4113 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4114 		    "chunks", CPI_CHUNKS(cpi));
4115 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4116 		    "apic-id", cpi->cpi_apicid);
4117 		if (cpi->cpi_chipid >= 0) {
4118 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4119 			    "chip#", cpi->cpi_chipid);
4120 			(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4121 			    "clog#", cpi->cpi_clogid);
4122 		}
4123 	}
4124 
4125 	/* cpuid-features */
4126 	(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4127 	    "cpuid-features", CPI_FEATURES_EDX(cpi));
4128 
4129 
4130 	/* cpuid-features-ecx */
4131 	switch (cpi->cpi_vendor) {
4132 	case X86_VENDOR_Intel:
4133 		create = IS_NEW_F6(cpi) || cpi->cpi_family >= 0xf;
4134 		break;
4135 	default:
4136 		create = 0;
4137 		break;
4138 	}
4139 	if (create)
4140 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4141 		    "cpuid-features-ecx", CPI_FEATURES_ECX(cpi));
4142 
4143 	/* ext-cpuid-features */
4144 	switch (cpi->cpi_vendor) {
4145 	case X86_VENDOR_Intel:
4146 	case X86_VENDOR_AMD:
4147 	case X86_VENDOR_Cyrix:
4148 	case X86_VENDOR_TM:
4149 	case X86_VENDOR_Centaur:
4150 		create = cpi->cpi_xmaxeax >= 0x80000001;
4151 		break;
4152 	default:
4153 		create = 0;
4154 		break;
4155 	}
4156 	if (create) {
4157 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4158 		    "ext-cpuid-features", CPI_FEATURES_XTD_EDX(cpi));
4159 		(void) ndi_prop_update_int(DDI_DEV_T_NONE, cpu_devi,
4160 		    "ext-cpuid-features-ecx", CPI_FEATURES_XTD_ECX(cpi));
4161 	}
4162 
4163 	/*
4164 	 * Brand String first appeared in Intel Pentium IV, AMD K5
4165 	 * model 1, and Cyrix GXm.  On earlier models we try to
4166 	 * simulate something similar .. so this string should always
4167 	 * say -something- about the processor, however lame.
4168 	 */
4169 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, cpu_devi,
4170 	    "brand-string", cpi->cpi_brandstr);
4171 
4172 	/*
4173 	 * Finally, cache and tlb information
4174 	 */
4175 	switch (x86_which_cacheinfo(cpi)) {
4176 	case X86_VENDOR_Intel:
4177 		intel_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4178 		break;
4179 	case X86_VENDOR_Cyrix:
4180 		cyrix_walk_cacheinfo(cpi, cpu_devi, add_cacheent_props);
4181 		break;
4182 	case X86_VENDOR_AMD:
4183 		amd_cache_info(cpi, cpu_devi);
4184 		break;
4185 	default:
4186 		break;
4187 	}
4188 }
4189 
4190 struct l2info {
4191 	int *l2i_csz;
4192 	int *l2i_lsz;
4193 	int *l2i_assoc;
4194 	int l2i_ret;
4195 };
4196 
4197 /*
4198  * A cacheinfo walker that fetches the size, line-size and associativity
4199  * of the L2 cache
4200  */
4201 static int
4202 intel_l2cinfo(void *arg, const struct cachetab *ct)
4203 {
4204 	struct l2info *l2i = arg;
4205 	int *ip;
4206 
4207 	if (ct->ct_label != l2_cache_str &&
4208 	    ct->ct_label != sl2_cache_str)
4209 		return (0);	/* not an L2 -- keep walking */
4210 
4211 	if ((ip = l2i->l2i_csz) != NULL)
4212 		*ip = ct->ct_size;
4213 	if ((ip = l2i->l2i_lsz) != NULL)
4214 		*ip = ct->ct_line_size;
4215 	if ((ip = l2i->l2i_assoc) != NULL)
4216 		*ip = ct->ct_assoc;
4217 	l2i->l2i_ret = ct->ct_size;
4218 	return (1);		/* was an L2 -- terminate walk */
4219 }
4220 
4221 /*
4222  * AMD L2/L3 Cache and TLB Associativity Field Definition:
4223  *
4224  *	Unlike the associativity for the L1 cache and tlb, where the 8-bit
4225  *	value is the associativity itself, the associativity for the L2
4226  *	cache and tlb is encoded in the table below. The 4-bit L2 value
4227  *	serves as an index into the amd_afd[] array to determine the
4228  *	associativity: -1 is undefined and 0 is fully associative.
4229  */
4230 
4231 static int amd_afd[] =
4232 	{-1, 1, 2, -1, 4, -1, 8, -1, 16, -1, 32, 48, 64, 96, 128, 0};
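/*
 * Example (editor's addition): extended function 6 reports 0x6 for an
 * 8-way L2, which amd_l2cacheinfo() below decodes as amd_afd[6] == 8;
 * 0xf indexes the trailing 0, i.e. fully associative, and the -1
 * entries are reserved encodings (caught by the ASSERT below).
 */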
4233 
4234 static void
4235 amd_l2cacheinfo(struct cpuid_info *cpi, struct l2info *l2i)
4236 {
4237 	struct cpuid_regs *cp;
4238 	uint_t size, assoc;
4239 	int i;
4240 	int *ip;
4241 
4242 	if (cpi->cpi_xmaxeax < 0x80000006)
4243 		return;
4244 	cp = &cpi->cpi_extd[6];
4245 
4246 	if ((i = BITX(cp->cp_ecx, 15, 12)) != 0 &&
4247 	    (size = BITX(cp->cp_ecx, 31, 16)) != 0) {
4248 		uint_t cachesz = size * 1024;
4249 		assoc = amd_afd[i];
4250 
4251 		ASSERT(assoc != -1);
4252 
4253 		if ((ip = l2i->l2i_csz) != NULL)
4254 			*ip = cachesz;
4255 		if ((ip = l2i->l2i_lsz) != NULL)
4256 			*ip = BITX(cp->cp_ecx, 7, 0);
4257 		if ((ip = l2i->l2i_assoc) != NULL)
4258 			*ip = assoc;
4259 		l2i->l2i_ret = cachesz;
4260 	}
4261 }
4262 
4263 int
4264 getl2cacheinfo(cpu_t *cpu, int *csz, int *lsz, int *assoc)
4265 {
4266 	struct cpuid_info *cpi = cpu->cpu_m.mcpu_cpi;
4267 	struct l2info __l2info, *l2i = &__l2info;
4268 
4269 	l2i->l2i_csz = csz;
4270 	l2i->l2i_lsz = lsz;
4271 	l2i->l2i_assoc = assoc;
4272 	l2i->l2i_ret = -1;
4273 
4274 	switch (x86_which_cacheinfo(cpi)) {
4275 	case X86_VENDOR_Intel:
4276 		intel_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4277 		break;
4278 	case X86_VENDOR_Cyrix:
4279 		cyrix_walk_cacheinfo(cpi, l2i, intel_l2cinfo);
4280 		break;
4281 	case X86_VENDOR_AMD:
4282 		amd_l2cacheinfo(cpi, l2i);
4283 		break;
4284 	default:
4285 		break;
4286 	}
4287 	return (l2i->l2i_ret);
4288 }
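/*
 * Usage sketch (editor's addition): the walkers above treat the
 * out-parameters as optional, so a hypothetical caller interested only
 * in the L2 size could do something like:
 */
#if 0	/* example only */
	int l2size;

	if (getl2cacheinfo(CPU, &l2size, NULL, NULL) > 0)
		cmn_err(CE_CONT, "?L2 cache size: %d\n", l2size);
#endif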
4289 
4290 #if !defined(__xpv)
4291 
4292 uint32_t *
4293 cpuid_mwait_alloc(cpu_t *cpu)
4294 {
4295 	uint32_t	*ret;
4296 	size_t		mwait_size;
4297 
4298 	ASSERT(cpuid_checkpass(CPU, 2));
4299 
4300 	mwait_size = CPU->cpu_m.mcpu_cpi->cpi_mwait.mon_max;
4301 	if (mwait_size == 0)
4302 		return (NULL);
4303 
4304 	/*
4305 	 * kmem_alloc() returns cache line size aligned data for mwait_size
4306 	 * allocations.  mwait_size is currently cache line sized.  Neither
4307 	 * of these implementation details is guaranteed to remain true in
4308 	 * the future.
4309 	 *
4310 	 * First try allocating mwait_size, as kmem_alloc() currently returns
4311 	 * correctly aligned memory.  If it does not, allocate twice
4312 	 * mwait_size and round the pointer up to a mwait_size boundary.
4313 	 *
4314 	 * Set cpi_mwait.buf_actual and cpi_mwait.size_actual in case we
4315 	 * decide to free this memory.
4316 	 */
4317 	ret = kmem_zalloc(mwait_size, KM_SLEEP);
4318 	if (ret == (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size)) {
4319 		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4320 		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size;
4321 		*ret = MWAIT_RUNNING;
4322 		return (ret);
4323 	} else {
4324 		kmem_free(ret, mwait_size);
4325 		ret = kmem_zalloc(mwait_size * 2, KM_SLEEP);
4326 		cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = ret;
4327 		cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = mwait_size * 2;
4328 		ret = (uint32_t *)P2ROUNDUP((uintptr_t)ret, mwait_size);
4329 		*ret = MWAIT_RUNNING;
4330 		return (ret);
4331 	}
4332 }
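/*
 * Illustrative note (editor's addition): P2ROUNDUP(x, align) rounds x
 * up to the next multiple of the power-of-two 'align', e.g.
 *
 *	P2ROUNDUP(0x1234, 0x40) == 0x1240
 *
 * Any pointer into the doubled allocation therefore rounds up to a
 * mwait_size-aligned address that still has mwait_size usable bytes
 * behind it, which is why the fallback path above cannot overrun.
 */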
4333 
4334 void
4335 cpuid_mwait_free(cpu_t *cpu)
4336 {
4337 	if (cpu->cpu_m.mcpu_cpi == NULL) {
4338 		return;
4339 	}
4340 
4341 	if (cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual != NULL &&
4342 	    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual > 0) {
4343 		kmem_free(cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual,
4344 		    cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual);
4345 	}
4346 
4347 	cpu->cpu_m.mcpu_cpi->cpi_mwait.buf_actual = NULL;
4348 	cpu->cpu_m.mcpu_cpi->cpi_mwait.size_actual = 0;
4349 }
4350 
4351 void
4352 patch_tsc_read(int flag)
4353 {
4354 	size_t cnt;
4355 
4356 	switch (flag) {
4357 	case X86_NO_TSC:
4358 		cnt = &_no_rdtsc_end - &_no_rdtsc_start;
4359 		(void) memcpy((void *)tsc_read, (void *)&_no_rdtsc_start, cnt);
4360 		break;
4361 	case X86_HAVE_TSCP:
4362 		cnt = &_tscp_end - &_tscp_start;
4363 		(void) memcpy((void *)tsc_read, (void *)&_tscp_start, cnt);
4364 		break;
4365 	case X86_TSC_MFENCE:
4366 		cnt = &_tsc_mfence_end - &_tsc_mfence_start;
4367 		(void) memcpy((void *)tsc_read,
4368 		    (void *)&_tsc_mfence_start, cnt);
4369 		break;
4370 	case X86_TSC_LFENCE:
4371 		cnt = &_tsc_lfence_end - &_tsc_lfence_start;
4372 		(void) memcpy((void *)tsc_read,
4373 		    (void *)&_tsc_lfence_start, cnt);
4374 		break;
4375 	default:
4376 		break;
4377 	}
4378 }
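/*
 * Editor's note (an inference from the code above, not from the
 * original comments): the _*_start/_*_end pairs are labels assumed to
 * bracket alternative implementations of tsc_read(); patch_tsc_read()
 * selects one based on the TSC capabilities passed in 'flag' and
 * copies its text over the default tsc_read() in place.
 */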
4379 
4380 int
4381 cpuid_deep_cstates_supported(void)
4382 {
4383 	struct cpuid_info *cpi;
4384 	struct cpuid_regs regs;
4385 
4386 	ASSERT(cpuid_checkpass(CPU, 1));
4387 
4388 	cpi = CPU->cpu_m.mcpu_cpi;
4389 
4390 	if (!is_x86_feature(x86_featureset, X86FSET_CPUID))
4391 		return (0);
4392 
4393 	switch (cpi->cpi_vendor) {
4394 	case X86_VENDOR_Intel:
4395 		if (cpi->cpi_xmaxeax < 0x80000007)
4396 			return (0);
4397 
4398 		/*
4399 		 * Does the TSC run at a constant rate in all ACPI C-states?
4400 		 */
4401 		regs.cp_eax = 0x80000007;
4402 		(void) __cpuid_insn(&regs);
4403 		return (regs.cp_edx & CPUID_TSC_CSTATE_INVARIANCE);
4404 
4405 	default:
4406 		return (0);
4407 	}
4408 }
4409 
4410 #endif	/* !__xpv */
4411 
4412 void
4413 post_startup_cpu_fixups(void)
4414 {
4415 #ifndef __xpv
4416 	/*
4417 	 * Some AMD processors support C1E state. Entering this state will
4418 	 * cause the local APIC timer to stop, which we can't deal with at
4419 	 * this time.
4420 	 */
4421 	if (cpuid_getvendor(CPU) == X86_VENDOR_AMD) {
4422 		on_trap_data_t otd;
4423 		uint64_t reg;
4424 
4425 		if (!on_trap(&otd, OT_DATA_ACCESS)) {
4426 			reg = rdmsr(MSR_AMD_INT_PENDING_CMP_HALT);
4427 			/* Disable C1E state if it is enabled by BIOS */
4428 			if ((reg >> AMD_ACTONCMPHALT_SHIFT) &
4429 			    AMD_ACTONCMPHALT_MASK) {
4430 				reg &= ~(AMD_ACTONCMPHALT_MASK <<
4431 				    AMD_ACTONCMPHALT_SHIFT);
4432 				wrmsr(MSR_AMD_INT_PENDING_CMP_HALT, reg);
4433 			}
4434 		}
4435 		no_trap();
4436 	}
4437 #endif	/* !__xpv */
4438 }
4439 
4440 /*
4441  * Set up the registers necessary to enable the XSAVE feature on this
4442  * processor.  This function needs to be called early enough that no
4443  * xsave/xrstor ops execute on the processor before the registers are set up.
4444  *
4445  * Current implementation has the following assumption:
4446  * - cpuid_pass1() is done, so that X86 features are known.
4447  * - fpu_probe() is done, so that fp_save_mech is chosen.
4448  */
4449 void
4450 xsave_setup_msr(cpu_t *cpu)
4451 {
4452 	ASSERT(fp_save_mech == FP_XSAVE);
4453 	ASSERT(is_x86_feature(x86_featureset, X86FSET_XSAVE));
4454 
4455 	/* Enable OSXSAVE in CR4. */
4456 	setcr4(getcr4() | CR4_OSXSAVE);
4457 	/*
4458 	 * Update SW copy of ECX, so that /dev/cpu/self/cpuid will report
4459 	 * correct value.
4460 	 */
4461 	cpu->cpu_m.mcpu_cpi->cpi_std[1].cp_ecx |= CPUID_INTC_ECX_OSXSAVE;
4462 	setup_xfem();
4463 }
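/*
 * Editor's sketch (assumption, not verified against setup_xfem()'s
 * definition): with CR4.OSXSAVE set, the extended feature enable mask
 * (XCR0) is programmed with the xsetbv instruction, conceptually
 *
 *	xsetbv(0, XFEATURE_LEGACY_FP | XFEATURE_SSE | ...);
 *
 * where the XFEATURE_* names here are hypothetical placeholders;
 * setup_xfem() is expected to perform this step.
 */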
4464 
4465 /*
4466  * Starting with the Westmere processor, the local
4467  * APIC timer will continue running in all C-states,
4468  * including the deepest C-states.
4469  */
4470 int
4471 cpuid_arat_supported(void)
4472 {
4473 	struct cpuid_info *cpi;
4474 	struct cpuid_regs regs;
4475 
4476 	ASSERT(cpuid_checkpass(CPU, 1));
4477 	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4478 
4479 	cpi = CPU->cpu_m.mcpu_cpi;
4480 
4481 	switch (cpi->cpi_vendor) {
4482 	case X86_VENDOR_Intel:
4483 		/*
4484 		 * Always-running Local APIC Timer is
4485 		 * indicated by CPUID.6.EAX[2].
4486 		 */
4487 		if (cpi->cpi_maxeax >= 6) {
4488 			regs.cp_eax = 6;
4489 			(void) cpuid_insn(NULL, &regs);
4490 			return (regs.cp_eax & CPUID_CSTATE_ARAT);
4491 		} else {
4492 			return (0);
4493 		}
4494 	default:
4495 		return (0);
4496 	}
4497 }
4498 
4499 /*
4500  * Check support for Intel ENERGY_PERF_BIAS feature
4501  */
4502 int
4503 cpuid_iepb_supported(struct cpu *cp)
4504 {
4505 	struct cpuid_info *cpi = cp->cpu_m.mcpu_cpi;
4506 	struct cpuid_regs regs;
4507 
4508 	ASSERT(cpuid_checkpass(cp, 1));
4509 
4510 	if (!(is_x86_feature(x86_featureset, X86FSET_CPUID)) ||
4511 	    !(is_x86_feature(x86_featureset, X86FSET_MSR))) {
4512 		return (0);
4513 	}
4514 
4515 	/*
4516 	 * Intel ENERGY_PERF_BIAS MSR is indicated by
4517 	 * capability bit CPUID.6.ECX.3
4518 	 */
4519 	if ((cpi->cpi_vendor != X86_VENDOR_Intel) || (cpi->cpi_maxeax < 6))
4520 		return (0);
4521 
4522 	regs.cp_eax = 0x6;
4523 	(void) cpuid_insn(NULL, &regs);
4524 	return (regs.cp_ecx & CPUID_EPB_SUPPORT);
4525 }
4526 
4527 /*
4528  * Check support for TSC deadline timer
4529  *
4530  * The TSC deadline timer provides a superior software programming
4531  * model over the local APIC timer, eliminating "time drift":
4532  * instead of specifying a relative time, software specifies an
4533  * absolute time as the target at which the processor should
4534  * generate a timer event.
4535  */
4536 int
4537 cpuid_deadline_tsc_supported(void)
4538 {
4539 	struct cpuid_info *cpi = CPU->cpu_m.mcpu_cpi;
4540 	struct cpuid_regs regs;
4541 
4542 	ASSERT(cpuid_checkpass(CPU, 1));
4543 	ASSERT(is_x86_feature(x86_featureset, X86FSET_CPUID));
4544 
4545 	switch (cpi->cpi_vendor) {
4546 	case X86_VENDOR_Intel:
4547 		if (cpi->cpi_maxeax >= 1) {
4548 			regs.cp_eax = 1;
4549 			(void) cpuid_insn(NULL, &regs);
4550 			return (regs.cp_ecx & CPUID_DEADLINE_TSC);
4551 		} else {
4552 			return (0);
4553 		}
4554 	default:
4555 		return (0);
4556 	}
4557 }
4558 
4559 #if defined(__amd64) && !defined(__xpv)
4560 /*
4561  * Patch in versions of bcopy for high-performance Intel Nehalem (Nhm)
4562  * processors and later...
4563  */
4564 void
4565 patch_memops(uint_t vendor)
4566 {
4567 	size_t cnt, i;
4568 	caddr_t to, from;
4569 
4570 	if ((vendor == X86_VENDOR_Intel) &&
4571 	    is_x86_feature(x86_featureset, X86FSET_SSE4_2)) {
4572 		cnt = &bcopy_patch_end - &bcopy_patch_start;
4573 		to = &bcopy_ck_size;
4574 		from = &bcopy_patch_start;
4575 		for (i = 0; i < cnt; i++) {
4576 			*to++ = *from++;
4577 		}
4578 	}
4579 }
4580 #endif  /* __amd64 && !__xpv */
4581 
4582 /*
4583  * This function finds the number of bits needed to represent the number of
4584  * cores per chip and the number of strands per core on Intel platforms.
4585  * It reuses the x2APIC cpuid logic from cpuid_pass2().
4586  */
4587 void
4588 cpuid_get_ext_topo(uint_t vendor, uint_t *core_nbits, uint_t *strand_nbits)
4589 {
4590 	struct cpuid_regs regs;
4591 	struct cpuid_regs *cp = &regs;
4592 
4593 	if (vendor != X86_VENDOR_Intel) {
4594 		return;
4595 	}
4596 
4597 	/* if the max cpuid level is at least 0xB, extended topo is available. */
4598 	cp->cp_eax = 0;
4599 	if (__cpuid_insn(cp) >= 0xB) {
4600 
4601 		cp->cp_eax = 0xB;
4602 		cp->cp_edx = cp->cp_ebx = cp->cp_ecx = 0;
4603 		(void) __cpuid_insn(cp);
4604 
4605 		/*
4606 		 * Check that CPUID.(EAX=0BH, ECX=0H):EBX is non-zero, which
4607 		 * indicates that the extended topology enumeration leaf is
4608 		 * available.
4609 		 */
4610 		if (cp->cp_ebx) {
4611 			uint_t coreid_shift = 0;
4612 			uint_t chipid_shift = 0;
4613 			uint_t i;
4614 			uint_t level;
4615 
4616 			for (i = 0; i < CPI_FNB_ECX_MAX; i++) {
4617 				cp->cp_eax = 0xB;
4618 				cp->cp_ecx = i;
4619 
4620 				(void) __cpuid_insn(cp);
4621 				level = CPI_CPU_LEVEL_TYPE(cp);
4622 
4623 				if (level == 1) {
4624 					/*
4625 					 * Thread level processor topology:
4626 					 * number of bits to shift the APIC
4627 					 * ID right to get the coreid.
4628 					 */
4629 					coreid_shift = BITX(cp->cp_eax, 4, 0);
4630 				} else if (level == 2) {
4631 					/*
4632 					 * Core level processor topology:
4633 					 * number of bits to shift the APIC
4634 					 * ID right to get the chipid.
4635 					 */
4636 					chipid_shift = BITX(cp->cp_eax, 4, 0);
4637 				}
4638 			}
4639 
4640 			if (coreid_shift > 0 && chipid_shift > coreid_shift) {
4641 				*strand_nbits = coreid_shift;
4642 				*core_nbits = chipid_shift - coreid_shift;
4643 			}
4644 		}
4645 	}
4646 }
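/*
 * Illustrative sketch (editor's addition): given the two shift counts
 * computed above, a hypothetical consumer could decompose an APIC ID
 * into its topology components as follows:
 */
#if 0	/* example only */
	uint_t strand = apicid & ((1 << coreid_shift) - 1);
	uint_t core = (apicid >> coreid_shift) &
	    ((1 << (chipid_shift - coreid_shift)) - 1);
	uint_t chip = apicid >> chipid_shift;
#endif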
4647