xref: /netbsd-src/sys/arch/x86/x86/cpu_topology.c (revision 645350cbba96ca90f853f13318f92516bfb65c60)
1 /*	$NetBSD: cpu_topology.c,v 1.21 2022/10/12 10:26:09 msaitoh Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009 Mindaugas Rasiukevicius <rmind at NetBSD org>,
5  * Copyright (c) 2008 YAMAMOTO Takashi,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 /*
31  * x86 CPU topology detection.
32  *
33  * References:
34  * - 53668.pdf (7.10.2), 276613.pdf
35  * - 31116.pdf, 41256.pdf, 25481.pdf
36  */
37 
38 #include <sys/cdefs.h>
39 __KERNEL_RCSID(0, "$NetBSD: cpu_topology.c,v 1.21 2022/10/12 10:26:09 msaitoh Exp $");
40 
41 #include "acpica.h"
42 
43 #include <sys/param.h>
44 #include <sys/bitops.h>
45 #include <sys/cpu.h>
46 
47 #include <machine/specialreg.h>
48 
49 #include <dev/acpi/acpi_srat.h>
50 
51 #include <x86/cpufunc.h>
52 #include <x86/cputypes.h>
53 #include <x86/cpuvar.h>
54 
/*
 * x86_cpu_get_numa_node: look up the NUMA node ID for an APIC ID.
 *
 * When ACPICA support is configured (NACPICA > 0), every CPU entry of
 * every node reported by the acpisrat_*() accessors is examined in order
 * and the nodeid of the first entry whose apicid equals apic_id is
 * returned.  Returns 0 if no entry matches, or if ACPICA support is not
 * configured.
 */
static uint32_t
x86_cpu_get_numa_node(uint32_t apic_id)
{
#if NACPICA > 0
	const uint32_t node_count = acpisrat_nodes();
	uint32_t node;

	for (node = 0; node < node_count; node++) {
		const uint32_t cpu_count = acpisrat_node_cpus(node);
		uint32_t idx;

		for (idx = 0; idx < cpu_count; idx++) {
			struct acpisrat_cpu entry;

			acpisrat_cpu(node, idx, &entry);
			if (entry.apicid != apic_id)
				continue;
			return entry.nodeid;
		}
	}
#endif
	/* No matching entry (or no ACPICA): report node 0. */
	return 0;
}
75 
76 void
x86_cpu_topology(struct cpu_info * ci)77 x86_cpu_topology(struct cpu_info *ci)
78 {
79 	u_int lp_max;		/* Logical processors per package (node) */
80 	u_int core_max;		/* Core per package */
81 	int n, cpu_family, apic_id, smt_bits, core_bits = 0;
82 	uint32_t descs[4];
83 	u_int package_id, core_id, smt_id, numa_id;
84 
85 	apic_id = ci->ci_initapicid;
86 	cpu_family = CPUID_TO_FAMILY(ci->ci_signature);
87 
88 	/* Initial values. */
89 	package_id = apic_id;
90 	core_id = 0;
91 	smt_id = 0;
92 	numa_id = x86_cpu_get_numa_node(apic_id);
93 
94 	switch (cpu_vendor) {
95 	case CPUVENDOR_INTEL:
96 		if (cpu_family < 6) {
97 			cpu_topology_set(ci, package_id, core_id, smt_id,
98 			    numa_id);
99 			return;
100 		}
101 		break;
102 	case CPUVENDOR_AMD:
103 		if (cpu_family < 0xf) {
104 			cpu_topology_set(ci, package_id, core_id, smt_id,
105 			    numa_id);
106 			return;
107 		}
108 		break;
109 	default:
110 		return;
111 	}
112 
113 	/* Check for HTT support.  See notes below regarding AMD. */
114 	if ((ci->ci_feat_val[0] & CPUID_HTT) != 0) {
115 		/* Maximum number of LPs sharing a cache (ebx[23:16]). */
116 		x86_cpuid(1, descs);
117 		lp_max = __SHIFTOUT(descs[1], CPUID_HTT_CORES);
118 	} else {
119 		lp_max = 1;
120 	}
121 
122 	switch (cpu_vendor) {
123 	case CPUVENDOR_INTEL:
124 		/* Check for leaf 4 support. */
125 		if (ci->ci_max_cpuid >= 4) {
126 			/* Maximum number of Cores per package (eax[31:26]). */
127 			x86_cpuid2(4, 0, descs);
128 			core_max = __SHIFTOUT(descs[0], CPUID_DCP_CORE_P_PKG)
129 			    + 1;
130 		} else {
131 			core_max = 1;
132 		}
133 		break;
134 	case CPUVENDOR_AMD:
135 		/* In a case of AMD, HTT flag means CMP support. */
136 		if ((ci->ci_feat_val[0] & CPUID_HTT) == 0) {
137 			core_max = 1;
138 			break;
139 		}
140 		/* Legacy Method, LPs represent Cores. */
141 		if (cpu_family < 0x10 || ci->ci_max_ext_cpuid < 0x80000008) {
142 			core_max = lp_max;
143 			break;
144 		}
145 
146 		/* Number of Cores (NC) per package. */
147 		x86_cpuid(0x80000008, descs);
148 		core_max = __SHIFTOUT(descs[2], CPUID_CAPEX_NC) + 1;
149 		/* Amount of bits representing Core ID (ecx[15:12]). */
150 		n = __SHIFTOUT(descs[2], CPUID_CAPEX_ApicIdSize);
151 		if (n != 0) {
152 			/*
153 			 * Extended Method.
154 			 * core_max = 2 ^ n (power of two)
155 			 */
156 			core_bits = n;
157 		}
158 		break;
159 	default:
160 		core_max = 1;
161 	}
162 
163 	KASSERT(lp_max >= core_max);
164 	smt_bits = ilog2((lp_max / core_max) - 1) + 1;
165 	if (core_bits == 0) {
166 		core_bits = ilog2(core_max - 1) + 1;
167 	}
168 
169 	/*
170 	 * Family 0xf and 0x10 processors may have different structure of
171 	 * APIC ID.  Detect that via special MSR register and move the bits,
172 	 * if necessary (ref: InitApicIdCpuIdLo).
173 	 */
174 	if (cpu_vendor == CPUVENDOR_AMD && cpu_family < 0x11) {	/* XXX */
175 		const uint64_t reg = rdmsr(MSR_NB_CFG);
176 		if ((reg & NB_CFG_INITAPICCPUIDLO) == 0) {
177 			/*
178 			 * 0xf:  { CoreId, NodeId[2:0] }
179 			 * 0x10: { CoreId[1:0], 000b, NodeId[2:0] }
180 			 */
181 			const u_int node_id = apic_id & __BITS(0, 2);
182 			apic_id = (cpu_family == 0xf) ?
183 			    (apic_id >> core_bits) | (node_id << core_bits) :
184 			    (apic_id >> 5) | (node_id << 2);
185 		}
186 	}
187 
188 	/* Family 0x17 and above support SMT */
189 	if (cpu_vendor == CPUVENDOR_AMD && cpu_family >= 0x17) { /* XXX */
190 		x86_cpuid(0x8000001e, descs);
191 		const u_int threads = __SHIFTOUT(descs[1],
192 		    CPUID_AMD_PROCT_THREADS_PER_CORE) + 1;
193 
194 		KASSERT(smt_bits == 0);
195 		smt_bits = ilog2(threads);
196 		KASSERT(smt_bits <= core_bits);
197 		core_bits -= smt_bits;
198 	}
199 
200 	if (smt_bits + core_bits) {
201 		if (smt_bits + core_bits < sizeof(apic_id) * NBBY)
202 			package_id = apic_id >> (smt_bits + core_bits);
203 		else
204 			package_id = 0;
205 	}
206 	if (core_bits) {
207 		u_int core_mask = __BITS(smt_bits, smt_bits + core_bits - 1);
208 		core_id = __SHIFTOUT(apic_id, core_mask);
209 	}
210 	if (smt_bits) {
211 		u_int smt_mask = __BITS(0, smt_bits - 1);
212 		smt_id = __SHIFTOUT(apic_id, smt_mask);
213 	}
214 
215 	cpu_topology_set(ci, package_id, core_id, smt_id, numa_id);
216 }
217