/*	$NetBSD: cpu_topology.c,v 1.21 2022/10/12 10:26:09 msaitoh Exp $	*/

/*-
 * Copyright (c) 2009 Mindaugas Rasiukevicius <rmind at NetBSD org>,
 * Copyright (c) 2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * x86 CPU topology detection.
 *
 * References:
 * - 53668.pdf (7.10.2), 276613.pdf
 * - 31116.pdf, 41256.pdf, 25481.pdf
 */

38cc8d7ff4Srmind #include <sys/cdefs.h>
39*645350cbSmsaitoh __KERNEL_RCSID(0, "$NetBSD: cpu_topology.c,v 1.21 2022/10/12 10:26:09 msaitoh Exp $");
408ae3ad1dSad
418ae3ad1dSad #include "acpica.h"
42cc8d7ff4Srmind
43cc8d7ff4Srmind #include <sys/param.h>
44cc8d7ff4Srmind #include <sys/bitops.h>
455d954ab6Sad #include <sys/cpu.h>
46cc8d7ff4Srmind
47cc8d7ff4Srmind #include <machine/specialreg.h>
48cc8d7ff4Srmind
498ae3ad1dSad #include <dev/acpi/acpi_srat.h>
508ae3ad1dSad
51cc8d7ff4Srmind #include <x86/cpufunc.h>
52cc8d7ff4Srmind #include <x86/cputypes.h>
53cc8d7ff4Srmind #include <x86/cpuvar.h>
54cc8d7ff4Srmind
/*
 * x86_cpu_get_numa_node: look up the NUMA node ID of a CPU by APIC ID.
 *
 * When the kernel is built with ACPICA (NACPICA > 0), every node
 * reported by acpisrat_nodes() is scanned, and for each node every CPU
 * entry reported by acpisrat_node_cpus().  The nodeid of the first
 * entry whose apicid equals apic_id is returned.
 *
 * Returns 0 when no entry matches, or when ACPICA support is not
 * compiled in.  Note that 0 is therefore both the "not found" value and
 * a possible node ID; callers cannot tell the two apart.
 */
static uint32_t
x86_cpu_get_numa_node(uint32_t apic_id)
{
#if NACPICA > 0
	uint32_t i, j, nn, nc;
	struct acpisrat_cpu c;

	nn = acpisrat_nodes();
	for (i = 0; i < nn; i++) {
		nc = acpisrat_node_cpus(i);
		for (j = 0; j < nc; j++) {
			/* Fetch the j-th CPU entry of node i into c. */
			acpisrat_cpu(i, j, &c);
			if (c.apicid == apic_id) {
				return c.nodeid;
			}
		}
	}
#endif
	return 0;
}
758ae3ad1dSad
76cc8d7ff4Srmind void
x86_cpu_topology(struct cpu_info * ci)77b6c25885Srmind x86_cpu_topology(struct cpu_info *ci)
78cc8d7ff4Srmind {
79da7cf0efSrmind u_int lp_max; /* Logical processors per package (node) */
80cfa533f9Srmind u_int core_max; /* Core per package */
81cfa533f9Srmind int n, cpu_family, apic_id, smt_bits, core_bits = 0;
82fc194a52Sdsl uint32_t descs[4];
838ae3ad1dSad u_int package_id, core_id, smt_id, numa_id;
84cc8d7ff4Srmind
85cfa533f9Srmind apic_id = ci->ci_initapicid;
86b1a32cacSmsaitoh cpu_family = CPUID_TO_FAMILY(ci->ci_signature);
87cfa533f9Srmind
88cfa533f9Srmind /* Initial values. */
895d954ab6Sad package_id = apic_id;
905d954ab6Sad core_id = 0;
915d954ab6Sad smt_id = 0;
928ae3ad1dSad numa_id = x86_cpu_get_numa_node(apic_id);
93cfa533f9Srmind
94cfa533f9Srmind switch (cpu_vendor) {
95cfa533f9Srmind case CPUVENDOR_INTEL:
965d954ab6Sad if (cpu_family < 6) {
978ae3ad1dSad cpu_topology_set(ci, package_id, core_id, smt_id,
986149350cSskrll numa_id);
99cc8d7ff4Srmind return;
1005d954ab6Sad }
101cfa533f9Srmind break;
102cfa533f9Srmind case CPUVENDOR_AMD:
1035d954ab6Sad if (cpu_family < 0xf) {
1048ae3ad1dSad cpu_topology_set(ci, package_id, core_id, smt_id,
1056149350cSskrll numa_id);
106cfa533f9Srmind return;
1075d954ab6Sad }
108cfa533f9Srmind break;
109cfa533f9Srmind default:
110cfa533f9Srmind return;
111cfa533f9Srmind }
112cc8d7ff4Srmind
113cfa533f9Srmind /* Check for HTT support. See notes below regarding AMD. */
114bc042041Sjym if ((ci->ci_feat_val[0] & CPUID_HTT) != 0) {
115cfa533f9Srmind /* Maximum number of LPs sharing a cache (ebx[23:16]). */
116cc8d7ff4Srmind x86_cpuid(1, descs);
117ed892e9aSmsaitoh lp_max = __SHIFTOUT(descs[1], CPUID_HTT_CORES);
118cfa533f9Srmind } else {
119cfa533f9Srmind lp_max = 1;
120cc8d7ff4Srmind }
121cfa533f9Srmind
122cfa533f9Srmind switch (cpu_vendor) {
123cfa533f9Srmind case CPUVENDOR_INTEL:
124cfa533f9Srmind /* Check for leaf 4 support. */
125fc194a52Sdsl if (ci->ci_max_cpuid >= 4) {
126cfa533f9Srmind /* Maximum number of Cores per package (eax[31:26]). */
127cc8d7ff4Srmind x86_cpuid2(4, 0, descs);
128b430389aSmsaitoh core_max = __SHIFTOUT(descs[0], CPUID_DCP_CORE_P_PKG)
129b430389aSmsaitoh + 1;
130cfa533f9Srmind } else {
131cfa533f9Srmind core_max = 1;
132cc8d7ff4Srmind }
133cfa533f9Srmind break;
134cfa533f9Srmind case CPUVENDOR_AMD:
135cfa533f9Srmind /* In a case of AMD, HTT flag means CMP support. */
136bc042041Sjym if ((ci->ci_feat_val[0] & CPUID_HTT) == 0) {
137cfa533f9Srmind core_max = 1;
138cfa533f9Srmind break;
139cfa533f9Srmind }
140cfa533f9Srmind /* Legacy Method, LPs represent Cores. */
141fc194a52Sdsl if (cpu_family < 0x10 || ci->ci_max_ext_cpuid < 0x80000008) {
142cfa533f9Srmind core_max = lp_max;
143cfa533f9Srmind break;
144cfa533f9Srmind }
14571d727f2Smlelstv
146*645350cbSmsaitoh /* Number of Cores (NC) per package. */
147cfa533f9Srmind x86_cpuid(0x80000008, descs);
148*645350cbSmsaitoh core_max = __SHIFTOUT(descs[2], CPUID_CAPEX_NC) + 1;
149cfa533f9Srmind /* Amount of bits representing Core ID (ecx[15:12]). */
150*645350cbSmsaitoh n = __SHIFTOUT(descs[2], CPUID_CAPEX_ApicIdSize);
151cfa533f9Srmind if (n != 0) {
152cfa533f9Srmind /*
153cfa533f9Srmind * Extended Method.
15471d727f2Smlelstv * core_max = 2 ^ n (power of two)
155cfa533f9Srmind */
15671d727f2Smlelstv core_bits = n;
157cfa533f9Srmind }
158cfa533f9Srmind break;
159cfa533f9Srmind default:
160cfa533f9Srmind core_max = 1;
161cfa533f9Srmind }
162cfa533f9Srmind
163cc8d7ff4Srmind KASSERT(lp_max >= core_max);
164cfa533f9Srmind smt_bits = ilog2((lp_max / core_max) - 1) + 1;
165cfa533f9Srmind if (core_bits == 0) {
166cc8d7ff4Srmind core_bits = ilog2(core_max - 1) + 1;
167cfa533f9Srmind }
168cfa533f9Srmind
169cfa533f9Srmind /*
170cfa533f9Srmind * Family 0xf and 0x10 processors may have different structure of
171cfa533f9Srmind * APIC ID. Detect that via special MSR register and move the bits,
172cfa533f9Srmind * if necessary (ref: InitApicIdCpuIdLo).
173cfa533f9Srmind */
174cfa533f9Srmind if (cpu_vendor == CPUVENDOR_AMD && cpu_family < 0x11) { /* XXX */
175cfa533f9Srmind const uint64_t reg = rdmsr(MSR_NB_CFG);
176cfa533f9Srmind if ((reg & NB_CFG_INITAPICCPUIDLO) == 0) {
177cfa533f9Srmind /*
178cfa533f9Srmind * 0xf: { CoreId, NodeId[2:0] }
179cfa533f9Srmind * 0x10: { CoreId[1:0], 000b, NodeId[2:0] }
180cfa533f9Srmind */
181cfa533f9Srmind const u_int node_id = apic_id & __BITS(0, 2);
182cfa533f9Srmind apic_id = (cpu_family == 0xf) ?
183cfa533f9Srmind (apic_id >> core_bits) | (node_id << core_bits) :
184cfa533f9Srmind (apic_id >> 5) | (node_id << 2);
185cfa533f9Srmind }
186cfa533f9Srmind }
187cfa533f9Srmind
188d626ccb0Smrg /* Family 0x17 and above support SMT */
189d626ccb0Smrg if (cpu_vendor == CPUVENDOR_AMD && cpu_family >= 0x17) { /* XXX */
1906703d1a2Smlelstv x86_cpuid(0x8000001e, descs);
191*645350cbSmsaitoh const u_int threads = __SHIFTOUT(descs[1],
192*645350cbSmsaitoh CPUID_AMD_PROCT_THREADS_PER_CORE) + 1;
1936703d1a2Smlelstv
19419472c03Smlelstv KASSERT(smt_bits == 0);
1956703d1a2Smlelstv smt_bits = ilog2(threads);
19619472c03Smlelstv KASSERT(smt_bits <= core_bits);
1976703d1a2Smlelstv core_bits -= smt_bits;
1986703d1a2Smlelstv }
1996703d1a2Smlelstv
200cc8d7ff4Srmind if (smt_bits + core_bits) {
201eb468dc1Smlelstv if (smt_bits + core_bits < sizeof(apic_id) * NBBY)
2025d954ab6Sad package_id = apic_id >> (smt_bits + core_bits);
203eb468dc1Smlelstv else
2045d954ab6Sad package_id = 0;
205cc8d7ff4Srmind }
206cc8d7ff4Srmind if (core_bits) {
207cc8d7ff4Srmind u_int core_mask = __BITS(smt_bits, smt_bits + core_bits - 1);
2085d954ab6Sad core_id = __SHIFTOUT(apic_id, core_mask);
209cc8d7ff4Srmind }
210cc8d7ff4Srmind if (smt_bits) {
211cc8d7ff4Srmind u_int smt_mask = __BITS(0, smt_bits - 1);
2125d954ab6Sad smt_id = __SHIFTOUT(apic_id, smt_mask);
213cc8d7ff4Srmind }
2145d954ab6Sad
2156149350cSskrll cpu_topology_set(ci, package_id, core_id, smt_id, numa_id);
216cc8d7ff4Srmind }
217