/*	$NetBSD: subr_cpu.c,v 1.22 2024/03/05 20:59:41 thorpej Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines shared with rump.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.22 2024/03/05 20:59:41 thorpej Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>

static void	cpu_topology_fake1(struct cpu_info *);

kmutex_t	cpu_lock		__cacheline_aligned;
int		ncpu			__read_mostly;
int		ncpuonline		__read_mostly;
bool		mp_online		__read_mostly;
static bool	cpu_topology_present	__read_mostly;
static bool	cpu_topology_haveslow	__read_mostly;
int64_t		cpu_counts[CPU_COUNT_MAX];

/* An array of CPUs.  There are ncpu entries. */
struct cpu_info **cpu_infos		__read_mostly;

/* Note: set in mi_cpu_attach() and idle_loop(). */
kcpuset_t *	kcpuset_attached	__read_mostly	= NULL;
kcpuset_t *	kcpuset_running		__read_mostly	= NULL;

static char cpu_model[128];

/*
 * mi_cpu_init: early initialisation of MI CPU related structures.
 *
 * Note: may not block, and the memory allocator is not yet available.
 */
void
mi_cpu_init(void)
{
	struct cpu_info *ci;

	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);
	kcpuset_set(kcpuset_running, 0);

	ci = curcpu();
	cpu_topology_fake1(ci);
}

int
cpu_setmodel(const char *fmt, ...)
{
	int len;
	va_list ap;

	va_start(ap, fmt);
	len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
	va_end(ap);
	return len;
}

const char *
cpu_getmodel(void)
{
	return cpu_model;
}
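
/*
 * Illustrative example (not from this file): MD attach code typically
 * records the model string once with a printf-style call, and consumers
 * read it back, e.g. (vendor_str and name_str are hypothetical names)
 *
 *	cpu_setmodel("%s %s", vendor_str, name_str);
 *	printf("cpu0: %s\n", cpu_getmodel());
 *
 * cpu_setmodel() returns vsnprintf()'s result, so a return value of
 * sizeof(cpu_model) or more indicates the string was truncated.
 */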

bool
cpu_softintr_p(void)
{

	return (curlwp->l_pflag & LP_INTR) != 0;
}

bool
curcpu_stable(void)
{
	struct lwp *const l = curlwp;
	const int pflag = l->l_pflag;
	const int nopreempt = l->l_nopreempt;

	/*
	 * - Softints (LP_INTR) never migrate between CPUs.
	 * - Bound lwps (LP_BOUND), either kthreads created bound to
	 *   a CPU or any lwps bound with curlwp_bind, never migrate.
	 * - If kpreemption is disabled, the lwp can't migrate.
	 * - If we're in interrupt context, preemption is blocked.
	 *
	 * We combine the LP_INTR, LP_BOUND, and l_nopreempt tests into
	 * a single predicted-true branch so this is cheap to assert in
	 * most contexts where it will be used, then fall back to
	 * calling the full kpreempt_disabled() and cpu_intr_p() as
	 * subroutines.
	 *
	 * XXX Is cpu_intr_p redundant with kpreempt_disabled?
	 */
	return __predict_true(((pflag & (LP_INTR|LP_BOUND)) | nopreempt)
	    != 0) ||
	    kpreempt_disabled() ||
	    cpu_intr_p();
}
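
/*
 * Illustrative example (not from this file): curcpu_stable() is meant for
 * assertions guarding code that caches a pointer to the current CPU, e.g.
 *
 *	KASSERT(curcpu_stable());
 *	ci = curcpu();
 *	(use ci, knowing the lwp cannot migrate away from it here)
 */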

/*
 * Collect CPU topology information as each CPU is attached.  This can be
 * called early during boot, so we need to be careful what we do.
 */
void
cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
    u_int smt_id, u_int numa_id)
{
	enum cpu_rel rel;

	cpu_topology_present = true;
	ci->ci_package_id = package_id;
	ci->ci_core_id = core_id;
	ci->ci_smt_id = smt_id;
	ci->ci_numa_id = numa_id;
	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
}

/*
 * Collect CPU relative speed
 */
void
cpu_topology_setspeed(struct cpu_info *ci, bool slow)
{

	cpu_topology_haveslow |= slow;
	ci->ci_is_slow = slow;
}
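
/*
 * Illustrative example (not from this file): MD CPU attach code would
 * typically report each CPU's position and relative speed roughly like
 * this, where the *_id variables and is_slow_core are hypothetical values
 * the port reads from firmware or CPUID-style sources:
 *
 *	cpu_topology_set(ci, package_id, core_id, smt_id, numa_id);
 *	cpu_topology_setspeed(ci, is_slow_core);
 *
 * Once every CPU has been attached, cpu_topology_init() links the
 * per-CPU records together.
 */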

/*
 * Link a CPU into the given circular list.
 */
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
	struct cpu_info *ci3;

	/* Walk to the end of the existing circular list and append. */
	for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
		ci3->ci_nsibling[rel]++;
		if (ci3->ci_sibling[rel] == ci2) {
			break;
		}
	}
	ci->ci_sibling[rel] = ci2;
	ci3->ci_sibling[rel] = ci;
	ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}
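
/*
 * Worked example (illustrative): suppose cpu1 is alone in its list, i.e.
 * cpu1->ci_sibling[rel] == cpu1 and ci_nsibling[rel] == 1.  Then
 *
 *	cpu_topology_link(cpu0, cpu1, rel);
 *		ring is cpu0 -> cpu1 -> cpu0, both counts are 2
 *	cpu_topology_link(cpu2, cpu1, rel);
 *		ring is cpu1 -> cpu0 -> cpu2 -> cpu1, all counts are 3
 *
 * The walk bumps ci_nsibling on every existing member, so the counts on
 * all members of the ring stay in sync.
 */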

/*
 * Print out the topology lists.
 */
static void
cpu_topology_dump(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *ci2;
	const char *names[] = { "core", "pkg", "1st" };
	enum cpu_rel rel;
	int i;

	CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));
	if (ncpu == 1) {
		return;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpu_topology_haveslow)
			aprint_debug("%s ", ci->ci_is_slow ? "slow" : "fast");
		for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
			aprint_debug("%s has %d %s siblings:", cpu_name(ci),
			    ci->ci_nsibling[rel], names[rel]);
			ci2 = ci->ci_sibling[rel];
			i = 0;
			do {
				aprint_debug(" %s", cpu_name(ci2));
				ci2 = ci2->ci_sibling[rel];
			} while (++i < 64 && ci2 != ci->ci_sibling[rel]);
			if (i == 64) {
				aprint_debug(" GAVE UP");
			}
			aprint_debug("\n");
		}
		aprint_debug("%s first in package: %s\n", cpu_name(ci),
		    cpu_name(ci->ci_package1st));
	}
}
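
/*
 * Hypothetical output sketch (not from this file; exact ordering depends
 * on link order, and the lines only appear when debug autoconf messages
 * are enabled, since aprint_debug() is used).  On a 1-package, 2-core,
 * 2-thread machine, cpu0's entry might look roughly like:
 *
 *	cpu0 has 2 core siblings: cpu1 cpu0
 *	cpu0 has 4 pkg siblings: cpu1 cpu2 cpu3 cpu0
 *	cpu0 has 1 1st siblings: cpu0
 *	cpu0 first in package: cpu0
 */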

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Used early in boot, and by cpu_topology_fake().
 */
static void
cpu_topology_fake1(struct cpu_info *ci)
{
	enum cpu_rel rel;

	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
	if (!cpu_topology_present) {
		ci->ci_package_id = cpu_index(ci);
	}
	ci->ci_schedstate.spc_flags |=
	    (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	ci->ci_package1st = ci;
	if (!cpu_topology_haveslow) {
		ci->ci_is_slow = false;
	}
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Don't override ci_package_id, etc., if cpu_topology_present is set.
 * MD code also uses these.
 */
static void
cpu_topology_fake(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpu_topology_fake1(ci);
		/* Undo (early boot) flag set so everything links OK. */
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	}
}

/*
 * Fix up basic CPU topology info.  Right now that means attach each CPU to
 * circular lists of its siblings in the same core, and in the same package.
 */
void
cpu_topology_init(void)
{
	CPU_INFO_ITERATOR cii, cii2;
	struct cpu_info *ci, *ci2, *ci3;
	u_int minsmt, mincore;

	if (!cpu_topology_present) {
		cpu_topology_fake();
		goto linkit;
	}

	/* Find siblings in same core and package. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			/* Duplicate IDs mean the topology info is bogus. */
			if (ci2->ci_package_id == ci->ci_package_id &&
			    ci2->ci_core_id == ci->ci_core_id &&
			    ci2->ci_smt_id == ci->ci_smt_id &&
			    ci2 != ci) {
#ifdef DEBUG
				printf("cpu%u %p pkg %u core %u smt %u same as "
				    "cpu%u %p pkg %u core %u smt %u\n",
				    cpu_index(ci), ci, ci->ci_package_id,
				    ci->ci_core_id, ci->ci_smt_id,
				    cpu_index(ci2), ci2, ci2->ci_package_id,
				    ci2->ci_core_id, ci2->ci_smt_id);
#endif
				printf("cpu_topology_init: info bogus, "
				    "faking it\n");
				cpu_topology_fake();
				goto linkit;
			}
			if (ci2 == ci ||
			    ci2->ci_package_id != ci->ci_package_id) {
				continue;
			}
			/* Find CPUs in the same core. */
			if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
			    ci->ci_core_id == ci2->ci_core_id) {
				cpu_topology_link(ci, ci2, CPUREL_CORE);
			}
			/* Find CPUs in the same package. */
			if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
				cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
			}
			if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
			    ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
				break;
			}
		}
	}

 linkit:
	/* Identify lowest numbered SMT in each core. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci2 = ci3 = ci;
		minsmt = ci->ci_smt_id;
		do {
			if (ci2->ci_smt_id < minsmt) {
				ci3 = ci2;
				minsmt = ci2->ci_smt_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_CORE];
		} while (ci2 != ci);
		ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
	}

	/* Identify lowest numbered SMT in each package. */
	ci3 = NULL;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
			continue;
		}
		ci2 = ci3 = ci;
		mincore = ci->ci_core_id;
		do {
			if ((ci2->ci_schedstate.spc_flags &
			    SPCF_CORE1ST) != 0 &&
			    ci2->ci_core_id < mincore) {
				ci3 = ci2;
				mincore = ci2->ci_core_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);

		if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
			/* Already identified - nothing more to do. */
			continue;
		}
		ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;

		/* Walk through all CPUs in package and point to first. */
		ci2 = ci3;
		do {
			ci2->ci_package1st = ci3;
			ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci3);

		/* Now look for somebody else to link to. */
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
			    != 0 && ci2 != ci3) {
				cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
				break;
			}
		}
	}

	/* Walk through all packages, starting with value of ci3 from above. */
	KASSERT(ci3 != NULL);
	ci = ci3;
	do {
		/* Walk through CPUs in the package and copy in PACKAGE1ST. */
		ci2 = ci;
		do {
			ci2->ci_sibling[CPUREL_PACKAGE1ST] =
			    ci->ci_sibling[CPUREL_PACKAGE1ST];
			ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
			    ci->ci_nsibling[CPUREL_PACKAGE1ST];
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);
		ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
	} while (ci != ci3);

	if (cpu_topology_haveslow) {
		/*
		 * For asymmetric systems where some CPUs are slower than
		 * others, mark first class CPUs for the scheduler.  This
		 * conflicts with SMT right now so whinge if observed.
		 */
		if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
			printf("cpu_topology_init: asymmetric & SMT??\n");
		}
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (!ci->ci_is_slow) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	} else {
		/*
		 * For any other configuration mark the 1st CPU in each
		 * core as a first class CPU.
		 */
		for (CPU_INFO_FOREACH(cii, ci)) {
			if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	}

	cpu_topology_dump();
}
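
/*
 * Summary of the state established above (derived from the code, offered
 * here as a reading aid): after cpu_topology_init() each CPU is linked to
 * its siblings in the same core and the same package, ci_package1st and
 * ci_sibling[CPUREL_PACKAGE1ST] point at/into the ring of package-leading
 * CPUs, and SPCF_CORE1ST, SPCF_PACKAGE1ST and SPCF_1STCLASS mark the CPUs
 * the scheduler should prefer.
 */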

/*
 * Adjust one count, for a counter that's NOT updated from interrupt
 * context.  Hardly worth making an inline due to preemption stuff.
 */
void
cpu_count(enum cpu_count idx, int64_t delta)
{
	lwp_t *l = curlwp;
	KPREEMPT_DISABLE(l);
	l->l_cpu->ci_counts[idx] += delta;
	KPREEMPT_ENABLE(l);
}
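
/*
 * Illustrative example (not from this file): callers pass one of the
 * enum cpu_count indices and a signed delta, e.g. a hypothetical
 * CPU_COUNT_FOO counter would be bumped from thread context with
 *
 *	cpu_count(CPU_COUNT_FOO, 1);
 *
 * The per-CPU totals only become visible in cpu_counts[] after a call
 * to cpu_count_sync() below.
 */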

/*
 * Fetch fresh sum total for all counts.  Expensive - don't call often.
 *
 * If poll is true, the caller is okay with less recent values (but
 * no more than 1/hz seconds old).  Where this is called very often, that
 * should be the case.
 *
 * This should be reasonably quick so that any value collected isn't
 * totally out of whack, and it can also be called from interrupt context,
 * so go to splvm() while summing the counters.  It's tempting to use a spin
 * mutex here but this routine is called from DDB.
 */
void
cpu_count_sync(bool poll)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int64_t sum[CPU_COUNT_MAX], *ptr;
	static int lasttick;
	int curtick, s;
	enum cpu_count i;

	KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));

	if (__predict_false(!mp_online)) {
		memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
		return;
	}

	s = splvm();
	curtick = getticks();
	if (poll && atomic_load_acquire(&lasttick) == curtick) {
		splx(s);
		return;
	}
	memset(sum, 0, sizeof(sum));
	curcpu()->ci_counts[CPU_COUNT_SYNC]++;
	for (CPU_INFO_FOREACH(cii, ci)) {
		ptr = ci->ci_counts;
		/* Sum unrolled by 8; KASSERT checks CPU_COUNT_MAX % 8 == 0. */
		for (i = 0; i < CPU_COUNT_MAX; i += 8) {
			sum[i+0] += ptr[i+0];
			sum[i+1] += ptr[i+1];
			sum[i+2] += ptr[i+2];
			sum[i+3] += ptr[i+3];
			sum[i+4] += ptr[i+4];
			sum[i+5] += ptr[i+5];
			sum[i+6] += ptr[i+6];
			sum[i+7] += ptr[i+7];
		}
		KASSERT(i == CPU_COUNT_MAX);
	}
	memcpy(cpu_counts, sum, sizeof(cpu_counts));
	atomic_store_release(&lasttick, curtick);
	splx(s);
}
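
/*
 * Illustrative usage (not from this file): an infrequent consumer that
 * wants exact totals, such as a stats sysctl handler, might do
 *
 *	cpu_count_sync(false);
 *	total = cpu_counts[CPU_COUNT_FOO];	(hypothetical index)
 *
 * while a caller on a hot path would pass true and accept totals that
 * are up to one clock tick (1/hz seconds) stale.
 */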