/*	$NetBSD: subr_cpu.c,v 1.22 2024/03/05 20:59:41 thorpej Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines shared with rump.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.22 2024/03/05 20:59:41 thorpej Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>

static void	cpu_topology_fake1(struct cpu_info *);

kmutex_t	cpu_lock		__cacheline_aligned;
int		ncpu			__read_mostly;
int		ncpuonline		__read_mostly;
bool		mp_online		__read_mostly;
static bool	cpu_topology_present	__read_mostly;
static bool	cpu_topology_haveslow	__read_mostly;
int64_t		cpu_counts[CPU_COUNT_MAX];

/* An array of CPUs.  There are ncpu entries. */
struct cpu_info **cpu_infos		__read_mostly;

/* Note: set on mi_cpu_attach() and idle_loop(). */
kcpuset_t *	kcpuset_attached	__read_mostly	= NULL;
kcpuset_t *	kcpuset_running		__read_mostly	= NULL;

static char cpu_model[128];

/*
 * mi_cpu_init: early initialisation of MI CPU related structures.
 *
 * Note: may not block and memory allocator is not yet available.
 */
void
mi_cpu_init(void)
{
	struct cpu_info *ci;

	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);
	kcpuset_set(kcpuset_running, 0);

	ci = curcpu();
	cpu_topology_fake1(ci);
}

int
cpu_setmodel(const char *fmt, ...)
{
	int len;
	va_list ap;

	va_start(ap, fmt);
	len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
	va_end(ap);
	return len;
}

const char *
cpu_getmodel(void)
{
	return cpu_model;
}

bool
cpu_softintr_p(void)
{

	return (curlwp->l_pflag & LP_INTR) != 0;
}

bool
curcpu_stable(void)
{
	struct lwp *const l = curlwp;
	const int pflag = l->l_pflag;
	const int nopreempt = l->l_nopreempt;

	/*
	 * - Softints (LP_INTR) never migrate between CPUs.
	 * - Bound lwps (LP_BOUND), either kthreads created bound to
	 *   a CPU or any lwps bound with curlwp_bind, never migrate.
	 * - If kpreemption is disabled, the lwp can't migrate.
	 * - If we're in interrupt context, preemption is blocked.
	 *
	 * We combine the LP_INTR, LP_BOUND, and l_nopreempt test into
	 * a single predicted-true branch so this is cheap to assert in
	 * most contexts where it will be used, then fall back to
	 * calling the full kpreempt_disabled() and cpu_intr_p() as
	 * subroutines.
	 *
	 * XXX Is cpu_intr_p redundant with kpreempt_disabled?
	 */
	return __predict_true(((pflag & (LP_INTR|LP_BOUND)) | nopreempt)
		!= 0) ||
	    kpreempt_disabled() ||
	    cpu_intr_p();
}

/*
 * Collect CPU topology information as each CPU is attached.  This can be
 * called early during boot, so we need to be careful what we do.
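 *
 * A rough usage sketch (ci and the argument names here are illustrative;
 * only cpu_topology_set() and cpu_topology_setspeed() themselves are
 * real): for each CPU it discovers, a port's MD attach code does
 * something like
 *
 *	cpu_topology_set(ci, package_id, core_id, smt_id, numa_id);
 *	cpu_topology_setspeed(ci, is_slow_core);
 *
 * and cpu_topology_init() later builds the sibling lists from this.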
 */
void
cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
    u_int smt_id, u_int numa_id)
{
	enum cpu_rel rel;

	cpu_topology_present = true;
	ci->ci_package_id = package_id;
	ci->ci_core_id = core_id;
	ci->ci_smt_id = smt_id;
	ci->ci_numa_id = numa_id;
	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
}

/*
 * Collect CPU relative speed
 */
void
cpu_topology_setspeed(struct cpu_info *ci, bool slow)
{

	cpu_topology_haveslow |= slow;
	ci->ci_is_slow = slow;
}

/*
 * Link a CPU into the given circular list.
 */
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
	struct cpu_info *ci3;

	/* Walk to the end of the existing circular list and append. */
	for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
		ci3->ci_nsibling[rel]++;
		if (ci3->ci_sibling[rel] == ci2) {
			break;
		}
	}
	ci->ci_sibling[rel] = ci2;
	ci3->ci_sibling[rel] = ci;
	ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}

/*
 * Print out the topology lists.
 */
static void
cpu_topology_dump(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *ci2;
	const char *names[] = { "core", "pkg", "1st" };
	enum cpu_rel rel;
	int i;

	CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));
	if (ncpu == 1) {
		return;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpu_topology_haveslow)
			aprint_debug("%s ", ci->ci_is_slow ? "slow" : "fast");
		for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
			aprint_debug("%s has %d %s siblings:", cpu_name(ci),
			    ci->ci_nsibling[rel], names[rel]);
			ci2 = ci->ci_sibling[rel];
			i = 0;
			do {
				aprint_debug(" %s", cpu_name(ci2));
				ci2 = ci2->ci_sibling[rel];
			} while (++i < 64 && ci2 != ci->ci_sibling[rel]);
			if (i == 64) {
				aprint_debug(" GAVE UP");
			}
			aprint_debug("\n");
		}
		aprint_debug("%s first in package: %s\n", cpu_name(ci),
		    cpu_name(ci->ci_package1st));
	}
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Used early in boot, and by cpu_topology_fake().
 */
static void
cpu_topology_fake1(struct cpu_info *ci)
{
	enum cpu_rel rel;

	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
	if (!cpu_topology_present) {
		ci->ci_package_id = cpu_index(ci);
	}
	ci->ci_schedstate.spc_flags |=
	    (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	ci->ci_package1st = ci;
	if (!cpu_topology_haveslow) {
		ci->ci_is_slow = false;
	}
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Don't override ci_package_id, etc, if cpu_topology_present is set.
 * MD code also uses these.
 */
static void
cpu_topology_fake(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpu_topology_fake1(ci);
		/* Undo (early boot) flag set so everything links OK. */
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	}
}

/*
 * Fix up basic CPU topology info.  Right now that means attach each CPU to
 * circular lists of its siblings in the same core, and in the same package.
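 *
 * As a concrete (illustrative) example: on a single package with two
 * cores of two SMT siblings each, where cpuN has core_id N/2 and
 * smt_id N%2, this routine ends up with
 *
 *	CPUREL_CORE rings:	cpu0-cpu1 and cpu2-cpu3
 *	CPUREL_PACKAGE ring:	cpu0-cpu1-cpu2-cpu3
 *	SPCF_CORE1ST:		cpu0 and cpu2 (lowest smt_id in each core)
 *	SPCF_PACKAGE1ST:	cpu0 (lowest core_id among core-first CPUs)
 *	ci_package1st:		cpu0 on all four CPUs
 *
 * and, absent slow CPUs, SPCF_1STCLASS on the core-first CPUs.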
 */
void
cpu_topology_init(void)
{
	CPU_INFO_ITERATOR cii, cii2;
	struct cpu_info *ci, *ci2, *ci3;
	u_int minsmt, mincore;

	if (!cpu_topology_present) {
		cpu_topology_fake();
		goto linkit;
	}

	/* Find siblings in same core and package. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			/* Avoid bad things happening. */
			if (ci2->ci_package_id == ci->ci_package_id &&
			    ci2->ci_core_id == ci->ci_core_id &&
			    ci2->ci_smt_id == ci->ci_smt_id &&
			    ci2 != ci) {
#ifdef DEBUG
				printf("cpu%u %p pkg %u core %u smt %u same as "
				    "cpu%u %p pkg %u core %u smt %u\n",
				    cpu_index(ci), ci, ci->ci_package_id,
				    ci->ci_core_id, ci->ci_smt_id,
				    cpu_index(ci2), ci2, ci2->ci_package_id,
				    ci2->ci_core_id, ci2->ci_smt_id);
#endif
				printf("cpu_topology_init: info bogus, "
				    "faking it\n");
				cpu_topology_fake();
				goto linkit;
			}
			if (ci2 == ci ||
			    ci2->ci_package_id != ci->ci_package_id) {
				continue;
			}
			/* Find CPUs in the same core. */
			if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
			    ci->ci_core_id == ci2->ci_core_id) {
				cpu_topology_link(ci, ci2, CPUREL_CORE);
			}
			/* Find CPUs in the same package. */
			if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
				cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
			}
			if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
			    ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
				break;
			}
		}
	}

 linkit:
	/* Identify lowest numbered SMT in each core. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci2 = ci3 = ci;
		minsmt = ci->ci_smt_id;
		do {
			if (ci2->ci_smt_id < minsmt) {
				ci3 = ci2;
				minsmt = ci2->ci_smt_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_CORE];
		} while (ci2 != ci);
		ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
	}

	/* Identify lowest numbered SMT in each package. */
	ci3 = NULL;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
			continue;
		}
		ci2 = ci3 = ci;
		mincore = ci->ci_core_id;
		do {
			if ((ci2->ci_schedstate.spc_flags &
			    SPCF_CORE1ST) != 0 &&
			    ci2->ci_core_id < mincore) {
				ci3 = ci2;
				mincore = ci2->ci_core_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);

		if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
			/* Already identified - nothing more to do. */
			continue;
		}
		ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;

		/* Walk through all CPUs in package and point to first. */
		ci2 = ci3;
		do {
			ci2->ci_package1st = ci3;
			ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci3);

		/* Now look for somebody else to link to. */
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
			    != 0 && ci2 != ci3) {
				cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
				break;
			}
		}
	}

	/* Walk through all packages, starting with value of ci3 from above. */
	KASSERT(ci3 != NULL);
	ci = ci3;
	do {
		/* Walk through CPUs in the package and copy in PACKAGE1ST. */
		ci2 = ci;
		do {
			ci2->ci_sibling[CPUREL_PACKAGE1ST] =
			    ci->ci_sibling[CPUREL_PACKAGE1ST];
			ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
			    ci->ci_nsibling[CPUREL_PACKAGE1ST];
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);
		ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
	} while (ci != ci3);

	if (cpu_topology_haveslow) {
		/*
		 * For asymmetric systems where some CPUs are slower than
		 * others, mark first class CPUs for the scheduler.  This
		 * conflicts with SMT right now so whinge if observed.
		 */
		if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
			printf("cpu_topology_init: asymmetric & SMT??\n");
		}
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (!ci->ci_is_slow) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	} else {
		/*
		 * For any other configuration mark the 1st CPU in each
		 * core as a first class CPU.
		 */
		for (CPU_INFO_FOREACH(cii, ci)) {
			if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	}

	cpu_topology_dump();
}

/*
 * Adjust one count, for a counter that's NOT updated from interrupt
 * context.  Hardly worth making an inline due to preemption stuff.
 */
void
cpu_count(enum cpu_count idx, int64_t delta)
{
	lwp_t *l = curlwp;
	KPREEMPT_DISABLE(l);
	l->l_cpu->ci_counts[idx] += delta;
	KPREEMPT_ENABLE(l);
}

/*
 * Fetch fresh sum total for all counts.  Expensive - don't call often.
 *
 * If poll is true, the caller is okay with less recent values (but
 * no more than 1/hz seconds old).  Where this is called very often that
 * should be the case.
 *
 * This should be reasonably quick so that any value collected isn't
 * totally out of whack, and it can also be called from interrupt context,
 * so go to splvm() while summing the counters.  It's tempting to use a spin
 * mutex here but this routine is called from DDB.
 */
void
cpu_count_sync(bool poll)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int64_t sum[CPU_COUNT_MAX], *ptr;
	static int lasttick;
	int curtick, s;
	enum cpu_count i;

	KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));

	if (__predict_false(!mp_online)) {
		memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
		return;
	}

	s = splvm();
	curtick = getticks();
	if (poll && atomic_load_acquire(&lasttick) == curtick) {
		splx(s);
		return;
	}
	memset(sum, 0, sizeof(sum));
	curcpu()->ci_counts[CPU_COUNT_SYNC]++;
	for (CPU_INFO_FOREACH(cii, ci)) {
		ptr = ci->ci_counts;
		for (i = 0; i < CPU_COUNT_MAX; i += 8) {
			sum[i+0] += ptr[i+0];
			sum[i+1] += ptr[i+1];
			sum[i+2] += ptr[i+2];
			sum[i+3] += ptr[i+3];
			sum[i+4] += ptr[i+4];
			sum[i+5] += ptr[i+5];
			sum[i+6] += ptr[i+6];
			sum[i+7] += ptr[i+7];
		}
		KASSERT(i == CPU_COUNT_MAX);
	}
	memcpy(cpu_counts, sum, sizeof(cpu_counts));
	atomic_store_release(&lasttick, curtick);
	splx(s);
}
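
/*
 * Usage sketch for the counters above (illustrative; only the functions
 * and variables defined in this file are real, and the index name is
 * made up): a subsystem that owns some enum cpu_count slot bumps its
 * per-CPU counter from thread context with
 *
 *	cpu_count(CPU_COUNT_FOO, 1);
 *
 * and a reader calls cpu_count_sync(true) to refresh the summed
 * cpu_counts[] totals (re-summed at most once per tick) before
 * consuming them.
 */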