/* $NetBSD: subr_cpu.c,v 1.19 2023/07/08 13:59:05 riastradh Exp $ */

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines shared with rump.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.19 2023/07/08 13:59:05 riastradh Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>

static void     cpu_topology_fake1(struct cpu_info *);

kmutex_t        cpu_lock                __cacheline_aligned;
int             ncpu                    __read_mostly;
int             ncpuonline              __read_mostly;
bool            mp_online               __read_mostly;
static bool     cpu_topology_present    __read_mostly;
static bool     cpu_topology_haveslow   __read_mostly;
int64_t         cpu_counts[CPU_COUNT_MAX];

/* An array of CPUs.  There are ncpu entries. */
struct cpu_info **cpu_infos             __read_mostly;

/* Note: set on mi_cpu_attach() and idle_loop(). */
kcpuset_t *     kcpuset_attached        __read_mostly = NULL;
kcpuset_t *     kcpuset_running         __read_mostly = NULL;

static char cpu_model[128];

/*
 * mi_cpu_init: early initialisation of MI CPU related structures.
 *
 * Note: may not block and memory allocator is not yet available.
 */
void
mi_cpu_init(void)
{
        struct cpu_info *ci;

        mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

        kcpuset_create(&kcpuset_attached, true);
        kcpuset_create(&kcpuset_running, true);
        kcpuset_set(kcpuset_running, 0);

        ci = curcpu();
        cpu_topology_fake1(ci);
}

int
cpu_setmodel(const char *fmt, ...)
{
        int len;
        va_list ap;

        va_start(ap, fmt);
        len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
        va_end(ap);
        return len;
}

const char *
cpu_getmodel(void)
{
        return cpu_model;
}

bool
cpu_softintr_p(void)
{

        return (curlwp->l_pflag & LP_INTR) != 0;
}

bool
curcpu_stable(void)
{
        struct lwp *const l = curlwp;
        const int pflag = l->l_pflag;
        const int nopreempt = l->l_nopreempt;

        /*
         * - Softints (LP_INTR) never migrate between CPUs.
         * - Bound lwps (LP_BOUND), either kthreads created bound to
         *   a CPU or any lwps bound with curlwp_bind, never migrate.
         * - If kpreemption is disabled, the lwp can't migrate.
         * - If we're in interrupt context, preemption is blocked.
         *
         * We combine the LP_INTR, LP_BOUND, and l_nopreempt test into
         * a single predicted-true branch so this is cheap to assert in
         * most contexts where it will be used, then fall back to
         * calling the full kpreempt_disabled() and cpu_intr_p() as
         * subroutines.
         *
         * XXX Is cpu_intr_p redundant with kpreempt_disabled?
         */
        return __predict_true(((pflag & (LP_INTR|LP_BOUND)) | nopreempt)
                != 0) ||
            kpreempt_disabled() ||
            cpu_intr_p();
}

/*
 * Collect CPU topology information as each CPU is attached.  This can be
 * called early during boot, so we need to be careful what we do.
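 *
 * As a rough illustration (the IDs come from MD firmware/CPUID probing,
 * so the numbers here are made up), a CPU that is SMT thread 1 of core 0
 * in package 1 on NUMA node 1 would be registered by MD code as:
 *
 *      cpu_topology_set(ci, 1, 0, 1, 1);
 *
 * and on an asymmetric system a slower CPU would additionally be flagged
 * with cpu_topology_setspeed(ci, true).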
 */
void
cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
    u_int smt_id, u_int numa_id)
{
        enum cpu_rel rel;

        cpu_topology_present = true;
        ci->ci_package_id = package_id;
        ci->ci_core_id = core_id;
        ci->ci_smt_id = smt_id;
        ci->ci_numa_id = numa_id;
        for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
                ci->ci_sibling[rel] = ci;
                ci->ci_nsibling[rel] = 1;
        }
}

/*
 * Collect CPU relative speed.
 */
void
cpu_topology_setspeed(struct cpu_info *ci, bool slow)
{

        cpu_topology_haveslow |= slow;
        ci->ci_is_slow = slow;
}

/*
 * Link a CPU into the given circular list.
 */
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
        struct cpu_info *ci3;

        /* Walk to the end of the existing circular list and append. */
        for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
                ci3->ci_nsibling[rel]++;
                if (ci3->ci_sibling[rel] == ci2) {
                        break;
                }
        }
        ci->ci_sibling[rel] = ci2;
        ci3->ci_sibling[rel] = ci;
        ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}

/*
 * Print out the topology lists.
 */
static void
cpu_topology_dump(void)
{
#ifdef DEBUG
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci, *ci2;
        const char *names[] = { "core", "pkg", "1st" };
        enum cpu_rel rel;
        int i;

        CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));
        if (ncpu == 1) {
                return;
        }

        for (CPU_INFO_FOREACH(cii, ci)) {
                if (cpu_topology_haveslow)
                        printf("%s ", ci->ci_is_slow ? "slow" : "fast");
                for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
                        printf("%s has %d %s siblings:", cpu_name(ci),
                            ci->ci_nsibling[rel], names[rel]);
                        ci2 = ci->ci_sibling[rel];
                        i = 0;
                        do {
                                printf(" %s", cpu_name(ci2));
                                ci2 = ci2->ci_sibling[rel];
                        } while (++i < 64 && ci2 != ci->ci_sibling[rel]);
                        if (i == 64) {
                                printf(" GAVE UP");
                        }
                        printf("\n");
                }
                printf("%s first in package: %s\n", cpu_name(ci),
                    cpu_name(ci->ci_package1st));
        }
#endif /* DEBUG */
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Used early in boot, and by cpu_topology_fake().
 */
static void
cpu_topology_fake1(struct cpu_info *ci)
{
        enum cpu_rel rel;

        for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
                ci->ci_sibling[rel] = ci;
                ci->ci_nsibling[rel] = 1;
        }
        if (!cpu_topology_present) {
                ci->ci_package_id = cpu_index(ci);
        }
        ci->ci_schedstate.spc_flags |=
            (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
        ci->ci_package1st = ci;
        if (!cpu_topology_haveslow) {
                ci->ci_is_slow = false;
        }
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Don't override ci_package_id, etc., if cpu_topology_present is set.
 * MD code also uses these.
 */
static void
cpu_topology_fake(void)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        for (CPU_INFO_FOREACH(cii, ci)) {
                cpu_topology_fake1(ci);
                /* Undo (early boot) flag set so everything links OK. */
                ci->ci_schedstate.spc_flags &=
                    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
        }
}

/*
 * Fix up basic CPU topology info.  Right now that means attach each CPU to
 * circular lists of its siblings in the same core, and in the same package.
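 *
 * As a sketch of the result, assume a hypothetical single-package machine
 * with two cores of two SMT threads each, where cpu0/cpu1 share core 0
 * and cpu2/cpu3 share core 1.  After linking:
 *
 *      CPUREL_CORE:       cpu0<->cpu1 and cpu2<->cpu3, one ring per core.
 *      CPUREL_PACKAGE:    cpu0<->cpu1<->cpu2<->cpu3, one ring per package.
 *      CPUREL_PACKAGE1ST: a ring of package leaders; with a single package
 *                         it holds just the first CPU (cpu0 here).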
 */
void
cpu_topology_init(void)
{
        CPU_INFO_ITERATOR cii, cii2;
        struct cpu_info *ci, *ci2, *ci3;
        u_int minsmt, mincore;

        if (!cpu_topology_present) {
                cpu_topology_fake();
                goto linkit;
        }

        /* Find siblings in same core and package. */
        for (CPU_INFO_FOREACH(cii, ci)) {
                ci->ci_schedstate.spc_flags &=
                    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
                for (CPU_INFO_FOREACH(cii2, ci2)) {
                        /*
                         * Avoid bad things happening: two distinct CPUs
                         * reporting identical package/core/SMT IDs means
                         * the MD topology info is bogus.
                         */
                        if (ci2->ci_package_id == ci->ci_package_id &&
                            ci2->ci_core_id == ci->ci_core_id &&
                            ci2->ci_smt_id == ci->ci_smt_id &&
                            ci2 != ci) {
#ifdef DEBUG
                                printf("cpu%u %p pkg %u core %u smt %u same as "
                                    "cpu%u %p pkg %u core %u smt %u\n",
                                    cpu_index(ci), ci, ci->ci_package_id,
                                    ci->ci_core_id, ci->ci_smt_id,
                                    cpu_index(ci2), ci2, ci2->ci_package_id,
                                    ci2->ci_core_id, ci2->ci_smt_id);
#endif
                                printf("cpu_topology_init: info bogus, "
                                    "faking it\n");
                                cpu_topology_fake();
                                goto linkit;
                        }
                        if (ci2 == ci ||
                            ci2->ci_package_id != ci->ci_package_id) {
                                continue;
                        }
                        /* Find CPUs in the same core. */
                        if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
                            ci->ci_core_id == ci2->ci_core_id) {
                                cpu_topology_link(ci, ci2, CPUREL_CORE);
                        }
                        /* Find CPUs in the same package. */
                        if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
                                cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
                        }
                        if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
                            ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
                                break;
                        }
                }
        }

 linkit:
        /* Identify lowest numbered SMT in each core. */
        for (CPU_INFO_FOREACH(cii, ci)) {
                ci2 = ci3 = ci;
                minsmt = ci->ci_smt_id;
                do {
                        if (ci2->ci_smt_id < minsmt) {
                                ci3 = ci2;
                                minsmt = ci2->ci_smt_id;
                        }
                        ci2 = ci2->ci_sibling[CPUREL_CORE];
                } while (ci2 != ci);
                ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
        }

        /* Identify lowest numbered SMT in each package. */
        ci3 = NULL;
        for (CPU_INFO_FOREACH(cii, ci)) {
                if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
                        continue;
                }
                ci2 = ci3 = ci;
                mincore = ci->ci_core_id;
                do {
                        if ((ci2->ci_schedstate.spc_flags &
                            SPCF_CORE1ST) != 0 &&
                            ci2->ci_core_id < mincore) {
                                ci3 = ci2;
                                mincore = ci2->ci_core_id;
                        }
                        ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
                } while (ci2 != ci);

                if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
                        /* Already identified - nothing more to do. */
                        continue;
                }
                ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;

                /* Walk through all CPUs in package and point to first. */
                ci2 = ci3;
                do {
                        ci2->ci_package1st = ci3;
                        ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
                        ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
                } while (ci2 != ci3);

                /* Now look for somebody else to link to. */
                for (CPU_INFO_FOREACH(cii2, ci2)) {
                        if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
                            != 0 && ci2 != ci3) {
                                cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
                                break;
                        }
                }
        }

        /* Walk through all packages, starting with value of ci3 from above. */
        KASSERT(ci3 != NULL);
        ci = ci3;
        do {
                /* Walk through CPUs in the package and copy in PACKAGE1ST. */
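                /*
                 * Afterwards every CPU in the package carries its leader's
                 * PACKAGE1ST ring pointer and sibling count, so the ring of
                 * package leaders is reachable from any CPU, not just from
                 * the leaders themselves.
                 */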
                ci2 = ci;
                do {
                        ci2->ci_sibling[CPUREL_PACKAGE1ST] =
                            ci->ci_sibling[CPUREL_PACKAGE1ST];
                        ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
                            ci->ci_nsibling[CPUREL_PACKAGE1ST];
                        ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
                } while (ci2 != ci);
                ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
        } while (ci != ci3);

        if (cpu_topology_haveslow) {
                /*
                 * For asymmetric systems where some CPUs are slower than
                 * others, mark first class CPUs for the scheduler.  This
                 * conflicts with SMT right now so whinge if observed.
                 */
                if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
                        printf("cpu_topology_init: asymmetric & SMT??\n");
                }
                for (CPU_INFO_FOREACH(cii, ci)) {
                        if (!ci->ci_is_slow) {
                                ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
                        }
                }
        } else {
                /*
                 * For any other configuration mark the 1st CPU in each
                 * core as a first class CPU.
                 */
                for (CPU_INFO_FOREACH(cii, ci)) {
                        if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
                                ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
                        }
                }
        }

        cpu_topology_dump();
}

/*
 * Adjust one count, for a counter that's NOT updated from interrupt
 * context.  Hardly worth making an inline due to the preemption handling.
 */
void
cpu_count(enum cpu_count idx, int64_t delta)
{
        lwp_t *l = curlwp;
        KPREEMPT_DISABLE(l);
        l->l_cpu->ci_counts[idx] += delta;
        KPREEMPT_ENABLE(l);
}

/*
 * Fetch fresh sum total for all counts.  Expensive - don't call often.
 *
 * If poll is true, the caller is okay with less recent values (but
 * no more than 1/hz seconds old).  Where this is called very often that
 * should be the case.
 *
 * This should be reasonably quick so that the values collected aren't
 * totally out of whack, and it can also be called from interrupt context,
 * so go to splvm() while summing the counters.  It's tempting to use a spin
 * mutex here but this routine is called from DDB.
 */
void
cpu_count_sync(bool poll)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        int64_t sum[CPU_COUNT_MAX], *ptr;
        static int lasttick;
        int curtick, s;
        enum cpu_count i;

        KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));

        if (__predict_false(!mp_online)) {
                memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
                return;
        }

        s = splvm();
        curtick = getticks();
        if (poll && atomic_load_acquire(&lasttick) == curtick) {
                splx(s);
                return;
        }
        memset(sum, 0, sizeof(sum));
        curcpu()->ci_counts[CPU_COUNT_SYNC]++;
        for (CPU_INFO_FOREACH(cii, ci)) {
                ptr = ci->ci_counts;
                for (i = 0; i < CPU_COUNT_MAX; i += 8) {
                        sum[i+0] += ptr[i+0];
                        sum[i+1] += ptr[i+1];
                        sum[i+2] += ptr[i+2];
                        sum[i+3] += ptr[i+3];
                        sum[i+4] += ptr[i+4];
                        sum[i+5] += ptr[i+5];
                        sum[i+6] += ptr[i+6];
                        sum[i+7] += ptr[i+7];
                }
                KASSERT(i == CPU_COUNT_MAX);
        }
        memcpy(cpu_counts, sum, sizeof(cpu_counts));
        atomic_store_release(&lasttick, curtick);
        splx(s);
}
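
/*
 * Usage sketch, illustrative only: real callers live elsewhere in the
 * kernel, and CPU_COUNT_FORKS is assumed here simply as an example of an
 * enum cpu_count index.
 *
 *      cpu_count(CPU_COUNT_FORKS, 1);                  count a fork locally
 *      cpu_count_sync(true);                           fold per-CPU counters
 *      int64_t forks = cpu_counts[CPU_COUNT_FORKS];    read the summed value
 */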