/*	$NetBSD: subr_cpu.c,v 1.17 2021/10/04 21:02:39 andvar Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines shared with rump.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_cpu.c,v 1.17 2021/10/04 21:02:39 andvar Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/systm.h>
#include <sys/sched.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/kmem.h>

static void	cpu_topology_fake1(struct cpu_info *);

kmutex_t	cpu_lock		__cacheline_aligned;
int		ncpu			__read_mostly;
int		ncpuonline		__read_mostly;
bool		mp_online		__read_mostly;
static bool	cpu_topology_present	__read_mostly;
static bool	cpu_topology_haveslow	__read_mostly;
int64_t		cpu_counts[CPU_COUNT_MAX];

/* An array of CPUs.  There are ncpu entries. */
struct cpu_info **cpu_infos		__read_mostly;

/* Note: set on mi_cpu_attach() and idle_loop(). */
kcpuset_t *	kcpuset_attached	__read_mostly	= NULL;
kcpuset_t *	kcpuset_running		__read_mostly	= NULL;

static char cpu_model[128];

/*
 * mi_cpu_init: early initialisation of MI CPU related structures.
 *
 * Note: may not block and memory allocator is not yet available.
 */
void
mi_cpu_init(void)
{
	struct cpu_info *ci;

	mutex_init(&cpu_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&kcpuset_attached, true);
	kcpuset_create(&kcpuset_running, true);
	kcpuset_set(kcpuset_running, 0);

	ci = curcpu();
	cpu_topology_fake1(ci);
}

int
cpu_setmodel(const char *fmt, ...)
{
	int len;
	va_list ap;

	va_start(ap, fmt);
	len = vsnprintf(cpu_model, sizeof(cpu_model), fmt, ap);
	va_end(ap);
	return len;
}

const char *
cpu_getmodel(void)
{
	return cpu_model;
}
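
/*
 * Usage sketch (illustrative; not a call site in this file): MD attach
 * code typically publishes the model string once during boot, and other
 * code reads it back later, e.g.:
 *
 *	cpu_setmodel("%s %s", vendor_str, brand_str);
 *	...
 *	printf("model: %s\n", cpu_getmodel());
 *
 * where vendor_str/brand_str stand in for whatever strings the port
 * probes from the hardware.
 */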

bool
cpu_softintr_p(void)
{

	return (curlwp->l_pflag & LP_INTR) != 0;
}

/*
 * Collect CPU topology information as each CPU is attached.  This can be
 * called early during boot, so we need to be careful what we do.
 */
void
cpu_topology_set(struct cpu_info *ci, u_int package_id, u_int core_id,
    u_int smt_id, u_int numa_id)
{
	enum cpu_rel rel;

	cpu_topology_present = true;
	ci->ci_package_id = package_id;
	ci->ci_core_id = core_id;
	ci->ci_smt_id = smt_id;
	ci->ci_numa_id = numa_id;
	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
}
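
/*
 * Illustrative sketch (hypothetical IDs, not from this file): on a
 * system with 2 packages, 2 cores per package and 2 SMT threads per
 * core, the MD code would report cpu0..cpu7 roughly as:
 *
 *	cpu_topology_set(ci, pkg, core, smt, numa);
 *	e.g. cpu0 -> (0, 0, 0, 0), cpu1 -> (0, 0, 1, 0),
 *	     cpu2 -> (0, 1, 0, 0), ... cpu7 -> (1, 1, 1, 0).
 *
 * cpu_topology_init() later builds the sibling lists from these IDs.
 */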

/*
 * Record a CPU's relative speed, for asymmetric systems where some
 * CPUs are slower than others.
 */
void
cpu_topology_setspeed(struct cpu_info *ci, bool slow)
{

	cpu_topology_haveslow |= slow;
	ci->ci_is_slow = slow;
}
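
/*
 * Illustrative sketch: on an asymmetric ("big.LITTLE" style) system the
 * MD code might mark the slower cores before cpu_topology_init() runs:
 *
 *	cpu_topology_setspeed(ci, is_little_core);
 *
 * where is_little_core is a hypothetical MD-determined flag.
 */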

/*
 * Link a CPU into the given circular list.
 */
static void
cpu_topology_link(struct cpu_info *ci, struct cpu_info *ci2, enum cpu_rel rel)
{
	struct cpu_info *ci3;

	/* Walk to the end of the existing circular list and append. */
	for (ci3 = ci2;; ci3 = ci3->ci_sibling[rel]) {
		ci3->ci_nsibling[rel]++;
		if (ci3->ci_sibling[rel] == ci2) {
			break;
		}
	}
	ci->ci_sibling[rel] = ci2;
	ci3->ci_sibling[rel] = ci;
	ci->ci_nsibling[rel] = ci3->ci_nsibling[rel];
}
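
/*
 * For example, inserting D into the ring B -> C -> B (with ci2 = B)
 * walks B and C bumping their counts, stops at C (whose sibling is B),
 * then splices D in front of B to give B -> C -> D -> B.  D inherits
 * the updated count, so all members agree the ring has three entries.
 */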

/*
 * Print out the topology lists.
 */
static void
cpu_topology_dump(void)
{
#ifdef DEBUG
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci, *ci2;
	const char *names[] = { "core", "pkg", "1st" };
	enum cpu_rel rel;
	int i;

	CTASSERT(__arraycount(names) >= __arraycount(ci->ci_sibling));
	if (ncpu == 1) {
		return;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		if (cpu_topology_haveslow)
			printf("%s ", ci->ci_is_slow ? "slow" : "fast");
		for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
			printf("%s has %d %s siblings:", cpu_name(ci),
			    ci->ci_nsibling[rel], names[rel]);
			ci2 = ci->ci_sibling[rel];
			i = 0;
			do {
				printf(" %s", cpu_name(ci2));
				ci2 = ci2->ci_sibling[rel];
			} while (++i < 64 && ci2 != ci->ci_sibling[rel]);
			if (i == 64) {
				printf(" GAVE UP");
			}
			printf("\n");
		}
		printf("%s first in package: %s\n", cpu_name(ci),
		    cpu_name(ci->ci_package1st));
	}
#endif	/* DEBUG */
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Used early in boot, and by cpu_topology_fake().
 */
static void
cpu_topology_fake1(struct cpu_info *ci)
{
	enum cpu_rel rel;

	for (rel = 0; rel < __arraycount(ci->ci_sibling); rel++) {
		ci->ci_sibling[rel] = ci;
		ci->ci_nsibling[rel] = 1;
	}
	if (!cpu_topology_present) {
		ci->ci_package_id = cpu_index(ci);
	}
	ci->ci_schedstate.spc_flags |=
	    (SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	ci->ci_package1st = ci;
	if (!cpu_topology_haveslow) {
		ci->ci_is_slow = false;
	}
}

/*
 * Fake up topology info if we have none, or if what we got was bogus.
 * Don't override ci_package_id, etc, if cpu_topology_present is set.
 * MD code also uses these.
 */
static void
cpu_topology_fake(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		cpu_topology_fake1(ci);
		/* Undo (early boot) flag set so everything links OK. */
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
	}
}

/*
 * Fix up basic CPU topology info.  Right now that means attach each CPU to
 * circular lists of its siblings in the same core, and in the same package.
 */
void
cpu_topology_init(void)
{
	CPU_INFO_ITERATOR cii, cii2;
	struct cpu_info *ci, *ci2, *ci3;
	u_int minsmt, mincore;

	if (!cpu_topology_present) {
		cpu_topology_fake();
		goto linkit;
	}

	/* Find siblings in same core and package. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci->ci_schedstate.spc_flags &=
		    ~(SPCF_CORE1ST | SPCF_PACKAGE1ST | SPCF_1STCLASS);
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			/* Two CPUs with identical IDs: MD info is bogus. */
			if (ci2->ci_package_id == ci->ci_package_id &&
			    ci2->ci_core_id == ci->ci_core_id &&
			    ci2->ci_smt_id == ci->ci_smt_id &&
			    ci2 != ci) {
#ifdef DEBUG
				printf("cpu%u %p pkg %u core %u smt %u same as "
				       "cpu%u %p pkg %u core %u smt %u\n",
				       cpu_index(ci), ci, ci->ci_package_id,
				       ci->ci_core_id, ci->ci_smt_id,
				       cpu_index(ci2), ci2, ci2->ci_package_id,
				       ci2->ci_core_id, ci2->ci_smt_id);
#endif
				printf("cpu_topology_init: info bogus, "
				    "faking it\n");
				cpu_topology_fake();
				goto linkit;
			}
			if (ci2 == ci ||
			    ci2->ci_package_id != ci->ci_package_id) {
				continue;
			}
			/* Find CPUs in the same core. */
			if (ci->ci_nsibling[CPUREL_CORE] == 1 &&
			    ci->ci_core_id == ci2->ci_core_id) {
				cpu_topology_link(ci, ci2, CPUREL_CORE);
			}
			/* Find CPUs in the same package. */
			if (ci->ci_nsibling[CPUREL_PACKAGE] == 1) {
				cpu_topology_link(ci, ci2, CPUREL_PACKAGE);
			}
			if (ci->ci_nsibling[CPUREL_CORE] > 1 &&
			    ci->ci_nsibling[CPUREL_PACKAGE] > 1) {
				break;
			}
		}
	}

 linkit:
	/* Identify lowest numbered SMT in each core. */
	for (CPU_INFO_FOREACH(cii, ci)) {
		ci2 = ci3 = ci;
		minsmt = ci->ci_smt_id;
		do {
			if (ci2->ci_smt_id < minsmt) {
				ci3 = ci2;
				minsmt = ci2->ci_smt_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_CORE];
		} while (ci2 != ci);
		ci3->ci_schedstate.spc_flags |= SPCF_CORE1ST;
	}

	/* Identify lowest numbered SMT in each package. */
	ci3 = NULL;
	for (CPU_INFO_FOREACH(cii, ci)) {
		if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) == 0) {
			continue;
		}
		ci2 = ci3 = ci;
		mincore = ci->ci_core_id;
		do {
			if ((ci2->ci_schedstate.spc_flags &
			    SPCF_CORE1ST) != 0 &&
			    ci2->ci_core_id < mincore) {
				ci3 = ci2;
				mincore = ci2->ci_core_id;
			}
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);

		if ((ci3->ci_schedstate.spc_flags & SPCF_PACKAGE1ST) != 0) {
			/* Already identified - nothing more to do. */
			continue;
		}
		ci3->ci_schedstate.spc_flags |= SPCF_PACKAGE1ST;

		/* Walk through all CPUs in package and point to first. */
		ci2 = ci3;
		do {
			ci2->ci_package1st = ci3;
			ci2->ci_sibling[CPUREL_PACKAGE1ST] = ci3;
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci3);

		/* Now look for somebody else to link to. */
		for (CPU_INFO_FOREACH(cii2, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_PACKAGE1ST)
			    != 0 && ci2 != ci3) {
				cpu_topology_link(ci3, ci2, CPUREL_PACKAGE1ST);
				break;
			}
		}
	}

	/* Walk through all packages, starting with value of ci3 from above. */
	KASSERT(ci3 != NULL);
	ci = ci3;
	do {
		/* Walk through CPUs in the package and copy in PACKAGE1ST. */
		ci2 = ci;
		do {
			ci2->ci_sibling[CPUREL_PACKAGE1ST] =
			    ci->ci_sibling[CPUREL_PACKAGE1ST];
			ci2->ci_nsibling[CPUREL_PACKAGE1ST] =
			    ci->ci_nsibling[CPUREL_PACKAGE1ST];
			ci2 = ci2->ci_sibling[CPUREL_PACKAGE];
		} while (ci2 != ci);
		ci = ci->ci_sibling[CPUREL_PACKAGE1ST];
	} while (ci != ci3);

	if (cpu_topology_haveslow) {
		/*
		 * For asymmetric systems where some CPUs are slower than
		 * others, mark first class CPUs for the scheduler.  This
		 * conflicts with SMT right now so whinge if observed.
		 */
		if (curcpu()->ci_nsibling[CPUREL_CORE] > 1) {
			printf("cpu_topology_init: asymmetric & SMT??\n");
		}
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (!ci->ci_is_slow) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	} else {
		/*
		 * For any other configuration mark the 1st CPU in each
		 * core as a first class CPU.
		 */
		for (CPU_INFO_FOREACH(cii, ci)) {
			if ((ci->ci_schedstate.spc_flags & SPCF_CORE1ST) != 0) {
				ci->ci_schedstate.spc_flags |= SPCF_1STCLASS;
			}
		}
	}

	cpu_topology_dump();
}

/*
 * Adjust one count, for a counter that's NOT updated from interrupt
 * context.  Hardly worth making an inline due to preemption stuff.
 */
void
cpu_count(enum cpu_count idx, int64_t delta)
{
	lwp_t *l = curlwp;
	KPREEMPT_DISABLE(l);
	l->l_cpu->ci_counts[idx] += delta;
	KPREEMPT_ENABLE(l);
}
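
/*
 * Usage sketch (an illustrative call, not a call site in this file):
 * e.g. the fork path might account a new process with
 *
 *	cpu_count(CPU_COUNT_FORKS, 1);
 *
 * CPU_COUNT_FORKS being one of the enum cpu_count indices from
 * sys/cpu_data.h.
 */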

/*
 * Fetch a fresh sum total for all counts.  Expensive - don't call often.
 *
 * If poll is true, the caller is okay with less recent values (but
 * no more than 1/hz seconds old).  Where this is called very often that
 * should be the case.
 *
 * This should be reasonably quick so that any value collected isn't
 * totally out of whack, and it can also be called from interrupt context,
 * so go to splvm() while summing the counters.  It's tempting to use a spin
 * mutex here but this routine is called from DDB.
 */
void
cpu_count_sync(bool poll)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int64_t sum[CPU_COUNT_MAX], *ptr;
	static int lasttick;
	int curtick, s;
	enum cpu_count i;

	KASSERT(sizeof(ci->ci_counts) == sizeof(cpu_counts));

	if (__predict_false(!mp_online)) {
		memcpy(cpu_counts, curcpu()->ci_counts, sizeof(cpu_counts));
		return;
	}

	s = splvm();
	curtick = getticks();
	if (poll && atomic_load_acquire(&lasttick) == curtick) {
		splx(s);
		return;
	}
	memset(sum, 0, sizeof(sum));
	curcpu()->ci_counts[CPU_COUNT_SYNC]++;
	for (CPU_INFO_FOREACH(cii, ci)) {
		ptr = ci->ci_counts;
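		/*
		 * The loop below sums eight counters per iteration; the
		 * KASSERT after it checks the implicit assumption that
		 * CPU_COUNT_MAX is a multiple of 8, so the unrolled
		 * walk covers every slot exactly.
		 */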
		for (i = 0; i < CPU_COUNT_MAX; i += 8) {
			sum[i+0] += ptr[i+0];
			sum[i+1] += ptr[i+1];
			sum[i+2] += ptr[i+2];
			sum[i+3] += ptr[i+3];
			sum[i+4] += ptr[i+4];
			sum[i+5] += ptr[i+5];
			sum[i+6] += ptr[i+6];
			sum[i+7] += ptr[i+7];
		}
		KASSERT(i == CPU_COUNT_MAX);
	}
	memcpy(cpu_counts, sum, sizeof(cpu_counts));
	atomic_store_release(&lasttick, curtick);
	splx(s);
}
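
/*
 * Consumer sketch (illustrative): code wanting a reasonably fresh total
 * would refresh and then read the cached sums, e.g.:
 *
 *	cpu_count_sync(true);
 *	nswtch = cpu_count_get(CPU_COUNT_NSWTCH);
 *
 * assuming the cpu_count_get() accessor and the CPU_COUNT_NSWTCH index
 * from sys/cpu_data.h.
 */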