/*	$NetBSD: kern_lock.c,v 1.188 2024/01/14 11:46:05 andvar Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.188 2024/01/14 11:46:05 andvar Exp $");

#ifdef _KERNEL_OPT
#include "opt_lockdebug.h"
#endif

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/syslog.h>
#include <sys/atomic.h>
#include <sys/lwp.h>
#include <sys/pserialize.h>

#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
#include <sys/ksyms.h>
#endif

#include <machine/lock.h>

#include <dev/lockstat.h>

#define	RETURN_ADDRESS	(uintptr_t)__builtin_return_address(0)

bool	kernel_lock_dodebug;

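/*
 * The kernel lock itself: a single __cpu_simple_lock_t padded out to a
 * full cache line so that it does not share a line with unrelated data
 * (see also the CTASSERT below kernel_lock_init()).
 */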
__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
    __cacheline_aligned;

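/*
 * Assert that the current LWP is in a context where it is safe to sleep:
 * panic if it is running on an idle LWP, in interrupt or soft interrupt
 * context, or inside a pserialize read section.
 */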
void
assert_sleepable(void)
{
	const char *reason;
	long pctr;
	bool idle;

	if (__predict_false(panicstr != NULL)) {
		return;
	}

	LOCKDEBUG_BARRIER(kernel_lock, 1);

	/*
	 * Avoid disabling/re-enabling preemption here since this
	 * routine may be called in delicate situations.  Instead,
	 * sample lwp_pctr() (the LWP's context-switch counter) around
	 * the CURCPU_IDLE_P() check and retry if it changed, which
	 * means we were preempted (and possibly migrated to another
	 * CPU) in between.
	 */
	do {
		pctr = lwp_pctr();
		idle = CURCPU_IDLE_P();
	} while (__predict_false(pctr != lwp_pctr()));

	reason = NULL;
	if (__predict_false(idle) && !cold) {
		reason = "idle";
		goto panic;
	}
	if (__predict_false(cpu_intr_p())) {
		reason = "interrupt";
		goto panic;
	}
	if (__predict_false(cpu_softintr_p())) {
		reason = "softint";
		goto panic;
	}
	if (__predict_false(!pserialize_not_in_read_section())) {
		reason = "pserialize";
		goto panic;
	}
	return;

panic:	panic("%s: %s caller=%p", __func__, reason, (void *)RETURN_ADDRESS);
}

/*
 * Functions for manipulating the kernel_lock.  We put them here
 * so that they show up in profiles.
 */

#define	_KERNEL_LOCK_ABORT(msg)						\
	LOCKDEBUG_ABORT(__func__, __LINE__, kernel_lock, &_kernel_lock_ops, msg)

#ifdef LOCKDEBUG
#define	_KERNEL_LOCK_ASSERT(cond)					\
do {									\
	if (!(cond))							\
		_KERNEL_LOCK_ABORT("assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)
#else
#define	_KERNEL_LOCK_ASSERT(cond)	/* nothing */
#endif

static void	_kernel_lock_dump(const volatile void *, lockop_printer_t);

lockops_t _kernel_lock_ops = {
	.lo_name = "Kernel lock",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = _kernel_lock_dump,
};

#ifdef LOCKDEBUG

#ifdef DDB
#include <ddb/ddb.h>
#endif

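/*
 * LOCKDEBUG helper, run via IPI on the CPU suspected of hogging the
 * kernel lock: identify the LWP running there and, if DDB is
 * configured, print a stack trace.
 */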
static void
kernel_lock_trace_ipi(void *cookie)
{

	printf("%s[%d %s]: hogging kernel lock\n", cpu_name(curcpu()),
	    curlwp->l_lid,
	    curlwp->l_name ? curlwp->l_name : curproc->p_comm);
#ifdef DDB
	db_stacktrace();
#endif
}

#endif

/*
 * Initialize the kernel lock.
 */
void
kernel_lock_init(void)
{

	__cpu_simple_lock_init(kernel_lock);
	kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
	    RETURN_ADDRESS);
}
CTASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));

/*
 * Print debugging information about the kernel lock.
 */
static void
_kernel_lock_dump(const volatile void *junk, lockop_printer_t pr)
{
	struct cpu_info *ci = curcpu();

	(void)junk;

	pr("curcpu holds : %18d wanted by: %#018lx\n",
	    ci->ci_biglock_count, (long)ci->ci_biglock_wanted);
}

/*
 * Acquire 'nlocks' holds on the kernel lock.
 *
 * Although it may not look it, this is one of the most central, intricate
 * routines in the kernel, and tons of code elsewhere depends on its exact
 * behaviour.  If you change something in here, expect it to bite you in the
 * rear.
 */
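
/*
 * Callers normally do not invoke _kernel_lock()/_kernel_unlock() directly
 * but go through the KERNEL_LOCK()/KERNEL_UNLOCK_*() wrapper macros
 * defined elsewhere (<sys/systm.h>), roughly as in this sketch:
 *
 *	KERNEL_LOCK(1, NULL);		take one hold of the big lock
 *	...				code still covered by the big lock
 *	KERNEL_UNLOCK_ONE(NULL);	drop that hold again
 */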
void
_kernel_lock(int nlocks)
{
	struct cpu_info *ci;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
	struct lwp *owant;
#ifdef LOCKDEBUG
	static struct cpu_info *kernel_lock_holder;
	u_int spins = 0;
	u_int starttime = getticks();
#endif
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks > 0);

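	/*
	 * Raise the SPL while the per-CPU hold count is examined.  If
	 * the lock is already held by this CPU, this is a recursive
	 * acquire and only the hold counts need to be bumped.
	 */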
	s = splvm();
	ci = curcpu();
	if (ci->ci_biglock_count != 0) {
		_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
		ci->ci_biglock_count += nlocks;
		l->l_blcnt += nlocks;
		splx(s);
		return;
	}

	_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
	LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
	    0);

	if (__predict_true(__cpu_simple_lock_try(kernel_lock))) {
#ifdef LOCKDEBUG
		kernel_lock_holder = curcpu();
#endif
		ci->ci_biglock_count = nlocks;
		l->l_blcnt = nlocks;
		LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
		    RETURN_ADDRESS, 0);
		splx(s);
		return;
	}

	/*
	 * To remove the ordering constraint between adaptive mutexes
	 * and kernel_lock we must make it appear as if this thread is
	 * blocking.  For non-interlocked mutex release, a store fence
	 * is required to ensure that the result of any mutex_exit()
	 * by the current LWP becomes visible on the bus before the set
	 * of ci->ci_biglock_wanted becomes visible.
	 *
	 * This membar_producer matches the membar_consumer in
	 * mutex_vector_enter.
	 *
	 * That way, if l has just released a mutex, mutex_vector_enter
	 * can't see this store ci->ci_biglock_wanted := l until it
	 * will also see the mutex_exit store mtx->mtx_owner := 0 which
	 * clears the has-waiters bit.
	 */
	membar_producer();
	owant = ci->ci_biglock_wanted;
	atomic_store_relaxed(&ci->ci_biglock_wanted, l);
#if defined(DIAGNOSTIC) && !defined(LOCKDEBUG)
	l->l_ld_wanted = __builtin_return_address(0);
#endif

	/*
	 * Spin until we acquire the lock.  Once we have it, record the
	 * time spent with lockstat.
	 */
	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);

	do {
		splx(s);
		while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
#ifdef LOCKDEBUG
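			/*
			 * LOCKDEBUG only: if we have been spinning for
			 * too long once init is running, have the CPU
			 * believed to hold the lock print a stack trace
			 * via IPI, then abort.
			 */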
			if (SPINLOCK_SPINOUT(spins) && start_init_exec &&
			    (getticks() - starttime) > 10*hz) {
				ipi_msg_t msg = {
					.func = kernel_lock_trace_ipi,
				};
				kpreempt_disable();
				ipi_unicast(&msg, kernel_lock_holder);
				ipi_wait(&msg);
				kpreempt_enable();
				_KERNEL_LOCK_ABORT("spinout");
			}
#endif
			SPINLOCK_BACKOFF_HOOK;
			SPINLOCK_SPIN_HOOK;
		}
		s = splvm();
	} while (!__cpu_simple_lock_try(kernel_lock));

	ci->ci_biglock_count = nlocks;
	l->l_blcnt = nlocks;
	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, NULL,
	    RETURN_ADDRESS, 0);
	if (owant == NULL) {
		LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
		    LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
	}
	LOCKSTAT_EXIT(lsflag);
	splx(s);

	/*
	 * Now that we have kernel_lock, reset ci_biglock_wanted.  This
	 * store must be visible on other CPUs before a mutex_exit() on
	 * this CPU can test the has-waiters bit.
	 *
	 * This membar_enter matches the membar_enter in
	 * mutex_vector_enter.  (Yes, not membar_exit -- the legacy
	 * naming is confusing, but store-before-load usually pairs
	 * with store-before-load, in the extremely rare cases where it
	 * is used at all.)
	 *
	 * That way, mutex_vector_enter can't see this store
	 * ci->ci_biglock_wanted := owant until it has set the
	 * has-waiters bit.
	 */
	(void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_enter();
#endif

#ifdef LOCKDEBUG
	kernel_lock_holder = curcpu();
#endif
}

/*
 * Release 'nlocks' holds on the kernel lock.  If 'nlocks' is zero, release
 * all holds; if it is -1, release a single hold, asserting that it is the
 * only one outstanding.  If 'countp' is not NULL, the number of holds that
 * were held on entry is stored there.
 */
void
_kernel_unlock(int nlocks, int *countp)
{
	struct cpu_info *ci;
	u_int olocks;
	int s;
	struct lwp *l = curlwp;

	_KERNEL_LOCK_ASSERT(nlocks < 2);

	olocks = l->l_blcnt;

	if (olocks == 0) {
		_KERNEL_LOCK_ASSERT(nlocks <= 0);
		if (countp != NULL)
			*countp = 0;
		return;
	}

	_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));

	if (nlocks == 0)
		nlocks = olocks;
	else if (nlocks == -1) {
		nlocks = 1;
		_KERNEL_LOCK_ASSERT(olocks == 1);
	}
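	/*
	 * If this drops the CPU's last hold, release the spinlock itself
	 * and honour any pending preemption request; otherwise just
	 * decrement the per-CPU and per-LWP hold counts.
	 */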
	s = splvm();
	ci = curcpu();
	_KERNEL_LOCK_ASSERT(ci->ci_biglock_count >= l->l_blcnt);
	if (ci->ci_biglock_count == nlocks) {
		LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
		    RETURN_ADDRESS, 0);
		ci->ci_biglock_count = 0;
		__cpu_simple_unlock(kernel_lock);
		l->l_blcnt -= nlocks;
		splx(s);
		if (l->l_dopreempt)
			kpreempt(0);
	} else {
		ci->ci_biglock_count -= nlocks;
		l->l_blcnt -= nlocks;
		splx(s);
	}

	if (countp != NULL)
		*countp = olocks;
}

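/*
 * Return true if the kernel lock is held, i.e. the underlying spinlock is
 * taken by some CPU; this does not verify that the calling LWP holds it.
 */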
bool
_kernel_locked_p(void)
{
	return __SIMPLELOCK_LOCKED_P(kernel_lock);
}