xref: /dflybsd-src/sys/kern/kern_lock.c (revision d9d67b5976be7caf272382a562fdbf8906f7811b)
1 /*
2  * Copyright (c) 1995
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (C) 1997
5  *	John S. Dyson.  All rights reserved.
6  * Copyright (C) 2013-2014
7  *	Matthew Dillon, All rights reserved.
8  *
9  * This code contains ideas from software contributed to Berkeley by
10  * Avadis Tevanian, Jr., Michael Wayne Young, and the Mach Operating
11  * System project at Carnegie-Mellon University.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include "opt_lint.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/proc.h>
44 #include <sys/lock.h>
45 #include <sys/sysctl.h>
46 #include <sys/spinlock.h>
47 #include <sys/thread2.h>
48 #include <sys/spinlock2.h>
49 #include <sys/indefinite2.h>
50 
51 static void undo_upreq(struct lock *lkp);
52 
53 #ifdef DEBUG_CANCEL_LOCKS
54 
55 static int sysctl_cancel_lock(SYSCTL_HANDLER_ARGS);
56 static int sysctl_cancel_test(SYSCTL_HANDLER_ARGS);
57 
58 static struct lock cancel_lk;
59 LOCK_SYSINIT(cancellk, &cancel_lk, "cancel", 0);
60 SYSCTL_PROC(_kern, OID_AUTO, cancel_lock, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
61 	    sysctl_cancel_lock, "I", "test cancelable locks");
62 SYSCTL_PROC(_kern, OID_AUTO, cancel_test, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
63 	    sysctl_cancel_test, "I", "test cancelable locks");
64 
65 #endif
66 
67 int lock_test_mode;
68 SYSCTL_INT(_debug, OID_AUTO, lock_test_mode, CTLFLAG_RW,
69 	   &lock_test_mode, 0, "");
70 
71 /*
72  * Locking primitives implementation.
73  * Locks provide shared/exclusive synchronization.
74  */
75 
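/*
 * Usage sketch (illustrative only; "lk" and the "examplk" wmesg are
 * hypothetical names).  Callers normally go through the lockmgr() front
 * end, which dispatches to the lockmgr_*() routines below:
 *
 *	struct lock lk;
 *
 *	lockinit(&lk, "examplk", 0, 0);
 *
 *	lockmgr(&lk, LK_EXCLUSIVE);	... modify the protected data ...
 *	lockmgr(&lk, LK_RELEASE);
 *
 *	lockmgr(&lk, LK_SHARED);	... read the protected data ...
 *	lockmgr(&lk, LK_RELEASE);
 */
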
76 #ifdef DEBUG_LOCKS
77 #define COUNT(td, x) (td)->td_locks += (x)
78 #else
79 #define COUNT(td, x)
80 #endif
81 
82 static int lockmgr_waitupgrade(struct lock *lkp, u_int flags);
83 
84 /*
85  * Helper, assert basic conditions
86  */
87 static __inline void
88 _lockmgr_assert(struct lock *lkp, u_int flags)
89 {
90 	if (mycpu->gd_intr_nesting_level &&
91 	    (flags & LK_NOWAIT) == 0 &&
92 	    (flags & LK_TYPE_MASK) != LK_RELEASE &&
93 	    panic_cpu_gd != mycpu
94 	) {
95 #ifndef DEBUG_LOCKS
96 		panic("lockmgr %s from %p: called from interrupt, ipi, "
97 		      "or hard code section",
98 		      lkp->lk_wmesg, ((int **)&lkp)[-1]);
99 #else
100 		panic("lockmgr %s from %s:%d: called from interrupt, ipi, "
101 		      "or hard code section",
102 		      lkp->lk_wmesg, file, line);
103 #endif
104 	}
105 
106 #ifdef DEBUG_LOCKS
107 	if (mycpu->gd_spinlocks && ((flags & LK_NOWAIT) == 0)) {
108 		panic("lockmgr %s from %s:%d: called with %d spinlocks held",
109 		      lkp->lk_wmesg, file, line, mycpu->gd_spinlocks);
110 	}
111 #endif
112 }
113 
114 /*
115  * Acquire a shared lock
116  */
117 int
118 lockmgr_shared(struct lock *lkp, u_int flags)
119 {
120 	uint32_t extflags;
121 	thread_t td;
122 	int count;
123 	int error;
124 	int pflags;
125 	int wflags;
126 	int timo;
127 
128 	_lockmgr_assert(lkp, flags);
129 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
130 	td = curthread;
131 	error = 0;
132 	count = lkp->lk_count;
133 
134 	for (;;) {
135 		cpu_ccfence();
136 
137 		/*
138 		 * Normal case
139 		 */
140 		if ((count & (LKC_EXREQ|LKC_UPREQ|LKC_EXCL)) == 0) {
141 			if (atomic_fcmpset_int(&lkp->lk_count,
142 					       &count, count + 1)) {
143 				COUNT(td, 1);
144 				break;
145 			}
146 			continue;
147 		}
148 
149 		/*
150 		 * If the caller already holds the lock exclusively then
151 		 * we silently obtain another count on the exclusive lock.
152 		 *
153 		 * WARNING!  The old FreeBSD behavior was to downgrade,
154 		 *	     but this creates a problem when recursions
155 		 *	     return to the caller and the caller expects
156 		 *	     its original exclusive lock to remain exclusively
157 		 *	     locked.
158 		 */
159 		if (lkp->lk_lockholder == td) {
160 			KKASSERT(count & LKC_EXCL);
161 			if ((extflags & LK_CANRECURSE) == 0) {
162 				if (extflags & LK_NOWAIT) {
163 					error = EBUSY;
164 					break;
165 				}
166 				panic("lockmgr: locking against myself");
167 			}
168 			atomic_add_int(&lkp->lk_count, 1);
169 			COUNT(td, 1);
170 			break;
171 		}
172 
173 		/*
174 		 * Slow path
175 		 */
176 		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
177 		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
178 		wflags = (td->td_flags & TDF_DEADLKTREAT) ?
179 				LKC_EXCL : (LKC_EXCL|LKC_EXREQ|LKC_UPREQ);
180 
181 		/*
182 		 * Block while the lock is held exclusively or, conditionally,
183 		 * if other threads are trying to obtain an exclusive lock or
184 		 * upgrade to one.
185 		 */
186 		if (count & wflags) {
187 			if (extflags & LK_CANCELABLE) {
188 				if (count & LKC_CANCEL) {
189 					error = ENOLCK;
190 					break;
191 				}
192 			}
193 			if (extflags & LK_NOWAIT) {
194 				error = EBUSY;
195 				break;
196 			}
197 
198 			if ((extflags & LK_NOCOLLSTATS) == 0) {
199 				indefinite_info_t info;
200 
201 				flags |= LK_NOCOLLSTATS;
202 				indefinite_init(&info, lkp->lk_wmesg, 1, 'l');
203 				error = lockmgr_shared(lkp, flags);
204 				indefinite_done(&info);
205 				break;
206 			}
207 
208 			tsleep_interlock(lkp, pflags);
209 			if (!atomic_fcmpset_int(&lkp->lk_count, &count,
210 					        count | LKC_SHREQ)) {
211 				continue;
212 			}
213 			error = tsleep(lkp, pflags | PINTERLOCKED,
214 				       lkp->lk_wmesg, timo);
215 			if (error)
216 				break;
217 			if (extflags & LK_SLEEPFAIL) {
218 				error = ENOLCK;
219 				break;
220 			}
221 			continue;
222 		}
223 
224 		/*
225 		 * Otherwise we can bump the count
226 		 */
227 		if (atomic_fcmpset_int(&lkp->lk_count, &count, count + 1)) {
228 			COUNT(td, 1);
229 			break;
230 		}
231 		/* retry */
232 	}
233 	return error;
234 }
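
/*
 * Usage sketch (illustrative only; "lk" is a hypothetical, already
 * initialized lock): a non-blocking shared acquisition.  With LK_NOWAIT
 * the request returns EBUSY instead of sleeping when the lock is held
 * exclusively or contended; adding LK_CANCELABLE makes a pending cancel
 * return ENOLCK.
 *
 *	error = lockmgr(&lk, LK_SHARED | LK_NOWAIT);
 *	if (error == 0) {
 *		... read the protected data ...
 *		lockmgr(&lk, LK_RELEASE);
 *	} else {
 *		... EBUSY, take the fallback path ...
 *	}
 */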
235 
236 /*
237  * Acquire an exclusive lock
238  */
239 int
240 lockmgr_exclusive(struct lock *lkp, u_int flags)
241 {
242 	uint32_t extflags;
243 	thread_t td;
244 	int count;
245 	int error;
246 	int pflags;
247 	int timo;
248 
249 	_lockmgr_assert(lkp, flags);
250 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
251 	td = curthread;
252 
253 	error = 0;
254 	count = lkp->lk_count;
255 
256 	for (;;) {
257 		cpu_ccfence();
258 
259 		/*
260 		 * Exclusive lock critical path.
261 		 */
262 		if (count == 0) {
263 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
264 					       LKC_EXCL | (count + 1))) {
265 				lkp->lk_lockholder = td;
266 				COUNT(td, 1);
267 				break;
268 			}
269 			continue;
270 		}
271 
272 		/*
273 		 * Recursive lock if we already hold it exclusively.
274 		 */
275 		if (lkp->lk_lockholder == td) {
276 			KKASSERT(count & LKC_EXCL);
277 			if ((extflags & LK_CANRECURSE) == 0) {
278 				if (extflags & LK_NOWAIT) {
279 					error = EBUSY;
280 					break;
281 				}
282 				panic("lockmgr: locking against myself");
283 			}
284 			atomic_add_int(&lkp->lk_count, 1);
285 			COUNT(td, 1);
286 			break;
287 		}
288 
289 		/*
290 		 * We will block, handle LK_NOWAIT
291 		 */
292 		if (extflags & LK_NOWAIT) {
293 			error = EBUSY;
294 			break;
295 		}
296 		if (extflags & LK_CANCELABLE) {
297 			if (count & LKC_CANCEL) {
298 				error = ENOLCK;
299 				break;
300 			}
301 		}
302 
303 		if ((extflags & LK_NOCOLLSTATS) == 0) {
304 			indefinite_info_t info;
305 
306 			flags |= LK_NOCOLLSTATS;
307 			indefinite_init(&info, lkp->lk_wmesg, 1, 'L');
308 			error = lockmgr_exclusive(lkp, flags);
309 			indefinite_done(&info);
310 			break;
311 		}
312 
313 		/*
314 		 * Wait until we can obtain the exclusive lock.  EXREQ is
315 		 * automatically cleared when all current holders release
316 		 * so if we abort the operation we can safely leave it set.
317 		 * There might be other exclusive requesters.
318 		 */
319 		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
320 		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
321 
322 		tsleep_interlock(lkp, pflags);
323 		if (!atomic_fcmpset_int(&lkp->lk_count, &count,
324 					count | LKC_EXREQ)) {
325 			continue;
326 		}
327 
328 		error = tsleep(lkp, pflags | PINTERLOCKED,
329 			       lkp->lk_wmesg, timo);
330 		if (error)
331 			break;
332 		if (extflags & LK_SLEEPFAIL) {
333 			error = ENOLCK;
334 			break;
335 		}
336 		/* retry */
337 	}
338 	return error;
339 }
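
/*
 * Usage sketch (illustrative only; "lk" and "examplk" are hypothetical):
 * an interruptible, timed exclusive request.  LK_PCATCH allows signals
 * to interrupt the sleep and LK_TIMELOCK applies the lk_timo value given
 * to lockinit(); on failure the error comes from the underlying tsleep()
 * (e.g. EINTR/ERESTART or EWOULDBLOCK on timeout).
 *
 *	lockinit(&lk, "examplk", 5 * hz, 0);
 *	error = lockmgr(&lk, LK_EXCLUSIVE | LK_PCATCH | LK_TIMELOCK);
 *	if (error == 0) {
 *		... exclusive access ...
 *		lockmgr(&lk, LK_RELEASE);
 *	}
 */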
340 
341 /*
342  * Downgrade an exclusive lock to shared
343  */
344 int
345 lockmgr_downgrade(struct lock *lkp, u_int flags)
346 {
347 	uint32_t extflags;
348 	thread_t otd;
349 	thread_t td;
350 	int count;
351 
352 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
353 	td = curthread;
354 	count = lkp->lk_count;
355 
356 	for (;;) {
357 		cpu_ccfence();
358 
359 		/*
360 		 * Downgrade an exclusive lock into a shared lock.  The caller
361 		 * must hold exactly one exclusive count (no recursion) or we panic.
362 		 *
363 		 * This function always succeeds.
364 		 */
365 		if (lkp->lk_lockholder != td ||
366 		    (count & (LKC_EXCL|LKC_MASK)) != (LKC_EXCL|1)) {
367 			panic("lockmgr: not holding exclusive lock");
368 		}
369 
370 		/*
371 		 * NOTE! Must NULL-out lockholder before releasing LKC_EXCL.
372 		 */
373 		otd = lkp->lk_lockholder;
374 		lkp->lk_lockholder = NULL;
375 		if (atomic_fcmpset_int(&lkp->lk_count, &count,
376 				       count & ~(LKC_EXCL|LKC_SHREQ))) {
377 			if (count & LKC_SHREQ)
378 				wakeup(lkp);
379 			break;
380 		}
381 		lkp->lk_lockholder = otd;
382 		/* retry */
383 	}
384 	return 0;
385 }
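
/*
 * Usage sketch (illustrative only; "lk" is hypothetical and the
 * conventional LK_DOWNGRADE request type is assumed to dispatch here):
 * the single exclusive count becomes a shared count without the lock
 * ever being released, so no other writer can slip in between:
 *
 *	lockmgr(&lk, LK_EXCLUSIVE);
 *	... modify the protected data ...
 *	lockmgr(&lk, LK_DOWNGRADE);
 *	... keep reading alongside other shared holders ...
 *	lockmgr(&lk, LK_RELEASE);
 */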
386 
387 /*
388  * Upgrade a shared lock to exclusive.  If LK_EXCLUPGRADE then guarantee
389  * that no other exclusive requester can get in front of us and fail
390  * immediately if another upgrade is pending.
391  */
392 int
393 lockmgr_upgrade(struct lock *lkp, u_int flags)
394 {
395 	uint32_t extflags;
396 	thread_t td;
397 	int count;
398 	int error;
399 	int pflags;
400 	int wflags;
401 	int timo;
402 
403 	_lockmgr_assert(lkp, flags);
404 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
405 	td = curthread;
406 	error = 0;
407 	count = lkp->lk_count;
408 
409 	for (;;) {
410 		cpu_ccfence();
411 
412 		/*
413 		 * Upgrade from a single shared lock to an exclusive lock.
414 		 *
415 		 * If another thread is ahead of us waiting for an upgrade,
416 		 * we want to fail rather than allow an intervening
417 		 * exclusive access.  The shared lock is released on
418 		 * failure.
419 		 */
420 		if ((flags & LK_TYPE_MASK) == LK_EXCLUPGRADE) {
421 			if (count & LKC_UPREQ) {
422 				lockmgr_release(lkp, LK_RELEASE);
423 				error = EBUSY;
424 				break;
425 			}
426 		}
427 		/* fall through into normal upgrade */
428 
429 		/*
430 		 * Upgrade a shared lock to an exclusive one.  This can cause
431 		 * the lock to be temporarily released and stolen by other
432 		 * threads.  LK_SLEEPFAIL or LK_NOWAIT may be used to detect
433 		 * this case, or use LK_EXCLUPGRADE.
434 		 *
435 		 * If the lock is already exclusively owned by us, this
436 		 * operation is a NOP.
437 		 *
438 		 * If we return an error (even NOWAIT), the current lock will
439 		 * be released.
440 		 *
441 		 * Start with the critical path.
442 		 */
443 		if ((count & (LKC_UPREQ|LKC_EXCL|LKC_MASK)) == 1) {
444 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
445 					       count | LKC_EXCL)) {
446 				lkp->lk_lockholder = td;
447 				break;
448 			}
449 			continue;
450 		}
451 
452 		/*
453 		 * We own a lock coming into this, so there cannot be an
454 		 * UPGRANT already flagged.
455 		 */
456 		KKASSERT((count & LKC_UPGRANT) == 0);
457 
458 		/*
459 		 * If we already hold the lock exclusively this operation
460 		 * succeeds and is a NOP.
461 		 */
462 		if (count & LKC_EXCL) {
463 			if (lkp->lk_lockholder == td)
464 				break;
465 			panic("lockmgr: upgrade unowned lock");
466 		}
467 		if ((count & LKC_MASK) == 0)
468 			panic("lockmgr: upgrade unowned lock");
469 
470 		/*
471 		 * We cannot upgrade without blocking at this point.
472 		 */
473 		if (extflags & LK_NOWAIT) {
474 			lockmgr_release(lkp, LK_RELEASE);
475 			error = EBUSY;
476 			break;
477 		}
478 		if (extflags & LK_CANCELABLE) {
479 			if (count & LKC_CANCEL) {
480 				error = ENOLCK;
481 				break;
482 			}
483 		}
484 
485 		if ((extflags & LK_NOCOLLSTATS) == 0) {
486 			indefinite_info_t info;
487 
488 			flags |= LK_NOCOLLSTATS;
489 			indefinite_init(&info, lkp->lk_wmesg, 1, 'U');
490 			error = lockmgr_upgrade(lkp, flags);
491 			indefinite_done(&info);
492 			break;
493 		}
494 
495 		/*
496 		 * Release the shared lock and request the upgrade.
497 		 */
498 		pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
499 		timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
500 		tsleep_interlock(lkp, pflags);
501 		wflags = (count & LKC_UPREQ) ? LKC_EXREQ : LKC_UPREQ;
502 
503 		/*
504 		 * If someone else owns UPREQ and this transition would
505 		 * allow it to be granted, we have to grant it.  Our
506 		 * lock count is transferred (we effectively release).
507 		 * We will then request a normal exclusive lock.
508 		 *
509 		 * Otherwise we release the shared lock and either do
510 		 * an UPREQ or an EXREQ.  The count is always > 1 in
511 		 * this case since we handle all other count == 1
512 		 * situations here and above.
513 		 */
514 		if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1)) {
515 			wflags |= LKC_EXCL | LKC_UPGRANT;
516 			wflags |= count;
517 			wflags &= ~LKC_UPREQ;	/* was set from count */
518 		} else {
519 			wflags |= (count - 1);
520 		}
521 
522 		if (atomic_fcmpset_int(&lkp->lk_count, &count, wflags)) {
523 			COUNT(td, -1);
524 
525 			/*
526 			 * Must wakeup the thread granted the upgrade.
527 			 */
528 			if ((count & (LKC_UPREQ|LKC_MASK)) == (LKC_UPREQ | 1))
529 				wakeup(lkp);
530 
531 			error = tsleep(lkp, pflags | PINTERLOCKED,
532 				       lkp->lk_wmesg, timo);
533 			if (error) {
534 				if ((count & LKC_UPREQ) == 0)
535 					undo_upreq(lkp);
536 				break;
537 			}
538 			if (extflags & LK_SLEEPFAIL) {
539 				if ((count & LKC_UPREQ) == 0)
540 					undo_upreq(lkp);
541 				error = ENOLCK;
542 				break;
543 			}
544 
545 			/*
546 			 * Continue as either lockmgr_exclusive() or lockmgr_waitupgrade(),
547 			 * depending on whether we were able to acquire the
548 			 * LKC_UPREQ bit.
549 			 */
550 			if (count & LKC_UPREQ)
551 				error = lockmgr_exclusive(lkp, flags);
552 			else
553 				error = lockmgr_waitupgrade(lkp, flags);
554 			break;
555 		}
556 		/* retry */
557 	}
558 	return error;
559 }
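
/*
 * Usage sketch (illustrative only; "lk" is hypothetical): upgrading a
 * shared hold.  With LK_EXCLUPGRADE the request fails with EBUSY if
 * another upgrade is already pending, and as noted above the shared
 * count is dropped on an error return, so the caller must reacquire
 * from scratch:
 *
 *	lockmgr(&lk, LK_SHARED);
 *	...
 *	if (lockmgr(&lk, LK_EXCLUPGRADE) != 0) {
 *		... no longer holding the lock, start over ...
 *		lockmgr(&lk, LK_EXCLUSIVE);
 *	}
 *	... exclusive access ...
 *	lockmgr(&lk, LK_RELEASE);
 */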
560 
561 /*
562  * (internal helper)
563  */
564 static int
565 lockmgr_waitupgrade(struct lock *lkp, u_int flags)
566 {
567 	uint32_t extflags;
568 	thread_t td;
569 	int count;
570 	int error;
571 	int pflags;
572 	int timo;
573 
574 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
575 	td = curthread;
576 	error = 0;
577 	count = lkp->lk_count;
578 
579 	for (;;) {
580 		cpu_ccfence();
581 
582 		/*
583 		 * We own the LKC_UPREQ bit, wait until we are granted the
584 		 * exclusive lock (LKC_UPGRANT is set).
585 		 *
586 		 * IF THE OPERATION FAILS (tsleep error or tsleep+LK_SLEEPFAIL),
587 		 * we have to undo the upgrade request and clean up any lock
588 		 * that might have been granted via a race.
589 		 */
590 		if (count & LKC_UPGRANT) {
591 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
592 					       count & ~LKC_UPGRANT)) {
593 				lkp->lk_lockholder = td;
594 				KKASSERT(count & LKC_EXCL);
595 				break;
596 			}
597 			/* retry */
598 		} else if ((count & LKC_CANCEL) && (extflags & LK_CANCELABLE)) {
599 			undo_upreq(lkp);
600 			error = ENOLCK;
601 			break;
602 		} else {
603 			pflags = (extflags & LK_PCATCH) ? PCATCH : 0;
604 			timo = (extflags & LK_TIMELOCK) ? lkp->lk_timo : 0;
605 			tsleep_interlock(lkp, pflags);
606 			if (atomic_fcmpset_int(&lkp->lk_count, &count, count)) {
607 				error = tsleep(lkp, pflags | PINTERLOCKED,
608 					       lkp->lk_wmesg, timo);
609 				if (error) {
610 					undo_upreq(lkp);
611 					break;
612 				}
613 				if (extflags & LK_SLEEPFAIL) {
614 					error = ENOLCK;
615 					undo_upreq(lkp);
616 					break;
617 				}
618 			}
619 			/* retry */
620 		}
621 		/* retry */
622 	}
623 	return error;
624 }
625 
626 /*
627  * Release a held lock
628  */
629 int
630 lockmgr_release(struct lock *lkp, u_int flags)
631 {
632 	uint32_t extflags;
633 	thread_t otd;
634 	thread_t td;
635 	int count;
636 
637 	extflags = (flags | lkp->lk_flags) & LK_EXTFLG_MASK;
638 	td = curthread;
639 	count = lkp->lk_count;
640 
641 	for (;;) {
642 		cpu_ccfence();
643 
644 		/*
645 		 * Release the currently held lock.  If releasing the current
646 		 * lock as part of an error return, error will ALREADY be
647 		 * non-zero.
648 		 *
649 		 * When releasing the last lock we automatically transition
650 		 * LKC_UPREQ to LKC_EXCL|1.
651 		 *
652 		 * WARNING! We cannot detect when there are multiple exclusive
653 		 *	    requests pending.  We clear EXREQ unconditionally
654 		 *	    on the 1->0 transition so it is possible for
655 		 *	    shared requests to race the next exclusive
656 		 *	    request.
657 		 *
658 		 * WARNING! lksleep() assumes that LK_RELEASE does not
659 		 *	    block.
660 		 *
661 		 * Always succeeds.
662 		 */
663 		if ((count & LKC_MASK) == 0)
664 			panic("lockmgr: LK_RELEASE: no lock held");
665 
666 		if (count & LKC_EXCL) {
667 			if (lkp->lk_lockholder != LK_KERNTHREAD &&
668 			    lkp->lk_lockholder != td) {
669 				panic("lockmgr: pid %d, not exclusive "
670 				      "lock holder thr %p/%p unlocking",
671 				    (td->td_proc ? td->td_proc->p_pid : -1),
672 				    td, lkp->lk_lockholder);
673 			}
674 			if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
675 				/*
676 				 * Last exclusive count is being released
677 				 */
678 				otd = lkp->lk_lockholder;
679 				lkp->lk_lockholder = NULL;
680 				if (!atomic_fcmpset_int(&lkp->lk_count,
681 							&count,
682 					      (count - 1) &
683 					   ~(LKC_EXCL | LKC_EXREQ |
684 					     LKC_SHREQ | LKC_CANCEL))) {
685 					lkp->lk_lockholder = otd;
686 					continue;
687 				}
688 				if (count & (LKC_EXREQ|LKC_SHREQ))
689 					wakeup(lkp);
690 				/* success */
691 			} else if ((count & (LKC_UPREQ|LKC_MASK)) ==
692 				   (LKC_UPREQ | 1)) {
693 				/*
694 				 * Last exclusive count is being released but
695 				 * an upgrade request is present, automatically
696 				 * grant an exclusive state to the owner of
697 				 * the upgrade request.
698 				 */
699 				otd = lkp->lk_lockholder;
700 				lkp->lk_lockholder = NULL;
701 				if (!atomic_fcmpset_int(&lkp->lk_count,
702 							&count,
703 						(count & ~LKC_UPREQ) |
704 						LKC_UPGRANT)) {
705 					lkp->lk_lockholder = otd;
706 					continue;
707 				}
708 				wakeup(lkp);
709 				/* success */
710 			} else {
711 				otd = lkp->lk_lockholder;
712 				if (!atomic_fcmpset_int(&lkp->lk_count,
713 							&count,
714 							count - 1)) {
715 					continue;
716 				}
717 				/* success */
718 			}
719 			/* success */
720 			if (otd != LK_KERNTHREAD)
721 				COUNT(td, -1);
722 		} else {
723 			if ((count & (LKC_UPREQ|LKC_MASK)) == 1) {
724 				/*
725 				 * Last shared count is being released,
726 				 * no upgrade request present.
727 				 */
728 				if (!atomic_fcmpset_int(&lkp->lk_count,
729 							&count,
730 					      (count - 1) &
731 					       ~(LKC_EXREQ | LKC_SHREQ |
732 						 LKC_CANCEL))) {
733 					continue;
734 				}
735 				if (count & (LKC_EXREQ|LKC_SHREQ))
736 					wakeup(lkp);
737 				/* success */
738 			} else if ((count & (LKC_UPREQ|LKC_MASK)) ==
739 				   (LKC_UPREQ | 1)) {
740 				/*
741 				 * Last shared count is being released but
742 				 * an upgrade request is present, automatically
743 				 * grant an exclusive state to the owner of
744 				 * the upgrade request.  Masked count
745 				 * remains 1.
746 				 */
747 				if (!atomic_fcmpset_int(&lkp->lk_count,
748 							&count,
749 					      (count & ~(LKC_UPREQ |
750 							 LKC_CANCEL)) |
751 					      LKC_EXCL | LKC_UPGRANT)) {
752 					continue;
753 				}
754 				wakeup(lkp);
755 			} else {
756 				/*
757 				 * Shared count is greater than 1, just
758 				 * decrement it by one.
759 				 */
760 				if (!atomic_fcmpset_int(&lkp->lk_count,
761 							&count,
762 							count - 1)) {
763 					continue;
764 				}
765 			}
766 			/* success */
767 			COUNT(td, -1);
768 		}
769 		break;
770 	}
771 	return 0;
772 }
773 
774 /*
775  * Start canceling blocked requesters or later requesters.
776  * Only blocked requesters using CANCELABLE can be canceled.
777  *
778  * This is intended to allow other requesters (usually the
779  * caller) to then obtain a non-cancelable lock.
780  *
781  * Don't waste time issuing a wakeup if nobody is pending.
782  */
783 int
784 lockmgr_cancel_beg(struct lock *lkp, u_int flags)
785 {
786 	int count;
787 
788 	count = lkp->lk_count;
789 	for (;;) {
790 		cpu_ccfence();
791 
792 		KKASSERT((count & LKC_CANCEL) == 0);	/* disallowed case */
793 		KKASSERT((count & LKC_MASK) != 0);	/* issue w/lock held */
794 		if (!atomic_fcmpset_int(&lkp->lk_count,
795 				        &count, count | LKC_CANCEL)) {
796 			continue;
797 		}
798 		if (count & (LKC_EXREQ|LKC_SHREQ|LKC_UPREQ)) {
799 			wakeup(lkp);
800 		}
801 		break;
802 	}
803 	return 0;
804 }
805 
806 /*
807  * End our cancel request (typically after we have acquired
808  * the lock ourselves).
809  */
810 int
811 lockmgr_cancel_end(struct lock *lkp, u_int flags)
812 {
813 	atomic_clear_int(&lkp->lk_count, LKC_CANCEL);
814 
815 	return 0;
816 }
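
/*
 * Usage sketch (illustrative only; "lk" is hypothetical), modeled on
 * sysctl_cancel_lock() and sysctl_cancel_test() below: the current
 * holder raises LK_CANCEL_BEG so that requesters using LK_CANCELABLE
 * fail with ENOLCK while ordinary requests still block normally.
 * Releasing the last count also clears the cancel state, so
 * lockmgr_cancel_end() is only needed to end the cancel while keeping
 * the lock held.
 *
 *	lockmgr(&lk, LK_EXCLUSIVE);
 *	lockmgr(&lk, LK_CANCEL_BEG);
 *	...
 *	lockmgr(&lk, LK_RELEASE);
 *
 *	... meanwhile, in another thread ...
 *	error = lockmgr(&lk, LK_EXCLUSIVE | LK_CANCELABLE);
 *	if (error == ENOLCK)
 *		... the request was canceled ...
 */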
817 
818 /*
819  * Undo an upgrade request
820  */
821 static
822 void
823 undo_upreq(struct lock *lkp)
824 {
825 	int count;
826 
827 	count = lkp->lk_count;
828 	for (;;) {
829 		cpu_ccfence();
830 
831 		if (count & LKC_UPGRANT) {
832 			/*
833 			 * UPREQ was shifted to UPGRANT.  We own UPGRANT now,
834 			 * another thread might own UPREQ.  Clear UPGRANT
835 			 * and release the granted lock.
836 			 */
837 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
838 					       count & ~LKC_UPGRANT)) {
839 				lkp->lk_lockholder = curthread;
840 				lockmgr(lkp, LK_RELEASE);
841 				break;
842 			}
843 		} else if (count & LKC_EXCL) {
844 			/*
845 			 * Clear the UPREQ we still own.  Nobody to wakeup
846 			 * here because there is an existing exclusive
847 			 * holder.
848 			 */
849 			KKASSERT(count & LKC_UPREQ);
850 			KKASSERT((count & LKC_MASK) > 0);
851 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
852 					       count & ~LKC_UPREQ)) {
853 				wakeup(lkp);
854 				break;
855 			}
856 		} else if (count & LKC_EXREQ) {
857 			/*
858 			 * Clear the UPREQ we still own.  We cannot wakeup any
859 			 * shared waiters because there is an exclusive
860 			 * request pending.
861 			 */
862 			KKASSERT(count & LKC_UPREQ);
863 			KKASSERT((count & LKC_MASK) > 0);
864 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
865 					       count & ~LKC_UPREQ)) {
866 				break;
867 			}
868 		} else {
869 			/*
870 			 * Clear the UPREQ we still own.  Wakeup any shared
871 			 * waiters.
872 			 */
873 			KKASSERT(count & LKC_UPREQ);
874 			KKASSERT((count & LKC_MASK) > 0);
875 			if (atomic_fcmpset_int(&lkp->lk_count, &count,
876 					       count &
877 					       ~(LKC_UPREQ | LKC_SHREQ))) {
878 				if (count & LKC_SHREQ)
879 					wakeup(lkp);
880 				break;
881 			}
882 		}
883 		/* retry */
884 	}
885 }
886 
887 void
888 lockmgr_kernproc(struct lock *lp)
889 {
890 	struct thread *td __debugvar = curthread;
891 
892 	if (lp->lk_lockholder != LK_KERNTHREAD) {
893 		KASSERT(lp->lk_lockholder == td,
894 		    ("lockmgr_kernproc: lock not owned by curthread %p: %p",
895 		    td, lp->lk_lockholder));
896 		lp->lk_lockholder = LK_KERNTHREAD;
897 		COUNT(td, -1);
898 	}
899 }
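
/*
 * Usage sketch (illustrative only; "lk" is hypothetical):
 * lockmgr_kernproc() hands an exclusively held lock off to LK_KERNTHREAD
 * so that a different thread may legally issue the eventual LK_RELEASE
 * (lockmgr_release() above accepts LK_KERNTHREAD as the holder):
 *
 *	lockmgr(&lk, LK_EXCLUSIVE);
 *	lockmgr_kernproc(&lk);
 *	... hand the object off; another thread does lockmgr(&lk, LK_RELEASE) ...
 */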
900 
901 /*
902  * Initialize a lock; required before use.
903  */
904 void
905 lockinit(struct lock *lkp, const char *wmesg, int timo, int flags)
906 {
907 	lkp->lk_flags = (flags & LK_EXTFLG_MASK);
908 	lkp->lk_count = 0;
909 	lkp->lk_wmesg = wmesg;
910 	lkp->lk_timo = timo;
911 	lkp->lk_lockholder = LK_NOTHREAD;
912 }
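
/*
 * Usage sketch (illustrative only; "example_lk", "examplelk" and
 * "examplk" are placeholder names): file-scope locks are typically set
 * up with LOCK_SYSINIT() (compare the DEBUG_CANCEL_LOCKS cancel_lk
 * above), which arranges for lock_sysinit()/lockinit() to run during
 * boot:
 *
 *	static struct lock example_lk;
 *	LOCK_SYSINIT(examplelk, &example_lk, "examplk", 0);
 */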
913 
914 /*
915  * Reinitialize a lock that is being reused for a different purpose, but
916  * which may have pending (blocked) threads sitting on it.  The caller
917  * must already hold the interlock.
918  */
919 void
920 lockreinit(struct lock *lkp, const char *wmesg, int timo, int flags)
921 {
922 	lkp->lk_wmesg = wmesg;
923 	lkp->lk_timo = timo;
924 }
925 
926 /*
927  * De-initialize a lock.  The structure must no longer be used by anyone.
928  */
929 void
930 lockuninit(struct lock *lkp)
931 {
932 	KKASSERT((lkp->lk_count & (LKC_EXREQ|LKC_SHREQ|LKC_UPREQ)) == 0);
933 }
934 
935 /*
936  * Determine the status of a lock.
937  */
938 int
939 lockstatus(struct lock *lkp, struct thread *td)
940 {
941 	int lock_type = 0;
942 	int count;
943 
944 	count = lkp->lk_count;
945 	cpu_ccfence();
946 
947 	if (count & LKC_EXCL) {
948 		if (td == NULL || lkp->lk_lockholder == td)
949 			lock_type = LK_EXCLUSIVE;
950 		else
951 			lock_type = LK_EXCLOTHER;
952 	} else if (count & LKC_MASK) {
953 		lock_type = LK_SHARED;
954 	}
955 	return (lock_type);
956 }
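
/*
 * Usage sketch (illustrative only; "lk" is hypothetical): lockstatus()
 * is convenient for asserting that the calling thread really holds the
 * lock exclusively.  Passing td == NULL reports LK_EXCLUSIVE for any
 * exclusive holder.
 *
 *	KKASSERT(lockstatus(&lk, curthread) == LK_EXCLUSIVE);
 */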
957 
958 /*
959  * Return non-zero if the caller owns the lock shared or exclusive.
960  * We can only guess re: shared locks.
961  */
962 int
963 lockowned(struct lock *lkp)
964 {
965 	thread_t td = curthread;
966 	int count;
967 
968 	count = lkp->lk_count;
969 	cpu_ccfence();
970 
971 	if (count & LKC_EXCL)
972 		return(lkp->lk_lockholder == td);
973 	else
974 		return((count & LKC_MASK) != 0);
975 }
976 
977 /*
978  * Determine the number of holders of a lock.
979  *
980  * The non-blocking version can usually be used for assertions.
981  */
982 int
983 lockcount(struct lock *lkp)
984 {
985 	return(lkp->lk_count & LKC_MASK);
986 }
987 
988 int
989 lockcountnb(struct lock *lkp)
990 {
991 	return(lkp->lk_count & LKC_MASK);
992 }
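
/*
 * Usage sketch (illustrative only; "lk" is hypothetical): lockowned()
 * and lockcountnb() are likewise mostly useful in assertions, e.g.
 * before tearing a lock down:
 *
 *	KKASSERT(lockowned(&lk) == 0);
 *	KKASSERT(lockcountnb(&lk) == 0);
 *	lockuninit(&lk);
 */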
993 
994 /*
995  * Print out information about state of a lock. Used by VOP_PRINT
996  * routines to display status about contained locks.
997  */
998 void
999 lockmgr_printinfo(struct lock *lkp)
1000 {
1001 	struct thread *td = lkp->lk_lockholder;
1002 	struct proc *p;
1003 	int count;
1004 
1005 	count = lkp->lk_count;
1006 	cpu_ccfence();
1007 
1008 	if (td && td != LK_KERNTHREAD && td != LK_NOTHREAD)
1009 		p = td->td_proc;
1010 	else
1011 		p = NULL;
1012 
1013 	if (count & LKC_EXCL) {
1014 		kprintf(" lock type %s: EXCLUS (count %08x) by td %p pid %d",
1015 		    lkp->lk_wmesg, count, td,
1016 		    p ? p->p_pid : -99);
1017 	} else if (count & LKC_MASK) {
1018 		kprintf(" lock type %s: SHARED (count %08x)",
1019 		    lkp->lk_wmesg, count);
1020 	} else {
1021 		kprintf(" lock type %s: NOTHELD", lkp->lk_wmesg);
1022 	}
1023 	if (count & (LKC_EXREQ|LKC_SHREQ))
1024 		kprintf(" with waiters\n");
1025 	else
1026 		kprintf("\n");
1027 }
1028 
1029 void
1030 lock_sysinit(struct lock_args *arg)
1031 {
1032 	lockinit(arg->la_lock, arg->la_desc, 0, arg->la_flags);
1033 }
1034 
1035 #ifdef DEBUG_CANCEL_LOCKS
1036 
1037 static
1038 int
1039 sysctl_cancel_lock(SYSCTL_HANDLER_ARGS)
1040 {
1041 	int error;
1042 
1043 	if (req->newptr) {
1044 		SYSCTL_XUNLOCK();
1045 		lockmgr(&cancel_lk, LK_EXCLUSIVE);
1046 		error = tsleep(&error, PCATCH, "canmas", hz * 5);
1047 		lockmgr(&cancel_lk, LK_CANCEL_BEG);
1048 		error = tsleep(&error, PCATCH, "canmas", hz * 5);
1049 		lockmgr(&cancel_lk, LK_RELEASE);
1050 		SYSCTL_XLOCK();
1051 		SYSCTL_OUT(req, &error, sizeof(error));
1052 	}
1053 	error = 0;
1054 
1055 	return error;
1056 }
1057 
1058 static
1059 int
1060 sysctl_cancel_test(SYSCTL_HANDLER_ARGS)
1061 {
1062 	int error;
1063 
1064 	if (req->newptr) {
1065 		error = lockmgr(&cancel_lk, LK_EXCLUSIVE|LK_CANCELABLE);
1066 		if (error == 0)
1067 			lockmgr(&cancel_lk, LK_RELEASE);
1068 		SYSCTL_OUT(req, &error, sizeof(error));
1069 		kprintf("test %d\n", error);
1070 	}
1071 
1072 	return 0;
1073 }
1074 
1075 #endif
1076