xref: /dflybsd-src/sys/kern/lwkt_token.c (revision 837afe1aaee7bf67a7757900dc47ec41a47314d9)
1 /*
2  * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * lwkt_token - Implement soft token locks.
37  *
38  * Tokens are locks which serialize a thread only while the thread is
39  * running.  If the thread blocks all tokens are released, then reacquired
40  * when the thread resumes.
41  *
42  * This implementation requires no critical sections or spin locks, but
43  * does use atomic operations (e.g. atomic_fcmpset_long()) on the count.
44  *
45  * Tokens may be recursively acquired by the same thread.  However, the
46  * caller must be sure to release such tokens in reverse order.
47  */
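
/*
 * Editorial usage sketch (not part of the original revision): a typical
 * nested acquisition and the required reverse-order release.  "foo_token"
 * and "bar_token" are hypothetical tokens declared elsewhere with
 * LWKT_TOKEN_INITIALIZER().
 *
 *	lwkt_gettoken(&foo_token);
 *	lwkt_gettoken(&bar_token);
 *	...				(if we block here, both tokens are
 *					 dropped and reacquired before resume)
 *	lwkt_reltoken(&bar_token);	(release in reverse order)
 *	lwkt_reltoken(&foo_token);
 */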
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/proc.h>
52 #include <sys/rtprio.h>
53 #include <sys/queue.h>
54 #include <sys/sysctl.h>
55 #include <sys/ktr.h>
56 #include <sys/kthread.h>
57 #include <machine/cpu.h>
58 #include <sys/lock.h>
59 #include <sys/spinlock.h>
60 
61 #include <sys/thread2.h>
62 #include <sys/spinlock2.h>
63 #include <sys/mplock2.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/vm_kern.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_map.h>
71 #include <vm/vm_pager.h>
72 #include <vm/vm_extern.h>
73 #include <vm/vm_zone.h>
74 
75 #include <machine/stdarg.h>
76 #include <machine/smp.h>
77 
78 #include "opt_ddb.h"
79 #ifdef DDB
80 #include <ddb/ddb.h>
81 #endif
82 
83 extern int lwkt_sched_debug;
84 
85 #define TOKEN_PRIME		66555444443333333ULL
86 
87 #ifndef LWKT_NUM_POOL_TOKENS
88 #define LWKT_NUM_POOL_TOKENS	4096	/* power of 2 */
89 #endif
90 
91 struct lwkt_pool_token {
92 	struct lwkt_token	token;
93 } __cachealign;
94 
95 static struct lwkt_pool_token	pool_tokens[LWKT_NUM_POOL_TOKENS];
96 struct spinlock tok_debug_spin = SPINLOCK_INITIALIZER(&tok_debug_spin,
97 						      "tok_debug_spin");
98 
99 #define TOKEN_STRING	"REF=%p TOK=%p TD=%p"
100 #define TOKEN_ARGS	lwkt_tokref_t ref, lwkt_token_t tok, struct thread *td
101 #define CONTENDED_STRING	TOKEN_STRING " (contention started)"
102 #define UNCONTENDED_STRING	TOKEN_STRING " (contention stopped)"
103 #if !defined(KTR_TOKENS)
104 #define	KTR_TOKENS	KTR_ALL
105 #endif
106 
107 KTR_INFO_MASTER(tokens);
108 KTR_INFO(KTR_TOKENS, tokens, fail, 0, TOKEN_STRING, TOKEN_ARGS);
109 KTR_INFO(KTR_TOKENS, tokens, succ, 1, TOKEN_STRING, TOKEN_ARGS);
110 #if 0
111 KTR_INFO(KTR_TOKENS, tokens, release, 2, TOKEN_STRING, TOKEN_ARGS);
112 KTR_INFO(KTR_TOKENS, tokens, remote, 3, TOKEN_STRING, TOKEN_ARGS);
113 KTR_INFO(KTR_TOKENS, tokens, reqremote, 4, TOKEN_STRING, TOKEN_ARGS);
114 KTR_INFO(KTR_TOKENS, tokens, reqfail, 5, TOKEN_STRING, TOKEN_ARGS);
115 KTR_INFO(KTR_TOKENS, tokens, drain, 6, TOKEN_STRING, TOKEN_ARGS);
116 KTR_INFO(KTR_TOKENS, tokens, contention_start, 7, CONTENDED_STRING, TOKEN_ARGS);
117 KTR_INFO(KTR_TOKENS, tokens, contention_stop, 7, UNCONTENDED_STRING, TOKEN_ARGS);
118 #endif
119 
120 #define logtoken(name, ref)						\
121 	KTR_LOG(tokens_ ## name, ref, ref->tr_tok, curthread)
122 
123 /*
124  * Global tokens.  These replace the MP lock for major subsystem locking.
125  * These tokens are initially used to lock both global and individual
126  * operations.
127  *
128  * Once individual structures get their own locks these tokens are used
129  * only to protect global lists & other variables and to interlock
130  * allocations and teardowns and such.
131  *
132  * The UP initializer causes token acquisition to also acquire the MP lock
133  * for maximum compatibility.  The feature may be enabled and disabled at
134  * any time; the MP state is copied to the tokref when the token is acquired
135  * and will not race against sysctl changes.
136  */
137 struct lwkt_token mp_token = LWKT_TOKEN_INITIALIZER(mp_token);
138 struct lwkt_token pmap_token = LWKT_TOKEN_INITIALIZER(pmap_token);
139 struct lwkt_token dev_token = LWKT_TOKEN_INITIALIZER(dev_token);
140 struct lwkt_token vm_token = LWKT_TOKEN_INITIALIZER(vm_token);
141 struct lwkt_token vmspace_token = LWKT_TOKEN_INITIALIZER(vmspace_token);
142 struct lwkt_token kvm_token = LWKT_TOKEN_INITIALIZER(kvm_token);
143 struct lwkt_token sigio_token = LWKT_TOKEN_INITIALIZER(sigio_token);
144 struct lwkt_token tty_token = LWKT_TOKEN_INITIALIZER(tty_token);
145 struct lwkt_token vnode_token = LWKT_TOKEN_INITIALIZER(vnode_token);
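
/*
 * Editorial sketch (illustrative, not part of the original revision):
 * subsystem code brackets access to a global, token-protected structure
 * with one of the tokens above, e.g.:
 *
 *	lwkt_gettoken(&vm_token);
 *	...			(manipulate vm_token-protected globals)
 *	lwkt_reltoken(&vm_token);
 */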
146 
147 static int lwkt_token_spin = 5;
148 SYSCTL_INT(_lwkt, OID_AUTO, token_spin, CTLFLAG_RW,
149     &lwkt_token_spin, 0, "Decontention spin loops");
150 
151 /*
152  * The collision count is bumped every time the LWKT scheduler fails
153  * to acquire needed tokens in addition to a normal lwkt_gettoken()
154  * stall.
155  */
156 SYSCTL_LONG(_lwkt, OID_AUTO, mp_collisions, CTLFLAG_RW,
157     &mp_token.t_collisions, 0, "Collision counter of mp_token");
158 SYSCTL_LONG(_lwkt, OID_AUTO, pmap_collisions, CTLFLAG_RW,
159     &pmap_token.t_collisions, 0, "Collision counter of pmap_token");
160 SYSCTL_LONG(_lwkt, OID_AUTO, dev_collisions, CTLFLAG_RW,
161     &dev_token.t_collisions, 0, "Collision counter of dev_token");
162 SYSCTL_LONG(_lwkt, OID_AUTO, vm_collisions, CTLFLAG_RW,
163     &vm_token.t_collisions, 0, "Collision counter of vm_token");
164 SYSCTL_LONG(_lwkt, OID_AUTO, vmspace_collisions, CTLFLAG_RW,
165     &vmspace_token.t_collisions, 0, "Collision counter of vmspace_token");
166 SYSCTL_LONG(_lwkt, OID_AUTO, kvm_collisions, CTLFLAG_RW,
167     &kvm_token.t_collisions, 0, "Collision counter of kvm_token");
168 SYSCTL_LONG(_lwkt, OID_AUTO, sigio_collisions, CTLFLAG_RW,
169     &sigio_token.t_collisions, 0, "Collision counter of sigio_token");
170 SYSCTL_LONG(_lwkt, OID_AUTO, tty_collisions, CTLFLAG_RW,
171     &tty_token.t_collisions, 0, "Collision counter of tty_token");
172 SYSCTL_LONG(_lwkt, OID_AUTO, vnode_collisions, CTLFLAG_RW,
173     &vnode_token.t_collisions, 0, "Collision counter of vnode_token");
174 
175 int tokens_debug_output;
176 SYSCTL_INT(_lwkt, OID_AUTO, tokens_debug_output, CTLFLAG_RW,
177     &tokens_debug_output, 0, "Generate stack trace N times");
178 
179 static int _lwkt_getalltokens_sorted(thread_t td);
180 
181 /*
182  * Acquire the initial mplock
183  *
184  * (low level boot only)
185  */
186 void
187 cpu_get_initial_mplock(void)
188 {
189 	KKASSERT(mp_token.t_ref == NULL);
190 	if (lwkt_trytoken(&mp_token) == FALSE)
191 		panic("cpu_get_initial_mplock");
192 }
193 
194 /*
195  * Return a pool token given an address.  Use a prime number to reduce
196  * overlaps.
197  */
198 static __inline
199 lwkt_token_t
200 _lwkt_token_pool_lookup(void *ptr)
201 {
202 	uintptr_t n;
203 
204 	n = (uintptr_t)ptr + ((uintptr_t)ptr >> 18);
205 	n = (n % TOKEN_PRIME);
206 	n = (n ^ (n >> 16)) & (LWKT_NUM_POOL_TOKENS - 1);
207 	return (&pool_tokens[n].token);
208 }
209 
210 /*
211  * Initialize a tokref_t prior to making it visible in the thread's
212  * token array.
213  */
214 static __inline
215 void
216 _lwkt_tokref_init(lwkt_tokref_t ref, lwkt_token_t tok, thread_t td, long excl)
217 {
218 	ref->tr_tok = tok;
219 	ref->tr_count = excl;
220 	ref->tr_owner = td;
221 }
222 
223 /*
224  * Attempt to acquire a shared or exclusive token.  Returns TRUE on success,
225  * FALSE on failure.
226  *
227  * If TOK_EXCLUSIVE is set in mode we are attempting to get an exclusive
228  * token, otherwise we are attempting to get a shared token.
229  *
230  * If TOK_EXCLREQ is set in mode this is a blocking operation, otherwise
231  * it is a non-blocking operation (for both exclusive and shared acquisitions).
232  */
233 static __inline
234 int
235 _lwkt_trytokref(lwkt_tokref_t ref, thread_t td, long mode)
236 {
237 	lwkt_token_t tok;
238 	lwkt_tokref_t oref;
239 	long count;
240 
241 	tok = ref->tr_tok;
242 	KASSERT(((mode & TOK_EXCLREQ) == 0 ||	/* non blocking */
243 		td->td_gd->gd_intr_nesting_level == 0 ||
244 		panic_cpu_gd == mycpu),
245 		("Attempt to acquire token %p not already "
246 		"held in hard code section", tok));
247 
248 	if (mode & TOK_EXCLUSIVE) {
249 		/*
250 		 * Attempt to get an exclusive token
251 		 */
252 		count = tok->t_count;
253 
254 		for (;;) {
255 			oref = tok->t_ref;	/* can be NULL */
256 			cpu_ccfence();
257 			if ((count & ~TOK_EXCLREQ) == 0) {
258 				/*
259 				 * It is possible to get the exclusive bit.
260 				 * We must clear TOK_EXCLREQ on successful
261 				 * acquisition.
262 				 */
263 				if (atomic_fcmpset_long(&tok->t_count, &count,
264 						        (count & ~TOK_EXCLREQ) |
265 						        TOK_EXCLUSIVE)) {
266 					KKASSERT(tok->t_ref == NULL);
267 					tok->t_ref = ref;
268 					return TRUE;
269 				}
270 				/* retry */
271 			} else if ((count & TOK_EXCLUSIVE) &&
272 				   oref >= &td->td_toks_base &&
273 				   oref < td->td_toks_stop) {
274 				/*
275 				 * Our thread already holds the exclusive
276 				 * bit, we treat this tokref as a shared
277 				 * token (sorta) to make the token release
278 				 * code easier.  Treating this as a shared
279 				 * token allows us to simply increment the
280 				 * count field.
281 				 *
282 				 * NOTE: oref cannot race above if it
283 				 *	 happens to be ours, so we're good.
284 				 *	 But we must still have a stable
285 				 *	 variable for both parts of the
286 				 *	 comparison.
287 				 *
288 				 * NOTE: Since we already have an exclusive
289 				 *	 lock and don't need to check EXCLREQ
290 				 *	 we can just use an atomic_add here
291 				 */
292 				atomic_add_long(&tok->t_count, TOK_INCR);
293 				ref->tr_count &= ~TOK_EXCLUSIVE;
294 				return TRUE;
295 			} else if ((mode & TOK_EXCLREQ) &&
296 				   (count & TOK_EXCLREQ) == 0) {
297 				/*
298 				 * Unable to get the exclusive bit but being
299 				 * asked to set the exclusive-request bit.
300 				 * Since we are going to retry anyway just
301 				 * set the bit unconditionally.
302 				 */
303 				atomic_set_long(&tok->t_count, TOK_EXCLREQ);
304 				return FALSE;
305 			} else {
306 				/*
307 				 * Unable to get the exclusive bit and not
308 				 * being asked to set the exclusive-request
309 				 * (aka lwkt_trytoken()), or EXCLREQ was
310 				 * already set.
311 				 */
312 				cpu_pause();
313 				return FALSE;
314 			}
315 			/* retry */
316 		}
317 	} else {
318 		/*
319 		 * Attempt to get a shared token.  Note that TOK_EXCLREQ
320 		 * for shared tokens simply means the caller intends to
321 		 * block.  We never actually set the bit in tok->t_count.
322 		 *
323 		 * Due to the token's no-deadlock guarantee, and complications
324 		 * created by the sorted reacquisition code, we can only
325 		 * give exclusive requests priority over shared requests
326 		 * in situations where the thread holds only one token.
327 		 */
328 		count = tok->t_count;
329 
330 		for (;;) {
331 			oref = tok->t_ref;	/* can be NULL */
332 			cpu_ccfence();
333 			if ((count & (TOK_EXCLUSIVE|TOK_EXCLREQ)) == 0 ||
334 			    ((count & TOK_EXCLUSIVE) == 0 &&
335 			    td->td_toks_stop != &td->td_toks_base + 1)
336 			) {
337 				/*
338 				 * It may be possible to get the token shared.
339 				 */
340 				if ((atomic_fetchadd_long(&tok->t_count, TOK_INCR) & TOK_EXCLUSIVE) == 0) {
341 					return TRUE;
342 				}
343 				count = atomic_fetchadd_long(&tok->t_count,
344 							     -TOK_INCR);
345 				count -= TOK_INCR;
346 				/* retry */
347 			} else if ((count & TOK_EXCLUSIVE) &&
348 				   oref >= &td->td_toks_base &&
349 				   oref < td->td_toks_stop) {
350 				/*
351 				 * We own the exclusive bit on the token so
352 				 * we can in fact also get it shared.
353 				 */
354 				atomic_add_long(&tok->t_count, TOK_INCR);
355 				return TRUE;
356 			} else {
357 				/*
358 				 * We failed to get the token shared
359 				 */
360 				return FALSE;
361 			}
362 			/* retry */
363 		}
364 	}
365 }
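
/*
 * Editorial summary (not part of the original revision) of the mode bits
 * as passed to _lwkt_trytokref() by the public entry points in this file:
 *
 *	TOK_EXCLUSIVE | TOK_EXCLREQ	blocking exclusive (lwkt_gettoken)
 *	TOK_EXCLUSIVE			non-blocking exclusive (lwkt_trytoken)
 *	TOK_EXCLREQ			blocking shared (lwkt_gettoken_shared)
 */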
366 
367 static __inline
368 int
369 _lwkt_trytokref_spin(lwkt_tokref_t ref, thread_t td, long mode)
370 {
371 	int spin;
372 
373 	if (_lwkt_trytokref(ref, td, mode))
374 		return TRUE;
375 	for (spin = lwkt_token_spin; spin > 0; --spin) {
376 		cpu_pause();
377 		cpu_pause();
378 		if (_lwkt_trytokref(ref, td, mode))
379 			return TRUE;
380 	}
381 	return FALSE;
382 }
383 
384 /*
385  * Release a token that we hold.
386  *
387  * Since tokens are polled, we don't have to deal with wakeups and releasing
388  * is really easy.
389  */
390 static __inline
391 void
392 _lwkt_reltokref(lwkt_tokref_t ref, thread_t td)
393 {
394 	lwkt_token_t tok;
395 	long count;
396 
397 	tok = ref->tr_tok;
398 	if (tok->t_ref == ref) {
399 		/*
400 		 * We are an exclusive holder.  We must clear tok->t_ref
401 		 * before we clear the TOK_EXCLUSIVE bit.  If we are
402 		 * unable to clear the bit we must restore
403 		 * tok->t_ref.
404 		 */
405 #if 0
406 		KKASSERT(tok->t_count & TOK_EXCLUSIVE);
407 #endif
408 		tok->t_ref = NULL;
409 		atomic_clear_long(&tok->t_count, TOK_EXCLUSIVE);
410 	} else {
411 		/*
412 		 * We are a shared holder
413 		 */
414 		count = atomic_fetchadd_long(&tok->t_count, -TOK_INCR);
415 		KKASSERT(count & TOK_COUNTMASK);	/* count prior */
416 	}
417 }
418 
419 /*
420  * Obtain all the tokens required by the specified thread on the current
421  * cpu, return 0 on failure and non-zero on success.  If a failure occurs
422  * any partially acquired tokens will be released prior to return.
423  *
424  * lwkt_getalltokens is called by the LWKT scheduler to re-acquire all
425  * tokens that the thread had to release when it switched away.
426  *
427  * If spinning is non-zero this function acquires the tokens in a particular
428  * order to deal with potential deadlocks.  We simply use address order in
429  * that case.
430  *
431  * Called from a critical section.
432  */
433 int
434 lwkt_getalltokens(thread_t td, int spinning)
435 {
436 	lwkt_tokref_t scan;
437 	lwkt_token_t tok;
438 
439 	if (spinning)
440 		return(_lwkt_getalltokens_sorted(td));
441 
442 	/*
443 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
444 	 */
445 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
446 		tok = scan->tr_tok;
447 		for (;;) {
448 			/*
449 			 * Only try really hard on the last token
450 			 */
451 			if (scan == td->td_toks_stop - 1) {
452 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
453 				    break;
454 			} else {
455 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
456 				    break;
457 			}
458 
459 			/*
460 			 * Otherwise we failed to acquire all the tokens.
461 			 * Release whatever we did get.
462 			 */
463 			KASSERT(tok->t_desc,
464 				("token %p is not initialized", tok));
465 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
466 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
467 				tok->t_desc,
468 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
469 			if (lwkt_sched_debug > 0) {
470 				--lwkt_sched_debug;
471 				kprintf("toka %p %s %s\n",
472 					tok, tok->t_desc, td->td_comm);
473 			}
474 			td->td_wmesg = tok->t_desc;
475 			++tok->t_collisions;
476 			while (--scan >= &td->td_toks_base)
477 				_lwkt_reltokref(scan, td);
478 			return(FALSE);
479 		}
480 	}
481 	return (TRUE);
482 }
483 
484 /*
485  * Release all tokens owned by the specified thread on the current cpu.
486  *
487  * This code is really simple.  Note that even when we own all the tokens,
488  * t_ref may not match the scan for recursively held tokens that are held
489  * deeper in the stack, or for the case where a lwkt_getalltokens() call
490  * failed.
491  *
492  * Tokens are released in reverse order to reduce chasing race failures.
493  *
494  * Called from a critical section.
495  */
496 void
497 lwkt_relalltokens(thread_t td)
498 {
499 	lwkt_tokref_t scan;
500 
501 	/*
502 	 * Weird order is to try to avoid a panic loop
503 	 */
504 	if (td->td_toks_have) {
505 		scan = td->td_toks_have;
506 		td->td_toks_have = NULL;
507 	} else {
508 		scan = td->td_toks_stop;
509 	}
510 	while (--scan >= &td->td_toks_base)
511 		_lwkt_reltokref(scan, td);
512 }
513 
514 /*
515  * This is the decontention version of lwkt_getalltokens().  The tokens are
516  * acquired in address-sorted order to deal with any deadlocks.  Ultimately
517  * token failures will spin into the scheduler and get here.
518  *
519  * Called from critical section
520  */
521 static
522 int
523 _lwkt_getalltokens_sorted(thread_t td)
524 {
525 	lwkt_tokref_t sort_array[LWKT_MAXTOKENS];
526 	lwkt_tokref_t scan;
527 	lwkt_token_t tok;
528 	int i;
529 	int j;
530 	int n;
531 
532 	/*
533 	 * Sort the token array.  Yah yah, I know this isn't fun.
534 	 *
535 	 * NOTE: Recursively acquired tokens are ordered the same as in the
536 	 *	 td_toks_array so we can always get the earliest one first.
537 	 *	 This is particularly important when a token is acquired
538 	 *	 exclusively multiple times, as only the first acquisition
539 	 *	 is treated as an exclusive token.
540 	 */
541 	i = 0;
542 	scan = &td->td_toks_base;
543 	while (scan < td->td_toks_stop) {
544 		for (j = 0; j < i; ++j) {
545 			if (scan->tr_tok < sort_array[j]->tr_tok)
546 				break;
547 		}
548 		if (j != i) {
549 			bcopy(sort_array + j, sort_array + j + 1,
550 			      (i - j) * sizeof(lwkt_tokref_t));
551 		}
552 		sort_array[j] = scan;
553 		++scan;
554 		++i;
555 	}
556 	n = i;
557 
558 	/*
559 	 * Acquire tokens in forward order, assign or validate tok->t_ref.
560 	 */
561 	for (i = 0; i < n; ++i) {
562 		scan = sort_array[i];
563 		tok = scan->tr_tok;
564 		for (;;) {
565 			/*
566 			 * Only try really hard on the last token
567 			 */
568 			if (scan == td->td_toks_stop - 1) {
569 			    if (_lwkt_trytokref_spin(scan, td, scan->tr_count))
570 				    break;
571 			} else {
572 			    if (_lwkt_trytokref(scan, td, scan->tr_count))
573 				    break;
574 			}
575 
576 			/*
577 			 * Otherwise we failed to acquire all the tokens.
578 			 * Release whatever we did get.
579 			 */
580 			td->td_gd->gd_cnt.v_lock_name[0] = 't';
581 			strncpy(td->td_gd->gd_cnt.v_lock_name + 1,
582 				tok->t_desc,
583 				sizeof(td->td_gd->gd_cnt.v_lock_name) - 2);
584 			if (lwkt_sched_debug > 0) {
585 				--lwkt_sched_debug;
586 				kprintf("tokb %p %s %s\n",
587 					tok, tok->t_desc, td->td_comm);
588 			}
589 			td->td_wmesg = tok->t_desc;
590 			++tok->t_collisions;
591 			while (--i >= 0) {
592 				scan = sort_array[i];
593 				_lwkt_reltokref(scan, td);
594 			}
595 			return(FALSE);
596 		}
597 	}
598 
599 	/*
600 	 * We were successful; there is no need for another core to signal
601 	 * us.
602 	 */
603 	return (TRUE);
604 }
605 
606 /*
607  * Get a serializing token.  This routine can block.
608  */
609 void
610 lwkt_gettoken(lwkt_token_t tok)
611 {
612 	thread_t td = curthread;
613 	lwkt_tokref_t ref;
614 
615 	ref = td->td_toks_stop;
616 	KKASSERT(ref < &td->td_toks_end);
617 	++td->td_toks_stop;
618 	cpu_ccfence();
619 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
620 
621 #ifdef DEBUG_LOCKS
622 	/*
623 	 * Taking an exclusive token after holding it shared will
624 	 * livelock. Scan for that case and assert.
625 	 */
626 	lwkt_tokref_t tk;
627 	int found = 0;
628 	for (tk = &td->td_toks_base; tk < ref; tk++) {
629 		if (tk->tr_tok != tok)
630 			continue;
631 
632 		found++;
633 		if (tk->tr_count & TOK_EXCLUSIVE)
634 			goto good;
635 	}
636 	/* We found only shared instances of this token if found >0 here */
637 	KASSERT((found == 0), ("Token %p s/x livelock", tok));
638 good:
639 #endif
640 
641 	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLUSIVE|TOK_EXCLREQ))
642 		return;
643 
644 	/*
645 	 * Give up running if we can't acquire the token right now.
646 	 *
647 	 * Since the tokref is already active the scheduler now
648 	 * takes care of acquisition, so we need only call
649 	 * lwkt_switch().
650 	 *
651 	 * Since we failed, this was not a recursive token, so upon
652 	 * return tr_tok->t_ref should be assigned to this specific
653 	 * ref.
654 	 */
655 	td->td_wmesg = tok->t_desc;
656 	++tok->t_collisions;
657 	logtoken(fail, ref);
658 	td->td_toks_have = td->td_toks_stop - 1;
659 
660 	if (tokens_debug_output > 0) {
661 		--tokens_debug_output;
662 		spin_lock(&tok_debug_spin);
663 		kprintf("Excl Token thread %p %s %s\n",
664 			td, tok->t_desc, td->td_comm);
665 		print_backtrace(6);
666 		kprintf("\n");
667 		spin_unlock(&tok_debug_spin);
668 	}
669 
670 	lwkt_switch();
671 	logtoken(succ, ref);
672 	KKASSERT(tok->t_ref == ref);
673 }
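
/*
 * Editorial sketch (not part of the original revision): because tokens are
 * dropped while a thread sleeps and reacquired when it resumes, a caller
 * may block while "holding" one, but must revalidate token-protected state
 * afterwards.  "sc" is a hypothetical softc:
 *
 *	lwkt_gettoken(&sc->sc_token);
 *	while (sc->sc_busy)
 *		tsleep(sc, 0, "scwait", 0);	(token dropped while asleep)
 *	sc->sc_busy = 1;
 *	lwkt_reltoken(&sc->sc_token);
 */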
674 
675 /*
676  * Similar to gettoken but we acquire a shared token instead of an exclusive
677  * token.
678  */
679 void
680 lwkt_gettoken_shared(lwkt_token_t tok)
681 {
682 	thread_t td = curthread;
683 	lwkt_tokref_t ref;
684 
685 	ref = td->td_toks_stop;
686 	KKASSERT(ref < &td->td_toks_end);
687 	++td->td_toks_stop;
688 	cpu_ccfence();
689 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLREQ);
690 
691 #ifdef DEBUG_LOCKS
692 	/*
693 	 * Taking a pool token in shared mode is a bad idea; other
694 	 * addresses deeper in the call stack may hash to the same pool
695 	 * token and you may end up with an exclusive-shared livelock.
696 	 * Warn in this condition.
697 	 */
698 	if ((tok >= &pool_tokens[0].token) &&
699 	    (tok < &pool_tokens[LWKT_NUM_POOL_TOKENS].token))
700 		kprintf("Warning! Taking pool token %p in shared mode\n", tok);
701 #endif
702 
703 
704 	if (_lwkt_trytokref_spin(ref, td, TOK_EXCLREQ))
705 		return;
706 
707 	/*
708 	 * Give up running if we can't acquire the token right now.
709 	 *
710 	 * Since the tokref is already active the scheduler now
711 	 * takes care of acquisition, so we need only call
712 	 * lwkt_switch().
713 	 * Since we failed, this was not a recursive token, so upon
714 	 * Since we failed this was not a recursive token so upon
715 	 * return tr_tok->t_ref should be assigned to this specific
716 	 * ref.
717 	 */
718 	td->td_wmesg = tok->t_desc;
719 	++tok->t_collisions;
720 	logtoken(fail, ref);
721 	td->td_toks_have = td->td_toks_stop - 1;
722 
723 	if (tokens_debug_output > 0) {
724 		--tokens_debug_output;
725 		spin_lock(&tok_debug_spin);
726 		kprintf("Shar Token thread %p %s %s\n",
727 			td, tok->t_desc, td->td_comm);
728 		print_backtrace(6);
729 		kprintf("\n");
730 		spin_unlock(&tok_debug_spin);
731 	}
732 
733 	lwkt_switch();
734 	logtoken(succ, ref);
735 }
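
/*
 * Editorial sketch (not part of the original revision): shared tokens suit
 * read-mostly traversals.  A thread already holding a token shared must not
 * then request it exclusively (s/x livelock, asserted under DEBUG_LOCKS):
 *
 *	lwkt_gettoken_shared(&vnode_token);
 *	...			(read-only walk of vnode_token-protected data)
 *	lwkt_reltoken(&vnode_token);
 */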
736 
737 /*
738  * Attempt to acquire a token, return TRUE on success, FALSE on failure.
739  *
740  * We setup the tokref in case we actually get the token (if we switch later
741  * it becomes mandatory so we set TOK_EXCLREQ), but we call trytokref without
742  * TOK_EXCLREQ in case we fail.
743  */
744 int
745 lwkt_trytoken(lwkt_token_t tok)
746 {
747 	thread_t td = curthread;
748 	lwkt_tokref_t ref;
749 
750 	ref = td->td_toks_stop;
751 	KKASSERT(ref < &td->td_toks_end);
752 	++td->td_toks_stop;
753 	cpu_ccfence();
754 	_lwkt_tokref_init(ref, tok, td, TOK_EXCLUSIVE|TOK_EXCLREQ);
755 
756 	if (_lwkt_trytokref(ref, td, TOK_EXCLUSIVE))
757 		return TRUE;
758 
759 	/*
760 	 * Failed, unpend the request
761 	 */
762 	cpu_ccfence();
763 	--td->td_toks_stop;
764 	++tok->t_collisions;
765 	return FALSE;
766 }
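
/*
 * Editorial sketch (not part of the original revision): opportunistic,
 * non-blocking acquisition with a fallback:
 *
 *	if (lwkt_trytoken(&mp_token)) {
 *		...			(fast path)
 *		lwkt_reltoken(&mp_token);
 *	} else {
 *		...			(defer the work, or block with
 *					 lwkt_gettoken(&mp_token))
 *	}
 */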
767 
768 lwkt_token_t
769 lwkt_getpooltoken(void *ptr)
770 {
771 	lwkt_token_t tok;
772 
773 	tok = _lwkt_token_pool_lookup(ptr);
774 	lwkt_gettoken(tok);
775 	return (tok);
776 }
777 
778 /*
779  * Release a serializing token.
780  *
781  * WARNING!  All tokens must be released in reverse order.  This will be
782  *	     asserted.
783  */
784 void
785 lwkt_reltoken(lwkt_token_t tok)
786 {
787 	thread_t td = curthread;
788 	lwkt_tokref_t ref;
789 
790 	/*
791 	 * Remove ref from thread token list and assert that it matches
792 	 * the token passed in.  Tokens must be released in reverse order.
793 	 */
794 	ref = td->td_toks_stop - 1;
795 	KKASSERT(ref >= &td->td_toks_base && ref->tr_tok == tok);
796 	_lwkt_reltokref(ref, td);
797 	cpu_sfence();
798 	td->td_toks_stop = ref;
799 }
800 
801 /*
802  * It is faster for users of lwkt_getpooltoken() to use the returned
803  * token and just call lwkt_reltoken(), but for convenience we provide
804  * this function, which looks the token up from the address again.
805  */
806 void
807 lwkt_relpooltoken(void *ptr)
808 {
809 	lwkt_token_t tok = _lwkt_token_pool_lookup(ptr);
810 	lwkt_reltoken(tok);
811 }
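
/*
 * Editorial sketch (not part of the original revision) contrasting the two
 * release styles; "obj" is a hypothetical structure address:
 *
 *	tok = lwkt_getpooltoken(obj);	...	lwkt_reltoken(tok);
 * versus
 *	lwkt_getpooltoken(obj);		...	lwkt_relpooltoken(obj);
 *
 * The first form avoids hashing the address a second time on release.
 */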
812 
813 /*
814  * Return a count of the number of token refs the thread has to the
815  * specified token, whether it currently owns the token or not.
816  */
817 int
818 lwkt_cnttoken(lwkt_token_t tok, thread_t td)
819 {
820 	lwkt_tokref_t scan;
821 	int count = 0;
822 
823 	for (scan = &td->td_toks_base; scan < td->td_toks_stop; ++scan) {
824 		if (scan->tr_tok == tok)
825 			++count;
826 	}
827 	return(count);
828 }
829 
830 /*
831  * Pool tokens are used to provide a type-stable serializing token
832  * pointer that does not race against disappearing data structures.
833  *
834  * This routine is called in early boot just after we setup the BSP's
835  * globaldata structure.
836  */
837 void
838 lwkt_token_pool_init(void)
839 {
840 	int i;
841 
842 	for (i = 0; i < LWKT_NUM_POOL_TOKENS; ++i)
843 		lwkt_token_init(&pool_tokens[i].token, "pool");
844 }
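
/*
 * Editorial sketch (not part of the original revision): the classic pool
 * token pattern serializes on an object's address, so the lock itself can
 * never be freed out from under the holder even if the object is torn down
 * while the token is held.  "np" is a hypothetical structure pointer:
 *
 *	lwkt_getpooltoken(np);
 *	...			(np may be validated, modified or destroyed)
 *	lwkt_relpooltoken(np);
 */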
845 
846 lwkt_token_t
847 lwkt_token_pool_lookup(void *ptr)
848 {
849 	return (_lwkt_token_pool_lookup(ptr));
850 }
851 
852 /*
853  * Initialize a token.
854  */
855 void
856 lwkt_token_init(lwkt_token_t tok, const char *desc)
857 {
858 	tok->t_count = 0;
859 	tok->t_ref = NULL;
860 	tok->t_collisions = 0;
861 	tok->t_desc = desc;
862 }
863 
864 void
865 lwkt_token_uninit(lwkt_token_t tok)
866 {
867 	/* empty */
868 }
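
/*
 * Editorial sketch (not part of the original revision): static versus
 * run-time initialization.  "foo_token" and "softc" are hypothetical:
 *
 *	static struct lwkt_token foo_token =
 *		LWKT_TOKEN_INITIALIZER(foo_token);
 *
 *	lwkt_token_init(&softc->sc_token, "foosc");
 *	...
 *	lwkt_token_uninit(&softc->sc_token);
 */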
869 
870 /*
871  * Exchange the two most recent tokens on the tokref stack.  This allows
872  * you to release a token out of order.
873  *
874  * We have to be careful about the case where the top two tokens are
875  * the same token.  In this case tok->t_ref will point to the deeper
876  * ref and must remain pointing to the deeper ref.  If we were to swap
877  * it the first release would clear the token even though a second
878  * ref is still present.
879  *
880  * Only exclusively held tokens contain a reference to the tokref which
881  * has to be flipped along with the swap.
882  */
883 void
884 lwkt_token_swap(void)
885 {
886 	lwkt_tokref_t ref1, ref2;
887 	lwkt_token_t tok1, tok2;
888 	long count1, count2;
889 	thread_t td = curthread;
890 
891 	crit_enter();
892 
893 	ref1 = td->td_toks_stop - 1;
894 	ref2 = td->td_toks_stop - 2;
895 	KKASSERT(ref1 >= &td->td_toks_base);
896 	KKASSERT(ref2 >= &td->td_toks_base);
897 
898 	tok1 = ref1->tr_tok;
899 	tok2 = ref2->tr_tok;
900 	count1 = ref1->tr_count;
901 	count2 = ref2->tr_count;
902 
903 	if (tok1 != tok2) {
904 		ref1->tr_tok = tok2;
905 		ref1->tr_count = count2;
906 		ref2->tr_tok = tok1;
907 		ref2->tr_count = count1;
908 		if (tok1->t_ref == ref1)
909 			tok1->t_ref = ref2;
910 		if (tok2->t_ref == ref2)
911 			tok2->t_ref = ref1;
912 	}
913 
914 	crit_exit();
915 }
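
/*
 * Editorial sketch (not part of the original revision): out-of-order
 * release of the deeper of the two most recently acquired tokens.
 * "tok_a" and "tok_b" are hypothetical:
 *
 *	lwkt_gettoken(&tok_a);
 *	lwkt_gettoken(&tok_b);
 *	lwkt_token_swap();		(top two refs exchanged)
 *	lwkt_reltoken(&tok_a);		(tok_b remains held)
 */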
916 
917 #ifdef DDB
918 DB_SHOW_COMMAND(tokens, db_tok_all)
919 {
920 	struct lwkt_token *tok, **ptr;
921 	struct lwkt_token *toklist[16] = {
922 		&mp_token,
923 		&pmap_token,
924 		&dev_token,
925 		&vm_token,
926 		&vmspace_token,
927 		&kvm_token,
928 		&sigio_token,
929 		&tty_token,
930 		&vnode_token,
931 		NULL
932 	};
933 
934 	ptr = toklist;
935 	for (tok = *ptr; tok; tok = *(++ptr)) {
936 		db_printf("tok=%p tr_owner=%p t_collisions=%ld t_desc=%s\n", tok,
937 		    (tok->t_ref ? tok->t_ref->tr_owner : NULL),
938 		    tok->t_collisions, tok->t_desc);
939 	}
940 }
941 #endif /* DDB */
942