/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
 * - locks can be asynchronous.
 * - synchronous fast path if no blocking occurs (async callback is not
 *   made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>

static __int64_t mtx_contention_count;
static __int64_t mtx_collision_count;
static __int64_t mtx_wakeup_count;

SYSCTL_QUAD(_kern, OID_AUTO, mtx_contention_count, CTLFLAG_RW,
	    &mtx_contention_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_collision_count, CTLFLAG_RW,
	    &mtx_collision_count, 0, "");
SYSCTL_QUAD(_kern, OID_AUTO, mtx_wakeup_count, CTLFLAG_RW,
	    &mtx_wakeup_count, 0, "");

static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);
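
/*
 * Example (illustrative only): typical synchronous use of these mutexes.
 * This sketch assumes the usual inline initializer from sys/mutex2.h
 * (mtx_init()); real callers normally go through the inline wrappers in
 * that header rather than calling the underscored functions directly.
 */
#if 0
static mtx_t example_mtx;

static void
example_sync_usage(void)
{
	mtx_init(&example_mtx);		/* one-time setup (assumed helper) */

	/*
	 * Exclusive section.  The lock may be held across blocking
	 * conditions, unlike an LWKT token.
	 */
	_mtx_lock_ex_quick(&example_mtx, "exmpl");
	/* ... modify shared data, possibly blocking ... */
	_mtx_unlock(&example_mtx);

	/*
	 * Shared section.  Multiple holders may share the lock; pending
	 * exclusive requests have priority over new shared requests.
	 */
	_mtx_lock_sh_quick(&example_mtx, "exmpl");
	/* ... read shared data ... */
	_mtx_unlock(&example_mtx);
}
#endif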

/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful, an async exclusive lock will return
 * 0 and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link,
	      const char *ident, int flags, int to)
{
	thread_t td;
	u_int lock;
	u_int nlock;
	int error;
	int isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests.  It cannot be set as a
		 * separate operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		++td->td_critcount;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			--td->td_critcount;
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			--td->td_critcount;
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		--td->td_critcount;

		/*
		 * If this is an asynchronous lock request, return without
		 * blocking and leave the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, ident, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link,
		  const char *ident, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, ident, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, const char *ident, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, ident, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx, const char *ident)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, ident, 0, 0));
}
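
/*
 * Example (illustrative only): an asynchronous exclusive lock request.
 * The callback signature and the direct assignment of link->callback and
 * link->arg below are inferred from this file.  A 0 return means the lock
 * was acquired synchronously and no callback will be issued; EINPROGRESS
 * means the callback runs later (with ENOLCK if the request is aborted).
 */
#if 0
static void
example_lock_callback(mtx_link_t *link, void *arg, int error)
{
	if (error == 0) {
		/* lock granted asynchronously; we now hold it exclusively */
	} else {
		/* typically ENOLCK: the request was aborted */
	}
}

static void
example_async_usage(mtx_t *mtx, mtx_link_t *link, void *arg)
{
	int error;

	mtx_link_init(link);
	link->callback = example_lock_callback;
	link->arg = arg;

	error = _mtx_lock_ex_link(mtx, link, "async", 0, 0);
	if (error == 0) {
		/* fast path: acquired immediately, no callback was made */
	} else if (error == EINPROGRESS) {
		/* queued: example_lock_callback() will report the result */
	}
}
#endif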

/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link,
	      const char *ident, int flags, int to)
{
	thread_t td;
	u_int lock;
	u_int nlock;
	int error;
	int isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				error = 0;
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}
		if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
		 * pending shared requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		td = curthread;
		nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
		++td->td_critcount;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			--td->td_critcount;
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_shlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_SHWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			--td->td_critcount;
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the shlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_SH;
		if (mtx->mtx_shlink) {
			link->next = mtx->mtx_shlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_shlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		--td->td_critcount;

		/*
		 * If this is an asynchronous lock request, return without
		 * blocking and leave the link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, ident, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link,
		  const char *ident, int flags, int to)
{
	return(__mtx_lock_sh(mtx, link, ident, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, const char *ident, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, ident, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx, const char *ident)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, ident, 0, 0));
}
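
/*
 * Example (illustrative only): a blocking shared lock with PCATCH and a
 * timeout.  Per the comments above, a non-zero return is a tsleep() error
 * (e.g. EINTR/ERESTART with PCATCH, or EWOULDBLOCK on timeout); the "slp"
 * wchan string and one-second timeout are arbitrary.
 */
#if 0
static int
example_sh_interruptible(mtx_t *mtx)
{
	int error;

	error = _mtx_lock_sh(mtx, "slp", PCATCH, hz);
	if (error)
		return (error);	/* interrupted or timed out, lock not held */
	/* ... read shared data ... */
	_mtx_unlock(mtx);
	return (0);
}
#endif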

/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int bb = 1;
	int bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
			++mtx_contention_count;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}

/*
 * Attempt to acquire a spinlock.  If we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
	globaldata_t gd = mycpu;
	u_int lock;
	u_int nlock;
	int res = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = gd->gd_curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == gd->gd_curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			--gd->gd_spinlocks;
			cpu_ccfence();
			--gd->gd_curthread->td_critcount;
			res = EAGAIN;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return res;
}
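
/*
 * Example (illustrative only): the caller-side predisposition that
 * _mtx_spinlock_try() expects.  The failure path above undoes a
 * gd_spinlocks/td_critcount increment, so the caller is assumed to have
 * bumped both beforehand; in practice this bookkeeping presumably lives
 * in the inline spinlock wrappers in sys/mutex2.h, not in normal callers.
 */
#if 0
static int
example_spinlock_try(mtx_t *mtx)
{
	globaldata_t gd = mycpu;

	++gd->gd_curthread->td_critcount;	/* predispose critical section */
	cpu_ccfence();
	++gd->gd_spinlocks;			/* predispose spinlock count */
	if (_mtx_spinlock_try(mtx) == 0)
		return (1);		/* success, counts remain in place */
	return (0);			/* failure path already undid them */
}
#endif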

#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int bb = 1;
	int bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
			++mtx_contention_count;
		}
		cpu_pause();
		++mtx_collision_count;
	}
}

#endif

int
_mtx_lock_ex_try(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return (error);
}

int
_mtx_lock_sh_try(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return (error);
}

/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		/*
		 * NOP if already shared.
		 */
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}

		/*
		 * Transfer count to shared.  Any additional pending shared
		 * waiters must be woken up.
		 */
		if (lock & MTX_SHWANTED) {
			if (mtx_chain_link_sh(mtx, lock, 1))
				break;
			/* retry */
		} else {
			nlock = lock & ~MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
			/* retry */
		}
		cpu_pause();
		++mtx_collision_count;
	}
}
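
/*
 * Example (illustrative only): downgrading an exclusive hold to a shared
 * hold so concurrent readers can proceed while the caller keeps a
 * consistent view of the data it just updated.
 */
#if 0
static void
example_downgrade(mtx_t *mtx)
{
	_mtx_lock_ex_quick(mtx, "downg");
	/* ... perform the exclusive update ... */
	_mtx_downgrade(mtx);	/* now held shared, readers may enter */
	/* ... continue reading the updated data ... */
	_mtx_unlock(mtx);
}
#endif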

/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;
	int error = 0;

	for (;;) {
		lock = mtx->mtx_lock;

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
		++mtx_collision_count;
	}
	return (error);
}

/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	u_int lock;
	u_int nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending.  Transfer our count (1)
			 * to the first shared request, wakeup all shared reqs.
			 */
			if (mtx_chain_link_sh(mtx, lock, 0))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending.  Transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock, 0))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
		++mtx_collision_count;
	}
done:
	;
}
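
/*
 * Example (illustrative only): the usual pattern around an upgrade
 * attempt.  If _mtx_upgrade_try() returns EDEADLK the shared hold must be
 * dropped and the lock re-acquired exclusively, after which any state
 * derived under the shared hold has to be re-validated by the caller.
 */
#if 0
static void
example_upgrade(mtx_t *mtx)
{
	_mtx_lock_sh_quick(mtx, "upgrd");
	/* ... decide an exclusive modification is needed ... */
	if (_mtx_upgrade_try(mtx) == EDEADLK) {
		_mtx_unlock(mtx);
		_mtx_lock_ex_quick(mtx, "upgrd");
		/* ... re-validate, another thread may have run here ... */
	}
	/* ... perform the exclusive modification ... */
	_mtx_unlock(mtx);
}
#endif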

/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *	 acquiring LINKSPIN as all other cases will also need to acquire
 *	 LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;
	++td->td_critcount;
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		link = mtx->mtx_exlink;
		KKASSERT(link != NULL);
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock = MTX_LINKSPIN | MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			nlock = MTX_LINKSPIN;			/* to clear */
		}
		KKASSERT(link->state == MTX_LINK_LINKED_EX);
		mtx->mtx_owner = link->owner;
		cpu_sfence();

		/*
		 * WARNING! The callback can only be safely
		 *	    made with LINKSPIN still held
		 *	    and in a critical section.
		 *
		 * WARNING! The link can go away after the
		 *	    state is set, or after the
		 *	    callback.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, 0);
		} else {
			link->state = MTX_LINK_ACQUIRED;
			wakeup(link);
		}
		atomic_clear_int(&mtx->mtx_lock, nlock);
		--td->td_critcount;
		++mtx_wakeup_count;
		return 1;
	}
	/* retry */
	--td->td_critcount;
	return 0;
}

/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches.
 *
 * If addcount is 0, the count for the first shared lock in the chain is
 * assumed to have already been accounted for.
 *
 * If addcount is 1, the count for the first shared lock in the chain has
 * not yet been accounted for.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	++td->td_critcount;
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		KKASSERT(mtx->mtx_shlink != NULL);
		for (;;) {
			link = mtx->mtx_shlink;
			atomic_add_int(&mtx->mtx_lock, addcount);
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
				cpu_sfence();

				/*
				 * WARNING! The callback can only be safely
				 *	    made with LINKSPIN still held
				 *	    and in a critical section.
				 *
				 * WARNING! The link can go away after the
				 *	    state is set, or after the
				 *	    callback.
				 */
				if (link->callback) {
					link->state = MTX_LINK_CALLEDBACK;
					link->callback(link, link->arg, 0);
				} else {
					link->state = MTX_LINK_ACQUIRED;
					wakeup(link);
				}
				++mtx_wakeup_count;
				break;
			}
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			cpu_sfence();
			link->state = MTX_LINK_ACQUIRED;
			/* link can go away */
			wakeup(link);
			++mtx_wakeup_count;
			addcount = 1;
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		--td->td_critcount;
		return 1;
	}
	/* retry */
	--td->td_critcount;
	return 0;
}

/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int lock;
	u_int nlock;

	/*
	 * Acquire MTX_LINKSPIN.
	 *
	 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
	 * result in too much cpu cache traffic.
	 */
	++td->td_critcount;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
		++mtx_collision_count;
	}

	/*
	 * Delete the link and release LINKSPIN.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_LINKED_EX:
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock |= MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	case MTX_LINK_LINKED_SH:
		if (link->next == link) {
			mtx->mtx_shlink = NULL;
			nlock |= MTX_SHWANTED;	/* to clear */
		} else {
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	default:
		/* no change */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	--td->td_critcount;
}

/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link,
	      const char *ident, int flags, int to)
{
	int error;

	/*
	 * Sleep.  Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			++mtx_contention_count;
			if (link->state & MTX_LINK_LINKED_SH)
				mycpu->gd_cnt.v_lock_name[0] = 'S';
			else
				mycpu->gd_cnt.v_lock_name[0] = 'X';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				ident,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(link, flags | PINTERLOCKED,
				       ident, to);
			if (error)
				break;
		}
	}

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	return error;
}

/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int lock;
	u_int nlock;

	/*
	 * Acquire MTX_LINKSPIN
	 */
	++td->td_critcount;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			++mtx_collision_count;
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
		++mtx_collision_count;
	}

	/*
	 * Do the abort.
	 *
	 * WARNING! Link structure can disappear once link->state is set.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED_EX:
		/*
		 * de-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_exlink == link) {
				mtx->mtx_exlink = NULL;
				nlock |= MTX_EXWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_exlink == link)
				mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		++mtx_wakeup_count;
		break;
	case MTX_LINK_LINKED_SH:
		/*
		 * de-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_shlink == link) {
				mtx->mtx_shlink = NULL;
				nlock |= MTX_SHWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_shlink == link)
				mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		++mtx_wakeup_count;
		break;
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		/*
		 * Too late, the lock was acquired.  Let it complete.
		 */
		break;
	default:
		/*
		 * link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	--td->td_critcount;
}
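
/*
 * Example (illustrative only): aborting a blocking exclusive lock request
 * from another context.  The link must be shared between the two sides
 * and remain valid until the waiter has collected its status; per the
 * comments above, the waiter sees ENOLCK when the abort wins the race.
 */
#if 0
/* Waiter side: blocks until granted or aborted. */
static int
example_abortable_wait(mtx_t *mtx, mtx_link_t *link)
{
	int error;

	mtx_link_init(link);
	error = _mtx_lock_ex_link(mtx, link, "abort", 0, 0);
	if (error == ENOLCK)
		return (error);		/* aborted, lock not held */
	/* ... lock held exclusively ... */
	_mtx_unlock(mtx);
	return (0);
}

/* Aborter side: cancels a pending request issued with the same link. */
static void
example_abort(mtx_t *mtx, mtx_link_t *link)
{
	mtx_abort_link(mtx, link);
}
#endif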