/*	$NetBSD: vfs_lockf.c,v 1.66 2008/08/01 07:11:24 skrll Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.66 2008/08/01 07:11:24 skrll Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/atomic.h>
#include <sys/kauth.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */
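/*
 * Illustrative sketch of the structure described above (the lock names and
 * byte ranges are made up for the example): held locks hang off the per-file
 * list head, chained through lf_next and sorted by lf_start; a pending
 * request sits on the lf_blkhd queue of the lock that blocks it, and its own
 * lf_next points back at that blocking lock.
 *
 *	*head --> [lock A, 0..99] --lf_next--> [lock B, 200..299] --> NULL
 *	               |
 *	            lf_blkhd
 *	               |
 *	               v
 *	       [waiting request] --lf_next--> (back to lock A)
 */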

TAILQ_HEAD(locklist, lockf);

struct lockf {
	kcondvar_t lf_cv;	 /* Signalling */
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
	uid_t	lf_uid;		 /* User ID responsible */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static pool_cache_t lockf_cache;
static kmutex_t *lockf_lock;
static char lockstr[] = "lockf";

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFS's which support advisory locking).
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
int maxlocksperuid = 1024;

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
static void
lf_print(const char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lock->lf_id);
	printf(" %s, start %qx, end %qx",
	    lock->lf_type == F_RDLCK ? "shared" :
	    lock->lf_type == F_WRLCK ? "exclusive" :
	    lock->lf_type == F_UNLCK ? "unlock" :
	    "unknown", lock->lf_start, lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd))
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

static void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
		else
			printf("file %p", (struct file *)lf->lf_id);
		printf(", %s, start %qx, end %qx",
		    lf->lf_type == F_RDLCK ? "shared" :
		    lf->lf_type == F_WRLCK ? "exclusive" :
		    lf->lf_type == F_UNLCK ? "unlock" :
		    "unknown", lf->lf_start, lf->lf_end);
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			if (blk->lf_flags & F_POSIX)
				printf("; proc %d",
				    ((struct proc *)blk->lf_id)->p_pid);
			else
				printf("; file %p", (struct file *)blk->lf_id);
			printf(", %s, start %qx, end %qx",
			    blk->lf_type == F_RDLCK ? "shared" :
			    blk->lf_type == F_WRLCK ? "exclusive" :
			    blk->lf_type == F_UNLCK ? "unlock" :
			    "unknown", blk->lf_start, blk->lf_end);
			if (TAILQ_FIRST(&blk->lf_blkhd))
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */

/*
 * 3 options for allowfail.
 * 0 - always allocate.  1 - cutoff at limit.  2 - cutoff at double limit.
 */
static struct lockf *
lf_alloc(uid_t uid, int allowfail)
{
	struct uidinfo *uip;
	struct lockf *lock;
	u_long lcnt;

	uip = uid_find(uid);
	lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
	if (uid && allowfail && lcnt >
	    (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
		atomic_dec_ulong(&uip->ui_lockcnt);
		return NULL;
	}

	lock = pool_cache_get(lockf_cache, PR_WAITOK);
	lock->lf_uid = uid;
	return lock;
}

static void
lf_free(struct lockf *lock)
{
	struct uidinfo *uip;

	uip = uid_find(lock->lf_uid);
	atomic_dec_ulong(&uip->ui_lockcnt);
	pool_cache_put(lockf_cache, lock);
}

static int
lf_ctor(void *arg, void *obj, int flag)
{
	struct lockf *lock;

	lock = obj;
	cv_init(&lock->lf_cv, lockstr);

	return 0;
}

static void
lf_dtor(void *arg, void *obj)
{
	struct lockf *lock;

	lock = obj;
	cv_destroy(&lock->lf_cv);
}

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
static int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;

	*overlap = lf;
	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NULL) {
		if (((type == SELF) && lf->lf_id != lock->lf_id) ||
		    ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		    (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

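/*
 * Illustration of the six overlap cases distinguished above, where "lock"
 * is the request passed to lf_findoverlap() and "lf" is the existing lock
 * being examined (an end of -1 means "through end of file"):
 *
 *	0: no overlap		lock:           |-----|
 *				lf:     |---|             or     |---|
 *
 *	1: overlap == lock	lock:   |-------|
 *				lf:     |-------|
 *
 *	2: overlap contains	lock:      |---|
 *	   lock			lf:     |---------|
 *
 *	3: lock contains	lock:   |---------|
 *	   overlap		lf:        |---|
 *
 *	4: overlap starts	lock:       |------|
 *	   before lock		lf:     |------|
 *
 *	5: overlap ends		lock:   |------|
 *	   after lock		lf:         |------|
 */
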
/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
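/*
 * Worked example (illustrative): if lock1 covers bytes 0..99 and lock2 is
 * an overlapping request for bytes 40..59, lock1 is trimmed to 0..39, the
 * spare lock becomes a copy of lock1 covering 60..99, and lock2 is linked
 * between the two pieces.  When the two ranges share a start or an end
 * byte, only two pieces result and *sparelock is left untouched.
 */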
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	memcpy(splitlock, lock1, sizeof(*splitlock));
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		cv_broadcast(&wakelock->lf_cv);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    kmutex_t *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	int ovcase, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked.  Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us.  MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
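		/*
		 * Illustration: if process A sleeps here waiting for B's
		 * lock while B is itself asleep in lf_setlock() waiting
		 * for a lock owned by A, neither can ever be woken.
		 * Walking the chain of blocking locks below and finding
		 * curproc again means such a cycle exists, and EDEADLK
		 * is returned instead of sleeping.
		 */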
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(p->p_lock);
				if (p->p_nlwps > 1) {
					mutex_exit(p->p_lock);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wchan == NULL ||
				    wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(p->p_lock);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(p->p_lock);
				if (waitblock == NULL) {
					/*
					 * This lwp just woke up but has
					 * not returned from ltsleep yet.
					 */
					break;
				}
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = cv_wait_sig(&lock->lf_cv, interlock);

		/*
		 * We may have been awoken by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct lwp *l = curlwp;
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	kmutex_t *interlock = lockf_lock;
	off_t start, end;
	int error = 0;

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}
	if (start < 0)
		return EINVAL;

	/*
	 * Allocate locks before acquiring the interlock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(kauth_cred_geteuid(l->l_cred), 0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	lock = lf_alloc(kauth_cred_geteuid(l->l_cred),
	    ap->a_op != F_UNLCK ? 1 : 2);
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	mutex_enter(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	if (fl->l_len == 0)
		end = -1;
	else
		end = start + fl->l_len - 1;
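	/*
	 * Example (illustrative): l_whence = SEEK_SET, l_start = 100 and
	 * l_len = 50 describe the byte range 100..149, so start = 100 and
	 * end = 149.  l_len = 0 means "to end of file" and is recorded as
	 * end = -1.
	 */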
	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	mutex_exit(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}

/*
 * Initialize subsystem.   XXX We use a global lock.  This could be the
 * vnode interlock, but the deadlock detection code may need to inspect
 * locks belonging to other files.
 */
void
lf_init(void)
{

	lockf_cache = pool_cache_init(sizeof(struct lockf), 0, 0, 0, "lockf",
	    NULL, IPL_NONE, lf_ctor, lf_dtor, NULL);
	lockf_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}
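
/*
 * Usage sketch (illustrative only): a filesystem's VOP_ADVLOCK
 * implementation typically just forwards to lf_advlock(), passing the
 * address of its per-node lock list head and the current file size.
 * The names "xxx_node", "VTOXXX", "n_lockf" and "n_size" below are
 * placeholders, not real NetBSD identifiers:
 *
 *	int
 *	xxx_advlock(void *v)
 *	{
 *		struct vop_advlock_args *ap = v;
 *		struct xxx_node *np = VTOXXX(ap->a_vp);
 *
 *		return lf_advlock(ap, &np->n_lockf, np->n_size);
 *	}
 */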