/*	$NetBSD: vfs_lockf.c,v 1.69 2008/10/11 13:40:57 pooka Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.69 2008/10/11 13:40:57 pooka Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/atomic.h>
#include <sys/kauth.h>
#include <sys/uidinfo.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
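 *
 * For example, if a held exclusive lock A covers bytes [0, 99] and a
 * later request B for [50, 150] cannot be granted, then B is queued on
 * A->lf_blkhd and B->lf_next points back at A; when A is released,
 * lf_wakelock() clears B->lf_next and B retries.  Had B been granted
 * instead, B->lf_next would simply point at the next held lock on the
 * vnode.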
 */

TAILQ_HEAD(locklist, lockf);

struct lockf {
	kcondvar_t lf_cv;		/* Signalling */
	short	lf_flags;		/* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;		/* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;		/* The byte # of the start of the lock */
	off_t	lf_end;			/* The byte # of the end of the lock (-1=EOF) */
	void	*lf_id;			/* process or file description holding lock */
	struct	lockf **lf_head;	/* Back pointer to the head of lockf list */
	struct	lockf *lf_next;		/* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd;	/* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block;	/* A request waiting for a lock */
	uid_t	lf_uid;			/* User ID responsible */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static pool_cache_t lockf_cache;
static kmutex_t *lockf_lock;
static char lockstr[] = "lockf";

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFS's which support advisory locking).
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
int maxlocksperuid = 1024;

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
static void
lf_print(const char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lock->lf_id);
	printf(" %s, start %qx, end %qx",
	    lock->lf_type == F_RDLCK ? "shared" :
	    lock->lf_type == F_WRLCK ? "exclusive" :
	    lock->lf_type == F_UNLCK ? "unlock" :
	    "unknown", lock->lf_start, lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd))
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

static void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
		else
			printf("file %p", (struct file *)lf->lf_id);
		printf(", %s, start %qx, end %qx",
		    lf->lf_type == F_RDLCK ? "shared" :
		    lf->lf_type == F_WRLCK ? "exclusive" :
		    lf->lf_type == F_UNLCK ? "unlock" :
		    "unknown", lf->lf_start, lf->lf_end);
		TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) {
			if (blk->lf_flags & F_POSIX)
				printf("; proc %d",
				    ((struct proc *)blk->lf_id)->p_pid);
			else
				printf("; file %p", (struct file *)blk->lf_id);
			printf(", %s, start %qx, end %qx",
			    blk->lf_type == F_RDLCK ? "shared" :
			    blk->lf_type == F_WRLCK ? "exclusive" :
			    blk->lf_type == F_UNLCK ? "unlock" :
			    "unknown", blk->lf_start, blk->lf_end);
			if (TAILQ_FIRST(&blk->lf_blkhd))
				panic("lf_printlist: bad list");
		}
		printf("\n");
	}
}
#endif /* LOCKF_DEBUG */

/*
 * 3 options for allowfail.
 * 0 - always allocate.  1 - cutoff at limit.  2 - cutoff at double limit.
 */
static struct lockf *
lf_alloc(uid_t uid, int allowfail)
{
	struct uidinfo *uip;
	struct lockf *lock;
	u_long lcnt;

	uip = uid_find(uid);
	lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt);
	if (uid && allowfail && lcnt >
	    (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
		atomic_dec_ulong(&uip->ui_lockcnt);
		return NULL;
	}

	lock = pool_cache_get(lockf_cache, PR_WAITOK);
	lock->lf_uid = uid;
	return lock;
}

static void
lf_free(struct lockf *lock)
{
	struct uidinfo *uip;

	uip = uid_find(lock->lf_uid);
	atomic_dec_ulong(&uip->ui_lockcnt);
	pool_cache_put(lockf_cache, lock);
}

static int
lf_ctor(void *arg, void *obj, int flag)
{
	struct lockf *lock;

	lock = obj;
	cv_init(&lock->lf_cv, lockstr);

	return 0;
}

static void
lf_dtor(void *arg, void *obj)
{
	struct lockf *lock;

	lock = obj;
	cv_destroy(&lock->lf_cv);
}

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 * may be more than one.
 */
static int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf ***prev, struct lockf **overlap)
{
	off_t start, end;

	*overlap = lf;
	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (lockf_debug & 2)
		lf_print("lf_findoverlap: looking for overlap in", lock);
#endif /* LOCKF_DEBUG */
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NULL) {
		if (((type == SELF) && lf->lf_id != lock->lf_id) ||
		    ((type == OTHERS) && lf->lf_id == lock->lf_id)) {
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("\tchecking", lf);
#endif /* LOCKF_DEBUG */
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			/* Case 0 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("no overlap\n");
#endif /* LOCKF_DEBUG */
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return 0;
			*prev = &lf->lf_next;
			*overlap = lf = lf->lf_next;
			continue;
		}
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			/* Case 1 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap == lock\n");
#endif /* LOCKF_DEBUG */
			return 1;
		}
		if ((lf->lf_start <= start) &&
		    (end != -1) &&
		    ((lf->lf_end >= end) || (lf->lf_end == -1))) {
			/* Case 2 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap contains lock\n");
#endif /* LOCKF_DEBUG */
			return 2;
		}
		if (start <= lf->lf_start &&
		    (end == -1 ||
		    (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
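	 * For example, splitting [25, 49] out of a lock covering [0, 99]
	 * requires a third piece for [50, 99], whereas splitting out
	 * [0, 49] only shrinks the original lock to [50, 99].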
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	memcpy(splitlock, lock1, sizeof(*splitlock));
	cv_init(&splitlock->lf_cv, lockstr);

	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		cv_broadcast(&wakelock->lf_cv);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
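 *
 * Only locks owned by someone else are considered; an overlap blocks
 * the request only if at least one of the two locks is a write lock,
 * so overlapping read locks never block each other.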
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    kmutex_t *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	int ovcase, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked.  Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us.  MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(p->p_lock);
				if (p->p_nlwps > 1) {
					mutex_exit(p->p_lock);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wchan == NULL ||
				    wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(p->p_lock);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(p->p_lock);
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
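		 * That is, lf_next points at the blocking lock and we sit
		 * on its lf_blkhd queue; lf_wakelock() undoes both when the
		 * blocking lock is released.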
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = cv_wait_sig(&lock->lf_cv, interlock);

		/*
		 * We may have been awoken by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
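			 * The overlap is trimmed so that it now ends just
			 * before the new lock starts.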
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct lwp *l = curlwp;
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	kmutex_t *interlock = lockf_lock;
	off_t start, end;
	int error = 0;

	/*
	 * Convert the flock structure into a start and end.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}
	if (start < 0)
		return EINVAL;

	/*
	 * Allocate locks before acquiring the interlock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(kauth_cred_geteuid(l->l_cred), 0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	lock = lf_alloc(kauth_cred_geteuid(l->l_cred),
	    ap->a_op != F_UNLCK ? 1 : 2);
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	mutex_enter(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	if (fl->l_len == 0)
		end = -1;
	else
		end = start + fl->l_len - 1;
	/*
	 * Create the lockf structure.
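	 * For F_POSIX (fcntl-style) locks the owner recorded in lf_id is
	 * the requesting process; for F_FLOCK (flock-style) locks it is
	 * the file structure passed in ap->a_id.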
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	mutex_exit(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}

/*
 * Initialize subsystem.   XXX We use a global lock.  This could be the
 * vnode interlock, but the deadlock detection code may need to inspect
 * locks belonging to other files.
 */
void
lf_init(void)
{

	lockf_cache = pool_cache_init(sizeof(struct lockf), 0, 0, 0, "lockf",
	    NULL, IPL_NONE, lf_ctor, lf_dtor, NULL);
	lockf_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
}
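
/*
 * Usage sketch (illustrative only; the real callers live in the
 * individual file systems): a file system's VOP_ADVLOCK implementation
 * normally just passes its per-inode lock list head and current file
 * size through to lf_advlock(), along the lines of:
 *
 *	int
 *	xxx_advlock(void *v)
 *	{
 *		struct vop_advlock_args *ap = v;
 *		struct xxx_node *np = VTOXXX(ap->a_vp);
 *
 *		return lf_advlock(ap, &np->n_lockf, np->n_size);
 *	}
 *
 * Here xxx_advlock(), VTOXXX(), n_lockf and n_size are hypothetical
 * names standing in for a particular file system's own equivalents.
 */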