/*	$NetBSD: vfs_lockf.c,v 1.60 2007/07/09 21:10:57 ad Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.60 2007/07/09 21:10:57 ad Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/kauth.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */
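
/*
 * For illustration (the names A, B and P here are arbitrary): if locks A
 * and B are granted on a vnode and request P is blocked by B, the
 * structures are linked roughly as follows:
 *
 *	*head -> A -> B -> NULL		granted locks, chained by lf_next
 *					and sorted by lf_start
 *	P->lf_next == B			P remembers which lock blocks it,
 *	B's lf_blkhd contains P		and is queued on B's blocked list
 */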

TAILQ_HEAD(locklist, lockf);

struct lockf {
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF)*/
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block; /* A request waiting for a lock */
	uid_t	lf_uid;		 /* User ID responsible */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static POOL_INIT(lockfpool, sizeof(struct lockf), 0, 0, 0, "lockfpl",
    &pool_allocator_nointr, IPL_NONE);

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFS's which support advisory locking).
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
int maxlocksperuid = 1024;

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
static void
lf_print(const char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lock->lf_id);
	printf(" %s, start %qx, end %qx",
	    lock->lf_type == F_RDLCK ? "shared" :
	    lock->lf_type == F_WRLCK ? "exclusive" :
	    lock->lf_type == F_UNLCK ? "unlock" :
	    "unknown", lock->lf_start, lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd))
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

static void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
		else
			printf("file %p", (struct file *)lf->lf_id);
		printf(", %s, start %qx, end %qx",
		    lf->lf_type == F_RDLCK ? "shared" :
		    lf->lf_type == F_WRLCK ? "exclusive" :
		    lf->lf_type == F_UNLCK ? "unlock" :
"unlock" : 164 "unknown", lf->lf_start, lf->lf_end); 165 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 166 if (blk->lf_flags & F_POSIX) 167 printf("proc %d", 168 ((struct proc *)blk->lf_id)->p_pid); 169 else 170 printf("file %p", (struct file *)blk->lf_id); 171 printf(", %s, start %qx, end %qx", 172 blk->lf_type == F_RDLCK ? "shared" : 173 blk->lf_type == F_WRLCK ? "exclusive" : 174 blk->lf_type == F_UNLCK ? "unlock" : 175 "unknown", blk->lf_start, blk->lf_end); 176 if (TAILQ_FIRST(&blk->lf_blkhd)) 177 panic("lf_printlist: bad list"); 178 } 179 printf("\n"); 180 } 181 } 182 #endif /* LOCKF_DEBUG */ 183 184 /* 185 * 3 options for allowfail. 186 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 187 */ 188 static struct lockf * 189 lf_alloc(uid_t uid, int allowfail) 190 { 191 struct uidinfo *uip; 192 struct lockf *lock; 193 194 uip = uid_find(uid); 195 mutex_enter(&uip->ui_lock); 196 if (uid && allowfail && uip->ui_lockcnt > 197 (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) { 198 mutex_exit(&uip->ui_lock); 199 return NULL; 200 } 201 uip->ui_lockcnt++; 202 mutex_exit(&uip->ui_lock); 203 lock = pool_get(&lockfpool, PR_WAITOK); 204 lock->lf_uid = uid; 205 return lock; 206 } 207 208 static void 209 lf_free(struct lockf *lock) 210 { 211 struct uidinfo *uip; 212 213 uip = uid_find(lock->lf_uid); 214 mutex_enter(&uip->ui_lock); 215 uip->ui_lockcnt--; 216 mutex_exit(&uip->ui_lock); 217 pool_put(&lockfpool, lock); 218 } 219 220 /* 221 * Walk the list of locks for an inode to 222 * find an overlapping lock (if any). 223 * 224 * NOTE: this returns only the FIRST overlapping lock. There 225 * may be more than one. 226 */ 227 static int 228 lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, 229 struct lockf ***prev, struct lockf **overlap) 230 { 231 off_t start, end; 232 233 *overlap = lf; 234 if (lf == NULL) 235 return 0; 236 #ifdef LOCKF_DEBUG 237 if (lockf_debug & 2) 238 lf_print("lf_findoverlap: looking for overlap in", lock); 239 #endif /* LOCKF_DEBUG */ 240 start = lock->lf_start; 241 end = lock->lf_end; 242 while (lf != NULL) { 243 if (((type == SELF) && lf->lf_id != lock->lf_id) || 244 ((type == OTHERS) && lf->lf_id == lock->lf_id)) { 245 *prev = &lf->lf_next; 246 *overlap = lf = lf->lf_next; 247 continue; 248 } 249 #ifdef LOCKF_DEBUG 250 if (lockf_debug & 2) 251 lf_print("\tchecking", lf); 252 #endif /* LOCKF_DEBUG */ 253 /* 254 * OK, check for overlap 255 * 256 * Six cases: 257 * 0) no overlap 258 * 1) overlap == lock 259 * 2) overlap contains lock 260 * 3) lock contains overlap 261 * 4) overlap starts before lock 262 * 5) overlap ends after lock 263 */ 264 if ((lf->lf_end != -1 && start > lf->lf_end) || 265 (end != -1 && lf->lf_start > end)) { 266 /* Case 0 */ 267 #ifdef LOCKF_DEBUG 268 if (lockf_debug & 2) 269 printf("no overlap\n"); 270 #endif /* LOCKF_DEBUG */ 271 if ((type & SELF) && end != -1 && lf->lf_start > end) 272 return 0; 273 *prev = &lf->lf_next; 274 *overlap = lf = lf->lf_next; 275 continue; 276 } 277 if ((lf->lf_start == start) && (lf->lf_end == end)) { 278 /* Case 1 */ 279 #ifdef LOCKF_DEBUG 280 if (lockf_debug & 2) 281 printf("overlap == lock\n"); 282 #endif /* LOCKF_DEBUG */ 283 return 1; 284 } 285 if ((lf->lf_start <= start) && 286 (end != -1) && 287 ((lf->lf_end >= end) || (lf->lf_end == -1))) { 288 /* Case 2 */ 289 #ifdef LOCKF_DEBUG 290 if (lockf_debug & 2) 291 printf("overlap contains lock\n"); 292 #endif /* LOCKF_DEBUG */ 293 return 2; 294 } 295 if (start <= lf->lf_start && 296 (end == -1 || 297 (lf->lf_end != -1 && end >= 
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	memcpy(splitlock, lock1, sizeof(*splitlock));
	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		wakeup(wakelock);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
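	/*
	 * Rough sketch of the overlap cases reported by lf_findoverlap()
	 * and handled below (exact boundary conditions elided; "unlock"
	 * is this request, "overlap" the existing lock that was found):
	 *
	 *	unlock:            |-------- unlock --------|
	 *	case 1:            |-------- overlap -------|
	 *	case 2:        |----------- overlap -----------|
	 *	case 3:               |---- overlap ----|
	 *	case 4:        |------ overlap ------|
	 *	case 5:                      |------ overlap ------|
	 */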
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    struct simplelock *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, priority, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Set the priority
	 */
	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return EAGAIN;
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
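		/*
		 * For instance: process A holds a lock on bytes 0..9 and
		 * is asleep waiting for 10..19, which process B holds.
		 * If B now requests 0..9, following the wait channel from
		 * A's pending request leads to a lock owned by B itself,
		 * so the request is refused with EDEADLK.  (The ranges
		 * and process names here are purely illustrative.)
		 */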
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(&p->p_smutex);
				if (p->p_nlwps > 1) {
					mutex_exit(&p->p_smutex);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(&p->p_smutex);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(&p->p_smutex);
				if (waitblock == NULL) {
					/*
					 * This LWP has just been woken up
					 * but has not yet returned from
					 * ltsleep.
					 */
					break;
				}
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return EDEADLK;
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return EDEADLK;
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = ltsleep(lock, priority, lockstr, 0, interlock);

		/*
		 * We may have been awakened by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
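			/*
			 * For instance, if we already hold this range
			 * exclusively and are now re-locking it F_RDLCK,
			 * the entry simply changes type below and any
			 * waiters queued on it are woken to retry.
			 */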
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp = TAILQ_FIRST(&overlap->lf_blkhd))) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct lwp *l = curlwp;
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	struct simplelock *interlock = &ap->a_vp->v_interlock;
	off_t start, end;
	int error = 0;

	/*
	 * Convert the flock structure into a start and end.
	 */
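	/*
	 * For example, l_whence = SEEK_SET, l_start = 100 and l_len = 10
	 * describe bytes 100..109 (lf_end = 109), while l_len = 0 means
	 * "through end of file" and is recorded as lf_end = -1 below.
	 */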
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}
	if (start < 0)
		return EINVAL;

	/*
	 * Allocate locks before acquiring the simple lock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(kauth_cred_geteuid(l->l_cred), 0);
			if (sparelock == NULL) {
				error = ENOMEM;
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return EINVAL;
	}

	lock = lf_alloc(kauth_cred_geteuid(l->l_cred),
	    ap->a_op != F_UNLCK ? 1 : 2);
	if (lock == NULL) {
		error = ENOMEM;
		goto quit;
	}

	simple_lock(interlock);

	/*
	 * Avoid the common case of unlocking when inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	if (fl->l_len == 0)
		end = -1;
	else
		end = start + fl->l_len - 1;
	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	/* XXX NJWLWP
	 * I don't want to make the entire VFS universe use LWPs, because
	 * they don't need them, for the most part. This is an exception,
	 * and a kluge.
	 */

	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = (struct proc *)ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

 quit_unlock:
	simple_unlock(interlock);
 quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}
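
/*
 * Usage sketch (the xxx names below are hypothetical): a file system's
 * VOP_ADVLOCK implementation typically just forwards to lf_advlock()
 * with the address of its per-vnode lock list head and the current file
 * size, along these lines:
 *
 *	int
 *	xxx_advlock(void *v)
 *	{
 *		struct vop_advlock_args *ap = v;
 *		struct xxxnode *xp = VTOXXX(ap->a_vp);
 *
 *		return lf_advlock(ap, &xp->x_lockf, xp->x_size);
 *	}
 */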