/*	$NetBSD: vfs_lockf.c,v 1.83 2024/12/07 02:27:38 riastradh Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.4 (Berkeley) 10/26/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_lockf.c,v 1.83 2024/12/07 02:27:38 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/atomic.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/lockf.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/systm.h>
#include <sys/uidinfo.h>
#include <sys/vnode.h>

/*
 * The lockf structure is a kernel structure which contains the information
 * associated with a byte range lock.  The lockf structures are linked into
 * the vnode structure.  Locks are sorted by the starting byte of the lock for
 * efficiency.
 *
 * lf_next is used for two purposes, depending on whether the lock is
 * being held, or is in conflict with an existing lock.  If this lock
 * is held, it indicates the next lock on the same vnode.
 * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
 * must be queued on the lf_blkhd TAILQ of lock->lf_next.
 */

TAILQ_HEAD(locklist, lockf);

struct lockf {
	kcondvar_t lf_cv;	 /* Signalling */
	short	lf_flags;	 /* Lock semantics: F_POSIX, F_FLOCK, F_WAIT */
	short	lf_type;	 /* Lock type: F_RDLCK, F_WRLCK */
	off_t	lf_start;	 /* The byte # of the start of the lock */
	off_t	lf_end;		 /* The byte # of the end of the lock (-1=EOF) */
	void	*lf_id;		 /* process or file description holding lock */
	struct	lockf **lf_head; /* Back pointer to the head of lockf list */
	struct	lockf *lf_next;	 /* Next lock on this vnode, or blocking lock */
	struct	locklist lf_blkhd; /* List of requests blocked on this lock */
	TAILQ_ENTRY(lockf) lf_block; /* A request waiting for a lock */
	struct	uidinfo *lf_uip; /* Cached pointer to uidinfo */
};

/* Maximum length of sleep chains to traverse to try and detect deadlock. */
#define MAXDEPTH 50

static kmutex_t lockf_lock __cacheline_aligned;
static char lockstr[] = "lockf";

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

#ifdef LOCKF_DEBUG
int	lockf_debug = 0;
#endif

#define SELF	0x1
#define OTHERS	0x2

/*
 * XXX TODO
 * Misc cleanups: "void *id" should be visible in the API as a
 * "struct proc *".
 * (This requires rototilling all VFSs which support advisory locking.)
 */

/*
 * If there's a lot of lock contention on a single vnode, locking
 * schemes which allow for more parallelism would be needed.  Given how
 * infrequently byte-range locks are actually used in typical BSD
 * code, a more complex approach probably isn't worth it.
 */

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
#define MAXLOCKSPERUID (2 * maxfiles)

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
static void
lf_print(const char *tag, struct lockf *lock)
{

	printf("%s: lock %p for ", tag, lock);
	if (lock->lf_flags & F_POSIX)
		printf("proc %d", ((struct proc *)lock->lf_id)->p_pid);
	else
		printf("file %p", (struct file *)lock->lf_id);
	printf(" %s, start %jd, end %jd",
	    lock->lf_type == F_RDLCK ? "shared" :
	    lock->lf_type == F_WRLCK ? "exclusive" :
	    lock->lf_type == F_UNLCK ? "unlock" :
	    "unknown", (intmax_t)lock->lf_start, (intmax_t)lock->lf_end);
	if (TAILQ_FIRST(&lock->lf_blkhd))
		printf(" block %p\n", TAILQ_FIRST(&lock->lf_blkhd));
	else
		printf("\n");
}

static void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf, *blk;

	printf("%s: Lock list:\n", tag);
	for (lf = *lock->lf_head; lf; lf = lf->lf_next) {
		printf("\tlock %p for ", lf);
		if (lf->lf_flags & F_POSIX)
			printf("proc %d", ((struct proc *)lf->lf_id)->p_pid);
		else
			printf("file %p", (struct file *)lf->lf_id);
		printf(", %s, start %jd, end %jd",
		    lf->lf_type == F_RDLCK ? "shared" :
		    lf->lf_type == F_WRLCK ? "exclusive" :
"unlock" : 170 "unknown", (intmax_t)lf->lf_start, (intmax_t)lf->lf_end); 171 TAILQ_FOREACH(blk, &lf->lf_blkhd, lf_block) { 172 if (blk->lf_flags & F_POSIX) 173 printf("; proc %d", 174 ((struct proc *)blk->lf_id)->p_pid); 175 else 176 printf("; file %p", (struct file *)blk->lf_id); 177 printf(", %s, start %jd, end %jd", 178 blk->lf_type == F_RDLCK ? "shared" : 179 blk->lf_type == F_WRLCK ? "exclusive" : 180 blk->lf_type == F_UNLCK ? "unlock" : 181 "unknown", 182 (intmax_t)blk->lf_start, (intmax_t)blk->lf_end); 183 if (TAILQ_FIRST(&blk->lf_blkhd)) 184 panic("lf_printlist: bad list"); 185 } 186 printf("\n"); 187 } 188 } 189 #endif /* LOCKF_DEBUG */ 190 191 /* 192 * 3 options for allowfail. 193 * 0 - always allocate. 1 - cutoff at limit. 2 - cutoff at double limit. 194 */ 195 static struct lockf * 196 lf_alloc(int allowfail) 197 { 198 struct uidinfo *uip; 199 struct lockf *lock; 200 u_long lcnt; 201 const uid_t uid = kauth_cred_geteuid(kauth_cred_get()); 202 203 uip = uid_find(uid); 204 lcnt = atomic_inc_ulong_nv(&uip->ui_lockcnt); 205 if (uid && allowfail && lcnt > 206 (allowfail == 1 ? MAXLOCKSPERUID : (MAXLOCKSPERUID * 2))) { 207 atomic_dec_ulong(&uip->ui_lockcnt); 208 return NULL; 209 } 210 211 lock = kmem_alloc(sizeof(*lock), KM_SLEEP); 212 lock->lf_uip = uip; 213 cv_init(&lock->lf_cv, lockstr); 214 return lock; 215 } 216 217 static void 218 lf_free(struct lockf *lock) 219 { 220 221 atomic_dec_ulong(&lock->lf_uip->ui_lockcnt); 222 cv_destroy(&lock->lf_cv); 223 kmem_free(lock, sizeof(*lock)); 224 } 225 226 /* 227 * Walk the list of locks for an inode to 228 * find an overlapping lock (if any). 229 * 230 * NOTE: this returns only the FIRST overlapping lock. There 231 * may be more than one. 232 */ 233 static int 234 lf_findoverlap(struct lockf *lf, struct lockf *lock, int type, 235 struct lockf ***prev, struct lockf **overlap) 236 { 237 off_t start, end; 238 239 *overlap = lf; 240 if (lf == NULL) 241 return 0; 242 #ifdef LOCKF_DEBUG 243 if (lockf_debug & 2) 244 lf_print("lf_findoverlap: looking for overlap in", lock); 245 #endif /* LOCKF_DEBUG */ 246 start = lock->lf_start; 247 end = lock->lf_end; 248 while (lf != NULL) { 249 if (((type == SELF) && lf->lf_id != lock->lf_id) || 250 ((type == OTHERS) && lf->lf_id == lock->lf_id)) { 251 *prev = &lf->lf_next; 252 *overlap = lf = lf->lf_next; 253 continue; 254 } 255 #ifdef LOCKF_DEBUG 256 if (lockf_debug & 2) 257 lf_print("\tchecking", lf); 258 #endif /* LOCKF_DEBUG */ 259 /* 260 * OK, check for overlap 261 * 262 * Six cases: 263 * 0) no overlap 264 * 1) overlap == lock 265 * 2) overlap contains lock 266 * 3) lock contains overlap 267 * 4) overlap starts before lock 268 * 5) overlap ends after lock 269 */ 270 if ((lf->lf_end != -1 && start > lf->lf_end) || 271 (end != -1 && lf->lf_start > end)) { 272 /* Case 0 */ 273 #ifdef LOCKF_DEBUG 274 if (lockf_debug & 2) 275 printf("no overlap\n"); 276 #endif /* LOCKF_DEBUG */ 277 if ((type & SELF) && end != -1 && lf->lf_start > end) 278 return 0; 279 *prev = &lf->lf_next; 280 *overlap = lf = lf->lf_next; 281 continue; 282 } 283 if ((lf->lf_start == start) && (lf->lf_end == end)) { 284 /* Case 1 */ 285 #ifdef LOCKF_DEBUG 286 if (lockf_debug & 2) 287 printf("overlap == lock\n"); 288 #endif /* LOCKF_DEBUG */ 289 return 1; 290 } 291 if ((lf->lf_start <= start) && 292 (end != -1) && 293 ((lf->lf_end >= end) || (lf->lf_end == -1))) { 294 /* Case 2 */ 295 #ifdef LOCKF_DEBUG 296 if (lockf_debug & 2) 297 printf("overlap contains lock\n"); 298 #endif /* LOCKF_DEBUG */ 299 return 2; 300 } 301 if (start <= 
		if (start <= lf->lf_start &&
		    (end == -1 ||
		     (lf->lf_end != -1 && end >= lf->lf_end))) {
			/* Case 3 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("lock contains overlap\n");
#endif /* LOCKF_DEBUG */
			return 3;
		}
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			/* Case 4 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap starts before lock\n");
#endif /* LOCKF_DEBUG */
			return 4;
		}
		if ((lf->lf_start > start) &&
		    (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			/* Case 5 */
#ifdef LOCKF_DEBUG
			if (lockf_debug & 2)
				printf("overlap ends after lock\n");
#endif /* LOCKF_DEBUG */
			return 5;
		}
		panic("lf_findoverlap: default");
	}
	return 0;
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
 */
static void
lf_split(struct lockf *lock1, struct lockf *lock2, struct lockf **sparelock)
{
	struct lockf *splitlock;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 2) {
		lf_print("lf_split", lock1);
		lf_print("splitting from", lock2);
	}
#endif /* LOCKF_DEBUG */
	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		lock2->lf_next = lock1;
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		lock2->lf_next = lock1->lf_next;
		lock1->lf_next = lock2;
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = *sparelock;
	*sparelock = NULL;
	cv_destroy(&splitlock->lf_cv);
	memcpy(splitlock, lock1, sizeof(*splitlock));
	cv_init(&splitlock->lf_cv, lockstr);

	splitlock->lf_start = lock2->lf_end + 1;
	TAILQ_INIT(&splitlock->lf_blkhd);
	lock1->lf_end = lock2->lf_start - 1;
	/*
	 * OK, now link it in
	 */
	splitlock->lf_next = lock1->lf_next;
	lock2->lf_next = splitlock;
	lock1->lf_next = lock2;
}

/*
 * Wakeup a blocklist
 */
static void
lf_wakelock(struct lockf *listhead)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&listhead->lf_blkhd))) {
		KASSERT(wakelock->lf_next == listhead);
		TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
		wakelock->lf_next = NULL;
#ifdef LOCKF_DEBUG
		if (lockf_debug & 2)
			lf_print("lf_wakelock: awakening", wakelock);
#endif
		cv_broadcast(&wakelock->lf_cv);
	}
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
static int
lf_clearlock(struct lockf *unlock, struct lockf **sparelock)
{
	struct lockf **head = unlock->lf_head;
	struct lockf *lf = *head;
	struct lockf *overlap, **prev;
	int ovcase;

	if (lf == NULL)
		return 0;
#ifdef LOCKF_DEBUG
	if (unlock->lf_type != F_UNLCK)
		panic("lf_clearlock: bad type");
	if (lockf_debug & 1)
		lf_print("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	prev = head;
	while ((ovcase = lf_findoverlap(lf, unlock, SELF,
	    &prev, &overlap)) != 0) {
		/*
		 * Wakeup the list of locks to be retried.
		 */
		lf_wakelock(overlap);

		switch (ovcase) {

		case 1: /* overlap == lock */
			*prev = overlap->lf_next;
			lf_free(overlap);
			break;

		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == unlock->lf_start) {
				overlap->lf_start = unlock->lf_end + 1;
				break;
			}
			lf_split(overlap, unlock, sparelock);
			overlap->lf_next = unlock->lf_next;
			break;

		case 3: /* lock contains overlap */
			*prev = overlap->lf_next;
			lf = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			overlap->lf_end = unlock->lf_start - 1;
			prev = &overlap->lf_next;
			lf = overlap->lf_next;
			continue;

		case 5: /* overlap ends after lock */
			overlap->lf_start = unlock->lf_end + 1;
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_printlist("lf_clearlock", unlock);
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
static struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf **prev, *overlap, *lf = *(lock->lf_head);

	prev = lock->lf_head;
	while (lf_findoverlap(lf, lock, OTHERS, &prev, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return overlap;
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = overlap->lf_next;
	}
	return NULL;
}

/*
 * Set a byte-range lock.
 */
static int
lf_setlock(struct lockf *lock, struct lockf **sparelock,
    kmutex_t *interlock)
{
	struct lockf *block;
	struct lockf **head = lock->lf_head;
	struct lockf **prev, *overlap, *ltmp;
	int ovcase, needtolink, error;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_setlock", lock);
#endif /* LOCKF_DEBUG */

	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		/*
		 * Free the structure and return if nonblocking.
		 */
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return SET_ERROR(EAGAIN);
		}
		/*
		 * We are blocked.  Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us.  MAXDEPTH is set just to make sure we
		 * do not go off into neverneverland.
		 */
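		/*
		 * Illustration (editorial, not from the original source):
		 * suppose we hold a lock on bytes 10-19 and now request
		 * 0-9, which process A holds; A, in turn, is already
		 * asleep waiting for our 10-19 range.  Starting from A
		 * and following each sleeping owner's wait channel to the
		 * lock blocking it leads back to a lock owned by curproc,
		 * so the request fails with EDEADLK instead of sleeping
		 * forever.
		 */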
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct lwp *wlwp;
			volatile const struct lockf *waitblock;
			int i = 0;
			struct proc *p;

			p = (struct proc *)block->lf_id;
			KASSERT(p != NULL);
			while (i++ < maxlockdepth) {
				mutex_enter(p->p_lock);
				if (p->p_nlwps > 1) {
					mutex_exit(p->p_lock);
					break;
				}
				wlwp = LIST_FIRST(&p->p_lwps);
				lwp_lock(wlwp);
				if (wlwp->l_wchan == NULL ||
				    wlwp->l_wmesg != lockstr) {
					lwp_unlock(wlwp);
					mutex_exit(p->p_lock);
					break;
				}
				waitblock = wlwp->l_wchan;
				lwp_unlock(wlwp);
				mutex_exit(p->p_lock);
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_next;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				p = (struct proc *)waitblock->lf_id;
				if (p == curproc) {
					lf_free(lock);
					return SET_ERROR(EDEADLK);
				}
			}
			/*
			 * If we're still following a dependency chain
			 * after maxlockdepth iterations, assume we're in
			 * a cycle to be safe.
			 */
			if (i >= maxlockdepth) {
				lf_free(lock);
				return SET_ERROR(EDEADLK);
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) &&
		    lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void) lf_clearlock(lock, NULL);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
		 */
		lock->lf_next = block;
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
#ifdef LOCKF_DEBUG
		if (lockf_debug & 1) {
			lf_print("lf_setlock: blocking on", block);
			lf_printlist("lf_setlock", block);
		}
#endif /* LOCKF_DEBUG */
		error = cv_wait_sig(&lock->lf_cv, interlock);

		/*
		 * We may have been awoken by a signal (in
		 * which case we must remove ourselves from the
		 * blocked list) and/or by another process
		 * releasing a lock (in which case we have already
		 * been removed from the blocked list and our
		 * lf_next field set to NULL).
		 */
		if (lock->lf_next != NULL) {
			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
			lock->lf_next = NULL;
		}
		if (error) {
			lf_free(lock);
			return error;
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	prev = head;
	block = *head;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &prev, &overlap);
		if (ovcase)
			block = overlap->lf_next;
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			break;

		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;

		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				*prev = lock;
				lock->lf_next = overlap;
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock, sparelock);
			lf_wakelock(overlap);
			break;

		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap);
			} else {
				while ((ltmp =
				    TAILQ_FIRST(&overlap->lf_blkhd)) != NULL) {
					KASSERT(ltmp->lf_next == overlap);
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_next = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the
			 * overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap->lf_next;
				prev = &lock->lf_next;
				needtolink = 0;
			} else
				*prev = overlap->lf_next;
			lf_free(overlap);
			continue;

		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			lock->lf_next = overlap->lf_next;
			overlap->lf_next = lock;
			overlap->lf_end = lock->lf_start - 1;
			prev = &lock->lf_next;
			lf_wakelock(overlap);
			needtolink = 0;
			continue;

		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink) {
				*prev = lock;
				lock->lf_next = overlap;
			}
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap);
			break;
		}
		break;
	}
#ifdef LOCKF_DEBUG
	if (lockf_debug & 1) {
		lf_print("lf_setlock: got the lock", lock);
		lf_printlist("lf_setlock", lock);
	}
#endif /* LOCKF_DEBUG */
	return 0;
}

/*
 * Check whether there is a blocking lock,
 * and if so return its process identifier.
 */
static int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

#ifdef LOCKF_DEBUG
	if (lockf_debug & 1)
		lf_print("lf_getlock", lock);
#endif /* LOCKF_DEBUG */

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		if (block->lf_flags & F_POSIX)
			fl->l_pid = ((struct proc *)block->lf_id)->p_pid;
		else
			fl->l_pid = -1;
	} else {
		fl->l_type = F_UNLCK;
	}
	return 0;
}

/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct vop_advlock_args *ap, struct lockf **head, off_t size)
{
	struct flock *fl = ap->a_fl;
	struct lockf *lock = NULL;
	struct lockf *sparelock;
	kmutex_t *interlock = &lockf_lock;
	off_t start, end;
	int error = 0;

	KASSERTMSG(size >= 0, "size=%jd", (intmax_t)size);

	/*
	 * Convert the flock structure into a start and end.
	 */
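	/*
	 * Worked examples (editorial illustration only): with
	 * l_whence = SEEK_SET, l_start = 100 and l_len = 50 cover bytes
	 * 100..149, giving start = 100 and end = 149.  l_len = 0 means
	 * "to end of file", represented internally as end = -1.  A
	 * negative length, e.g. l_start = 100 with l_len = -50, covers
	 * the 50 bytes before l_start, giving start = 50 and end = 99.
	 */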
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;

	case SEEK_END:
		if (fl->l_start > __type_max(off_t) - size)
			return SET_ERROR(EINVAL);
		start = size + fl->l_start;
		break;

	default:
		return SET_ERROR(EINVAL);
	}

	if (fl->l_len == 0)
		end = -1;
	else {
		if (fl->l_len >= 0) {
			if (start >= 0 &&
			    fl->l_len - 1 > __type_max(off_t) - start)
				return SET_ERROR(EINVAL);
			end = start + (fl->l_len - 1);
		} else {
			/* lockf() allows negative lengths */
			if (start < 0)
				return SET_ERROR(EINVAL);
			end = start - 1;
			start += fl->l_len;
		}
	}
	if (start < 0)
		return SET_ERROR(EINVAL);

	/*
	 * Allocate locks before acquiring the interlock.  We need two
	 * locks in the worst case.
	 */
	switch (ap->a_op) {
	case F_SETLK:
	case F_UNLCK:
		/*
		 * XXX For F_UNLCK case, we can re-use the lock.
		 */
		if ((ap->a_flags & F_FLOCK) == 0) {
			/*
			 * Byte-range lock might need one more lock.
			 */
			sparelock = lf_alloc(0);
			if (sparelock == NULL) {
				error = SET_ERROR(ENOMEM);
				goto quit;
			}
			break;
		}
		/* FALLTHROUGH */

	case F_GETLK:
		sparelock = NULL;
		break;

	default:
		return SET_ERROR(EINVAL);
	}

	switch (ap->a_op) {
	case F_SETLK:
		lock = lf_alloc(1);
		break;
	case F_UNLCK:
		if (start == 0 || end == -1) {
			/* never split */
			lock = lf_alloc(0);
		} else {
			/* might split */
			lock = lf_alloc(2);
		}
		break;
	case F_GETLK:
		lock = lf_alloc(0);
		break;
	}
	if (lock == NULL) {
		error = SET_ERROR(ENOMEM);
		goto quit;
	}

	mutex_enter(interlock);

	/*
	 * Avoid the common case of unlocking when the inode has no locks.
	 */
	if (*head == (struct lockf *)0) {
		if (ap->a_op != F_SETLK) {
			fl->l_type = F_UNLCK;
			error = 0;
			goto quit_unlock;
		}
	}

	/*
	 * Create the lockf structure.
	 */
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_head = head;
	lock->lf_type = fl->l_type;
	lock->lf_next = (struct lockf *)0;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = ap->a_flags;
	if (lock->lf_flags & F_POSIX) {
		KASSERT(curproc == (struct proc *)ap->a_id);
	}
	lock->lf_id = ap->a_id;

	/*
	 * Do the requested operation.
	 */
	switch (ap->a_op) {

	case F_SETLK:
		error = lf_setlock(lock, &sparelock, interlock);
		lock = NULL; /* lf_setlock freed it */
		break;

	case F_UNLCK:
		error = lf_clearlock(lock, &sparelock);
		break;

	case F_GETLK:
		error = lf_getlock(lock, fl);
		break;

	default:
		break;
		/* NOTREACHED */
	}

quit_unlock:
	mutex_exit(interlock);
quit:
	if (lock)
		lf_free(lock);
	if (sparelock)
		lf_free(sparelock);

	return error;
}

/*
 * Initialize subsystem.
 *
 * XXX We use a global lock.  This could be the vnode interlock, but
 * the deadlock detection code may need to inspect locks belonging to
 * other files.
 */
void
lf_init(void)
{

	mutex_init(&lockf_lock, MUTEX_DEFAULT, IPL_NONE);
}
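/*
 * Example (editorial sketch, not part of the original file): a file
 * system typically wires this code up by forwarding its VOP_ADVLOCK
 * operation to lf_advlock(), passing a pointer to its per-node lock
 * list head and the current file size.  The "myfs" names below are
 * hypothetical placeholders, not a real NetBSD file system; the block
 * is compiled out.
 */
#if 0
static int
myfs_advlock(void *v)
{
	struct vop_advlock_args *ap = v;		/* same a_* fields used above */
	struct myfs_node *np = VTOMYFS(ap->a_vp);	/* hypothetical node/macro */

	/* Hand the request to the common byte-range lock code. */
	return lf_advlock(ap, &np->n_lockf, np->n_size);
}
#endif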