/*	$OpenBSD: vfs_lockf.c,v 1.33 2019/01/30 17:04:04 anton Exp $	*/
/*	$NetBSD: vfs_lockf.c,v 1.7 1996/02/04 02:18:21 christos Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Scooter Morris at Genentech Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lockf.c	8.3 (Berkeley) 1/6/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/pool.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/unistd.h>

struct pool lockf_state_pool;
struct pool lockf_pool;

/*
 * This variable controls the maximum number of processes that will
 * be checked in doing deadlock detection.
 */
int maxlockdepth = MAXDEPTH;

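/*
 * Flags for lf_findoverlap(): consider only our own locks (SELF) or only
 * locks owned by others (OTHERS).
 */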
#define SELF	0x1
#define OTHERS	0x2

#ifdef LOCKF_DEBUG

#define	DEBUG_SETLOCK		0x01
#define	DEBUG_CLEARLOCK		0x02
#define	DEBUG_GETLOCK		0x04
#define	DEBUG_FINDOVR		0x08
#define	DEBUG_SPLIT		0x10
#define	DEBUG_WAKELOCK		0x20
#define	DEBUG_LINK		0x40

int	lockf_debug = DEBUG_SETLOCK|DEBUG_CLEARLOCK|DEBUG_WAKELOCK;

#define	DPRINTF(args, level)	if (lockf_debug & (level)) printf args
#define	LFPRINT(args, level)	if (lockf_debug & (level)) lf_print args
#else
#define	DPRINTF(args, level)
#define	LFPRINT(args, level)
#endif

void ls_ref(struct lockf_state *);
void ls_rele(struct lockf_state *);

void
lf_init(void)
{
	pool_init(&lockf_state_pool, sizeof(struct lockf_state), 0, IPL_NONE,
	    PR_WAITOK, "lockfspl", NULL);
	pool_init(&lockf_pool, sizeof(struct lockf), 0, IPL_NONE,
	    PR_WAITOK, "lockfpl", NULL);
}

struct lockf *lf_alloc(uid_t, int);
void lf_free(struct lockf *);

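/*
 * A lockf_state is shared by all byte-range locks on a single file and is
 * reference counted.  When the last reference is released, the back pointer
 * that was handed to lf_advlock() is cleared and the state is freed.
 */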
void
ls_ref(struct lockf_state *ls)
{
	ls->ls_refs++;
}

void
ls_rele(struct lockf_state *ls)
{
	if (--ls->ls_refs > 0)
		return;

#ifdef LOCKF_DIAGNOSTIC
	KASSERT(TAILQ_EMPTY(&ls->ls_locks));
	KASSERT(ls->ls_pending == 0);
#endif

	*ls->ls_owner = NULL;
	pool_put(&lockf_state_pool, ls);
}

/*
 * We enforce a limit on locks by uid, so that a single user cannot
 * run the kernel out of memory.  For now, the limit is pretty coarse.
 * There is no limit on root.
 *
 * Splitting a lock will always succeed, regardless of current allocations.
 * If you're slightly above the limit, we still have to permit an allocation
 * so that the unlock can succeed.  If the unlocking causes too many splits,
 * however, you're totally cut off.
 */
int maxlocksperuid = 1024;

/*
 * Three options for allowfail:
 * 0 - always allocate.  1 - cut off at the limit.  2 - cut off at twice
 * the limit.
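 *
 * lf_split() passes 0 so that splitting an existing lock never fails;
 * lf_advlock() passes 1 for F_SETLK and 2 for unlock and get requests.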
 */
struct lockf *
lf_alloc(uid_t uid, int allowfail)
{
	struct uidinfo *uip;
	struct lockf *lock;

	uip = uid_find(uid);
	if (uid && allowfail && uip->ui_lockcnt >
	    (allowfail == 1 ? maxlocksperuid : (maxlocksperuid * 2))) {
		uid_release(uip);
		return (NULL);
	}
	uip->ui_lockcnt++;
	uid_release(uip);
	lock = pool_get(&lockf_pool, PR_WAITOK);
	lock->lf_uid = uid;
	return (lock);
}

void
lf_free(struct lockf *lock)
{
	struct uidinfo *uip;

	LFPRINT(("lf_free", lock), DEBUG_LINK);

#ifdef LOCKF_DIAGNOSTIC
	KASSERT(TAILQ_EMPTY(&lock->lf_blkhd));
#endif /* LOCKF_DIAGNOSTIC */

	ls_rele(lock->lf_state);

	uip = uid_find(lock->lf_uid);
	uip->ui_lockcnt--;
	uid_release(uip);
	pool_put(&lockf_pool, lock);
}


/*
 * Do an advisory lock operation.
 */
int
lf_advlock(struct lockf_state **state, off_t size, caddr_t id, int op,
    struct flock *fl, int flags)
{
	struct proc *p = curproc;
	struct lockf_state *ls;
	struct lockf *lock;
	off_t start, end;
	int error;

	/*
	 * Convert the flock structure into a start and end.
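	 *
	 * For example, l_whence = SEEK_SET, l_start = 100 and l_len = 10
	 * describe the byte range [100, 109]; l_len = 0 means "until EOF",
	 * which is represented below by end == -1.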
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		/*
		 * Caller is responsible for adding any necessary offset
		 * when SEEK_CUR is used.
		 */
		start = fl->l_start;
		break;
	case SEEK_END:
		start = size + fl->l_start;
		break;
	default:
		return (EINVAL);
	}
	if (start < 0)
		return (EINVAL);
	if (fl->l_len > 0) {
		if (fl->l_len - 1 > LLONG_MAX - start)
			return (EOVERFLOW);
		end = start + (fl->l_len - 1);
	} else if (fl->l_len < 0) {
		if (fl->l_start + fl->l_len < 0)
			return (EINVAL);
		end = fl->l_start - 1;
		start += fl->l_len;
	} else {
		end = -1;
	}

	if (*state == NULL) {
		ls = pool_get(&lockf_state_pool, PR_WAITOK | PR_ZERO);
		/* pool_get() can sleep, ensure the race was won. */
		if (*state == NULL) {
			*state = ls;
			ls->ls_owner = state;
			TAILQ_INIT(&ls->ls_locks);
		} else {
			pool_put(&lockf_state_pool, ls);
		}
	}
	ls = *state;
	ls_ref(ls);

	/*
	 * Avoid the common case of unlocking when the inode has no locks.
	 */
	if (TAILQ_EMPTY(&ls->ls_locks)) {
		if (op != F_SETLK) {
			ls_rele(ls);
			fl->l_type = F_UNLCK;
			return (0);
		}
	}

	lock = lf_alloc(p->p_ucred->cr_uid, op == F_SETLK ? 1 : 2);
	if (!lock) {
		ls_rele(ls);
		return (ENOLCK);
	}
	lock->lf_start = start;
	lock->lf_end = end;
	lock->lf_id = id;
	lock->lf_state = ls;
	lock->lf_type = fl->l_type;
	lock->lf_blk = NULL;
	TAILQ_INIT(&lock->lf_blkhd);
	lock->lf_flags = flags;
	lock->lf_pid = (flags & F_POSIX) ? p->p_p->ps_pid : -1;

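	/*
	 * F_SETLK hands the lock over to lf_setlock(), which consumes it:
	 * on success the lock is linked into the list or merged with an
	 * existing one, on failure it is freed.  For F_UNLCK and F_GETLK
	 * the lock merely describes the request and is freed below.
	 */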
	switch (op) {
	case F_SETLK:
		return (lf_setlock(lock));
	case F_UNLCK:
		error = lf_clearlock(lock);
		lf_free(lock);
		return (error);
	case F_GETLK:
		error = lf_getlock(lock, fl);
		lf_free(lock);
		return (error);
	default:
		lf_free(lock);
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Set a byte-range lock.
 */
int
lf_setlock(struct lockf *lock)
{
	struct lockf *block;
	struct lockf *overlap, *ltmp;
	static char lockstr[] = "lockf";
	int ovcase, priority, needtolink, error;

	LFPRINT(("lf_setlock", lock), DEBUG_SETLOCK);

	priority = PLOCK;
	if (lock->lf_type == F_WRLCK)
		priority += 4;
	priority |= PCATCH;
	/*
	 * Scan lock list for this file looking for locks that would block us.
	 */
	while ((block = lf_getblock(lock)) != NULL) {
		if ((lock->lf_flags & F_WAIT) == 0) {
			lf_free(lock);
			return (EAGAIN);
		}
		/*
		 * We are blocked. Since flock style locks cover
		 * the whole file, there is no chance for deadlock.
		 * For byte-range locks we must check for deadlock.
		 *
		 * Deadlock detection is done by looking through the
		 * wait channels to see if there are any cycles that
		 * involve us. MAXDEPTH is set just to make sure we
		 * do not go off into neverland.
		 */
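		/*
		 * A process blocked in lf_setlock() sleeps on its own
		 * struct lockf (see the tsleep() below), so p_wchan points
		 * at that pending lock, whose lf_blk field names the lock
		 * it is waiting behind.  Following p_wchan -> lf_blk -> lf_id
		 * therefore walks the wait-for chain; finding ourselves in
		 * it means deadlock.
		 */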
		if ((lock->lf_flags & F_POSIX) &&
		    (block->lf_flags & F_POSIX)) {
			struct proc *wproc;
			struct lockf *waitblock;
			int i = 0;

			/* The block is waiting on something */
			wproc = (struct proc *)block->lf_id;
			while (wproc->p_wchan &&
			    (wproc->p_wmesg == lockstr) &&
			    (i++ < maxlockdepth)) {
				waitblock = (struct lockf *)wproc->p_wchan;
				/* Get the owner of the blocking lock */
				waitblock = waitblock->lf_blk;
				if ((waitblock->lf_flags & F_POSIX) == 0)
					break;
				wproc = (struct proc *)waitblock->lf_id;
				if (wproc == (struct proc *)lock->lf_id) {
					lf_free(lock);
					return (EDEADLK);
				}
			}
		}
		/*
		 * For flock type locks, we must first remove
		 * any shared locks that we hold before we sleep
		 * waiting for an exclusive lock.
		 */
		if ((lock->lf_flags & F_FLOCK) && lock->lf_type == F_WRLCK) {
			lock->lf_type = F_UNLCK;
			(void)lf_clearlock(lock);
			lock->lf_type = F_WRLCK;
		}
		/*
		 * Add our lock to the blocked list and sleep until we're free.
		 * Remember who blocked us (for deadlock detection).
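		 *
		 * The ls_pending counter and the wakeup_one() on lf_state
		 * below let lf_purgelocks() wait until every sleeping locker
		 * has left this function.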
		 */
		lock->lf_blk = block;
		LFPRINT(("lf_setlock", lock), DEBUG_SETLOCK);
		LFPRINT(("lf_setlock: blocking on", block), DEBUG_SETLOCK);
		TAILQ_INSERT_TAIL(&block->lf_blkhd, lock, lf_block);
		lock->lf_state->ls_pending++;
		error = tsleep(lock, priority, lockstr, 0);
		lock->lf_state->ls_pending--;
		wakeup_one(lock->lf_state);
		if (lock->lf_blk != NULL) {
			TAILQ_REMOVE(&lock->lf_blk->lf_blkhd, lock, lf_block);
			lock->lf_blk = NULL;
		}
		if (error) {
			lf_free(lock);
			return (error);
		}
		if (lock->lf_flags & F_INTR) {
			lf_free(lock);
			return (EINTR);
		}
	}
	/*
	 * No blocks!!  Add the lock.  Note that we will
	 * downgrade or upgrade any overlapping locks this
	 * process already owns.
	 *
	 * Skip over locks owned by other processes.
	 * Handle any locks that overlap and are owned by ourselves.
	 */
	block = TAILQ_FIRST(&lock->lf_state->ls_locks);
	overlap = NULL;
	needtolink = 1;
	for (;;) {
		ovcase = lf_findoverlap(block, lock, SELF, &overlap);
		if (ovcase)
			block = TAILQ_NEXT(overlap, lf_entry);
		/*
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
		 */
		switch (ovcase) {
		case 0: /* no overlap */
			if (needtolink) {
				if (overlap)	/* insert before overlap */
					TAILQ_INSERT_BEFORE(overlap, lock,
					    lf_entry);
				else		/* first or last lock in list */
					TAILQ_INSERT_TAIL(&lock->lf_state->ls_locks,
					    lock, lf_entry);
			}
			break;
		case 1: /* overlap == lock */
			/*
			 * If downgrading lock, others may be
			 * able to acquire it.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK)
				lf_wakelock(overlap, 0);
			overlap->lf_type = lock->lf_type;
			lf_free(lock);
			lock = overlap; /* for debug output below */
			break;
		case 2: /* overlap contains lock */
			/*
			 * Check for common starting point and different types.
			 */
			if (overlap->lf_type == lock->lf_type) {
				lf_free(lock);
				lock = overlap; /* for debug output below */
				break;
			}
			if (overlap->lf_start == lock->lf_start) {
				if (!needtolink)
					TAILQ_REMOVE(&lock->lf_state->ls_locks,
					    lock, lf_entry);
				TAILQ_INSERT_BEFORE(overlap, lock, lf_entry);
				overlap->lf_start = lock->lf_end + 1;
			} else
				lf_split(overlap, lock);
			lf_wakelock(overlap, 0);
			break;
		case 3: /* lock contains overlap */
			/*
			 * If downgrading lock, others may be able to
			 * acquire it, otherwise take the list.
			 */
			if (lock->lf_type == F_RDLCK &&
			    overlap->lf_type == F_WRLCK) {
				lf_wakelock(overlap, 0);
			} else {
				while ((ltmp =
				    TAILQ_FIRST(&overlap->lf_blkhd))) {
					TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
					    lf_block);
					ltmp->lf_blk = lock;
					TAILQ_INSERT_TAIL(&lock->lf_blkhd,
					    ltmp, lf_block);
				}
			}
			/*
			 * Add the new lock if necessary and delete the overlap.
			 */
			if (needtolink) {
				TAILQ_INSERT_BEFORE(overlap, lock, lf_entry);
				needtolink = 0;
			}
			TAILQ_REMOVE(&lock->lf_state->ls_locks, overlap, lf_entry);
			lf_free(overlap);
			continue;
		case 4: /* overlap starts before lock */
			/*
			 * Add lock after overlap on the list.
			 */
			if (!needtolink)
				TAILQ_REMOVE(&lock->lf_state->ls_locks, lock,
				    lf_entry);
			TAILQ_INSERT_AFTER(&lock->lf_state->ls_locks, overlap,
			    lock, lf_entry);
			overlap->lf_end = lock->lf_start - 1;
			lf_wakelock(overlap, 0);
			needtolink = 0;
			continue;
		case 5: /* overlap ends after lock */
			/*
			 * Add the new lock before overlap.
			 */
			if (needtolink)
				TAILQ_INSERT_BEFORE(overlap, lock, lf_entry);
			overlap->lf_start = lock->lf_end + 1;
			lf_wakelock(overlap, 0);
			break;
		}
		break;
	}
	LFPRINT(("lf_setlock: got the lock", lock), DEBUG_SETLOCK);
	return (0);
}

/*
 * Remove a byte-range lock on an inode.
 *
 * Generally, find the lock (or an overlap to that lock)
 * and remove it (or shrink it), then wakeup anyone we can.
 */
int
lf_clearlock(struct lockf *lock)
{
	struct lockf *lf = TAILQ_FIRST(&lock->lf_state->ls_locks);
	struct lockf *overlap;
	int ovcase;

	if (lf == NULL)
		return (0);
	LFPRINT(("lf_clearlock", lock), DEBUG_CLEARLOCK);
	while ((ovcase = lf_findoverlap(lf, lock, SELF, &overlap))) {
		lf_wakelock(overlap, 0);

		switch (ovcase) {
		case 1: /* overlap == lock */
			TAILQ_REMOVE(&lock->lf_state->ls_locks, overlap,
			    lf_entry);
			lf_free(overlap);
			break;
		case 2: /* overlap contains lock: split it */
			if (overlap->lf_start == lock->lf_start) {
				overlap->lf_start = lock->lf_end + 1;
				break;
			}
			lf_split(overlap, lock);
			/*
			 * lf_split() has linked lock into the list, but
			 * lf_clearlock() must leave it detached, so take
			 * it back out.
			 */
			TAILQ_REMOVE(&lock->lf_state->ls_locks, lock, lf_entry);
			break;
		case 3: /* lock contains overlap */
			lf = TAILQ_NEXT(overlap, lf_entry);
			TAILQ_REMOVE(&lock->lf_state->ls_locks, overlap,
			    lf_entry);
			lf_free(overlap);
			continue;
		case 4: /* overlap starts before lock */
			overlap->lf_end = lock->lf_start - 1;
			lf = TAILQ_NEXT(overlap, lf_entry);
			continue;
		case 5: /* overlap ends after lock */
			overlap->lf_start = lock->lf_end + 1;
			break;
		}
		break;
	}
	return (0);
}

/*
 * Check whether there is a blocking lock and, if so, describe it in the
 * given flock structure.
 */
int
lf_getlock(struct lockf *lock, struct flock *fl)
{
	struct lockf *block;

	LFPRINT(("lf_getlock", lock), DEBUG_CLEARLOCK);

	if ((block = lf_getblock(lock)) != NULL) {
		fl->l_type = block->lf_type;
		fl->l_whence = SEEK_SET;
		fl->l_start = block->lf_start;
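		/* An lf_end of -1 means the lock runs to EOF; report l_len 0. */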
		if (block->lf_end == -1)
			fl->l_len = 0;
		else
			fl->l_len = block->lf_end - block->lf_start + 1;
		fl->l_pid = block->lf_pid;
	} else {
		fl->l_type = F_UNLCK;
	}
	return (0);
}

/*
 * Walk the list of locks for an inode and
 * return the first blocking lock.
 */
struct lockf *
lf_getblock(struct lockf *lock)
{
	struct lockf *overlap, *lf;

	lf = TAILQ_FIRST(&lock->lf_state->ls_locks);
	while (lf_findoverlap(lf, lock, OTHERS, &overlap) != 0) {
		/*
		 * We've found an overlap, see if it blocks us
		 */
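		/* Only an exclusive lock on either side causes a conflict. */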
		if ((lock->lf_type == F_WRLCK || overlap->lf_type == F_WRLCK))
			return (overlap);
		/*
		 * Nope, point to the next one on the list and
		 * see if it blocks us
		 */
		lf = TAILQ_NEXT(overlap, lf_entry);
	}
	return (NULL);
}

/*
 * Walk the list of locks for an inode to
 * find an overlapping lock (if any).
 *
 * NOTE: this returns only the FIRST overlapping lock.  There
 *	 may be more than one.
 */
int
lf_findoverlap(struct lockf *lf, struct lockf *lock, int type,
    struct lockf **overlap)
{
	off_t start, end;

	LFPRINT(("lf_findoverlap: looking for overlap in", lock), DEBUG_FINDOVR);

	*overlap = lf;
	start = lock->lf_start;
	end = lock->lf_end;
	while (lf != NULL) {
		if (((type & SELF) && lf->lf_id != lock->lf_id) ||
		    ((type & OTHERS) && lf->lf_id == lock->lf_id)) {
			*overlap = lf = TAILQ_NEXT(lf, lf_entry);
			continue;
		}
		LFPRINT(("\tchecking", lf), DEBUG_FINDOVR);
		/*
		 * OK, check for overlap
		 *
		 * Six cases:
		 *	0) no overlap
		 *	1) overlap == lock
		 *	2) overlap contains lock
		 *	3) lock contains overlap
		 *	4) overlap starts before lock
		 *	5) overlap ends after lock
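		 *
		 * Pictorially, with lock being the range we are checking
		 * and lf the list entry:
		 *
		 *	1) lock |-------|	2) lock   |---|
		 *	   lf   |-------|	   lf   |-------|
		 *
		 *	3) lock |-------|	4) lock    |------|
		 *	   lf     |---|		   lf   |------|
		 *
		 *	5) lock |------|
		 *	   lf      |------|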
		 */

		/* Case 0 */
		if ((lf->lf_end != -1 && start > lf->lf_end) ||
		    (end != -1 && lf->lf_start > end)) {
			DPRINTF(("no overlap\n"), DEBUG_FINDOVR);
			if ((type & SELF) && end != -1 && lf->lf_start > end)
				return (0);
			*overlap = lf = TAILQ_NEXT(lf, lf_entry);
			continue;
		}
		/* Case 1 */
		if ((lf->lf_start == start) && (lf->lf_end == end)) {
			DPRINTF(("overlap == lock\n"), DEBUG_FINDOVR);
			return (1);
		}
		/* Case 2 */
		if ((lf->lf_start <= start) &&
		    (lf->lf_end == -1 || (end != -1 && lf->lf_end >= end))) {
			DPRINTF(("overlap contains lock\n"), DEBUG_FINDOVR);
			return (2);
		}
		/* Case 3 */
		if (start <= lf->lf_start &&
		    (end == -1 || (lf->lf_end != -1 && end >= lf->lf_end))) {
			DPRINTF(("lock contains overlap\n"), DEBUG_FINDOVR);
			return (3);
		}
		/* Case 4 */
		if ((lf->lf_start < start) &&
		    ((lf->lf_end >= start) || (lf->lf_end == -1))) {
			DPRINTF(("overlap starts before lock\n"),
			    DEBUG_FINDOVR);
			return (4);
		}
		/* Case 5 */
		if ((lf->lf_start > start) && (end != -1) &&
		    ((lf->lf_end > end) || (lf->lf_end == -1))) {
			DPRINTF(("overlap ends after lock\n"), DEBUG_FINDOVR);
			return (5);
		}
		panic("lf_findoverlap: default");
	}
	return (0);
}

/*
 * Purge all locks associated with the given lock state.
 */
void
lf_purgelocks(struct lockf_state *ls)
{
	struct lockf *lock;

	if (ls == NULL)
		return;

	ls_ref(ls);

	/* Interrupt blocked locks and wait for all of them to finish. */
	TAILQ_FOREACH(lock, &ls->ls_locks, lf_entry) {
		LFPRINT(("lf_purgelocks: wakeup", lock), DEBUG_SETLOCK);
		lf_wakelock(lock, F_INTR);
	}
	while (ls->ls_pending > 0) {
		tsleep(ls, PLOCK, "lockfp", 0);
	}

	/*
	 * Any remaining locks cannot block other locks at this point and can
	 * safely be removed.
	 */
	while ((lock = TAILQ_FIRST(&ls->ls_locks))) {
		TAILQ_REMOVE(&ls->ls_locks, lock, lf_entry);
		lf_free(lock);
	}

	/* At this point we expect to hold the last lock state reference. */
#ifdef LOCKF_DIAGNOSTIC
	KASSERT(ls->ls_refs == 1);
#endif
	ls_rele(ls);
}

/*
 * Split a lock and a contained region into
 * two or three locks as necessary.
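 *
 * For example, splitting lock1 = [0, 99] around lock2 = [40, 59] shrinks
 * lock1 to [0, 39], allocates a new lock covering [60, 99] and links lock2
 * in between them on the list.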
 */
void
lf_split(struct lockf *lock1, struct lockf *lock2)
{
	struct lockf *splitlock;

	LFPRINT(("lf_split", lock1), DEBUG_SPLIT);
	LFPRINT(("splitting from", lock2), DEBUG_SPLIT);

	/*
	 * Check to see if splitting into only two pieces.
	 */
	if (lock1->lf_start == lock2->lf_start) {
		lock1->lf_start = lock2->lf_end + 1;
		TAILQ_INSERT_BEFORE(lock1, lock2, lf_entry);
		return;
	}
	if (lock1->lf_end == lock2->lf_end) {
		lock1->lf_end = lock2->lf_start - 1;
		TAILQ_INSERT_AFTER(&lock1->lf_state->ls_locks, lock1, lock2,
		    lf_entry);
		return;
	}
	/*
	 * Make a new lock consisting of the last part of
	 * the encompassing lock
	 */
	splitlock = lf_alloc(lock1->lf_uid, 0);
	splitlock->lf_flags = lock1->lf_flags;
	splitlock->lf_type = lock1->lf_type;
	splitlock->lf_start = lock2->lf_end + 1;
	splitlock->lf_end = lock1->lf_end;
	splitlock->lf_id = lock1->lf_id;
	splitlock->lf_state = lock1->lf_state;
	splitlock->lf_blk = NULL;
	splitlock->lf_pid = lock1->lf_pid;
	TAILQ_INIT(&splitlock->lf_blkhd);
	ls_ref(splitlock->lf_state);
	lock1->lf_end = lock2->lf_start - 1;

	TAILQ_INSERT_AFTER(&lock1->lf_state->ls_locks, lock1, lock2, lf_entry);
	TAILQ_INSERT_AFTER(&lock1->lf_state->ls_locks, lock2, splitlock,
	    lf_entry);
}

/*
 * Wakeup a blocklist
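 *
 * The flags argument is OR'ed into every waiter's lf_flags; lf_purgelocks()
 * passes F_INTR here to make the awakened lockers fail with EINTR.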
 */
void
lf_wakelock(struct lockf *lock, int flags)
{
	struct lockf *wakelock;

	while ((wakelock = TAILQ_FIRST(&lock->lf_blkhd))) {
		TAILQ_REMOVE(&lock->lf_blkhd, wakelock, lf_block);
		wakelock->lf_blk = NULL;
		wakelock->lf_flags |= flags;
		wakeup_one(wakelock);
	}
}

#ifdef LOCKF_DEBUG
/*
 * Print out a lock.
 */
void
lf_print(const char *tag, struct lockf *lock)
{
	struct lockf	*block;

	if (tag)
		printf("%s: ", tag);
	printf("lock %p", lock);
	if (lock == NULL) {
		printf("\n");
		return;
	}
	printf(" for ");
	if (lock->lf_flags & F_POSIX)
		printf("thread %d", ((struct proc *)(lock->lf_id))->p_tid);
	else
		printf("id %p", lock->lf_id);
	printf(" %s, start %llx, end %llx",
		lock->lf_type == F_RDLCK ? "shared" :
		lock->lf_type == F_WRLCK ? "exclusive" :
		lock->lf_type == F_UNLCK ? "unlock" :
		"unknown", lock->lf_start, lock->lf_end);
	printf(", next %p, state %p",
	    TAILQ_NEXT(lock, lf_entry), lock->lf_state);
	block = TAILQ_FIRST(&lock->lf_blkhd);
	if (block)
		printf(", block");
	TAILQ_FOREACH(block, &lock->lf_blkhd, lf_block)
		printf(" %p,", block);
	printf("\n");
}

void
lf_printlist(const char *tag, struct lockf *lock)
{
	struct lockf *lf;

	printf("%s: Lock list:\n", tag);
	TAILQ_FOREACH(lf, &lock->lf_state->ls_locks, lf_entry) {
		if (lock == lf)
			printf(" * ");
		else
			printf("   ");
		lf_print(NULL, lf);
	}
}
#endif /* LOCKF_DEBUG */