xref: /netbsd-src/sys/ufs/lfs/lfs_subr.c (revision bf1e9b32e27832f0c493206710fb8b58a980838a)
1 /*	$NetBSD: lfs_subr.c,v 1.53 2005/05/29 21:25:24 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 /*
39  * Copyright (c) 1991, 1993
40  *	The Regents of the University of California.  All rights reserved.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
67  */
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.53 2005/05/29 21:25:24 christos Exp $");
71 
72 #include <sys/param.h>
73 #include <sys/systm.h>
74 #include <sys/namei.h>
75 #include <sys/vnode.h>
76 #include <sys/buf.h>
77 #include <sys/mount.h>
78 #include <sys/malloc.h>
79 #include <sys/proc.h>
80 
81 #include <ufs/ufs/inode.h>
82 #include <ufs/lfs/lfs.h>
83 #include <ufs/lfs/lfs_extern.h>
84 
85 #include <uvm/uvm.h>
86 
87 /*
88  * Return buffer with the contents of block "offset" from the beginning of
89  * directory "ip".  If "res" is non-zero, fill it in with a pointer to the
90  * remaining space in the directory.
91  */
92 int
93 lfs_blkatoff(void *v)
94 {
95 	struct vop_blkatoff_args /* {
96 		struct vnode *a_vp;
97 		off_t a_offset;
98 		char **a_res;
99 		struct buf **a_bpp;
100 		} */ *ap = v;
101 	struct lfs *fs;
102 	struct inode *ip;
103 	struct buf *bp;
104 	daddr_t lbn;
105 	int bsize, error;
106 
107 	ip = VTOI(ap->a_vp);
108 	fs = ip->i_lfs;
109 	lbn = lblkno(fs, ap->a_offset);
110 	bsize = blksize(fs, ip, lbn);
111 
112 	*ap->a_bpp = NULL;
113 	if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) {
114 		brelse(bp);
115 		return (error);
116 	}
117 	if (ap->a_res)
118 		*ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset);
119 	*ap->a_bpp = bp;
120 	return (0);
121 }
122 
#ifdef DEBUG
/*
 * Printable name of each reserve-block type, used only in debugging
 * output.  Indexed by the same "type" value that indexes lfs_res_qty[].
 */
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",	/* pairs with LFS_N_SUMMARIES */
	"superblock",	/* pairs with LFS_N_SBLOCKS */
	"file block",	/* pairs with LFS_N_IBLOCKS */
	"cluster",	/* pairs with LFS_N_CLUSTERS */
	"clean",	/* pairs with LFS_N_CLEAN */
	"blkiov",	/* pairs with LFS_N_BLKIOV */
};
#endif
133 
134 int lfs_res_qty[LFS_NB_COUNT] = {
135 	LFS_N_SUMMARIES,
136 	LFS_N_SBLOCKS,
137 	LFS_N_IBLOCKS,
138 	LFS_N_CLUSTERS,
139 	LFS_N_CLEAN,
140 	LFS_N_BLKIOV,
141 };
142 
143 void
144 lfs_setup_resblks(struct lfs *fs)
145 {
146 	int i, j;
147 	int maxbpp;
148 
149 	ASSERT_NO_SEGLOCK(fs);
150 	fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
151 					  M_WAITOK);
152 	for (i = 0; i < LFS_N_TOTAL; i++) {
153 		fs->lfs_resblk[i].inuse = 0;
154 		fs->lfs_resblk[i].p = NULL;
155 	}
156 	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
157 		LIST_INIT(fs->lfs_reshash + i);
158 
159 	/*
160 	 * These types of allocations can be larger than a page,
161 	 * so we can't use the pool subsystem for them.
162 	 */
163 	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
164 		fs->lfs_resblk[i].size = fs->lfs_sumsize;
165 	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
166 		fs->lfs_resblk[i].size = LFS_SBPAD;
167 	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
168 		fs->lfs_resblk[i].size = fs->lfs_bsize;
169 	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
170 		fs->lfs_resblk[i].size = MAXPHYS;
171 	for (j = 0; j < LFS_N_CLEAN; j++, i++)
172 		fs->lfs_resblk[i].size = MAXPHYS;
173 	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
174 		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);
175 
176 	for (i = 0; i < LFS_N_TOTAL; i++) {
177 		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
178 					     M_SEGMENT, M_WAITOK);
179 	}
180 
181 	/*
182 	 * Initialize pools for small types (XXX is BPP small?)
183 	 */
184 	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
185 		"lfsclpl", &pool_allocator_nointr);
186 	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
187 		"lfssegpool", &pool_allocator_nointr);
188 	maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
189 	maxbpp = MIN(maxbpp, segsize(fs) / fs->lfs_fsize + 2);
190 	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
191 		"lfsbpppl", &pool_allocator_nointr);
192 }
193 
194 void
195 lfs_free_resblks(struct lfs *fs)
196 {
197 	int i;
198 
199 	pool_destroy(&fs->lfs_bpppool);
200 	pool_destroy(&fs->lfs_segpool);
201 	pool_destroy(&fs->lfs_clpool);
202 
203 	simple_lock(&fs->lfs_interlock);
204 	for (i = 0; i < LFS_N_TOTAL; i++) {
205 		while (fs->lfs_resblk[i].inuse)
206 			ltsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
207 				&fs->lfs_interlock);
208 		if (fs->lfs_resblk[i].p != NULL)
209 			free(fs->lfs_resblk[i].p, M_SEGMENT);
210 	}
211 	free(fs->lfs_resblk, M_SEGMENT);
212 	simple_unlock(&fs->lfs_interlock);
213 }
214 
215 static unsigned int
216 lfs_mhash(void *vp)
217 {
218 	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
219 }
220 
221 /*
222  * Return memory of the given size for the given purpose, or use one of a
223  * number of spare last-resort buffers, if malloc returns NULL.
224  */
225 void *
226 lfs_malloc(struct lfs *fs, size_t size, int type)
227 {
228 	struct lfs_res_blk *re;
229 	void *r;
230 	int i, s, start;
231 	unsigned int h;
232 
233 	ASSERT_MAYBE_SEGLOCK(fs);
234 	r = NULL;
235 
236 	/* If no mem allocated for this type, it just waits */
237 	if (lfs_res_qty[type] == 0) {
238 		r = malloc(size, M_SEGMENT, M_WAITOK);
239 		return r;
240 	}
241 
242 	/* Otherwise try a quick malloc, and if it works, great */
243 	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
244 		return r;
245 	}
246 
247 	/*
248 	 * If malloc returned NULL, we are forced to use one of our
249 	 * reserve blocks.  We have on hand at least one summary block,
250 	 * at least one cluster block, at least one superblock,
251 	 * and several indirect blocks.
252 	 */
253 
254 	simple_lock(&fs->lfs_interlock);
255 	/* skip over blocks of other types */
256 	for (i = 0, start = 0; i < type; i++)
257 		start += lfs_res_qty[i];
258 	while (r == NULL) {
259 		for (i = 0; i < lfs_res_qty[type]; i++) {
260 			if (fs->lfs_resblk[start + i].inuse == 0) {
261 				re = fs->lfs_resblk + start + i;
262 				re->inuse = 1;
263 				r = re->p;
264 				KASSERT(re->size >= size);
265 				h = lfs_mhash(r);
266 				s = splbio();
267 				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
268 				splx(s);
269 				simple_unlock(&fs->lfs_interlock);
270 				return r;
271 			}
272 		}
273 		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
274 		      lfs_res_names[type], lfs_res_qty[type]));
275 		ltsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
276 			&fs->lfs_interlock);
277 		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
278 		      lfs_res_names[type]));
279 	}
280 	/* NOTREACHED */
281 	simple_unlock(&fs->lfs_interlock);
282 	return r;
283 }
284 
285 void
286 lfs_free(struct lfs *fs, void *p, int type)
287 {
288 	int s;
289 	unsigned int h;
290 	res_t *re;
291 #ifdef DEBUG
292 	int i;
293 #endif
294 
295 	ASSERT_MAYBE_SEGLOCK(fs);
296 	h = lfs_mhash(p);
297 	simple_lock(&fs->lfs_interlock);
298 	s = splbio();
299 	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
300 		if (re->p == p) {
301 			KASSERT(re->inuse == 1);
302 			LIST_REMOVE(re, res);
303 			re->inuse = 0;
304 			wakeup(&fs->lfs_resblk);
305 			splx(s);
306 			simple_unlock(&fs->lfs_interlock);
307 			return;
308 		}
309 	}
310 #ifdef DEBUG
311 	for (i = 0; i < LFS_N_TOTAL; i++) {
312 		if (fs->lfs_resblk[i].p == p)
313 			panic("lfs_free: inconsistent reserved block");
314 	}
315 #endif
316 	splx(s);
317 	simple_unlock(&fs->lfs_interlock);
318 
319 	/*
320 	 * If we didn't find it, free it.
321 	 */
322 	free(p, M_SEGMENT);
323 }
324 
325 /*
326  * lfs_seglock --
327  *	Single thread the segment writer.
328  */
329 int
330 lfs_seglock(struct lfs *fs, unsigned long flags)
331 {
332 	struct segment *sp;
333 
334 	simple_lock(&fs->lfs_interlock);
335 	if (fs->lfs_seglock) {
336 		if (fs->lfs_lockpid == curproc->p_pid) {
337 			simple_unlock(&fs->lfs_interlock);
338 			++fs->lfs_seglock;
339 			fs->lfs_sp->seg_flags |= flags;
340 			return 0;
341 		} else if (flags & SEGM_PAGEDAEMON) {
342 			simple_unlock(&fs->lfs_interlock);
343 			return EWOULDBLOCK;
344 		} else {
345 			while (fs->lfs_seglock) {
346 				(void)ltsleep(&fs->lfs_seglock, PRIBIO + 1,
347 					"lfs seglock", 0, &fs->lfs_interlock);
348 			}
349 		}
350 	}
351 
352 	fs->lfs_seglock = 1;
353 	fs->lfs_lockpid = curproc->p_pid;
354 	simple_unlock(&fs->lfs_interlock);
355 	fs->lfs_cleanind = 0;
356 
357 #ifdef DEBUG
358 	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);
359 #endif
360 	/* Drain fragment size changes out */
361 	lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0);
362 
363 	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
364 	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
365 	sp->seg_flags = flags;
366 	sp->vp = NULL;
367 	sp->seg_iocount = 0;
368 	(void) lfs_initseg(fs);
369 
370 	/*
371 	 * Keep a cumulative count of the outstanding I/O operations.  If the
372 	 * disk drive catches up with us it could go to zero before we finish,
373 	 * so we artificially increment it by one until we've scheduled all of
374 	 * the writes we intend to do.
375 	 */
376 	simple_lock(&fs->lfs_interlock);
377 	++fs->lfs_iocount;
378 	simple_unlock(&fs->lfs_interlock);
379 	return 0;
380 }
381 
382 static void lfs_unmark_dirop(struct lfs *);
383 
384 static void
385 lfs_unmark_dirop(struct lfs *fs)
386 {
387 	struct inode *ip, *nip;
388 	struct vnode *vp;
389 	int doit;
390 
391 	ASSERT_NO_SEGLOCK(fs);
392 	simple_lock(&fs->lfs_interlock);
393 	doit = !(fs->lfs_flags & LFS_UNDIROP);
394 	if (doit)
395 		fs->lfs_flags |= LFS_UNDIROP;
396 	if (!doit) {
397 		simple_unlock(&fs->lfs_interlock);
398 		return;
399 	}
400 
401 	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
402 		nip = TAILQ_NEXT(ip, i_lfs_dchain);
403 		simple_unlock(&fs->lfs_interlock);
404 		vp = ITOV(ip);
405 
406 		simple_lock(&vp->v_interlock);
407 		if (VOP_ISLOCKED(vp) &&
408 			   vp->v_lock.lk_lockholder != curproc->p_pid) {
409 			simple_lock(&fs->lfs_interlock);
410 			simple_unlock(&vp->v_interlock);
411 			continue;
412 		}
413 		if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) {
414 			simple_lock(&fs->lfs_interlock);
415 			simple_lock(&lfs_subsys_lock);
416 			--lfs_dirvcount;
417 			simple_unlock(&lfs_subsys_lock);
418 			vp->v_flag &= ~VDIROP;
419 			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
420 			simple_unlock(&fs->lfs_interlock);
421 			wakeup(&lfs_dirvcount);
422 			simple_unlock(&vp->v_interlock);
423 			simple_lock(&fs->lfs_interlock);
424 			fs->lfs_unlockvp = vp;
425 			simple_unlock(&fs->lfs_interlock);
426 			vrele(vp);
427 			simple_lock(&fs->lfs_interlock);
428 			fs->lfs_unlockvp = NULL;
429 			simple_unlock(&fs->lfs_interlock);
430 		} else
431 			simple_unlock(&vp->v_interlock);
432 		simple_lock(&fs->lfs_interlock);
433 	}
434 
435 	fs->lfs_flags &= ~LFS_UNDIROP;
436 	simple_unlock(&fs->lfs_interlock);
437 	wakeup(&fs->lfs_flags);
438 }
439 
440 static void
441 lfs_auto_segclean(struct lfs *fs)
442 {
443 	int i, error, s, waited;
444 
445 	ASSERT_SEGLOCK(fs);
446 	/*
447 	 * Now that we've swapped lfs_activesb, but while we still
448 	 * hold the segment lock, run through the segment list marking
449 	 * the empty ones clean.
450 	 * XXX - do we really need to do them all at once?
451 	 */
452 	waited = 0;
453 	for (i = 0; i < fs->lfs_nseg; i++) {
454 		if ((fs->lfs_suflags[0][i] &
455 		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
456 		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
457 		    (fs->lfs_suflags[1][i] &
458 		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
459 		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {
460 
461 			/* Make sure the sb is written before we clean */
462 			simple_lock(&fs->lfs_interlock);
463 			s = splbio();
464 			while (waited == 0 && fs->lfs_sbactive)
465 				ltsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
466 					0, &fs->lfs_interlock);
467 			splx(s);
468 			simple_unlock(&fs->lfs_interlock);
469 			waited = 1;
470 
471 			if ((error = lfs_do_segclean(fs, i)) != 0) {
472 				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
473 			}
474 		}
475 		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
476 			fs->lfs_suflags[fs->lfs_activesb][i];
477 	}
478 }
479 
480 /*
481  * lfs_segunlock --
482  *	Single thread the segment writer.
483  */
484 void
485 lfs_segunlock(struct lfs *fs)
486 {
487 	struct segment *sp;
488 	unsigned long sync, ckp;
489 	struct buf *bp;
490 	int do_unmark_dirop = 0;
491 
492 	sp = fs->lfs_sp;
493 
494 	simple_lock(&fs->lfs_interlock);
495 	LOCK_ASSERT(LFS_SEGLOCK_HELD(fs));
496 	if (fs->lfs_seglock == 1) {
497 		if ((sp->seg_flags & SEGM_PROT) == 0)
498 			do_unmark_dirop = 1;
499 		simple_unlock(&fs->lfs_interlock);
500 		sync = sp->seg_flags & SEGM_SYNC;
501 		ckp = sp->seg_flags & SEGM_CKP;
502 		if (sp->bpp != sp->cbpp) {
503 			/* Free allocated segment summary */
504 			fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize);
505 			bp = *sp->bpp;
506 			lfs_freebuf(fs, bp);
507 		} else
508 			DLOG((DLOG_SEG, "lfs_segunlock: unlock to 0 with no summary"));
509 
510 		pool_put(&fs->lfs_bpppool, sp->bpp);
511 		sp->bpp = NULL;
512 
513 		/*
514 		 * If we're not sync, we're done with sp, get rid of it.
515 		 * Otherwise, we keep a local copy around but free
516 		 * fs->lfs_sp so another process can use it (we have to
517 		 * wait but they don't have to wait for us).
518 		 */
519 		if (!sync)
520 			pool_put(&fs->lfs_segpool, sp);
521 		fs->lfs_sp = NULL;
522 
523 		/*
524 		 * If the I/O count is non-zero, sleep until it reaches zero.
525 		 * At the moment, the user's process hangs around so we can
526 		 * sleep.
527 		 */
528 		simple_lock(&fs->lfs_interlock);
529 		if (--fs->lfs_iocount == 0)
530 			LFS_DEBUG_COUNTLOCKED("lfs_segunlock");
531 		if (fs->lfs_iocount <= 1)
532 			wakeup(&fs->lfs_iocount);
533 		simple_unlock(&fs->lfs_interlock);
534 		/*
535 		 * If we're not checkpointing, we don't have to block
536 		 * other processes to wait for a synchronous write
537 		 * to complete.
538 		 */
539 		if (!ckp) {
540 #ifdef DEBUG
541 			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
542 #endif
543 			simple_lock(&fs->lfs_interlock);
544 			--fs->lfs_seglock;
545 			fs->lfs_lockpid = 0;
546 			simple_unlock(&fs->lfs_interlock);
547 			wakeup(&fs->lfs_seglock);
548 		}
549 		/*
550 		 * We let checkpoints happen asynchronously.  That means
551 		 * that during recovery, we have to roll forward between
552 		 * the two segments described by the first and second
553 		 * superblocks to make sure that the checkpoint described
554 		 * by a superblock completed.
555 		 */
556 		simple_lock(&fs->lfs_interlock);
557 		while (ckp && sync && fs->lfs_iocount)
558 			(void)ltsleep(&fs->lfs_iocount, PRIBIO + 1,
559 				      "lfs_iocount", 0, &fs->lfs_interlock);
560 		while (sync && sp->seg_iocount) {
561 			(void)ltsleep(&sp->seg_iocount, PRIBIO + 1,
562 				     "seg_iocount", 0, &fs->lfs_interlock);
563 			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount));
564 		}
565 		simple_unlock(&fs->lfs_interlock);
566 		if (sync)
567 			pool_put(&fs->lfs_segpool, sp);
568 
569 		if (ckp) {
570 			fs->lfs_nactive = 0;
571 			/* If we *know* everything's on disk, write both sbs */
572 			/* XXX should wait for this one	 */
573 			if (sync)
574 				lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]);
575 			lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]);
576 			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
577 				lfs_auto_segclean(fs);
578 				/* If sync, we can clean the remainder too */
579 				if (sync)
580 					lfs_auto_segclean(fs);
581 			}
582 			fs->lfs_activesb = 1 - fs->lfs_activesb;
583 #ifdef DEBUG
584 			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
585 #endif
586 			simple_lock(&fs->lfs_interlock);
587 			--fs->lfs_seglock;
588 			fs->lfs_lockpid = 0;
589 			simple_unlock(&fs->lfs_interlock);
590 			wakeup(&fs->lfs_seglock);
591 		}
592 		/* Reenable fragment size changes */
593 		lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
594 		if (do_unmark_dirop)
595 			lfs_unmark_dirop(fs);
596 	} else if (fs->lfs_seglock == 0) {
597 		simple_unlock(&fs->lfs_interlock);
598 		panic ("Seglock not held");
599 	} else {
600 		--fs->lfs_seglock;
601 		simple_unlock(&fs->lfs_interlock);
602 	}
603 }
604 
605 /*
606  * drain dirops and start writer.
607  */
608 int
609 lfs_writer_enter(struct lfs *fs, const char *wmesg)
610 {
611 	int error = 0;
612 
613 	ASSERT_MAYBE_SEGLOCK(fs);
614 	simple_lock(&fs->lfs_interlock);
615 
616 	/* disallow dirops during flush */
617 	fs->lfs_writer++;
618 
619 	while (fs->lfs_dirops > 0) {
620 		++fs->lfs_diropwait;
621 		error = ltsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
622 				&fs->lfs_interlock);
623 		--fs->lfs_diropwait;
624 	}
625 
626 	if (error)
627 		fs->lfs_writer--;
628 
629 	simple_unlock(&fs->lfs_interlock);
630 
631 	return error;
632 }
633 
634 void
635 lfs_writer_leave(struct lfs *fs)
636 {
637 	boolean_t dowakeup;
638 
639 	ASSERT_MAYBE_SEGLOCK(fs);
640 	simple_lock(&fs->lfs_interlock);
641 	dowakeup = !(--fs->lfs_writer);
642 	simple_unlock(&fs->lfs_interlock);
643 	if (dowakeup)
644 		wakeup(&fs->lfs_dirops);
645 }
646