xref: /netbsd-src/sys/ufs/lfs/lfs_subr.c (revision 9fc453562f6ebe8eabdfd51e21ae0a0058906d4f)
1*9fc45356Sriastradh /*	$NetBSD: lfs_subr.c,v 1.103 2020/09/05 16:30:13 riastradh Exp $	*/
2fccfa11aScgd 
31b8f5ea3Sperseant /*-
4b397c875Sperseant  * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
51b8f5ea3Sperseant  * All rights reserved.
61b8f5ea3Sperseant  *
71b8f5ea3Sperseant  * This code is derived from software contributed to The NetBSD Foundation
81b8f5ea3Sperseant  * by Konrad E. Schroder <perseant@hhhh.org>.
91b8f5ea3Sperseant  *
101b8f5ea3Sperseant  * Redistribution and use in source and binary forms, with or without
111b8f5ea3Sperseant  * modification, are permitted provided that the following conditions
121b8f5ea3Sperseant  * are met:
131b8f5ea3Sperseant  * 1. Redistributions of source code must retain the above copyright
141b8f5ea3Sperseant  *    notice, this list of conditions and the following disclaimer.
151b8f5ea3Sperseant  * 2. Redistributions in binary form must reproduce the above copyright
161b8f5ea3Sperseant  *    notice, this list of conditions and the following disclaimer in the
171b8f5ea3Sperseant  *    documentation and/or other materials provided with the distribution.
181b8f5ea3Sperseant  *
191b8f5ea3Sperseant  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
201b8f5ea3Sperseant  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
211b8f5ea3Sperseant  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
221b8f5ea3Sperseant  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
231b8f5ea3Sperseant  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
241b8f5ea3Sperseant  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
251b8f5ea3Sperseant  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
261b8f5ea3Sperseant  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
271b8f5ea3Sperseant  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
281b8f5ea3Sperseant  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
291b8f5ea3Sperseant  * POSSIBILITY OF SUCH DAMAGE.
301b8f5ea3Sperseant  */
31264b874cSmycroft /*
32264b874cSmycroft  * Copyright (c) 1991, 1993
33264b874cSmycroft  *	The Regents of the University of California.  All rights reserved.
34264b874cSmycroft  *
35264b874cSmycroft  * Redistribution and use in source and binary forms, with or without
36264b874cSmycroft  * modification, are permitted provided that the following conditions
37264b874cSmycroft  * are met:
38264b874cSmycroft  * 1. Redistributions of source code must retain the above copyright
39264b874cSmycroft  *    notice, this list of conditions and the following disclaimer.
40264b874cSmycroft  * 2. Redistributions in binary form must reproduce the above copyright
41264b874cSmycroft  *    notice, this list of conditions and the following disclaimer in the
42264b874cSmycroft  *    documentation and/or other materials provided with the distribution.
43aad01611Sagc  * 3. Neither the name of the University nor the names of its contributors
44264b874cSmycroft  *    may be used to endorse or promote products derived from this software
45264b874cSmycroft  *    without specific prior written permission.
46264b874cSmycroft  *
47264b874cSmycroft  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48264b874cSmycroft  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49264b874cSmycroft  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50264b874cSmycroft  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51264b874cSmycroft  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52264b874cSmycroft  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53264b874cSmycroft  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54264b874cSmycroft  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55264b874cSmycroft  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56264b874cSmycroft  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57264b874cSmycroft  * SUCH DAMAGE.
58264b874cSmycroft  *
59e5bc90f4Sfvdl  *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
60264b874cSmycroft  */
61264b874cSmycroft 
62ec624546Slukem #include <sys/cdefs.h>
63*9fc45356Sriastradh __KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.103 2020/09/05 16:30:13 riastradh Exp $");
64ec624546Slukem 
65264b874cSmycroft #include <sys/param.h>
667bd9e243Schristos #include <sys/systm.h>
67264b874cSmycroft #include <sys/namei.h>
68264b874cSmycroft #include <sys/vnode.h>
69264b874cSmycroft #include <sys/buf.h>
70264b874cSmycroft #include <sys/mount.h>
71264b874cSmycroft #include <sys/malloc.h>
72264b874cSmycroft #include <sys/proc.h>
73437e8552Sperseant #include <sys/kauth.h>
74264b874cSmycroft 
7515158895Sdholland #include <ufs/lfs/ulfs_inode.h>
76264b874cSmycroft #include <ufs/lfs/lfs.h>
7734f0d74cSdholland #include <ufs/lfs/lfs_accessors.h>
785bc8cc2bSdholland #include <ufs/lfs/lfs_kernel.h>
79264b874cSmycroft #include <ufs/lfs/lfs_extern.h>
80264b874cSmycroft 
#ifdef DEBUG
/*
 * Human-readable names for each reserved-block type, indexed by the
 * LFS_NB_* constants; used only in DLOG debug output (see lfs_malloc).
 */
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"file block",
	"cluster",
	"clean",
	"blkiov",
};
#endif
91b397c875Sperseant 
/*
 * Number of last-resort reserve blocks kept on hand for each type,
 * in the same order as lfs_res_names; the reserve array allocated in
 * lfs_setup_resblks() is laid out type by type using these counts.
 */
int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
	LFS_N_BLKIOV,
};
100b397c875Sperseant 
/*
 * Allocate the last-resort reserve blocks handed out by lfs_malloc()
 * when malloc(9) fails, and initialize the cluster, segment, and
 * block-pointer pools.  Called before the segment lock is in use
 * (mount time); sleeps freely (M_WAITOK/PR_WAITOK).
 */
void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	fs->lfs_resblk = malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
				M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	/* Hash table used by lfs_malloc/lfs_free to find in-use reserves */
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);

	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 *
	 * Sizes are assigned type by type, in the order of the
	 * LFS_NB_* constants (matching lfs_res_qty).
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getsumsize(fs);
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getbsize(fs);
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);

	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
					     M_SEGMENT, M_WAITOK);
	}

	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
		"lfsclpl", &pool_allocator_nointr, IPL_NONE);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
		"lfssegpool", &pool_allocator_nointr, IPL_NONE);
	/* XXX: should this int32 be 32/64? */
	maxbpp = ((lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, lfs_segsize(fs) / lfs_sb_getfsize(fs) + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
		"lfsbpppl", &pool_allocator_nointr, IPL_NONE);
}
152b397c875Sperseant 
/*
 * Tear down what lfs_setup_resblks() created: destroy the pools and
 * release the reserve blocks, waiting under lfs_lock for any reserve
 * block still in use to be returned (lfs_free wakes &fs->lfs_resblk).
 */
void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	mutex_enter(&lfs_lock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		/* Wait until this reserve block has been given back */
		while (fs->lfs_resblk[i].inuse)
			mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
				&lfs_lock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	mutex_exit(&lfs_lock);
}
173b397c875Sperseant 
174b397c875Sperseant static unsigned int
lfs_mhash(void * vp)175b397c875Sperseant lfs_mhash(void *vp)
176b397c875Sperseant {
177b397c875Sperseant 	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
178b397c875Sperseant }
179b397c875Sperseant 
/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 *
 * Blocks for which no reserves exist (lfs_res_qty[type] == 0) are
 * allocated with M_WAITOK and may sleep; otherwise a failed M_NOWAIT
 * malloc falls back to the reserve pool, sleeping on &fs->lfs_resblk
 * until a reserve block of the right type is free.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, start;
	unsigned int h;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If no mem allocated for this type, it just waits */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */

	mutex_enter(&lfs_lock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				KASSERT(re->size >= size);
				/* Enter it in the hash so lfs_free() can find it */
				h = lfs_mhash(r);
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				mutex_exit(&lfs_lock);
				return r;
			}
		}
		/* All reserves of this type busy; wait for lfs_free's wakeup */
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		      lfs_res_names[type], lfs_res_qty[type]));
		mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			&lfs_lock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		      lfs_res_names[type]));
	}
	/* NOTREACHED */
	mutex_exit(&lfs_lock);
	return r;
}
241b397c875Sperseant 
242b397c875Sperseant void
lfs_free(struct lfs * fs,void * p,int type)243168cd830Schristos lfs_free(struct lfs *fs, void *p, int type)
244b397c875Sperseant {
245b397c875Sperseant 	unsigned int h;
246b397c875Sperseant 	res_t *re;
247b397c875Sperseant 
2481ebfc508Sperseant 	ASSERT_MAYBE_SEGLOCK(fs);
249b397c875Sperseant 	h = lfs_mhash(p);
2504a780c9aSad 	mutex_enter(&lfs_lock);
251b397c875Sperseant 	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
252b397c875Sperseant 		if (re->p == p) {
2535f444770Syamt 			KASSERT(re->inuse == 1);
254b397c875Sperseant 			LIST_REMOVE(re, res);
255b397c875Sperseant 			re->inuse = 0;
256b397c875Sperseant 			wakeup(&fs->lfs_resblk);
2574a780c9aSad 			mutex_exit(&lfs_lock);
258b397c875Sperseant 			return;
259b397c875Sperseant 		}
260b397c875Sperseant 	}
261b030a154Smaya 
2620bf92910Smaya #ifdef notyet /* XXX this assert fires */
263b030a154Smaya 	for (int i = 0; i < LFS_N_TOTAL; i++) {
264b9333d23Smaya 		KDASSERTMSG(fs->lfs_resblk[i].p == p,
265b030a154Smaya 		    "lfs_free: inconsistent reserved block");
2665f444770Syamt 	}
2670bf92910Smaya #endif
268b030a154Smaya 
2694a780c9aSad 	mutex_exit(&lfs_lock);
270b397c875Sperseant 
271b397c875Sperseant 	/*
272b397c875Sperseant 	 * If we didn't find it, free it.
273b397c875Sperseant 	 */
274b397c875Sperseant 	free(p, M_SEGMENT);
275b397c875Sperseant }
276264b874cSmycroft 
277264b874cSmycroft /*
278264b874cSmycroft  * lfs_seglock --
279264b874cSmycroft  *	Single thread the segment writer.
280264b874cSmycroft  */
/*
 * Acquire the segment lock (see comment above).  Recursive acquisition
 * by the owning LWP just increments the count; the pagedaemon never
 * blocks (returns EWOULDBLOCK); everyone else waits.  On first
 * acquisition this also sets up fs->lfs_sp and starts a new segment.
 * Returns 0 on success.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;

	mutex_enter(&lfs_lock);
	if (fs->lfs_seglock) {
		/* Recursive entry by the current owner: bump the count */
		if (fs->lfs_lockpid == curproc->p_pid &&
		    fs->lfs_locklwp == curlwp->l_lid) {
			++fs->lfs_seglock;
			fs->lfs_sp->seg_flags |= flags;
			mutex_exit(&lfs_lock);
			return 0;
		} else if (flags & SEGM_PAGEDAEMON) {
			/* Pagedaemon must not sleep waiting for the lock */
			mutex_exit(&lfs_lock);
			return EWOULDBLOCK;
		} else {
			while (fs->lfs_seglock) {
				(void)mtsleep(&fs->lfs_seglock, PRIBIO + 1,
					"lfs_seglock", 0, &lfs_lock);
			}
		}
	}

	fs->lfs_seglock = 1;
	fs->lfs_lockpid = curproc->p_pid;
	fs->lfs_locklwp = curlwp->l_lid;
	mutex_exit(&lfs_lock);
	fs->lfs_cleanind = 0;

	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);

	/* Drain fragment size changes out */
	rw_enter(&fs->lfs_fraglock, RW_WRITER);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	(void) lfs_initseg(fs);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	fs->lfs_startseg = lfs_sb_getcurseg(fs);
	mutex_exit(&lfs_lock);
	return 0;
}
3351b8f5ea3Sperseant 
static void lfs_unmark_dirop(struct lfs *);

/*
 * Walk the dirop chain and clear the DIROP marking from inodes whose
 * directory operations have completed, dropping the vnode references
 * they held.  A marker inode keeps our place in the chain so that
 * lfs_lock can be released around vrele().  The LFS_UNDIROP flag
 * ensures only one thread does this at a time; a second caller
 * returns immediately.
 */
static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *marker;
	struct vnode *vp;
	int doit;

	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	/* Claim the job, unless someone else already has it */
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	mutex_exit(&lfs_lock);

	if (!doit)
		return;

	/* Build a marker inode; IN_MARKER makes other walkers skip it */
	marker = pool_get(&lfs_inode_pool, PR_WAITOK);
	KASSERT(fs != NULL);
	memset(marker, 0, sizeof(*marker));
	marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK);
	memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs));
	marker->i_state |= IN_MARKER;

	mutex_enter(&lfs_lock);
	TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) {
		/* Move the marker past ip so we keep our place */
		TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
		TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker,
		    i_lfs_dchain);
		/* Skip other threads' markers */
		if (ip->i_state & IN_MARKER)
			continue;
		vp = ITOV(ip);
		/* Only completed (CDIROP set, ADIROP clear) dirops qualify */
		if ((ip->i_state & (IN_ADIROP | IN_CDIROP)) == IN_CDIROP) {
			--lfs_dirvcount;
			--fs->lfs_dirvcount;
			vp->v_uflag &= ~VU_DIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			wakeup(&lfs_dirvcount);
			fs->lfs_unlockvp = vp;
			/* Drop lfs_lock around vrele(); marker holds our spot */
			mutex_exit(&lfs_lock);
			vrele(vp);
			mutex_enter(&lfs_lock);
			fs->lfs_unlockvp = NULL;
			ip->i_state &= ~IN_CDIROP;
		}
	}
	TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	fs->lfs_flags &= ~LFS_UNDIROP;
	wakeup(&fs->lfs_flags);
	mutex_exit(&lfs_lock);

	pool_put(&lfs_inoext_pool, marker->inode_ext.lfs);
	pool_put(&lfs_inode_pool, marker);
}
393b397c875Sperseant 
/*
 * Mark segments clean that are empty under both superblock views.
 * Called with the segment lock held, immediately after lfs_activesb
 * has been toggled (see lfs_segunlock checkpoint path).
 */
static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, error, waited;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that we've swapped lfs_activesb, but while we still
	 * hold the segment lock, run through the segment list marking
	 * the empty ones clean.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < lfs_sb_getnseg(fs); i++) {
		/* Clean only if DIRTY+EMPTY and not ACTIVE in both views */
		if ((fs->lfs_suflags[0][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
		    (fs->lfs_suflags[1][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {

			/* Make sure the sb is written before we clean */
			mutex_enter(&lfs_lock);
			while (waited == 0 && fs->lfs_sbactive)
				mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
					0, &lfs_lock);
			mutex_exit(&lfs_lock);
			waited = 1;

			if ((error = lfs_do_segclean(fs, i)) != 0) {
				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
			}
		}
		/* Copy the active view's flags over the inactive view */
		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
			fs->lfs_suflags[fs->lfs_activesb][i];
	}
}
431b397c875Sperseant 
432264b874cSmycroft /*
433264b874cSmycroft  * lfs_segunlock --
434264b874cSmycroft  *	Single thread the segment writer.
435264b874cSmycroft  */
436264b874cSmycroft void
lfs_segunlock(struct lfs * fs)4374e3fced9Sperseant lfs_segunlock(struct lfs *fs)
438264b874cSmycroft {
439264b874cSmycroft 	struct segment *sp;
440264b874cSmycroft 	unsigned long sync, ckp;
4418886b0f4Sperseant 	struct buf *bp;
442ef3c6076Sperseant 	int do_unmark_dirop = 0;
443264b874cSmycroft 
4444be4b8adSperseant 	sp = fs->lfs_sp;
4454be4b8adSperseant 
4464a780c9aSad 	mutex_enter(&lfs_lock);
447d1a0c6fbSmaya 
448d1a0c6fbSmaya 	if (!LFS_SEGLOCK_HELD(fs))
449d1a0c6fbSmaya 		panic("lfs seglock not held");
450d1a0c6fbSmaya 
4514be4b8adSperseant 	if (fs->lfs_seglock == 1) {
452f9b3466dSperseant 		if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0)
453ef3c6076Sperseant 			do_unmark_dirop = 1;
4544a780c9aSad 		mutex_exit(&lfs_lock);
455264b874cSmycroft 		sync = sp->seg_flags & SEGM_SYNC;
456264b874cSmycroft 		ckp = sp->seg_flags & SEGM_CKP;
457dddf5c51Sperseant 
458dddf5c51Sperseant 		/* We should have a segment summary, and nothing else */
459dddf5c51Sperseant 		KASSERT(sp->cbpp == sp->bpp + 1);
460dddf5c51Sperseant 
461264b874cSmycroft 		/* Free allocated segment summary */
462f59b8f4bSdholland 		lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
4638886b0f4Sperseant 		bp = *sp->bpp;
464b397c875Sperseant 		lfs_freebuf(fs, bp);
4651b8f5ea3Sperseant 
466b397c875Sperseant 		pool_put(&fs->lfs_bpppool, sp->bpp);
4674e3fced9Sperseant 		sp->bpp = NULL;
4684b4f884bSperseant 
4694b4f884bSperseant 		/*
4704b4f884bSperseant 		 * If we're not sync, we're done with sp, get rid of it.
4714b4f884bSperseant 		 * Otherwise, we keep a local copy around but free
4724b4f884bSperseant 		 * fs->lfs_sp so another process can use it (we have to
4734b4f884bSperseant 		 * wait but they don't have to wait for us).
4744b4f884bSperseant 		 */
475ddfb1dbbSperseant 		if (!sync)
476b397c875Sperseant 			pool_put(&fs->lfs_segpool, sp);
4774e3fced9Sperseant 		fs->lfs_sp = NULL;
478264b874cSmycroft 
479264b874cSmycroft 		/*
480264b874cSmycroft 		 * If the I/O count is non-zero, sleep until it reaches zero.
481264b874cSmycroft 		 * At the moment, the user's process hangs around so we can
482264b874cSmycroft 		 * sleep.
483264b874cSmycroft 		 */
4844a780c9aSad 		mutex_enter(&lfs_lock);
485ec5ea71aSchs 		if (--fs->lfs_iocount <= 1)
4864b4f884bSperseant 			wakeup(&fs->lfs_iocount);
4874a780c9aSad 		mutex_exit(&lfs_lock);
488ec5ea71aSchs 
489264b874cSmycroft 		/*
490ddfb1dbbSperseant 		 * If we're not checkpointing, we don't have to block
491ddfb1dbbSperseant 		 * other processes to wait for a synchronous write
492ddfb1dbbSperseant 		 * to complete.
493ddfb1dbbSperseant 		 */
494ddfb1dbbSperseant 		if (!ckp) {
4951ebfc508Sperseant 			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
4964408dea4Smaya 
4974a780c9aSad 			mutex_enter(&lfs_lock);
498ddfb1dbbSperseant 			--fs->lfs_seglock;
499ddfb1dbbSperseant 			fs->lfs_lockpid = 0;
500d28248e8Sperseant 			fs->lfs_locklwp = 0;
5014a780c9aSad 			mutex_exit(&lfs_lock);
502ddfb1dbbSperseant 			wakeup(&fs->lfs_seglock);
503ddfb1dbbSperseant 		}
504ddfb1dbbSperseant 		/*
505264b874cSmycroft 		 * We let checkpoints happen asynchronously.  That means
506264b874cSmycroft 		 * that during recovery, we have to roll forward between
507264b874cSmycroft 		 * the two segments described by the first and second
508264b874cSmycroft 		 * superblocks to make sure that the checkpoint described
509264b874cSmycroft 		 * by a superblock completed.
510264b874cSmycroft 		 */
5114a780c9aSad 		mutex_enter(&lfs_lock);
51279748725Smlelstv 		while (ckp && sync && fs->lfs_iocount) {
5134a780c9aSad 			(void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
5144a780c9aSad 				      "lfs_iocount", 0, &lfs_lock);
51579748725Smlelstv 			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", fs, fs->lfs_iocount));
51679748725Smlelstv 		}
517ddfb1dbbSperseant 		while (sync && sp->seg_iocount) {
5184a780c9aSad 			(void)mtsleep(&sp->seg_iocount, PRIBIO + 1,
5194a780c9aSad 				     "seg_iocount", 0, &lfs_lock);
520eefd94b8Sperseant 			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount));
521ddfb1dbbSperseant 		}
5224a780c9aSad 		mutex_exit(&lfs_lock);
523ddfb1dbbSperseant 		if (sync)
524b397c875Sperseant 			pool_put(&fs->lfs_segpool, sp);
5254b4f884bSperseant 
526264b874cSmycroft 		if (ckp) {
527264b874cSmycroft 			fs->lfs_nactive = 0;
5281b8f5ea3Sperseant 			/* If we *know* everything's on disk, write both sbs */
529b397c875Sperseant 			/* XXX should wait for this one	 */
5301b8f5ea3Sperseant 			if (sync)
531adca8af5Sdholland 				lfs_writesuper(fs, lfs_sb_getsboff(fs, fs->lfs_activesb));
532adca8af5Sdholland 			lfs_writesuper(fs, lfs_sb_getsboff(fs, 1 - fs->lfs_activesb));
53325f49c3cSperseant 			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
534b397c875Sperseant 				lfs_auto_segclean(fs);
53525f49c3cSperseant 				/* If sync, we can clean the remainder too */
53625f49c3cSperseant 				if (sync)
53725f49c3cSperseant 					lfs_auto_segclean(fs);
53825f49c3cSperseant 			}
5391b8f5ea3Sperseant 			fs->lfs_activesb = 1 - fs->lfs_activesb;
5404408dea4Smaya 
5411ebfc508Sperseant 			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
5424408dea4Smaya 
5434a780c9aSad 			mutex_enter(&lfs_lock);
544264b874cSmycroft 			--fs->lfs_seglock;
545264b874cSmycroft 			fs->lfs_lockpid = 0;
546d28248e8Sperseant 			fs->lfs_locklwp = 0;
5474a780c9aSad 			mutex_exit(&lfs_lock);
548264b874cSmycroft 			wakeup(&fs->lfs_seglock);
549ddfb1dbbSperseant 		}
55032ae84b1Sperseant 		/* Reenable fragment size changes */
5519abeea58Sad 		rw_exit(&fs->lfs_fraglock);
552ef3c6076Sperseant 		if (do_unmark_dirop)
553ef3c6076Sperseant 			lfs_unmark_dirop(fs);
554264b874cSmycroft 	} else {
555264b874cSmycroft 		--fs->lfs_seglock;
5563c3d9e74Sriastradh 		KASSERT(fs->lfs_seglock != 0);
5574a780c9aSad 		mutex_exit(&lfs_lock);
558264b874cSmycroft 	}
559264b874cSmycroft }
560102c8a6aSyamt 
561102c8a6aSyamt /*
5620549fd61Sperseant  * Drain dirops and start writer.
5630549fd61Sperseant  *
5640549fd61Sperseant  * No simple_locks are held when we enter and none are held when we return.
565102c8a6aSyamt  */
5663c3d9e74Sriastradh void
lfs_writer_enter(struct lfs * fs,const char * wmesg)567102c8a6aSyamt lfs_writer_enter(struct lfs *fs, const char *wmesg)
568102c8a6aSyamt {
56982cfa759Sad 	int error __diagused;
570102c8a6aSyamt 
5715fc5b909Sriastradh 	ASSERT_NO_SEGLOCK(fs);
5724a780c9aSad 	mutex_enter(&lfs_lock);
573102c8a6aSyamt 
574102c8a6aSyamt 	/* disallow dirops during flush */
575102c8a6aSyamt 	fs->lfs_writer++;
576102c8a6aSyamt 
577102c8a6aSyamt 	while (fs->lfs_dirops > 0) {
578102c8a6aSyamt 		++fs->lfs_diropwait;
5794a780c9aSad 		error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
5804a780c9aSad 				&lfs_lock);
5813c3d9e74Sriastradh 		KASSERT(error == 0);
582102c8a6aSyamt 		--fs->lfs_diropwait;
583102c8a6aSyamt 	}
584102c8a6aSyamt 
5854a780c9aSad 	mutex_exit(&lfs_lock);
586102c8a6aSyamt }
587102c8a6aSyamt 
5885fc5b909Sriastradh int
lfs_writer_tryenter(struct lfs * fs)5895fc5b909Sriastradh lfs_writer_tryenter(struct lfs *fs)
5905fc5b909Sriastradh {
5915fc5b909Sriastradh 	int writer_set;
5925fc5b909Sriastradh 
5935fc5b909Sriastradh 	ASSERT_MAYBE_SEGLOCK(fs);
5945fc5b909Sriastradh 	mutex_enter(&lfs_lock);
5955fc5b909Sriastradh 	writer_set = (fs->lfs_dirops == 0);
5965fc5b909Sriastradh 	if (writer_set)
5975fc5b909Sriastradh 		fs->lfs_writer++;
5985fc5b909Sriastradh 	mutex_exit(&lfs_lock);
5995fc5b909Sriastradh 
6005fc5b909Sriastradh 	return writer_set;
6015fc5b909Sriastradh }
6025fc5b909Sriastradh 
603102c8a6aSyamt void
lfs_writer_leave(struct lfs * fs)604102c8a6aSyamt lfs_writer_leave(struct lfs *fs)
605102c8a6aSyamt {
606712239e3Sthorpej 	bool dowakeup;
607102c8a6aSyamt 
6081ebfc508Sperseant 	ASSERT_MAYBE_SEGLOCK(fs);
6094a780c9aSad 	mutex_enter(&lfs_lock);
610102c8a6aSyamt 	dowakeup = !(--fs->lfs_writer);
611102c8a6aSyamt 	if (dowakeup)
6129f6a52ecSmaya 		cv_broadcast(&fs->lfs_diropscv);
6137171c6cdSmaya 	mutex_exit(&lfs_lock);
614102c8a6aSyamt }
615dddf5c51Sperseant 
616dddf5c51Sperseant /*
617dddf5c51Sperseant  * Unlock, wait for the cleaner, then relock to where we were before.
618dddf5c51Sperseant  * To be used only at a fairly high level, to address a paucity of free
619dddf5c51Sperseant  * segments propagated back from lfs_gop_write().
620dddf5c51Sperseant  */
/*
 * Unlock, wait for the cleaner, then relock to where we were before
 * (see comment above).  Flushes gathered data first, tells the
 * cleaner it must run, fully drops the (possibly recursive) segment
 * lock, sleeps until segments are available, then reacquires the lock
 * to its previous depth with the saved flags.
 */
void
lfs_segunlock_relock(struct lfs *fs)
{
	int n = fs->lfs_seglock;
	u_int16_t seg_flags;
	CLEANERINFO *cip;
	struct buf *bp;

	/* Nothing to do if we don't hold the seglock */
	if (n == 0)
		return;

	/* Write anything we've already gathered to disk */
	lfs_writeseg(fs, fs->lfs_sp);

	/* Tell cleaner */
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
			lfs_ci_getflags(fs, cip) | LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	/* Save segment flags for later */
	seg_flags = fs->lfs_sp->seg_flags;

	fs->lfs_sp->seg_flags |= SEGM_PROT; /* Don't unmark dirop nodes */
	while(fs->lfs_seglock)
		lfs_segunlock(fs);

	/* Wait for the cleaner */
	lfs_wakeup_cleaner(fs);
	mutex_enter(&lfs_lock);
	while (LFS_STARVED_FOR_SEGS(fs))
		mtsleep(&fs->lfs_availsleep, PRIBIO, "relock", 0,
			&lfs_lock);
	mutex_exit(&lfs_lock);

	/* Put the segment lock back the way it was. */
	while(n--)
		lfs_seglock(fs, seg_flags);

	/* Cleaner can relax now */
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
			lfs_ci_getflags(fs, cip) & ~LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	return;
}
668b99e4c82Sperseant 
669b99e4c82Sperseant /*
670b99e4c82Sperseant  * Wake up the cleaner, provided that nowrap is not set.
671b99e4c82Sperseant  */
672b99e4c82Sperseant void
lfs_wakeup_cleaner(struct lfs * fs)673b99e4c82Sperseant lfs_wakeup_cleaner(struct lfs *fs)
674b99e4c82Sperseant {
675b99e4c82Sperseant 	if (fs->lfs_nowrap > 0)
676b99e4c82Sperseant 		return;
677b99e4c82Sperseant 
6788f5758dbSmaya 	cv_broadcast(&fs->lfs_nextsegsleep);
6798f5758dbSmaya 	cv_broadcast(&lfs_allclean_wakeup);
680b99e4c82Sperseant }
681