1*9fc45356Sriastradh /* $NetBSD: lfs_subr.c,v 1.103 2020/09/05 16:30:13 riastradh Exp $ */
2fccfa11aScgd
31b8f5ea3Sperseant /*-
4b397c875Sperseant * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
51b8f5ea3Sperseant * All rights reserved.
61b8f5ea3Sperseant *
71b8f5ea3Sperseant * This code is derived from software contributed to The NetBSD Foundation
81b8f5ea3Sperseant * by Konrad E. Schroder <perseant@hhhh.org>.
91b8f5ea3Sperseant *
101b8f5ea3Sperseant * Redistribution and use in source and binary forms, with or without
111b8f5ea3Sperseant * modification, are permitted provided that the following conditions
121b8f5ea3Sperseant * are met:
131b8f5ea3Sperseant * 1. Redistributions of source code must retain the above copyright
141b8f5ea3Sperseant * notice, this list of conditions and the following disclaimer.
151b8f5ea3Sperseant * 2. Redistributions in binary form must reproduce the above copyright
161b8f5ea3Sperseant * notice, this list of conditions and the following disclaimer in the
171b8f5ea3Sperseant * documentation and/or other materials provided with the distribution.
181b8f5ea3Sperseant *
191b8f5ea3Sperseant * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
201b8f5ea3Sperseant * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
211b8f5ea3Sperseant * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
221b8f5ea3Sperseant * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
231b8f5ea3Sperseant * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
241b8f5ea3Sperseant * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
251b8f5ea3Sperseant * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
261b8f5ea3Sperseant * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
271b8f5ea3Sperseant * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
281b8f5ea3Sperseant * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
291b8f5ea3Sperseant * POSSIBILITY OF SUCH DAMAGE.
301b8f5ea3Sperseant */
31264b874cSmycroft /*
32264b874cSmycroft * Copyright (c) 1991, 1993
33264b874cSmycroft * The Regents of the University of California. All rights reserved.
34264b874cSmycroft *
35264b874cSmycroft * Redistribution and use in source and binary forms, with or without
36264b874cSmycroft * modification, are permitted provided that the following conditions
37264b874cSmycroft * are met:
38264b874cSmycroft * 1. Redistributions of source code must retain the above copyright
39264b874cSmycroft * notice, this list of conditions and the following disclaimer.
40264b874cSmycroft * 2. Redistributions in binary form must reproduce the above copyright
41264b874cSmycroft * notice, this list of conditions and the following disclaimer in the
42264b874cSmycroft * documentation and/or other materials provided with the distribution.
43aad01611Sagc * 3. Neither the name of the University nor the names of its contributors
44264b874cSmycroft * may be used to endorse or promote products derived from this software
45264b874cSmycroft * without specific prior written permission.
46264b874cSmycroft *
47264b874cSmycroft * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
48264b874cSmycroft * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
49264b874cSmycroft * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
50264b874cSmycroft * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
51264b874cSmycroft * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
52264b874cSmycroft * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
53264b874cSmycroft * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
54264b874cSmycroft * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
55264b874cSmycroft * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
56264b874cSmycroft * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
57264b874cSmycroft * SUCH DAMAGE.
58264b874cSmycroft *
59e5bc90f4Sfvdl * @(#)lfs_subr.c 8.4 (Berkeley) 5/8/95
60264b874cSmycroft */
61264b874cSmycroft
62ec624546Slukem #include <sys/cdefs.h>
63*9fc45356Sriastradh __KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.103 2020/09/05 16:30:13 riastradh Exp $");
64ec624546Slukem
65264b874cSmycroft #include <sys/param.h>
667bd9e243Schristos #include <sys/systm.h>
67264b874cSmycroft #include <sys/namei.h>
68264b874cSmycroft #include <sys/vnode.h>
69264b874cSmycroft #include <sys/buf.h>
70264b874cSmycroft #include <sys/mount.h>
71264b874cSmycroft #include <sys/malloc.h>
72264b874cSmycroft #include <sys/proc.h>
73437e8552Sperseant #include <sys/kauth.h>
74264b874cSmycroft
7515158895Sdholland #include <ufs/lfs/ulfs_inode.h>
76264b874cSmycroft #include <ufs/lfs/lfs.h>
7734f0d74cSdholland #include <ufs/lfs/lfs_accessors.h>
785bc8cc2bSdholland #include <ufs/lfs/lfs_kernel.h>
79264b874cSmycroft #include <ufs/lfs/lfs_extern.h>
80264b874cSmycroft
#ifdef DEBUG
/*
 * Human-readable names for the reserved-block types, indexed by the
 * LFS_N_* type passed to lfs_malloc().  Used only in DLOG() diagnostics.
 * Must stay in sync (order and count) with lfs_res_qty[] below.
 */
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"file block",
	"cluster",
	"clean",
	"blkiov",
};
#endif
91b397c875Sperseant
/*
 * Number of last-resort reserve buffers kept on hand for each block type;
 * lfs_setup_resblks() allocates this many of each, and lfs_malloc() hands
 * them out when kernel malloc fails.  Indexed by the same LFS_N_* type as
 * lfs_res_names[] above; LFS_N_TOTAL is the sum of these entries.
 */
int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
	LFS_N_BLKIOV,
};
100b397c875Sperseant
/*
 * Set up the per-filesystem reserve ("last resort") buffers that
 * lfs_malloc() falls back on when kernel malloc fails, and initialize
 * the pools for cluster, segment, and block-pointer-array structures.
 *
 * Called without the segment lock held (asserted).  All allocations
 * here are M_WAITOK/PR_WAITOK, so this routine may sleep.
 */
void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	/* Descriptor array for all reserve blocks, of every type. */
	fs->lfs_resblk = malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
	    M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	/* Hash table used by lfs_free() to recognize reserve pointers. */
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);

	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 *
	 * Record each block's size first (sizes vary by type; `i' runs
	 * cumulatively across all types in lfs_res_qty[] order) ...
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getsumsize(fs);
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getbsize(fs);
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);

	/* ... then allocate the backing memory for each one. */
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
		    M_SEGMENT, M_WAITOK);
	}

	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
	    "lfsclpl", &pool_allocator_nointr, IPL_NONE);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
	    "lfssegpool", &pool_allocator_nointr, IPL_NONE);
	/* XXX: should this int32 be 32/64? */
	/*
	 * Worst-case buffer-pointer count per segment: one per block
	 * pointer that fits in the summary, bounded by how many fragments
	 * fit in a segment.
	 */
	maxbpp = ((lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, lfs_segsize(fs) / lfs_sb_getfsize(fs) + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
	    "lfsbpppl", &pool_allocator_nointr, IPL_NONE);
}
152b397c875Sperseant
/*
 * Tear down what lfs_setup_resblks() created: destroy the pools and
 * free the reserve buffers.  Waits (under lfs_lock) for any reserve
 * block still in use to be returned by lfs_free() before freeing it.
 */
void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	mutex_enter(&lfs_lock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		/* lfs_free() does wakeup(&fs->lfs_resblk) when clearing inuse */
		while (fs->lfs_resblk[i].inuse)
			mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
			    &lfs_lock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	mutex_exit(&lfs_lock);
}
173b397c875Sperseant
174b397c875Sperseant static unsigned int
lfs_mhash(void * vp)175b397c875Sperseant lfs_mhash(void *vp)
176b397c875Sperseant {
177b397c875Sperseant return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
178b397c875Sperseant }
179b397c875Sperseant
/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 *
 * `type' indexes lfs_res_qty[]/lfs_res_names[] (an LFS_NB_* value).
 * Blocks handed out from the reserve are entered into fs->lfs_reshash so
 * that lfs_free() can tell them apart from ordinary malloc'd memory.
 * May sleep; may be called with or without the segment lock held.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, start;
	unsigned int h;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If no mem allocated for this type, it just waits */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */

	mutex_enter(&lfs_lock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		/* Scan this type's slice of the reserve array for a free slot. */
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				/* Reserve blocks are pre-sized per type. */
				KASSERT(re->size >= size);
				h = lfs_mhash(r);
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				mutex_exit(&lfs_lock);
				return r;
			}
		}
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		      lfs_res_names[type], lfs_res_qty[type]));
		/* All reserves of this type busy; wait for lfs_free()'s wakeup. */
		mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			&lfs_lock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		      lfs_res_names[type]));
	}
	/* NOTREACHED */
	mutex_exit(&lfs_lock);
	return r;
}
241b397c875Sperseant
242b397c875Sperseant void
lfs_free(struct lfs * fs,void * p,int type)243168cd830Schristos lfs_free(struct lfs *fs, void *p, int type)
244b397c875Sperseant {
245b397c875Sperseant unsigned int h;
246b397c875Sperseant res_t *re;
247b397c875Sperseant
2481ebfc508Sperseant ASSERT_MAYBE_SEGLOCK(fs);
249b397c875Sperseant h = lfs_mhash(p);
2504a780c9aSad mutex_enter(&lfs_lock);
251b397c875Sperseant LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
252b397c875Sperseant if (re->p == p) {
2535f444770Syamt KASSERT(re->inuse == 1);
254b397c875Sperseant LIST_REMOVE(re, res);
255b397c875Sperseant re->inuse = 0;
256b397c875Sperseant wakeup(&fs->lfs_resblk);
2574a780c9aSad mutex_exit(&lfs_lock);
258b397c875Sperseant return;
259b397c875Sperseant }
260b397c875Sperseant }
261b030a154Smaya
2620bf92910Smaya #ifdef notyet /* XXX this assert fires */
263b030a154Smaya for (int i = 0; i < LFS_N_TOTAL; i++) {
264b9333d23Smaya KDASSERTMSG(fs->lfs_resblk[i].p == p,
265b030a154Smaya "lfs_free: inconsistent reserved block");
2665f444770Syamt }
2670bf92910Smaya #endif
268b030a154Smaya
2694a780c9aSad mutex_exit(&lfs_lock);
270b397c875Sperseant
271b397c875Sperseant /*
272b397c875Sperseant * If we didn't find it, free it.
273b397c875Sperseant */
274b397c875Sperseant free(p, M_SEGMENT);
275b397c875Sperseant }
276264b874cSmycroft
/*
 * lfs_seglock --
 *	Single thread the segment writer.
 *
 * The seglock is recursive for the LWP that holds it (identified by
 * lfs_lockpid/lfs_locklwp): re-entry just bumps the count and ORs in
 * the new flags.  The pagedaemon never sleeps here; it gets
 * EWOULDBLOCK if the lock is busy.  Everyone else waits.
 *
 * On first acquisition this also takes lfs_fraglock (write), allocates
 * the struct segment and its buffer-pointer array from the pools, and
 * starts a segment via lfs_initseg().
 *
 * Returns 0 on success, EWOULDBLOCK for a contended pagedaemon request.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;

	mutex_enter(&lfs_lock);
	if (fs->lfs_seglock) {
		if (fs->lfs_lockpid == curproc->p_pid &&
		    fs->lfs_locklwp == curlwp->l_lid) {
			/* Recursive acquisition by the owner. */
			++fs->lfs_seglock;
			fs->lfs_sp->seg_flags |= flags;
			mutex_exit(&lfs_lock);
			return 0;
		} else if (flags & SEGM_PAGEDAEMON) {
			/* Pagedaemon must not block here. */
			mutex_exit(&lfs_lock);
			return EWOULDBLOCK;
		} else {
			while (fs->lfs_seglock) {
				(void)mtsleep(&fs->lfs_seglock, PRIBIO + 1,
				    "lfs_seglock", 0, &lfs_lock);
			}
		}
	}

	fs->lfs_seglock = 1;
	fs->lfs_lockpid = curproc->p_pid;
	fs->lfs_locklwp = curlwp->l_lid;
	mutex_exit(&lfs_lock);
	fs->lfs_cleanind = 0;

	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);

	/* Drain fragment size changes out */
	rw_enter(&fs->lfs_fraglock, RW_WRITER);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	(void) lfs_initseg(fs);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	fs->lfs_startseg = lfs_sb_getcurseg(fs);
	mutex_exit(&lfs_lock);
	return 0;
}
3351b8f5ea3Sperseant
static void lfs_unmark_dirop(struct lfs *);

/*
 * Walk the dirop chain (lfs_dchainhd) and release vnodes whose directory
 * operations have completed: for each inode whose state is IN_CDIROP but
 * not IN_ADIROP, clear VU_DIROP, drop it from the chain, adjust the
 * dirop vnode counts, and vrele() it.
 *
 * LFS_UNDIROP in fs->lfs_flags makes this a single-threaded operation;
 * a caller that finds it already set returns immediately.  Because
 * vrele() must be called without lfs_lock, a marker inode (IN_MARKER)
 * is kept in the list so the walk can resume safely after the lock is
 * dropped and retaken.  Called without the segment lock held (asserted).
 */
static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *marker;
	struct vnode *vp;
	int doit;

	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	/* Only one thread runs this at a time. */
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	mutex_exit(&lfs_lock);

	if (!doit)
		return;

	/* Build the list marker; it is never a real inode. */
	marker = pool_get(&lfs_inode_pool, PR_WAITOK);
	KASSERT(fs != NULL);
	memset(marker, 0, sizeof(*marker));
	marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK);
	memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs));
	marker->i_state |= IN_MARKER;

	mutex_enter(&lfs_lock);
	TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) {
		/* Advance the marker past ip before possibly sleeping. */
		TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
		TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker,
		    i_lfs_dchain);
		if (ip->i_state & IN_MARKER)
			continue;	/* another thread's marker */
		vp = ITOV(ip);
		if ((ip->i_state & (IN_ADIROP | IN_CDIROP)) == IN_CDIROP) {
			--lfs_dirvcount;
			--fs->lfs_dirvcount;
			vp->v_uflag &= ~VU_DIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			wakeup(&lfs_dirvcount);
			fs->lfs_unlockvp = vp;
			/* vrele() may sleep; drop lfs_lock around it. */
			mutex_exit(&lfs_lock);
			vrele(vp);
			mutex_enter(&lfs_lock);
			fs->lfs_unlockvp = NULL;
			ip->i_state &= ~IN_CDIROP;
		}
	}
	TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain);
	fs->lfs_flags &= ~LFS_UNDIROP;
	wakeup(&fs->lfs_flags);
	mutex_exit(&lfs_lock);

	pool_put(&lfs_inoext_pool, marker->inode_ext.lfs);
	pool_put(&lfs_inode_pool, marker);
}
393b397c875Sperseant
/*
 * Mark clean every segment that is dirty-but-empty according to BOTH
 * superblocks' segment-usage flag copies, then copy the active flag set
 * over the inactive one.  Called with the segment lock held (asserted),
 * just after lfs_activesb has been swapped.
 */
static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, error, waited;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that we've swapped lfs_activesb, but while we still
	 * hold the segment lock, run through the segment list marking
	 * the empty ones clean.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < lfs_sb_getnseg(fs); i++) {
		/* Clean only if both flag copies agree: dirty+empty, not active. */
		if ((fs->lfs_suflags[0][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
		    (fs->lfs_suflags[1][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {

			/* Make sure the sb is written before we clean */
			mutex_enter(&lfs_lock);
			/* Only wait once; later iterations skip the sleep. */
			while (waited == 0 && fs->lfs_sbactive)
				mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
				    0, &lfs_lock);
			mutex_exit(&lfs_lock);
			waited = 1;

			if ((error = lfs_do_segclean(fs, i)) != 0) {
				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
			}
		}
		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
			fs->lfs_suflags[fs->lfs_activesb][i];
	}
}
431b397c875Sperseant
/*
 * lfs_segunlock --
 *	Single thread the segment writer.
 *
 * Release one level of the (recursive) segment lock.  Only the final
 * release does real work: it frees the segment summary buffer and the
 * bpp array, drops the artificial lfs_iocount reference taken in
 * lfs_seglock(), and — for a checkpoint — waits for I/O to drain,
 * writes the superblock(s), runs lfs_auto_segclean(), and swaps
 * lfs_activesb.  Finally it releases lfs_fraglock and, unless the
 * segment was written with SEGM_PROT or SEGM_CLEAN, runs
 * lfs_unmark_dirop().
 */
void
lfs_segunlock(struct lfs *fs)
{
	struct segment *sp;
	unsigned long sync, ckp;
	struct buf *bp;
	int do_unmark_dirop = 0;

	sp = fs->lfs_sp;

	mutex_enter(&lfs_lock);

	if (!LFS_SEGLOCK_HELD(fs))
		panic("lfs seglock not held");

	if (fs->lfs_seglock == 1) {
		if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0)
			do_unmark_dirop = 1;
		mutex_exit(&lfs_lock);
		sync = sp->seg_flags & SEGM_SYNC;
		ckp = sp->seg_flags & SEGM_CKP;

		/* We should have a segment summary, and nothing else */
		KASSERT(sp->cbpp == sp->bpp + 1);

		/* Free allocated segment summary */
		lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
		bp = *sp->bpp;
		lfs_freebuf(fs, bp);

		pool_put(&fs->lfs_bpppool, sp->bpp);
		sp->bpp = NULL;

		/*
		 * If we're not sync, we're done with sp, get rid of it.
		 * Otherwise, we keep a local copy around but free
		 * fs->lfs_sp so another process can use it (we have to
		 * wait but they don't have to wait for us).
		 */
		if (!sync)
			pool_put(&fs->lfs_segpool, sp);
		fs->lfs_sp = NULL;

		/*
		 * If the I/O count is non-zero, sleep until it reaches zero.
		 * At the moment, the user's process hangs around so we can
		 * sleep.
		 */
		mutex_enter(&lfs_lock);
		if (--fs->lfs_iocount <= 1)
			wakeup(&fs->lfs_iocount);
		mutex_exit(&lfs_lock);

		/*
		 * If we're not checkpointing, we don't have to block
		 * other processes to wait for a synchronous write
		 * to complete.
		 */
		if (!ckp) {
			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);

			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/*
		 * We let checkpoints happen asynchronously.  That means
		 * that during recovery, we have to roll forward between
		 * the two segments described by the first and second
		 * superblocks to make sure that the checkpoint described
		 * by a superblock completed.
		 */
		mutex_enter(&lfs_lock);
		while (ckp && sync && fs->lfs_iocount) {
			(void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
			    "lfs_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", fs, fs->lfs_iocount));
		}
		while (sync && sp->seg_iocount) {
			(void)mtsleep(&sp->seg_iocount, PRIBIO + 1,
			    "seg_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount));
		}
		mutex_exit(&lfs_lock);
		/* Sync writes kept sp alive until now (see above). */
		if (sync)
			pool_put(&fs->lfs_segpool, sp);

		if (ckp) {
			fs->lfs_nactive = 0;
			/* If we *know* everything's on disk, write both sbs */
			/* XXX should wait for this one */
			if (sync)
				lfs_writesuper(fs, lfs_sb_getsboff(fs, fs->lfs_activesb));
			lfs_writesuper(fs, lfs_sb_getsboff(fs, 1 - fs->lfs_activesb));
			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
				lfs_auto_segclean(fs);
				/* If sync, we can clean the remainder too */
				if (sync)
					lfs_auto_segclean(fs);
			}
			fs->lfs_activesb = 1 - fs->lfs_activesb;

			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);

			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/* Reenable fragment size changes */
		rw_exit(&fs->lfs_fraglock);
		if (do_unmark_dirop)
			lfs_unmark_dirop(fs);
	} else {
		/* Recursive release: just drop one level. */
		--fs->lfs_seglock;
		KASSERT(fs->lfs_seglock != 0);
		mutex_exit(&lfs_lock);
	}
}
560102c8a6aSyamt
561102c8a6aSyamt /*
5620549fd61Sperseant * Drain dirops and start writer.
5630549fd61Sperseant *
5640549fd61Sperseant * No simple_locks are held when we enter and none are held when we return.
565102c8a6aSyamt */
5663c3d9e74Sriastradh void
lfs_writer_enter(struct lfs * fs,const char * wmesg)567102c8a6aSyamt lfs_writer_enter(struct lfs *fs, const char *wmesg)
568102c8a6aSyamt {
56982cfa759Sad int error __diagused;
570102c8a6aSyamt
5715fc5b909Sriastradh ASSERT_NO_SEGLOCK(fs);
5724a780c9aSad mutex_enter(&lfs_lock);
573102c8a6aSyamt
574102c8a6aSyamt /* disallow dirops during flush */
575102c8a6aSyamt fs->lfs_writer++;
576102c8a6aSyamt
577102c8a6aSyamt while (fs->lfs_dirops > 0) {
578102c8a6aSyamt ++fs->lfs_diropwait;
5794a780c9aSad error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
5804a780c9aSad &lfs_lock);
5813c3d9e74Sriastradh KASSERT(error == 0);
582102c8a6aSyamt --fs->lfs_diropwait;
583102c8a6aSyamt }
584102c8a6aSyamt
5854a780c9aSad mutex_exit(&lfs_lock);
586102c8a6aSyamt }
587102c8a6aSyamt
5885fc5b909Sriastradh int
lfs_writer_tryenter(struct lfs * fs)5895fc5b909Sriastradh lfs_writer_tryenter(struct lfs *fs)
5905fc5b909Sriastradh {
5915fc5b909Sriastradh int writer_set;
5925fc5b909Sriastradh
5935fc5b909Sriastradh ASSERT_MAYBE_SEGLOCK(fs);
5945fc5b909Sriastradh mutex_enter(&lfs_lock);
5955fc5b909Sriastradh writer_set = (fs->lfs_dirops == 0);
5965fc5b909Sriastradh if (writer_set)
5975fc5b909Sriastradh fs->lfs_writer++;
5985fc5b909Sriastradh mutex_exit(&lfs_lock);
5995fc5b909Sriastradh
6005fc5b909Sriastradh return writer_set;
6015fc5b909Sriastradh }
6025fc5b909Sriastradh
603102c8a6aSyamt void
lfs_writer_leave(struct lfs * fs)604102c8a6aSyamt lfs_writer_leave(struct lfs *fs)
605102c8a6aSyamt {
606712239e3Sthorpej bool dowakeup;
607102c8a6aSyamt
6081ebfc508Sperseant ASSERT_MAYBE_SEGLOCK(fs);
6094a780c9aSad mutex_enter(&lfs_lock);
610102c8a6aSyamt dowakeup = !(--fs->lfs_writer);
611102c8a6aSyamt if (dowakeup)
6129f6a52ecSmaya cv_broadcast(&fs->lfs_diropscv);
6137171c6cdSmaya mutex_exit(&lfs_lock);
614102c8a6aSyamt }
615dddf5c51Sperseant
/*
 * Unlock, wait for the cleaner, then relock to where we were before.
 * To be used only at a fairly high level, to address a paucity of free
 * segments propagated back from lfs_gop_write().
 *
 * The current recursion depth of the seglock is saved and fully
 * released (with SEGM_PROT set so dirop nodes stay marked), the
 * cleaner is told it MUST clean and is waited for, and then the lock
 * is re-acquired to the same depth with the saved flags.
 */
void
lfs_segunlock_relock(struct lfs *fs)
{
	int n = fs->lfs_seglock;	/* depth to restore later */
	u_int16_t seg_flags;
	CLEANERINFO *cip;
	struct buf *bp;

	if (n == 0)
		return;

	/* Write anything we've already gathered to disk */
	lfs_writeseg(fs, fs->lfs_sp);

	/* Tell cleaner */
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
	    lfs_ci_getflags(fs, cip) | LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	/* Save segment flags for later */
	seg_flags = fs->lfs_sp->seg_flags;

	fs->lfs_sp->seg_flags |= SEGM_PROT; /* Don't unmark dirop nodes */
	while(fs->lfs_seglock)
		lfs_segunlock(fs);

	/* Wait for the cleaner */
	lfs_wakeup_cleaner(fs);
	mutex_enter(&lfs_lock);
	while (LFS_STARVED_FOR_SEGS(fs))
		mtsleep(&fs->lfs_availsleep, PRIBIO, "relock", 0,
		    &lfs_lock);
	mutex_exit(&lfs_lock);

	/* Put the segment lock back the way it was. */
	while(n--)
		lfs_seglock(fs, seg_flags);

	/* Cleaner can relax now */
	LFS_CLEANERINFO(cip, fs, bp);
	lfs_ci_setflags(fs, cip,
	    lfs_ci_getflags(fs, cip) & ~LFS_CLEANER_MUST_CLEAN);
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	return;
}
668b99e4c82Sperseant
669b99e4c82Sperseant /*
670b99e4c82Sperseant * Wake up the cleaner, provided that nowrap is not set.
671b99e4c82Sperseant */
672b99e4c82Sperseant void
lfs_wakeup_cleaner(struct lfs * fs)673b99e4c82Sperseant lfs_wakeup_cleaner(struct lfs *fs)
674b99e4c82Sperseant {
675b99e4c82Sperseant if (fs->lfs_nowrap > 0)
676b99e4c82Sperseant return;
677b99e4c82Sperseant
6788f5758dbSmaya cv_broadcast(&fs->lfs_nextsegsleep);
6798f5758dbSmaya cv_broadcast(&lfs_allclean_wakeup);
680b99e4c82Sperseant }
681