xref: /netbsd-src/sbin/fsck_lfs/pass6.c (revision a74e29fe24406e588e4c7ad6d03168a00d009bc1)
1 /* $NetBSD: pass6.c,v 1.51 2020/04/03 19:36:33 joerg Exp $	 */
2 
3 /*-
4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/time.h>
35 #include <sys/buf.h>
36 #include <sys/mount.h>
37 
38 #define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */
39 #define vnode uvnode
40 #include <ufs/lfs/lfs.h>
41 #include <ufs/lfs/lfs_accessors.h>
42 #include <ufs/lfs/lfs_inode.h>
43 #undef vnode
44 
45 #include <assert.h>
46 #include <err.h>
47 #include <signal.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <util.h>
52 
53 #include "bufcache.h"
54 #include "lfs_user.h"
55 #include "segwrite.h"
56 
57 #include "fsck.h"
58 #include "extern.h"
59 #include "fsutil.h"
60 
/*
 * Number of blocks re-addressed by rfw_update_single() during the
 * current roll forward.  Reset at the start of pass6() and reported to
 * the user at the end of pass6() when preening.
 */
static int nnewblocks;
62 
63 /*
64  * Our own copy of lfs_update_single so we can account in seg_table
65  * as well as the Ifile; and so we can add the blocks to their new
66  * segment.
67  *
68  * Change the given block's address to ndaddr, finding its previous
69  * location using ulfs_bmaparray().
70  *
71  * Account for this change in the segment table.
72  */
73 static void
rfw_update_single(struct uvnode * vp,daddr_t lbn,daddr_t ndaddr,size_t size)74 rfw_update_single(struct uvnode *vp, daddr_t lbn, daddr_t ndaddr, size_t size)
75 {
76 	SEGUSE *sup;
77 	struct ubuf *bp;
78 	struct indir a[ULFS_NIADDR + 2], *ap;
79 	struct inode *ip;
80 	daddr_t daddr, ooff;
81 	int num, error;
82 	int i, osize = 0;
83 	int frags, ofrags = 0;
84 	u_int32_t oldsn, sn;
85 
86 	ip = VTOI(vp);
87 	ip->i_state |= IN_MODIFIED;
88 
89 	error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num);
90 	if (error)
91 		errx(1, "lfs_updatemeta: ulfs_bmaparray returned %d"
92 		     " looking up lbn %" PRId64 "\n", error, lbn);
93 	if (daddr > 0)
94 		daddr = LFS_DBTOFSB(fs, daddr);
95 
96 	frags = lfs_numfrags(fs, size);
97 	switch (num) {
98 	case 0:
99 		ooff = lfs_dino_getdb(fs, ip->i_din, lbn);
100 		if (ooff <= 0)
101 			lfs_dino_setblocks(fs, ip->i_din,
102 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
103 		else {
104 			/* possible fragment truncation or extension */
105 			ofrags = lfs_numfrags(fs, ip->i_lfs_fragsize[lbn]);
106 			lfs_dino_setblocks(fs, ip->i_din,
107 			    lfs_dino_getblocks(fs, ip->i_din) + (frags - ofrags));
108 		}
109 		lfs_dino_setdb(fs, ip->i_din, lbn, ndaddr);
110 		break;
111 	case 1:
112 		ooff = lfs_dino_getib(fs, ip->i_din, a[0].in_off);
113 		if (ooff <= 0)
114 			lfs_dino_setblocks(fs, ip->i_din,
115 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
116 		lfs_dino_setib(fs, ip->i_din, a[0].in_off, ndaddr);
117 		break;
118 	default:
119 		ap = &a[num - 1];
120 		if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp))
121 			errx(1, "lfs_updatemeta: bread bno %" PRId64,
122 			    ap->in_lbn);
123 
124 		ooff = lfs_iblock_get(fs, bp->b_data, ap->in_off);
125 		if (ooff <= 0)
126 			lfs_dino_setblocks(fs, ip->i_din,
127 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
128 		lfs_iblock_set(fs, bp->b_data, ap->in_off, ndaddr);
129 		(void) VOP_BWRITE(bp);
130 	}
131 
132 	/*
133 	 * Update segment usage information, based on old size
134 	 * and location.
135 	 */
136 	if (daddr > 0) {
137 		oldsn = lfs_dtosn(fs, daddr);
138 		if (lbn >= 0 && lbn < ULFS_NDADDR)
139 			osize = ip->i_lfs_fragsize[lbn];
140 		else
141 			osize = lfs_sb_getbsize(fs);
142 		LFS_SEGENTRY(sup, fs, oldsn, bp);
143 		seg_table[oldsn].su_nbytes -= osize;
144 		sup->su_nbytes -= osize;
145 		if (!(bp->b_flags & B_GATHERED))
146 			fs->lfs_flags |= LFS_IFDIRTY;
147 		LFS_WRITESEGENTRY(sup, fs, oldsn, bp);
148 		for (i = 0; i < lfs_btofsb(fs, osize); i++)
149 			clrbmap(daddr + i);
150 	}
151 
152 	/* If block is beyond EOF, update size */
153 	if (lbn >= 0 && lfs_dino_getsize(fs, ip->i_din) <= (lbn << lfs_sb_getbshift(fs))) {
154 		lfs_dino_setsize(fs, ip->i_din, (lbn << lfs_sb_getbshift(fs)) + 1);
155 	}
156 
157 	/* If block frag size is too large for old EOF, update size */
158 	if (lbn < ULFS_NDADDR) {
159 		off_t minsize;
160 
161 		minsize = (lbn << lfs_sb_getbshift(fs));
162 		minsize += (size - lfs_sb_getfsize(fs)) + 1;
163 		if (lfs_dino_getsize(fs, ip->i_din) < minsize)
164 			lfs_dino_setsize(fs, ip->i_din, minsize);
165 	}
166 
167 	/* Count for the user */
168 	++nnewblocks;
169 
170 	/* Add block to its new segment */
171 	sn = lfs_dtosn(fs, ndaddr);
172 	LFS_SEGENTRY(sup, fs, sn, bp);
173 	seg_table[sn].su_nbytes += size;
174 	sup->su_nbytes += size;
175 	if (!(bp->b_flags & B_GATHERED))
176 		fs->lfs_flags |= LFS_IFDIRTY;
177 	LFS_WRITESEGENTRY(sup, fs, sn, bp);
178 	for (i = 0; i < lfs_btofsb(fs, size); i++)
179 #ifndef VERBOSE_BLOCKMAP
180 		setbmap(daddr + i);
181 #else
182 		setbmap(daddr + i, ip->i_number);
183 #endif
184 
185 	/* Check bfree accounting as well */
186 	if (daddr <= 0) {
187 		lfs_sb_subbfree(fs, lfs_btofsb(fs, size));
188 	} else if (size != osize) {
189 		lfs_sb_subbfree(fs, frags - ofrags);
190 	}
191 
192 	/*
193 	 * Now that this block has a new address, and its old
194 	 * segment no longer owns it, we can forget about its
195 	 * old size.
196 	 */
197 	if (lbn >= 0 && lbn < ULFS_NDADDR)
198 		ip->i_lfs_fragsize[lbn] = size;
199 }
200 
201 /*
202  * Remove the vnode from the cache, including any blocks it
203  * may hold.  Account the blocks.  Finally account the removal
204  * of the inode from its segment.
205  */
206 static void
remove_ino(struct uvnode * vp,ino_t ino)207 remove_ino(struct uvnode *vp, ino_t ino)
208 {
209 	IFILE *ifp;
210 	ino_t nextfree;
211 	SEGUSE *sup;
212 	CLEANERINFO *cip;
213 	struct ubuf *bp, *sbp, *cbp;
214 	struct inodesc idesc;
215 	daddr_t daddr;
216 
217 	if (debug)
218 		pwarn("remove ino %d\n", (int)ino);
219 
220 	LFS_IENTRY(ifp, fs, ino, bp);
221 	daddr = lfs_if_getdaddr(fs, ifp);
222 	if (daddr > 0) {
223 		lfs_if_setdaddr(fs, ifp, 0);
224 
225 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
226 		lfs_if_setnextfree(fs, ifp, nextfree);
227 		VOP_BWRITE(bp);
228 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
229 		sbdirty();
230 
231 		if (vp == NULL)
232 			vp = lfs_raw_vget(fs, ino, fs->lfs_ivnode->v_fd, daddr);
233 
234 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
235 		sup->su_nbytes -= DINOSIZE(fs);
236 		VOP_BWRITE(sbp);
237 		seg_table[lfs_dtosn(fs, daddr)].su_nbytes -= DINOSIZE(fs);
238 	} else
239 		brelse(bp, 0);
240 
241 	/* Do on-disk accounting */
242 	if (vp) {
243 		idesc.id_number = ino;
244 		idesc.id_func = pass4check; /* Delete dinode and blocks */
245 		idesc.id_type = ADDR;
246 		idesc.id_lblkno = 0;
247 		clri(&idesc, "unknown", 2); /* XXX magic number 2 */
248 		/* vp has been destroyed */
249 	}
250 }
251 
252 /*
253  * Use FIP records to update blocks, if the generation number matches.
254  */
255 static void
pass6harvest(daddr_t daddr,FINFO * fip)256 pass6harvest(daddr_t daddr, FINFO *fip)
257 {
258 	struct uvnode *vp;
259 	int i;
260 	size_t size;
261 
262 	vp = vget(fs, lfs_fi_getino(fs, fip));
263 	if (vp && vp != fs->lfs_ivnode &&
264 	    lfs_dino_getgen(fs, VTOI(vp)->i_din) == lfs_fi_getversion(fs, fip)) {
265 		for (i = 0; i < lfs_fi_getnblocks(fs, fip); i++) {
266 			size = (i == lfs_fi_getnblocks(fs, fip) - 1 ?
267 				lfs_fi_getlastlength(fs, fip) : lfs_sb_getbsize(fs));
268 			if (debug)
269 				pwarn("ino %ju lbn %jd -> 0x%jx\n",
270 					(uintmax_t)lfs_fi_getino(fs, fip),
271 					(intmax_t)lfs_fi_getblock(fs, fip, i),
272 					(intmax_t)daddr);
273 			rfw_update_single(vp, lfs_fi_getblock(fs, fip, i), daddr, size);
274 			daddr += lfs_btofsb(fs, size);
275 		}
276 	}
277 }
278 
279 /*
280  * Check validity of blocks on roll-forward inodes.
281  */
282 int
pass6check(struct inodesc * idesc)283 pass6check(struct inodesc * idesc)
284 {
285 	int i, sn, anyout, anynew;
286 
287 	/* Brand new blocks are always OK */
288 	if (idesc->id_blkno == UNWRITTEN)
289 		return KEEPON;
290 
291 	/* Check that the blocks do not lie within clean segments. */
292 	anyout = anynew = 0;
293 	for (i = 0; i < idesc->id_numfrags; i++) {
294 		sn = lfs_dtosn(fs, idesc->id_blkno + i);
295 		if (sn < 0 || sn >= lfs_sb_getnseg(fs) ||
296 		    (seg_table[sn].su_flags & SEGUSE_DIRTY) == 0) {
297 			anyout = 1;
298 			break;
299 		}
300 		if (seg_table[sn].su_flags & SEGUSE_ACTIVE) {
301 			if (sn != lfs_dtosn(fs, lfs_sb_getoffset(fs)) ||
302 			    idesc->id_blkno > lfs_sb_getoffset(fs)) {
303 				++anynew;
304 			}
305 		}
306 		if (!anynew) {
307 			/* Clear so pass1check won't be surprised */
308 			clrbmap(idesc->id_blkno + i);
309 			seg_table[sn].su_nbytes -= lfs_fsbtob(fs, 1);
310 		}
311 	}
312 	if (anyout) {
313 		blkerror(idesc->id_number, "BAD", idesc->id_blkno);
314 		if (badblkcount++ >= MAXBAD) {
315 			pwarn("EXCESSIVE BAD BLKS I=%llu",
316 			    (unsigned long long)idesc->id_number);
317 			if (preen)
318 				pwarn(" (SKIPPING)\n");
319 			else if (reply("CONTINUE") == 0)
320 				err(EEXIT, "%s", "");
321 			return (STOP);
322 		}
323 	}
324 
325 	return pass1check(idesc);
326 }
327 
328 static void
account_indir(struct uvnode * vp,union lfs_dinode * dp,daddr_t ilbn,daddr_t daddr,int lvl)329 account_indir(struct uvnode *vp, union lfs_dinode *dp, daddr_t ilbn, daddr_t daddr, int lvl)
330 {
331 	struct ubuf *bp;
332 	int32_t *dap, *odap, *buf, *obuf;
333 	daddr_t lbn;
334 
335 	if (lvl == 0)
336 		lbn = -ilbn;
337 	else
338 		lbn = ilbn + 1;
339 	bread(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getbsize(fs), 0, &bp);
340 	buf = emalloc(lfs_sb_getbsize(fs));
341 	memcpy(buf, bp->b_data, lfs_sb_getbsize(fs));
342 	brelse(bp, 0);
343 
344 	obuf = emalloc(lfs_sb_getbsize(fs));
345 	if (vp) {
346 		bread(vp, ilbn, lfs_sb_getbsize(fs), 0, &bp);
347 		memcpy(obuf, bp->b_data, lfs_sb_getbsize(fs));
348 		brelse(bp, 0);
349 	} else
350 		memset(obuf, 0, lfs_sb_getbsize(fs));
351 
352 	for (dap = buf, odap = obuf;
353 	     dap < (int32_t *)((char *)buf + lfs_sb_getbsize(fs));
354 	     ++dap, ++odap) {
355 		if (*dap > 0 && *dap != *odap) {
356 			rfw_update_single(vp, lbn, *dap, lfs_dblksize(fs, dp, lbn));
357 			if (lvl > 0)
358 				account_indir(vp, dp, lbn, *dap, lvl - 1);
359 		}
360 		if (lvl == 0)
361 			++lbn;
362 		else if (lvl == 1)
363 			lbn -= LFS_NINDIR(fs);
364 		else if (lvl == 2)
365 			lbn -= LFS_NINDIR(fs) * LFS_NINDIR(fs);
366 	}
367 
368 	free(obuf);
369 	free(buf);
370 }
371 
372 /*
373  * Account block changes between new found inode and existing inode.
374  */
375 static void
account_block_changes(union lfs_dinode * dp)376 account_block_changes(union lfs_dinode *dp)
377 {
378 	int i;
379 	daddr_t lbn, off, odaddr;
380 	struct uvnode *vp;
381 	struct inode *ip;
382 
383 	vp = vget(fs, lfs_dino_getinumber(fs, dp));
384 	ip = (vp ? VTOI(vp) : NULL);
385 
386 	/* Check direct block holdings between existing and new */
387 	for (i = 0; i < ULFS_NDADDR; i++) {
388 		odaddr = (ip ? lfs_dino_getdb(fs, ip->i_din, i) : 0x0);
389 		if (lfs_dino_getdb(fs, dp, i) > 0 && lfs_dino_getdb(fs, dp, i) != odaddr)
390 			rfw_update_single(vp, i, lfs_dino_getdb(fs, dp, i),
391 					  lfs_dblksize(fs, dp, i));
392 	}
393 
394 	/* Check indirect block holdings between existing and new */
395 	off = 0;
396 	for (i = 0; i < ULFS_NIADDR; i++) {
397 		odaddr = (ip ? lfs_dino_getib(fs, ip->i_din, i) : 0x0);
398 		if (lfs_dino_getib(fs, dp, i) > 0 && lfs_dino_getib(fs, dp, i) != odaddr) {
399 			lbn = -(ULFS_NDADDR + off + i);
400 			rfw_update_single(vp, i, lfs_dino_getib(fs, dp, i), lfs_sb_getbsize(fs));
401 			account_indir(vp, dp, lbn, lfs_dino_getib(fs, dp, i), i);
402 		}
403 		if (off == 0)
404 			off = LFS_NINDIR(fs);
405 		else
406 			off *= LFS_NINDIR(fs);
407 	}
408 }
409 
410 /*
411  * Give a previously allocated inode a new address; do segment
412  * accounting if necessary.
413  *
414  * Caller has ensured that this inode is not on the free list, so no
415  * free list accounting is done.
416  */
417 static void
readdress_inode(union lfs_dinode * dp,daddr_t daddr)418 readdress_inode(union lfs_dinode *dp, daddr_t daddr)
419 {
420 	IFILE *ifp;
421 	SEGUSE *sup;
422 	struct ubuf *bp;
423 	int sn;
424 	daddr_t odaddr;
425 	ino_t thisino = lfs_dino_getinumber(fs, dp);
426 	struct uvnode *vp;
427 
428 	/* Recursively check all block holdings, account changes */
429 	account_block_changes(dp);
430 
431 	/* Move ifile pointer to this location */
432 	LFS_IENTRY(ifp, fs, thisino, bp);
433 	odaddr = lfs_if_getdaddr(fs, ifp);
434 	assert(odaddr != 0);
435 	lfs_if_setdaddr(fs, ifp, daddr);
436 	VOP_BWRITE(bp);
437 
438 	if (debug)
439 		pwarn("readdress ino %ju from 0x%jx to 0x%jx mode %o nlink %d\n",
440 			(uintmax_t)lfs_dino_getinumber(fs, dp),
441 			(uintmax_t)odaddr,
442 			(intmax_t)daddr,
443 			(int)lfs_dino_getmode(fs, dp),
444 			(int)lfs_dino_getnlink(fs, dp));
445 
446 	/* Copy over preexisting in-core inode, if any */
447 	vp = vget(fs, thisino);
448 	lfs_copy_dinode(fs, VTOI(vp)->i_din, dp);
449 
450 	/* Finally account the inode itself */
451 	sn = lfs_dtosn(fs, odaddr);
452 	LFS_SEGENTRY(sup, fs, sn, bp);
453 	sup->su_nbytes -= DINOSIZE(fs);
454 	VOP_BWRITE(bp);
455 	seg_table[sn].su_nbytes -= DINOSIZE(fs);
456 
457 	sn = lfs_dtosn(fs, daddr);
458 	LFS_SEGENTRY(sup, fs, sn, bp);
459 	sup->su_nbytes += DINOSIZE(fs);
460 	VOP_BWRITE(bp);
461 	seg_table[sn].su_nbytes += DINOSIZE(fs);
462 }
463 
464 /*
465  * Allocate the given inode from the free list.
466  */
467 static void
alloc_inode(ino_t thisino,daddr_t daddr)468 alloc_inode(ino_t thisino, daddr_t daddr)
469 {
470 	ino_t ino, nextfree, oldhead;
471 	IFILE *ifp;
472 	SEGUSE *sup;
473 	struct ubuf *bp, *cbp;
474 	CLEANERINFO *cip;
475 
476 	if (debug)
477 		pwarn("allocating ino %ju at 0x%jx\n", (uintmax_t)thisino,
478 			(intmax_t)daddr);
479 	while (thisino >= maxino) {
480 		extend_ifile(fs);
481 	}
482 
483 	LFS_IENTRY(ifp, fs, thisino, bp);
484 	if (lfs_if_getdaddr(fs, ifp) != 0) {
485 		pwarn("allocated inode %lld already allocated\n",
486 			(long long)thisino);
487 	}
488 	nextfree = lfs_if_getnextfree(fs, ifp);
489 	lfs_if_setnextfree(fs, ifp, 0);
490 	lfs_if_setdaddr(fs, ifp, daddr);
491 	VOP_BWRITE(bp);
492 
493 	LFS_GET_HEADFREE(fs, cip, cbp, &oldhead);
494 	if (oldhead == thisino) {
495 		LFS_PUT_HEADFREE(fs, cip, cbp, nextfree);
496 		sbdirty();
497 		if (nextfree == 0) {
498 			extend_ifile(fs);
499 		}
500 	} else {
501 		/* Search the free list for this inode */
502 		ino = oldhead;
503 		while (ino) {
504 			LFS_IENTRY(ifp, fs, ino, bp);
505 			assert(lfs_if_getnextfree(fs, ifp) != ino);
506 			if (lfs_if_getnextfree(fs, ifp) == thisino) {
507 				lfs_if_setnextfree(fs, ifp, nextfree);
508 				VOP_BWRITE(bp);
509 				if (nextfree == 0)
510 					LFS_PUT_TAILFREE(fs, cip, cbp, ino);
511 				break;
512 			} else
513 				ino = lfs_if_getnextfree(fs, ifp);
514 			brelse(bp, 0);
515 		}
516 	}
517 
518 	/* Account for new location */
519 	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), bp);
520 	sup->su_nbytes += DINOSIZE(fs);
521 	VOP_BWRITE(bp);
522 	seg_table[lfs_dtosn(fs, daddr)].su_nbytes += DINOSIZE(fs);
523 }
524 
525 /*
526  * Roll forward from the last verified checkpoint.
527  *
528  * Basic strategy:
529  *
530  * Run through the summaries finding the last valid partial segment.
531  * Note segment numbers as we go.  For each inode that we find, compare
532  * its generation number; if newer than old inode's (or if old inode is
533  * USTATE), change to that inode.  Recursively look at inode blocks that
534  * do not have their old disk addresses.  These addresses must lie in
535  * segments we have seen already in our roll forward.
536  *
537  * A second pass through the past-checkpoint area verifies the validity
538  * of these new blocks, as well as updating other blocks that do not
539  * have corresponding new inodes (but their generation number must match
540  * the old generation number).
541  */
542 void
pass6(void)543 pass6(void)
544 {
545 	daddr_t daddr, ibdaddr, odaddr, lastgood;
546 	IINFO *iip;
547 	struct uvnode *vp, *devvp;
548 	CLEANERINFO *cip;
549 	SEGUSE *sup;
550 	SEGSUM *sp;
551 	struct ubuf *bp, *ibp, *sbp, *cbp;
552 	union lfs_dinode *dp;
553 	struct inodesc idesc;
554 	int i, j, bc, hassuper;
555 	unsigned k;
556 	int nnewfiles, ndelfiles, nmvfiles;
557 	int sn, curseg;
558 	char *ibbuf;
559 	long lastserial;
560 
561 	devvp = fs->lfs_devvp;
562 
563 	/* If we can't roll forward because of created files, don't try */
564 	if (no_roll_forward) {
565 		if (debug)
566 			pwarn("not rolling forward due to possible allocation conflict\n");
567 		return;
568 	}
569 
570 	/* Find last valid partial segment */
571 	lastgood = try_verify(fs, devvp, 0, debug);
572 	if (lastgood == lfs_sb_getoffset(fs)) {
573 		if (debug)
574 			pwarn("not rolling forward, nothing to recover\n");
575 		return;
576 	}
577 
578 	if (debug)
579 		pwarn("could roll forward from 0x%jx to 0x%jx\n",
580 			(uintmax_t)lfs_sb_getoffset(fs), (uintmax_t)lastgood);
581 
582 	if (!preen && reply("ROLL FORWARD") == 0)
583 		return;
584 	/*
585 	 * Pass 1: find inode blocks.  We ignore the Ifile inode but accept
586 	 * changes to any other inode.
587 	 */
588 
589 	ibbuf = emalloc(lfs_sb_getibsize(fs));
590 	nnewfiles = ndelfiles = nmvfiles = nnewblocks = 0;
591 	daddr = lfs_sb_getoffset(fs);
592 	hassuper = 0;
593 	lastserial = 0;
594 	while (daddr != lastgood) {
595 		seg_table[lfs_dtosn(fs, daddr)].su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
596 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
597 		sup->su_flags |= SEGUSE_DIRTY;
598 		VOP_BWRITE(sbp);
599 
600 		/* Could be a superblock */
601 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr) {
602 			if (daddr == lfs_sb_gets0addr(fs)) {
603 				++hassuper;
604 				daddr += lfs_btofsb(fs, LFS_LABELPAD);
605 			}
606 			for (i = 0; i < LFS_MAXNUMSB; i++) {
607 				if (daddr == lfs_sb_getsboff(fs, i)) {
608 					++hassuper;
609 					daddr += lfs_btofsb(fs, LFS_SBPAD);
610 				}
611 				if (daddr < lfs_sb_getsboff(fs, i))
612 					break;
613 			}
614 		}
615 		KASSERT(hassuper == 0 || hassuper == 1);
616 
617 		/* Read in summary block */
618 		bread(devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getsumsize(fs), 0, &bp);
619 		sp = (SEGSUM *)bp->b_data;
620 		if (debug)
621 			pwarn("sum at 0x%jx: ninos=%d nfinfo=%d\n",
622 				(intmax_t)daddr, (int)lfs_ss_getninos(fs, sp),
623 				(int)lfs_ss_getnfinfo(fs, sp));
624 
625 		/* We have verified that this is a good summary. */
626 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
627 		++sup->su_nsums;
628 		VOP_BWRITE(sbp);
629 		lfs_sb_subbfree(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
630 		lfs_sb_adddmeta(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
631 		sbdirty();
632 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr +
633 		    hassuper * lfs_btofsb(fs, LFS_SBPAD) &&
634 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, lfs_sb_getoffset(fs))) {
635 			lfs_sb_subnclean(fs, 1);
636 			sbdirty();
637 		}
638 
639 		/* Find inodes, look at generation number. */
640 		if (lfs_ss_getninos(fs, sp)) {
641 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
642 			sup->su_ninos += howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs));
643 			VOP_BWRITE(sbp);
644 			lfs_sb_adddmeta(fs, lfs_btofsb(fs, howmany(lfs_ss_getninos(fs, sp),
645 							    LFS_INOPB(fs)) *
646 						lfs_sb_getibsize(fs)));
647 		}
648 		iip = SEGSUM_IINFOSTART(fs, bp->b_data);
649 		for (i = 0; i < howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs)); i++) {
650 			ino_t *inums;
651 
652 			inums = ecalloc(LFS_INOPB(fs) + 1, sizeof(*inums));
653 			ibdaddr = lfs_ii_getblock(fs, iip);
654 			iip = NEXTLOWER_IINFO(fs, iip);
655 			lfs_sb_subbfree(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
656 			sbdirty();
657 			bread(devvp, LFS_FSBTODB(fs, ibdaddr),
658 			      lfs_sb_getibsize(fs), 0, &ibp);
659 			memcpy(ibbuf, ibp->b_data, lfs_sb_getibsize(fs));
660 			brelse(ibp, 0);
661 
662 			j = 0;
663 			for (k = 0; k < LFS_INOPB(fs); k++) {
664 				dp = DINO_IN_BLOCK(fs, ibbuf, k);
665 				if (lfs_dino_getinumber(fs, dp) == 0 ||
666 				    lfs_dino_getinumber(fs, dp) == LFS_IFILE_INUM)
667 					continue;
668 				/* Basic sanity checks */
669 				if (lfs_dino_getnlink(fs, dp) < 0
670 #if 0
671 				    || lfs_dino_getinumber(fs, dp) < 0
672 				    || lfs_dino_getsize(fs, dp) < 0
673 #endif
674 				) {
675 					pwarn("BAD INODE AT 0x%jx\n",
676 						(intmax_t)ibdaddr);
677 					brelse(bp, 0);
678 					free(inums);
679 					goto out;
680 				}
681 
682 				vp = vget(fs, lfs_dino_getinumber(fs, dp));
683 
684 				/*
685 				 * Four cases:
686 				 * (1) Invalid inode (nlink == 0).
687 				 *     If currently allocated, remove.
688 				 */
689 				if (lfs_dino_getnlink(fs, dp) == 0) {
690 					remove_ino(vp, lfs_dino_getinumber(fs, dp));
691 					++ndelfiles;
692 					continue;
693 				}
694 				/*
695 				 * (2) New valid inode, previously free.
696 				 *     Nothing to do except account
697 				 *     the inode itself, done after the
698 				 *     loop.
699 				 */
700 				if (vp == NULL) {
701 					if (!(lfs_ss_getflags(fs, sp) & SS_DIROP))
702 						pfatal("NEW FILE IN NON-DIROP PARTIAL SEGMENT");
703 					else {
704 						inums[j++] = lfs_dino_getinumber(fs, dp);
705 						nnewfiles++;
706 					}
707 					continue;
708 				}
709 				/*
710 				 * (3) Valid new version of previously
711 				 *     allocated inode.  Delete old file
712 				 *     and proceed as in (2).
713 				 */
714 				if (vp &&
715 				    lfs_dino_getgen(fs, VTOI(vp)->i_din)
716 				    < lfs_dino_getgen(fs, dp)) {
717 					remove_ino(vp, lfs_dino_getinumber(fs, dp));
718 					if (!(lfs_ss_getflags(fs, sp) & SS_DIROP))
719 						pfatal("NEW FILE VERSION IN NON-DIROP PARTIAL SEGMENT");
720 					else {
721 						inums[j++] = lfs_dino_getinumber(fs, dp);
722 						ndelfiles++;
723 						nnewfiles++;
724 					}
725 					continue;
726 				}
727 				/*
728 				 * (4) Same version of previously
729 				 *     allocated inode.  Move inode to
730 				 *     this location, account inode change
731 				 *     only.  We'll pick up any new
732 				 *     blocks when we do the block pass.
733 				 */
734 				if (vp &&
735 				    lfs_dino_getgen(fs, VTOI(vp)->i_din)
736 				    == lfs_dino_getgen(fs, dp)) {
737 					nmvfiles++;
738 					readdress_inode(dp, ibdaddr);
739 
740 					/* Update with new info */
741 					lfs_dino_setmode(fs, VTOD(vp), lfs_dino_getmode(fs, dp));
742 					lfs_dino_setnlink(fs, VTOD(vp), lfs_dino_getmode(fs, dp));
743 					/* XXX size is important */
744 					lfs_dino_setsize(fs, VTOD(vp), lfs_dino_getsize(fs, dp));
745 					lfs_dino_setatime(fs, VTOD(vp), lfs_dino_getatime(fs, dp));
746 					lfs_dino_setatimensec(fs, VTOD(vp), lfs_dino_getatimensec(fs, dp));
747 					lfs_dino_setmtime(fs, VTOD(vp), lfs_dino_getmtime(fs, dp));
748 					lfs_dino_setmtimensec(fs, VTOD(vp), lfs_dino_getmtimensec(fs, dp));
749 					lfs_dino_setctime(fs, VTOD(vp), lfs_dino_getctime(fs, dp));
750 					lfs_dino_setctimensec(fs, VTOD(vp), lfs_dino_getctimensec(fs, dp));
751 					lfs_dino_setflags(fs, VTOD(vp), lfs_dino_getflags(fs, dp));
752 					lfs_dino_setuid(fs, VTOD(vp), lfs_dino_getuid(fs, dp));
753 					lfs_dino_setgid(fs, VTOD(vp), lfs_dino_getgid(fs, dp));
754 					inodirty(VTOI(vp));
755 				}
756 			}
757 			for (j = 0; inums[j]; j++) {
758 				alloc_inode(inums[j], ibdaddr);
759 				vp = lfs_raw_vget(fs, inums[j],
760 					      devvp->v_fd, ibdaddr);
761 				/* We'll get the blocks later */
762 				if (debug)
763 					pwarn("alloc ino %d nlink %d\n",
764 						(int)inums[j], lfs_dino_getnlink(fs, VTOD(vp)));
765 
766 				for (k=0; k<ULFS_NDADDR; k++) {
767 					lfs_dino_setdb(fs, VTOD(vp), k, 0);
768 				}
769 				for (k=0; k<ULFS_NIADDR; k++) {
770 					lfs_dino_setib(fs, VTOD(vp), k, 0);
771 				}
772 				lfs_dino_setblocks(fs, VTOD(vp), 0);
773 
774 				vp->v_uflag |= VU_DIROP;
775 				inodirty(VTOI(vp));
776 			}
777 			free(inums);
778 		}
779 
780 		bc = check_summary(fs, sp, daddr, debug, devvp, NULL);
781 		if (bc == 0) {
782 			pwarn("unexpected bad seg ptr at 0x%jx with serial=%ju\n",
783 				(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
784 			brelse(bp, 0);
785 			break;
786 		} else {
787 			if (debug)
788 				pwarn("good seg ptr at 0x%jx with serial=%ju\n",
789 					(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
790 			lastserial = lfs_ss_getserial(fs, sp);
791 		}
792 		odaddr = daddr;
793 		daddr += lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc);
794 		if (lfs_dtosn(fs, odaddr) != lfs_dtosn(fs, daddr) ||
795 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, daddr +
796 			lfs_btofsb(fs, lfs_sb_getsumsize(fs) + lfs_sb_getbsize(fs)) - 1)) {
797 			daddr = lfs_ss_getnext(fs, sp);
798 		}
799 		brelse(bp, 0);
800 	}
801 
802     out:
803 	free(ibbuf);
804 
805 	/* Set serial here, just to be sure (XXX should be right already) */
806 	lfs_sb_setserial(fs, lastserial + 1);
807 
808 	/*
809 	 * Check our new vnodes.  Any blocks must lie in segments that
810 	 * we've seen before (SEGUSE_DIRTY or SEGUSE_RFW); and the rest
811 	 * of the pass 1 checks as well.
812 	 */
813 	memset(&idesc, 0, sizeof(struct inodesc));
814 	idesc.id_type = ADDR;
815 	idesc.id_func = pass6check;
816 	idesc.id_lblkno = 0;
817 	LIST_FOREACH(vp, &vnodelist, v_mntvnodes) {
818 		if ((vp->v_uflag & VU_DIROP) == 0)
819 			--n_files; /* Don't double count */
820 		checkinode(VTOI(vp)->i_number, &idesc);
821 	}
822 
823 	/*
824 	 * Second pass.  Run through FINFO entries looking for blocks
825 	 * with the same generation number as files we've seen before.
826 	 * If they have it, pretend like we just wrote them.  We don't
827 	 * do the pretend-write, though, if we've already seen them
828 	 * (the accounting would have been done for us already).
829 	 */
830 	daddr = lfs_sb_getoffset(fs);
831 	while (daddr != lastgood) {
832 		if (!(seg_table[lfs_dtosn(fs, daddr)].su_flags & SEGUSE_DIRTY)) {
833 			seg_table[lfs_dtosn(fs, daddr)].su_flags |= SEGUSE_DIRTY;
834 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
835 			sup->su_flags |= SEGUSE_DIRTY;
836 			VOP_BWRITE(sbp);
837 		}
838 
839 		/* Could be a superblock */
840 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr) {
841 			if (daddr == lfs_sb_gets0addr(fs))
842 				daddr += lfs_btofsb(fs, LFS_LABELPAD);
843 			for (i = 0; i < LFS_MAXNUMSB; i++) {
844 				if (daddr == lfs_sb_getsboff(fs, i)) {
845 					daddr += lfs_btofsb(fs, LFS_SBPAD);
846 				}
847 				if (daddr < lfs_sb_getsboff(fs, i))
848 					break;
849 			}
850 		}
851 
852 		/* Read in summary block */
853 		bread(devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getsumsize(fs), 0, &bp);
854 		sp = (SEGSUM *)bp->b_data;
855 		bc = check_summary(fs, sp, daddr, debug, devvp, pass6harvest);
856 		if (bc == 0) {
857 			pwarn("unexpected bad seg ptr [2] at 0x%jx with serial=%ju\n",
858 				(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
859 			brelse(bp, 0);
860 			break;
861 		}
862 		odaddr = daddr;
863 		daddr += lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc);
864 		lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc));
865 		if (lfs_dtosn(fs, odaddr) != lfs_dtosn(fs, daddr) ||
866 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, daddr +
867 			lfs_btofsb(fs, lfs_sb_getsumsize(fs) + lfs_sb_getbsize(fs)) - 1)) {
868 			lfs_sb_subavail(fs, lfs_sntod(fs, lfs_dtosn(fs, daddr) + 1) - daddr);
869 			daddr = lfs_ss_getnext(fs, sp);
870 		}
871 		LFS_CLEANERINFO(cip, fs, cbp);
872 		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);
873 		bp->b_flags |= B_AGE;
874 		brelse(bp, 0);
875 	}
876 
877 	/* Final address could also be a superblock */
878 	if (lfs_sntod(fs, lfs_dtosn(fs, lastgood)) == lastgood) {
879 		if (lastgood == lfs_sb_gets0addr(fs))
880 			lastgood += lfs_btofsb(fs, LFS_LABELPAD);
881 		for (i = 0; i < LFS_MAXNUMSB; i++) {
882 			if (lastgood == lfs_sb_getsboff(fs, i))
883 				lastgood += lfs_btofsb(fs, LFS_SBPAD);
884 			if (lastgood < lfs_sb_getsboff(fs, i))
885 				break;
886 		}
887 	}
888 
889 	/* Update offset to point at correct location */
890 	lfs_sb_setoffset(fs, lastgood);
891 	lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, lastgood)));
892 	for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
893 		sn = (sn + 1) % lfs_sb_getnseg(fs);
894 		if (sn == curseg)
895 			errx(1, "no clean segments");
896 		LFS_SEGENTRY(sup, fs, sn, bp);
897 		if ((sup->su_flags & SEGUSE_DIRTY) == 0) {
898 			sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
899 			VOP_BWRITE(bp);
900 			break;
901 		}
902 		brelse(bp, 0);
903 	}
904 	lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
905 
906 	if (preen) {
907 		if (ndelfiles)
908 			pwarn("roll forward deleted %d file%s\n", ndelfiles,
909 				(ndelfiles > 1 ? "s" : ""));
910 		if (nnewfiles)
911 			pwarn("roll forward added %d file%s\n", nnewfiles,
912 				(nnewfiles > 1 ? "s" : ""));
913 		if (nmvfiles)
914 			pwarn("roll forward relocated %d inode%s\n", nmvfiles,
915 				(nmvfiles > 1 ? "s" : ""));
916 		if (nnewblocks)
917 			pwarn("roll forward verified %d data block%s\n", nnewblocks,
918 				(nnewblocks > 1 ? "s" : ""));
919 		if (ndelfiles == 0 && nnewfiles == 0 && nmvfiles == 0 &&
920 		    nnewblocks == 0)
921 			pwarn("roll forward produced nothing new\n");
922 	}
923 
924 	if (!preen) {
925 		/* Run pass 5 again (it's quick anyway). */
926 		pwarn("** Phase 6b - Recheck Segment Block Accounting\n");
927 		pass5();
928 	}
929 
930 	/* Likewise for pass 0 */
931 	if (!preen)
932 		pwarn("** Phase 6c - Recheck Inode Free List\n");
933 	pass0();
934 }
935