xref: /netbsd-src/sbin/fsck_lfs/pass6.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /* $NetBSD: pass6.c,v 1.50 2017/06/10 08:13:15 pgoyette Exp $	 */
2 
3 /*-
4  * Copyright (c) 2003 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Konrad E. Schroder <perseant@hhhh.org>.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/time.h>
35 #include <sys/buf.h>
36 #include <sys/mount.h>
37 
38 #define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */
39 #define vnode uvnode
40 #include <ufs/lfs/lfs.h>
41 #include <ufs/lfs/lfs_accessors.h>
42 #include <ufs/lfs/lfs_inode.h>
43 #undef vnode
44 
45 #include <assert.h>
46 #include <err.h>
47 #include <signal.h>
48 #include <string.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <util.h>
52 
53 #include "bufcache.h"
54 #include "vnode.h"
55 #include "lfs_user.h"
56 #include "segwrite.h"
57 
58 #include "fsck.h"
59 #include "extern.h"
60 #include "fsutil.h"
61 
62 extern u_int32_t cksum(void *, size_t);
63 extern u_int32_t lfs_sb_cksum(struct dlfs *);
64 
65 static int nnewblocks;
66 
67 /*
68  * Our own copy of lfs_update_single so we can account in seg_table
69  * as well as the Ifile; and so we can add the blocks to their new
70  * segment.
71  *
72  * Change the given block's address to ndaddr, finding its previous
73  * location using ulfs_bmaparray().
74  *
75  * Account for this change in the segment table.
76  */
77 static void
78 rfw_update_single(struct uvnode *vp, daddr_t lbn, daddr_t ndaddr, size_t size)
79 {
80 	SEGUSE *sup;
81 	struct ubuf *bp;
82 	struct indir a[ULFS_NIADDR + 2], *ap;
83 	struct inode *ip;
84 	daddr_t daddr, ooff;
85 	int num, error;
86 	int i, osize = 0;
87 	int frags, ofrags = 0;
88 	u_int32_t oldsn, sn;
89 
90 	ip = VTOI(vp);
91 	ip->i_state |= IN_MODIFIED;
92 
93 	error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num);
94 	if (error)
95 		errx(1, "lfs_updatemeta: ulfs_bmaparray returned %d"
96 		     " looking up lbn %" PRId64 "\n", error, lbn);
97 	if (daddr > 0)
98 		daddr = LFS_DBTOFSB(fs, daddr);
99 
100 	frags = lfs_numfrags(fs, size);
101 	switch (num) {
102 	case 0:
103 		ooff = lfs_dino_getdb(fs, ip->i_din, lbn);
104 		if (ooff <= 0)
105 			lfs_dino_setblocks(fs, ip->i_din,
106 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
107 		else {
108 			/* possible fragment truncation or extension */
109 			ofrags = lfs_numfrags(fs, ip->i_lfs_fragsize[lbn]);
110 			lfs_dino_setblocks(fs, ip->i_din,
111 			    lfs_dino_getblocks(fs, ip->i_din) + (frags - ofrags));
112 		}
113 		lfs_dino_setdb(fs, ip->i_din, lbn, ndaddr);
114 		break;
115 	case 1:
116 		ooff = lfs_dino_getib(fs, ip->i_din, a[0].in_off);
117 		if (ooff <= 0)
118 			lfs_dino_setblocks(fs, ip->i_din,
119 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
120 		lfs_dino_setib(fs, ip->i_din, a[0].in_off, ndaddr);
121 		break;
122 	default:
123 		ap = &a[num - 1];
124 		if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp))
125 			errx(1, "lfs_updatemeta: bread bno %" PRId64,
126 			    ap->in_lbn);
127 
128 		ooff = lfs_iblock_get(fs, bp->b_data, ap->in_off);
129 		if (ooff <= 0)
130 			lfs_dino_setblocks(fs, ip->i_din,
131 			    lfs_dino_getblocks(fs, ip->i_din) + frags);
132 		lfs_iblock_set(fs, bp->b_data, ap->in_off, ndaddr);
133 		(void) VOP_BWRITE(bp);
134 	}
135 
136 	/*
137 	 * Update segment usage information, based on old size
138 	 * and location.
139 	 */
140 	if (daddr > 0) {
141 		oldsn = lfs_dtosn(fs, daddr);
142 		if (lbn >= 0 && lbn < ULFS_NDADDR)
143 			osize = ip->i_lfs_fragsize[lbn];
144 		else
145 			osize = lfs_sb_getbsize(fs);
146 		LFS_SEGENTRY(sup, fs, oldsn, bp);
147 		seg_table[oldsn].su_nbytes -= osize;
148 		sup->su_nbytes -= osize;
149 		if (!(bp->b_flags & B_GATHERED))
150 			fs->lfs_flags |= LFS_IFDIRTY;
151 		LFS_WRITESEGENTRY(sup, fs, oldsn, bp);
152 		for (i = 0; i < lfs_btofsb(fs, osize); i++)
153 			clrbmap(daddr + i);
154 	}
155 
156 	/* If block is beyond EOF, update size */
157 	if (lbn >= 0 && lfs_dino_getsize(fs, ip->i_din) <= (lbn << lfs_sb_getbshift(fs))) {
158 		lfs_dino_setsize(fs, ip->i_din, (lbn << lfs_sb_getbshift(fs)) + 1);
159 	}
160 
161 	/* If block frag size is too large for old EOF, update size */
162 	if (lbn < ULFS_NDADDR) {
163 		off_t minsize;
164 
165 		minsize = (lbn << lfs_sb_getbshift(fs));
166 		minsize += (size - lfs_sb_getfsize(fs)) + 1;
167 		if (lfs_dino_getsize(fs, ip->i_din) < minsize)
168 			lfs_dino_setsize(fs, ip->i_din, minsize);
169 	}
170 
171 	/* Count for the user */
172 	++nnewblocks;
173 
174 	/* Add block to its new segment */
175 	sn = lfs_dtosn(fs, ndaddr);
176 	LFS_SEGENTRY(sup, fs, sn, bp);
177 	seg_table[sn].su_nbytes += size;
178 	sup->su_nbytes += size;
179 	if (!(bp->b_flags & B_GATHERED))
180 		fs->lfs_flags |= LFS_IFDIRTY;
181 	LFS_WRITESEGENTRY(sup, fs, sn, bp);
182 	for (i = 0; i < lfs_btofsb(fs, size); i++)
183 #ifndef VERBOSE_BLOCKMAP
184 		setbmap(daddr + i);
185 #else
186 		setbmap(daddr + i, ip->i_number);
187 #endif
188 
189 	/* Check bfree accounting as well */
190 	if (daddr <= 0) {
191 		lfs_sb_subbfree(fs, lfs_btofsb(fs, size));
192 	} else if (size != osize) {
193 		lfs_sb_subbfree(fs, frags - ofrags);
194 	}
195 
196 	/*
197 	 * Now that this block has a new address, and its old
198 	 * segment no longer owns it, we can forget about its
199 	 * old size.
200 	 */
201 	if (lbn >= 0 && lbn < ULFS_NDADDR)
202 		ip->i_lfs_fragsize[lbn] = size;
203 }
204 
205 /*
206  * Remove the vnode from the cache, including any blocks it
207  * may hold.  Account the blocks.  Finally account the removal
208  * of the inode from its segment.
209  */
210 static void
211 remove_ino(struct uvnode *vp, ino_t ino)
212 {
213 	IFILE *ifp;
214 	ino_t nextfree;
215 	SEGUSE *sup;
216 	CLEANERINFO *cip;
217 	struct ubuf *bp, *sbp, *cbp;
218 	struct inodesc idesc;
219 	daddr_t daddr;
220 
221 	if (debug)
222 		pwarn("remove ino %d\n", (int)ino);
223 
224 	LFS_IENTRY(ifp, fs, ino, bp);
225 	daddr = lfs_if_getdaddr(fs, ifp);
226 	if (daddr > 0) {
227 		lfs_if_setdaddr(fs, ifp, 0);
228 
229 		LFS_GET_HEADFREE(fs, cip, cbp, &nextfree);
230 		lfs_if_setnextfree(fs, ifp, nextfree);
231 		VOP_BWRITE(bp);
232 		LFS_PUT_HEADFREE(fs, cip, cbp, ino);
233 		sbdirty();
234 
235 		if (vp == NULL)
236 			vp = lfs_raw_vget(fs, ino, fs->lfs_ivnode->v_fd, daddr);
237 
238 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
239 		sup->su_nbytes -= DINOSIZE(fs);
240 		VOP_BWRITE(sbp);
241 		seg_table[lfs_dtosn(fs, daddr)].su_nbytes -= DINOSIZE(fs);
242 	} else
243 		brelse(bp, 0);
244 
245 	/* Do on-disk accounting */
246 	if (vp) {
247 		idesc.id_number = ino;
248 		idesc.id_func = pass4check; /* Delete dinode and blocks */
249 		idesc.id_type = ADDR;
250 		idesc.id_lblkno = 0;
251 		clri(&idesc, "unknown", 2); /* XXX magic number 2 */
252 		/* vp has been destroyed */
253 	}
254 }
255 
256 /*
257  * Use FIP records to update blocks, if the generation number matches.
258  */
259 static void
260 pass6harvest(daddr_t daddr, FINFO *fip)
261 {
262 	struct uvnode *vp;
263 	int i;
264 	size_t size;
265 
266 	vp = vget(fs, lfs_fi_getino(fs, fip));
267 	if (vp && vp != fs->lfs_ivnode &&
268 	    lfs_dino_getgen(fs, VTOI(vp)->i_din) == lfs_fi_getversion(fs, fip)) {
269 		for (i = 0; i < lfs_fi_getnblocks(fs, fip); i++) {
270 			size = (i == lfs_fi_getnblocks(fs, fip) - 1 ?
271 				lfs_fi_getlastlength(fs, fip) : lfs_sb_getbsize(fs));
272 			if (debug)
273 				pwarn("ino %ju lbn %jd -> 0x%jx\n",
274 					(uintmax_t)lfs_fi_getino(fs, fip),
275 					(intmax_t)lfs_fi_getblock(fs, fip, i),
276 					(intmax_t)daddr);
277 			rfw_update_single(vp, lfs_fi_getblock(fs, fip, i), daddr, size);
278 			daddr += lfs_btofsb(fs, size);
279 		}
280 	}
281 }
282 
283 /*
284  * Check validity of blocks on roll-forward inodes.
285  */
286 int
287 pass6check(struct inodesc * idesc)
288 {
289 	int i, sn, anyout, anynew;
290 
291 	/* Brand new blocks are always OK */
292 	if (idesc->id_blkno == UNWRITTEN)
293 		return KEEPON;
294 
295 	/* Check that the blocks do not lie within clean segments. */
296 	anyout = anynew = 0;
297 	for (i = 0; i < idesc->id_numfrags; i++) {
298 		sn = lfs_dtosn(fs, idesc->id_blkno + i);
299 		if (sn < 0 || sn >= lfs_sb_getnseg(fs) ||
300 		    (seg_table[sn].su_flags & SEGUSE_DIRTY) == 0) {
301 			anyout = 1;
302 			break;
303 		}
304 		if (seg_table[sn].su_flags & SEGUSE_ACTIVE) {
305 			if (sn != lfs_dtosn(fs, lfs_sb_getoffset(fs)) ||
306 			    idesc->id_blkno > lfs_sb_getoffset(fs)) {
307 				++anynew;
308 			}
309 		}
310 		if (!anynew) {
311 			/* Clear so pass1check won't be surprised */
312 			clrbmap(idesc->id_blkno + i);
313 			seg_table[sn].su_nbytes -= lfs_fsbtob(fs, 1);
314 		}
315 	}
316 	if (anyout) {
317 		blkerror(idesc->id_number, "BAD", idesc->id_blkno);
318 		if (badblkcount++ >= MAXBAD) {
319 			pwarn("EXCESSIVE BAD BLKS I=%llu",
320 			    (unsigned long long)idesc->id_number);
321 			if (preen)
322 				pwarn(" (SKIPPING)\n");
323 			else if (reply("CONTINUE") == 0)
324 				err(EEXIT, "%s", "");
325 			return (STOP);
326 		}
327 	}
328 
329 	return pass1check(idesc);
330 }
331 
332 static void
333 account_indir(struct uvnode *vp, union lfs_dinode *dp, daddr_t ilbn, daddr_t daddr, int lvl)
334 {
335 	struct ubuf *bp;
336 	int32_t *dap, *odap, *buf, *obuf;
337 	daddr_t lbn;
338 
339 	if (lvl == 0)
340 		lbn = -ilbn;
341 	else
342 		lbn = ilbn + 1;
343 	bread(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getbsize(fs), 0, &bp);
344 	buf = emalloc(lfs_sb_getbsize(fs));
345 	memcpy(buf, bp->b_data, lfs_sb_getbsize(fs));
346 	brelse(bp, 0);
347 
348 	obuf = emalloc(lfs_sb_getbsize(fs));
349 	if (vp) {
350 		bread(vp, ilbn, lfs_sb_getbsize(fs), 0, &bp);
351 		memcpy(obuf, bp->b_data, lfs_sb_getbsize(fs));
352 		brelse(bp, 0);
353 	} else
354 		memset(obuf, 0, lfs_sb_getbsize(fs));
355 
356 	for (dap = buf, odap = obuf;
357 	     dap < (int32_t *)((char *)buf + lfs_sb_getbsize(fs));
358 	     ++dap, ++odap) {
359 		if (*dap > 0 && *dap != *odap) {
360 			rfw_update_single(vp, lbn, *dap, lfs_dblksize(fs, dp, lbn));
361 			if (lvl > 0)
362 				account_indir(vp, dp, lbn, *dap, lvl - 1);
363 		}
364 		if (lvl == 0)
365 			++lbn;
366 		else if (lvl == 1)
367 			lbn -= LFS_NINDIR(fs);
368 		else if (lvl == 2)
369 			lbn -= LFS_NINDIR(fs) * LFS_NINDIR(fs);
370 	}
371 
372 	free(obuf);
373 	free(buf);
374 }
375 
376 /*
377  * Account block changes between new found inode and existing inode.
378  */
379 static void
380 account_block_changes(union lfs_dinode *dp)
381 {
382 	int i;
383 	daddr_t lbn, off, odaddr;
384 	struct uvnode *vp;
385 	struct inode *ip;
386 
387 	vp = vget(fs, lfs_dino_getinumber(fs, dp));
388 	ip = (vp ? VTOI(vp) : NULL);
389 
390 	/* Check direct block holdings between existing and new */
391 	for (i = 0; i < ULFS_NDADDR; i++) {
392 		odaddr = (ip ? lfs_dino_getdb(fs, ip->i_din, i) : 0x0);
393 		if (lfs_dino_getdb(fs, dp, i) > 0 && lfs_dino_getdb(fs, dp, i) != odaddr)
394 			rfw_update_single(vp, i, lfs_dino_getdb(fs, dp, i),
395 					  lfs_dblksize(fs, dp, i));
396 	}
397 
398 	/* Check indirect block holdings between existing and new */
399 	off = 0;
400 	for (i = 0; i < ULFS_NIADDR; i++) {
401 		odaddr = (ip ? lfs_dino_getib(fs, ip->i_din, i) : 0x0);
402 		if (lfs_dino_getib(fs, dp, i) > 0 && lfs_dino_getib(fs, dp, i) != odaddr) {
403 			lbn = -(ULFS_NDADDR + off + i);
404 			rfw_update_single(vp, i, lfs_dino_getib(fs, dp, i), lfs_sb_getbsize(fs));
405 			account_indir(vp, dp, lbn, lfs_dino_getib(fs, dp, i), i);
406 		}
407 		if (off == 0)
408 			off = LFS_NINDIR(fs);
409 		else
410 			off *= LFS_NINDIR(fs);
411 	}
412 }
413 
414 /*
415  * Give a previously allocated inode a new address; do segment
416  * accounting if necessary.
417  *
418  * Caller has ensured that this inode is not on the free list, so no
419  * free list accounting is done.
420  */
421 static void
422 readdress_inode(union lfs_dinode *dp, daddr_t daddr)
423 {
424 	IFILE *ifp;
425 	SEGUSE *sup;
426 	struct ubuf *bp;
427 	int sn;
428 	daddr_t odaddr;
429 	ino_t thisino = lfs_dino_getinumber(fs, dp);
430 	struct uvnode *vp;
431 
432 	/* Recursively check all block holdings, account changes */
433 	account_block_changes(dp);
434 
435 	/* Move ifile pointer to this location */
436 	LFS_IENTRY(ifp, fs, thisino, bp);
437 	odaddr = lfs_if_getdaddr(fs, ifp);
438 	assert(odaddr != 0);
439 	lfs_if_setdaddr(fs, ifp, daddr);
440 	VOP_BWRITE(bp);
441 
442 	if (debug)
443 		pwarn("readdress ino %ju from 0x%jx to 0x%jx mode %o nlink %d\n",
444 			(uintmax_t)lfs_dino_getinumber(fs, dp),
445 			(uintmax_t)odaddr,
446 			(intmax_t)daddr,
447 			(int)lfs_dino_getmode(fs, dp),
448 			(int)lfs_dino_getnlink(fs, dp));
449 
450 	/* Copy over preexisting in-core inode, if any */
451 	vp = vget(fs, thisino);
452 	lfs_copy_dinode(fs, VTOI(vp)->i_din, dp);
453 
454 	/* Finally account the inode itself */
455 	sn = lfs_dtosn(fs, odaddr);
456 	LFS_SEGENTRY(sup, fs, sn, bp);
457 	sup->su_nbytes -= DINOSIZE(fs);
458 	VOP_BWRITE(bp);
459 	seg_table[sn].su_nbytes -= DINOSIZE(fs);
460 
461 	sn = lfs_dtosn(fs, daddr);
462 	LFS_SEGENTRY(sup, fs, sn, bp);
463 	sup->su_nbytes += DINOSIZE(fs);
464 	VOP_BWRITE(bp);
465 	seg_table[sn].su_nbytes += DINOSIZE(fs);
466 }
467 
468 /*
469  * Allocate the given inode from the free list.
470  */
471 static void
472 alloc_inode(ino_t thisino, daddr_t daddr)
473 {
474 	ino_t ino, nextfree, oldhead;
475 	IFILE *ifp;
476 	SEGUSE *sup;
477 	struct ubuf *bp, *cbp;
478 	CLEANERINFO *cip;
479 
480 	if (debug)
481 		pwarn("allocating ino %ju at 0x%jx\n", (uintmax_t)thisino,
482 			(intmax_t)daddr);
483 	while (thisino >= maxino) {
484 		extend_ifile(fs);
485 	}
486 
487 	LFS_IENTRY(ifp, fs, thisino, bp);
488 	if (lfs_if_getdaddr(fs, ifp) != 0) {
489 		pwarn("allocated inode %lld already allocated\n",
490 			(long long)thisino);
491 	}
492 	nextfree = lfs_if_getnextfree(fs, ifp);
493 	lfs_if_setnextfree(fs, ifp, 0);
494 	lfs_if_setdaddr(fs, ifp, daddr);
495 	VOP_BWRITE(bp);
496 
497 	LFS_GET_HEADFREE(fs, cip, cbp, &oldhead);
498 	if (oldhead == thisino) {
499 		LFS_PUT_HEADFREE(fs, cip, cbp, nextfree);
500 		sbdirty();
501 		if (nextfree == 0) {
502 			extend_ifile(fs);
503 		}
504 	} else {
505 		/* Search the free list for this inode */
506 		ino = oldhead;
507 		while (ino) {
508 			LFS_IENTRY(ifp, fs, ino, bp);
509 			assert(lfs_if_getnextfree(fs, ifp) != ino);
510 			if (lfs_if_getnextfree(fs, ifp) == thisino) {
511 				lfs_if_setnextfree(fs, ifp, nextfree);
512 				VOP_BWRITE(bp);
513 				if (nextfree == 0)
514 					LFS_PUT_TAILFREE(fs, cip, cbp, ino);
515 				break;
516 			} else
517 				ino = lfs_if_getnextfree(fs, ifp);
518 			brelse(bp, 0);
519 		}
520 	}
521 
522 	/* Account for new location */
523 	LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), bp);
524 	sup->su_nbytes += DINOSIZE(fs);
525 	VOP_BWRITE(bp);
526 	seg_table[lfs_dtosn(fs, daddr)].su_nbytes += DINOSIZE(fs);
527 }
528 
529 /*
530  * Roll forward from the last verified checkpoint.
531  *
532  * Basic strategy:
533  *
534  * Run through the summaries finding the last valid partial segment.
535  * Note segment numbers as we go.  For each inode that we find, compare
536  * its generation number; if newer than old inode's (or if old inode is
537  * USTATE), change to that inode.  Recursively look at inode blocks that
538  * do not have their old disk addresses.  These addresses must lie in
539  * segments we have seen already in our roll forward.
540  *
541  * A second pass through the past-checkpoint area verifies the validity
542  * of these new blocks, as well as updating other blocks that do not
543  * have corresponding new inodes (but their generation number must match
544  * the old generation number).
545  */
546 void
547 pass6(void)
548 {
549 	daddr_t daddr, ibdaddr, odaddr, lastgood;
550 	IINFO *iip;
551 	struct uvnode *vp, *devvp;
552 	CLEANERINFO *cip;
553 	SEGUSE *sup;
554 	SEGSUM *sp;
555 	struct ubuf *bp, *ibp, *sbp, *cbp;
556 	union lfs_dinode *dp;
557 	struct inodesc idesc;
558 	int i, j, bc, hassuper;
559 	unsigned k;
560 	int nnewfiles, ndelfiles, nmvfiles;
561 	int sn, curseg;
562 	char *ibbuf;
563 	long lastserial;
564 
565 	devvp = fs->lfs_devvp;
566 
567 	/* If we can't roll forward because of created files, don't try */
568 	if (no_roll_forward) {
569 		if (debug)
570 			pwarn("not rolling forward due to possible allocation conflict\n");
571 		return;
572 	}
573 
574 	/* Find last valid partial segment */
575 	lastgood = try_verify(fs, devvp, 0, debug);
576 	if (lastgood == lfs_sb_getoffset(fs)) {
577 		if (debug)
578 			pwarn("not rolling forward, nothing to recover\n");
579 		return;
580 	}
581 
582 	if (debug)
583 		pwarn("could roll forward from 0x%jx to 0x%jx\n",
584 			(uintmax_t)lfs_sb_getoffset(fs), (uintmax_t)lastgood);
585 
586 	if (!preen && reply("ROLL FORWARD") == 0)
587 		return;
588 	/*
589 	 * Pass 1: find inode blocks.  We ignore the Ifile inode but accept
590 	 * changes to any other inode.
591 	 */
592 
593 	ibbuf = emalloc(lfs_sb_getibsize(fs));
594 	nnewfiles = ndelfiles = nmvfiles = nnewblocks = 0;
595 	daddr = lfs_sb_getoffset(fs);
596 	hassuper = 0;
597 	lastserial = 0;
598 	while (daddr != lastgood) {
599 		seg_table[lfs_dtosn(fs, daddr)].su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
600 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
601 		sup->su_flags |= SEGUSE_DIRTY;
602 		VOP_BWRITE(sbp);
603 
604 		/* Could be a superblock */
605 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr) {
606 			if (daddr == lfs_sb_gets0addr(fs)) {
607 				++hassuper;
608 				daddr += lfs_btofsb(fs, LFS_LABELPAD);
609 			}
610 			for (i = 0; i < LFS_MAXNUMSB; i++) {
611 				if (daddr == lfs_sb_getsboff(fs, i)) {
612 					++hassuper;
613 					daddr += lfs_btofsb(fs, LFS_SBPAD);
614 				}
615 				if (daddr < lfs_sb_getsboff(fs, i))
616 					break;
617 			}
618 		}
619 		KASSERT(hassuper == 0 || hassuper == 1);
620 
621 		/* Read in summary block */
622 		bread(devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getsumsize(fs), 0, &bp);
623 		sp = (SEGSUM *)bp->b_data;
624 		if (debug)
625 			pwarn("sum at 0x%jx: ninos=%d nfinfo=%d\n",
626 				(intmax_t)daddr, (int)lfs_ss_getninos(fs, sp),
627 				(int)lfs_ss_getnfinfo(fs, sp));
628 
629 		/* We have verified that this is a good summary. */
630 		LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
631 		++sup->su_nsums;
632 		VOP_BWRITE(sbp);
633 		lfs_sb_subbfree(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
634 		lfs_sb_adddmeta(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
635 		sbdirty();
636 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr +
637 		    hassuper * lfs_btofsb(fs, LFS_SBPAD) &&
638 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, lfs_sb_getoffset(fs))) {
639 			lfs_sb_subnclean(fs, 1);
640 			sbdirty();
641 		}
642 
643 		/* Find inodes, look at generation number. */
644 		if (lfs_ss_getninos(fs, sp)) {
645 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
646 			sup->su_ninos += howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs));
647 			VOP_BWRITE(sbp);
648 			lfs_sb_adddmeta(fs, lfs_btofsb(fs, howmany(lfs_ss_getninos(fs, sp),
649 							    LFS_INOPB(fs)) *
650 						lfs_sb_getibsize(fs)));
651 		}
652 		iip = SEGSUM_IINFOSTART(fs, bp->b_data);
653 		for (i = 0; i < howmany(lfs_ss_getninos(fs, sp), LFS_INOPB(fs)); i++) {
654 			ino_t *inums;
655 
656 			inums = ecalloc(LFS_INOPB(fs) + 1, sizeof(*inums));
657 			ibdaddr = lfs_ii_getblock(fs, iip);
658 			iip = NEXTLOWER_IINFO(fs, iip);
659 			lfs_sb_subbfree(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs)));
660 			sbdirty();
661 			bread(devvp, LFS_FSBTODB(fs, ibdaddr),
662 			      lfs_sb_getibsize(fs), 0, &ibp);
663 			memcpy(ibbuf, ibp->b_data, lfs_sb_getibsize(fs));
664 			brelse(ibp, 0);
665 
666 			j = 0;
667 			for (k = 0; k < LFS_INOPB(fs); k++) {
668 				dp = DINO_IN_BLOCK(fs, ibbuf, k);
669 				if (lfs_dino_getinumber(fs, dp) == 0 ||
670 				    lfs_dino_getinumber(fs, dp) == LFS_IFILE_INUM)
671 					continue;
672 				/* Basic sanity checks */
673 				if (lfs_dino_getnlink(fs, dp) < 0
674 #if 0
675 				    || lfs_dino_getinumber(fs, dp) < 0
676 				    || lfs_dino_getsize(fs, dp) < 0
677 #endif
678 				) {
679 					pwarn("BAD INODE AT 0x%jx\n",
680 						(intmax_t)ibdaddr);
681 					brelse(bp, 0);
682 					free(inums);
683 					goto out;
684 				}
685 
686 				vp = vget(fs, lfs_dino_getinumber(fs, dp));
687 
688 				/*
689 				 * Four cases:
690 				 * (1) Invalid inode (nlink == 0).
691 				 *     If currently allocated, remove.
692 				 */
693 				if (lfs_dino_getnlink(fs, dp) == 0) {
694 					remove_ino(vp, lfs_dino_getinumber(fs, dp));
695 					++ndelfiles;
696 					continue;
697 				}
698 				/*
699 				 * (2) New valid inode, previously free.
700 				 *     Nothing to do except account
701 				 *     the inode itself, done after the
702 				 *     loop.
703 				 */
704 				if (vp == NULL) {
705 					if (!(lfs_ss_getflags(fs, sp) & SS_DIROP))
706 						pfatal("NEW FILE IN NON-DIROP PARTIAL SEGMENT");
707 					else {
708 						inums[j++] = lfs_dino_getinumber(fs, dp);
709 						nnewfiles++;
710 					}
711 					continue;
712 				}
713 				/*
714 				 * (3) Valid new version of previously
715 				 *     allocated inode.  Delete old file
716 				 *     and proceed as in (2).
717 				 */
718 				if (vp &&
719 				    lfs_dino_getgen(fs, VTOI(vp)->i_din)
720 				    < lfs_dino_getgen(fs, dp)) {
721 					remove_ino(vp, lfs_dino_getinumber(fs, dp));
722 					if (!(lfs_ss_getflags(fs, sp) & SS_DIROP))
723 						pfatal("NEW FILE VERSION IN NON-DIROP PARTIAL SEGMENT");
724 					else {
725 						inums[j++] = lfs_dino_getinumber(fs, dp);
726 						ndelfiles++;
727 						nnewfiles++;
728 					}
729 					continue;
730 				}
731 				/*
732 				 * (4) Same version of previously
733 				 *     allocated inode.  Move inode to
734 				 *     this location, account inode change
735 				 *     only.  We'll pick up any new
736 				 *     blocks when we do the block pass.
737 				 */
738 				if (vp &&
739 				    lfs_dino_getgen(fs, VTOI(vp)->i_din)
740 				    == lfs_dino_getgen(fs, dp)) {
741 					nmvfiles++;
742 					readdress_inode(dp, ibdaddr);
743 
744 					/* Update with new info */
745 					lfs_dino_setmode(fs, VTOD(vp), lfs_dino_getmode(fs, dp));
746 					lfs_dino_setnlink(fs, VTOD(vp), lfs_dino_getmode(fs, dp));
747 					/* XXX size is important */
748 					lfs_dino_setsize(fs, VTOD(vp), lfs_dino_getsize(fs, dp));
749 					lfs_dino_setatime(fs, VTOD(vp), lfs_dino_getatime(fs, dp));
750 					lfs_dino_setatimensec(fs, VTOD(vp), lfs_dino_getatimensec(fs, dp));
751 					lfs_dino_setmtime(fs, VTOD(vp), lfs_dino_getmtime(fs, dp));
752 					lfs_dino_setmtimensec(fs, VTOD(vp), lfs_dino_getmtimensec(fs, dp));
753 					lfs_dino_setctime(fs, VTOD(vp), lfs_dino_getctime(fs, dp));
754 					lfs_dino_setctimensec(fs, VTOD(vp), lfs_dino_getctimensec(fs, dp));
755 					lfs_dino_setflags(fs, VTOD(vp), lfs_dino_getflags(fs, dp));
756 					lfs_dino_setuid(fs, VTOD(vp), lfs_dino_getuid(fs, dp));
757 					lfs_dino_setgid(fs, VTOD(vp), lfs_dino_getgid(fs, dp));
758 					inodirty(VTOI(vp));
759 				}
760 			}
761 			for (j = 0; inums[j]; j++) {
762 				alloc_inode(inums[j], ibdaddr);
763 				vp = lfs_raw_vget(fs, inums[j],
764 					      devvp->v_fd, ibdaddr);
765 				/* We'll get the blocks later */
766 				if (debug)
767 					pwarn("alloc ino %d nlink %d\n",
768 						(int)inums[j], lfs_dino_getnlink(fs, VTOD(vp)));
769 
770 				for (k=0; k<ULFS_NDADDR; k++) {
771 					lfs_dino_setdb(fs, VTOD(vp), k, 0);
772 				}
773 				for (k=0; k<ULFS_NIADDR; k++) {
774 					lfs_dino_setib(fs, VTOD(vp), k, 0);
775 				}
776 				lfs_dino_setblocks(fs, VTOD(vp), 0);
777 
778 				vp->v_uflag |= VU_DIROP;
779 				inodirty(VTOI(vp));
780 			}
781 			free(inums);
782 		}
783 
784 		bc = check_summary(fs, sp, daddr, debug, devvp, NULL);
785 		if (bc == 0) {
786 			pwarn("unexpected bad seg ptr at 0x%jx with serial=%ju\n",
787 				(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
788 			brelse(bp, 0);
789 			break;
790 		} else {
791 			if (debug)
792 				pwarn("good seg ptr at 0x%jx with serial=%ju\n",
793 					(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
794 			lastserial = lfs_ss_getserial(fs, sp);
795 		}
796 		odaddr = daddr;
797 		daddr += lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc);
798 		if (lfs_dtosn(fs, odaddr) != lfs_dtosn(fs, daddr) ||
799 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, daddr +
800 			lfs_btofsb(fs, lfs_sb_getsumsize(fs) + lfs_sb_getbsize(fs)) - 1)) {
801 			daddr = lfs_ss_getnext(fs, sp);
802 		}
803 		brelse(bp, 0);
804 	}
805 
806     out:
807 	free(ibbuf);
808 
809 	/* Set serial here, just to be sure (XXX should be right already) */
810 	lfs_sb_setserial(fs, lastserial + 1);
811 
812 	/*
813 	 * Check our new vnodes.  Any blocks must lie in segments that
814 	 * we've seen before (SEGUSE_DIRTY or SEGUSE_RFW); and the rest
815 	 * of the pass 1 checks as well.
816 	 */
817 	memset(&idesc, 0, sizeof(struct inodesc));
818 	idesc.id_type = ADDR;
819 	idesc.id_func = pass6check;
820 	idesc.id_lblkno = 0;
821 	LIST_FOREACH(vp, &vnodelist, v_mntvnodes) {
822 		if ((vp->v_uflag & VU_DIROP) == 0)
823 			--n_files; /* Don't double count */
824 		checkinode(VTOI(vp)->i_number, &idesc);
825 	}
826 
827 	/*
828 	 * Second pass.  Run through FINFO entries looking for blocks
829 	 * with the same generation number as files we've seen before.
830 	 * If they have it, pretend like we just wrote them.  We don't
831 	 * do the pretend-write, though, if we've already seen them
832 	 * (the accounting would have been done for us already).
833 	 */
834 	daddr = lfs_sb_getoffset(fs);
835 	while (daddr != lastgood) {
836 		if (!(seg_table[lfs_dtosn(fs, daddr)].su_flags & SEGUSE_DIRTY)) {
837 			seg_table[lfs_dtosn(fs, daddr)].su_flags |= SEGUSE_DIRTY;
838 			LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, daddr), sbp);
839 			sup->su_flags |= SEGUSE_DIRTY;
840 			VOP_BWRITE(sbp);
841 		}
842 
843 		/* Could be a superblock */
844 		if (lfs_sntod(fs, lfs_dtosn(fs, daddr)) == daddr) {
845 			if (daddr == lfs_sb_gets0addr(fs))
846 				daddr += lfs_btofsb(fs, LFS_LABELPAD);
847 			for (i = 0; i < LFS_MAXNUMSB; i++) {
848 				if (daddr == lfs_sb_getsboff(fs, i)) {
849 					daddr += lfs_btofsb(fs, LFS_SBPAD);
850 				}
851 				if (daddr < lfs_sb_getsboff(fs, i))
852 					break;
853 			}
854 		}
855 
856 		/* Read in summary block */
857 		bread(devvp, LFS_FSBTODB(fs, daddr), lfs_sb_getsumsize(fs), 0, &bp);
858 		sp = (SEGSUM *)bp->b_data;
859 		bc = check_summary(fs, sp, daddr, debug, devvp, pass6harvest);
860 		if (bc == 0) {
861 			pwarn("unexpected bad seg ptr [2] at 0x%jx with serial=%ju\n",
862 				(intmax_t)daddr, (uintmax_t)lfs_ss_getserial(fs, sp));
863 			brelse(bp, 0);
864 			break;
865 		}
866 		odaddr = daddr;
867 		daddr += lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc);
868 		lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs) + bc));
869 		if (lfs_dtosn(fs, odaddr) != lfs_dtosn(fs, daddr) ||
870 		    lfs_dtosn(fs, daddr) != lfs_dtosn(fs, daddr +
871 			lfs_btofsb(fs, lfs_sb_getsumsize(fs) + lfs_sb_getbsize(fs)) - 1)) {
872 			lfs_sb_subavail(fs, lfs_sntod(fs, lfs_dtosn(fs, daddr) + 1) - daddr);
873 			daddr = lfs_ss_getnext(fs, sp);
874 		}
875 		LFS_CLEANERINFO(cip, fs, cbp);
876 		LFS_SYNC_CLEANERINFO(cip, fs, cbp, 0);
877 		bp->b_flags |= B_AGE;
878 		brelse(bp, 0);
879 	}
880 
881 	/* Final address could also be a superblock */
882 	if (lfs_sntod(fs, lfs_dtosn(fs, lastgood)) == lastgood) {
883 		if (lastgood == lfs_sb_gets0addr(fs))
884 			lastgood += lfs_btofsb(fs, LFS_LABELPAD);
885 		for (i = 0; i < LFS_MAXNUMSB; i++) {
886 			if (lastgood == lfs_sb_getsboff(fs, i))
887 				lastgood += lfs_btofsb(fs, LFS_SBPAD);
888 			if (lastgood < lfs_sb_getsboff(fs, i))
889 				break;
890 		}
891 	}
892 
893 	/* Update offset to point at correct location */
894 	lfs_sb_setoffset(fs, lastgood);
895 	lfs_sb_setcurseg(fs, lfs_sntod(fs, lfs_dtosn(fs, lastgood)));
896 	for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs));;) {
897 		sn = (sn + 1) % lfs_sb_getnseg(fs);
898 		if (sn == curseg)
899 			errx(1, "no clean segments");
900 		LFS_SEGENTRY(sup, fs, sn, bp);
901 		if ((sup->su_flags & SEGUSE_DIRTY) == 0) {
902 			sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE;
903 			VOP_BWRITE(bp);
904 			break;
905 		}
906 		brelse(bp, 0);
907 	}
908 	lfs_sb_setnextseg(fs, lfs_sntod(fs, sn));
909 
910 	if (preen) {
911 		if (ndelfiles)
912 			pwarn("roll forward deleted %d file%s\n", ndelfiles,
913 				(ndelfiles > 1 ? "s" : ""));
914 		if (nnewfiles)
915 			pwarn("roll forward added %d file%s\n", nnewfiles,
916 				(nnewfiles > 1 ? "s" : ""));
917 		if (nmvfiles)
918 			pwarn("roll forward relocated %d inode%s\n", nmvfiles,
919 				(nmvfiles > 1 ? "s" : ""));
920 		if (nnewblocks)
921 			pwarn("roll forward verified %d data block%s\n", nnewblocks,
922 				(nnewblocks > 1 ? "s" : ""));
923 		if (ndelfiles == 0 && nnewfiles == 0 && nmvfiles == 0 &&
924 		    nnewblocks == 0)
925 			pwarn("roll forward produced nothing new\n");
926 	}
927 
928 	if (!preen) {
929 		/* Run pass 5 again (it's quick anyway). */
930 		pwarn("** Phase 6b - Recheck Segment Block Accounting\n");
931 		pass5();
932 	}
933 
934 	/* Likewise for pass 0 */
935 	if (!preen)
936 		pwarn("** Phase 6c - Recheck Inode Free List\n");
937 	pass0();
938 }
939