/*	$NetBSD: lfs_rfw.c,v 1.5 2007/10/10 20:42:35 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef LFS_KERNEL_RFW

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_rfw.c,v 1.5 2007/10/10 20:42:35 ad Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/kthread.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/mbuf.h>
#include <sys/file.h>
#include <sys/disklabel.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>
#include <sys/conf.h>
#include <sys/kauth.h>

#include <miscfs/specfs/specdev.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <uvm/uvm.h>
#include <uvm/uvm_stat.h>
#include <uvm/uvm_pager.h>
#include <uvm/uvm_pdaemon.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/genfs/genfs_node.h>

/*
 * Roll-forward code.
 */
static daddr_t check_segsum(struct lfs *, daddr_t, u_int64_t,
    kauth_cred_t, int, int *, struct lwp *);

extern int lfs_do_rfw;

/*
 * Allocate a particular inode with a particular version number, freeing
 * any previous versions of this inode that may have gone before.
 * Used by the roll-forward code.
 *
 * On success 0 is returned and *vpp holds the vnode for "ino".
 * EEXIST is returned (and *vpp set to NULLVP) if a newer version of the
 * inode is already in use; ENOENT is returned (and *vpp set to NULLVP)
 * if the inode is neither in use nor reachable on the free list.
 * Errors from lfs_ialloc() are passed through unchanged.
 *
 * XXX this function does not have appropriate locking to be used on a live fs;
 * XXX but something similar could probably be used for an "undelete" call.
 *
 * Called with the Ifile inode locked.
 */
int
lfs_rf_valloc(struct lfs *fs, ino_t ino, int vers, struct lwp *l,
	      struct vnode **vpp)
{
	IFILE *ifp;
	struct buf *bp, *cbp;
	struct vnode *vp;
	struct inode *ip;
	ino_t tino, oldnext;
	int error;
	CLEANERINFO *cip;

	ASSERT_SEGLOCK(fs); /* XXX it doesn't, really */

	/*
	 * First, just try a vget.  If the version number is the one we want,
	 * we don't have to do anything else.  If the version number is wrong,
	 * take appropriate action.
	 */
	error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp);
	if (error == 0) {
		DLOG((DLOG_RF, "lfs_rf_valloc[1]: ino %d vp %p\n", ino, vp));

		*vpp = vp;
		ip = VTOI(vp);
		if (ip->i_gen == vers)
			return 0;
		else if (ip->i_gen < vers) {
			/* Older incarnation: discard its data, bump version */
			lfs_truncate(vp, (off_t)0, 0, NOCRED, l);
			ip->i_gen = ip->i_ffs1_gen = vers;
			LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);
			return 0;
		} else {
			DLOG((DLOG_RF, "ino %d: sought version %d, got %d\n",
			       ino, vers, ip->i_ffs1_gen));
			vput(vp);
			*vpp = NULLVP;
			return EEXIST;
		}
	}

	/*
	 * The inode is not in use.  Find it on the free list.
	 */
	/* If the Ifile is too short to contain this inum, extend it */
	while (VTOI(fs->lfs_ivnode)->i_size <= (ino /
		fs->lfs_ifpb + fs->lfs_cleansz + fs->lfs_segtabsz)
		<< fs->lfs_bshift) {
		lfs_extend_ifile(fs, NOCRED);
	}

	LFS_IENTRY(ifp, fs, ino, bp);
	oldnext = ifp->if_nextfree;
	ifp->if_version = vers;
	brelse(bp, 0);

	/*
	 * Unlink "ino" from the free list.  The head of the list is read
	 * into tino, NOT into ino: ino must keep naming the inode we were
	 * asked to allocate, since it is passed to lfs_ialloc() below.
	 */
	LFS_GET_HEADFREE(fs, cip, cbp, &tino);
	if (tino == ino) {
		/* It is the first free inode; its successor becomes the head */
		LFS_PUT_HEADFREE(fs, cip, cbp, oldnext);
	} else {
		if (tino == LFS_UNUSED_INUM) {
			/* The free list is empty, so ino cannot be on it */
			*vpp = NULLVP;
			return ENOENT;
		}
		/* Walk the list until we find the entry pointing at ino */
		while (1) {
			LFS_IENTRY(ifp, fs, tino, bp);
			if (ifp->if_nextfree == ino ||
			    ifp->if_nextfree == LFS_UNUSED_INUM)
				break;
			tino = ifp->if_nextfree;
			brelse(bp, 0);
		}
		if (ifp->if_nextfree == LFS_UNUSED_INUM) {
			/* Ran off the end of the list without finding ino */
			brelse(bp, 0);
			*vpp = NULLVP;
			return ENOENT;
		}
		/* Splice ino out: predecessor now points at ino's successor */
		ifp->if_nextfree = oldnext;
		LFS_BWRITE_LOG(bp);
	}

	error = lfs_ialloc(fs, fs->lfs_ivnode, ino, vers, &vp);
	if (error == 0) {
		/*
		 * Make it VREG so we can put blocks on it.  We will change
		 * this later if it turns out to be some other kind of file.
		 */
		ip = VTOI(vp);
		ip->i_mode = ip->i_ffs1_mode = IFREG;
		ip->i_nlink = ip->i_ffs1_nlink = 1;
		ip->i_ffs_effnlink = 1;
		ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p, &vp);
		ip = VTOI(vp);

		DLOG((DLOG_RF, "lfs_rf_valloc: ino %d vp %p\n", ino, vp));

		/* The dirop-nature of this vnode is past */
		lfs_unmark_vnode(vp);
		(void)lfs_vunref(vp);
		vp->v_uflag &= ~VU_DIROP;
		simple_lock(&fs->lfs_interlock);
		simple_lock(&lfs_subsys_lock);
		--lfs_dirvcount;
		simple_unlock(&lfs_subsys_lock);
		--fs->lfs_dirvcount;
		TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
		wakeup(&lfs_dirvcount);
		wakeup(&fs->lfs_dirvcount);
		simple_unlock(&fs->lfs_interlock);
	}
	*vpp = vp;
	return error;
}

/*
 * Load the appropriate indirect block, and change the appropriate pointer.
 * Mark the block dirty.  Do segment and avail accounting.
 *
 * Records that logical block "lbn" (which must be a data block, lbn >= 0)
 * of inode "ino", version "vers", lives at disk address "ndaddr" and is
 * "size" bytes long.  Returns 0 on success, or the error returned by
 * lfs_rf_valloc() or lfs_balloc().
 */
static int
update_meta(struct lfs *fs, ino_t ino, int vers, daddr_t lbn,
	    daddr_t ndaddr, size_t size, struct lwp *l)
{
	int error;
	struct vnode *vp;
	struct inode *ip;
#ifdef DEBUG
	daddr_t odaddr;
	struct indir a[NIADDR];
	int num;
	int i;
#endif /* DEBUG */
	struct buf *bp;
	SEGUSE *sup;

	KASSERT(lbn >= 0);	/* no indirect blocks */

	if ((error = lfs_rf_valloc(fs, ino, vers, l, &vp)) != 0) {
		DLOG((DLOG_RF, "update_meta: ino %d: lfs_rf_valloc"
		      " returned %d\n", ino, error));
		return error;
	}

	if ((error = lfs_balloc(vp, (lbn << fs->lfs_bshift), size,
				NOCRED, 0, &bp)) != 0) {
		vput(vp);
		return (error);
	}
	/* No need to write, the block is already on disk */
	if (bp->b_flags & B_DELWRI) {
		LFS_UNLOCK_BUF(bp);
		fs->lfs_avail += btofsb(fs, bp->b_bcount);
	}
	brelse(bp, BC_INVAL);

	/*
	 * Extend the file, if it is not large enough already.
	 * XXX this is not exactly right, we don't know how much of the
	 * XXX last block is actually used.  We hope that an inode will
	 * XXX appear later to give the correct size.
	 */
	ip = VTOI(vp);
	if (ip->i_size <= (lbn << fs->lfs_bshift)) {
		u_int64_t newsize;

		if (lbn < NDADDR)
			newsize = ip->i_ffs1_size = (lbn << fs->lfs_bshift) +
				(size - fs->lfs_fsize) + 1;
		else
			newsize = ip->i_ffs1_size = (lbn << fs->lfs_bshift) + 1;

		if (ip->i_size < newsize) {
			ip->i_size = newsize;
			/*
			 * tell vm our new size for the case the inode won't
			 * appear later.
			 */
			uvm_vnp_setsize(vp, newsize);
		}
	}

	lfs_update_single(fs, NULL, vp, lbn, ndaddr, size);

	/* Credit the bytes to the segment that now holds the block */
	LFS_SEGENTRY(sup, fs, dtosn(fs, ndaddr), bp);
	sup->su_nbytes += size;
	LFS_WRITESEGENTRY(sup, fs, dtosn(fs, ndaddr), bp);

	/* differences here should be due to UNWRITTEN indirect blocks. */
	KASSERT((lblkno(fs, ip->i_size) > NDADDR &&
	    ip->i_lfs_effnblks == ip->i_ffs1_blocks) ||
	    ip->i_lfs_effnblks >= ip->i_ffs1_blocks);

#ifdef DEBUG
	/* Now look again to make sure it worked */
	ufs_bmaparray(vp, lbn, &odaddr, &a[0], &num, NULL, NULL);
	for (i = num; i > 0; i--) {
		if (!a[i].in_exists)
			panic("update_meta: absent %d lv indirect block", i);
	}
	if (dbtofsb(fs, odaddr) != ndaddr)
		DLOG((DLOG_RF, "update_meta: failed setting ino %d lbn %"
		      PRId64 " to %" PRId64 "\n", ino, lbn, ndaddr));
#endif /* DEBUG */
	vput(vp);
	return 0;
}

/*
 * Process the inode block at disk address "offset": for every inode in it
 * other than the Ifile and reserved low inode numbers, make sure the inode
 * is allocated with the right version, copy in its attributes, record its
 * new location in the Ifile and adjust segment byte counts.
 *
 * Returns the bread() error if the inode block cannot be read, otherwise 0.
 * Inodes for which lfs_rf_valloc() fails are logged and skipped.
 */
static int
update_inoblk(struct lfs *fs, daddr_t offset, kauth_cred_t cred,
	      struct lwp *l)
{
	struct vnode *devvp, *vp;
	struct inode *ip;
	struct ufs1_dinode *dip;
	struct buf *dbp, *ibp;
	int error;
	daddr_t daddr;
	IFILE *ifp;
	SEGUSE *sup;

	devvp = VTOI(fs->lfs_ivnode)->i_devvp;

	/*
	 * Get the inode, update times and perms.
	 * DO NOT update disk blocks, we do that separately.
	 */
	error = bread(devvp, fsbtodb(fs, offset), fs->lfs_ibsize, cred, &dbp);
	if (error) {
		DLOG((DLOG_RF, "update_inoblk: bread returned %d\n", error));
		return error;
	}
	/* Scan the inodes in the block from last to first */
	dip = ((struct ufs1_dinode *)(dbp->b_data)) + INOPB(fs);
	while (--dip >= (struct ufs1_dinode *)dbp->b_data) {
		if (dip->di_inumber > LFS_IFILE_INUM) {
			error = lfs_rf_valloc(fs, dip->di_inumber, dip->di_gen,
					      l, &vp);
			if (error) {
				DLOG((DLOG_RF, "update_inoblk: lfs_rf_valloc"
				      " returned %d\n", error));
				continue;
			}
			ip = VTOI(vp);
			if (dip->di_size != ip->i_size)
				lfs_truncate(vp, dip->di_size, 0, NOCRED, l);
			/* Get mode, link count, size, and times */
			memcpy(ip->i_din.ffs1_din, dip,
			       offsetof(struct ufs1_dinode, di_db[0]));

			/* Then the rest, except di_blocks */
			ip->i_flags = ip->i_ffs1_flags = dip->di_flags;
			ip->i_gen = ip->i_ffs1_gen = dip->di_gen;
			ip->i_uid = ip->i_ffs1_uid = dip->di_uid;
			ip->i_gid = ip->i_ffs1_gid = dip->di_gid;

			ip->i_mode = ip->i_ffs1_mode;
			ip->i_nlink = ip->i_ffs_effnlink = ip->i_ffs1_nlink;
			ip->i_size = ip->i_ffs1_size;

			LFS_SET_UINO(ip, IN_CHANGE | IN_UPDATE);

			/* Re-initialize to get type right */
			ufs_vinit(vp->v_mount, lfs_specop_p, lfs_fifoop_p,
				  &vp);
			vput(vp);

			/* Record change in location */
			LFS_IENTRY(ifp, fs, dip->di_inumber, ibp);
			daddr = ifp->if_daddr;
			ifp->if_daddr = dbtofsb(fs, dbp->b_blkno);
			error = LFS_BWRITE_LOG(ibp); /* Ifile */
			/* And do segment accounting */
			if (dtosn(fs, daddr) != dtosn(fs, dbtofsb(fs, dbp->b_blkno))) {
				if (daddr > 0) {
					LFS_SEGENTRY(sup, fs, dtosn(fs, daddr),
						     ibp);
					sup->su_nbytes -= sizeof (struct ufs1_dinode);
					LFS_WRITESEGENTRY(sup, fs,
							  dtosn(fs, daddr),
							  ibp);
				}
				LFS_SEGENTRY(sup, fs, dtosn(fs, dbtofsb(fs, dbp->b_blkno)),
					     ibp);
				sup->su_nbytes += sizeof (struct ufs1_dinode);
				LFS_WRITESEGENTRY(sup, fs,
						  dtosn(fs, dbtofsb(fs, dbp->b_blkno)),
						  ibp);
			}
		}
	}
	brelse(dbp, BC_AGE);

	return 0;
}

#define CHECK_CKSUM   0x0001  /* Check the checksum to make sure it's valid */
#define CHECK_UPDATE  0x0002  /* Update Ifile for new data blocks / inodes */

/*
 * Examine the partial segment whose summary is at "offset".
 *
 * With CHECK_CKSUM set in "flags", the summary checksum, creation time and
 * data checksum are verified.  With CHECK_UPDATE set, the inode blocks and
 * data blocks described by the summary are applied to the filesystem via
 * update_inoblk() and update_meta().  For filesystems newer than version 1
 * the summary's serial number must equal "nextserial" and its identifier
 * must match the filesystem's.  If "pseg_flags" is not NULL the summary's
 * ss_flags are stored there.
 *
 * Returns the address following this partial segment (or the start of the
 * next segment when this one is exhausted), or -1 on any failure.
 */
static daddr_t
check_segsum(struct lfs *fs, daddr_t offset, u_int64_t nextserial,
	     kauth_cred_t cred, int flags, int *pseg_flags, struct lwp *l)
{
	struct vnode *devvp;
	struct buf *bp, *dbp;
	int error, nblocks = 0, ninos, i, j; /* XXX: gcc */
	SEGSUM *ssp;
	u_long *dp = NULL, *datap = NULL; /* XXX u_int32_t */
	daddr_t oldoffset;
	int32_t *iaddr;	/* XXX ondisk32 */
	FINFO *fip;
	SEGUSE *sup;
	size_t size;

	devvp = VTOI(fs->lfs_ivnode)->i_devvp;
	/*
	 * If the segment has a superblock and we're at the top
	 * of the segment, skip the superblock.
	 */
	if (sntod(fs, dtosn(fs, offset)) == offset) {
		LFS_SEGENTRY(sup, fs, dtosn(fs, offset), bp);
		if (sup->su_flags & SEGUSE_SUPERBLOCK)
			offset += btofsb(fs, LFS_SBPAD);
		brelse(bp, 0);
	}

	/* Read in the segment summary */
	error = bread(devvp, fsbtodb(fs, offset), fs->lfs_sumsize, cred, &bp);
	if (error)
		return -1;

	/* Check summary checksum */
	ssp = (SEGSUM *)bp->b_data;
	if (flags & CHECK_CKSUM) {
		if (ssp->ss_sumsum != cksum(&ssp->ss_datasum,
					   fs->lfs_sumsize -
					   sizeof(ssp->ss_sumsum))) {
			DLOG((DLOG_RF, "Sumsum error at 0x%" PRIx64 "\n", offset));
			offset = -1;
			goto err1;
		}
		if (ssp->ss_nfinfo == 0 && ssp->ss_ninos == 0) {
			DLOG((DLOG_RF, "Empty pseg at 0x%" PRIx64 "\n", offset));
			offset = -1;
			goto err1;
		}
		if (ssp->ss_create < fs->lfs_tstamp) {
			DLOG((DLOG_RF, "Old data at 0x%" PRIx64 "\n", offset));
			offset = -1;
			goto err1;
		}
	}
	if (fs->lfs_version > 1) {
		if (ssp->ss_serial != nextserial) {
			DLOG((DLOG_RF, "Unexpected serial number at 0x%" PRIx64
			      "\n", offset));
			offset = -1;
			goto err1;
		}
		if (ssp->ss_ident != fs->lfs_ident) {
			DLOG((DLOG_RF, "Incorrect fsid (0x%x vs 0x%x) at 0x%"
			      PRIx64 "\n", ssp->ss_ident, fs->lfs_ident, offset));
			offset = -1;
			goto err1;
		}
	}
	if (pseg_flags)
		*pseg_flags = ssp->ss_flags;
	oldoffset = offset;
	offset += btofsb(fs, fs->lfs_sumsize);

	ninos = howmany(ssp->ss_ninos, INOPB(fs));
	/* XXX ondisk32 */
	/* Inode block addresses are read backwards from the summary's end */
	iaddr = (int32_t *)(bp->b_data + fs->lfs_sumsize - sizeof(int32_t));
	if (flags & CHECK_CKSUM) {
		/* Count blocks */
		nblocks = 0;
		fip = (FINFO *)(bp->b_data + SEGSUM_SIZE(fs));
		for (i = 0; i < ssp->ss_nfinfo; ++i) {
			nblocks += fip->fi_nblocks;
			if (fip->fi_nblocks <= 0)
				break;
			/* XXX ondisk32 */
			fip = (FINFO *)(((char *)fip) + FINFOSIZE +
					(fip->fi_nblocks * sizeof(int32_t)));
		}
		nblocks += ninos;
		/* Create the sum array */
		datap = dp = (u_long *)malloc(nblocks * sizeof(u_long),
					      M_SEGMENT, M_WAITOK);
	}

	/* Handle individual blocks */
	fip = (FINFO *)(bp->b_data + SEGSUM_SIZE(fs));
	for (i = 0; i < ssp->ss_nfinfo || ninos; ++i) {
		/* Inode block? */
		if (ninos && *iaddr == offset) {
			if (flags & CHECK_CKSUM) {
				/* Read in the head and add to the buffer */
				error = bread(devvp, fsbtodb(fs, offset), fs->lfs_bsize,
					      cred, &dbp);
				if (error) {
					offset = -1;
					goto err2;
				}
				(*dp++) = ((u_long *)(dbp->b_data))[0];
				brelse(dbp, BC_AGE);
			}
			if (flags & CHECK_UPDATE) {
				if ((error = update_inoblk(fs, offset, cred, l))
				    != 0) {
					offset = -1;
					goto err2;
				}
			}
			offset += btofsb(fs, fs->lfs_ibsize);
			--iaddr;
			--ninos;
			--i; /* compensate */
			continue;
		}
		size = fs->lfs_bsize;
		for (j = 0; j < fip->fi_nblocks; ++j) {
			/* Only the last block of a file may be short */
			if (j == fip->fi_nblocks - 1)
				size = fip->fi_lastlength;
			if (flags & CHECK_CKSUM) {
				error = bread(devvp, fsbtodb(fs, offset), size, cred, &dbp);
				if (error) {
					offset = -1;
					goto err2;
				}
				(*dp++) = ((u_long *)(dbp->b_data))[0];
				brelse(dbp, BC_AGE);
			}
			/* Account for and update any direct blocks */
			if ((flags & CHECK_UPDATE) &&
			   fip->fi_ino > LFS_IFILE_INUM &&
			   fip->fi_blocks[j] >= 0) {
				update_meta(fs, fip->fi_ino, fip->fi_version,
					    fip->fi_blocks[j], offset, size, l);
			}
			offset += btofsb(fs, size);
		}
		/* XXX ondisk32 */
		fip = (FINFO *)(((char *)fip) + FINFOSIZE
				+ fip->fi_nblocks * sizeof(int32_t));
	}
	/* Checksum the array, compare */
	if ((flags & CHECK_CKSUM) &&
	   ssp->ss_datasum != cksum(datap, nblocks * sizeof(u_long)))
	{
		DLOG((DLOG_RF, "Datasum error at 0x%" PRIx64
		      " (wanted %x got %x)\n",
		      offset, ssp->ss_datasum, cksum(datap, nblocks *
						     sizeof(u_long))));
		offset = -1;
		goto err2;
	}

	/* If we're at the end of the segment, move to the next */
	if (dtosn(fs, offset + btofsb(fs, fs->lfs_sumsize + fs->lfs_bsize)) !=
	   dtosn(fs, offset)) {
		if (dtosn(fs, offset) == dtosn(fs, ssp->ss_next)) {
			offset = -1;
			goto err2;
		}
		offset = ssp->ss_next;
		DLOG((DLOG_RF, "LFS roll forward: moving to offset 0x%" PRIx64
		       " -> segment %d\n", offset, dtosn(fs,offset)));
	}

	if (flags & CHECK_UPDATE) {
		fs->lfs_avail -= (offset - oldoffset);
		/* Don't clog the buffer queue */
		simple_lock(&lfs_subsys_lock);
		if (locked_queue_count > LFS_MAX_BUFS ||
		    locked_queue_bytes > LFS_MAX_BYTES) {
			lfs_flush(fs, SEGM_CKP, 0);
		}
		simple_unlock(&lfs_subsys_lock);
	}

    err2:
	if (flags & CHECK_CKSUM)
		free(datap, M_SEGMENT);
    err1:
	/*
	 * Release the summary buffer using the same two-argument
	 * brelse(bp, flags) form as every other call in this file.
	 */
	brelse(bp, BC_AGE);

	/* XXX should we update the serial number even for bad psegs? */
	if ((flags & CHECK_UPDATE) && offset > 0 && fs->lfs_version > 1)
		fs->lfs_serial = nextserial;
	return offset;
}

/*
 * Roll the filesystem forward from its last checkpoint.
 *
 * Nothing is done unless the filesystem is not marked clean, lfs_do_rfw is
 * set, the filesystem version is greater than 1 and a process context is
 * available through "l".  Phase I walks the partial segments written after
 * the checkpoint, verifying each and marking its segment dirty; phase II
 * replays them up to the last good one and the result is flushed with
 * lfs_segwrite().
 */
void
lfs_roll_forward(struct lfs *fs, struct mount *mp, struct lwp *l)
{
	int flags, dirty;
	daddr_t offset, oldoffset, lastgoodpseg;
	int sn, curseg, do_rollforward;
	struct proc *p;
	kauth_cred_t cred;
	SEGUSE *sup;
	struct buf *bp;

	p = l ? l->l_proc : NULL;
	cred = p ? p->p_cred : NOCRED;

	/*
	 * Roll forward.
	 *
	 * We don't roll forward for v1 filesystems, because
	 * of the danger that the clock was turned back between the last
	 * checkpoint and crash.  This would roll forward garbage.
	 *
	 * v2 filesystems don't have this problem because they use a
	 * monotonically increasing serial number instead of a timestamp.
	 */
	do_rollforward = (!(fs->lfs_pflags & LFS_PF_CLEAN) &&
			  lfs_do_rfw && fs->lfs_version > 1 && p != NULL);
	if (do_rollforward) {
		u_int64_t nextserial;
		/*
		 * Phase I: Find the address of the last good partial
		 * segment that was written after the checkpoint.  Mark
		 * the segments in question dirty, so they won't be
		 * reallocated.
		 */
		lastgoodpseg = oldoffset = offset = fs->lfs_offset;
		flags = 0x0;
		DLOG((DLOG_RF, "LFS roll forward phase 1: start at offset 0x%"
		      PRIx64 "\n", offset));
		LFS_SEGENTRY(sup, fs, dtosn(fs, offset), bp);
		if (!(sup->su_flags & SEGUSE_DIRTY))
			--fs->lfs_nclean;
		sup->su_flags |= SEGUSE_DIRTY;
		LFS_WRITESEGENTRY(sup, fs, dtosn(fs, offset), bp);
		nextserial = fs->lfs_serial + 1;
		while ((offset = check_segsum(fs, offset, nextserial,
		    cred, CHECK_CKSUM, &flags, l)) > 0) {
			nextserial++;
			if (sntod(fs, oldoffset) != sntod(fs, offset)) {
				LFS_SEGENTRY(sup, fs, dtosn(fs, oldoffset),
					     bp);
				if (!(sup->su_flags & SEGUSE_DIRTY))
					--fs->lfs_nclean;
				sup->su_flags |= SEGUSE_DIRTY;
				LFS_WRITESEGENTRY(sup, fs, dtosn(fs, oldoffset),
					     bp);
			}

			DLOG((DLOG_RF, "LFS roll forward phase 1: offset=0x%"
			      PRIx64 "\n", offset));
			if (flags & SS_DIROP) {
				DLOG((DLOG_RF, "lfs_mountfs: dirops at 0x%"
				      PRIx64 "\n", oldoffset));
				if (!(flags & SS_CONT))
					DLOG((DLOG_RF, "lfs_mountfs: dirops end "
					      "at 0x%" PRIx64 "\n", oldoffset));
			}
			/* Only a pseg that ends a dirop group is a safe stop */
			if (!(flags & SS_CONT))
				lastgoodpseg = offset;
			oldoffset = offset;
		}
		if (flags & SS_CONT) {
			DLOG((DLOG_RF, "LFS roll forward: warning: incomplete "
			      "dirops discarded\n"));
		}
		DLOG((DLOG_RF, "LFS roll forward phase 1: completed: "
		      "lastgoodpseg=0x%" PRIx64 "\n", lastgoodpseg));
		oldoffset = fs->lfs_offset;
		if (fs->lfs_offset != lastgoodpseg) {
			/* Don't overwrite what we're trying to preserve */
			offset = fs->lfs_offset;
			fs->lfs_offset = lastgoodpseg;
			fs->lfs_curseg = sntod(fs, dtosn(fs, fs->lfs_offset));
			/* Pick the next clean segment after the current one */
			for (sn = curseg = dtosn(fs, fs->lfs_curseg);;) {
				sn = (sn + 1) % fs->lfs_nseg;
				if (sn == curseg)
					panic("lfs_mountfs: no clean segments");
				LFS_SEGENTRY(sup, fs, sn, bp);
				dirty = (sup->su_flags & SEGUSE_DIRTY);
				brelse(bp, 0);
				if (!dirty)
					break;
			}
			fs->lfs_nextseg = sntod(fs, sn);

			/*
			 * Phase II: Roll forward from the first superblock.
			 */
			while (offset != lastgoodpseg) {
				DLOG((DLOG_RF, "LFS roll forward phase 2: 0x%"
				      PRIx64 "\n", offset));
				offset = check_segsum(fs, offset,
				    fs->lfs_serial + 1, cred, CHECK_UPDATE,
				    NULL, l);
				/*
				 * check_segsum() returns -1 on failure.  That
				 * never equals lastgoodpseg, so without this
				 * test the loop would spin forever on a bad
				 * partial segment.
				 */
				if (offset <= 0) {
					DLOG((DLOG_RF, "LFS roll forward phase 2: "
					      "check_segsum failed, stopping\n"));
					break;
				}
			}

			/*
			 * Finish: flush our changes to disk.
			 */
			lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC);
			DLOG((DLOG_RF, "lfs_mountfs: roll forward "
			      "recovered %lld blocks\n",
			      (long long)(lastgoodpseg - oldoffset)));
		}
		DLOG((DLOG_RF, "LFS roll forward complete\n"));
	}
}
#endif /* LFS_KERNEL_RFW */