1 /* $NetBSD: segwrite.c,v 1.45 2015/10/03 08:28:15 dholland Exp $ */ 2 /*- 3 * Copyright (c) 2003 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 /* 31 * Copyright (c) 1991, 1993 32 * The Regents of the University of California. All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. Neither the name of the University nor the names of its contributors 43 * may be used to endorse or promote products derived from this software 44 * without specific prior written permission. 45 * 46 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 47 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 48 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 49 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 50 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 51 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 52 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 54 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 55 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 56 * SUCH DAMAGE. 57 * 58 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95 59 */ 60 61 /* 62 * Partial segment writer, taken from the kernel and adapted for userland. 63 */ 64 #include <sys/types.h> 65 #include <sys/param.h> 66 #include <sys/time.h> 67 #include <sys/buf.h> 68 #include <sys/mount.h> 69 70 /* Override certain things to make <ufs/lfs/lfs.h> work */ 71 #define VU_DIROP 0x01000000 /* XXX XXX from sys/vnode.h */ 72 #define vnode uvnode 73 #define buf ubuf 74 #define panic call_panic 75 76 #include <ufs/lfs/lfs.h> 77 #include <ufs/lfs/lfs_accessors.h> 78 #include <ufs/lfs/lfs_inode.h> 79 80 #include <assert.h> 81 #include <stdio.h> 82 #include <stdlib.h> 83 #include <string.h> 84 #include <err.h> 85 #include <errno.h> 86 #include <util.h> 87 88 #include "bufcache.h" 89 #include "vnode.h" 90 #include "lfs_user.h" 91 #include "segwrite.h" 92 93 /* Compatibility definitions */ 94 extern off_t locked_queue_bytes; 95 int locked_queue_count; 96 off_t written_bytes = 0; 97 off_t written_data = 0; 98 off_t written_indir = 0; 99 off_t written_dev = 0; 100 int written_inodes = 0; 101 102 /* Global variables */ 103 time_t write_time; 104 105 extern u_int32_t cksum(void *, size_t); 106 extern u_int32_t lfs_sb_cksum(struct lfs *); 107 extern int preen; 108 109 static void lfs_shellsort(struct lfs *, 110 struct ubuf **, union lfs_blocks *, int, int); 111 112 /* 113 * Logical block number match routines used when traversing the dirty block 114 * chain. 115 */ 116 int 117 lfs_match_data(struct lfs * fs, struct ubuf * bp) 118 { 119 return (bp->b_lblkno >= 0); 120 } 121 122 int 123 lfs_match_indir(struct lfs * fs, struct ubuf * bp) 124 { 125 daddr_t lbn; 126 127 lbn = bp->b_lblkno; 128 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 0); 129 } 130 131 int 132 lfs_match_dindir(struct lfs * fs, struct ubuf * bp) 133 { 134 daddr_t lbn; 135 136 lbn = bp->b_lblkno; 137 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 1); 138 } 139 140 int 141 lfs_match_tindir(struct lfs * fs, struct ubuf * bp) 142 { 143 daddr_t lbn; 144 145 lbn = bp->b_lblkno; 146 return (lbn < 0 && (-lbn - ULFS_NDADDR) % LFS_NINDIR(fs) == 2); 147 } 148 149 /* 150 * Do a checkpoint. 151 */ 152 int 153 lfs_segwrite(struct lfs * fs, int flags) 154 { 155 struct inode *ip; 156 struct segment *sp; 157 struct uvnode *vp; 158 SEGSUM *ssp; 159 int redo; 160 161 lfs_seglock(fs, flags | SEGM_CKP); 162 sp = fs->lfs_sp; 163 164 lfs_writevnodes(fs, sp, VN_REG); 165 lfs_writevnodes(fs, sp, VN_DIROP); 166 ssp = (SEGSUM *)sp->segsum; 167 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT)); 168 169 do { 170 vp = fs->lfs_ivnode; 171 fs->lfs_flags &= ~LFS_IFDIRTY; 172 ip = VTOI(vp); 173 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL || lfs_sb_getidaddr(fs) <= 0) 174 lfs_writefile(fs, sp, vp); 175 176 redo = lfs_writeinode(fs, sp, ip); 177 redo += lfs_writeseg(fs, sp); 178 redo += (fs->lfs_flags & LFS_IFDIRTY); 179 } while (redo); 180 181 lfs_segunlock(fs); 182 #if 0 183 printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n", 184 written_bytes, (ulfs_daddr_t)lfs_btofsb(fs, written_bytes)); 185 printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n", 186 written_data, (ulfs_daddr_t)lfs_btofsb(fs, written_data)); 187 printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n", 188 written_indir, (ulfs_daddr_t)lfs_btofsb(fs, written_indir)); 189 printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n", 190 written_dev, (ulfs_daddr_t)lfs_btofsb(fs, written_dev)); 191 printf("wrote %d inodes (%" PRId32 " fsb)\n", 192 written_inodes, lfs_btofsb(fs, written_inodes * fs->lfs_ibsize)); 193 #endif 194 return 0; 195 } 196 197 /* 198 * Write the dirty blocks associated with a vnode. 199 */ 200 void 201 lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp) 202 { 203 struct ubuf *bp; 204 FINFO *fip; 205 struct inode *ip; 206 IFILE *ifp; 207 SEGSUM *ssp; 208 209 ip = VTOI(vp); 210 211 if (sp->seg_bytes_left < lfs_sb_getbsize(fs) || 212 sp->sum_bytes_left < FINFOSIZE(fs) + LFS_BLKPTRSIZE(fs)) 213 (void) lfs_writeseg(fs, sp); 214 215 sp->sum_bytes_left -= FINFOSIZE(fs); 216 ssp = (SEGSUM *)sp->segsum; 217 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1); 218 219 if (vp->v_uflag & VU_DIROP) { 220 lfs_ss_setflags(fs, ssp, 221 lfs_ss_getflags(fs, ssp) | (SS_DIROP | SS_CONT)); 222 } 223 224 fip = sp->fip; 225 lfs_fi_setnblocks(fs, fip, 0); 226 lfs_fi_setino(fs, fip, ip->i_number); 227 LFS_IENTRY(ifp, fs, lfs_fi_getino(fs, fip), bp); 228 lfs_fi_setversion(fs, fip, lfs_if_getversion(fs, ifp)); 229 brelse(bp, 0); 230 231 lfs_gather(fs, sp, vp, lfs_match_data); 232 lfs_gather(fs, sp, vp, lfs_match_indir); 233 lfs_gather(fs, sp, vp, lfs_match_dindir); 234 lfs_gather(fs, sp, vp, lfs_match_tindir); 235 236 fip = sp->fip; 237 if (lfs_fi_getnblocks(fs, fip) != 0) { 238 sp->fip = NEXT_FINFO(fs, fip); 239 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip); 240 } else { 241 /* XXX shouldn't this update sp->fip? */ 242 sp->sum_bytes_left += FINFOSIZE(fs); 243 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) - 1); 244 } 245 } 246 247 int 248 lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip) 249 { 250 struct ubuf *bp, *ibp; 251 union lfs_dinode *cdp; 252 IFILE *ifp; 253 SEGUSE *sup; 254 SEGSUM *ssp; 255 daddr_t daddr; 256 ino_t ino; 257 IINFO *iip; 258 int i, fsb = 0; 259 int redo_ifile = 0; 260 struct timespec ts; 261 int gotblk = 0; 262 263 /* Allocate a new inode block if necessary. */ 264 if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) && 265 sp->ibp == NULL) { 266 /* Allocate a new segment if necessary. */ 267 if (sp->seg_bytes_left < lfs_sb_getibsize(fs) || 268 sp->sum_bytes_left < LFS_BLKPTRSIZE(fs)) 269 (void) lfs_writeseg(fs, sp); 270 271 /* Get next inode block. */ 272 daddr = lfs_sb_getoffset(fs); 273 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs))); 274 sp->ibp = *sp->cbpp++ = 275 getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), 276 lfs_sb_getibsize(fs)); 277 sp->ibp->b_flags |= B_GATHERED; 278 gotblk++; 279 280 /* Zero out inode numbers */ 281 for (i = 0; i < LFS_INOPB(fs); ++i) { 282 union lfs_dinode *tmpdip; 283 284 tmpdip = DINO_IN_BLOCK(fs, sp->ibp->b_data, i); 285 lfs_dino_setinumber(fs, tmpdip, 0); 286 } 287 288 ++sp->start_bpp; 289 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getibsize(fs))); 290 /* Set remaining space counters. */ 291 sp->seg_bytes_left -= lfs_sb_getibsize(fs); 292 sp->sum_bytes_left -= LFS_BLKPTRSIZE(fs); 293 294 /* Store the address in the segment summary. */ 295 iip = NTH_IINFO(fs, sp->segsum, sp->ninodes / LFS_INOPB(fs)); 296 lfs_ii_setblock(fs, iip, daddr); 297 } 298 /* Update the inode times and copy the inode onto the inode page. */ 299 ts.tv_nsec = 0; 300 ts.tv_sec = write_time; 301 /* XXX kludge --- don't redirty the ifile just to put times on it */ 302 if (ip->i_number != LFS_IFILE_INUM) 303 LFS_ITIMES(ip, &ts, &ts, &ts); 304 305 /* 306 * If this is the Ifile, and we've already written the Ifile in this 307 * partial segment, just overwrite it (it's not on disk yet) and 308 * continue. 309 * 310 * XXX we know that the bp that we get the second time around has 311 * already been gathered. 312 */ 313 if (ip->i_number == LFS_IFILE_INUM && sp->idp) { 314 lfs_copy_dinode(fs, sp->idp, ip->i_din); 315 ip->i_lfs_osize = lfs_dino_getsize(fs, ip->i_din); 316 return 0; 317 } 318 bp = sp->ibp; 319 cdp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs)); 320 lfs_copy_dinode(fs, cdp, ip->i_din); 321 322 /* If all blocks are goig to disk, update the "size on disk" */ 323 ip->i_lfs_osize = lfs_dino_getsize(fs, ip->i_din); 324 325 if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */ 326 sp->idp = DINO_IN_BLOCK(fs, bp->b_data, sp->ninodes % LFS_INOPB(fs)); 327 if (gotblk) { 328 LFS_LOCK_BUF(bp); 329 assert(!(bp->b_flags & B_INVAL)); 330 brelse(bp, 0); 331 } 332 /* Increment inode count in segment summary block. */ 333 ssp = (SEGSUM *)sp->segsum; 334 lfs_ss_setninos(fs, ssp, lfs_ss_getninos(fs, ssp) + 1); 335 336 /* If this page is full, set flag to allocate a new page. */ 337 if (++sp->ninodes % LFS_INOPB(fs) == 0) 338 sp->ibp = NULL; 339 340 /* 341 * If updating the ifile, update the super-block. Update the disk 342 * address for this inode in the ifile. 343 */ 344 ino = ip->i_number; 345 if (ino == LFS_IFILE_INUM) { 346 daddr = lfs_sb_getidaddr(fs); 347 lfs_sb_setidaddr(fs, LFS_DBTOFSB(fs, bp->b_blkno)); 348 sbdirty(); 349 } else { 350 LFS_IENTRY(ifp, fs, ino, ibp); 351 daddr = lfs_if_getdaddr(fs, ifp); 352 lfs_if_setdaddr(fs, ifp, LFS_DBTOFSB(fs, bp->b_blkno) + fsb); 353 (void)LFS_BWRITE_LOG(ibp); /* Ifile */ 354 } 355 356 /* 357 * Account the inode: it no longer belongs to its former segment, 358 * though it will not belong to the new segment until that segment 359 * is actually written. 360 */ 361 if (daddr != LFS_UNUSED_DADDR) { 362 u_int32_t oldsn = lfs_dtosn(fs, daddr); 363 LFS_SEGENTRY(sup, fs, oldsn, bp); 364 sup->su_nbytes -= DINOSIZE(fs); 365 redo_ifile = 366 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 367 if (redo_ifile) 368 fs->lfs_flags |= LFS_IFDIRTY; 369 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */ 370 } 371 return redo_ifile; 372 } 373 374 int 375 lfs_gatherblock(struct segment * sp, struct ubuf * bp) 376 { 377 struct lfs *fs; 378 SEGSUM *ssp; 379 int version; 380 int j, blksinblk; 381 382 /* 383 * If full, finish this segment. We may be doing I/O, so 384 * release and reacquire the splbio(). 385 */ 386 fs = sp->fs; 387 blksinblk = howmany(bp->b_bcount, lfs_sb_getbsize(fs)); 388 if (sp->sum_bytes_left < LFS_BLKPTRSIZE(fs) * blksinblk || 389 sp->seg_bytes_left < bp->b_bcount) { 390 lfs_updatemeta(sp); 391 392 version = lfs_fi_getversion(fs, sp->fip); 393 (void) lfs_writeseg(fs, sp); 394 395 lfs_fi_setversion(fs, sp->fip, version); 396 lfs_fi_setino(fs, sp->fip, VTOI(sp->vp)->i_number); 397 /* Add the current file to the segment summary. */ 398 ssp = (SEGSUM *)sp->segsum; 399 lfs_ss_setnfinfo(fs, ssp, lfs_ss_getnfinfo(fs, ssp) + 1); 400 sp->sum_bytes_left -= FINFOSIZE(fs); 401 402 return 1; 403 } 404 /* Insert into the buffer list, update the FINFO block. */ 405 bp->b_flags |= B_GATHERED; 406 /* bp->b_flags &= ~B_DONE; */ 407 408 *sp->cbpp++ = bp; 409 for (j = 0; j < blksinblk; j++) { 410 unsigned bn; 411 412 bn = lfs_fi_getnblocks(fs, sp->fip); 413 lfs_fi_setnblocks(fs, sp->fip, bn + 1); 414 lfs_fi_setblock(fs, sp->fip, bn, bp->b_lblkno + j);; 415 } 416 417 sp->sum_bytes_left -= LFS_BLKPTRSIZE(fs) * blksinblk; 418 sp->seg_bytes_left -= bp->b_bcount; 419 return 0; 420 } 421 422 int 423 lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *)) 424 { 425 struct ubuf *bp, *nbp; 426 int count = 0; 427 428 sp->vp = vp; 429 loop: 430 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 431 nbp = LIST_NEXT(bp, b_vnbufs); 432 433 assert(bp->b_flags & B_DELWRI); 434 if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) { 435 continue; 436 } 437 if (lfs_gatherblock(sp, bp)) { 438 goto loop; 439 } 440 count++; 441 } 442 443 lfs_updatemeta(sp); 444 sp->vp = NULL; 445 return count; 446 } 447 448 449 /* 450 * Change the given block's address to ndaddr, finding its previous 451 * location using ulfs_bmaparray(). 452 * 453 * Account for this change in the segment table. 454 */ 455 static void 456 lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn, 457 daddr_t ndaddr, int size) 458 { 459 SEGUSE *sup; 460 struct ubuf *bp; 461 struct indir a[ULFS_NIADDR + 2], *ap; 462 struct inode *ip; 463 struct uvnode *vp; 464 daddr_t daddr, ooff; 465 int num, error; 466 int osize; 467 int frags, ofrags; 468 469 vp = sp->vp; 470 ip = VTOI(vp); 471 472 error = ulfs_bmaparray(fs, vp, lbn, &daddr, a, &num); 473 if (error) 474 errx(EXIT_FAILURE, "%s: ulfs_bmaparray returned %d looking up lbn %" 475 PRId64 "", __func__, error, lbn); 476 if (daddr > 0) 477 daddr = LFS_DBTOFSB(fs, daddr); 478 479 frags = lfs_numfrags(fs, size); 480 switch (num) { 481 case 0: 482 ooff = lfs_dino_getdb(fs, ip->i_din, lbn); 483 if (ooff == UNWRITTEN) 484 lfs_dino_setblocks(fs, ip->i_din, 485 lfs_dino_getblocks(fs, ip->i_din) + frags); 486 else { 487 /* possible fragment truncation or extension */ 488 ofrags = lfs_btofsb(fs, ip->i_lfs_fragsize[lbn]); 489 lfs_dino_setblocks(fs, ip->i_din, 490 lfs_dino_getblocks(fs, ip->i_din) + (frags - ofrags)); 491 } 492 lfs_dino_setdb(fs, ip->i_din, lbn, ndaddr); 493 break; 494 case 1: 495 ooff = lfs_dino_getib(fs, ip->i_din, a[0].in_off); 496 if (ooff == UNWRITTEN) 497 lfs_dino_setblocks(fs, ip->i_din, 498 lfs_dino_getblocks(fs, ip->i_din) + frags); 499 lfs_dino_setib(fs, ip->i_din, a[0].in_off, ndaddr); 500 break; 501 default: 502 ap = &a[num - 1]; 503 if (bread(vp, ap->in_lbn, lfs_sb_getbsize(fs), 0, &bp)) 504 errx(EXIT_FAILURE, "%s: bread bno %" PRId64, __func__, 505 ap->in_lbn); 506 507 ooff = lfs_iblock_get(fs, bp->b_data, ap->in_off); 508 if (ooff == UNWRITTEN) 509 lfs_dino_setblocks(fs, ip->i_din, 510 lfs_dino_getblocks(fs, ip->i_din) + frags); 511 lfs_iblock_set(fs, bp->b_data, ap->in_off, ndaddr); 512 (void) VOP_BWRITE(bp); 513 } 514 515 /* 516 * Update segment usage information, based on old size 517 * and location. 518 */ 519 if (daddr > 0) { 520 u_int32_t oldsn = lfs_dtosn(fs, daddr); 521 if (lbn >= 0 && lbn < ULFS_NDADDR) 522 osize = ip->i_lfs_fragsize[lbn]; 523 else 524 osize = lfs_sb_getbsize(fs); 525 LFS_SEGENTRY(sup, fs, oldsn, bp); 526 sup->su_nbytes -= osize; 527 if (!(bp->b_flags & B_GATHERED)) 528 fs->lfs_flags |= LFS_IFDIRTY; 529 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); 530 } 531 /* 532 * Now that this block has a new address, and its old 533 * segment no longer owns it, we can forget about its 534 * old size. 535 */ 536 if (lbn >= 0 && lbn < ULFS_NDADDR) 537 ip->i_lfs_fragsize[lbn] = size; 538 } 539 540 /* 541 * Update the metadata that points to the blocks listed in the FINFO 542 * array. 543 */ 544 void 545 lfs_updatemeta(struct segment * sp) 546 { 547 struct ubuf *sbp; 548 struct lfs *fs; 549 struct uvnode *vp; 550 daddr_t lbn; 551 int i, nblocks, num; 552 int frags; 553 int bytesleft, size; 554 union lfs_blocks tmpptr; 555 556 fs = sp->fs; 557 vp = sp->vp; 558 559 /* 560 * This code was cutpasted from the kernel. See the 561 * corresponding comment in lfs_segment.c. 562 */ 563 #if 0 564 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 565 #else 566 lfs_blocks_fromvoid(fs, &tmpptr, (void *)NEXT_FINFO(fs, sp->fip)); 567 nblocks = lfs_blocks_sub(fs, &tmpptr, &sp->start_lbp); 568 //nblocks_orig = nblocks; 569 #endif 570 571 if (vp == NULL || nblocks == 0) 572 return; 573 574 /* 575 * This count may be high due to oversize blocks from lfs_gop_write. 576 * Correct for this. (XXX we should be able to keep track of these.) 577 */ 578 for (i = 0; i < nblocks; i++) { 579 if (sp->start_bpp[i] == NULL) { 580 printf("nblocks = %d, not %d\n", i, nblocks); 581 nblocks = i; 582 break; 583 } 584 num = howmany(sp->start_bpp[i]->b_bcount, lfs_sb_getbsize(fs)); 585 nblocks -= num - 1; 586 } 587 588 /* 589 * Sort the blocks. 590 */ 591 lfs_shellsort(fs, sp->start_bpp, &sp->start_lbp, nblocks, lfs_sb_getbsize(fs)); 592 593 /* 594 * Record the length of the last block in case it's a fragment. 595 * If there are indirect blocks present, they sort last. An 596 * indirect block will be lfs_bsize and its presence indicates 597 * that you cannot have fragments. 598 */ 599 lfs_fi_setlastlength(fs, sp->fip, ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & 600 lfs_sb_getbmask(fs)) + 1); 601 602 /* 603 * Assign disk addresses, and update references to the logical 604 * block and the segment usage information. 605 */ 606 for (i = nblocks; i--; ++sp->start_bpp) { 607 sbp = *sp->start_bpp; 608 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0); 609 610 sbp->b_blkno = LFS_FSBTODB(fs, lfs_sb_getoffset(fs)); 611 612 /* 613 * If we write a frag in the wrong place, the cleaner won't 614 * be able to correctly identify its size later, and the 615 * segment will be uncleanable. (Even worse, it will assume 616 * that the indirect block that actually ends the list 617 * is of a smaller size!) 618 */ 619 if ((sbp->b_bcount & lfs_sb_getbmask(fs)) && i != 0) 620 errx(EXIT_FAILURE, "%s: fragment is not last block", __func__); 621 622 /* 623 * For each subblock in this possibly oversized block, 624 * update its address on disk. 625 */ 626 for (bytesleft = sbp->b_bcount; bytesleft > 0; 627 bytesleft -= lfs_sb_getbsize(fs)) { 628 size = MIN(bytesleft, lfs_sb_getbsize(fs)); 629 frags = lfs_numfrags(fs, size); 630 lbn = lfs_blocks_get(fs, &sp->start_lbp, 0); 631 lfs_blocks_inc(fs, &sp->start_lbp); 632 lfs_update_single(fs, sp, lbn, lfs_sb_getoffset(fs), size); 633 lfs_sb_addoffset(fs, frags); 634 } 635 636 } 637 } 638 639 /* 640 * Start a new segment. 641 */ 642 int 643 lfs_initseg(struct lfs * fs) 644 { 645 struct segment *sp; 646 SEGUSE *sup; 647 SEGSUM *ssp; 648 struct ubuf *bp, *sbp; 649 int repeat; 650 651 sp = fs->lfs_sp; 652 653 repeat = 0; 654 655 /* Advance to the next segment. */ 656 if (!LFS_PARTIAL_FITS(fs)) { 657 /* lfs_avail eats the remaining space */ 658 lfs_sb_subavail(fs, lfs_sb_getfsbpseg(fs) - (lfs_sb_getoffset(fs) - 659 lfs_sb_getcurseg(fs))); 660 lfs_newseg(fs); 661 repeat = 1; 662 lfs_sb_setoffset(fs, lfs_sb_getcurseg(fs)); 663 664 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 665 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs)); 666 667 /* 668 * If the segment contains a superblock, update the offset 669 * and summary address to skip over it. 670 */ 671 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 672 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 673 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_SBPAD)); 674 sp->seg_bytes_left -= LFS_SBPAD; 675 } 676 brelse(bp, 0); 677 /* Segment zero could also contain the labelpad */ 678 if (lfs_sb_getversion(fs) > 1 && sp->seg_number == 0 && 679 lfs_sb_gets0addr(fs) < lfs_btofsb(fs, LFS_LABELPAD)) { 680 lfs_sb_addoffset(fs, lfs_btofsb(fs, LFS_LABELPAD) - lfs_sb_gets0addr(fs)); 681 sp->seg_bytes_left -= LFS_LABELPAD - lfs_fsbtob(fs, lfs_sb_gets0addr(fs)); 682 } 683 } else { 684 sp->seg_number = lfs_dtosn(fs, lfs_sb_getcurseg(fs)); 685 sp->seg_bytes_left = lfs_fsbtob(fs, lfs_sb_getfsbpseg(fs) - 686 (lfs_sb_getoffset(fs) - lfs_sb_getcurseg(fs))); 687 } 688 lfs_sb_setlastpseg(fs, lfs_sb_getoffset(fs)); 689 690 sp->fs = fs; 691 sp->ibp = NULL; 692 sp->idp = NULL; 693 sp->ninodes = 0; 694 sp->ndupino = 0; 695 696 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 697 sp->cbpp = sp->bpp; 698 sbp = *sp->cbpp = getblk(fs->lfs_devvp, 699 LFS_FSBTODB(fs, lfs_sb_getoffset(fs)), lfs_sb_getsumsize(fs)); 700 sp->segsum = sbp->b_data; 701 memset(sp->segsum, 0, lfs_sb_getsumsize(fs)); 702 sp->start_bpp = ++sp->cbpp; 703 lfs_sb_addoffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 704 705 /* Set point to SEGSUM, initialize it. */ 706 ssp = sp->segsum; 707 lfs_ss_setnext(fs, ssp, lfs_sb_getnextseg(fs)); 708 lfs_ss_setnfinfo(fs, ssp, 0); 709 lfs_ss_setninos(fs, ssp, 0); 710 lfs_ss_setmagic(fs, ssp, SS_MAGIC); 711 712 /* Set pointer to first FINFO, initialize it. */ 713 sp->fip = SEGSUM_FINFOBASE(fs, ssp); 714 lfs_fi_setnblocks(fs, sp->fip, 0); 715 lfs_blocks_fromfinfo(fs, &sp->start_lbp, sp->fip); 716 lfs_fi_setlastlength(fs, sp->fip, 0); 717 718 sp->seg_bytes_left -= lfs_sb_getsumsize(fs); 719 sp->sum_bytes_left = lfs_sb_getsumsize(fs) - SEGSUM_SIZE(fs); 720 721 LFS_LOCK_BUF(sbp); 722 brelse(sbp, 0); 723 return repeat; 724 } 725 726 /* 727 * Return the next segment to write. 728 */ 729 void 730 lfs_newseg(struct lfs * fs) 731 { 732 CLEANERINFO *cip; 733 SEGUSE *sup; 734 struct ubuf *bp; 735 int curseg, isdirty, sn; 736 737 LFS_SEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 738 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 739 sup->su_nbytes = 0; 740 sup->su_nsums = 0; 741 sup->su_ninos = 0; 742 LFS_WRITESEGENTRY(sup, fs, lfs_dtosn(fs, lfs_sb_getnextseg(fs)), bp); 743 744 LFS_CLEANERINFO(cip, fs, bp); 745 lfs_ci_shiftcleantodirty(fs, cip, 1); 746 lfs_sb_setnclean(fs, lfs_ci_getclean(fs, cip)); 747 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 748 749 lfs_sb_setlastseg(fs, lfs_sb_getcurseg(fs)); 750 lfs_sb_setcurseg(fs, lfs_sb_getnextseg(fs)); 751 for (sn = curseg = lfs_dtosn(fs, lfs_sb_getcurseg(fs)) + lfs_sb_getinterleave(fs);;) { 752 sn = (sn + 1) % lfs_sb_getnseg(fs); 753 if (sn == curseg) 754 errx(EXIT_FAILURE, "%s: no clean segments", __func__); 755 LFS_SEGENTRY(sup, fs, sn, bp); 756 isdirty = sup->su_flags & SEGUSE_DIRTY; 757 brelse(bp, 0); 758 759 if (!isdirty) 760 break; 761 } 762 763 ++fs->lfs_nactive; 764 lfs_sb_setnextseg(fs, lfs_sntod(fs, sn)); 765 } 766 767 768 int 769 lfs_writeseg(struct lfs * fs, struct segment * sp) 770 { 771 struct ubuf **bpp, *bp; 772 SEGUSE *sup; 773 SEGSUM *ssp; 774 char *datap, *dp; 775 int i; 776 int do_again, nblocks, byteoffset; 777 size_t el_size; 778 u_short ninos; 779 size_t sumstart; 780 struct uvnode *devvp; 781 782 /* 783 * If there are no buffers other than the segment summary to write 784 * and it is not a checkpoint, don't do anything. On a checkpoint, 785 * even if there aren't any buffers, you need to write the superblock. 786 */ 787 nblocks = sp->cbpp - sp->bpp; 788 #if 0 789 printf("write %d blocks at 0x%x\n", 790 nblocks, (int)LFS_DBTOFSB(fs, (*sp->bpp)->b_blkno)); 791 #endif 792 if (nblocks == 1) 793 return 0; 794 795 devvp = fs->lfs_devvp; 796 797 /* Update the segment usage information. */ 798 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 799 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 800 801 /* Loop through all blocks, except the segment summary. */ 802 for (bpp = sp->bpp; ++bpp < sp->cbpp;) { 803 if ((*bpp)->b_vp != devvp) { 804 sup->su_nbytes += (*bpp)->b_bcount; 805 } 806 assert(lfs_dtosn(fs, LFS_DBTOFSB(fs, (*bpp)->b_blkno)) == sp->seg_number); 807 } 808 809 ssp = (SEGSUM *) sp->segsum; 810 lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) | SS_RFW); 811 812 ninos = (lfs_ss_getninos(fs, ssp) + LFS_INOPB(fs) - 1) / LFS_INOPB(fs); 813 sup->su_nbytes += lfs_ss_getninos(fs, ssp) * DINOSIZE(fs); 814 815 if (lfs_sb_getversion(fs) == 1) 816 sup->su_olastmod = write_time; 817 else 818 sup->su_lastmod = write_time; 819 sup->su_ninos += ninos; 820 ++sup->su_nsums; 821 lfs_sb_adddmeta(fs, (lfs_btofsb(fs, lfs_sb_getsumsize(fs)) + lfs_btofsb(fs, ninos * 822 lfs_sb_getibsize(fs)))); 823 lfs_sb_subavail(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 824 825 do_again = !(bp->b_flags & B_GATHERED); 826 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ 827 828 /* 829 * Compute checksum across data and then across summary; the first 830 * block (the summary block) is skipped. Set the create time here 831 * so that it's guaranteed to be later than the inode mod times. 832 */ 833 if (lfs_sb_getversion(fs) == 1) 834 el_size = sizeof(u_long); 835 else 836 el_size = sizeof(u_int32_t); 837 datap = dp = emalloc(nblocks * el_size); 838 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 839 ++bpp; 840 /* Loop through gop_write cluster blocks */ 841 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; 842 byteoffset += lfs_sb_getbsize(fs)) { 843 memcpy(dp, (*bpp)->b_data + byteoffset, el_size); 844 dp += el_size; 845 } 846 bremfree(*bpp); 847 (*bpp)->b_flags |= B_BUSY; 848 } 849 if (lfs_sb_getversion(fs) == 1) 850 lfs_ss_setocreate(fs, ssp, write_time); 851 else { 852 lfs_ss_setcreate(fs, ssp, write_time); 853 lfs_sb_addserial(fs, 1); 854 lfs_ss_setserial(fs, ssp, lfs_sb_getserial(fs)); 855 lfs_ss_setident(fs, ssp, lfs_sb_getident(fs)); 856 } 857 /* Set the summary block busy too */ 858 bremfree(*(sp->bpp)); 859 (*(sp->bpp))->b_flags |= B_BUSY; 860 861 lfs_ss_setdatasum(fs, ssp, cksum(datap, (nblocks - 1) * el_size)); 862 sumstart = lfs_ss_getsumstart(fs); 863 lfs_ss_setsumsum(fs, ssp, 864 cksum((char *)ssp + sumstart, lfs_sb_getsumsize(fs) - sumstart)); 865 free(datap); 866 datap = dp = NULL; 867 lfs_sb_subbfree(fs, (lfs_btofsb(fs, ninos * lfs_sb_getibsize(fs)) + 868 lfs_btofsb(fs, lfs_sb_getsumsize(fs)))); 869 870 if (devvp == NULL) 871 errx(EXIT_FAILURE, "devvp is NULL"); 872 for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) { 873 bp = *bpp; 874 #if 0 875 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n", 876 nblocks - i, bp, bp->b_flags, bp->b_blkno); 877 printf(" vp = %p\n", bp->b_vp); 878 if (bp->b_vp != fs->lfs_devvp) 879 printf(" ino = %d lbn = %" PRId64 "\n", 880 VTOI(bp->b_vp)->i_number, bp->b_lblkno); 881 #endif 882 if (bp->b_vp == fs->lfs_devvp) 883 written_dev += bp->b_bcount; 884 else { 885 if (bp->b_lblkno >= 0) 886 written_data += bp->b_bcount; 887 else 888 written_indir += bp->b_bcount; 889 } 890 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR | 891 B_LOCKED); 892 bwrite(bp); 893 written_bytes += bp->b_bcount; 894 } 895 written_inodes += ninos; 896 897 return (lfs_initseg(fs) || do_again); 898 } 899 900 /* 901 * Our own copy of shellsort. XXX use qsort or heapsort. 902 */ 903 static void 904 lfs_shellsort(struct lfs *fs, 905 struct ubuf ** bp_array, union lfs_blocks *lb_array, int nmemb, int size) 906 { 907 static int __rsshell_increments[] = {4, 1, 0}; 908 int incr, *incrp, t1, t2; 909 struct ubuf *bp_temp; 910 911 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 912 for (t1 = incr; t1 < nmemb; ++t1) 913 for (t2 = t1 - incr; t2 >= 0;) 914 if ((u_int32_t) bp_array[t2]->b_lblkno > 915 (u_int32_t) bp_array[t2 + incr]->b_lblkno) { 916 bp_temp = bp_array[t2]; 917 bp_array[t2] = bp_array[t2 + incr]; 918 bp_array[t2 + incr] = bp_temp; 919 t2 -= incr; 920 } else 921 break; 922 923 /* Reform the list of logical blocks */ 924 incr = 0; 925 for (t1 = 0; t1 < nmemb; t1++) { 926 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 927 lfs_blocks_set(fs, lb_array, incr++, 928 bp_array[t1]->b_lblkno + t2); 929 } 930 } 931 } 932 933 934 /* 935 * lfs_seglock -- 936 * Single thread the segment writer. 937 */ 938 int 939 lfs_seglock(struct lfs * fs, unsigned long flags) 940 { 941 struct segment *sp; 942 size_t allocsize; 943 944 if (fs->lfs_seglock) { 945 ++fs->lfs_seglock; 946 fs->lfs_sp->seg_flags |= flags; 947 return 0; 948 } 949 fs->lfs_seglock = 1; 950 951 sp = fs->lfs_sp = emalloc(sizeof(*sp)); 952 allocsize = lfs_sb_getssize(fs) * sizeof(struct ubuf *); 953 sp->bpp = emalloc(allocsize); 954 if (!sp->bpp) 955 err(!preen, "Could not allocate %zu bytes", allocsize); 956 sp->seg_flags = flags; 957 sp->vp = NULL; 958 sp->seg_iocount = 0; 959 (void) lfs_initseg(fs); 960 961 return 0; 962 } 963 964 /* 965 * lfs_segunlock -- 966 * Single thread the segment writer. 967 */ 968 void 969 lfs_segunlock(struct lfs * fs) 970 { 971 struct segment *sp; 972 struct ubuf *bp; 973 974 sp = fs->lfs_sp; 975 976 if (fs->lfs_seglock == 1) { 977 if (sp->bpp != sp->cbpp) { 978 /* Free allocated segment summary */ 979 lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs))); 980 bp = *sp->bpp; 981 bremfree(bp); 982 bp->b_flags |= B_DONE | B_INVAL; 983 bp->b_flags &= ~B_DELWRI; 984 reassignbuf(bp, bp->b_vp); 985 bp->b_flags |= B_BUSY; /* XXX */ 986 brelse(bp, 0); 987 } else 988 printf("unlock to 0 with no summary"); 989 990 free(sp->bpp); 991 sp->bpp = NULL; 992 free(sp); 993 fs->lfs_sp = NULL; 994 995 fs->lfs_nactive = 0; 996 997 /* Since we *know* everything's on disk, write both sbs */ 998 lfs_writesuper(fs, lfs_sb_getsboff(fs, 0)); 999 lfs_writesuper(fs, lfs_sb_getsboff(fs, 1)); 1000 1001 --fs->lfs_seglock; 1002 fs->lfs_lockpid = 0; 1003 } else if (fs->lfs_seglock == 0) { 1004 errx(EXIT_FAILURE, "Seglock not held"); 1005 } else { 1006 --fs->lfs_seglock; 1007 } 1008 } 1009 1010 int 1011 lfs_writevnodes(struct lfs *fs, struct segment *sp, int op) 1012 { 1013 struct inode *ip; 1014 struct uvnode *vp; 1015 int inodes_written = 0; 1016 1017 LIST_FOREACH(vp, &vnodelist, v_mntvnodes) { 1018 if (vp->v_bmap_op != lfs_vop_bmap) 1019 continue; 1020 1021 ip = VTOI(vp); 1022 1023 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || 1024 (op != VN_DIROP && (vp->v_uflag & VU_DIROP))) { 1025 continue; 1026 } 1027 /* 1028 * Write the inode/file if dirty and it's not the IFILE. 1029 */ 1030 if (ip->i_flag & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) { 1031 if (ip->i_number != LFS_IFILE_INUM) 1032 lfs_writefile(fs, sp, vp); 1033 (void) lfs_writeinode(fs, sp, ip); 1034 inodes_written++; 1035 } 1036 } 1037 return inodes_written; 1038 } 1039 1040 void 1041 lfs_writesuper(struct lfs *fs, daddr_t daddr) 1042 { 1043 struct ubuf *bp; 1044 1045 /* Set timestamp of this version of the superblock */ 1046 if (lfs_sb_getversion(fs) == 1) 1047 lfs_sb_setotstamp(fs, write_time); 1048 lfs_sb_settstamp(fs, write_time); 1049 1050 __CTASSERT(sizeof(struct dlfs) == sizeof(struct dlfs64)); 1051 1052 /* Checksum the superblock and copy it into a buffer. */ 1053 lfs_sb_setcksum(fs, lfs_sb_cksum(fs)); 1054 assert(daddr > 0); 1055 bp = getblk(fs->lfs_devvp, LFS_FSBTODB(fs, daddr), LFS_SBPAD); 1056 memcpy(bp->b_data, &fs->lfs_dlfs_u, sizeof(struct dlfs)); 1057 memset(bp->b_data + sizeof(struct dlfs), 0, 1058 LFS_SBPAD - sizeof(struct dlfs)); 1059 1060 bwrite(bp); 1061 } 1062