1 /* $NetBSD: segwrite.c,v 1.17 2007/10/10 20:42:20 ad Exp $ */ 2 /*- 3 * Copyright (c) 2003 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 /* 38 * Copyright (c) 1991, 1993 39 * The Regents of the University of California. All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95 66 */ 67 68 /* 69 * Partial segment writer, taken from the kernel and adapted for userland. 70 */ 71 #include <sys/types.h> 72 #include <sys/param.h> 73 #include <sys/time.h> 74 #include <sys/buf.h> 75 #include <sys/mount.h> 76 77 #include <ufs/ufs/inode.h> 78 #include <ufs/ufs/ufsmount.h> 79 80 /* Override certain things to make <ufs/lfs/lfs.h> work */ 81 #define vnode uvnode 82 #define buf ubuf 83 #define panic call_panic 84 85 #include <ufs/lfs/lfs.h> 86 87 #include <assert.h> 88 #include <stdio.h> 89 #include <stdlib.h> 90 #include <string.h> 91 #include <err.h> 92 #include <errno.h> 93 #include <util.h> 94 95 #include "bufcache.h" 96 #include "vnode.h" 97 #include "lfs_user.h" 98 #include "segwrite.h" 99 100 /* Compatibility definitions */ 101 extern off_t locked_queue_bytes; 102 int locked_queue_count; 103 off_t written_bytes = 0; 104 off_t written_data = 0; 105 off_t written_indir = 0; 106 off_t written_dev = 0; 107 int written_inodes = 0; 108 109 /* Global variables */ 110 time_t write_time; 111 112 extern u_int32_t cksum(void *, size_t); 113 extern u_int32_t lfs_sb_cksum(struct dlfs *); 114 extern int preen; 115 116 /* 117 * Logical block number match routines used when traversing the dirty block 118 * chain. 119 */ 120 int 121 lfs_match_data(struct lfs * fs, struct ubuf * bp) 122 { 123 return (bp->b_lblkno >= 0); 124 } 125 126 int 127 lfs_match_indir(struct lfs * fs, struct ubuf * bp) 128 { 129 daddr_t lbn; 130 131 lbn = bp->b_lblkno; 132 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 133 } 134 135 int 136 lfs_match_dindir(struct lfs * fs, struct ubuf * bp) 137 { 138 daddr_t lbn; 139 140 lbn = bp->b_lblkno; 141 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 142 } 143 144 int 145 lfs_match_tindir(struct lfs * fs, struct ubuf * bp) 146 { 147 daddr_t lbn; 148 149 lbn = bp->b_lblkno; 150 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 151 } 152 153 /* 154 * Do a checkpoint. 155 */ 156 int 157 lfs_segwrite(struct lfs * fs, int flags) 158 { 159 struct inode *ip; 160 struct segment *sp; 161 struct uvnode *vp; 162 int redo; 163 164 lfs_seglock(fs, flags | SEGM_CKP); 165 sp = fs->lfs_sp; 166 167 lfs_writevnodes(fs, sp, VN_REG); 168 lfs_writevnodes(fs, sp, VN_DIROP); 169 ((SEGSUM *) (sp->segsum))->ss_flags &= ~(SS_CONT); 170 171 do { 172 vp = fs->lfs_ivnode; 173 fs->lfs_flags &= ~LFS_IFDIRTY; 174 ip = VTOI(vp); 175 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL || fs->lfs_idaddr <= 0) 176 lfs_writefile(fs, sp, vp); 177 178 redo = lfs_writeinode(fs, sp, ip); 179 redo += lfs_writeseg(fs, sp); 180 redo += (fs->lfs_flags & LFS_IFDIRTY); 181 } while (redo); 182 183 lfs_segunlock(fs); 184 #if 0 185 printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n", 186 written_bytes, (ufs_daddr_t)btofsb(fs, written_bytes)); 187 printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n", 188 written_data, (ufs_daddr_t)btofsb(fs, written_data)); 189 printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n", 190 written_indir, (ufs_daddr_t)btofsb(fs, written_indir)); 191 printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n", 192 written_dev, (ufs_daddr_t)btofsb(fs, written_dev)); 193 printf("wrote %d inodes (%" PRId32 " fsb)\n", 194 written_inodes, btofsb(fs, written_inodes * fs->lfs_ibsize)); 195 #endif 196 return 0; 197 } 198 199 /* 200 * Write the dirty blocks associated with a vnode. 201 */ 202 void 203 lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp) 204 { 205 struct ubuf *bp; 206 struct finfo *fip; 207 struct inode *ip; 208 IFILE *ifp; 209 210 ip = VTOI(vp); 211 212 if (sp->seg_bytes_left < fs->lfs_bsize || 213 sp->sum_bytes_left < sizeof(struct finfo)) 214 (void) lfs_writeseg(fs, sp); 215 216 sp->sum_bytes_left -= FINFOSIZE; 217 ++((SEGSUM *) (sp->segsum))->ss_nfinfo; 218 219 if (vp->v_uflag & VU_DIROP) 220 ((SEGSUM *) (sp->segsum))->ss_flags |= (SS_DIROP | SS_CONT); 221 222 fip = sp->fip; 223 fip->fi_nblocks = 0; 224 fip->fi_ino = ip->i_number; 225 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 226 fip->fi_version = ifp->if_version; 227 brelse(bp, 0); 228 229 lfs_gather(fs, sp, vp, lfs_match_data); 230 lfs_gather(fs, sp, vp, lfs_match_indir); 231 lfs_gather(fs, sp, vp, lfs_match_dindir); 232 lfs_gather(fs, sp, vp, lfs_match_tindir); 233 234 fip = sp->fip; 235 if (fip->fi_nblocks != 0) { 236 sp->fip = (FINFO *) ((caddr_t) fip + FINFOSIZE + 237 sizeof(ufs_daddr_t) * (fip->fi_nblocks)); 238 sp->start_lbp = &sp->fip->fi_blocks[0]; 239 } else { 240 sp->sum_bytes_left += FINFOSIZE; 241 --((SEGSUM *) (sp->segsum))->ss_nfinfo; 242 } 243 } 244 245 int 246 lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip) 247 { 248 struct ubuf *bp, *ibp; 249 struct ufs1_dinode *cdp; 250 IFILE *ifp; 251 SEGUSE *sup; 252 daddr_t daddr; 253 ino_t ino; 254 int error, i, ndx, fsb = 0; 255 int redo_ifile = 0; 256 struct timespec ts; 257 int gotblk = 0; 258 259 /* Allocate a new inode block if necessary. */ 260 if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) && 261 sp->ibp == NULL) { 262 /* Allocate a new segment if necessary. */ 263 if (sp->seg_bytes_left < fs->lfs_ibsize || 264 sp->sum_bytes_left < sizeof(ufs_daddr_t)) 265 (void) lfs_writeseg(fs, sp); 266 267 /* Get next inode block. */ 268 daddr = fs->lfs_offset; 269 fs->lfs_offset += btofsb(fs, fs->lfs_ibsize); 270 sp->ibp = *sp->cbpp++ = 271 getblk(fs->lfs_devvp, fsbtodb(fs, daddr), 272 fs->lfs_ibsize); 273 sp->ibp->b_flags |= B_GATHERED; 274 gotblk++; 275 276 /* Zero out inode numbers */ 277 for (i = 0; i < INOPB(fs); ++i) 278 ((struct ufs1_dinode *) sp->ibp->b_data)[i].di_inumber = 0; 279 280 ++sp->start_bpp; 281 fs->lfs_avail -= btofsb(fs, fs->lfs_ibsize); 282 /* Set remaining space counters. */ 283 sp->seg_bytes_left -= fs->lfs_ibsize; 284 sp->sum_bytes_left -= sizeof(ufs_daddr_t); 285 ndx = fs->lfs_sumsize / sizeof(ufs_daddr_t) - 286 sp->ninodes / INOPB(fs) - 1; 287 ((ufs_daddr_t *) (sp->segsum))[ndx] = daddr; 288 } 289 /* Update the inode times and copy the inode onto the inode page. */ 290 ts.tv_nsec = 0; 291 ts.tv_sec = write_time; 292 /* XXX kludge --- don't redirty the ifile just to put times on it */ 293 if (ip->i_number != LFS_IFILE_INUM) 294 LFS_ITIMES(ip, &ts, &ts, &ts); 295 296 /* 297 * If this is the Ifile, and we've already written the Ifile in this 298 * partial segment, just overwrite it (it's not on disk yet) and 299 * continue. 300 * 301 * XXX we know that the bp that we get the second time around has 302 * already been gathered. 303 */ 304 if (ip->i_number == LFS_IFILE_INUM && sp->idp) { 305 *(sp->idp) = *ip->i_din.ffs1_din; 306 ip->i_lfs_osize = ip->i_ffs1_size; 307 return 0; 308 } 309 bp = sp->ibp; 310 cdp = ((struct ufs1_dinode *) bp->b_data) + (sp->ninodes % INOPB(fs)); 311 *cdp = *ip->i_din.ffs1_din; 312 313 /* If all blocks are goig to disk, update the "size on disk" */ 314 ip->i_lfs_osize = ip->i_ffs1_size; 315 316 if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */ 317 sp->idp = ((struct ufs1_dinode *) bp->b_data) + 318 (sp->ninodes % INOPB(fs)); 319 if (gotblk) { 320 LFS_LOCK_BUF(bp); 321 assert(!(bp->b_flags & B_INVAL)); 322 brelse(bp, 0); 323 } 324 /* Increment inode count in segment summary block. */ 325 ++((SEGSUM *) (sp->segsum))->ss_ninos; 326 327 /* If this page is full, set flag to allocate a new page. */ 328 if (++sp->ninodes % INOPB(fs) == 0) 329 sp->ibp = NULL; 330 331 /* 332 * If updating the ifile, update the super-block. Update the disk 333 * address and access times for this inode in the ifile. 334 */ 335 ino = ip->i_number; 336 if (ino == LFS_IFILE_INUM) { 337 daddr = fs->lfs_idaddr; 338 fs->lfs_idaddr = dbtofsb(fs, bp->b_blkno); 339 sbdirty(); 340 } else { 341 LFS_IENTRY(ifp, fs, ino, ibp); 342 daddr = ifp->if_daddr; 343 ifp->if_daddr = dbtofsb(fs, bp->b_blkno) + fsb; 344 error = LFS_BWRITE_LOG(ibp); /* Ifile */ 345 } 346 347 /* 348 * Account the inode: it no longer belongs to its former segment, 349 * though it will not belong to the new segment until that segment 350 * is actually written. 351 */ 352 if (daddr != LFS_UNUSED_DADDR) { 353 u_int32_t oldsn = dtosn(fs, daddr); 354 LFS_SEGENTRY(sup, fs, oldsn, bp); 355 sup->su_nbytes -= DINODE1_SIZE; 356 redo_ifile = 357 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 358 if (redo_ifile) 359 fs->lfs_flags |= LFS_IFDIRTY; 360 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */ 361 } 362 return redo_ifile; 363 } 364 365 int 366 lfs_gatherblock(struct segment * sp, struct ubuf * bp) 367 { 368 struct lfs *fs; 369 int version; 370 int j, blksinblk; 371 372 /* 373 * If full, finish this segment. We may be doing I/O, so 374 * release and reacquire the splbio(). 375 */ 376 fs = sp->fs; 377 blksinblk = howmany(bp->b_bcount, fs->lfs_bsize); 378 if (sp->sum_bytes_left < sizeof(ufs_daddr_t) * blksinblk || 379 sp->seg_bytes_left < bp->b_bcount) { 380 lfs_updatemeta(sp); 381 382 version = sp->fip->fi_version; 383 (void) lfs_writeseg(fs, sp); 384 385 sp->fip->fi_version = version; 386 sp->fip->fi_ino = VTOI(sp->vp)->i_number; 387 /* Add the current file to the segment summary. */ 388 ++((SEGSUM *) (sp->segsum))->ss_nfinfo; 389 sp->sum_bytes_left -= FINFOSIZE; 390 391 return 1; 392 } 393 /* Insert into the buffer list, update the FINFO block. */ 394 bp->b_flags |= B_GATHERED; 395 /* bp->b_flags &= ~B_DONE; */ 396 397 *sp->cbpp++ = bp; 398 for (j = 0; j < blksinblk; j++) 399 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno + j; 400 401 sp->sum_bytes_left -= sizeof(ufs_daddr_t) * blksinblk; 402 sp->seg_bytes_left -= bp->b_bcount; 403 return 0; 404 } 405 406 int 407 lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *)) 408 { 409 struct ubuf *bp, *nbp; 410 int count = 0; 411 412 sp->vp = vp; 413 loop: 414 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 415 nbp = LIST_NEXT(bp, b_vnbufs); 416 417 assert(bp->b_flags & B_DELWRI); 418 if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) { 419 continue; 420 } 421 if (lfs_gatherblock(sp, bp)) { 422 goto loop; 423 } 424 count++; 425 } 426 427 lfs_updatemeta(sp); 428 sp->vp = NULL; 429 return count; 430 } 431 432 433 /* 434 * Change the given block's address to ndaddr, finding its previous 435 * location using ufs_bmaparray(). 436 * 437 * Account for this change in the segment table. 438 */ 439 void 440 lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn, 441 ufs_daddr_t ndaddr, int size) 442 { 443 SEGUSE *sup; 444 struct ubuf *bp; 445 struct indir a[NIADDR + 2], *ap; 446 struct inode *ip; 447 struct uvnode *vp; 448 daddr_t daddr, ooff; 449 int num, error; 450 int bb, osize, obb; 451 452 vp = sp->vp; 453 ip = VTOI(vp); 454 455 error = ufs_bmaparray(fs, vp, lbn, &daddr, a, &num); 456 if (error) 457 errx(1, "lfs_updatemeta: ufs_bmaparray returned %d looking up lbn %" PRId64 "\n", error, lbn); 458 if (daddr > 0) 459 daddr = dbtofsb(fs, daddr); 460 461 bb = fragstofsb(fs, numfrags(fs, size)); 462 switch (num) { 463 case 0: 464 ooff = ip->i_ffs1_db[lbn]; 465 if (ooff == UNWRITTEN) 466 ip->i_ffs1_blocks += bb; 467 else { 468 /* possible fragment truncation or extension */ 469 obb = btofsb(fs, ip->i_lfs_fragsize[lbn]); 470 ip->i_ffs1_blocks += (bb - obb); 471 } 472 ip->i_ffs1_db[lbn] = ndaddr; 473 break; 474 case 1: 475 ooff = ip->i_ffs1_ib[a[0].in_off]; 476 if (ooff == UNWRITTEN) 477 ip->i_ffs1_blocks += bb; 478 ip->i_ffs1_ib[a[0].in_off] = ndaddr; 479 break; 480 default: 481 ap = &a[num - 1]; 482 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NULL, &bp)) 483 errx(1, "lfs_updatemeta: bread bno %" PRId64, 484 ap->in_lbn); 485 486 ooff = ((ufs_daddr_t *) bp->b_data)[ap->in_off]; 487 if (ooff == UNWRITTEN) 488 ip->i_ffs1_blocks += bb; 489 ((ufs_daddr_t *) bp->b_data)[ap->in_off] = ndaddr; 490 (void) VOP_BWRITE(bp); 491 } 492 493 /* 494 * Update segment usage information, based on old size 495 * and location. 496 */ 497 if (daddr > 0) { 498 u_int32_t oldsn = dtosn(fs, daddr); 499 if (lbn >= 0 && lbn < NDADDR) 500 osize = ip->i_lfs_fragsize[lbn]; 501 else 502 osize = fs->lfs_bsize; 503 LFS_SEGENTRY(sup, fs, oldsn, bp); 504 sup->su_nbytes -= osize; 505 if (!(bp->b_flags & B_GATHERED)) 506 fs->lfs_flags |= LFS_IFDIRTY; 507 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); 508 } 509 /* 510 * Now that this block has a new address, and its old 511 * segment no longer owns it, we can forget about its 512 * old size. 513 */ 514 if (lbn >= 0 && lbn < NDADDR) 515 ip->i_lfs_fragsize[lbn] = size; 516 } 517 518 /* 519 * Update the metadata that points to the blocks listed in the FINFO 520 * array. 521 */ 522 void 523 lfs_updatemeta(struct segment * sp) 524 { 525 struct ubuf *sbp; 526 struct lfs *fs; 527 struct uvnode *vp; 528 daddr_t lbn; 529 int i, nblocks, num; 530 int bb; 531 int bytesleft, size; 532 533 vp = sp->vp; 534 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 535 536 if (vp == NULL || nblocks == 0) 537 return; 538 539 /* 540 * This count may be high due to oversize blocks from lfs_gop_write. 541 * Correct for this. (XXX we should be able to keep track of these.) 542 */ 543 fs = sp->fs; 544 for (i = 0; i < nblocks; i++) { 545 if (sp->start_bpp[i] == NULL) { 546 printf("nblocks = %d, not %d\n", i, nblocks); 547 nblocks = i; 548 break; 549 } 550 num = howmany(sp->start_bpp[i]->b_bcount, fs->lfs_bsize); 551 nblocks -= num - 1; 552 } 553 554 /* 555 * Sort the blocks. 556 */ 557 lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks, fs->lfs_bsize); 558 559 /* 560 * Record the length of the last block in case it's a fragment. 561 * If there are indirect blocks present, they sort last. An 562 * indirect block will be lfs_bsize and its presence indicates 563 * that you cannot have fragments. 564 */ 565 sp->fip->fi_lastlength = ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & 566 fs->lfs_bmask) + 1; 567 568 /* 569 * Assign disk addresses, and update references to the logical 570 * block and the segment usage information. 571 */ 572 for (i = nblocks; i--; ++sp->start_bpp) { 573 sbp = *sp->start_bpp; 574 lbn = *sp->start_lbp; 575 576 sbp->b_blkno = fsbtodb(fs, fs->lfs_offset); 577 578 /* 579 * If we write a frag in the wrong place, the cleaner won't 580 * be able to correctly identify its size later, and the 581 * segment will be uncleanable. (Even worse, it will assume 582 * that the indirect block that actually ends the list 583 * is of a smaller size!) 584 */ 585 if ((sbp->b_bcount & fs->lfs_bmask) && i != 0) 586 errx(1, "lfs_updatemeta: fragment is not last block"); 587 588 /* 589 * For each subblock in this possibly oversized block, 590 * update its address on disk. 591 */ 592 for (bytesleft = sbp->b_bcount; bytesleft > 0; 593 bytesleft -= fs->lfs_bsize) { 594 size = MIN(bytesleft, fs->lfs_bsize); 595 bb = fragstofsb(fs, numfrags(fs, size)); 596 lbn = *sp->start_lbp++; 597 lfs_update_single(fs, sp, lbn, fs->lfs_offset, size); 598 fs->lfs_offset += bb; 599 } 600 601 } 602 } 603 604 /* 605 * Start a new segment. 606 */ 607 int 608 lfs_initseg(struct lfs * fs) 609 { 610 struct segment *sp; 611 SEGUSE *sup; 612 SEGSUM *ssp; 613 struct ubuf *bp, *sbp; 614 int repeat; 615 616 sp = fs->lfs_sp; 617 618 repeat = 0; 619 620 /* Advance to the next segment. */ 621 if (!LFS_PARTIAL_FITS(fs)) { 622 /* lfs_avail eats the remaining space */ 623 fs->lfs_avail -= fs->lfs_fsbpseg - (fs->lfs_offset - 624 fs->lfs_curseg); 625 lfs_newseg(fs); 626 repeat = 1; 627 fs->lfs_offset = fs->lfs_curseg; 628 629 sp->seg_number = dtosn(fs, fs->lfs_curseg); 630 sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg); 631 632 /* 633 * If the segment contains a superblock, update the offset 634 * and summary address to skip over it. 635 */ 636 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 637 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 638 fs->lfs_offset += btofsb(fs, LFS_SBPAD); 639 sp->seg_bytes_left -= LFS_SBPAD; 640 } 641 brelse(bp, 0); 642 /* Segment zero could also contain the labelpad */ 643 if (fs->lfs_version > 1 && sp->seg_number == 0 && 644 fs->lfs_start < btofsb(fs, LFS_LABELPAD)) { 645 fs->lfs_offset += btofsb(fs, LFS_LABELPAD) - fs->lfs_start; 646 sp->seg_bytes_left -= LFS_LABELPAD - fsbtob(fs, fs->lfs_start); 647 } 648 } else { 649 sp->seg_number = dtosn(fs, fs->lfs_curseg); 650 sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg - 651 (fs->lfs_offset - fs->lfs_curseg)); 652 } 653 fs->lfs_lastpseg = fs->lfs_offset; 654 655 sp->fs = fs; 656 sp->ibp = NULL; 657 sp->idp = NULL; 658 sp->ninodes = 0; 659 sp->ndupino = 0; 660 661 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 662 sp->cbpp = sp->bpp; 663 sbp = *sp->cbpp = getblk(fs->lfs_devvp, 664 fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize); 665 sp->segsum = sbp->b_data; 666 memset(sp->segsum, 0, fs->lfs_sumsize); 667 sp->start_bpp = ++sp->cbpp; 668 fs->lfs_offset += btofsb(fs, fs->lfs_sumsize); 669 670 /* Set point to SEGSUM, initialize it. */ 671 ssp = sp->segsum; 672 ssp->ss_next = fs->lfs_nextseg; 673 ssp->ss_nfinfo = ssp->ss_ninos = 0; 674 ssp->ss_magic = SS_MAGIC; 675 676 /* Set pointer to first FINFO, initialize it. */ 677 sp->fip = (struct finfo *) ((caddr_t) sp->segsum + SEGSUM_SIZE(fs)); 678 sp->fip->fi_nblocks = 0; 679 sp->start_lbp = &sp->fip->fi_blocks[0]; 680 sp->fip->fi_lastlength = 0; 681 682 sp->seg_bytes_left -= fs->lfs_sumsize; 683 sp->sum_bytes_left = fs->lfs_sumsize - SEGSUM_SIZE(fs); 684 685 LFS_LOCK_BUF(sbp); 686 brelse(sbp, 0); 687 return repeat; 688 } 689 690 /* 691 * Return the next segment to write. 692 */ 693 void 694 lfs_newseg(struct lfs * fs) 695 { 696 CLEANERINFO *cip; 697 SEGUSE *sup; 698 struct ubuf *bp; 699 int curseg, isdirty, sn; 700 701 LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); 702 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 703 sup->su_nbytes = 0; 704 sup->su_nsums = 0; 705 sup->su_ninos = 0; 706 LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); 707 708 LFS_CLEANERINFO(cip, fs, bp); 709 --cip->clean; 710 ++cip->dirty; 711 fs->lfs_nclean = cip->clean; 712 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 713 714 fs->lfs_lastseg = fs->lfs_curseg; 715 fs->lfs_curseg = fs->lfs_nextseg; 716 for (sn = curseg = dtosn(fs, fs->lfs_curseg) + fs->lfs_interleave;;) { 717 sn = (sn + 1) % fs->lfs_nseg; 718 if (sn == curseg) 719 errx(1, "lfs_nextseg: no clean segments"); 720 LFS_SEGENTRY(sup, fs, sn, bp); 721 isdirty = sup->su_flags & SEGUSE_DIRTY; 722 brelse(bp, 0); 723 724 if (!isdirty) 725 break; 726 } 727 728 ++fs->lfs_nactive; 729 fs->lfs_nextseg = sntod(fs, sn); 730 } 731 732 733 int 734 lfs_writeseg(struct lfs * fs, struct segment * sp) 735 { 736 struct ubuf **bpp, *bp; 737 SEGUSE *sup; 738 SEGSUM *ssp; 739 char *datap, *dp; 740 int i; 741 int do_again, nblocks, byteoffset; 742 size_t el_size; 743 u_short ninos; 744 struct uvnode *devvp; 745 746 /* 747 * If there are no buffers other than the segment summary to write 748 * and it is not a checkpoint, don't do anything. On a checkpoint, 749 * even if there aren't any buffers, you need to write the superblock. 750 */ 751 nblocks = sp->cbpp - sp->bpp; 752 #if 0 753 printf("write %d blocks at 0x%x\n", 754 nblocks, (int)dbtofsb(fs, (*sp->bpp)->b_blkno)); 755 #endif 756 if (nblocks == 1) 757 return 0; 758 759 devvp = fs->lfs_devvp; 760 761 /* Update the segment usage information. */ 762 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 763 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 764 765 /* Loop through all blocks, except the segment summary. */ 766 for (bpp = sp->bpp; ++bpp < sp->cbpp;) { 767 if ((*bpp)->b_vp != devvp) { 768 sup->su_nbytes += (*bpp)->b_bcount; 769 } 770 assert(dtosn(fs, dbtofsb(fs, (*bpp)->b_blkno)) == sp->seg_number); 771 } 772 773 ssp = (SEGSUM *) sp->segsum; 774 ssp->ss_flags |= SS_RFW; 775 776 ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs); 777 sup->su_nbytes += ssp->ss_ninos * DINODE1_SIZE; 778 779 if (fs->lfs_version == 1) 780 sup->su_olastmod = write_time; 781 else 782 sup->su_lastmod = write_time; 783 sup->su_ninos += ninos; 784 ++sup->su_nsums; 785 fs->lfs_dmeta += (btofsb(fs, fs->lfs_sumsize) + btofsb(fs, ninos * 786 fs->lfs_ibsize)); 787 fs->lfs_avail -= btofsb(fs, fs->lfs_sumsize); 788 789 do_again = !(bp->b_flags & B_GATHERED); 790 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ 791 792 /* 793 * Compute checksum across data and then across summary; the first 794 * block (the summary block) is skipped. Set the create time here 795 * so that it's guaranteed to be later than the inode mod times. 796 */ 797 if (fs->lfs_version == 1) 798 el_size = sizeof(u_long); 799 else 800 el_size = sizeof(u_int32_t); 801 datap = dp = emalloc(nblocks * el_size); 802 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 803 ++bpp; 804 /* Loop through gop_write cluster blocks */ 805 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; 806 byteoffset += fs->lfs_bsize) { 807 memcpy(dp, (*bpp)->b_data + byteoffset, el_size); 808 dp += el_size; 809 } 810 bremfree(*bpp); 811 (*bpp)->b_flags |= B_BUSY; 812 } 813 if (fs->lfs_version == 1) 814 ssp->ss_ocreate = write_time; 815 else { 816 ssp->ss_create = write_time; 817 ssp->ss_serial = ++fs->lfs_serial; 818 ssp->ss_ident = fs->lfs_ident; 819 } 820 /* Set the summary block busy too */ 821 bremfree(*(sp->bpp)); 822 (*(sp->bpp))->b_flags |= B_BUSY; 823 824 ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size); 825 ssp->ss_sumsum = 826 cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum)); 827 free(datap); 828 datap = dp = NULL; 829 fs->lfs_bfree -= (btofsb(fs, ninos * fs->lfs_ibsize) + 830 btofsb(fs, fs->lfs_sumsize)); 831 832 if (devvp == NULL) 833 errx(1, "devvp is NULL"); 834 for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) { 835 bp = *bpp; 836 #if 0 837 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n", 838 nblocks - i, bp, bp->b_flags, bp->b_blkno); 839 printf(" vp = %p\n", bp->b_vp); 840 if (bp->b_vp != fs->lfs_devvp) 841 printf(" ino = %d lbn = %" PRId64 "\n", 842 VTOI(bp->b_vp)->i_number, bp->b_lblkno); 843 #endif 844 if (bp->b_vp == fs->lfs_devvp) 845 written_dev += bp->b_bcount; 846 else { 847 if (bp->b_lblkno >= 0) 848 written_data += bp->b_bcount; 849 else 850 written_indir += bp->b_bcount; 851 } 852 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR | 853 B_LOCKED); 854 bwrite(bp); 855 written_bytes += bp->b_bcount; 856 } 857 written_inodes += ninos; 858 859 return (lfs_initseg(fs) || do_again); 860 } 861 862 /* 863 * Our own copy of shellsort. XXX use qsort or heapsort. 864 */ 865 void 866 lfs_shellsort(struct ubuf ** bp_array, ufs_daddr_t * lb_array, int nmemb, int size) 867 { 868 static int __rsshell_increments[] = {4, 1, 0}; 869 int incr, *incrp, t1, t2; 870 struct ubuf *bp_temp; 871 872 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 873 for (t1 = incr; t1 < nmemb; ++t1) 874 for (t2 = t1 - incr; t2 >= 0;) 875 if ((u_int32_t) bp_array[t2]->b_lblkno > 876 (u_int32_t) bp_array[t2 + incr]->b_lblkno) { 877 bp_temp = bp_array[t2]; 878 bp_array[t2] = bp_array[t2 + incr]; 879 bp_array[t2 + incr] = bp_temp; 880 t2 -= incr; 881 } else 882 break; 883 884 /* Reform the list of logical blocks */ 885 incr = 0; 886 for (t1 = 0; t1 < nmemb; t1++) { 887 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 888 lb_array[incr++] = bp_array[t1]->b_lblkno + t2; 889 } 890 } 891 } 892 893 894 /* 895 * lfs_seglock -- 896 * Single thread the segment writer. 897 */ 898 int 899 lfs_seglock(struct lfs * fs, unsigned long flags) 900 { 901 struct segment *sp; 902 903 if (fs->lfs_seglock) { 904 ++fs->lfs_seglock; 905 fs->lfs_sp->seg_flags |= flags; 906 return 0; 907 } 908 fs->lfs_seglock = 1; 909 910 sp = fs->lfs_sp = emalloc(sizeof(*sp)); 911 sp->bpp = emalloc(fs->lfs_ssize * sizeof(struct ubuf *)); 912 if (!sp->bpp) 913 errx(!preen, "Could not allocate %zu bytes: %s", 914 (size_t)(fs->lfs_ssize * sizeof(struct ubuf *)), 915 strerror(errno)); 916 sp->seg_flags = flags; 917 sp->vp = NULL; 918 sp->seg_iocount = 0; 919 (void) lfs_initseg(fs); 920 921 return 0; 922 } 923 924 /* 925 * lfs_segunlock -- 926 * Single thread the segment writer. 927 */ 928 void 929 lfs_segunlock(struct lfs * fs) 930 { 931 struct segment *sp; 932 struct ubuf *bp; 933 934 sp = fs->lfs_sp; 935 936 if (fs->lfs_seglock == 1) { 937 if (sp->bpp != sp->cbpp) { 938 /* Free allocated segment summary */ 939 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 940 bp = *sp->bpp; 941 bremfree(bp); 942 bp->b_flags |= B_DONE | B_INVAL; 943 bp->b_flags &= ~B_DELWRI; 944 reassignbuf(bp, bp->b_vp); 945 bp->b_flags |= B_BUSY; /* XXX */ 946 brelse(bp, 0); 947 } else 948 printf("unlock to 0 with no summary"); 949 950 free(sp->bpp); 951 sp->bpp = NULL; 952 free(sp); 953 fs->lfs_sp = NULL; 954 955 fs->lfs_nactive = 0; 956 957 /* Since we *know* everything's on disk, write both sbs */ 958 lfs_writesuper(fs, fs->lfs_sboffs[0]); 959 lfs_writesuper(fs, fs->lfs_sboffs[1]); 960 961 --fs->lfs_seglock; 962 fs->lfs_lockpid = 0; 963 } else if (fs->lfs_seglock == 0) { 964 errx(1, "Seglock not held"); 965 } else { 966 --fs->lfs_seglock; 967 } 968 } 969 970 int 971 lfs_writevnodes(struct lfs *fs, struct segment *sp, int op) 972 { 973 struct inode *ip; 974 struct uvnode *vp; 975 int inodes_written = 0; 976 977 LIST_FOREACH(vp, &vnodelist, v_mntvnodes) { 978 if (vp->v_bmap_op != lfs_vop_bmap) 979 continue; 980 981 ip = VTOI(vp); 982 983 if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || 984 (op != VN_DIROP && (vp->v_uflag & VU_DIROP))) { 985 continue; 986 } 987 /* 988 * Write the inode/file if dirty and it's not the IFILE. 989 */ 990 if (ip->i_flag & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) { 991 if (ip->i_number != LFS_IFILE_INUM) 992 lfs_writefile(fs, sp, vp); 993 (void) lfs_writeinode(fs, sp, ip); 994 inodes_written++; 995 } 996 } 997 return inodes_written; 998 } 999 1000 void 1001 lfs_writesuper(struct lfs *fs, ufs_daddr_t daddr) 1002 { 1003 struct ubuf *bp; 1004 1005 /* Set timestamp of this version of the superblock */ 1006 if (fs->lfs_version == 1) 1007 fs->lfs_otstamp = write_time; 1008 fs->lfs_tstamp = write_time; 1009 1010 /* Checksum the superblock and copy it into a buffer. */ 1011 fs->lfs_cksum = lfs_sb_cksum(&(fs->lfs_dlfs)); 1012 assert(daddr > 0); 1013 bp = getblk(fs->lfs_devvp, fsbtodb(fs, daddr), LFS_SBPAD); 1014 memset(bp->b_data + sizeof(struct dlfs), 0, 1015 LFS_SBPAD - sizeof(struct dlfs)); 1016 *(struct dlfs *) bp->b_data = fs->lfs_dlfs; 1017 1018 bwrite(bp); 1019 } 1020