1 /* $NetBSD: segwrite.c,v 1.6 2003/12/24 01:39:27 heas Exp $ */ 2 /*- 3 * Copyright (c) 2003 The NetBSD Foundation, Inc. 4 * All rights reserved. 5 * 6 * This code is derived from software contributed to The NetBSD Foundation 7 * by Konrad E. Schroder <perseant@hhhh.org>. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. All advertising materials mentioning features or use of this software 18 * must display the following acknowledgement: 19 * This product includes software developed by the NetBSD 20 * Foundation, Inc. and its contributors. 21 * 4. Neither the name of The NetBSD Foundation nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 * POSSIBILITY OF SUCH DAMAGE. 36 */ 37 /* 38 * Copyright (c) 1991, 1993 39 * The Regents of the University of California. All rights reserved. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)lfs_segment.c 8.10 (Berkeley) 6/10/95 66 */ 67 68 /* 69 * Partial segment writer, taken from the kernel and adapted for userland. 70 */ 71 #include <sys/types.h> 72 #include <sys/param.h> 73 #include <sys/time.h> 74 #include <sys/buf.h> 75 #include <sys/mount.h> 76 77 #include <ufs/ufs/inode.h> 78 #include <ufs/ufs/ufsmount.h> 79 80 /* Override certain things to make <ufs/lfs/lfs.h> work */ 81 #undef simple_lock 82 #define simple_lock(x) 83 #undef simple_unlock 84 #define simple_unlock(x) 85 #define vnode uvnode 86 #define buf ubuf 87 #define panic call_panic 88 89 #include <ufs/lfs/lfs.h> 90 91 #include <assert.h> 92 #include <stdio.h> 93 #include <stdlib.h> 94 #include <string.h> 95 #include <err.h> 96 #include <errno.h> 97 98 #include "bufcache.h" 99 #include "vnode.h" 100 #include "lfs.h" 101 #include "segwrite.h" 102 103 /* Compatibility definitions */ 104 extern off_t locked_queue_bytes; 105 int locked_queue_count; 106 off_t written_bytes = 0; 107 off_t written_data = 0; 108 off_t written_indir = 0; 109 off_t written_dev = 0; 110 int written_inodes = 0; 111 112 /* Global variables */ 113 time_t write_time; 114 115 extern u_int32_t cksum(void *, size_t); 116 extern u_int32_t lfs_sb_cksum(struct dlfs *); 117 118 /* 119 * Logical block number match routines used when traversing the dirty block 120 * chain. 121 */ 122 int 123 lfs_match_data(struct lfs * fs, struct ubuf * bp) 124 { 125 return (bp->b_lblkno >= 0); 126 } 127 128 int 129 lfs_match_indir(struct lfs * fs, struct ubuf * bp) 130 { 131 daddr_t lbn; 132 133 lbn = bp->b_lblkno; 134 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 135 } 136 137 int 138 lfs_match_dindir(struct lfs * fs, struct ubuf * bp) 139 { 140 daddr_t lbn; 141 142 lbn = bp->b_lblkno; 143 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 144 } 145 146 int 147 lfs_match_tindir(struct lfs * fs, struct ubuf * bp) 148 { 149 daddr_t lbn; 150 151 lbn = bp->b_lblkno; 152 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 153 } 154 155 /* 156 * Do a checkpoint. 157 */ 158 int 159 lfs_segwrite(struct lfs * fs, int flags) 160 { 161 struct inode *ip; 162 struct segment *sp; 163 struct uvnode *vp; 164 int redo; 165 166 lfs_seglock(fs, flags | SEGM_CKP); 167 sp = fs->lfs_sp; 168 169 lfs_writevnodes(fs, sp, VN_REG); 170 lfs_writevnodes(fs, sp, VN_DIROP); 171 ((SEGSUM *) (sp->segsum))->ss_flags &= ~(SS_CONT); 172 173 do { 174 vp = fs->lfs_ivnode; 175 fs->lfs_flags &= ~LFS_IFDIRTY; 176 ip = VTOI(vp); 177 if (LIST_FIRST(&vp->v_dirtyblkhd) != NULL) 178 lfs_writefile(fs, sp, vp); 179 180 redo = lfs_writeinode(fs, sp, ip); 181 redo += lfs_writeseg(fs, sp); 182 redo += (fs->lfs_flags & LFS_IFDIRTY); 183 } while (redo); 184 185 lfs_segunlock(fs); 186 #if 0 187 printf("wrote %" PRId64 " bytes (%" PRId32 " fsb)\n", 188 written_bytes, (ufs_daddr_t)btofsb(fs, written_bytes)); 189 printf("wrote %" PRId64 " bytes data (%" PRId32 " fsb)\n", 190 written_data, (ufs_daddr_t)btofsb(fs, written_data)); 191 printf("wrote %" PRId64 " bytes indir (%" PRId32 " fsb)\n", 192 written_indir, (ufs_daddr_t)btofsb(fs, written_indir)); 193 printf("wrote %" PRId64 " bytes dev (%" PRId32 " fsb)\n", 194 written_dev, (ufs_daddr_t)btofsb(fs, written_dev)); 195 printf("wrote %d inodes (%" PRId32 " fsb)\n", 196 written_inodes, btofsb(fs, written_inodes * fs->lfs_ibsize)); 197 #endif 198 return 0; 199 } 200 201 /* 202 * Write the dirty blocks associated with a vnode. 203 */ 204 void 205 lfs_writefile(struct lfs * fs, struct segment * sp, struct uvnode * vp) 206 { 207 struct ubuf *bp; 208 struct finfo *fip; 209 struct inode *ip; 210 IFILE *ifp; 211 212 ip = VTOI(vp); 213 214 if (sp->seg_bytes_left < fs->lfs_bsize || 215 sp->sum_bytes_left < sizeof(struct finfo)) 216 (void) lfs_writeseg(fs, sp); 217 218 sp->sum_bytes_left -= FINFOSIZE; 219 ++((SEGSUM *) (sp->segsum))->ss_nfinfo; 220 221 if (vp->v_flag & VDIROP) 222 ((SEGSUM *) (sp->segsum))->ss_flags |= (SS_DIROP | SS_CONT); 223 224 fip = sp->fip; 225 fip->fi_nblocks = 0; 226 fip->fi_ino = ip->i_number; 227 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 228 fip->fi_version = ifp->if_version; 229 brelse(bp); 230 231 lfs_gather(fs, sp, vp, lfs_match_data); 232 lfs_gather(fs, sp, vp, lfs_match_indir); 233 lfs_gather(fs, sp, vp, lfs_match_dindir); 234 lfs_gather(fs, sp, vp, lfs_match_tindir); 235 236 fip = sp->fip; 237 if (fip->fi_nblocks != 0) { 238 sp->fip = (FINFO *) ((caddr_t) fip + FINFOSIZE + 239 sizeof(ufs_daddr_t) * (fip->fi_nblocks)); 240 sp->start_lbp = &sp->fip->fi_blocks[0]; 241 } else { 242 sp->sum_bytes_left += FINFOSIZE; 243 --((SEGSUM *) (sp->segsum))->ss_nfinfo; 244 } 245 } 246 247 int 248 lfs_writeinode(struct lfs * fs, struct segment * sp, struct inode * ip) 249 { 250 struct ubuf *bp, *ibp; 251 struct ufs1_dinode *cdp; 252 IFILE *ifp; 253 SEGUSE *sup; 254 daddr_t daddr; 255 ino_t ino; 256 int error, i, ndx, fsb = 0; 257 int redo_ifile = 0; 258 struct timespec ts; 259 int gotblk = 0; 260 261 /* Allocate a new inode block if necessary. */ 262 if ((ip->i_number != LFS_IFILE_INUM || sp->idp == NULL) && 263 sp->ibp == NULL) { 264 /* Allocate a new segment if necessary. */ 265 if (sp->seg_bytes_left < fs->lfs_ibsize || 266 sp->sum_bytes_left < sizeof(ufs_daddr_t)) 267 (void) lfs_writeseg(fs, sp); 268 269 /* Get next inode block. */ 270 daddr = fs->lfs_offset; 271 fs->lfs_offset += btofsb(fs, fs->lfs_ibsize); 272 sp->ibp = *sp->cbpp++ = 273 getblk(fs->lfs_unlockvp, fsbtodb(fs, daddr), 274 fs->lfs_ibsize); 275 sp->ibp->b_flags |= B_GATHERED; 276 gotblk++; 277 278 /* Zero out inode numbers */ 279 for (i = 0; i < INOPB(fs); ++i) 280 ((struct ufs1_dinode *) sp->ibp->b_data)[i].di_inumber = 0; 281 282 ++sp->start_bpp; 283 fs->lfs_avail -= btofsb(fs, fs->lfs_ibsize); 284 /* Set remaining space counters. */ 285 sp->seg_bytes_left -= fs->lfs_ibsize; 286 sp->sum_bytes_left -= sizeof(ufs_daddr_t); 287 ndx = fs->lfs_sumsize / sizeof(ufs_daddr_t) - 288 sp->ninodes / INOPB(fs) - 1; 289 ((ufs_daddr_t *) (sp->segsum))[ndx] = daddr; 290 } 291 /* Update the inode times and copy the inode onto the inode page. */ 292 ts.tv_nsec = 0; 293 ts.tv_sec = write_time; 294 /* XXX kludge --- don't redirty the ifile just to put times on it */ 295 if (ip->i_number != LFS_IFILE_INUM) 296 LFS_ITIMES(ip, &ts, &ts, &ts); 297 298 /* 299 * If this is the Ifile, and we've already written the Ifile in this 300 * partial segment, just overwrite it (it's not on disk yet) and 301 * continue. 302 * 303 * XXX we know that the bp that we get the second time around has 304 * already been gathered. 305 */ 306 if (ip->i_number == LFS_IFILE_INUM && sp->idp) { 307 *(sp->idp) = *ip->i_din.ffs1_din; 308 ip->i_lfs_osize = ip->i_ffs1_size; 309 return 0; 310 } 311 bp = sp->ibp; 312 cdp = ((struct ufs1_dinode *) bp->b_data) + (sp->ninodes % INOPB(fs)); 313 *cdp = *ip->i_din.ffs1_din; 314 315 /* If all blocks are goig to disk, update the "size on disk" */ 316 ip->i_lfs_osize = ip->i_ffs1_size; 317 318 if (ip->i_number == LFS_IFILE_INUM) /* We know sp->idp == NULL */ 319 sp->idp = ((struct ufs1_dinode *) bp->b_data) + 320 (sp->ninodes % INOPB(fs)); 321 if (gotblk) { 322 LFS_LOCK_BUF(bp); 323 brelse(bp); 324 } 325 /* Increment inode count in segment summary block. */ 326 ++((SEGSUM *) (sp->segsum))->ss_ninos; 327 328 /* If this page is full, set flag to allocate a new page. */ 329 if (++sp->ninodes % INOPB(fs) == 0) 330 sp->ibp = NULL; 331 332 /* 333 * If updating the ifile, update the super-block. Update the disk 334 * address and access times for this inode in the ifile. 335 */ 336 ino = ip->i_number; 337 if (ino == LFS_IFILE_INUM) { 338 daddr = fs->lfs_idaddr; 339 fs->lfs_idaddr = dbtofsb(fs, bp->b_blkno); 340 } else { 341 LFS_IENTRY(ifp, fs, ino, ibp); 342 daddr = ifp->if_daddr; 343 ifp->if_daddr = dbtofsb(fs, bp->b_blkno) + fsb; 344 error = LFS_BWRITE_LOG(ibp); /* Ifile */ 345 } 346 347 /* 348 * Account the inode: it no longer belongs to its former segment, 349 * though it will not belong to the new segment until that segment 350 * is actually written. 351 */ 352 if (daddr != LFS_UNUSED_DADDR) { 353 u_int32_t oldsn = dtosn(fs, daddr); 354 LFS_SEGENTRY(sup, fs, oldsn, bp); 355 sup->su_nbytes -= DINODE1_SIZE; 356 redo_ifile = 357 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 358 if (redo_ifile) 359 fs->lfs_flags |= LFS_IFDIRTY; 360 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); /* Ifile */ 361 } 362 return redo_ifile; 363 } 364 365 int 366 lfs_gatherblock(struct segment * sp, struct ubuf * bp) 367 { 368 struct lfs *fs; 369 int version; 370 int j, blksinblk; 371 372 /* 373 * If full, finish this segment. We may be doing I/O, so 374 * release and reacquire the splbio(). 375 */ 376 fs = sp->fs; 377 blksinblk = howmany(bp->b_bcount, fs->lfs_bsize); 378 if (sp->sum_bytes_left < sizeof(ufs_daddr_t) * blksinblk || 379 sp->seg_bytes_left < bp->b_bcount) { 380 lfs_updatemeta(sp); 381 382 version = sp->fip->fi_version; 383 (void) lfs_writeseg(fs, sp); 384 385 sp->fip->fi_version = version; 386 sp->fip->fi_ino = VTOI(sp->vp)->i_number; 387 /* Add the current file to the segment summary. */ 388 ++((SEGSUM *) (sp->segsum))->ss_nfinfo; 389 sp->sum_bytes_left -= FINFOSIZE; 390 391 return 1; 392 } 393 /* Insert into the buffer list, update the FINFO block. */ 394 bp->b_flags |= B_GATHERED; 395 /* bp->b_flags &= ~B_DONE; */ 396 397 *sp->cbpp++ = bp; 398 for (j = 0; j < blksinblk; j++) 399 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno + j; 400 401 sp->sum_bytes_left -= sizeof(ufs_daddr_t) * blksinblk; 402 sp->seg_bytes_left -= bp->b_bcount; 403 return 0; 404 } 405 406 int 407 lfs_gather(struct lfs * fs, struct segment * sp, struct uvnode * vp, int (*match) (struct lfs *, struct ubuf *)) 408 { 409 struct ubuf *bp, *nbp; 410 int count = 0; 411 412 sp->vp = vp; 413 loop: 414 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 415 nbp = LIST_NEXT(bp, b_vnbufs); 416 417 assert(bp->b_flags & B_DELWRI); 418 if ((bp->b_flags & (B_BUSY | B_GATHERED)) || !match(fs, bp)) { 419 continue; 420 } 421 if (lfs_gatherblock(sp, bp)) { 422 goto loop; 423 } 424 count++; 425 } 426 427 lfs_updatemeta(sp); 428 sp->vp = NULL; 429 return count; 430 } 431 432 433 /* 434 * Change the given block's address to ndaddr, finding its previous 435 * location using ufs_bmaparray(). 436 * 437 * Account for this change in the segment table. 438 */ 439 void 440 lfs_update_single(struct lfs * fs, struct segment * sp, daddr_t lbn, 441 ufs_daddr_t ndaddr, int size) 442 { 443 SEGUSE *sup; 444 struct ubuf *bp; 445 struct indir a[NIADDR + 2], *ap; 446 struct inode *ip; 447 struct uvnode *vp; 448 daddr_t daddr, ooff; 449 int num, error; 450 int bb, osize, obb; 451 452 vp = sp->vp; 453 ip = VTOI(vp); 454 455 error = ufs_bmaparray(fs, vp, lbn, &daddr, a, &num); 456 if (error) 457 errx(1, "lfs_updatemeta: ufs_bmaparray returned %d looking up lbn %" PRId64 "\n", error, lbn); 458 if (daddr > 0) 459 daddr = dbtofsb(fs, daddr); 460 461 bb = fragstofsb(fs, numfrags(fs, size)); 462 switch (num) { 463 case 0: 464 ooff = ip->i_ffs1_db[lbn]; 465 if (ooff == UNWRITTEN) 466 ip->i_ffs1_blocks += bb; 467 else { 468 /* possible fragment truncation or extension */ 469 obb = btofsb(fs, ip->i_lfs_fragsize[lbn]); 470 ip->i_ffs1_blocks += (bb - obb); 471 } 472 ip->i_ffs1_db[lbn] = ndaddr; 473 break; 474 case 1: 475 ooff = ip->i_ffs1_ib[a[0].in_off]; 476 if (ooff == UNWRITTEN) 477 ip->i_ffs1_blocks += bb; 478 ip->i_ffs1_ib[a[0].in_off] = ndaddr; 479 break; 480 default: 481 ap = &a[num - 1]; 482 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NULL, &bp)) 483 errx(1, "lfs_updatemeta: bread bno %" PRId64, 484 ap->in_lbn); 485 486 ooff = ((ufs_daddr_t *) bp->b_data)[ap->in_off]; 487 if (ooff == UNWRITTEN) 488 ip->i_ffs1_blocks += bb; 489 ((ufs_daddr_t *) bp->b_data)[ap->in_off] = ndaddr; 490 (void) VOP_BWRITE(bp); 491 } 492 493 /* 494 * Update segment usage information, based on old size 495 * and location. 496 */ 497 if (daddr > 0) { 498 u_int32_t oldsn = dtosn(fs, daddr); 499 if (lbn >= 0 && lbn < NDADDR) 500 osize = ip->i_lfs_fragsize[lbn]; 501 else 502 osize = fs->lfs_bsize; 503 LFS_SEGENTRY(sup, fs, oldsn, bp); 504 sup->su_nbytes -= osize; 505 if (!(bp->b_flags & B_GATHERED)) 506 fs->lfs_flags |= LFS_IFDIRTY; 507 LFS_WRITESEGENTRY(sup, fs, oldsn, bp); 508 } 509 /* 510 * Now that this block has a new address, and its old 511 * segment no longer owns it, we can forget about its 512 * old size. 513 */ 514 if (lbn >= 0 && lbn < NDADDR) 515 ip->i_lfs_fragsize[lbn] = size; 516 } 517 518 /* 519 * Update the metadata that points to the blocks listed in the FINFO 520 * array. 521 */ 522 void 523 lfs_updatemeta(struct segment * sp) 524 { 525 struct ubuf *sbp; 526 struct lfs *fs; 527 struct uvnode *vp; 528 daddr_t lbn; 529 int i, nblocks, num; 530 int bb; 531 int bytesleft, size; 532 533 vp = sp->vp; 534 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 535 536 if (vp == NULL || nblocks == 0) 537 return; 538 539 /* 540 * This count may be high due to oversize blocks from lfs_gop_write. 541 * Correct for this. (XXX we should be able to keep track of these.) 542 */ 543 fs = sp->fs; 544 for (i = 0; i < nblocks; i++) { 545 if (sp->start_bpp[i] == NULL) { 546 printf("nblocks = %d, not %d\n", i, nblocks); 547 nblocks = i; 548 break; 549 } 550 num = howmany(sp->start_bpp[i]->b_bcount, fs->lfs_bsize); 551 nblocks -= num - 1; 552 } 553 554 /* 555 * Sort the blocks. 556 */ 557 lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks, fs->lfs_bsize); 558 559 /* 560 * Record the length of the last block in case it's a fragment. 561 * If there are indirect blocks present, they sort last. An 562 * indirect block will be lfs_bsize and its presence indicates 563 * that you cannot have fragments. 564 */ 565 sp->fip->fi_lastlength = ((sp->start_bpp[nblocks - 1]->b_bcount - 1) & 566 fs->lfs_bmask) + 1; 567 568 /* 569 * Assign disk addresses, and update references to the logical 570 * block and the segment usage information. 571 */ 572 for (i = nblocks; i--; ++sp->start_bpp) { 573 sbp = *sp->start_bpp; 574 lbn = *sp->start_lbp; 575 576 sbp->b_blkno = fsbtodb(fs, fs->lfs_offset); 577 578 /* 579 * If we write a frag in the wrong place, the cleaner won't 580 * be able to correctly identify its size later, and the 581 * segment will be uncleanable. (Even worse, it will assume 582 * that the indirect block that actually ends the list 583 * is of a smaller size!) 584 */ 585 if ((sbp->b_bcount & fs->lfs_bmask) && i != 0) 586 errx(1, "lfs_updatemeta: fragment is not last block"); 587 588 /* 589 * For each subblock in this possibly oversized block, 590 * update its address on disk. 591 */ 592 for (bytesleft = sbp->b_bcount; bytesleft > 0; 593 bytesleft -= fs->lfs_bsize) { 594 size = MIN(bytesleft, fs->lfs_bsize); 595 bb = fragstofsb(fs, numfrags(fs, size)); 596 lbn = *sp->start_lbp++; 597 lfs_update_single(fs, sp, lbn, fs->lfs_offset, size); 598 fs->lfs_offset += bb; 599 } 600 601 } 602 } 603 604 /* 605 * Start a new segment. 606 */ 607 int 608 lfs_initseg(struct lfs * fs) 609 { 610 struct segment *sp; 611 SEGUSE *sup; 612 SEGSUM *ssp; 613 struct ubuf *bp, *sbp; 614 int repeat; 615 616 sp = fs->lfs_sp; 617 618 repeat = 0; 619 620 /* Advance to the next segment. */ 621 if (!LFS_PARTIAL_FITS(fs)) { 622 /* lfs_avail eats the remaining space */ 623 fs->lfs_avail -= fs->lfs_fsbpseg - (fs->lfs_offset - 624 fs->lfs_curseg); 625 lfs_newseg(fs); 626 repeat = 1; 627 fs->lfs_offset = fs->lfs_curseg; 628 629 sp->seg_number = dtosn(fs, fs->lfs_curseg); 630 sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg); 631 632 /* 633 * If the segment contains a superblock, update the offset 634 * and summary address to skip over it. 635 */ 636 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 637 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 638 fs->lfs_offset += btofsb(fs, LFS_SBPAD); 639 sp->seg_bytes_left -= LFS_SBPAD; 640 } 641 brelse(bp); 642 /* Segment zero could also contain the labelpad */ 643 if (fs->lfs_version > 1 && sp->seg_number == 0 && 644 fs->lfs_start < btofsb(fs, LFS_LABELPAD)) { 645 fs->lfs_offset += btofsb(fs, LFS_LABELPAD) - fs->lfs_start; 646 sp->seg_bytes_left -= LFS_LABELPAD - fsbtob(fs, fs->lfs_start); 647 } 648 } else { 649 sp->seg_number = dtosn(fs, fs->lfs_curseg); 650 sp->seg_bytes_left = fsbtob(fs, fs->lfs_fsbpseg - 651 (fs->lfs_offset - fs->lfs_curseg)); 652 } 653 fs->lfs_lastpseg = fs->lfs_offset; 654 655 sp->fs = fs; 656 sp->ibp = NULL; 657 sp->idp = NULL; 658 sp->ninodes = 0; 659 sp->ndupino = 0; 660 661 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 662 sp->cbpp = sp->bpp; 663 sbp = *sp->cbpp = getblk(fs->lfs_unlockvp, 664 fsbtodb(fs, fs->lfs_offset), fs->lfs_sumsize); 665 sp->segsum = sbp->b_data; 666 memset(sp->segsum, 0, fs->lfs_sumsize); 667 sp->start_bpp = ++sp->cbpp; 668 fs->lfs_offset += btofsb(fs, fs->lfs_sumsize); 669 670 /* Set point to SEGSUM, initialize it. */ 671 ssp = sp->segsum; 672 ssp->ss_next = fs->lfs_nextseg; 673 ssp->ss_nfinfo = ssp->ss_ninos = 0; 674 ssp->ss_magic = SS_MAGIC; 675 676 /* Set pointer to first FINFO, initialize it. */ 677 sp->fip = (struct finfo *) ((caddr_t) sp->segsum + SEGSUM_SIZE(fs)); 678 sp->fip->fi_nblocks = 0; 679 sp->start_lbp = &sp->fip->fi_blocks[0]; 680 sp->fip->fi_lastlength = 0; 681 682 sp->seg_bytes_left -= fs->lfs_sumsize; 683 sp->sum_bytes_left = fs->lfs_sumsize - SEGSUM_SIZE(fs); 684 685 LFS_LOCK_BUF(sbp); 686 brelse(sbp); 687 return repeat; 688 } 689 690 /* 691 * Return the next segment to write. 692 */ 693 void 694 lfs_newseg(struct lfs * fs) 695 { 696 CLEANERINFO *cip; 697 SEGUSE *sup; 698 struct ubuf *bp; 699 int curseg, isdirty, sn; 700 701 LFS_SEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); 702 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 703 sup->su_nbytes = 0; 704 sup->su_nsums = 0; 705 sup->su_ninos = 0; 706 LFS_WRITESEGENTRY(sup, fs, dtosn(fs, fs->lfs_nextseg), bp); 707 708 LFS_CLEANERINFO(cip, fs, bp); 709 --cip->clean; 710 ++cip->dirty; 711 fs->lfs_nclean = cip->clean; 712 LFS_SYNC_CLEANERINFO(cip, fs, bp, 1); 713 714 fs->lfs_lastseg = fs->lfs_curseg; 715 fs->lfs_curseg = fs->lfs_nextseg; 716 for (sn = curseg = dtosn(fs, fs->lfs_curseg) + fs->lfs_interleave;;) { 717 sn = (sn + 1) % fs->lfs_nseg; 718 if (sn == curseg) 719 errx(1, "lfs_nextseg: no clean segments"); 720 LFS_SEGENTRY(sup, fs, sn, bp); 721 isdirty = sup->su_flags & SEGUSE_DIRTY; 722 brelse(bp); 723 724 if (!isdirty) 725 break; 726 } 727 728 ++fs->lfs_nactive; 729 fs->lfs_nextseg = sntod(fs, sn); 730 } 731 732 733 int 734 lfs_writeseg(struct lfs * fs, struct segment * sp) 735 { 736 struct ubuf **bpp, *bp; 737 SEGUSE *sup; 738 SEGSUM *ssp; 739 char *datap, *dp; 740 int i; 741 int do_again, nblocks, byteoffset; 742 size_t el_size; 743 u_short ninos; 744 struct uvnode *devvp; 745 746 /* 747 * If there are no buffers other than the segment summary to write 748 * and it is not a checkpoint, don't do anything. On a checkpoint, 749 * even if there aren't any buffers, you need to write the superblock. 750 */ 751 if ((nblocks = sp->cbpp - sp->bpp) == 1) 752 return 0; 753 754 devvp = fs->lfs_unlockvp; 755 756 /* Update the segment usage information. */ 757 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 758 759 /* Loop through all blocks, except the segment summary. */ 760 for (bpp = sp->bpp; ++bpp < sp->cbpp;) { 761 if ((*bpp)->b_vp != devvp) { 762 sup->su_nbytes += (*bpp)->b_bcount; 763 } 764 } 765 766 ssp = (SEGSUM *) sp->segsum; 767 768 ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs); 769 sup->su_nbytes += ssp->ss_ninos * DINODE1_SIZE; 770 771 if (fs->lfs_version == 1) 772 sup->su_olastmod = write_time; 773 else 774 sup->su_lastmod = write_time; 775 sup->su_ninos += ninos; 776 ++sup->su_nsums; 777 fs->lfs_dmeta += (btofsb(fs, fs->lfs_sumsize) + btofsb(fs, ninos * 778 fs->lfs_ibsize)); 779 fs->lfs_avail -= btofsb(fs, fs->lfs_sumsize); 780 781 do_again = !(bp->b_flags & B_GATHERED); 782 LFS_WRITESEGENTRY(sup, fs, sp->seg_number, bp); /* Ifile */ 783 784 /* 785 * Compute checksum across data and then across summary; the first 786 * block (the summary block) is skipped. Set the create time here 787 * so that it's guaranteed to be later than the inode mod times. 788 */ 789 if (fs->lfs_version == 1) 790 el_size = sizeof(u_long); 791 else 792 el_size = sizeof(u_int32_t); 793 datap = dp = malloc(nblocks * el_size); 794 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 795 ++bpp; 796 /* Loop through gop_write cluster blocks */ 797 for (byteoffset = 0; byteoffset < (*bpp)->b_bcount; 798 byteoffset += fs->lfs_bsize) { 799 memcpy(dp, (*bpp)->b_data + byteoffset, el_size); 800 dp += el_size; 801 } 802 bremfree(*bpp); 803 (*bpp)->b_flags |= B_BUSY; 804 } 805 if (fs->lfs_version == 1) 806 ssp->ss_ocreate = write_time; 807 else { 808 ssp->ss_create = write_time; 809 ssp->ss_serial = ++fs->lfs_serial; 810 ssp->ss_ident = fs->lfs_ident; 811 } 812 /* Set the summary block busy too */ 813 bremfree(*(sp->bpp)); 814 (*(sp->bpp))->b_flags |= B_BUSY; 815 816 ssp->ss_datasum = cksum(datap, (nblocks - 1) * el_size); 817 ssp->ss_sumsum = 818 cksum(&ssp->ss_datasum, fs->lfs_sumsize - sizeof(ssp->ss_sumsum)); 819 free(datap); 820 datap = dp = NULL; 821 fs->lfs_bfree -= (btofsb(fs, ninos * fs->lfs_ibsize) + 822 btofsb(fs, fs->lfs_sumsize)); 823 824 if (devvp == NULL) 825 errx(1, "devvp is NULL"); 826 for (bpp = sp->bpp, i = nblocks; i; bpp++, i--) { 827 bp = *bpp; 828 #if 0 829 printf("i = %d, bp = %p, flags %lx, bn = %" PRIx64 "\n", 830 nblocks - i, bp, bp->b_flags, bp->b_blkno); 831 printf(" vp = %p\n", bp->b_vp); 832 if (bp->b_vp != fs->lfs_unlockvp) 833 printf(" ino = %d lbn = %" PRId64 "\n", 834 VTOI(bp->b_vp)->i_number, bp->b_lblkno); 835 #endif 836 if (bp->b_vp == fs->lfs_unlockvp) 837 written_dev += bp->b_bcount; 838 else { 839 if (bp->b_lblkno >= 0) 840 written_data += bp->b_bcount; 841 else 842 written_indir += bp->b_bcount; 843 } 844 bp->b_flags &= ~(B_DELWRI | B_READ | B_GATHERED | B_ERROR | 845 B_LOCKED); 846 bwrite(bp); 847 written_bytes += bp->b_bcount; 848 } 849 written_inodes += ninos; 850 851 return (lfs_initseg(fs) || do_again); 852 } 853 854 /* 855 * Our own copy of shellsort. XXX use qsort or heapsort. 856 */ 857 void 858 lfs_shellsort(struct ubuf ** bp_array, ufs_daddr_t * lb_array, int nmemb, int size) 859 { 860 static int __rsshell_increments[] = {4, 1, 0}; 861 int incr, *incrp, t1, t2; 862 struct ubuf *bp_temp; 863 864 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 865 for (t1 = incr; t1 < nmemb; ++t1) 866 for (t2 = t1 - incr; t2 >= 0;) 867 if ((u_int32_t) bp_array[t2]->b_lblkno > 868 (u_int32_t) bp_array[t2 + incr]->b_lblkno) { 869 bp_temp = bp_array[t2]; 870 bp_array[t2] = bp_array[t2 + incr]; 871 bp_array[t2 + incr] = bp_temp; 872 t2 -= incr; 873 } else 874 break; 875 876 /* Reform the list of logical blocks */ 877 incr = 0; 878 for (t1 = 0; t1 < nmemb; t1++) { 879 for (t2 = 0; t2 * size < bp_array[t1]->b_bcount; t2++) { 880 lb_array[incr++] = bp_array[t1]->b_lblkno + t2; 881 } 882 } 883 } 884 885 886 /* 887 * lfs_seglock -- 888 * Single thread the segment writer. 889 */ 890 int 891 lfs_seglock(struct lfs * fs, unsigned long flags) 892 { 893 struct segment *sp; 894 895 if (fs->lfs_seglock) { 896 ++fs->lfs_seglock; 897 fs->lfs_sp->seg_flags |= flags; 898 return 0; 899 } 900 fs->lfs_seglock = 1; 901 902 sp = fs->lfs_sp = (struct segment *) malloc(sizeof(*sp)); 903 sp->bpp = (struct ubuf **) malloc(fs->lfs_ssize * sizeof(struct ubuf *)); 904 if (!sp->bpp) 905 errx(1, "Could not allocate %zu bytes: %s", 906 (size_t)(fs->lfs_ssize * sizeof(struct ubuf *)), 907 strerror(errno)); 908 sp->seg_flags = flags; 909 sp->vp = NULL; 910 sp->seg_iocount = 0; 911 (void) lfs_initseg(fs); 912 913 return 0; 914 } 915 916 /* 917 * lfs_segunlock -- 918 * Single thread the segment writer. 919 */ 920 void 921 lfs_segunlock(struct lfs * fs) 922 { 923 struct segment *sp; 924 struct ubuf *bp; 925 926 sp = fs->lfs_sp; 927 928 if (fs->lfs_seglock == 1) { 929 if (sp->bpp != sp->cbpp) { 930 /* Free allocated segment summary */ 931 fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize); 932 bp = *sp->bpp; 933 bremfree(bp); 934 bp->b_flags |= B_DONE | B_INVAL; 935 bp->b_flags &= ~B_DELWRI; 936 reassignbuf(bp, bp->b_vp); 937 bp->b_flags |= B_BUSY; /* XXX */ 938 brelse(bp); 939 } else 940 printf("unlock to 0 with no summary"); 941 942 free(sp->bpp); 943 sp->bpp = NULL; 944 free(sp); 945 fs->lfs_sp = NULL; 946 947 fs->lfs_nactive = 0; 948 949 /* Since we *know* everything's on disk, write both sbs */ 950 lfs_writesuper(fs, fs->lfs_sboffs[0]); 951 lfs_writesuper(fs, fs->lfs_sboffs[1]); 952 953 --fs->lfs_seglock; 954 fs->lfs_lockpid = 0; 955 } else if (fs->lfs_seglock == 0) { 956 errx(1, "Seglock not held"); 957 } else { 958 --fs->lfs_seglock; 959 } 960 } 961 962 int 963 lfs_writevnodes(struct lfs *fs, struct segment *sp, int op) 964 { 965 struct inode *ip; 966 struct uvnode *vp; 967 int inodes_written = 0; 968 969 LIST_FOREACH(vp, &vnodelist, v_mntvnodes) { 970 if (vp->v_bmap_op != lfs_vop_bmap) 971 continue; 972 973 ip = VTOI(vp); 974 975 if ((op == VN_DIROP && !(vp->v_flag & VDIROP)) || 976 (op != VN_DIROP && (vp->v_flag & VDIROP))) { 977 continue; 978 } 979 /* 980 * Write the inode/file if dirty and it's not the IFILE. 981 */ 982 if (ip->i_flag & IN_ALLMOD || !LIST_EMPTY(&vp->v_dirtyblkhd)) { 983 if (ip->i_number != LFS_IFILE_INUM) 984 lfs_writefile(fs, sp, vp); 985 (void) lfs_writeinode(fs, sp, ip); 986 inodes_written++; 987 } 988 } 989 return inodes_written; 990 } 991 992 void 993 lfs_writesuper(struct lfs *fs, ufs_daddr_t daddr) 994 { 995 struct ubuf *bp; 996 997 /* Set timestamp of this version of the superblock */ 998 if (fs->lfs_version == 1) 999 fs->lfs_otstamp = write_time; 1000 fs->lfs_tstamp = write_time; 1001 1002 /* Checksum the superblock and copy it into a buffer. */ 1003 fs->lfs_cksum = lfs_sb_cksum(&(fs->lfs_dlfs)); 1004 assert(daddr > 0); 1005 bp = getblk(fs->lfs_unlockvp, fsbtodb(fs, daddr), LFS_SBPAD); 1006 memset(bp->b_data + sizeof(struct dlfs), 0, 1007 LFS_SBPAD - sizeof(struct dlfs)); 1008 *(struct dlfs *) bp->b_data = fs->lfs_dlfs; 1009 1010 bwrite(bp); 1011 } 1012