1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_segment.c 7.28 (Berkeley) 08/01/92 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/namei.h> 13 #include <sys/kernel.h> 14 #include <sys/resourcevar.h> 15 #include <sys/file.h> 16 #include <sys/stat.h> 17 #include <sys/buf.h> 18 #include <sys/proc.h> 19 #include <sys/conf.h> 20 #include <sys/vnode.h> 21 #include <sys/malloc.h> 22 #include <sys/mount.h> 23 24 #include <miscfs/specfs/specdev.h> 25 #include <miscfs/fifofs/fifo.h> 26 27 #include <ufs/ufs/quota.h> 28 #include <ufs/ufs/inode.h> 29 #include <ufs/ufs/dir.h> 30 #include <ufs/ufs/ufsmount.h> 31 32 #include <ufs/lfs/lfs.h> 33 #include <ufs/lfs/lfs_extern.h> 34 35 /* In-memory description of a segment about to be written. */ 36 struct segment { 37 struct buf **bpp; /* pointer to buffer array */ 38 struct buf **cbpp; /* pointer to next available bp */ 39 struct buf *ibp; /* buffer pointer to inode page */ 40 struct finfo *fip; /* current fileinfo pointer */ 41 void *segsum; /* segment summary info */ 42 u_long ninodes; /* number of inodes in this segment */ 43 u_long seg_bytes_left; /* bytes left in segment */ 44 u_long sum_bytes_left; /* bytes left in summary block */ 45 u_long seg_number; /* number of this segment */ 46 #define SEGM_CKP 0x01 /* doing a checkpoint */ 47 u_long seg_flags; /* run-time flags for this segment */ 48 }; 49 50 /* 51 * Determine if it's OK to start a partial in this segment, or if we need 52 * to go on to a new segment. 53 */ 54 #define LFS_PARTIAL_FITS(fs) \ 55 ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 56 1 << (fs)->lfs_fsbtodb) 57 58 void lfs_callback __P((struct buf *)); 59 void lfs_gather __P((struct lfs *, struct segment *, 60 struct vnode *, int (*) __P((struct lfs *, struct buf *)))); 61 void lfs_initseg __P((struct lfs *, struct segment *)); 62 void lfs_iset __P((struct inode *, daddr_t, time_t)); 63 int lfs_match_data __P((struct lfs *, struct buf *)); 64 int lfs_match_dindir __P((struct lfs *, struct buf *)); 65 int lfs_match_indir __P((struct lfs *, struct buf *)); 66 int lfs_match_tindir __P((struct lfs *, struct buf *)); 67 struct buf * 68 lfs_newbuf __P((struct lfs *, daddr_t, size_t)); 69 void lfs_newseg __P((struct lfs *)); 70 void lfs_shellsort __P((struct buf **, daddr_t *, register int)); 71 void lfs_updatemeta __P((struct lfs *, 72 struct segment *, struct vnode *, daddr_t *, struct buf **, int)); 73 void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); 74 int lfs_writeinode __P((struct lfs *, struct segment *, struct inode *)); 75 int lfs_writeseg __P((struct lfs *, struct segment *)); 76 void lfs_writesuper __P((struct lfs *, struct segment *)); 77 void lfs_writevnodes __P((struct lfs *fs, struct mount *mp, 78 struct segment *sp, int dirops)); 79 80 int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 81 82 /* 83 * Ifile and meta data blocks are not marked busy, so segment writes MUST be 84 * single threaded. Currently, there are two paths into lfs_segwrite, sync() 85 * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 86 * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 87 */ 88 89 int 90 lfs_vflush(vp) 91 struct vnode *vp; 92 { 93 struct inode *ip; 94 struct lfs *fs; 95 struct segment *sp; 96 int error, s; 97 98 fs = VFSTOUFS(vp->v_mount)->um_lfs; 99 lfs_seglock(fs); 100 101 /* 102 * Allocate a segment structure and enough space to hold pointers to 103 * the maximum possible number of buffers which can be described in a 104 * single summary block. 105 */ 106 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 107 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 108 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 109 sp->seg_flags = SEGM_CKP; 110 111 /* 112 * Keep a cumulative count of the outstanding I/O operations. If the 113 * disk drive catches up with us it could go to zero before we finish, 114 * so we artificially increment it by one until we've scheduled all of 115 * the writes we intend to do. 116 */ 117 s = splbio(); 118 ++fs->lfs_iocount; 119 splx(s); 120 121 ip = VTOI(vp); 122 do { 123 lfs_initseg(fs, sp); 124 do { 125 if (vp->v_dirtyblkhd != NULL) 126 lfs_writefile(fs, sp, vp); 127 } while (lfs_writeinode(fs, sp, ip)); 128 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 129 130 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); 131 132 /* 133 * If the I/O count is non-zero, sleep until it reaches zero. At the 134 * moment, the user's process hangs around so we can sleep. 135 */ 136 s = splbio(); 137 if (--fs->lfs_iocount && (error = 138 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) { 139 free(sp->bpp, M_SEGMENT); 140 free(sp, M_SEGMENT); 141 return (error); 142 } 143 splx(s); 144 lfs_segunlock(fs); 145 146 /* 147 * XXX 148 * Should be writing a checkpoint? 149 */ 150 free(sp->bpp, M_SEGMENT); 151 free(sp, M_SEGMENT); 152 153 return (0); 154 } 155 156 void 157 lfs_writevnodes(fs, mp, sp, dirops) 158 struct lfs *fs; 159 struct mount *mp; 160 struct segment *sp; 161 int dirops; 162 { 163 struct inode *ip; 164 struct vnode *vp; 165 int error, s; 166 167 loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 168 /* 169 * If the vnode that we are about to sync is no longer 170 * associated with this mount point, start over. 171 */ 172 if (vp->v_mount != mp) 173 goto loop; 174 175 if (dirops && !(vp->v_flag & VDIROP) || 176 !dirops && (vp->v_flag & VDIROP)) 177 continue; 178 /* 179 * XXX 180 * Up the ref count so we don't get tossed out of 181 * memory. 182 */ 183 VREF(vp); 184 185 /* 186 * Write the inode/file if dirty and it's not the 187 * the IFILE. 188 */ 189 ip = VTOI(vp); 190 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 191 vp->v_dirtyblkhd != NULL) && 192 ip->i_number != LFS_IFILE_INUM) { 193 if (vp->v_dirtyblkhd != NULL) 194 lfs_writefile(fs, sp, vp); 195 (void) lfs_writeinode(fs, sp, ip); 196 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 197 } 198 vp->v_flag &= ~VDIROP; 199 vrele(vp); 200 } 201 } 202 203 int 204 lfs_segwrite(mp, do_ckp) 205 struct mount *mp; 206 int do_ckp; /* Do a checkpoint. */ 207 { 208 struct buf *bp; 209 struct inode *ip; 210 struct lfs *fs; 211 struct segment *sp; 212 struct vnode *vp; 213 SEGUSE *segusep; 214 daddr_t ibno; 215 int error, i, s; 216 217 fs = VFSTOUFS(mp)->um_lfs; 218 lfs_seglock(fs); 219 220 /* 221 * Allocate a segment structure and enough space to hold pointers to 222 * the maximum possible number of buffers which can be described in a 223 * single summary block. 224 */ 225 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 226 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 227 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 228 sp->seg_flags = do_ckp ? SEGM_CKP : 0; 229 lfs_initseg(fs, sp); 230 231 /* 232 * Keep a cumulative count of the outstanding I/O operations. If the 233 * disk drive catches up with us it could go to zero before we finish, 234 * so we artificially increment it by one until we've scheduled all of 235 * the writes we intend to do. If not a checkpoint, we never do the 236 * final decrement, avoiding the wakeup in the callback routine. 237 */ 238 s = splbio(); 239 ++fs->lfs_iocount; 240 splx(s); 241 242 lfs_writevnodes(fs, mp, sp, 0); 243 fs->lfs_writer = 1; 244 if (fs->lfs_dirops && (error = 245 tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { 246 free(sp->bpp, M_SEGMENT); 247 free(sp, M_SEGMENT); 248 fs->lfs_writer = 0; 249 return (error); 250 } 251 252 lfs_writevnodes(fs, mp, sp, 1); 253 254 /* 255 * If we are doing a checkpoint, mark everything since the 256 * last checkpoint as no longer ACTIVE. 257 */ 258 if (do_ckp) 259 for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz; 260 --ibno >= fs->lfs_cleansz; ) { 261 if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize, 262 NOCRED, &bp)) 263 264 panic("lfs: ifile read"); 265 segusep = (SEGUSE *)bp->b_un.b_addr; 266 for (i = fs->lfs_sepb; i--; segusep++) 267 segusep->su_flags &= ~SEGUSE_ACTIVE; 268 269 LFS_UBWRITE(bp); 270 } 271 272 if (do_ckp || fs->lfs_doifile) { 273 vp = fs->lfs_ivnode; 274 while (vget(vp)); 275 ip = VTOI(vp); 276 if (vp->v_dirtyblkhd != NULL) 277 lfs_writefile(fs, sp, vp); 278 (void)lfs_writeinode(fs, sp, ip); 279 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 280 vput(vp); 281 /* 282 * This should never happen because we just guaranteed 283 * that all the segment usage table blocks are dirty, so 284 * no new ones should get written. 285 */ 286 if (lfs_writeseg(fs, sp) && do_ckp) 287 panic("lfs_segwrite: created dirty blocks on ckp"); 288 } else 289 (void) lfs_writeseg(fs, sp); 290 291 /* 292 * If the I/O count is non-zero, sleep until it reaches zero. At the 293 * moment, the user's process hangs around so we can sleep. 294 */ 295 fs->lfs_writer = 0; 296 fs->lfs_doifile = 0; 297 wakeup(&fs->lfs_dirops); 298 299 s = splbio(); 300 --fs->lfs_iocount; 301 if (do_ckp) { 302 if (fs->lfs_iocount && (error = 303 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) { 304 free(sp->bpp, M_SEGMENT); 305 free(sp, M_SEGMENT); 306 return (error); 307 } 308 splx(s); 309 lfs_writesuper(fs, sp); 310 } else 311 splx(s); 312 313 lfs_segunlock(fs); 314 315 free(sp->bpp, M_SEGMENT); 316 free(sp, M_SEGMENT); 317 318 return (0); 319 } 320 321 /* 322 * Write the dirty blocks associated with a vnode. 323 */ 324 void 325 lfs_writefile(fs, sp, vp) 326 struct lfs *fs; 327 struct segment *sp; 328 struct vnode *vp; 329 { 330 struct buf *bp; 331 struct finfo *fip; 332 IFILE *ifp; 333 334 if (sp->seg_bytes_left < fs->lfs_bsize || 335 sp->sum_bytes_left < sizeof(struct finfo)) { 336 (void) lfs_writeseg(fs, sp); 337 lfs_initseg(fs, sp); 338 } 339 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 340 341 fip = sp->fip; 342 fip->fi_nblocks = 0; 343 fip->fi_ino = VTOI(vp)->i_number; 344 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 345 fip->fi_version = ifp->if_version; 346 brelse(bp); 347 348 /* 349 * It may not be necessary to write the meta-data blocks at this point, 350 * as the roll-forward recovery code should be able to reconstruct the 351 * list. 352 */ 353 lfs_gather(fs, sp, vp, lfs_match_data); 354 lfs_gather(fs, sp, vp, lfs_match_indir); 355 lfs_gather(fs, sp, vp, lfs_match_dindir); 356 #ifdef TRIPLE 357 lfs_gather(fs, sp, vp, lfs_match_tindir); 358 #endif 359 360 fip = sp->fip; 361 #ifdef META 362 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 363 #endif 364 if (fip->fi_nblocks != 0) { 365 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 366 sp->fip = 367 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 368 sizeof(daddr_t) * (fip->fi_nblocks - 1)); 369 } else 370 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 371 } 372 373 int 374 lfs_writeinode(fs, sp, ip) 375 struct lfs *fs; 376 struct segment *sp; 377 struct inode *ip; 378 { 379 struct buf *bp, *ibp; 380 IFILE *ifp; 381 SEGUSE *sup; 382 daddr_t daddr; 383 ino_t ino; 384 int ndx; 385 int redo_ifile = 0; 386 387 /* Allocate a new inode block if necessary. */ 388 if (sp->ibp == NULL) { 389 /* Allocate a new segment if necessary. */ 390 if (sp->seg_bytes_left < fs->lfs_bsize || 391 sp->sum_bytes_left < sizeof(daddr_t)) { 392 (void) lfs_writeseg(fs, sp); 393 lfs_initseg(fs, sp); 394 } 395 396 /* Get next inode block. */ 397 daddr = fs->lfs_offset; 398 fs->lfs_offset += fsbtodb(fs, 1); 399 sp->ibp = *sp->cbpp++ = 400 lfs_newbuf(fs, daddr, fs->lfs_bsize); 401 402 /* Set remaining space counters. */ 403 sp->seg_bytes_left -= fs->lfs_bsize; 404 sp->sum_bytes_left -= sizeof(daddr_t); 405 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 406 sp->ninodes / INOPB(fs) - 1; 407 ((daddr_t *)(sp->segsum))[ndx] = daddr; 408 } 409 410 /* Update the inode times and copy the inode onto the inode page. */ 411 ITIMES(ip, &time, &time); 412 bp = sp->ibp; 413 bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din; 414 415 /* Increment inode count in segment summary block. */ 416 ++((SEGSUM *)(sp->segsum))->ss_ninos; 417 418 /* If this page is full, set flag to allocate a new page. */ 419 if (++sp->ninodes % INOPB(fs) == 0) 420 sp->ibp = NULL; 421 422 /* 423 * If updating the ifile, update the super-block. Update the disk 424 * address and access times for this inode in the ifile. 425 */ 426 ino = ip->i_number; 427 if (ino == LFS_IFILE_INUM) { 428 daddr = fs->lfs_idaddr; 429 fs->lfs_idaddr = bp->b_blkno; 430 } else { 431 LFS_IENTRY(ifp, fs, ino, ibp); 432 daddr = ifp->if_daddr; 433 ifp->if_daddr = bp->b_blkno; 434 LFS_UBWRITE(ibp); 435 } 436 437 /* 438 * No need to update segment usage if there was no former inode address 439 * or if the last inode address is in the current partial segment. 440 */ 441 if (daddr != LFS_UNUSED_DADDR && 442 !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) { 443 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 444 #ifdef DIAGNOSTIC 445 if (sup->su_nbytes < sizeof(struct dinode)) { 446 /* XXX -- Change to a panic. */ 447 printf("lfs: negative bytes (segment %d)\n", 448 datosn(fs, daddr)); 449 panic("negative bytes"); 450 } 451 #endif 452 sup->su_nbytes -= sizeof(struct dinode); 453 LFS_UBWRITE(bp); 454 redo_ifile = (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 455 } 456 return (redo_ifile); 457 } 458 459 void 460 lfs_gather(fs, sp, vp, match) 461 struct lfs *fs; 462 struct segment *sp; 463 struct vnode *vp; 464 int (*match) __P((struct lfs *, struct buf *)); 465 { 466 struct buf **bpp, *bp; 467 struct buf *lastbp; 468 struct finfo *fip; 469 struct inode *ip; 470 daddr_t *lbp, *start_lbp; 471 u_long version; 472 int s; 473 474 ip = VTOI(vp); 475 bpp = sp->cbpp; 476 fip = sp->fip; 477 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 478 479 loop: s = splbio(); 480 lastbp = NULL; 481 for (bp = vp->v_dirtyblkhd; bp; lastbp = bp, bp = bp->b_blockf) { 482 if (bp->b_flags & B_BUSY || !match(fs, bp) || 483 bp->b_flags & B_GATHERED) 484 continue; 485 #ifdef DIAGNOSTIC 486 if (!(bp->b_flags & B_DELWRI)) 487 panic("lfs_gather: bp not B_DELWRI"); 488 if (!(bp->b_flags & B_LOCKED)) 489 panic("lfs_gather: bp not B_LOCKED"); 490 #endif 491 /* 492 * If full, finish this segment. We may be doing I/O, so 493 * release and reacquire the splbio(). 494 */ 495 if (sp->sum_bytes_left < sizeof(daddr_t) || 496 sp->seg_bytes_left < fs->lfs_bsize) { 497 splx(s); 498 lfs_updatemeta(fs, 499 sp, vp, start_lbp, bpp, lbp - start_lbp); 500 501 /* Add the current file to the segment summary. */ 502 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 503 504 version = fip->fi_version; 505 (void) lfs_writeseg(fs, sp); 506 lfs_initseg(fs, sp); 507 508 fip = sp->fip; 509 fip->fi_version = version; 510 fip->fi_ino = ip->i_number; 511 start_lbp = lbp = fip->fi_blocks; 512 513 sp->sum_bytes_left -= 514 sizeof(struct finfo) - sizeof(daddr_t); 515 516 bpp = sp->cbpp; 517 goto loop; 518 } 519 520 /* Insert into the buffer list, update the FINFO block. */ 521 bp->b_flags |= B_GATHERED; 522 *sp->cbpp++ = bp; 523 ++fip->fi_nblocks; 524 *lbp++ = bp->b_lblkno; 525 526 sp->sum_bytes_left -= sizeof(daddr_t); 527 sp->seg_bytes_left -= bp->b_bufsize; 528 } 529 splx(s); 530 lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 531 } 532 533 /* 534 * Update the metadata that points to the blocks listed in the FINFO 535 * array. 536 */ 537 void 538 lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 539 struct lfs *fs; 540 struct segment *sp; 541 struct vnode *vp; 542 daddr_t *lbp; 543 struct buf **bpp; 544 int nblocks; 545 { 546 SEGUSE *sup; 547 struct buf *bp; 548 INDIR a[NIADDR], *ap; 549 struct inode *ip; 550 daddr_t daddr, lbn, off; 551 int db_per_fsb, error, i, num; 552 553 if (nblocks == 0) 554 return; 555 556 /* Sort the blocks. */ 557 lfs_shellsort(bpp, lbp, nblocks); 558 559 /* 560 * Assign disk addresses, and update references to the logical 561 * block and the segment usage information. 562 */ 563 db_per_fsb = fsbtodb(fs, 1); 564 for (i = nblocks; i--; ++bpp) { 565 lbn = *lbp++; 566 (*bpp)->b_blkno = off = fs->lfs_offset; 567 fs->lfs_offset += db_per_fsb; 568 569 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 570 panic("lfs_updatemeta: lfs_bmaparray %d", error); 571 ip = VTOI(vp); 572 switch (num) { 573 case 0: 574 ip->i_db[lbn] = off; 575 break; 576 case 1: 577 ip->i_ib[a[0].in_off] = off; 578 break; 579 default: 580 ap = &a[num - 1]; 581 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 582 panic("lfs_updatemeta: bread bno %d", 583 ap->in_lbn); 584 /* 585 * Bread may create a new indirect block which needs 586 * to get counted for the inode. 587 */ 588 if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) { 589 ip->i_blocks += btodb(fs->lfs_bsize); 590 fs->lfs_bfree -= btodb(fs->lfs_bsize); 591 } 592 bp->b_un.b_daddr[ap->in_off] = off; 593 VOP_BWRITE(bp); 594 } 595 596 /* Update segment usage information. */ 597 if (daddr != UNASSIGNED) { 598 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 599 #ifdef DIAGNOSTIC 600 if (sup->su_nbytes < fs->lfs_bsize) { 601 /* XXX -- Change to a panic. */ 602 printf("lfs: negative bytes (segment %d)\n", 603 datosn(fs, daddr)); 604 panic ("Negative Bytes"); 605 } 606 #endif 607 sup->su_nbytes -= fs->lfs_bsize; 608 LFS_UBWRITE(bp); 609 } 610 } 611 } 612 613 /* 614 * Start a new segment. 615 */ 616 void 617 lfs_initseg(fs, sp) 618 struct lfs *fs; 619 struct segment *sp; 620 { 621 SEGUSE *sup; 622 SEGSUM *ssp; 623 struct buf *bp; 624 daddr_t lbn, *lbnp; 625 626 /* Advance to the next segment. */ 627 if (!LFS_PARTIAL_FITS(fs)) { 628 /* Wake up any cleaning procs waiting on this file system. */ 629 wakeup(&fs->lfs_nextseg); 630 wakeup(&lfs_allclean_wakeup); 631 632 lfs_newseg(fs); 633 fs->lfs_offset = fs->lfs_curseg; 634 sp->seg_number = datosn(fs, fs->lfs_curseg); 635 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 636 637 /* 638 * If the segment contains a superblock, update the offset 639 * and summary address to skip over it. 640 */ 641 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 642 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 643 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 644 sp->seg_bytes_left -= LFS_SBPAD; 645 } 646 brelse(bp); 647 } else { 648 sp->seg_number = datosn(fs, fs->lfs_curseg); 649 sp->seg_bytes_left = (fs->lfs_dbpseg - 650 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 651 } 652 fs->lfs_lastpseg = fs->lfs_offset; 653 654 sp->ibp = NULL; 655 sp->ninodes = 0; 656 657 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 658 sp->cbpp = sp->bpp; 659 *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE); 660 sp->segsum = (*sp->cbpp)->b_un.b_addr; 661 ++sp->cbpp; 662 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 663 664 /* Set point to SEGSUM, initialize it. */ 665 ssp = sp->segsum; 666 ssp->ss_next = fs->lfs_nextseg; 667 ssp->ss_nfinfo = ssp->ss_ninos = 0; 668 669 /* Set pointer to first FINFO, initialize it. */ 670 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 671 sp->fip->fi_nblocks = 0; 672 673 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 674 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 675 } 676 677 /* 678 * Return the next segment to write. 679 */ 680 void 681 lfs_newseg(fs) 682 struct lfs *fs; 683 { 684 CLEANERINFO *cip; 685 SEGUSE *sup; 686 struct buf *bp; 687 int curseg, isdirty, sn; 688 689 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 690 sup->su_flags |= SEGUSE_DIRTY; 691 LFS_UBWRITE(bp); 692 693 LFS_CLEANERINFO(cip, fs, bp); 694 --cip->clean; 695 ++cip->dirty; 696 LFS_UBWRITE(bp); 697 698 fs->lfs_lastseg = fs->lfs_curseg; 699 fs->lfs_curseg = fs->lfs_nextseg; 700 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 701 sn = (sn + 1) % fs->lfs_nseg; 702 if (sn == curseg) 703 panic("lfs_nextseg: no clean segments"); 704 LFS_SEGENTRY(sup, fs, sn, bp); 705 isdirty = sup->su_flags & SEGUSE_DIRTY; 706 brelse(bp); 707 if (!isdirty) 708 break; 709 } 710 711 fs->lfs_nextseg = sntoda(fs, sn); 712 } 713 714 int 715 lfs_writeseg(fs, sp) 716 struct lfs *fs; 717 struct segment *sp; 718 { 719 struct buf **bpp, *bp, *cbp; 720 SEGUSE *sup; 721 SEGSUM *ssp; 722 dev_t i_dev; 723 size_t size; 724 u_long *datap, *dp; 725 int ch_per_blk, do_again, i, nblocks, num, s; 726 int (*strategy)__P((struct vop_strategy_args *)); 727 struct vop_strategy_args vop_strategy_a; 728 u_short ninos; 729 char *p; 730 731 /* Checkpoint always writes superblock, even if no data blocks. */ 732 if ((nblocks = sp->cbpp - sp->bpp) == 0 && !(sp->seg_flags & SEGM_CKP)) 733 return (0); 734 735 /* 736 * Compute checksum across data and then across summary; the first 737 * block (the summary block) is skipped. Set the create time here 738 * so that it's guaranteed to be later than the inode mod times. 739 * 740 * XXX 741 * Fix this to do it inline, instead of malloc/copy. 742 */ 743 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 744 for (bpp = sp->bpp, i = nblocks - 1; i--;) 745 *dp++ = (*++bpp)->b_un.b_words[0]; 746 ssp = (SEGSUM *)sp->segsum; 747 ssp->ss_create = time.tv_sec; 748 ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long)); 749 ssp->ss_sumsum = 750 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 751 free(datap, M_SEGMENT); 752 753 /* Update the segment usage information. */ 754 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 755 ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs); 756 sup->su_nbytes += nblocks - 1 - ninos << fs->lfs_bshift; 757 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 758 sup->su_nbytes += LFS_SUMMARY_SIZE; 759 sup->su_lastmod = time.tv_sec; 760 sup->su_flags |= SEGUSE_ACTIVE; 761 sup->su_ninos += ninos; 762 ++sup->su_nsums; 763 LFS_UBWRITE(bp); 764 fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE); 765 do_again = !(bp->b_flags & B_GATHERED); 766 767 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 768 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 769 770 /* 771 * When we simply write the blocks we lose a rotation for every block 772 * written. To avoid this problem, we allocate memory in chunks, copy 773 * the buffers into the chunk and write the chunk. 56K was chosen as 774 * some driver/controllers can't handle unsigned 16 bit transfers. 775 * When the data is copied to the chunk, turn off the the B_LOCKED bit 776 * and brelse the buffer (which will move them to the LRU list). Add 777 * the B_CALL flag to the buffer header so we can count I/O's for the 778 * checkpoints and so we can release the allocated memory. 779 * 780 * XXX 781 * This should be removed if the new virtual memory system allows us to 782 * easily make the buffers contiguous in kernel memory and if that's 783 * fast enough. 784 */ 785 #define LFS_CHUNKSIZE (56 * 1024) 786 ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize; 787 for (bpp = sp->bpp, i = nblocks; i;) { 788 num = ch_per_blk; 789 if (num > i) 790 num = i; 791 i -= num; 792 size = num * fs->lfs_bsize; 793 794 cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0); 795 cbp->b_dev = i_dev; 796 cbp->b_flags = B_ASYNC | B_BUSY | B_CALL; 797 cbp->b_iodone = lfs_callback; 798 cbp->b_saveaddr = cbp->b_un.b_addr; 799 cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK); 800 801 s = splbio(); 802 ++fs->lfs_iocount; 803 for (p = cbp->b_un.b_addr; num--;) { 804 bp = *bpp++; 805 bcopy(bp->b_un.b_addr, p, bp->b_bcount); 806 p += bp->b_bcount; 807 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI | 808 B_LOCKED | B_GATHERED); 809 if (!(bp->b_flags & (B_NOCACHE | B_INVAL))) { 810 bremfree(bp); 811 reassignbuf(bp, bp->b_vp); 812 } 813 brelse(bp); 814 } 815 splx(s); 816 cbp->b_bcount = p - cbp->b_un.b_addr; 817 vop_strategy_a.a_desc = VDESC(vop_strategy); 818 vop_strategy_a.a_bp = cbp; 819 (strategy)(&vop_strategy_a); 820 } 821 return (do_again); 822 } 823 824 void 825 lfs_writesuper(fs, sp) 826 struct lfs *fs; 827 struct segment *sp; 828 { 829 struct buf *bp; 830 dev_t i_dev; 831 int (*strategy) __P((struct vop_strategy_args *)); 832 struct vop_strategy_args vop_strategy_a; 833 834 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 835 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 836 837 /* Checksum the superblock and copy it into a buffer. */ 838 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 839 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); 840 *bp->b_un.b_lfs = *fs; 841 842 /* Write the first superblock (wait). */ 843 bp->b_dev = i_dev; 844 bp->b_flags |= B_BUSY; 845 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 846 vop_strategy_a.a_desc = VDESC(vop_strategy); 847 vop_strategy_a.a_bp = bp; 848 (strategy)(&vop_strategy_a); 849 biowait(bp); 850 851 /* Write the second superblock (don't wait). */ 852 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 853 bp->b_flags |= B_ASYNC | B_BUSY; 854 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 855 (strategy)(&vop_strategy_a); 856 } 857 858 /* 859 * Logical block number match routines used when traversing the dirty block 860 * chain. 861 */ 862 int 863 lfs_match_data(fs, bp) 864 struct lfs *fs; 865 struct buf *bp; 866 { 867 return (bp->b_lblkno >= 0); 868 } 869 870 int 871 lfs_match_indir(fs, bp) 872 struct lfs *fs; 873 struct buf *bp; 874 { 875 int lbn; 876 877 lbn = bp->b_lblkno; 878 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 879 } 880 881 int 882 lfs_match_dindir(fs, bp) 883 struct lfs *fs; 884 struct buf *bp; 885 { 886 int lbn; 887 888 lbn = bp->b_lblkno; 889 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 890 } 891 892 int 893 lfs_match_tindir(fs, bp) 894 struct lfs *fs; 895 struct buf *bp; 896 { 897 int lbn; 898 899 lbn = bp->b_lblkno; 900 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 901 } 902 903 /* 904 * Allocate a new buffer header. 905 */ 906 struct buf * 907 lfs_newbuf(fs, daddr, size) 908 struct lfs *fs; 909 daddr_t daddr; 910 size_t size; 911 { 912 struct buf *bp; 913 914 bp = getnewbuf(); 915 bremhash(bp); 916 bgetvp(fs->lfs_ivnode, bp); 917 bp->b_bcount = 0; 918 bp->b_lblkno = daddr; 919 bp->b_blkno = daddr; 920 bp->b_error = 0; 921 bp->b_resid = 0; 922 if (size) 923 allocbuf(bp, size); 924 bp->b_flags |= B_NOCACHE; 925 bp->b_saveaddr = NULL; 926 binshash(bp, &bfreelist[BQ_AGE]); 927 return (bp); 928 } 929 930 void 931 lfs_callback(bp) 932 struct buf *bp; 933 { 934 struct lfs *fs; 935 936 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 937 #ifdef DIAGNOSTIC 938 if (fs->lfs_iocount == 0) 939 panic("lfs_callback: zero iocount\n"); 940 #endif 941 if (--fs->lfs_iocount == 0) 942 wakeup(&fs->lfs_iocount); 943 944 if (bp->b_saveaddr) { 945 free(bp->b_un.b_addr, M_SEGMENT); 946 bp->b_un.b_addr = bp->b_saveaddr; 947 bp->b_saveaddr = NULL; 948 } 949 brelse(bp); 950 } 951 952 /* 953 * Shellsort (diminishing increment sort) from Data Structures and 954 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 955 * see also Knuth Vol. 3, page 84. The increments are selected from 956 * formula (8), page 95. Roughly O(N^3/2). 957 */ 958 /* 959 * This is our own private copy of shellsort because we want to sort 960 * two parallel arrays (the array of buffer pointers and the array of 961 * logical block numbers) simultaneously. Note that we cast the array 962 * of logical block numbers to a unsigned in this routine so that the 963 * negative block numbers (meta data blocks) sort AFTER the data blocks. 964 */ 965 void 966 lfs_shellsort(bp_array, lb_array, nmemb) 967 struct buf **bp_array; 968 daddr_t *lb_array; 969 register int nmemb; 970 { 971 static int __rsshell_increments[] = { 4, 1, 0 }; 972 register int incr, *incrp, t1, t2; 973 struct buf *bp_temp; 974 u_long lb_temp; 975 976 for (incrp = __rsshell_increments; incr = *incrp++;) 977 for (t1 = incr; t1 < nmemb; ++t1) 978 for (t2 = t1 - incr; t2 >= 0;) 979 if (lb_array[t2] > lb_array[t2 + incr]) { 980 lb_temp = lb_array[t2]; 981 lb_array[t2] = lb_array[t2 + incr]; 982 lb_array[t2 + incr] = lb_temp; 983 bp_temp = bp_array[t2]; 984 bp_array[t2] = bp_array[t2 + incr]; 985 bp_array[t2 + incr] = bp_temp; 986 t2 -= incr; 987 } else 988 break; 989 } 990