1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_segment.c 7.18 (Berkeley) 05/14/92 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/namei.h> 13 #include <sys/kernel.h> 14 #include <sys/resourcevar.h> 15 #include <sys/file.h> 16 #include <sys/stat.h> 17 #include <sys/buf.h> 18 #include <sys/proc.h> 19 #include <sys/conf.h> 20 #include <sys/vnode.h> 21 #include <sys/specdev.h> 22 #include <sys/fifo.h> 23 #include <sys/malloc.h> 24 #include <sys/mount.h> 25 26 #include <ufs/ufs/quota.h> 27 #include <ufs/ufs/inode.h> 28 #include <ufs/ufs/dir.h> 29 #include <ufs/ufs/ufsmount.h> 30 31 #include <ufs/lfs/lfs.h> 32 #include <ufs/lfs/lfs_extern.h> 33 34 /* In-memory description of a segment about to be written. */ 35 struct segment { 36 struct buf **bpp; /* pointer to buffer array */ 37 struct buf **cbpp; /* pointer to next available bp */ 38 struct buf *ibp; /* buffer pointer to inode page */ 39 struct finfo *fip; /* current fileinfo pointer */ 40 void *segsum; /* segment summary info */ 41 u_long ninodes; /* number of inodes in this segment */ 42 u_long seg_bytes_left; /* bytes left in segment */ 43 u_long sum_bytes_left; /* bytes left in summary block */ 44 u_long seg_number; /* number of this segment */ 45 #define SEGM_CKP 0x01 /* doing a checkpoint */ 46 u_long seg_flags; /* run-time flags for this segment */ 47 }; 48 49 /* 50 * Determine if it's OK to start a partial in this segment, or if we need 51 * to go on to a new segment. 52 */ 53 #define LFS_PARTIAL_FITS(fs) \ 54 ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 55 1 << (fs)->lfs_fsbtodb) 56 57 void lfs_callback __P((struct buf *)); 58 void lfs_gather __P((struct lfs *, struct segment *, 59 struct vnode *, int (*) __P((struct lfs *, struct buf *)))); 60 void lfs_initseg __P((struct lfs *, struct segment *)); 61 void lfs_iset __P((struct inode *, daddr_t, time_t)); 62 int lfs_match_data __P((struct lfs *, struct buf *)); 63 int lfs_match_dindir __P((struct lfs *, struct buf *)); 64 int lfs_match_indir __P((struct lfs *, struct buf *)); 65 int lfs_match_tindir __P((struct lfs *, struct buf *)); 66 struct buf * 67 lfs_newbuf __P((struct lfs *, daddr_t, size_t)); 68 void lfs_newseg __P((struct lfs *)); 69 void lfs_shellsort __P((struct buf **, daddr_t *, register int)); 70 void lfs_updatemeta __P((struct lfs *, 71 struct segment *, struct vnode *, daddr_t *, struct buf **, int)); 72 void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); 73 void lfs_writeinode __P((struct lfs *, struct segment *, struct inode *)); 74 void lfs_writeseg __P((struct lfs *, struct segment *)); 75 void lfs_writesuper __P((struct lfs *, struct segment *)); 76 77 int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 78 79 /* 80 * Ifile and meta data blocks are not marked busy, so segment writes MUST be 81 * single threaded. Currently, there are two paths into lfs_segwrite, sync() 82 * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 83 * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 84 */ 85 86 int 87 lfs_vflush(vp) 88 struct vnode *vp; 89 { 90 struct inode *ip; 91 struct lfs *fs; 92 struct mount *mp; 93 struct segment *sp; 94 int error, s; 95 96 #ifdef VERBOSE 97 printf("lfs_vflush\n"); 98 #endif 99 mp = vp->v_mount; 100 fs = VFSTOUFS(mp)->um_lfs; 101 102 /* 103 * XXX 104 * check flags? 105 * mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY) || 106 */ 107 if (vfs_busy(mp)) 108 return (0); 109 110 /* 111 * Allocate a segment structure and enough space to hold pointers to 112 * the maximum possible number of buffers which can be described in a 113 * single summary block. 114 */ 115 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 116 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 117 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 118 sp->seg_flags = SEGM_CKP; 119 lfs_initseg(fs, sp); 120 121 /* 122 * Keep a cumulative count of the outstanding I/O operations. If the 123 * disk drive catches up with us it could go to zero before we finish, 124 * so we artificially increment it by one until we've scheduled all of 125 * the writes we intend to do. 126 */ 127 s = splbio(); 128 ++fs->lfs_iocount; 129 splx(s); 130 131 if (vp->v_dirtyblkhd != NULL) 132 lfs_writefile(fs, sp, vp); 133 ip = VTOI(vp); 134 lfs_writeinode(fs, sp, ip); 135 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 136 137 lfs_writeseg(fs, sp); 138 139 /* 140 * If the I/O count is non-zero, sleep until it reaches zero. At the 141 * moment, the user's process hangs around so we can sleep. 142 */ 143 s = splbio(); 144 if (--fs->lfs_iocount && (error = 145 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) { 146 free(sp->bpp, M_SEGMENT); 147 free(sp, M_SEGMENT); 148 return (error); 149 } 150 splx(s); 151 vfs_unbusy(mp); 152 153 /* 154 * XXX 155 * Should be writing a checkpoint? 156 */ 157 free(sp->bpp, M_SEGMENT); 158 free(sp, M_SEGMENT); 159 160 return (0); 161 } 162 163 int 164 lfs_segwrite(mp, do_ckp) 165 struct mount *mp; 166 int do_ckp; /* Do a checkpoint. */ 167 { 168 USES_VOP_ISLOCKED; 169 struct inode *ip; 170 struct lfs *fs; 171 struct segment *sp; 172 struct vnode *vp; 173 int error, islocked, s; 174 175 #ifdef VERBOSE 176 printf("lfs_segwrite\n"); 177 #endif 178 fs = VFSTOUFS(mp)->um_lfs; 179 180 /* 181 * Allocate a segment structure and enough space to hold pointers to 182 * the maximum possible number of buffers which can be described in a 183 * single summary block. 184 */ 185 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 186 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 187 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 188 sp->seg_flags = do_ckp ? SEGM_CKP : 0; 189 lfs_initseg(fs, sp); 190 191 /* 192 * Keep a cumulative count of the outstanding I/O operations. If the 193 * disk drive catches up with us it could go to zero before we finish, 194 * so we artificially increment it by one until we've scheduled all of 195 * the writes we intend to do. If not a checkpoint, we never do the 196 * final decrement, avoiding the wakeup in the callback routine. 197 */ 198 s = splbio(); 199 ++fs->lfs_iocount; 200 splx(s); 201 202 loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 203 /* 204 * If the vnode that we are about to sync is no longer 205 * associated with this mount point, start over. 206 */ 207 if (vp->v_mount != mp) 208 goto loop; 209 210 islocked = VOP_ISLOCKED(vp); 211 212 /* 213 * XXX 214 * This is wrong, I think -- we should just wait until we 215 * get the vnode and go on. Probably going to reschedule 216 * all of the writes we already scheduled... 217 */ 218 if (islocked) 219 VREF(vp); 220 else if (vget(vp)) 221 { 222 printf("lfs_segment: failed to get vnode (tell Keith)!\n"); 223 goto loop; 224 } 225 /* 226 * Write the inode/file if dirty and it's not the 227 * the IFILE. 228 */ 229 ip = VTOI(vp); 230 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 231 vp->v_dirtyblkhd != NULL) && 232 ip->i_number != LFS_IFILE_INUM) { 233 if (vp->v_dirtyblkhd != NULL) 234 lfs_writefile(fs, sp, vp); 235 lfs_writeinode(fs, sp, ip); 236 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 237 } 238 if (islocked) 239 vrele(vp); 240 else 241 vput(vp); 242 } 243 if (do_ckp) { 244 vp = fs->lfs_ivnode; 245 while (vget(vp)); 246 ip = VTOI(vp); 247 if (vp->v_dirtyblkhd != NULL) 248 lfs_writefile(fs, sp, vp); 249 lfs_writeinode(fs, sp, ip); 250 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 251 vput(vp); 252 } 253 lfs_writeseg(fs, sp); 254 255 /* 256 * If the I/O count is non-zero, sleep until it reaches zero. At the 257 * moment, the user's process hangs around so we can sleep. 258 */ 259 s = splbio(); 260 --fs->lfs_iocount; 261 if (do_ckp) { 262 if (fs->lfs_iocount && (error = 263 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) { 264 free(sp->bpp, M_SEGMENT); 265 free(sp, M_SEGMENT); 266 return (error); 267 } 268 splx(s); 269 lfs_writesuper(fs, sp); 270 } else 271 splx(s); 272 273 free(sp->bpp, M_SEGMENT); 274 free(sp, M_SEGMENT); 275 276 return (0); 277 } 278 279 /* 280 * Write the dirty blocks associated with a vnode. 281 */ 282 void 283 lfs_writefile(fs, sp, vp) 284 struct lfs *fs; 285 struct segment *sp; 286 struct vnode *vp; 287 { 288 struct buf *bp; 289 struct finfo *fip; 290 IFILE *ifp; 291 292 #ifdef VERBOSE 293 printf("lfs_writefile\n"); 294 #endif 295 if (sp->seg_bytes_left < fs->lfs_bsize || 296 sp->sum_bytes_left < sizeof(struct finfo)) { 297 lfs_writeseg(fs, sp); 298 lfs_initseg(fs, sp); 299 } 300 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 301 302 fip = sp->fip; 303 fip->fi_nblocks = 0; 304 fip->fi_ino = VTOI(vp)->i_number; 305 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 306 fip->fi_version = ifp->if_version; 307 brelse(bp); 308 309 /* 310 * It may not be necessary to write the meta-data blocks at this point, 311 * as the roll-forward recovery code should be able to reconstruct the 312 * list. 313 */ 314 lfs_gather(fs, sp, vp, lfs_match_data); 315 lfs_gather(fs, sp, vp, lfs_match_indir); 316 lfs_gather(fs, sp, vp, lfs_match_dindir); 317 #ifdef TRIPLE 318 lfs_gather(fs, sp, vp, lfs_match_tindir); 319 #endif 320 321 fip = sp->fip; 322 #ifdef META 323 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 324 #endif 325 if (fip->fi_nblocks != 0) { 326 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 327 sp->fip = 328 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 329 sizeof(daddr_t) * (fip->fi_nblocks - 1)); 330 } else 331 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 332 } 333 334 void 335 lfs_writeinode(fs, sp, ip) 336 struct lfs *fs; 337 struct segment *sp; 338 struct inode *ip; 339 { 340 struct buf *bp, *ibp; 341 IFILE *ifp; 342 SEGUSE *sup; 343 daddr_t daddr; 344 ino_t ino; 345 int ndx; 346 347 #ifdef VERBOSE 348 printf("lfs_writeinode\n"); 349 #endif 350 /* Allocate a new inode block if necessary. */ 351 if (sp->ibp == NULL) { 352 /* Allocate a new segment if necessary. */ 353 if (sp->seg_bytes_left < fs->lfs_bsize || 354 sp->sum_bytes_left < sizeof(daddr_t)) { 355 lfs_writeseg(fs, sp); 356 lfs_initseg(fs, sp); 357 } 358 359 /* Get next inode block. */ 360 daddr = fs->lfs_offset; 361 fs->lfs_offset += fsbtodb(fs, 1); 362 sp->ibp = *sp->cbpp++ = 363 lfs_newbuf(fs, daddr, fs->lfs_bsize); 364 365 /* Set remaining space counters. */ 366 sp->seg_bytes_left -= fs->lfs_bsize; 367 sp->sum_bytes_left -= sizeof(daddr_t); 368 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 369 sp->ninodes / INOPB(fs) - 1; 370 ((daddr_t *)(sp->segsum))[ndx] = daddr; 371 } 372 373 /* Update the inode times and copy the inode onto the inode page. */ 374 ITIMES(ip, &time, &time); 375 bp = sp->ibp; 376 bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din; 377 378 /* Increment inode count in segment summary block. */ 379 ++((SEGSUM *)(sp->segsum))->ss_ninos; 380 381 /* If this page is full, set flag to allocate a new page. */ 382 if (++sp->ninodes % INOPB(fs) == 0) 383 sp->ibp = NULL; 384 385 /* 386 * If updating the ifile, update the super-block. Update the disk 387 * address and access times for this inode in the ifile. 388 */ 389 ino = ip->i_number; 390 if (ino == LFS_IFILE_INUM) 391 fs->lfs_idaddr = bp->b_blkno; 392 393 LFS_IENTRY(ifp, fs, ino, ibp); 394 daddr = ifp->if_daddr; 395 ifp->if_daddr = bp->b_blkno; 396 LFS_UBWRITE(ibp); 397 398 if (daddr != LFS_UNUSED_DADDR) { 399 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 400 #ifdef DIAGNOSTIC 401 if (sup->su_nbytes < sizeof(struct dinode)) 402 /* XXX -- Change to a panic. */ 403 printf("lfs: negative bytes (segment %d)\n", 404 datosn(fs, daddr)); 405 #endif 406 sup->su_nbytes -= sizeof(struct dinode); 407 LFS_UBWRITE(bp); 408 } 409 } 410 411 void 412 lfs_gather(fs, sp, vp, match) 413 struct lfs *fs; 414 struct segment *sp; 415 struct vnode *vp; 416 int (*match) __P((struct lfs *, struct buf *)); 417 { 418 struct buf **bpp, *bp, *nbp; 419 struct finfo *fip; 420 struct inode *ip; 421 daddr_t *lbp, *start_lbp; 422 u_long version; 423 int s; 424 425 #ifdef VERBOSE 426 printf("lfs_gather\n"); 427 #endif 428 ip = VTOI(vp); 429 bpp = sp->cbpp; 430 fip = sp->fip; 431 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 432 433 loop: s = splbio(); 434 for (bp = vp->v_dirtyblkhd; bp; bp = nbp) { 435 nbp = bp->b_blockf; 436 /* 437 * XXX 438 * Should sleep on any BUSY buffer if doing an fsync? 439 */ 440 if (bp->b_flags & B_BUSY || !match(fs, bp)) 441 continue; 442 #ifdef DIAGNOSTIC 443 if (!(bp->b_flags & B_DELWRI)) 444 panic("lfs_gather: bp not B_DELWRI"); 445 if (!(bp->b_flags & B_LOCKED)) 446 panic("lfs_gather: bp not B_LOCKED"); 447 #endif 448 /* 449 * If full, finish this segment. We may be doing I/O, so 450 * release and reacquire the splbio(). 451 */ 452 if (sp->sum_bytes_left < sizeof(daddr_t) || 453 sp->seg_bytes_left < fs->lfs_bsize) { 454 splx(s); 455 lfs_updatemeta(fs, 456 sp, vp, start_lbp, bpp, lbp - start_lbp); 457 458 /* Add the current file to the segment summary. */ 459 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 460 461 version = fip->fi_version; 462 lfs_writeseg(fs, sp); 463 lfs_initseg(fs, sp); 464 465 fip = sp->fip; 466 fip->fi_version = version; 467 fip->fi_ino = ip->i_number; 468 start_lbp = lbp = fip->fi_blocks; 469 470 sp->sum_bytes_left -= 471 sizeof(struct finfo) - sizeof(daddr_t); 472 473 bpp = sp->cbpp; 474 goto loop; 475 } 476 477 /* Insert into the buffer list, update the FINFO block. */ 478 *sp->cbpp++ = bp; 479 ++fip->fi_nblocks; 480 *lbp++ = bp->b_lblkno; 481 482 sp->sum_bytes_left -= sizeof(daddr_t); 483 sp->seg_bytes_left -= bp->b_bufsize; 484 } 485 splx(s); 486 lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 487 } 488 489 /* 490 * Update the metadata that points to the blocks listed in the FINFO 491 * array. 492 */ 493 void 494 lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 495 struct lfs *fs; 496 struct segment *sp; 497 struct vnode *vp; 498 daddr_t *lbp; 499 struct buf **bpp; 500 int nblocks; 501 { 502 USES_VOP_BWRITE; 503 SEGUSE *sup; 504 struct buf *bp; 505 INDIR a[NIADDR], *ap; 506 struct inode *ip; 507 daddr_t daddr, lbn, off; 508 int db_per_fsb, error, i, num; 509 510 #ifdef VERBOSE 511 printf("lfs_updatemeta\n"); 512 #endif 513 if (nblocks == 0) 514 return; 515 516 /* Sort the blocks. */ 517 lfs_shellsort(bpp, lbp, nblocks); 518 519 /* 520 * Assign disk addresses, and update references to the logical 521 * block and the segment usage information. 522 */ 523 db_per_fsb = fsbtodb(fs, 1); 524 for (i = nblocks; i--; ++bpp) { 525 lbn = *lbp++; 526 (*bpp)->b_blkno = off = fs->lfs_offset; 527 fs->lfs_offset += db_per_fsb; 528 529 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 530 panic("lfs_updatemeta: lfs_bmaparray %d", error); 531 ip = VTOI(vp); 532 switch (num) { 533 case 0: 534 ip->i_db[lbn] = off; 535 break; 536 case 1: 537 ip->i_ib[a[0].in_off] = off; 538 break; 539 default: 540 ap = &a[num - 1]; 541 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 542 panic("lfs_updatemeta: bread bno %d", 543 ap->in_lbn); 544 bp->b_un.b_daddr[ap->in_off] = off; 545 VOP_BWRITE(bp); 546 } 547 548 /* Update segment usage information. */ 549 if (daddr != UNASSIGNED) { 550 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 551 #ifdef DIAGNOSTIC 552 if (sup->su_nbytes < fs->lfs_bsize) 553 /* XXX -- Change to a panic. */ 554 printf("lfs: negative bytes (segment %d)\n", 555 datosn(fs, daddr)); 556 #endif 557 sup->su_nbytes -= fs->lfs_bsize; 558 LFS_UBWRITE(bp); 559 } 560 } 561 } 562 563 /* 564 * Start a new segment. 565 */ 566 void 567 lfs_initseg(fs, sp) 568 struct lfs *fs; 569 struct segment *sp; 570 { 571 SEGUSE *sup; 572 SEGSUM *ssp; 573 struct buf *bp; 574 daddr_t lbn, *lbnp; 575 576 #ifdef VERBOSE 577 printf("lfs_initseg\n"); 578 #endif 579 /* Advance to the next segment. */ 580 if (!LFS_PARTIAL_FITS(fs)) { 581 /* Wake up any cleaning procs waiting on this file system. */ 582 wakeup(&fs->lfs_nextseg); 583 wakeup(&lfs_allclean_wakeup); 584 585 lfs_newseg(fs); 586 fs->lfs_offset = fs->lfs_curseg; 587 sp->seg_number = datosn(fs, fs->lfs_curseg); 588 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 589 590 /* 591 * If the segment contains a superblock, update the offset 592 * and summary address to skip over it. 593 */ 594 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 595 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 596 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 597 sp->seg_bytes_left -= LFS_SBPAD; 598 } 599 brelse(bp); 600 } else { 601 sp->seg_number = datosn(fs, fs->lfs_curseg); 602 sp->seg_bytes_left = (fs->lfs_dbpseg - 603 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 604 } 605 606 sp->ibp = NULL; 607 sp->ninodes = 0; 608 609 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 610 sp->cbpp = sp->bpp; 611 *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE); 612 sp->segsum = (*sp->cbpp)->b_un.b_addr; 613 ++sp->cbpp; 614 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 615 616 /* Set point to SEGSUM, initialize it. */ 617 ssp = sp->segsum; 618 ssp->ss_next = fs->lfs_nextseg; 619 ssp->ss_nfinfo = ssp->ss_ninos = 0; 620 621 /* Set pointer to first FINFO, initialize it. */ 622 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 623 sp->fip->fi_nblocks = 0; 624 625 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 626 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 627 } 628 629 /* 630 * Return the next segment to write. 631 */ 632 void 633 lfs_newseg(fs) 634 struct lfs *fs; 635 { 636 CLEANERINFO *cip; 637 SEGUSE *sup; 638 struct buf *bp; 639 int curseg, isdirty, sn; 640 641 #ifdef VERBOSE 642 printf("lfs_newseg\n"); 643 #endif 644 /* 645 * Turn off the active bit for the current segment, turn on the 646 * active and dirty bits for the next segment, update the cleaner 647 * info. Set the current segment to the next segment, get a new 648 * next segment. 649 */ 650 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); 651 sup->su_flags &= ~SEGUSE_ACTIVE; 652 LFS_UBWRITE(bp); 653 654 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 655 sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY; 656 LFS_UBWRITE(bp); 657 658 LFS_CLEANERINFO(cip, fs, bp); 659 --cip->clean; 660 ++cip->dirty; 661 LFS_UBWRITE(bp); 662 663 fs->lfs_lastseg = fs->lfs_curseg; 664 fs->lfs_curseg = fs->lfs_nextseg; 665 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 666 sn = (sn + 1) % fs->lfs_nseg; 667 if (sn == curseg) 668 panic("lfs_nextseg: no clean segments"); 669 LFS_SEGENTRY(sup, fs, sn, bp); 670 isdirty = sup->su_flags & SEGUSE_DIRTY; 671 brelse(bp); 672 if (!isdirty) 673 break; 674 } 675 fs->lfs_nextseg = sntoda(fs, sn); 676 } 677 678 void 679 lfs_writeseg(fs, sp) 680 struct lfs *fs; 681 struct segment *sp; 682 { 683 struct buf **bpp, *bp, *cbp; 684 SEGUSE *sup; 685 SEGSUM *ssp; 686 dev_t i_dev; 687 u_long *datap, *dp; 688 size_t size; 689 int ch_per_blk, i, nblocks, num, s, (*strategy)__P((struct buf *)); 690 char *p; 691 692 #ifdef VERBOSE 693 printf("lfs_writeseg\n"); 694 #endif 695 if ((nblocks = sp->cbpp - sp->bpp) == 0) 696 return; 697 698 /* 699 * Compute checksum across data and then across summary; the first 700 * block (the summary block) is skipped. Set the create time here 701 * so that it's guaranteed to be later than the inode mod times. 702 * 703 * XXX 704 * Fix this to do it inline, instead of malloc/copy. 705 */ 706 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 707 for (bpp = sp->bpp, i = nblocks - 1; i--;) 708 *dp++ = (*++bpp)->b_un.b_words[0]; 709 ssp = (SEGSUM *)sp->segsum; 710 ssp->ss_create = time.tv_sec; 711 ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 712 ssp->ss_sumsum = 713 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 714 free(datap, M_SEGMENT); 715 716 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 717 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 718 719 /* 720 * When we simply write the blocks we lose a rotation for every block 721 * written. To avoid this problem, we allocate memory in chunks, copy 722 * the buffers into the chunk and write the chunk. 56K was chosen as 723 * some driver/controllers can't handle unsigned 16 bit transfers. 724 * When the data is copied to the chunk, turn off the the B_LOCKED bit 725 * and brelse the buffer (which will move them to the LRU list). Add 726 * the B_CALL flag to the buffer header so we can count I/O's for the 727 * checkpoints and so we can release the allocated memory. 728 * 729 * XXX 730 * This should be removed if the new virtual memory system allows us to 731 * easily make the buffers contiguous in kernel memory and if that's 732 * fast enough. 733 */ 734 #define LFS_CHUNKSIZE (56 * 1024) 735 ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize; 736 for (bpp = sp->bpp, i = nblocks; i;) { 737 num = ch_per_blk; 738 if (num > i) 739 num = i; 740 i -= num; 741 size = num * fs->lfs_bsize; 742 743 cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0); 744 cbp->b_dev = i_dev; 745 cbp->b_flags = B_ASYNC | B_BUSY | B_CALL; 746 cbp->b_iodone = lfs_callback; 747 cbp->b_saveaddr = cbp->b_un.b_addr; 748 cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK); 749 750 s = splbio(); 751 ++fs->lfs_iocount; 752 for (p = cbp->b_un.b_addr; num--;) { 753 bp = *bpp++; 754 bcopy(bp->b_un.b_addr, p, bp->b_bcount); 755 p += bp->b_bcount; 756 bp->b_flags &= 757 ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED); 758 if (!(bp->b_flags & B_NOCACHE)) { 759 bremfree(bp); 760 reassignbuf(bp, bp->b_vp); 761 } 762 brelse(bp); 763 } 764 splx(s); 765 cbp->b_bcount = p - cbp->b_un.b_addr; 766 (strategy)(cbp); 767 } 768 769 /* Update the segment usage information. */ 770 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 771 sup->su_nbytes += nblocks - 1 - 772 (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs) << fs->lfs_bshift; 773 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 774 sup->su_lastmod = time.tv_sec; 775 LFS_UBWRITE(bp); 776 } 777 778 void 779 lfs_writesuper(fs, sp) 780 struct lfs *fs; 781 struct segment *sp; 782 { 783 struct buf *bp; 784 dev_t i_dev; 785 int (*strategy) __P((struct buf *)); 786 787 #ifdef VERBOSE 788 printf("lfs_writesuper\n"); 789 #endif 790 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 791 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy; 792 793 /* Checksum the superblock and copy it into a buffer. */ 794 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 795 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); 796 *bp->b_un.b_lfs = *fs; 797 798 /* Write the first superblock (wait). */ 799 bp->b_dev = i_dev; 800 bp->b_flags |= B_BUSY; 801 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 802 (strategy)(bp); 803 biowait(bp); 804 805 /* Write the second superblock (don't wait). */ 806 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 807 bp->b_flags |= B_ASYNC | B_BUSY; 808 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 809 (strategy)(bp); 810 } 811 812 /* 813 * Logical block number match routines used when traversing the dirty block 814 * chain. 815 */ 816 int 817 lfs_match_data(fs, bp) 818 struct lfs *fs; 819 struct buf *bp; 820 { 821 return (bp->b_lblkno >= 0); 822 } 823 824 int 825 lfs_match_indir(fs, bp) 826 struct lfs *fs; 827 struct buf *bp; 828 { 829 int lbn; 830 831 lbn = bp->b_lblkno; 832 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 833 } 834 835 int 836 lfs_match_dindir(fs, bp) 837 struct lfs *fs; 838 struct buf *bp; 839 { 840 int lbn; 841 842 lbn = bp->b_lblkno; 843 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 844 } 845 846 int 847 lfs_match_tindir(fs, bp) 848 struct lfs *fs; 849 struct buf *bp; 850 { 851 int lbn; 852 853 lbn = bp->b_lblkno; 854 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 855 } 856 857 /* 858 * Allocate a new buffer header. 859 */ 860 struct buf * 861 lfs_newbuf(fs, daddr, size) 862 struct lfs *fs; 863 daddr_t daddr; 864 size_t size; 865 { 866 struct buf *bp; 867 868 #ifdef VERBOSE 869 printf("lfs_newbuf\n"); 870 #endif 871 bp = getnewbuf(); 872 bremhash(bp); 873 bgetvp(fs->lfs_ivnode, bp); 874 bp->b_bcount = 0; 875 bp->b_lblkno = daddr; 876 bp->b_blkno = daddr; 877 bp->b_error = 0; 878 bp->b_resid = 0; 879 if (size) 880 allocbuf(bp, size); 881 bp->b_flags |= B_NOCACHE; 882 bp->b_saveaddr = NULL; 883 binshash(bp, &bfreelist[BQ_AGE]); 884 return (bp); 885 } 886 887 void 888 lfs_callback(bp) 889 struct buf *bp; 890 { 891 struct lfs *fs; 892 893 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 894 #ifdef DIAGNOSTIC 895 if (fs->lfs_iocount == 0) 896 panic("lfs_callback: zero iocount\n"); 897 #endif 898 if (--fs->lfs_iocount == 0) 899 wakeup(&fs->lfs_iocount); 900 901 if (bp->b_saveaddr) { 902 free(bp->b_un.b_addr, M_SEGMENT); 903 bp->b_un.b_addr = bp->b_saveaddr; 904 bp->b_saveaddr = NULL; 905 } 906 brelse(bp); 907 } 908 909 /* 910 * Shellsort (diminishing increment sort) from Data Structures and 911 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 912 * see also Knuth Vol. 3, page 84. The increments are selected from 913 * formula (8), page 95. Roughly O(N^3/2). 914 */ 915 /* 916 * This is our own private copy of shellsort because we want to sort 917 * two parallel arrays (the array of buffer pointers and the array of 918 * logical block numbers) simultaneously. Note that we cast the array 919 * of logical block numbers to a unsigned in this routine so that the 920 * negative block numbers (meta data blocks) sort AFTER the data blocks. 921 */ 922 void 923 lfs_shellsort(bp_array, lb_array, nmemb) 924 struct buf **bp_array; 925 daddr_t *lb_array; 926 register int nmemb; 927 { 928 static int __rsshell_increments[] = { 4, 1, 0 }; 929 register int incr, *incrp, t1, t2; 930 struct buf *bp_temp; 931 u_long lb_temp; 932 933 for (incrp = __rsshell_increments; incr = *incrp++;) 934 for (t1 = incr; t1 < nmemb; ++t1) 935 for (t2 = t1 - incr; t2 >= 0;) 936 if (lb_array[t2] > lb_array[t2 + incr]) { 937 lb_temp = lb_array[t2]; 938 lb_array[t2] = lb_array[t2 + incr]; 939 lb_array[t2 + incr] = lb_temp; 940 bp_temp = bp_array[t2]; 941 bp_array[t2] = bp_array[t2 + incr]; 942 bp_array[t2 + incr] = bp_temp; 943 t2 -= incr; 944 } else 945 break; 946 } 947