/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_segment.c	7.20 (Berkeley) 06/23/92
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/conf.h>
#include <sys/vnode.h>
#include <sys/specdev.h>
#include <sys/fifo.h>
#include <sys/malloc.h>
#include <sys/mount.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/ufsmount.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/*
 * In-memory description of a segment about to be written.
 *
 * One of these is allocated by lfs_vflush() and by lfs_segwrite() for the
 * duration of a write and is (re)initialized by lfs_initseg() every time a
 * new partial segment is started.
 */
struct segment {
	struct buf	**bpp;		/* pointer to buffer array */
	struct buf	**cbpp;		/* pointer to next available bp */
	struct buf	*ibp;		/* buffer pointer to inode page */
	struct finfo	*fip;		/* current fileinfo pointer */
	void	*segsum;		/* segment summary info */
	u_long	ninodes;		/* number of inodes in this segment */
	u_long	seg_bytes_left;		/* bytes left in segment */
	u_long	sum_bytes_left;		/* bytes left in summary block */
	u_long	seg_number;		/* number of this segment */
#define	SEGM_CKP	0x01		/* doing a checkpoint */
	u_long	seg_flags;		/* run-time flags for this segment */
};

/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.
 *
 * Evaluates to true when the number of disk blocks still unused in the
 * current segment (lfs_dbpseg less the distance of lfs_offset from
 * lfs_curseg) is greater than 1 << lfs_fsbtodb.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)

/*
 * Prototypes for the routines in this file.
 * NOTE(review): no definition of lfs_iset() is visible in this file view.
 */
void	 lfs_callback __P((struct buf *));
void	 lfs_gather __P((struct lfs *, struct segment *,
	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
void	 lfs_initseg __P((struct lfs *, struct segment *));
void	 lfs_iset __P((struct inode *, daddr_t, time_t));
int	 lfs_match_data __P((struct lfs *, struct buf *));
int	 lfs_match_dindir __P((struct lfs *, struct buf *));
int	 lfs_match_indir __P((struct lfs *, struct buf *));
int	 lfs_match_tindir __P((struct lfs *, struct buf *));
struct buf *
	 lfs_newbuf __P((struct lfs *, daddr_t, size_t));
void	 lfs_newseg __P((struct lfs *));
void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
void	 lfs_updatemeta __P((struct lfs *,
	    struct segment *, struct vnode *, daddr_t *, struct buf **, int));
void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
int	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
int	 lfs_writeseg __P((struct lfs *, struct segment *));
void	 lfs_writesuper __P((struct lfs *, struct segment *));
void	 lfs_writevnodes __P((struct lfs *fs, struct mount *mp,
	    struct segment *sp, int dirops));

/* Woken by lfs_initseg() whenever the log advances to a new segment. */
int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */

/*
 * Ifile and meta data blocks are not marked busy, so segment writes MUST be
 * single threaded.  Currently, there are two paths into lfs_segwrite, sync()
 * and getnewbuf().  They both mark the file system busy.  Lfs_vflush()
 * explicitly marks the file system busy.  So lfs_segwrite is safe.  I think.
86 */ 87 88 int 89 lfs_vflush(vp) 90 struct vnode *vp; 91 { 92 struct inode *ip; 93 struct lfs *fs; 94 struct mount *mp; 95 struct segment *sp; 96 int error, s; 97 98 #ifdef VERBOSE 99 printf("lfs_vflush\n"); 100 #endif 101 mp = vp->v_mount; 102 fs = VFSTOUFS(mp)->um_lfs; 103 104 /* 105 * XXX 106 * check flags? 107 * mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY) || 108 */ 109 if (vfs_busy(mp)) 110 return (0); 111 112 /* 113 * Allocate a segment structure and enough space to hold pointers to 114 * the maximum possible number of buffers which can be described in a 115 * single summary block. 116 */ 117 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 118 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 119 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 120 sp->seg_flags = SEGM_CKP; 121 lfs_initseg(fs, sp); 122 123 /* 124 * Keep a cumulative count of the outstanding I/O operations. If the 125 * disk drive catches up with us it could go to zero before we finish, 126 * so we artificially increment it by one until we've scheduled all of 127 * the writes we intend to do. 128 */ 129 s = splbio(); 130 ++fs->lfs_iocount; 131 splx(s); 132 133 if (vp->v_dirtyblkhd != NULL) 134 lfs_writefile(fs, sp, vp); 135 ip = VTOI(vp); 136 (void) lfs_writeinode(fs, sp, ip); 137 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 138 139 (void) lfs_writeseg(fs, sp); 140 141 /* 142 * If the I/O count is non-zero, sleep until it reaches zero. At the 143 * moment, the user's process hangs around so we can sleep. 144 */ 145 s = splbio(); 146 if (--fs->lfs_iocount && (error = 147 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs vflush", 0))) { 148 free(sp->bpp, M_SEGMENT); 149 free(sp, M_SEGMENT); 150 return (error); 151 } 152 splx(s); 153 vfs_unbusy(mp); 154 155 /* 156 * XXX 157 * Should be writing a checkpoint? 
158 */ 159 free(sp->bpp, M_SEGMENT); 160 free(sp, M_SEGMENT); 161 162 return (0); 163 } 164 165 void 166 lfs_writevnodes(fs, mp, sp, dirops) 167 struct lfs *fs; 168 struct mount *mp; 169 struct segment *sp; 170 int dirops; 171 { 172 struct inode *ip; 173 struct vnode *vp; 174 int error, s; 175 176 loop: for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) { 177 /* 178 * If the vnode that we are about to sync is no longer 179 * associated with this mount point, start over. 180 */ 181 if (vp->v_mount != mp) 182 goto loop; 183 184 if (dirops && !(vp->v_flag & VDIROP) || 185 !dirops && (vp->v_flag & VDIROP)) 186 continue; 187 /* 188 * XXX 189 * Up the ref count so we don't get tossed out of 190 * memory. 191 */ 192 VREF(vp); 193 194 /* 195 * Write the inode/file if dirty and it's not the 196 * the IFILE. 197 */ 198 ip = VTOI(vp); 199 if ((ip->i_flag & (IMOD | IACC | IUPD | ICHG) || 200 vp->v_dirtyblkhd != NULL) && 201 ip->i_number != LFS_IFILE_INUM) { 202 if (vp->v_dirtyblkhd != NULL) 203 lfs_writefile(fs, sp, vp); 204 (void) lfs_writeinode(fs, sp, ip); 205 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 206 } 207 vp->v_flag &= ~VDIROP; 208 vrele(vp); 209 } 210 } 211 212 int 213 lfs_segwrite(mp, do_ckp) 214 struct mount *mp; 215 int do_ckp; /* Do a checkpoint. */ 216 { 217 USES_VOP_ISLOCKED; 218 struct inode *ip; 219 struct lfs *fs; 220 struct segment *sp; 221 struct vnode *vp; 222 int error, s; 223 224 #ifdef VERBOSE 225 printf("lfs_segwrite\n"); 226 #endif 227 fs = VFSTOUFS(mp)->um_lfs; 228 229 /* 230 * Allocate a segment structure and enough space to hold pointers to 231 * the maximum possible number of buffers which can be described in a 232 * single summary block. 233 */ 234 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 235 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 236 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 237 sp->seg_flags = do_ckp ? 
SEGM_CKP : 0; 238 lfs_initseg(fs, sp); 239 240 /* 241 * Keep a cumulative count of the outstanding I/O operations. If the 242 * disk drive catches up with us it could go to zero before we finish, 243 * so we artificially increment it by one until we've scheduled all of 244 * the writes we intend to do. If not a checkpoint, we never do the 245 * final decrement, avoiding the wakeup in the callback routine. 246 */ 247 s = splbio(); 248 fs->lfs_iocount++; 249 splx(s); 250 251 lfs_writevnodes(fs, mp, sp, 0); 252 s = splbio(); 253 fs->lfs_writer = 1; 254 if (fs->lfs_dirops && (error = 255 tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { 256 free(sp->bpp, M_SEGMENT); 257 free(sp, M_SEGMENT); 258 fs->lfs_writer = 0; 259 splx(s); 260 return(error); 261 } 262 splx(s); 263 264 lfs_writevnodes(fs, mp, sp, 1); 265 266 /* 267 * If this is a checkpoint, we need to loop on both the ifile and 268 * the writeseg to make sure that we don't end up with any dirty 269 * buffers left when this is all over. 270 */ 271 if (do_ckp || fs->lfs_doifile) { 272 redo: 273 vp = fs->lfs_ivnode; 274 while (vget(vp)); 275 ip = VTOI(vp); 276 do { 277 if (vp->v_dirtyblkhd != NULL) 278 lfs_writefile(fs, sp, vp); 279 } while (lfs_writeinode(fs, sp, ip) && do_ckp); 280 ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG); 281 vput(vp); 282 if (lfs_writeseg(fs, sp) && do_ckp) { 283 lfs_initseg(fs, sp); 284 goto redo; 285 } 286 } else 287 (void) lfs_writeseg(fs, sp); 288 289 /* 290 * If the I/O count is non-zero, sleep until it reaches zero. At the 291 * moment, the user's process hangs around so we can sleep. 
292 */ 293 s = splbio(); 294 --fs->lfs_iocount; 295 fs->lfs_writer = 0; 296 fs->lfs_doifile = 0; 297 wakeup(&fs->lfs_dirops); 298 299 if (do_ckp) { 300 if (fs->lfs_iocount && (error = 301 tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0))) { 302 free(sp->bpp, M_SEGMENT); 303 free(sp, M_SEGMENT); 304 return (error); 305 } 306 splx(s); 307 lfs_writesuper(fs, sp); 308 } else 309 splx(s); 310 311 free(sp->bpp, M_SEGMENT); 312 free(sp, M_SEGMENT); 313 314 return (0); 315 } 316 317 /* 318 * Write the dirty blocks associated with a vnode. 319 */ 320 void 321 lfs_writefile(fs, sp, vp) 322 struct lfs *fs; 323 struct segment *sp; 324 struct vnode *vp; 325 { 326 struct buf *bp; 327 struct finfo *fip; 328 IFILE *ifp; 329 330 #ifdef VERBOSE 331 printf("lfs_writefile\n"); 332 #endif 333 if (sp->seg_bytes_left < fs->lfs_bsize || 334 sp->sum_bytes_left < sizeof(struct finfo)) { 335 (void) lfs_writeseg(fs, sp); 336 lfs_initseg(fs, sp); 337 } 338 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 339 340 fip = sp->fip; 341 fip->fi_nblocks = 0; 342 fip->fi_ino = VTOI(vp)->i_number; 343 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 344 fip->fi_version = ifp->if_version; 345 brelse(bp); 346 347 /* 348 * It may not be necessary to write the meta-data blocks at this point, 349 * as the roll-forward recovery code should be able to reconstruct the 350 * list. 
351 */ 352 lfs_gather(fs, sp, vp, lfs_match_data); 353 lfs_gather(fs, sp, vp, lfs_match_indir); 354 lfs_gather(fs, sp, vp, lfs_match_dindir); 355 #ifdef TRIPLE 356 lfs_gather(fs, sp, vp, lfs_match_tindir); 357 #endif 358 359 fip = sp->fip; 360 #ifdef META 361 printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks); 362 #endif 363 if (fip->fi_nblocks != 0) { 364 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 365 sp->fip = 366 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 367 sizeof(daddr_t) * (fip->fi_nblocks - 1)); 368 } else 369 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 370 } 371 372 int 373 lfs_writeinode(fs, sp, ip) 374 struct lfs *fs; 375 struct segment *sp; 376 struct inode *ip; 377 { 378 struct buf *bp, *ibp; 379 IFILE *ifp; 380 SEGUSE *sup; 381 daddr_t daddr; 382 ino_t ino; 383 int ndx; 384 int redo_ifile = 0; 385 386 #ifdef VERBOSE 387 printf("lfs_writeinode\n"); 388 #endif 389 /* Allocate a new inode block if necessary. */ 390 if (sp->ibp == NULL) { 391 /* Allocate a new segment if necessary. */ 392 if (sp->seg_bytes_left < fs->lfs_bsize || 393 sp->sum_bytes_left < sizeof(daddr_t)) { 394 (void) lfs_writeseg(fs, sp); 395 lfs_initseg(fs, sp); 396 } 397 398 /* Get next inode block. */ 399 daddr = fs->lfs_offset; 400 fs->lfs_offset += fsbtodb(fs, 1); 401 sp->ibp = *sp->cbpp++ = 402 lfs_newbuf(fs, daddr, fs->lfs_bsize); 403 404 /* Set remaining space counters. */ 405 sp->seg_bytes_left -= fs->lfs_bsize; 406 sp->sum_bytes_left -= sizeof(daddr_t); 407 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 408 sp->ninodes / INOPB(fs) - 1; 409 ((daddr_t *)(sp->segsum))[ndx] = daddr; 410 } 411 412 /* Update the inode times and copy the inode onto the inode page. */ 413 ITIMES(ip, &time, &time); 414 bp = sp->ibp; 415 bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din; 416 417 /* Increment inode count in segment summary block. */ 418 ++((SEGSUM *)(sp->segsum))->ss_ninos; 419 420 /* If this page is full, set flag to allocate a new page. 
*/ 421 if (++sp->ninodes % INOPB(fs) == 0) 422 sp->ibp = NULL; 423 424 /* 425 * If updating the ifile, update the super-block. Update the disk 426 * address and access times for this inode in the ifile. 427 */ 428 ino = ip->i_number; 429 if (ino == LFS_IFILE_INUM) 430 fs->lfs_idaddr = bp->b_blkno; 431 432 LFS_IENTRY(ifp, fs, ino, ibp); 433 daddr = ifp->if_daddr; 434 ifp->if_daddr = bp->b_blkno; 435 LFS_UBWRITE(ibp); 436 redo_ifile = (ino == LFS_IFILE_INUM && !(ibp->b_flags & B_GATHERED)); 437 438 /* 439 * No need to update segment usage if there was no former inode address 440 * or if the last inode address is in the current partial segment. 441 */ 442 if (daddr != LFS_UNUSED_DADDR && 443 !(daddr >= fs->lfs_curseg && daddr <= ifp->if_daddr) ) { 444 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 445 #ifdef DIAGNOSTIC 446 if (sup->su_nbytes < sizeof(struct dinode)) { 447 /* XXX -- Change to a panic. */ 448 printf("lfs: negative bytes (segment %d)\n", 449 datosn(fs, daddr)); 450 panic("negative bytes"); 451 } 452 #endif 453 sup->su_nbytes -= sizeof(struct dinode); 454 LFS_UBWRITE(bp); 455 redo_ifile |= 456 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 457 } 458 return(redo_ifile); 459 } 460 461 void 462 lfs_gather(fs, sp, vp, match) 463 struct lfs *fs; 464 struct segment *sp; 465 struct vnode *vp; 466 int (*match) __P((struct lfs *, struct buf *)); 467 { 468 struct buf **bpp, *bp; 469 struct buf *lastbp; 470 struct finfo *fip; 471 struct inode *ip; 472 daddr_t *lbp, *start_lbp; 473 u_long version; 474 int s; 475 476 #ifdef VERBOSE 477 printf("lfs_gather\n"); 478 #endif 479 ip = VTOI(vp); 480 bpp = sp->cbpp; 481 fip = sp->fip; 482 start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks]; 483 484 loop: s = splbio(); 485 lastbp = NULL; 486 for (bp = vp->v_dirtyblkhd; bp; lastbp = bp, bp = bp->b_blockf) { 487 if (bp->b_flags & B_BUSY || !match(fs, bp) || 488 bp->b_flags & B_GATHERED) 489 continue; 490 #ifdef DIAGNOSTIC 491 if (!(bp->b_flags & B_DELWRI)) 492 
panic("lfs_gather: bp not B_DELWRI"); 493 if (!(bp->b_flags & B_LOCKED)) 494 panic("lfs_gather: bp not B_LOCKED"); 495 #endif 496 /* 497 * If full, finish this segment. We may be doing I/O, so 498 * release and reacquire the splbio(). 499 */ 500 if (sp->sum_bytes_left < sizeof(daddr_t) || 501 sp->seg_bytes_left < fs->lfs_bsize) { 502 splx(s); 503 lfs_updatemeta(fs, 504 sp, vp, start_lbp, bpp, lbp - start_lbp); 505 506 /* Add the current file to the segment summary. */ 507 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 508 509 version = fip->fi_version; 510 (void) lfs_writeseg(fs, sp); 511 lfs_initseg(fs, sp); 512 513 fip = sp->fip; 514 fip->fi_version = version; 515 fip->fi_ino = ip->i_number; 516 start_lbp = lbp = fip->fi_blocks; 517 518 sp->sum_bytes_left -= 519 sizeof(struct finfo) - sizeof(daddr_t); 520 521 bpp = sp->cbpp; 522 goto loop; 523 } 524 525 /* Insert into the buffer list, update the FINFO block. */ 526 bp->b_flags |= B_GATHERED; 527 *sp->cbpp++ = bp; 528 ++fip->fi_nblocks; 529 *lbp++ = bp->b_lblkno; 530 531 sp->sum_bytes_left -= sizeof(daddr_t); 532 sp->seg_bytes_left -= bp->b_bufsize; 533 } 534 splx(s); 535 lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp); 536 } 537 538 /* 539 * Update the metadata that points to the blocks listed in the FINFO 540 * array. 541 */ 542 void 543 lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks) 544 struct lfs *fs; 545 struct segment *sp; 546 struct vnode *vp; 547 daddr_t *lbp; 548 struct buf **bpp; 549 int nblocks; 550 { 551 USES_VOP_BWRITE; 552 SEGUSE *sup; 553 struct buf *bp; 554 INDIR a[NIADDR], *ap; 555 struct inode *ip; 556 daddr_t daddr, lbn, off; 557 int db_per_fsb, error, i, num; 558 559 #ifdef VERBOSE 560 printf("lfs_updatemeta\n"); 561 #endif 562 if (nblocks == 0) 563 return; 564 565 /* Sort the blocks. */ 566 lfs_shellsort(bpp, lbp, nblocks); 567 568 /* 569 * Assign disk addresses, and update references to the logical 570 * block and the segment usage information. 
571 */ 572 db_per_fsb = fsbtodb(fs, 1); 573 for (i = nblocks; i--; ++bpp) { 574 lbn = *lbp++; 575 (*bpp)->b_blkno = off = fs->lfs_offset; 576 fs->lfs_offset += db_per_fsb; 577 578 if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num)) 579 panic("lfs_updatemeta: lfs_bmaparray %d", error); 580 ip = VTOI(vp); 581 switch (num) { 582 case 0: 583 ip->i_db[lbn] = off; 584 break; 585 case 1: 586 ip->i_ib[a[0].in_off] = off; 587 break; 588 default: 589 ap = &a[num - 1]; 590 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 591 panic("lfs_updatemeta: bread bno %d", 592 ap->in_lbn); 593 bp->b_un.b_daddr[ap->in_off] = off; 594 VOP_BWRITE(bp); 595 } 596 597 /* Update segment usage information. */ 598 if (daddr != UNASSIGNED) { 599 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 600 #ifdef DIAGNOSTIC 601 if (sup->su_nbytes < fs->lfs_bsize) { 602 /* XXX -- Change to a panic. */ 603 printf("lfs: negative bytes (segment %d)\n", 604 datosn(fs, daddr)); 605 panic ("Negative Bytes"); 606 } 607 #endif 608 sup->su_nbytes -= fs->lfs_bsize; 609 LFS_UBWRITE(bp); 610 } 611 } 612 } 613 614 /* 615 * Start a new segment. 616 */ 617 void 618 lfs_initseg(fs, sp) 619 struct lfs *fs; 620 struct segment *sp; 621 { 622 SEGUSE *sup; 623 SEGSUM *ssp; 624 struct buf *bp; 625 daddr_t lbn, *lbnp; 626 627 #ifdef VERBOSE 628 printf("lfs_initseg\n"); 629 #endif 630 /* Advance to the next segment. */ 631 if (!LFS_PARTIAL_FITS(fs)) { 632 /* Wake up any cleaning procs waiting on this file system. */ 633 wakeup(&fs->lfs_nextseg); 634 wakeup(&lfs_allclean_wakeup); 635 636 lfs_newseg(fs); 637 fs->lfs_offset = fs->lfs_curseg; 638 sp->seg_number = datosn(fs, fs->lfs_curseg); 639 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 640 641 /* 642 * If the segment contains a superblock, update the offset 643 * and summary address to skip over it. 
644 */ 645 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 646 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 647 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 648 sp->seg_bytes_left -= LFS_SBPAD; 649 } 650 brelse(bp); 651 } else { 652 sp->seg_number = datosn(fs, fs->lfs_curseg); 653 sp->seg_bytes_left = (fs->lfs_dbpseg - 654 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 655 } 656 fs->lfs_lastpseg = fs->lfs_offset; 657 658 sp->ibp = NULL; 659 sp->ninodes = 0; 660 661 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 662 sp->cbpp = sp->bpp; 663 *sp->cbpp = lfs_newbuf(fs, fs->lfs_offset, LFS_SUMMARY_SIZE); 664 sp->segsum = (*sp->cbpp)->b_un.b_addr; 665 ++sp->cbpp; 666 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 667 668 /* Set point to SEGSUM, initialize it. */ 669 ssp = sp->segsum; 670 ssp->ss_next = fs->lfs_nextseg; 671 ssp->ss_nfinfo = ssp->ss_ninos = 0; 672 673 /* Set pointer to first FINFO, initialize it. */ 674 sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM)); 675 sp->fip->fi_nblocks = 0; 676 677 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 678 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 679 } 680 681 /* 682 * Return the next segment to write. 683 */ 684 void 685 lfs_newseg(fs) 686 struct lfs *fs; 687 { 688 CLEANERINFO *cip; 689 SEGUSE *sup; 690 struct buf *bp; 691 int curseg, isdirty, sn; 692 693 #ifdef VERBOSE 694 printf("lfs_newseg\n"); 695 #endif 696 /* 697 * Turn off the active bit for the current segment, turn on the 698 * active and dirty bits for the next segment, update the cleaner 699 * info. Set the current segment to the next segment, get a new 700 * next segment. 
701 */ 702 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp); 703 sup->su_flags &= ~SEGUSE_ACTIVE; 704 LFS_UBWRITE(bp); 705 706 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 707 sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_LIVELOG; 708 LFS_UBWRITE(bp); 709 710 LFS_CLEANERINFO(cip, fs, bp); 711 --cip->clean; 712 ++cip->dirty; 713 LFS_UBWRITE(bp); 714 715 fs->lfs_lastseg = fs->lfs_curseg; 716 fs->lfs_curseg = fs->lfs_nextseg; 717 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 718 sn = (sn + 1) % fs->lfs_nseg; 719 if (sn == curseg) 720 panic("lfs_nextseg: no clean segments"); 721 LFS_SEGENTRY(sup, fs, sn, bp); 722 isdirty = sup->su_flags & SEGUSE_DIRTY; 723 brelse(bp); 724 if (!isdirty) 725 break; 726 } 727 fs->lfs_nextseg = sntoda(fs, sn); 728 } 729 730 int 731 lfs_writeseg(fs, sp) 732 struct lfs *fs; 733 struct segment *sp; 734 { 735 USES_VOP_STRATEGY; 736 struct buf **bpp, *bp, *cbp; 737 SEGUSE *sup; 738 SEGSUM *ssp; 739 dev_t i_dev; 740 size_t size; 741 u_long *datap, *dp; 742 int ch_per_blk, do_again, i, nblocks, num, s; 743 int (*strategy)__P((struct vop_strategy_args *)); 744 char *p; 745 746 #ifdef VERBOSE 747 printf("lfs_writeseg\n"); 748 #endif 749 /* Checkpoint always writes superblock, even if no data blocks. */ 750 if ((nblocks = sp->cbpp - sp->bpp) == 0 && !(sp->seg_flags & SEGM_CKP)) 751 return; 752 753 /* 754 * Compute checksum across data and then across summary; the first 755 * block (the summary block) is skipped. Set the create time here 756 * so that it's guaranteed to be later than the inode mod times. 757 * 758 * XXX 759 * Fix this to do it inline, instead of malloc/copy. 
760 */ 761 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 762 for (bpp = sp->bpp, i = nblocks - 1; i--;) 763 *dp++ = (*++bpp)->b_un.b_words[0]; 764 ssp = (SEGSUM *)sp->segsum; 765 ssp->ss_create = time.tv_sec; 766 ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long)); 767 ssp->ss_sumsum = 768 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 769 free(datap, M_SEGMENT); 770 771 /* Update the segment usage information. */ 772 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 773 sup->su_nbytes += nblocks - 1 - 774 (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs) << fs->lfs_bshift; 775 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 776 sup->su_lastmod = time.tv_sec; 777 LFS_UBWRITE(bp); 778 do_again = !(bp->b_flags & B_GATHERED); 779 780 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 781 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 782 783 /* 784 * When we simply write the blocks we lose a rotation for every block 785 * written. To avoid this problem, we allocate memory in chunks, copy 786 * the buffers into the chunk and write the chunk. 56K was chosen as 787 * some driver/controllers can't handle unsigned 16 bit transfers. 788 * When the data is copied to the chunk, turn off the the B_LOCKED bit 789 * and brelse the buffer (which will move them to the LRU list). Add 790 * the B_CALL flag to the buffer header so we can count I/O's for the 791 * checkpoints and so we can release the allocated memory. 792 * 793 * XXX 794 * This should be removed if the new virtual memory system allows us to 795 * easily make the buffers contiguous in kernel memory and if that's 796 * fast enough. 
797 */ 798 #define LFS_CHUNKSIZE (56 * 1024) 799 ch_per_blk = LFS_CHUNKSIZE / fs->lfs_bsize; 800 for (bpp = sp->bpp, i = nblocks; i;) { 801 num = ch_per_blk; 802 if (num > i) 803 num = i; 804 i -= num; 805 size = num * fs->lfs_bsize; 806 807 cbp = lfs_newbuf(fs, (*bpp)->b_blkno, 0); 808 cbp->b_dev = i_dev; 809 cbp->b_flags = B_ASYNC | B_BUSY | B_CALL; 810 cbp->b_iodone = lfs_callback; 811 cbp->b_saveaddr = cbp->b_un.b_addr; 812 cbp->b_un.b_addr = malloc(size, M_SEGMENT, M_WAITOK); 813 814 s = splbio(); 815 ++fs->lfs_iocount; 816 for (p = cbp->b_un.b_addr; num--;) { 817 bp = *bpp++; 818 bcopy(bp->b_un.b_addr, p, bp->b_bcount); 819 p += bp->b_bcount; 820 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI | 821 B_LOCKED | B_GATHERED); 822 if (!(bp->b_flags & (B_NOCACHE | B_INVAL))) { 823 bremfree(bp); 824 reassignbuf(bp, bp->b_vp); 825 } 826 brelse(bp); 827 } 828 splx(s); 829 cbp->b_bcount = p - cbp->b_un.b_addr; 830 vop_strategy_a.a_desc = VDESC(vop_strategy); 831 vop_strategy_a.a_bp = cbp; 832 (strategy)(&vop_strategy_a); 833 } 834 return(do_again); 835 } 836 837 void 838 lfs_writesuper(fs, sp) 839 struct lfs *fs; 840 struct segment *sp; 841 { 842 USES_VOP_STRATEGY; 843 struct buf *bp; 844 dev_t i_dev; 845 int (*strategy) __P((struct vop_strategy_args *)); 846 847 #ifdef VERBOSE 848 printf("lfs_writesuper\n"); 849 #endif 850 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 851 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 852 853 /* Checksum the superblock and copy it into a buffer. */ 854 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 855 bp = lfs_newbuf(fs, fs->lfs_sboffs[0], LFS_SBPAD); 856 *bp->b_un.b_lfs = *fs; 857 858 /* Write the first superblock (wait). 
*/ 859 bp->b_dev = i_dev; 860 bp->b_flags |= B_BUSY; 861 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 862 vop_strategy_a.a_desc = VDESC(vop_strategy); 863 vop_strategy_a.a_bp = bp; 864 (strategy)(&vop_strategy_a); 865 biowait(bp); 866 867 /* Write the second superblock (don't wait). */ 868 bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1]; 869 bp->b_flags |= B_ASYNC | B_BUSY; 870 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 871 (strategy)(&vop_strategy_a); 872 } 873 874 /* 875 * Logical block number match routines used when traversing the dirty block 876 * chain. 877 */ 878 int 879 lfs_match_data(fs, bp) 880 struct lfs *fs; 881 struct buf *bp; 882 { 883 return (bp->b_lblkno >= 0); 884 } 885 886 int 887 lfs_match_indir(fs, bp) 888 struct lfs *fs; 889 struct buf *bp; 890 { 891 int lbn; 892 893 lbn = bp->b_lblkno; 894 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 895 } 896 897 int 898 lfs_match_dindir(fs, bp) 899 struct lfs *fs; 900 struct buf *bp; 901 { 902 int lbn; 903 904 lbn = bp->b_lblkno; 905 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 906 } 907 908 int 909 lfs_match_tindir(fs, bp) 910 struct lfs *fs; 911 struct buf *bp; 912 { 913 int lbn; 914 915 lbn = bp->b_lblkno; 916 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 917 } 918 919 /* 920 * Allocate a new buffer header. 
921 */ 922 struct buf * 923 lfs_newbuf(fs, daddr, size) 924 struct lfs *fs; 925 daddr_t daddr; 926 size_t size; 927 { 928 struct buf *bp; 929 930 #ifdef VERBOSE 931 printf("lfs_newbuf\n"); 932 #endif 933 bp = getnewbuf(); 934 bremhash(bp); 935 bgetvp(fs->lfs_ivnode, bp); 936 bp->b_bcount = 0; 937 bp->b_lblkno = daddr; 938 bp->b_blkno = daddr; 939 bp->b_error = 0; 940 bp->b_resid = 0; 941 if (size) 942 allocbuf(bp, size); 943 bp->b_flags |= B_NOCACHE; 944 bp->b_saveaddr = NULL; 945 binshash(bp, &bfreelist[BQ_AGE]); 946 return (bp); 947 } 948 949 void 950 lfs_callback(bp) 951 struct buf *bp; 952 { 953 struct lfs *fs; 954 955 fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs; 956 #ifdef DIAGNOSTIC 957 if (fs->lfs_iocount == 0) 958 panic("lfs_callback: zero iocount\n"); 959 #endif 960 if (--fs->lfs_iocount == 0) 961 wakeup(&fs->lfs_iocount); 962 963 if (bp->b_saveaddr) { 964 free(bp->b_un.b_addr, M_SEGMENT); 965 bp->b_un.b_addr = bp->b_saveaddr; 966 bp->b_saveaddr = NULL; 967 } 968 brelse(bp); 969 } 970 971 /* 972 * Shellsort (diminishing increment sort) from Data Structures and 973 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 974 * see also Knuth Vol. 3, page 84. The increments are selected from 975 * formula (8), page 95. Roughly O(N^3/2). 976 */ 977 /* 978 * This is our own private copy of shellsort because we want to sort 979 * two parallel arrays (the array of buffer pointers and the array of 980 * logical block numbers) simultaneously. Note that we cast the array 981 * of logical block numbers to a unsigned in this routine so that the 982 * negative block numbers (meta data blocks) sort AFTER the data blocks. 
983 */ 984 void 985 lfs_shellsort(bp_array, lb_array, nmemb) 986 struct buf **bp_array; 987 daddr_t *lb_array; 988 register int nmemb; 989 { 990 static int __rsshell_increments[] = { 4, 1, 0 }; 991 register int incr, *incrp, t1, t2; 992 struct buf *bp_temp; 993 u_long lb_temp; 994 995 for (incrp = __rsshell_increments; incr = *incrp++;) 996 for (t1 = incr; t1 < nmemb; ++t1) 997 for (t2 = t1 - incr; t2 >= 0;) 998 if (lb_array[t2] > lb_array[t2 + incr]) { 999 lb_temp = lb_array[t2]; 1000 lb_array[t2] = lb_array[t2 + incr]; 1001 lb_array[t2 + incr] = lb_temp; 1002 bp_temp = bp_array[t2]; 1003 bp_array[t2] = bp_array[t2 + incr]; 1004 bp_array[t2 + incr] = bp_temp; 1005 t2 -= incr; 1006 } else 1007 break; 1008 } 1009