1 /* $NetBSD: lfs_segment.c,v 1.9 1997/06/13 08:59:51 pk Exp $ */ 2 3 /* 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)lfs_segment.c 8.5 (Berkeley) 1/4/94 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/namei.h> 41 #include <sys/kernel.h> 42 #include <sys/resourcevar.h> 43 #include <sys/file.h> 44 #include <sys/stat.h> 45 #include <sys/buf.h> 46 #include <sys/proc.h> 47 #include <sys/conf.h> 48 #include <sys/vnode.h> 49 #include <sys/malloc.h> 50 #include <sys/mount.h> 51 52 #include <miscfs/specfs/specdev.h> 53 #include <miscfs/fifofs/fifo.h> 54 55 #include <ufs/ufs/quota.h> 56 #include <ufs/ufs/inode.h> 57 #include <ufs/ufs/dir.h> 58 #include <ufs/ufs/ufsmount.h> 59 #include <ufs/ufs/ufs_extern.h> 60 61 #include <ufs/lfs/lfs.h> 62 #include <ufs/lfs/lfs_extern.h> 63 64 extern int count_lock_queue __P((void)); 65 66 #define MAX_ACTIVE 10 67 /* 68 * Determine if it's OK to start a partial in this segment, or if we need 69 * to go on to a new segment. 70 */ 71 #define LFS_PARTIAL_FITS(fs) \ 72 ((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \ 73 1 << (fs)->lfs_fsbtodb) 74 75 void lfs_callback __P((struct buf *)); 76 void lfs_gather __P((struct lfs *, struct segment *, 77 struct vnode *, int (*) __P((struct lfs *, struct buf *)))); 78 int lfs_gatherblock __P((struct segment *, struct buf *, int *)); 79 void lfs_iset __P((struct inode *, daddr_t, time_t)); 80 int lfs_match_data __P((struct lfs *, struct buf *)); 81 int lfs_match_dindir __P((struct lfs *, struct buf *)); 82 int lfs_match_indir __P((struct lfs *, struct buf *)); 83 int lfs_match_tindir __P((struct lfs *, struct buf *)); 84 void lfs_newseg __P((struct lfs *)); 85 void lfs_shellsort __P((struct buf **, daddr_t *, register int)); 86 void lfs_supercallback __P((struct buf *)); 87 void lfs_updatemeta __P((struct segment *)); 88 int lfs_vref __P((struct vnode *)); 89 void lfs_vunref __P((struct vnode *)); 90 void lfs_writefile __P((struct lfs *, struct segment *, struct vnode *)); 91 int lfs_writeinode __P((struct lfs *, struct segment *, struct inode *)); 92 int lfs_writeseg __P((struct lfs *, struct segment *)); 93 void lfs_writesuper __P((struct lfs *)); 94 void lfs_writevnodes __P((struct lfs *fs, struct mount *mp, 95 struct segment *sp, int dirops)); 96 97 int lfs_allclean_wakeup; /* Cleaner wakeup address. */ 98 99 /* Statistics Counters */ 100 #define DOSTATS 101 struct lfs_stats lfs_stats; 102 103 /* op values to lfs_writevnodes */ 104 #define VN_REG 0 105 #define VN_DIROP 1 106 #define VN_EMPTY 2 107 108 /* 109 * Ifile and meta data blocks are not marked busy, so segment writes MUST be 110 * single threaded. Currently, there are two paths into lfs_segwrite, sync() 111 * and getnewbuf(). They both mark the file system busy. Lfs_vflush() 112 * explicitly marks the file system busy. So lfs_segwrite is safe. I think. 113 */ 114 115 int 116 lfs_vflush(vp) 117 struct vnode *vp; 118 { 119 struct inode *ip; 120 struct lfs *fs; 121 struct segment *sp; 122 123 fs = VFSTOUFS(vp->v_mount)->um_lfs; 124 if (fs->lfs_nactive > MAX_ACTIVE) 125 return(lfs_segwrite(vp->v_mount, SEGM_SYNC|SEGM_CKP)); 126 lfs_seglock(fs, SEGM_SYNC); 127 sp = fs->lfs_sp; 128 129 130 ip = VTOI(vp); 131 if (vp->v_dirtyblkhd.lh_first == NULL) 132 lfs_writevnodes(fs, vp->v_mount, sp, VN_EMPTY); 133 134 do { 135 do { 136 if (vp->v_dirtyblkhd.lh_first != NULL) 137 lfs_writefile(fs, sp, vp); 138 } while (lfs_writeinode(fs, sp, ip)); 139 140 } while (lfs_writeseg(fs, sp) && ip->i_number == LFS_IFILE_INUM); 141 142 #ifdef DOSTATS 143 ++lfs_stats.nwrites; 144 if (sp->seg_flags & SEGM_SYNC) 145 ++lfs_stats.nsync_writes; 146 if (sp->seg_flags & SEGM_CKP) 147 ++lfs_stats.ncheckpoints; 148 #endif 149 lfs_segunlock(fs); 150 return (0); 151 } 152 153 void 154 lfs_writevnodes(fs, mp, sp, op) 155 struct lfs *fs; 156 struct mount *mp; 157 struct segment *sp; 158 int op; 159 { 160 struct inode *ip; 161 struct vnode *vp; 162 163 loop: 164 for (vp = mp->mnt_vnodelist.lh_first; 165 vp != NULL; 166 vp = vp->v_mntvnodes.le_next) { 167 /* 168 * If the vnode that we are about to sync is no longer 169 * associated with this mount point, start over. 170 */ 171 if (vp->v_mount != mp) 172 goto loop; 173 174 /* XXX ignore dirops for now 175 if (op == VN_DIROP && !(vp->v_flag & VDIROP) || 176 op != VN_DIROP && (vp->v_flag & VDIROP)) 177 continue; 178 */ 179 180 if (op == VN_EMPTY && vp->v_dirtyblkhd.lh_first) 181 continue; 182 183 if (vp->v_type == VNON) 184 continue; 185 186 if (lfs_vref(vp)) 187 continue; 188 189 /* 190 * Write the inode/file if dirty and it's not the 191 * the IFILE. 192 */ 193 ip = VTOI(vp); 194 if ((ip->i_flag & 195 (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE) || 196 vp->v_dirtyblkhd.lh_first != NULL) && 197 ip->i_number != LFS_IFILE_INUM) { 198 if (vp->v_dirtyblkhd.lh_first != NULL) 199 lfs_writefile(fs, sp, vp); 200 (void) lfs_writeinode(fs, sp, ip); 201 } 202 vp->v_flag &= ~VDIROP; 203 lfs_vunref(vp); 204 } 205 } 206 207 int 208 lfs_segwrite(mp, flags) 209 struct mount *mp; 210 int flags; /* Do a checkpoint. */ 211 { 212 struct buf *bp; 213 struct inode *ip; 214 struct lfs *fs; 215 struct segment *sp; 216 struct vnode *vp; 217 SEGUSE *segusep; 218 daddr_t ibno; 219 CLEANERINFO *cip; 220 int clean, do_ckp, error, i; 221 222 fs = VFSTOUFS(mp)->um_lfs; 223 224 /* 225 * If we have fewer than 2 clean segments, wait until cleaner 226 * writes. 227 */ 228 do { 229 LFS_CLEANERINFO(cip, fs, bp); 230 clean = cip->clean; 231 brelse(bp); 232 if (clean <= 2) { 233 printf ("segs clean: %d\n", clean); 234 wakeup(&lfs_allclean_wakeup); 235 error = tsleep(&fs->lfs_avail, PRIBIO + 1, 236 "lfs writer", 0); 237 if (error) 238 return (error); 239 } 240 } while (clean <= 2 ); 241 242 /* 243 * Allocate a segment structure and enough space to hold pointers to 244 * the maximum possible number of buffers which can be described in a 245 * single summary block. 246 */ 247 do_ckp = flags & SEGM_CKP || fs->lfs_nactive > MAX_ACTIVE; 248 lfs_seglock(fs, flags | (do_ckp ? SEGM_CKP : 0)); 249 sp = fs->lfs_sp; 250 251 lfs_writevnodes(fs, mp, sp, VN_REG); 252 253 /* XXX ignore ordering of dirops for now */ 254 /* XXX 255 fs->lfs_writer = 1; 256 if (fs->lfs_dirops && (error = 257 tsleep(&fs->lfs_writer, PRIBIO + 1, "lfs writer", 0))) { 258 free(sp->bpp, M_SEGMENT); 259 free(sp, M_SEGMENT); 260 fs->lfs_writer = 0; 261 return (error); 262 } 263 264 lfs_writevnodes(fs, mp, sp, VN_DIROP); 265 */ 266 267 /* 268 * If we are doing a checkpoint, mark everything since the 269 * last checkpoint as no longer ACTIVE. 270 */ 271 if (do_ckp) 272 for (ibno = fs->lfs_cleansz + fs->lfs_segtabsz; 273 --ibno >= fs->lfs_cleansz; ) { 274 if (bread(fs->lfs_ivnode, ibno, fs->lfs_bsize, 275 NOCRED, &bp)) 276 277 panic("lfs: ifile read"); 278 segusep = (SEGUSE *)bp->b_data; 279 for (i = fs->lfs_sepb; i--; segusep++) 280 segusep->su_flags &= ~SEGUSE_ACTIVE; 281 282 error = VOP_BWRITE(bp); 283 } 284 285 if (do_ckp || fs->lfs_doifile) { 286 redo: 287 vp = fs->lfs_ivnode; 288 while (vget(vp, 1)); 289 ip = VTOI(vp); 290 if (vp->v_dirtyblkhd.lh_first != NULL) 291 lfs_writefile(fs, sp, vp); 292 (void)lfs_writeinode(fs, sp, ip); 293 vput(vp); 294 if (lfs_writeseg(fs, sp) && do_ckp) 295 goto redo; 296 } else 297 (void) lfs_writeseg(fs, sp); 298 299 /* 300 * If the I/O count is non-zero, sleep until it reaches zero. At the 301 * moment, the user's process hangs around so we can sleep. 302 */ 303 /* XXX ignore dirops for now 304 fs->lfs_writer = 0; 305 fs->lfs_doifile = 0; 306 wakeup(&fs->lfs_dirops); 307 */ 308 309 #ifdef DOSTATS 310 ++lfs_stats.nwrites; 311 if (sp->seg_flags & SEGM_SYNC) 312 ++lfs_stats.nsync_writes; 313 if (sp->seg_flags & SEGM_CKP) 314 ++lfs_stats.ncheckpoints; 315 #endif 316 lfs_segunlock(fs); 317 return (0); 318 } 319 320 /* 321 * Write the dirty blocks associated with a vnode. 322 */ 323 void 324 lfs_writefile(fs, sp, vp) 325 struct lfs *fs; 326 struct segment *sp; 327 struct vnode *vp; 328 { 329 struct buf *bp; 330 struct finfo *fip; 331 IFILE *ifp; 332 333 if (sp->seg_bytes_left < fs->lfs_bsize || 334 sp->sum_bytes_left < sizeof(struct finfo)) 335 (void) lfs_writeseg(fs, sp); 336 337 sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t); 338 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 339 340 fip = sp->fip; 341 fip->fi_nblocks = 0; 342 fip->fi_ino = VTOI(vp)->i_number; 343 LFS_IENTRY(ifp, fs, fip->fi_ino, bp); 344 fip->fi_version = ifp->if_version; 345 brelse(bp); 346 347 /* 348 * It may not be necessary to write the meta-data blocks at this point, 349 * as the roll-forward recovery code should be able to reconstruct the 350 * list. 351 */ 352 lfs_gather(fs, sp, vp, lfs_match_data); 353 lfs_gather(fs, sp, vp, lfs_match_indir); 354 lfs_gather(fs, sp, vp, lfs_match_dindir); 355 #ifdef TRIPLE 356 lfs_gather(fs, sp, vp, lfs_match_tindir); 357 #endif 358 359 fip = sp->fip; 360 if (fip->fi_nblocks != 0) { 361 sp->fip = 362 (struct finfo *)((caddr_t)fip + sizeof(struct finfo) + 363 sizeof(daddr_t) * (fip->fi_nblocks - 1)); 364 sp->start_lbp = &sp->fip->fi_blocks[0]; 365 } else { 366 sp->sum_bytes_left += sizeof(struct finfo) - sizeof(daddr_t); 367 --((SEGSUM *)(sp->segsum))->ss_nfinfo; 368 } 369 } 370 371 int 372 lfs_writeinode(fs, sp, ip) 373 struct lfs *fs; 374 struct segment *sp; 375 struct inode *ip; 376 { 377 struct buf *bp, *ibp; 378 IFILE *ifp; 379 SEGUSE *sup; 380 daddr_t daddr; 381 ino_t ino; 382 int error, i, ndx; 383 int redo_ifile = 0; 384 struct timespec ts; 385 386 if (!(ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE))) 387 return(0); 388 389 /* Allocate a new inode block if necessary. */ 390 if (sp->ibp == NULL) { 391 /* Allocate a new segment if necessary. */ 392 if (sp->seg_bytes_left < fs->lfs_bsize || 393 sp->sum_bytes_left < sizeof(daddr_t)) 394 (void) lfs_writeseg(fs, sp); 395 396 /* Get next inode block. */ 397 daddr = fs->lfs_offset; 398 fs->lfs_offset += fsbtodb(fs, 1); 399 sp->ibp = *sp->cbpp++ = 400 lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, daddr, 401 fs->lfs_bsize); 402 /* Zero out inode numbers */ 403 for (i = 0; i < INOPB(fs); ++i) 404 ((struct dinode *)sp->ibp->b_data)[i].di_inumber = 0; 405 ++sp->start_bpp; 406 fs->lfs_avail -= fsbtodb(fs, 1); 407 /* Set remaining space counters. */ 408 sp->seg_bytes_left -= fs->lfs_bsize; 409 sp->sum_bytes_left -= sizeof(daddr_t); 410 ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) - 411 sp->ninodes / INOPB(fs) - 1; 412 ((daddr_t *)(sp->segsum))[ndx] = daddr; 413 } 414 415 /* Update the inode times and copy the inode onto the inode page. */ 416 if (ip->i_flag & IN_MODIFIED) 417 --fs->lfs_uinodes; 418 TIMEVAL_TO_TIMESPEC(&time, &ts); 419 FFS_ITIMES(ip, &ts, &ts, &ts); 420 ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); 421 bp = sp->ibp; 422 ((struct dinode *)bp->b_data)[sp->ninodes % INOPB(fs)] = ip->i_din.ffs_din; 423 /* Increment inode count in segment summary block. */ 424 ++((SEGSUM *)(sp->segsum))->ss_ninos; 425 426 /* If this page is full, set flag to allocate a new page. */ 427 if (++sp->ninodes % INOPB(fs) == 0) 428 sp->ibp = NULL; 429 430 /* 431 * If updating the ifile, update the super-block. Update the disk 432 * address and access times for this inode in the ifile. 433 */ 434 ino = ip->i_number; 435 if (ino == LFS_IFILE_INUM) { 436 daddr = fs->lfs_idaddr; 437 fs->lfs_idaddr = bp->b_blkno; 438 } else { 439 LFS_IENTRY(ifp, fs, ino, ibp); 440 daddr = ifp->if_daddr; 441 ifp->if_daddr = bp->b_blkno; 442 error = VOP_BWRITE(ibp); 443 } 444 445 /* 446 * No need to update segment usage if there was no former inode address 447 * or if the last inode address is in the current partial segment. 448 */ 449 if (daddr != LFS_UNUSED_DADDR && 450 !(daddr >= fs->lfs_lastpseg && daddr <= bp->b_blkno)) { 451 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 452 #ifdef DIAGNOSTIC 453 if (sup->su_nbytes < sizeof(struct dinode)) { 454 /* XXX -- Change to a panic. */ 455 printf("lfs: negative bytes (segment %d)\n", 456 datosn(fs, daddr)); 457 panic("negative bytes"); 458 } 459 #endif 460 sup->su_nbytes -= sizeof(struct dinode); 461 redo_ifile = 462 (ino == LFS_IFILE_INUM && !(bp->b_flags & B_GATHERED)); 463 error = VOP_BWRITE(bp); 464 } 465 return (redo_ifile); 466 } 467 468 int 469 lfs_gatherblock(sp, bp, sptr) 470 struct segment *sp; 471 struct buf *bp; 472 int *sptr; 473 { 474 struct lfs *fs; 475 int version; 476 477 /* 478 * If full, finish this segment. We may be doing I/O, so 479 * release and reacquire the splbio(). 480 */ 481 #ifdef DIAGNOSTIC 482 if (sp->vp == NULL) 483 panic ("lfs_gatherblock: Null vp in segment"); 484 #endif 485 fs = sp->fs; 486 if (sp->sum_bytes_left < sizeof(daddr_t) || 487 sp->seg_bytes_left < fs->lfs_bsize) { 488 if (sptr) 489 splx(*sptr); 490 lfs_updatemeta(sp); 491 492 version = sp->fip->fi_version; 493 (void) lfs_writeseg(fs, sp); 494 495 sp->fip->fi_version = version; 496 sp->fip->fi_ino = VTOI(sp->vp)->i_number; 497 /* Add the current file to the segment summary. */ 498 ++((SEGSUM *)(sp->segsum))->ss_nfinfo; 499 sp->sum_bytes_left -= 500 sizeof(struct finfo) - sizeof(daddr_t); 501 502 if (sptr) 503 *sptr = splbio(); 504 return(1); 505 } 506 507 /* Insert into the buffer list, update the FINFO block. */ 508 bp->b_flags |= B_GATHERED; 509 *sp->cbpp++ = bp; 510 sp->fip->fi_blocks[sp->fip->fi_nblocks++] = bp->b_lblkno; 511 512 sp->sum_bytes_left -= sizeof(daddr_t); 513 sp->seg_bytes_left -= fs->lfs_bsize; 514 return(0); 515 } 516 517 void 518 lfs_gather(fs, sp, vp, match) 519 struct lfs *fs; 520 struct segment *sp; 521 struct vnode *vp; 522 int (*match) __P((struct lfs *, struct buf *)); 523 { 524 struct buf *bp; 525 int s; 526 527 sp->vp = vp; 528 s = splbio(); 529 loop: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) { 530 if (bp->b_flags & B_BUSY || !match(fs, bp) || 531 bp->b_flags & B_GATHERED) 532 continue; 533 #ifdef DIAGNOSTIC 534 if (!(bp->b_flags & B_DELWRI)) 535 panic("lfs_gather: bp not B_DELWRI"); 536 if (!(bp->b_flags & B_LOCKED)) 537 panic("lfs_gather: bp not B_LOCKED"); 538 #endif 539 if (lfs_gatherblock(sp, bp, &s)) 540 goto loop; 541 } 542 splx(s); 543 lfs_updatemeta(sp); 544 sp->vp = NULL; 545 } 546 547 548 /* 549 * Update the metadata that points to the blocks listed in the FINFO 550 * array. 551 */ 552 void 553 lfs_updatemeta(sp) 554 struct segment *sp; 555 { 556 SEGUSE *sup; 557 struct buf *bp; 558 struct lfs *fs; 559 struct vnode *vp; 560 struct indir a[NIADDR + 2], *ap; 561 struct inode *ip; 562 daddr_t daddr, lbn, off; 563 int db_per_fsb, error, i, nblocks, num; 564 565 vp = sp->vp; 566 nblocks = &sp->fip->fi_blocks[sp->fip->fi_nblocks] - sp->start_lbp; 567 if (vp == NULL || nblocks == 0) 568 return; 569 570 /* Sort the blocks. */ 571 if (!(sp->seg_flags & SEGM_CLEAN)) 572 lfs_shellsort(sp->start_bpp, sp->start_lbp, nblocks); 573 574 /* 575 * Assign disk addresses, and update references to the logical 576 * block and the segment usage information. 577 */ 578 fs = sp->fs; 579 db_per_fsb = fsbtodb(fs, 1); 580 for (i = nblocks; i--; ++sp->start_bpp) { 581 lbn = *sp->start_lbp++; 582 (*sp->start_bpp)->b_blkno = off = fs->lfs_offset; 583 fs->lfs_offset += db_per_fsb; 584 585 error = ufs_bmaparray(vp, lbn, &daddr, a, &num, NULL); 586 if (error) 587 panic("lfs_updatemeta: ufs_bmaparray %d", error); 588 ip = VTOI(vp); 589 switch (num) { 590 case 0: 591 ip->i_ffs_db[lbn] = off; 592 break; 593 case 1: 594 ip->i_ffs_ib[a[0].in_off] = off; 595 break; 596 default: 597 ap = &a[num - 1]; 598 if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp)) 599 panic("lfs_updatemeta: bread bno %d", 600 ap->in_lbn); 601 /* 602 * Bread may create a new indirect block which needs 603 * to get counted for the inode. 604 */ 605 if (bp->b_blkno == -1 && !(bp->b_flags & B_CACHE)) { 606 printf ("Updatemeta allocating indirect block: shouldn't happen\n"); 607 ip->i_ffs_blocks += btodb(fs->lfs_bsize); 608 fs->lfs_bfree -= btodb(fs->lfs_bsize); 609 } 610 ((daddr_t *)bp->b_data)[ap->in_off] = off; 611 VOP_BWRITE(bp); 612 } 613 614 /* Update segment usage information. */ 615 if (daddr != UNASSIGNED && 616 !(daddr >= fs->lfs_lastpseg && daddr <= off)) { 617 LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp); 618 #ifdef DIAGNOSTIC 619 if (sup->su_nbytes < fs->lfs_bsize) { 620 /* XXX -- Change to a panic. */ 621 printf("lfs: negative bytes (segment %d)\n", 622 datosn(fs, daddr)); 623 panic ("Negative Bytes"); 624 } 625 #endif 626 sup->su_nbytes -= fs->lfs_bsize; 627 error = VOP_BWRITE(bp); 628 } 629 } 630 } 631 632 /* 633 * Start a new segment. 634 */ 635 int 636 lfs_initseg(fs) 637 struct lfs *fs; 638 { 639 struct segment *sp; 640 SEGUSE *sup; 641 SEGSUM *ssp; 642 struct buf *bp; 643 int repeat; 644 645 sp = fs->lfs_sp; 646 647 repeat = 0; 648 /* Advance to the next segment. */ 649 if (!LFS_PARTIAL_FITS(fs)) { 650 /* Wake up any cleaning procs waiting on this file system. */ 651 wakeup(&lfs_allclean_wakeup); 652 653 lfs_newseg(fs); 654 repeat = 1; 655 fs->lfs_offset = fs->lfs_curseg; 656 sp->seg_number = datosn(fs, fs->lfs_curseg); 657 sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE; 658 659 /* 660 * If the segment contains a superblock, update the offset 661 * and summary address to skip over it. 662 */ 663 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 664 if (sup->su_flags & SEGUSE_SUPERBLOCK) { 665 fs->lfs_offset += LFS_SBPAD / DEV_BSIZE; 666 sp->seg_bytes_left -= LFS_SBPAD; 667 } 668 brelse(bp); 669 } else { 670 sp->seg_number = datosn(fs, fs->lfs_curseg); 671 sp->seg_bytes_left = (fs->lfs_dbpseg - 672 (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE; 673 } 674 fs->lfs_lastpseg = fs->lfs_offset; 675 676 sp->fs = fs; 677 sp->ibp = NULL; 678 sp->ninodes = 0; 679 680 /* Get a new buffer for SEGSUM and enter it into the buffer list. */ 681 sp->cbpp = sp->bpp; 682 *sp->cbpp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_offset, 683 LFS_SUMMARY_SIZE); 684 sp->segsum = (*sp->cbpp)->b_data; 685 bzero(sp->segsum, LFS_SUMMARY_SIZE); 686 sp->start_bpp = ++sp->cbpp; 687 fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE; 688 689 /* Set point to SEGSUM, initialize it. */ 690 ssp = sp->segsum; 691 ssp->ss_next = fs->lfs_nextseg; 692 ssp->ss_nfinfo = ssp->ss_ninos = 0; 693 694 /* Set pointer to first FINFO, initialize it. */ 695 sp->fip = (struct finfo *)((caddr_t)sp->segsum + sizeof(SEGSUM)); 696 sp->fip->fi_nblocks = 0; 697 sp->start_lbp = &sp->fip->fi_blocks[0]; 698 699 sp->seg_bytes_left -= LFS_SUMMARY_SIZE; 700 sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM); 701 702 return(repeat); 703 } 704 705 /* 706 * Return the next segment to write. 707 */ 708 void 709 lfs_newseg(fs) 710 struct lfs *fs; 711 { 712 CLEANERINFO *cip; 713 SEGUSE *sup; 714 struct buf *bp; 715 int curseg, isdirty, sn; 716 717 LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp); 718 sup->su_flags |= SEGUSE_DIRTY | SEGUSE_ACTIVE; 719 sup->su_nbytes = 0; 720 sup->su_nsums = 0; 721 sup->su_ninos = 0; 722 (void) VOP_BWRITE(bp); 723 724 LFS_CLEANERINFO(cip, fs, bp); 725 --cip->clean; 726 ++cip->dirty; 727 (void) VOP_BWRITE(bp); 728 729 fs->lfs_lastseg = fs->lfs_curseg; 730 fs->lfs_curseg = fs->lfs_nextseg; 731 for (sn = curseg = datosn(fs, fs->lfs_curseg);;) { 732 sn = (sn + 1) % fs->lfs_nseg; 733 if (sn == curseg) 734 panic("lfs_nextseg: no clean segments"); 735 LFS_SEGENTRY(sup, fs, sn, bp); 736 isdirty = sup->su_flags & SEGUSE_DIRTY; 737 brelse(bp); 738 if (!isdirty) 739 break; 740 } 741 742 ++fs->lfs_nactive; 743 fs->lfs_nextseg = sntoda(fs, sn); 744 #ifdef DOSTATS 745 ++lfs_stats.segsused; 746 #endif 747 } 748 749 int 750 lfs_writeseg(fs, sp) 751 struct lfs *fs; 752 struct segment *sp; 753 { 754 extern int locked_queue_count; 755 struct buf **bpp, *bp, *cbp; 756 SEGUSE *sup; 757 SEGSUM *ssp; 758 dev_t i_dev; 759 size_t size; 760 u_long *datap, *dp; 761 int ch_per_blk, do_again, i, nblocks, num, s; 762 int (*strategy)__P((void *)); 763 struct vop_strategy_args vop_strategy_a; 764 u_short ninos; 765 char *p; 766 767 /* 768 * If there are no buffers other than the segment summary to write 769 * and it is not a checkpoint, don't do anything. On a checkpoint, 770 * even if there aren't any buffers, you need to write the superblock. 771 */ 772 if ((nblocks = sp->cbpp - sp->bpp) == 1) 773 return (0); 774 775 ssp = (SEGSUM *)sp->segsum; 776 777 /* Update the segment usage information. */ 778 LFS_SEGENTRY(sup, fs, sp->seg_number, bp); 779 ninos = (ssp->ss_ninos + INOPB(fs) - 1) / INOPB(fs); 780 sup->su_nbytes += (nblocks - 1 - ninos) << fs->lfs_bshift; 781 sup->su_nbytes += ssp->ss_ninos * sizeof(struct dinode); 782 sup->su_nbytes += LFS_SUMMARY_SIZE; 783 sup->su_lastmod = time.tv_sec; 784 sup->su_ninos += ninos; 785 ++sup->su_nsums; 786 do_again = !(bp->b_flags & B_GATHERED); 787 (void)VOP_BWRITE(bp); 788 /* 789 * Compute checksum across data and then across summary; the first 790 * block (the summary block) is skipped. Set the create time here 791 * so that it's guaranteed to be later than the inode mod times. 792 * 793 * XXX 794 * Fix this to do it inline, instead of malloc/copy. 795 */ 796 datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK); 797 for (bpp = sp->bpp, i = nblocks - 1; i--;) { 798 if ((*++bpp)->b_flags & B_INVAL) { 799 if (copyin((*bpp)->b_saveaddr, dp++, sizeof(u_long))) 800 panic("lfs_writeseg: copyin failed"); 801 } else 802 *dp++ = ((u_long *)(*bpp)->b_data)[0]; 803 } 804 ssp->ss_create = time.tv_sec; 805 ssp->ss_datasum = cksum(datap, (nblocks - 1) * sizeof(u_long)); 806 ssp->ss_sumsum = 807 cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum)); 808 free(datap, M_SEGMENT); 809 #ifdef DIAGNOSTIC 810 if (fs->lfs_bfree < fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE) 811 panic("lfs_writeseg: No diskspace for summary"); 812 #endif 813 fs->lfs_bfree -= (fsbtodb(fs, ninos) + LFS_SUMMARY_SIZE / DEV_BSIZE); 814 815 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 816 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 817 818 /* 819 * When we simply write the blocks we lose a rotation for every block 820 * written. To avoid this problem, we allocate memory in chunks, copy 821 * the buffers into the chunk and write the chunk. MAXPHYS is the 822 * largest size I/O devices can handle. 823 * When the data is copied to the chunk, turn off the the B_LOCKED bit 824 * and brelse the buffer (which will move them to the LRU list). Add 825 * the B_CALL flag to the buffer header so we can count I/O's for the 826 * checkpoints and so we can release the allocated memory. 827 * 828 * XXX 829 * This should be removed if the new virtual memory system allows us to 830 * easily make the buffers contiguous in kernel memory and if that's 831 * fast enough. 832 */ 833 ch_per_blk = MAXPHYS / fs->lfs_bsize; 834 for (bpp = sp->bpp, i = nblocks; i;) { 835 num = ch_per_blk; 836 if (num > i) 837 num = i; 838 i -= num; 839 size = num * fs->lfs_bsize; 840 841 cbp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, 842 (*bpp)->b_blkno, size); 843 cbp->b_dev = i_dev; 844 cbp->b_flags |= B_ASYNC | B_BUSY; 845 846 s = splbio(); 847 ++fs->lfs_iocount; 848 for (p = cbp->b_data; num--;) { 849 bp = *bpp++; 850 /* 851 * Fake buffers from the cleaner are marked as B_INVAL. 852 * We need to copy the data from user space rather than 853 * from the buffer indicated. 854 * XXX == what do I do on an error? 855 */ 856 if (bp->b_flags & B_INVAL) { 857 if (copyin(bp->b_saveaddr, p, bp->b_bcount)) 858 panic("lfs_writeseg: copyin failed"); 859 } else 860 bcopy(bp->b_data, p, bp->b_bcount); 861 p += bp->b_bcount; 862 if (bp->b_flags & B_LOCKED) 863 --locked_queue_count; 864 bp->b_flags &= ~(B_ERROR | B_READ | B_DELWRI | 865 B_LOCKED | B_GATHERED); 866 if (bp->b_flags & B_CALL) { 867 /* if B_CALL, it was created with newbuf */ 868 brelvp(bp); 869 if (!(bp->b_flags & B_INVAL)) 870 free(bp->b_data, M_SEGMENT); 871 free(bp, M_SEGMENT); 872 } else { 873 bremfree(bp); 874 bp->b_flags |= B_DONE; 875 reassignbuf(bp, bp->b_vp); 876 brelse(bp); 877 } 878 } 879 ++cbp->b_vp->v_numoutput; 880 splx(s); 881 cbp->b_bcount = p - (char *)cbp->b_data; 882 /* 883 * XXXX This is a gross and disgusting hack. Since these 884 * buffers are physically addressed, they hang off the 885 * device vnode (devvp). As a result, they have no way 886 * of getting to the LFS superblock or lfs structure to 887 * keep track of the number of I/O's pending. So, I am 888 * going to stuff the fs into the saveaddr field of 889 * the buffer (yuk). 890 */ 891 cbp->b_saveaddr = (caddr_t)fs; 892 vop_strategy_a.a_desc = VDESC(vop_strategy); 893 vop_strategy_a.a_bp = cbp; 894 (strategy)(&vop_strategy_a); 895 } 896 /* 897 * XXX 898 * Vinvalbuf can move locked buffers off the locked queue 899 * and we have no way of knowing about this. So, after 900 * doing a big write, we recalculate how many bufers are 901 * really still left on the locked queue. 902 */ 903 locked_queue_count = count_lock_queue(); 904 wakeup(&locked_queue_count); 905 #ifdef DOSTATS 906 ++lfs_stats.psegwrites; 907 lfs_stats.blocktot += nblocks - 1; 908 if (fs->lfs_sp->seg_flags & SEGM_SYNC) 909 ++lfs_stats.psyncwrites; 910 if (fs->lfs_sp->seg_flags & SEGM_CLEAN) { 911 ++lfs_stats.pcleanwrites; 912 lfs_stats.cleanblocks += nblocks - 1; 913 } 914 #endif 915 return (lfs_initseg(fs) || do_again); 916 } 917 918 void 919 lfs_writesuper(fs) 920 struct lfs *fs; 921 { 922 struct buf *bp; 923 dev_t i_dev; 924 int (*strategy) __P((void *)); 925 int s; 926 struct vop_strategy_args vop_strategy_a; 927 928 i_dev = VTOI(fs->lfs_ivnode)->i_dev; 929 strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op[VOFFSET(vop_strategy)]; 930 931 /* Checksum the superblock and copy it into a buffer. */ 932 fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum)); 933 bp = lfs_newbuf(VTOI(fs->lfs_ivnode)->i_devvp, fs->lfs_sboffs[0], 934 LFS_SBPAD); 935 *(struct lfs *)bp->b_data = *fs; 936 937 /* XXX Toggle between first two superblocks; for now just write first */ 938 bp->b_dev = i_dev; 939 bp->b_flags |= B_BUSY | B_CALL | B_ASYNC; 940 bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI); 941 bp->b_iodone = lfs_supercallback; 942 vop_strategy_a.a_desc = VDESC(vop_strategy); 943 vop_strategy_a.a_bp = bp; 944 s = splbio(); 945 ++bp->b_vp->v_numoutput; 946 splx(s); 947 (strategy)(&vop_strategy_a); 948 } 949 950 /* 951 * Logical block number match routines used when traversing the dirty block 952 * chain. 953 */ 954 int 955 lfs_match_data(fs, bp) 956 struct lfs *fs; 957 struct buf *bp; 958 { 959 return (bp->b_lblkno >= 0); 960 } 961 962 int 963 lfs_match_indir(fs, bp) 964 struct lfs *fs; 965 struct buf *bp; 966 { 967 int lbn; 968 969 lbn = bp->b_lblkno; 970 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0); 971 } 972 973 int 974 lfs_match_dindir(fs, bp) 975 struct lfs *fs; 976 struct buf *bp; 977 { 978 int lbn; 979 980 lbn = bp->b_lblkno; 981 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1); 982 } 983 984 int 985 lfs_match_tindir(fs, bp) 986 struct lfs *fs; 987 struct buf *bp; 988 { 989 int lbn; 990 991 lbn = bp->b_lblkno; 992 return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2); 993 } 994 995 /* 996 * Allocate a new buffer header. 997 */ 998 struct buf * 999 lfs_newbuf(vp, daddr, size) 1000 struct vnode *vp; 1001 daddr_t daddr; 1002 size_t size; 1003 { 1004 struct buf *bp; 1005 size_t nbytes; 1006 1007 nbytes = roundup(size, DEV_BSIZE); 1008 bp = malloc(sizeof(struct buf), M_SEGMENT, M_WAITOK); 1009 bzero(bp, sizeof(struct buf)); 1010 if (nbytes) 1011 bp->b_data = malloc(nbytes, M_SEGMENT, M_WAITOK); 1012 bgetvp(vp, bp); 1013 bp->b_bufsize = size; 1014 bp->b_bcount = size; 1015 bp->b_lblkno = daddr; 1016 bp->b_blkno = daddr; 1017 bp->b_error = 0; 1018 bp->b_resid = 0; 1019 bp->b_iodone = lfs_callback; 1020 bp->b_flags |= B_BUSY | B_CALL | B_NOCACHE; 1021 return (bp); 1022 } 1023 1024 void 1025 lfs_callback(bp) 1026 struct buf *bp; 1027 { 1028 struct lfs *fs; 1029 1030 fs = (struct lfs *)bp->b_saveaddr; 1031 #ifdef DIAGNOSTIC 1032 if (fs->lfs_iocount == 0) 1033 panic("lfs_callback: zero iocount\n"); 1034 #endif 1035 if (--fs->lfs_iocount == 0) 1036 wakeup(&fs->lfs_iocount); 1037 1038 brelvp(bp); 1039 free(bp->b_data, M_SEGMENT); 1040 free(bp, M_SEGMENT); 1041 } 1042 1043 void 1044 lfs_supercallback(bp) 1045 struct buf *bp; 1046 { 1047 brelvp(bp); 1048 free(bp->b_data, M_SEGMENT); 1049 free(bp, M_SEGMENT); 1050 } 1051 1052 /* 1053 * Shellsort (diminishing increment sort) from Data Structures and 1054 * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290; 1055 * see also Knuth Vol. 3, page 84. The increments are selected from 1056 * formula (8), page 95. Roughly O(N^3/2). 1057 */ 1058 /* 1059 * This is our own private copy of shellsort because we want to sort 1060 * two parallel arrays (the array of buffer pointers and the array of 1061 * logical block numbers) simultaneously. Note that we cast the array 1062 * of logical block numbers to a unsigned in this routine so that the 1063 * negative block numbers (meta data blocks) sort AFTER the data blocks. 1064 */ 1065 void 1066 lfs_shellsort(bp_array, lb_array, nmemb) 1067 struct buf **bp_array; 1068 daddr_t *lb_array; 1069 register int nmemb; 1070 { 1071 static int __rsshell_increments[] = { 4, 1, 0 }; 1072 register int incr, *incrp, t1, t2; 1073 struct buf *bp_temp; 1074 u_long lb_temp; 1075 1076 for (incrp = __rsshell_increments; (incr = *incrp++) != 0;) 1077 for (t1 = incr; t1 < nmemb; ++t1) 1078 for (t2 = t1 - incr; t2 >= 0;) 1079 if (lb_array[t2] > lb_array[t2 + incr]) { 1080 lb_temp = lb_array[t2]; 1081 lb_array[t2] = lb_array[t2 + incr]; 1082 lb_array[t2 + incr] = lb_temp; 1083 bp_temp = bp_array[t2]; 1084 bp_array[t2] = bp_array[t2 + incr]; 1085 bp_array[t2 + incr] = bp_temp; 1086 t2 -= incr; 1087 } else 1088 break; 1089 } 1090 1091 /* 1092 * Check VXLOCK. Return 1 if the vnode is locked. Otherwise, vget it. 1093 */ 1094 int 1095 lfs_vref(vp) 1096 register struct vnode *vp; 1097 { 1098 1099 if (vp->v_flag & VXLOCK) 1100 return(1); 1101 return (vget(vp, 0)); 1102 } 1103 1104 void 1105 lfs_vunref(vp) 1106 register struct vnode *vp; 1107 { 1108 extern int lfs_no_inactive; 1109 1110 /* 1111 * This is vrele except that we do not want to VOP_INACTIVE 1112 * this vnode. Rather than inline vrele here, we use a global 1113 * flag to tell lfs_inactive not to run. Yes, its gross. 1114 */ 1115 lfs_no_inactive = 1; 1116 vrele(vp); 1117 lfs_no_inactive = 0; 1118 } 1119