/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	7.30 (Berkeley) 06/28/90
 */

#include "param.h"
#include "user.h"
#include "buf.h"
#include "vnode.h"
#include "specdev.h"
#include "mount.h"
#include "trace.h"
#include "ucred.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size);
	if (bp->b_flags&(B_DONE|B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
	struct ucred *cred;
	struct buf **bpp;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags&(B_DONE|B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			u.u_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(vp, size), blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (!incore(vp, rablkno)) {
		rabp = getblk(vp, rablkno, rabsize);
		if (rabp->b_flags & (B_DONE|B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
			u.u_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}
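
/*
 * Illustrative sketch only (not part of the original source): how a
 * file system read routine might call bread()/breada() above.  The
 * name example_fsread and the arguments lbn, rablkno, and bsize are
 * hypothetical stand-ins for whatever the caller computes from the
 * file offset; the error handling follows the conventions of the
 * callers of this era.
 */
#ifdef notdef
example_fsread(vp, lbn, rablkno, bsize, cred)
	struct vnode *vp;
	daddr_t lbn, rablkno;
	int bsize;
	struct ucred *cred;
{
	struct buf *bp;
	int error;

	if (rablkno)
		/* read lbn and start read-ahead on rablkno */
		error = breada(vp, lbn, bsize, rablkno, bsize, cred, &bp);
	else
		error = bread(vp, lbn, bsize, cred, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	/* ... copy data out of the buffer here ... */
	brelse(bp);
	return (0);
}
#endif /* notdef */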

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register int flag;
	int s, error;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	else
		reassignbuf(bp, bp->b_vp);
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 */
	if ((flag&B_ASYNC) == 0) {
		error = biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI) {
		bp->b_flags |= B_AGE;
		error = 0;
	} else
		error = 0;	/* asynchronous write; status reported via biodone */
	return (error);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		u.u_ru.ru_oublock++;		/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, B_TAPE, 0, NOCRED) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}
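
/*
 * Illustrative sketch only (not part of the original source): choosing
 * among the three write interfaces above.  A caller that must know the
 * outcome uses bwrite(); a caller done with a full block uses bawrite()
 * to start i/o without waiting; a caller likely to rewrite the same
 * block soon uses bdwrite() to delay the write.  The function name and
 * the "sync"/"full" arguments are hypothetical.
 */
#ifdef notdef
example_fswrite(bp, sync, full)
	register struct buf *bp;
	int sync, full;
{

	if (sync)
		return (bwrite(bp));	/* wait for completion, get the error */
	if (full)
		bawrite(bp);		/* start i/o, don't wait */
	else
		bdwrite(bp);		/* mark delayed-write, release */
	return (0);
}
#endif /* notdef */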

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;

	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE|B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR|B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE|B_NOCACHE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(vp, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Return a block if it is in memory.
 */
baddr(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{

	if (incore(vp, blkno))
		return (bread(vp, blkno, size, cred, bpp));
	*bpp = 0;
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    bp->b_flags&B_INVAL)
			continue;
		s = splbio();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	brealloc(bp, size);
	return (bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);
	brealloc(bp, size);
	return (bp);
}

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{

	if (size == bp->b_bcount)
		return;
	allocbuf(bp, size);
}
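
/*
 * Illustrative sketch only (not part of the original source): using
 * baddr() above to pick up a block only if it is already cached,
 * doing without it on a miss.  The function name example_peek is
 * hypothetical.
 */
#ifdef notdef
example_peek(vp, blkno, size, cred)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
{
	struct buf *bp;
	int error;

	if (error = baddr(vp, blkno, size, cred, &bp))
		return (error);
	if (bp == NULL)
		return (0);		/* not in core; caller does without */
	/* ... use the cached contents ... */
	brelse(bp);
	return (0);
}
#endif /* notdef */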

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	bp = dp->av_forw;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	/*
	 * Pick up the device's error number and pass it to the user;
	 * if there is an error but the number is 0 set a generalized code.
	 */
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0) {
		bp->b_dirtyoff = bp->b_dirtyend = 0;
		if (vp = bp->b_vp) {
			vp->v_numoutput--;
			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
				if (vp->v_numoutput < 0)
					panic("biodone: neg numoutput");
				vp->v_flag &= ~VBWAIT;
				wakeup((caddr_t)&vp->v_numoutput);
			}
		}
	}
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;
{
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
}
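
/*
 * Illustrative sketch only (not part of the original source): arranging
 * for biodone() above to invoke a completion routine instead of
 * releasing the buffer, as the pageout daemon does.  The names
 * example_startread and example_iodone are hypothetical.
 */
#ifdef notdef
int example_iodone();

example_startread(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_READ | B_CALL;
	bp->b_iodone = example_iodone;	/* biodone() calls this, not brelse() */
	VOP_STRATEGY(bp);
}
#endif /* notdef */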

/*
 * Flush all dirty buffers associated with a vnode.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB - This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0) {
			(void) bawrite(bp);
			s = splbio();
		} else {
			(void) bwrite(bp);
			goto loop;
		}
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO+1);
	}
	splx(s);
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Invalidate in core blocks belonging to closed or unmounted filesystem.
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds.  Normally
 * this routine is preceded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done.
 * Mntinvalbuf returns the count of dirty buffers when it is finished.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO+1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) bwrite(bp);
				break;
			}
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (vp->v_cleanblkhd) {
		bp->b_blockf = vp->v_cleanblkhd;
		bp->b_blockb = &vp->v_cleanblkhd;
		vp->v_cleanblkhd->b_blockb = &bp->b_blockf;
		vp->v_cleanblkhd = bp;
	} else {
		vp->v_cleanblkhd = bp;
		bp->b_blockb = &vp->v_cleanblkhd;
		bp->b_blockf = NULL;
	}
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL)
		panic("reassignbuf: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (*listheadp) {
		bp->b_blockf = *listheadp;
		bp->b_blockb = listheadp;
		bp->b_blockf->b_blockb = &bp->b_blockf;
		*listheadp = bp;
	} else {
		*listheadp = bp;
		bp->b_blockb = listheadp;
		bp->b_blockf = NULL;
	}
}