/* vfs_cluster.c 4.30 82/05/22 */

/* merged into kernel: @(#)bio.c 2.3 4/8/82 */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *      getblk
 *      bread
 *      breada
 *      baddr   (if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *      bwrite
 *      bdwrite
 *      bawrite
 *      brelse
 * A hypothetical usage sketch appears below, just above bread.
 */

struct buf bfreelist[BQUEUES];
struct buf bswlist, *bclnlist;

#define BUFHSZ  63
#define RND     (MAXBSIZE/DEV_BSIZE)
struct bufhd bufhash[BUFHSZ];
#define BUFHASH(dev, dblkno) \
        ((struct buf *)&bufhash[((int)(dev)+(((int)(dblkno))/RND)) % BUFHSZ])

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
        register int i;
        register struct bufhd *bp;

        for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
                bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define DISKMON 1 */

#ifdef DISKMON
struct {
        int nbuf;
        long nread;
        long nreada;
        long ncache;
        long nwrite;
        long bufcount[64];
} io_info;
#endif

/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface; the function version is compiled under UNFAST below)
 */
#ifndef UNFAST
#define notavail(bp) \
{ \
        int x = spl6(); \
        (bp)->av_back->av_forw = (bp)->av_forw; \
        (bp)->av_forw->av_back = (bp)->av_back; \
        (bp)->b_flags |= B_BUSY; \
        splx(x); \
}
#endif
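/*
 * Illustrative example (hypothetical, not taken from any real caller):
 * the typical life cycle promised by the comment at the top of this
 * file.  A reader obtains a busy buffer from bread and must eventually
 * release it; a writer chooses among bwrite (synchronous), bdwrite
 * (delayed) and bawrite (asynchronous).  The function name exampleio
 * and the arguments dev, blkno and bsize are made up for illustration.
 */
#ifdef notdef
exampleio(dev, blkno, bsize)
        dev_t dev;
        daddr_t blkno;
        int bsize;
{
        register struct buf *bp;
        struct buf *bread(), *getblk();

        bp = bread(dev, blkno, bsize);          /* busy buffer, data in core */
        /* ... examine bp->b_un.b_addr ... */
        brelse(bp);                             /* done; no write implied */

        bp = getblk(dev, blkno, bsize);         /* busy buffer, maybe no I/O */
        /* ... fill bp->b_un.b_addr with new contents ... */
        bdwrite(bp);                            /* mark dirty, write later */
}
#endif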
/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        bp = getblk(dev, blkno, size);
        if (bp->b_flags&B_DONE) {
#ifdef TRACE
                trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef DISKMON
                io_info.ncache++;
#endif
                return(bp);
        }
        bp->b_flags |= B_READ;
        (*bdevsw[major(dev)].d_strategy)(bp);
#ifdef TRACE
        trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef DISKMON
        io_info.nread++;
#endif
        u.u_vm.vm_inblk++;              /* pay for read */
        iowait(bp);
        return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
        dev_t dev;
        daddr_t blkno, rablkno;
        int size;
{
        register struct buf *bp, *rabp;

        bp = NULL;
        if (!incore(dev, blkno)) {
                bp = getblk(dev, blkno, size);
                if ((bp->b_flags&B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        (*bdevsw[major(dev)].d_strategy)(bp);
#ifdef TRACE
                        trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef DISKMON
                        io_info.nread++;
#endif
                        u.u_vm.vm_inblk++;      /* pay for read */
                }
#ifdef TRACE
                else
                        trace(TR_BREADHIT, dev, blkno);
#endif
        }
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno, size);
                if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
#ifdef TRACE
                        trace(TR_BREADHITRA, dev, blkno);
#endif
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        (*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef TRACE
                        trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef DISKMON
                        io_info.nreada++;
#endif
                        u.u_vm.vm_inblk++;      /* pay in advance */
                }
        }
        if (bp == NULL)
                return (bread(dev, blkno, size));
        iowait(bp);
        return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
        register struct buf *bp;
{
        register flag;

        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
#ifdef DISKMON
        io_info.nwrite++;
#endif
        if ((flag&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;      /* no one paid yet */
#ifdef TRACE
        trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
        if ((flag&B_ASYNC) == 0) {
                iowait(bp);
                brelse(bp);
        } else if (flag & B_DELWRI)
                bp->b_flags |= B_AGE;
        else
                geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
        register struct buf *bp;
{
        register int flags;

        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;      /* no one paid yet */
        flags = bdevsw[major(bp->b_dev)].d_flags;
        if (flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_ASYNC;
        bwrite(bp);
}
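/*
 * Illustrative example (hypothetical, not taken from any real caller):
 * one plausible policy for choosing among the write routines above.
 * A caller that must know the data is on disk before proceeding uses
 * bwrite; one that has written only part of a block and expects to
 * write more of it soon uses bdwrite; one that has filled a block it
 * will not touch again soon uses bawrite.  The identifiers bp,
 * wholeblock and mustwait are made up for illustration.
 */
#ifdef notdef
examplewrite(bp, wholeblock, mustwait)
        register struct buf *bp;
        int wholeblock, mustwait;
{

        if (mustwait)
                bwrite(bp);             /* write, wait, then release */
        else if (wholeblock)
                bawrite(bp);            /* start the write, don't wait */
        else
                bdwrite(bp);            /* just mark dirty for later */
}
#endif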
/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
        register struct buf *bp;
{
        register struct buf *flist;
        register s;

        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        if (bfreelist[0].b_flags&B_WANTED) {
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
                        bp->b_dev = NODEV;              /* no assoc */
        s = spl6();
        if (bp->b_flags & (B_ERROR|B_INVAL)) {
                /* block has no info ... put at front of most free list */
                flist = &bfreelist[BQUEUES-1];
                flist->av_forw->av_back = bp;
                bp->av_forw = flist->av_forw;
                flist->av_forw = bp;
                bp->av_back = flist;
        } else {
                if (bp->b_flags & B_LOCKED)
                        flist = &bfreelist[BQ_LOCKED];
                else if (bp->b_flags & B_AGE)
                        flist = &bfreelist[BQ_AGE];
                else
                        flist = &bfreelist[BQ_LRU];
                flist->av_back->av_forw = bp;
                bp->av_back = flist->av_back;
                flist->av_back = bp;
                bp->av_forw = flist;
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
        dev_t dev;
        daddr_t blkno;
{
        register struct buf *bp;
        register struct buf *dp;

        dp = BUFHASH(dev, blkno);
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                if (bp->b_blkno == blkno && bp->b_dev == dev &&
                    !(bp->b_flags & B_INVAL))
                        return (1);
        return (0);
}

/*
 * Return the block via bread if it is already in core;
 * otherwise return 0 without doing any I/O.
 */
struct buf *
baddr(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{

        if (incore(dev, blkno))
                return (bread(dev, blkno, size));
        return (0);
}
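/*
 * Illustrative example (hypothetical, not taken from any real caller):
 * baddr returns a buffer only when the block is already cached, so a
 * caller can take advantage of an in-core copy when one exists and
 * otherwise skip the I/O entirely.  The function name examplepeek and
 * the arguments dev, blkno and bsize are made up for illustration.
 */
#ifdef notdef
examplepeek(dev, blkno, bsize)
        dev_t dev;
        daddr_t blkno;
        int bsize;
{
        register struct buf *bp;

        if ((bp = baddr(dev, blkno, bsize)) != NULL) {
                /* ... use the cached copy ... */
                brelse(bp);
                return (1);
        }
        return (0);             /* not cached; caller does without it */
}
#endif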
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp, *dp, *ep;
#ifdef DISKMON
        register int i;
#endif
        int s;

        if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
                blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
        dp = BUFHASH(dev, blkno);
loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
                if (bp->b_blkno != blkno || bp->b_dev != dev ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (bp->b_flags&B_BUSY) {
                        bp->b_flags |= B_WANTED;
                        sleep((caddr_t)bp, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
#ifdef DISKMON
                i = 0;
                dp = bp->av_forw;
                while ((dp->b_flags & B_HEAD) == 0) {
                        i++;
                        dp = dp->av_forw;
                }
                if (i<64)
                        io_info.bufcount[i]++;
#endif
                notavail(bp);
                brealloc(bp, size);
                bp->b_flags |= B_CACHE;
                return(bp);
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        s = spl6();
        for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
                if (ep->av_forw != ep)
                        break;
        if (ep == bfreelist) {          /* no free blocks at all */
                ep->b_flags |= B_WANTED;
                sleep((caddr_t)ep, PRIBIO+1);
                splx(s);
                goto loop;
        }
        splx(s);
        bp = ep->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
#ifdef TRACE
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
        bp->b_flags = B_BUSY;
        bfree(bp);
        bp->b_back->b_forw = bp->b_forw;
        bp->b_forw->b_back = bp->b_back;
        bp->b_forw = dp->b_forw;
        bp->b_back = dp;
        dp->b_forw->b_back = bp;
        dp->b_forw = bp;
        bp->b_dev = dev;
        bp->b_blkno = blkno;
        brealloc(bp, size);
        return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
        int size;
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = spl6();
        for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
                if (dp->av_forw != dp)
                        break;
        if (dp == bfreelist) {          /* no free blocks */
                dp->b_flags |= B_WANTED;
                sleep((caddr_t)dp, PRIBIO+1);
                goto loop;
        }
        splx(s);
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
#ifdef TRACE
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
        bp->b_flags = B_BUSY|B_INVAL;
        bp->b_back->b_forw = bp->b_forw;
        bp->b_forw->b_back = bp->b_back;
        bp->b_forw = dp->b_forw;
        bp->b_back = dp;
        dp->b_forw->b_back = bp;
        dp->b_forw = bp;
        bp->b_dev = (dev_t)NODEV;
        bp->b_bcount = size;
        return(bp);
}
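/*
 * Illustrative example (hypothetical, not taken from any real caller):
 * geteblk supplies a scratch buffer with no device association; it is
 * handed back with brelse when no longer needed, and since it is marked
 * B_INVAL it will never be mistaken for cached file system data.  The
 * function name examplescratch and the argument bsize are made up for
 * illustration.
 */
#ifdef notdef
examplescratch(bsize)
        int bsize;
{
        register struct buf *bp;

        bp = geteblk(bsize);
        /* ... use bp->b_un.b_addr as temporary storage ... */
        brelse(bp);
}
#endif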
/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
        register struct buf *bp;
        int size;
{
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        int s;

        /*
         * First need to make sure that all overlapping previous I/O
         * is disposed of.
         */
        if (size == bp->b_bcount)
                return;
        if (size < bp->b_bcount) {
                bp->b_bcount = size;
                return;
        }
        start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
        last = bp->b_blkno + (size / DEV_BSIZE) - 1;
        if (bp->b_bcount == 0) {
                start++;
                if (start == last)
                        goto allocit;
        }
        dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
        (void) spl0();
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep->b_blkno < start || ep->b_blkno > last ||
                    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                (void) spl0();
                /*
                 * What we would really like to do is kill this
                 * I/O since it is now useless.  We cannot do that
                 * so we force it to complete, so that it cannot
                 * over-write our useful data later.
                 */
                if (ep->b_flags & B_DELWRI) {
                        notavail(ep);
                        ep->b_flags |= B_ASYNC;
                        bwrite(ep);
                        goto loop;
                }
        }
allocit:
        /*
         * Here the buffer is already available, so all we
         * need to do is set the size.  Someday a better memory
         * management scheme will be implemented.
         */
        bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
        struct buf *bp;
{
        /*
         * Here the buffer does not change, so all we
         * need to do is set the size.  Someday a better memory
         * management scheme will be implemented.
         */
        bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
        register struct buf *bp;
{
        int s;

        s = spl6();
        while ((bp->b_flags&B_DONE)==0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
        geterror(bp);
}

#ifdef UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
        register struct buf *bp;
{
        register s;

        s = spl6();
        bp->av_back->av_forw = bp->av_forw;
        bp->av_forw->av_back = bp->av_back;
        bp->b_flags |= B_BUSY;
        splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
        register struct buf *bp;
{
        register int s;

        if (bp->b_flags & B_DONE)
                panic("dup iodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_DIRTY) {
                if (bp->b_flags & B_ERROR)
                        panic("IO err in push");
                s = spl6();
                bp->av_forw = bclnlist;
                bp->b_bcount = swsize[bp - swbuf];
                bp->b_pfcent = swpf[bp - swbuf];
                cnt.v_pgout++;
                cnt.v_pgpgout += bp->b_bcount / NBPG;
                bclnlist = bp;
                if (bswlist.b_flags & B_WANTED)
                        wakeup((caddr_t)&proc[2]);
                splx(s);
                return;
        }
        if (bp->b_flags&B_ASYNC)
                brelse(bp);
        else {
                bp->b_flags &= ~B_WANTED;
                wakeup((caddr_t)bp);
        }
}
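/*
 * Illustrative example (hypothetical, not taken from any real driver):
 * the tail of a block-device interrupt routine.  When the transfer
 * finishes the driver records any error in the buffer and hands it to
 * iodone, which either releases a B_ASYNC buffer or wakes the process
 * sleeping in iowait.  The function name exampleintr and the argument
 * hard_error are made up for illustration.
 */
#ifdef notdef
exampleintr(bp, hard_error)
        register struct buf *bp;
        int hard_error;
{

        if (hard_error) {
                bp->b_flags |= B_ERROR;
                bp->b_error = EIO;
        }
        iodone(bp);
}
#endif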
/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
        struct buf *bp;
{
        register int *p;
        register int c;

        p = bp->b_un.b_words;
        c = bp->b_bcount/sizeof(int);
        do
                *p++ = 0;
        while (--c);
        bp->b_resid = 0;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct buf *flist;
        int s;

loop:
        s = spl6();
        for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
                for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                        if (bp->b_flags&B_DELWRI &&
                            (dev == NODEV || dev == bp->b_dev)) {
                                bp->b_flags |= B_ASYNC;
                                notavail(bp);
                                bwrite(bp);
                                goto loop;
                        }
                }
        splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
        register struct buf *bp;
{

        if (bp->b_flags&B_ERROR)
                if ((u.u_error = bp->b_error)==0)
                        u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct bufhd *hp;
#define dp ((struct buf *)hp)

        for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
                for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                        if (bp->b_dev == dev)
                                bp->b_flags |= B_INVAL;
}
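/*
 * Illustrative example (hypothetical, not taken from any real caller):
 * how bflush and binval are typically combined, as suggested by the
 * comments above.  A periodic update pushes all delayed writes; taking
 * a file system down pushes that device's delayed writes and then
 * discards its cached blocks.  The function names examplesync and
 * exampleteardown are made up for illustration.
 */
#ifdef notdef
examplesync()
{

        bflush(NODEV);                  /* all devices */
}

exampleteardown(dev)
        dev_t dev;
{

        bflush(dev);                    /* push delayed writes for dev */
        binval(dev);                    /* forget its cached blocks */
}
#endif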