/*	vfs_cluster.c	4.47	83/06/14	*/

#include "../machine/pte.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, dev, blkno);
		return(bp);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, dev, blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, dev, blkno);
			u.u_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, dev, blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rabsize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, dev, rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, dev, rablkno);
			u.u_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
}
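
/*
 * For reference, a typical synchronous read path through the routines
 * above looks roughly like the following sketch (illustrative only;
 * the calling file system code is hypothetical and not part of this
 * file):
 *
 *	struct buf *bp;
 *
 *	bp = bread(dev, blkno, size);
 *	if (bp->b_flags & B_ERROR) {
 *		brelse(bp);
 *		return;
 *	}
 *	... copy data out of bp->b_un.b_addr ...
 *	brelse(bp);			release with no i/o implied
 */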

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
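
/*
 * The corresponding write path, sketched for illustration only (the
 * surrounding file system code is hypothetical and not part of this
 * file):
 *
 *	bp = getblk(dev, blkno, size);	find or allocate the buffer
 *	... fill in bp->b_un.b_addr ...
 *	if (the whole block is now valid)
 *		bawrite(bp);		start the write, don't wait
 *	else
 *		bdwrite(bp);		delay; more of the block may follow
 */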

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))	/* XXX */
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		if (brealloc(bp, size) == 0)
			goto loop;
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	binshash(bp, flist);
	bp->b_dev = (dev_t)NODEV;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}

/*
 * Allocate space associated with a buffer.
 * If we can't get space, the buffer is released.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First we need to make sure that all overlapping previous I/O
	 * has been dealt with.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		return (allocbuf(bp, size));
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV)
		return (allocbuf(bp, size));

	/*
	 * Search the cache for any buffers that overlap the one that we
	 * are trying to allocate.  Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty (indicated
	 * by B_DELWRI).  A disk block must be mapped by at most one buffer
	 * at any point in time.  Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	return (allocbuf(bp, size));
}
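
/*
 * The overlap test used in brealloc above (and in blkflush below) views
 * each buffer as covering the disk sectors [b_blkno, b_blkno +
 * btodb(b_bcount) - 1].  As a worked example (assuming the usual
 * 512-byte DEV_BSIZE, so btodb(1024) == 2): a buffer with b_blkno 32
 * and b_bcount 2048 covers sectors 32..35; a request with start 34 and
 * size 1024 has last = 35, and since 32 <= 35 and 32 + 4 > 34 the two
 * ranges overlap, so the old buffer must be written out (if dirty) and
 * invalidated.
 */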

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
#ifndef sun
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
#else
	for (dp = &bfreelist[BQ_EMPTY]; dp > bfreelist; dp--)
#endif
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY;
	return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if (u.u_error == 0)			/* XXX */
		u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Insure that no part of a specified block is in an incore buffer.
 */
blkflush(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	long size;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t start, last;
	int s;

	start = blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(dev, blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		if (ep->b_flags & B_DELWRI) {
			splx(s);
			notavail(ep);
			bwrite(ep);
			goto loop;
		}
		splx(s);
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
	for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
		if ((bp->b_flags & B_DELWRI) == 0)
			continue;
		if (dev == NODEV || dev == bp->b_dev) {
			bp->b_flags |= B_ASYNC;
			notavail(bp);
			bwrite(bp);
			splx(s);
			goto loop;
		}
	}
	splx(s);
}
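
/*
 * The B_CALL case in biodone above supports callers that cannot sleep
 * in biowait, such as the pageout daemon.  A minimal sketch of such an
 * asynchronous caller (the completion routine name is hypothetical and
 * not part of this file):
 *
 *	bp->b_flags |= B_ASYNC|B_CALL;
 *	bp->b_iodone = mydone;		called from biodone() at interrupt time
 *	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
 *	... mydone(bp) later cleans up and releases the buffer ...
 */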

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error)==0)
			return (EIO);
	return (error);
}

/*
 * Invalidate in-core blocks belonging to a closed or umounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp	((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
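
/*
 * Illustrative note: a caller unmounting or closing a device would
 * typically flush delayed writes first and only then invalidate, e.g.
 *
 *	bflush(dev);
 *	binval(dev);
 *
 * (sketch only; the actual unmount code lives elsewhere).
 */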