/*	vfs_cluster.c	6.1	83/07/29	*/

#include "../machine/pte.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        if (size == 0)
                panic("bread: size 0");
        bp = getblk(dev, blkno, size);
        if (bp->b_flags&B_DONE) {
                trace(TR_BREADHIT, dev, blkno);
                return(bp);
        }
        bp->b_flags |= B_READ;
        if (bp->b_bcount > bp->b_bufsize)
                panic("bread");
        (*bdevsw[major(dev)].d_strategy)(bp);
        trace(TR_BREADMISS, dev, blkno);
        u.u_ru.ru_inblock++;            /* pay for read */
        biowait(bp);
        return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
        dev_t dev;
        daddr_t blkno; int size;
        daddr_t rablkno; int rabsize;
{
        register struct buf *bp, *rabp;

        bp = NULL;
        /*
         * If the block isn't in core, then allocate
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
        if (!incore(dev, blkno)) {
                bp = getblk(dev, blkno, size);
                if ((bp->b_flags&B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        if (bp->b_bcount > bp->b_bufsize)
                                panic("breada");
                        (*bdevsw[major(dev)].d_strategy)(bp);
                        trace(TR_BREADMISS, dev, blkno);
                        u.u_ru.ru_inblock++;            /* pay for read */
                } else
                        trace(TR_BREADHIT, dev, blkno);
        }

        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno, rabsize);
                if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
                        trace(TR_BREADHITRA, dev, blkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
                                panic("breadrabp");
                        (*bdevsw[major(dev)].d_strategy)(rabp);
                        trace(TR_BREADMISSRA, dev, rablkno);
                        u.u_ru.ru_inblock++;            /* pay in advance */
                }
        }

        /*
         * If block was in core, let bread get it.
         * If block wasn't in core, then the read was started
         * above, so just wait for it.
         */
        if (bp == NULL)
                return (bread(dev, blkno, size));
        biowait(bp);
        return (bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
        register struct buf *bp;
{
        register flag;

        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* no one paid yet */
        trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);

        /*
         * If the write was synchronous, then await i/o completion.
         * If the write was "delayed", then we put the buffer on
         * the q of blocks awaiting i/o completion status.
         */
        if ((flag&B_ASYNC) == 0) {
                biowait(bp);
                brelse(bp);
        } else if (flag & B_DELWRI)
                bp->b_flags |= B_AGE;
}
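
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): how a hypothetical file-system read path might use breada
 * for sequential access, overlapping the transfer of block lbn+1 with
 * the caller's use of block lbn.  The routine name and the bsize
 * parameter are assumptions made for the example only; the code is
 * disabled so it does not affect the compiled kernel.
 */
#ifdef notdef
struct buf *
example_readahead(dev, lbn, bsize)
        dev_t dev;
        daddr_t lbn;
        int bsize;
{
        register struct buf *bp;

        /* read lbn, waiting for it; start i/o on lbn+1 without waiting */
        bp = breada(dev, lbn, bsize, lbn + 1, bsize);
        if (bp->b_flags & B_ERROR) {
                brelse(bp);             /* give the buffer back on error */
                return (NULL);
        }
        return (bp);                    /* caller must brelse/bdwrite/bwrite bp */
}
#endif /* notdef */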

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
        register struct buf *bp;
{
        register int flags;

        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* no one paid yet */
        flags = bdevsw[major(bp->b_dev)].d_flags;
        if (flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_ASYNC;
        bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
        register struct buf *bp;
{
        register struct buf *flist;
        register s;

        /*
         * If someone's waiting for the buffer, or
         * is waiting for a buffer, wake 'em up.
         */
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        if (bfreelist[0].b_flags&B_WANTED) {
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
                        bp->b_dev = NODEV;              /* no assoc */

        /*
         * Stick the buffer back on a free list.
         */
        s = spl6();
        if (bp->b_bufsize <= 0) {
                /* block has no buffer ... put at front of unused buffer list */
                flist = &bfreelist[BQ_EMPTY];
                binsheadfree(bp, flist);
        } else if (bp->b_flags & (B_ERROR|B_INVAL)) {
                /* block has no info ... put at front of most free list */
                flist = &bfreelist[BQ_AGE];
                binsheadfree(bp, flist);
        } else {
                if (bp->b_flags & B_LOCKED)
                        flist = &bfreelist[BQ_LOCKED];
                else if (bp->b_flags & B_AGE)
                        flist = &bfreelist[BQ_AGE];
                else
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
        dev_t dev;
        daddr_t blkno;
{
        register struct buf *bp;
        register struct buf *dp;

        dp = BUFHASH(dev, blkno);
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                if (bp->b_blkno == blkno && bp->b_dev == dev &&
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);
        return (0);
}

/*
 * Retrieve the block if it is already associated with a buffer;
 * otherwise return 0 without initiating a read.
 */
struct buf *
baddr(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{

        if (incore(dev, blkno))
                return (bread(dev, blkno, size));
        return (0);
}
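
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the usual read-modify-write pattern built from bread and
 * bdwrite.  The write is merely marked delayed; the data reach the
 * disk later, when the buffer is reclaimed or when a flush runs.
 * The routine name and the bsize parameter are assumptions for the
 * example only; the code is disabled.
 */
#ifdef notdef
example_update_block(dev, blkno, bsize)
        dev_t dev;
        daddr_t blkno;
        int bsize;
{
        register struct buf *bp;

        bp = bread(dev, blkno, bsize);          /* find or read the block */
        if (bp->b_flags & B_ERROR) {
                brelse(bp);
                return;
        }
        /* ... modify the buffer contents here ... */
        bdwrite(bp);                            /* mark dirty and release; no i/o yet */
}
#endif /* notdef */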

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp, *dp;
        int s;

        if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))        /* XXX */
                blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
        /*
         * Search the cache for the block.  If we hit, but
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
        dp = BUFHASH(dev, blkno);
loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
                if (bp->b_blkno != blkno || bp->b_dev != dev ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (bp->b_flags&B_BUSY) {
                        bp->b_flags |= B_WANTED;
                        sleep((caddr_t)bp, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(bp);
                if (brealloc(bp, size) == 0)
                        goto loop;
                bp->b_flags |= B_CACHE;
                return(bp);
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        bp = getnewbuf();
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = dev;
        bp->b_blkno = blkno;
        bp->b_error = 0;
        if (brealloc(bp, size) == 0)
                goto loop;
        return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
        int size;
{
        register struct buf *bp, *flist;

loop:
        bp = getnewbuf();
        bp->b_flags |= B_INVAL;
        bfree(bp);
        bremhash(bp);
        flist = &bfreelist[BQ_AGE];
        binshash(bp, flist);
        bp->b_dev = (dev_t)NODEV;
        bp->b_error = 0;
        if (brealloc(bp, size) == 0)
                goto loop;
        return(bp);
}

/*
 * Allocate space associated with a buffer.
 * If the space cannot be obtained, the buffer is released.
 */
brealloc(bp, size)
        register struct buf *bp;
        int size;
{
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        int s;

        /*
         * First we need to make sure that any overlapping previous I/O
         * has been dealt with.
         */
        if (size == bp->b_bcount)
                return (1);
        if (size < bp->b_bcount) {
                if (bp->b_flags & B_DELWRI) {
                        bwrite(bp);
                        return (0);
                }
                if (bp->b_flags & B_LOCKED)
                        panic("brealloc");
                return (allocbuf(bp, size));
        }
        bp->b_flags &= ~B_DONE;
        if (bp->b_dev == NODEV)
                return (allocbuf(bp, size));

        /*
         * Search the cache for any buffers that overlap the one that we
         * are trying to allocate.  Overlapping buffers must be marked
         * invalid, after being written out if they are dirty (indicated
         * by B_DELWRI).  A disk block must be mapped by at most one buffer
         * at any point in time.  Care must be taken to avoid deadlocking
         * when two buffers are trying to get the same set of disk blocks.
         */
        start = bp->b_blkno;
        last = start + btodb(size) - 1;
        dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                    ep->b_blkno + btodb(ep->b_bcount) <= start)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(ep);
                if (ep->b_flags & B_DELWRI) {
                        bwrite(ep);
                        goto loop;
                }
                ep->b_flags |= B_INVAL;
                brelse(ep);
        }
        return (allocbuf(bp, size));
}
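
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): when a block is about to be rewritten in its entirety
 * there is no need to read the old contents first; getblk alone
 * associates a buffer with the block, and an asynchronous write
 * pushes it out.  The routine name and parameters are assumptions
 * for the example only; the code is disabled.
 */
#ifdef notdef
example_overwrite_block(dev, blkno, bsize)
        dev_t dev;
        daddr_t blkno;
        int bsize;
{
        register struct buf *bp;

        bp = getblk(dev, blkno, bsize);         /* no read: we will fill it completely */
        /* ... fill the buffer contents here ... */
        bawrite(bp);                            /* start the write, don't wait for it */
}
#endif /* notdef */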

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = spl6();
        for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
                if (dp->av_forw != dp)
                        break;
        if (dp == bfreelist) {          /* no free blocks */
                dp->b_flags |= B_WANTED;
                sleep((caddr_t)dp, PRIBIO+1);
                splx(s);
                goto loop;
        }
        splx(s);
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY;
        return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
        register struct buf *bp;
{
        int s;

        s = spl6();
        while ((bp->b_flags&B_DONE) == 0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
        if (u.u_error == 0)                     /* XXX */
                u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
        register struct buf *bp;
{

        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
                return;
        }
        if (bp->b_flags&B_ASYNC)
                brelse(bp);
        else {
                bp->b_flags &= ~B_WANTED;
                wakeup((caddr_t)bp);
        }
}

/*
 * Ensure that no part of a specified block is in an incore buffer.
 */
blkflush(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        long size;
{
        register struct buf *ep;
        struct buf *dp;
        daddr_t start, last;
        int s;

        start = blkno;
        last = start + btodb(size) - 1;
        dp = BUFHASH(dev, blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                    ep->b_blkno + btodb(ep->b_bcount) <= start)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                if (ep->b_flags & B_DELWRI) {
                        splx(s);
                        notavail(ep);
                        bwrite(ep);
                        goto loop;
                }
                splx(s);
        }
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct buf *flist;
        int s;

loop:
        s = spl6();
        for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
                for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                        if ((bp->b_flags & B_DELWRI) == 0)
                                continue;
                        if (dev == NODEV || dev == bp->b_dev) {
                                bp->b_flags |= B_ASYNC;
                                notavail(bp);
                                bwrite(bp);
                                splx(s);
                                goto loop;
                        }
                }
        splx(s);
}
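
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): an asynchronous transfer whose completion is delivered by
 * callback rather than by a wakeup.  biodone above clears B_CALL and
 * invokes b_iodone at interrupt time.  The routine names are
 * assumptions for the example only; the code is disabled.
 */
#ifdef notdef
int     example_iodone();

example_start_async(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_READ|B_ASYNC|B_CALL;   /* no biowait; call us back */
        bp->b_iodone = example_iodone;
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
}

example_iodone(bp)
        register struct buf *bp;
{

        /* runs from biodone at interrupt level; hand the buffer back */
        brelse(bp);
}
#endif /* notdef */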

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
        register struct buf *bp;
{
        int error = 0;

        if (bp->b_flags&B_ERROR)
                if ((error = bp->b_error) == 0)
                        return (EIO);
        return (error);
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness.                         ... kre
 */
binval(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct bufhd *hp;
#define dp ((struct buf *)hp)

        for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
                for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                        if (bp->b_dev == dev)
                                bp->b_flags |= B_INVAL;
}
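
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): the order of operations a hypothetical unmount path might
 * use against this cache -- push out delayed writes for the device,
 * make sure a particular block (e.g. a superblock) has no straggling
 * incore copy, and finally invalidate whatever remains.  The routine
 * name, the block number, and the size are assumptions for the
 * example only; the code is disabled.
 */
#ifdef notdef
example_umount_cache(dev)
        dev_t dev;
{

        bflush(dev);                            /* start writes for all B_DELWRI blocks on dev */
        blkflush(dev, (daddr_t)1, (long)DEV_BSIZE);     /* wait out i/o overlapping block 1 */
        binval(dev);                            /* mark any remaining buffers invalid */
}
#endif /* notdef */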