/*	vfs_cluster.c	6.2	84/01/03	*/

#include "../machine/pte.h"

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, pack(dev, size), blkno);
		return (bp);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, pack(dev, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, pack(dev, size), blkno);
			u.u_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(dev, size), blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rabsize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(dev, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, pack(dev, rabsize), rablkno);
			u.u_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}
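
/*
 * Illustrative sketch (not part of the original source): a typical
 * caller in a filesystem read path.  The names "ip", "bn", "rabn",
 * and "bsize" are hypothetical; a real caller derives them from the
 * inode and the block map.  A rabn of 0 means no read-ahead, matching
 * the rablkno test in breada above.
 *
 *	struct buf *bp;
 *
 *	bp = breada(ip->i_dev, bn, bsize, rabn, bsize);
 *	if (bp->b_flags & B_ERROR) {
 *		brelse(bp);
 *		return;
 *	}
 *	... copy data from bp->b_un.b_addr ...
 *	brelse(bp);
 */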

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
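
/*
 * Illustrative sketch (not part of the original source): choosing
 * among the three write flavors for a buffer "bp" obtained from
 * bread or getblk; the conditions are the hypothetical caller's.
 *
 *	if (partial block, another write expected soon)
 *		bdwrite(bp);	delayed write: mark dirty, no i/o yet
 *	else if (completion status not needed)
 *		bawrite(bp);	start the write, do not wait
 *	else
 *		bwrite(bp);	write and wait; errors via biowait
 */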

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR) {
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	}

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Return the block if it is already in core;
 * initiate no i/o if it is not.
 */
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))	/* XXX */
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		if (brealloc(bp, size) == 0)
			goto loop;
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return (bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	binshash(bp, flist);
	bp->b_dev = (dev_t)NODEV;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return (bp);
}
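
/*
 * Illustrative sketch (not part of the original source): geteblk
 * provides scratch space not tied to any disk block.  The byte
 * count "n" is hypothetical.
 *
 *	struct buf *bp;
 *
 *	bp = geteblk(n);
 *	... use bp->b_un.b_addr as an n-byte scratch area ...
 *	brelse(bp);	B_INVAL sends the buffer to the AGE list
 */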

/*
 * Allocate space associated with a buffer.
 * If we can't get space, the buffer is released.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is disposed of.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		return (allocbuf(bp, size));
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV)
		return (allocbuf(bp, size));

	trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
	/*
	 * Search the cache for any buffers that overlap the one that we
	 * are trying to allocate.  Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty (indicated
	 * by B_DELWRI).  A disk block must be mapped by at most one buffer
	 * at any point in time.  Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	return (allocbuf(bp, size));
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	bp->b_flags = B_BUSY;
	return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if (u.u_error == 0)			/* XXX */
		u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
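
/*
 * Illustrative sketch (not part of the original source): arranging
 * a completion callback with B_CALL, as the pageout daemon does.
 * The handler name "myiodone" is hypothetical; biodone invokes it,
 * typically at interrupt level, in place of waking sleepers, and
 * the handler is then responsible for the buffer.
 *
 *	int myiodone();
 *
 *	bp->b_flags |= B_CALL;
 *	bp->b_iodone = myiodone;
 *	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
 */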

/*
 * Ensure that no part of a specified block is in an incore buffer.
 */
blkflush(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	long size;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t start, last;
	int s;

	start = blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(dev, blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		if (ep->b_flags & B_DELWRI) {
			splx(s);
			notavail(ep);
			bwrite(ep);
			goto loop;
		}
		splx(s);
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (dev == NODEV || dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				splx(s);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized
 * code.  Actually the latter is always true, because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error) == 0)
			return (EIO);
	return (error);
}

/*
 * Invalidate in core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness.				... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
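
/*
 * Illustrative sketch (not part of the original source): one way
 * unmount-time code might combine the routines above.  A real
 * caller must also be sure no i/o remains outstanding on dev.
 *
 *	bflush(dev);	push out delayed writes for this device
 *	binval(dev);	then invalidate its remaining cached blocks
 */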