/*	vfs_cluster.c	4.32	82/06/01	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, dev, blkno);
		return (bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, dev, blkno);
	u.u_vm.vm_inblk++;		/* pay for read */
	biowait(bp);
	return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, dev, blkno);
			u.u_vm.vm_inblk++;	/* pay for read */
		} else
			trace(TR_BREADHIT, dev, blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, size);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, dev, blkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, dev, rablkno);
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}

	/*
	 * If we get here with bp NULL, then the block
	 * must've been in core and bread will find it for us.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting i/o completion status.
	 * Otherwise, the i/o must be finished and we check for
	 * an error.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}
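
/*
 * Illustrative sketch (not compiled): a hypothetical caller, here
 * called ex_readseq, reading sequentially through a device.  It asks
 * breada for the current block plus read-ahead on the next one,
 * checks for an i/o error, and releases the buffer when done.  The
 * b_un.b_addr field is assumed to come from ../h/buf.h.
 */
#ifdef notdef
ex_readseq(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;

	bp = breada(dev, blkno, blkno + 1, DEV_BSIZE);
	if (bp->b_flags & B_ERROR) {	/* geterror() also set u.u_error */
		brelse(bp);
		return;
	}
	/* ... data is at bp->b_un.b_addr until the brelse ... */
	brelse(bp);			/* let the block be reused */
}
#endif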

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
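
/*
 * Illustrative sketch (not compiled): the delayed-write idiom bdwrite
 * is meant for.  A hypothetical caller, ex_patchblk, updates part of
 * a block in core and lets the physical write be batched with later
 * updates to the same block.  bcopy and b_un.b_addr are assumed from
 * the standard kernel headers.
 */
#ifdef notdef
ex_patchblk(dev, blkno, off, src, len)
	dev_t dev;
	daddr_t blkno;
	int off, len;
	caddr_t src;
{
	register struct buf *bp;

	bp = bread(dev, blkno, DEV_BSIZE);	/* bring the block in core */
	if (bp->b_flags & B_ERROR) {
		brelse(bp);
		return;
	}
	bcopy(src, bp->b_un.b_addr + off, (unsigned)len);
	bdwrite(bp);		/* mark dirty; the write happens later */
}
#endif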

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	/*
	 * If someone is waiting for this buffer, or is
	 * waiting for any free buffer, wake them up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * If the block is in the cache, read it in and return the buffer;
 * otherwise return 0 without starting any I/O.
 */
struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		brealloc(bp, size);
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	/*
	 * Not found in the cache, select something from
	 * a free list.  Preference is to LRU list, then AGE list.
	 */
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	brealloc(bp, size);
	return (bp);
}

/*
 * Get an empty block, not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
	bp->b_flags = B_BUSY|B_INVAL;
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = (dev_t)NODEV;
	brealloc(bp, size);
	return (bp);
}
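
/*
 * Illustrative sketch (not compiled): geteblk used by a hypothetical
 * routine, ex_scratch, that needs block-sized scratch space with no
 * disk block behind it.  Since the buffer is marked B_INVAL, brelse
 * puts it back at the front of the most-free list when we are done.
 */
#ifdef notdef
ex_scratch()
{
	register struct buf *bp;

	bp = geteblk(DEV_BSIZE);	/* private work area */
	/* ... use bp->b_un.b_addr as temporary storage ... */
	brelse(bp);			/* hand the buffer back */
}
#endif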

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First we need to make sure that all overlapping
	 * previous I/O has been disposed of.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount || bp->b_dev == NODEV)
		goto allocit;

	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			(void) splx(s);
			goto loop;
		}
		(void) splx(s);
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that
		 * so we force it to complete, so that it cannot
		 * over-write our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
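
/*
 * Illustrative sketch (not compiled): the completion protocol as seen
 * from a driver.  A hypothetical strategy routine, ex_strategy, that
 * can satisfy the request at once records any error in the buffer
 * header and calls biodone(); a synchronous caller then wakes up in
 * biowait(), while an asynchronous buffer is brelse'd above.
 */
#ifdef notdef
ex_strategy(bp)
	register struct buf *bp;
{

	if (bp->b_blkno < 0) {		/* reject an impossible request */
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	} else
		bp->b_resid = 0;	/* whole transfer accounted for */
	biodone(bp);
}
#endif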

/*
 * Make sure all write-behind blocks on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (dev == NODEV || dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted
 * filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from
 * the hash chains & have its dev/blkno fields clobbered, but
 * unfortunately we can't do that here, as it is quite possible that
 * the block is still being used for i/o.  Eventually, all disc drivers
 * should be forced to have a close routine, which ought to ensure that
 * the queue is empty, then properly flush the queues.  Until that
 * happy day, this suffices for correctness.		... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp	((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
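
/*
 * Illustrative sketch (not compiled): a hypothetical unmount-time
 * sequence, ex_offline, combining the two routines above.  Delayed
 * writes for the device are pushed out first, then any blocks still
 * cached are marked invalid so stale data is never handed back.
 */
#ifdef notdef
ex_offline(dev)
	dev_t dev;
{

	bflush(dev);		/* write out delayed-write blocks */
	binval(dev);		/* invalidate whatever remains cached */
}
#endif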