/*	vfs_cluster.c	4.33	82/06/07	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        bp = getblk(dev, blkno, size);
        if (bp->b_flags&B_DONE) {
                trace(TR_BREADHIT, dev, blkno);
                return (bp);
        }
        bp->b_flags |= B_READ;
        (*bdevsw[major(dev)].d_strategy)(bp);
        trace(TR_BREADMISS, dev, blkno);
        u.u_vm.vm_inblk++;              /* pay for read */
        biowait(bp);
        return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rasize)
        dev_t dev;
        daddr_t blkno; int size;
        daddr_t rablkno; int rasize;
{
        register struct buf *bp, *rabp;

        bp = NULL;
        /*
         * If the block isn't in core, then allocate
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
        if (!incore(dev, blkno)) {
                bp = getblk(dev, blkno, size);
                if ((bp->b_flags&B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        (*bdevsw[major(dev)].d_strategy)(bp);
                        trace(TR_BREADMISS, dev, blkno);
                        u.u_vm.vm_inblk++;              /* pay for read */
                } else
                        trace(TR_BREADHIT, dev, blkno);
        }

        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno, rasize);
                if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
                        trace(TR_BREADHITRA, dev, blkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        (*bdevsw[major(dev)].d_strategy)(rabp);
                        trace(TR_BREADMISSRA, dev, rablkno);
                        u.u_vm.vm_inblk++;              /* pay in advance */
                }
        }

        /*
         * If the block was in core, let bread get it.
         * If the block wasn't in core, then the read was started
         * above, so just wait for it.
         */
        if (bp == NULL)
                return (bread(dev, blkno, size));
        biowait(bp);
        return (bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
        register struct buf *bp;
{
        register flag;

        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
        if ((flag&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* no one paid yet */
        trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);

        /*
         * If the write was synchronous, then await i/o completion.
         * If the write was "delayed", then we put the buffer on
         * the q of blocks awaiting i/o completion status.
         * Otherwise, the i/o must be finished and we check for
         * an error.
         */
        if ((flag&B_ASYNC) == 0) {
                biowait(bp);
                brelse(bp);
        } else if (flag & B_DELWRI)
                bp->b_flags |= B_AGE;
        else
                geterror(bp);
}
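/*
 * Illustrative sketch of the usual read pattern: bread returns the
 * buffer busy (a cache hit or a completed physical read), the caller
 * checks for an error, looks at the data, and gives the buffer back
 * with brelse.  The example_read routine, its arguments, and the
 * b_un.b_addr data pointer mentioned below are hypothetical, shown
 * only to illustrate the interface above.
 */
example_read(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        bp = bread(dev, blkno, size);           /* cache hit or physical read */
        if (bp->b_flags & B_ERROR) {            /* i/o failed */
                brelse(bp);
                return;
        }
        /* ... examine size bytes at bp->b_un.b_addr ... */
        brelse(bp);                             /* release, no i/o implied */
}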
/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
        register struct buf *bp;
{
        register int flags;

        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* no one paid yet */
        flags = bdevsw[major(bp->b_dev)].d_flags;
        if (flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_ASYNC;
        bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
        register struct buf *bp;
{
        register struct buf *flist;
        register s;

        /*
         * If someone is waiting for this buffer, or
         * is waiting for any buffer, wake 'em up.
         */
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        if (bfreelist[0].b_flags&B_WANTED) {
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
                        bp->b_dev = NODEV;              /* no assoc */

        /*
         * Stick the buffer back on a free list.
         */
        s = spl6();
        if (bp->b_flags & (B_ERROR|B_INVAL)) {
                /* block has no info ... put at front of most free list */
                flist = &bfreelist[BQUEUES-1];
                binsheadfree(bp, flist);
        } else {
                if (bp->b_flags & B_LOCKED)
                        flist = &bfreelist[BQ_LOCKED];
                else if (bp->b_flags & B_AGE)
                        flist = &bfreelist[BQ_AGE];
                else
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
        dev_t dev;
        daddr_t blkno;
{
        register struct buf *bp;
        register struct buf *dp;

        dp = BUFHASH(dev, blkno);
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                if (bp->b_blkno == blkno && bp->b_dev == dev &&
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);
        return (0);
}

/*
 * Return the block's buffer if the block is already in core;
 * otherwise return 0 without starting any I/O.
 */
struct buf *
baddr(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{

        if (incore(dev, blkno))
                return (bread(dev, blkno, size));
        return (0);
}
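/*
 * Illustrative sketch of a partial block update: read the current
 * contents, modify part of the data, then use bdwrite so the block
 * sits dirty in the cache and a following write to the same block
 * can be absorbed before any physical i/o happens.  The example_update
 * routine and its arguments are hypothetical.
 */
example_update(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        bp = bread(dev, blkno, size);           /* fetch current contents */
        if (bp->b_flags & B_ERROR) {
                brelse(bp);
                return;
        }
        /* ... change part of the data at bp->b_un.b_addr ... */
        bdwrite(bp);                            /* mark B_DELWRI and release */
}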
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp, *dp, *ep;
        int s;

        if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
                blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
        /*
         * Search the cache for the block.  If we hit, but
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
        dp = BUFHASH(dev, blkno);
loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
                if (bp->b_blkno != blkno || bp->b_dev != dev ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (bp->b_flags&B_BUSY) {
                        bp->b_flags |= B_WANTED;
                        sleep((caddr_t)bp, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(bp);
                brealloc(bp, size);
                bp->b_flags |= B_CACHE;
                return (bp);
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        /*
         * Not found in the cache, select something from
         * a free list.  Preference is to the AGE list, then the LRU list.
         */
        s = spl6();
        for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
                if (ep->av_forw != ep)
                        break;
        if (ep == bfreelist) {          /* no free blocks at all */
                ep->b_flags |= B_WANTED;
                sleep((caddr_t)ep, PRIBIO+1);
                splx(s);
                goto loop;
        }
        splx(s);
        bp = ep->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY;
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = dev;
        bp->b_blkno = blkno;
        brealloc(bp, size);
        return (bp);
}

/*
 * Get an empty block, not assigned to any particular device.
 */
struct buf *
geteblk(size)
        int size;
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = spl6();
        for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
                if (dp->av_forw != dp)
                        break;
        if (dp == bfreelist) {          /* no free blocks */
                dp->b_flags |= B_WANTED;
                sleep((caddr_t)dp, PRIBIO+1);
                goto loop;
        }
        splx(s);
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY|B_INVAL;
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = (dev_t)NODEV;
        brealloc(bp, size);
        return (bp);
}

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
        register struct buf *bp;
        int size;
{
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        int s;

        /*
         * First we need to make sure that any overlapping
         * previous I/O is disposed of.
         */
        if (size == bp->b_bcount)
                return;
        if (size < bp->b_bcount || bp->b_dev == NODEV)
                goto allocit;

        start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
        last = bp->b_blkno + (size / DEV_BSIZE) - 1;
        if (bp->b_bcount == 0) {
                start++;
                if (start == last)
                        goto allocit;
        }
        dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep->b_blkno < start || ep->b_blkno > last ||
                    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        (void) splx(s);
                        goto loop;
                }
                (void) splx(s);
                /*
                 * What we would really like to do is kill this
                 * I/O since it is now useless.  We cannot do that
                 * so we force it to complete, so that it cannot
                 * over-write our useful data later.
                 */
                if (ep->b_flags & B_DELWRI) {
                        notavail(ep);
                        ep->b_flags |= B_ASYNC;
                        bwrite(ep);
                        goto loop;
                }
        }
allocit:
        /*
         * Here the buffer is already available, so all we
         * need to do is set the size.  Someday a better memory
         * management scheme will be implemented.
         */
        bp->b_bcount = size;
}
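/*
 * Illustrative sketch of geteblk: it hands back a buffer that is busy
 * but bound to no device, useful as temporary kernel scratch space;
 * brelse returns it to a free list when done.  The example_scratch
 * routine below is hypothetical.
 */
example_scratch(size)
        int size;
{
        register struct buf *bp;

        bp = geteblk(size);                     /* busy, B_INVAL, dev == NODEV */
        /* ... use size bytes at bp->b_un.b_addr as scratch ... */
        brelse(bp);                             /* back to the free list */
}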
/*
 * Release space associated with a buffer.
 */
bfree(bp)
        struct buf *bp;
{
        /*
         * Here the buffer does not change, so all we
         * need to do is set the size.  Someday a better memory
         * management scheme will be implemented.
         */
        bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
        register struct buf *bp;
{
        int s;

        s = spl6();
        while ((bp->b_flags&B_DONE) == 0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
        geterror(bp);
}

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
        register struct buf *bp;
{
        register int s;

        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_DIRTY) {
                if (bp->b_flags & B_ERROR)
                        panic("IO err in push");
                s = spl6();
                bp->av_forw = bclnlist;
                bp->b_bcount = swsize[bp - swbuf];
                bp->b_pfcent = swpf[bp - swbuf];
                cnt.v_pgout++;
                cnt.v_pgpgout += bp->b_bcount / NBPG;
                bclnlist = bp;
                if (bswlist.b_flags & B_WANTED)
                        wakeup((caddr_t)&proc[2]);
                splx(s);
                return;
        }
        if (bp->b_flags&B_ASYNC)
                brelse(bp);
        else {
                bp->b_flags &= ~B_WANTED;
                wakeup((caddr_t)bp);
        }
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct buf *flist;
        int s;

loop:
        s = spl6();
        for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
                for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                        if ((bp->b_flags & B_DELWRI) == 0)
                                continue;
                        if (dev == NODEV || dev == bp->b_dev) {
                                bp->b_flags |= B_ASYNC;
                                notavail(bp);
                                bwrite(bp);
                                goto loop;
                        }
                }
        splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized
 * code.  Actually the latter is always true, because devices
 * don't yet return specific errors.
 */
geterror(bp)
        register struct buf *bp;
{

        if (bp->b_flags&B_ERROR)
                if ((u.u_error = bp->b_error) == 0)
                        u.u_error = EIO;
}
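/*
 * Illustrative sketch of the completion side: when a block device's
 * interrupt routine finishes a transfer it marks any error on the
 * buffer and calls biodone, which wakes a biowait-er or releases an
 * asynchronous buffer.  The xxintr name, its arguments, and the error
 * handling are hypothetical; a real driver finds bp on its own queue.
 */
xxintr(bp, hard_error)
        register struct buf *bp;
        int hard_error;
{

        if (hard_error) {
                bp->b_flags |= B_ERROR;         /* geterror will report this */
                bp->b_error = EIO;
        }
        biodone(bp);                            /* wake biowait, or brelse if async */
}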
/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness.					... kre
 */
binval(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct bufhd *hp;
#define	dp	((struct buf *)hp)

        for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
                for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                        if (bp->b_dev == dev)
                                bp->b_flags |= B_INVAL;
}
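/*
 * Illustrative sketch of the unmount-time sequence suggested by the
 * comments above: push any delayed writes for the device with bflush,
 * then discard the remaining cached copies with binval.  The
 * example_umount routine is hypothetical.
 */
example_umount(dev)
        dev_t dev;
{

        bflush(dev);                            /* push delayed writes */
        binval(dev);                            /* drop stale cached blocks */
}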