/*	vfs_cluster.c	4.31	82/05/31	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        bp = getblk(dev, blkno, size);
        if (bp->b_flags&B_DONE) {
                trace(TR_BREADHIT, dev, blkno);
                return(bp);
        }
        bp->b_flags |= B_READ;
        (*bdevsw[major(dev)].d_strategy)(bp);
        trace(TR_BREADMISS, dev, blkno);
        u.u_vm.vm_inblk++;              /* pay for read */
        biowait(bp);
        return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
        dev_t dev;
        daddr_t blkno, rablkno;
        int size;
{
        register struct buf *bp, *rabp;

        bp = NULL;
        /*
         * If the block isn't in core, then allocate
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
        if (!incore(dev, blkno)) {
                bp = getblk(dev, blkno, size);
                if ((bp->b_flags&B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        (*bdevsw[major(dev)].d_strategy)(bp);
                        trace(TR_BREADMISS, dev, blkno);
                        u.u_vm.vm_inblk++;      /* pay for read */
                } else
                        trace(TR_BREADHIT, dev, blkno);
        }

        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno, size);
                if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
                        trace(TR_BREADHITRA, dev, rablkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        (*bdevsw[major(dev)].d_strategy)(rabp);
                        trace(TR_BREADMISSRA, dev, rablkno);
                        u.u_vm.vm_inblk++;      /* pay in advance */
                }
        }

        /*
         * If we get here with bp NULL, then the block
         * must've been in core and bread will find it for us.
         */
        if (bp == NULL)
                return(bread(dev, blkno, size));
        biowait(bp);
        return(bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
        register struct buf *bp;
{
        register flag;

        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
        if ((flag&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* no one has paid yet */
        trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);

        /*
         * If the write was synchronous, then await i/o completion.
         * If the write was "delayed", then we put the buffer on
         * the q of blocks awaiting i/o completion status.
         * Otherwise, the i/o must be finished and we check for
         * an error.
         */
        if ((flag&B_ASYNC) == 0) {
                biowait(bp);
                brelse(bp);
        } else if (flag & B_DELWRI)
                bp->b_flags |= B_AGE;
        else
                geterror(bp);
}
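#ifdef notdef
/*
 * Hypothetical example, compiled out: a caller doing a read-modify-write
 * of one device block through the cache, showing the bread/bwrite pairing
 * above.  The routine name and the arguments "off" and "c" are invented
 * for illustration; nothing in the system calls this.
 */
ebread_example(dev, blkno, off, c)
        dev_t dev;
        daddr_t blkno;
        int off, c;
{
        register struct buf *bp;

        bp = bread(dev, blkno, DEV_BSIZE);      /* cache hit or physical read */
        if (u.u_error) {
                brelse(bp);                     /* read failed; just give it back */
                return;
        }
        bp->b_un.b_addr[off] = c;               /* modify the cached data */
        bwrite(bp);                             /* synchronous write; releases bp */
}
#endif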
/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
        register struct buf *bp;
{
        register int flags;

        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_vm.vm_oublk++;              /* no one has paid yet */
        flags = bdevsw[major(bp->b_dev)].d_flags;
        if (flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_ASYNC;
        bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
        register struct buf *bp;
{
        register struct buf *flist;
        register s;

        /*
         * If someone's waiting for the buffer, or
         * is waiting for a buffer, wake 'em up.
         */
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        if (bfreelist[0].b_flags&B_WANTED) {
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
                        bp->b_dev = NODEV;              /* no assoc */

        /*
         * Stick the buffer back on a free list.
         */
        s = spl6();
        if (bp->b_flags & (B_ERROR|B_INVAL)) {
                /* block has no info ... put at front of most free list */
                flist = &bfreelist[BQUEUES-1];
                binsheadfree(bp, flist);
        } else {
                if (bp->b_flags & B_LOCKED)
                        flist = &bfreelist[BQ_LOCKED];
                else if (bp->b_flags & B_AGE)
                        flist = &bfreelist[BQ_AGE];
                else
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
        dev_t dev;
        daddr_t blkno;
{
        register struct buf *bp;
        register struct buf *dp;

        dp = BUFHASH(dev, blkno);
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                if (bp->b_blkno == blkno && bp->b_dev == dev &&
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);
        return (0);
}

/*
 * Return the block via bread if it is already in core;
 * otherwise return 0 without initiating any I/O.
 */
struct buf *
baddr(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{

        if (incore(dev, blkno))
                return (bread(dev, blkno, size));
        return (0);
}
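#ifdef notdef
/*
 * Hypothetical example, compiled out: choosing among the write interfaces
 * above in the spirit of the bdwrite comment.  A partially filled block
 * that will probably be written again soon is marked delayed-write; a
 * complete block is pushed asynchronously.  The routine name and "nbytes"
 * are invented for illustration.
 */
ewrite_example(bp, nbytes)
        register struct buf *bp;
        int nbytes;
{

        if (nbytes < bp->b_bcount)
                bdwrite(bp);            /* partial block: defer the write */
        else
                bawrite(bp);            /* full block: start i/o, don't wait */
}
#endif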
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp, *dp, *ep;
        int s;

        if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
                blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
        /*
         * Search the cache for the block.  If we hit, but
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
        dp = BUFHASH(dev, blkno);
loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
                if (bp->b_blkno != blkno || bp->b_dev != dev ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (bp->b_flags&B_BUSY) {
                        bp->b_flags |= B_WANTED;
                        sleep((caddr_t)bp, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(bp);
                brealloc(bp, size);
                bp->b_flags |= B_CACHE;
                return(bp);
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        /*
         * Not found in the cache, select something from
         * a free list.  Preference is to LRU list, then AGE list.
         */
        s = spl6();
        for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
                if (ep->av_forw != ep)
                        break;
        if (ep == bfreelist) {          /* no free blocks at all */
                ep->b_flags |= B_WANTED;
                sleep((caddr_t)ep, PRIBIO+1);
                splx(s);
                goto loop;
        }
        splx(s);
        bp = ep->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY;
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = dev;
        bp->b_blkno = blkno;
        brealloc(bp, size);
        return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
        int size;
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = spl6();
        for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
                if (dp->av_forw != dp)
                        break;
        if (dp == bfreelist) {          /* no free blocks */
                dp->b_flags |= B_WANTED;
                sleep((caddr_t)dp, PRIBIO+1);
                goto loop;
        }
        splx(s);
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
        bp->b_flags = B_BUSY|B_INVAL;
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = (dev_t)NODEV;
        bp->b_bcount = size;
        return(bp);
}
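#ifdef notdef
/*
 * Hypothetical example, compiled out: borrowing an anonymous buffer with
 * geteblk (no device block association, marked B_INVAL), using it as
 * scratch memory, and returning it with brelse.  The routine name is
 * invented for illustration.
 */
escratch_example()
{
        register struct buf *bp;
        register char *cp;

        bp = geteblk(DEV_BSIZE);        /* anonymous buffer, B_INVAL set */
        for (cp = bp->b_un.b_addr; cp < &bp->b_un.b_addr[DEV_BSIZE]; cp++)
                *cp = 0;                /* use the data area as scratch */
        brelse(bp);                     /* back on the empty free list */
}
#endif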
/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
        register struct buf *bp;
        int size;
{
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        int s;

        /*
         * First we need to make sure that any overlapping
         * previous I/O has been dealt with.
         */
        if (size == bp->b_bcount)
                return;
        if (size < bp->b_bcount) {
                bp->b_bcount = size;
                return;
        }
        start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
        last = bp->b_blkno + (size / DEV_BSIZE) - 1;
        if (bp->b_bcount == 0) {
                start++;
                if (start == last)
                        goto allocit;
        }
        dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep->b_blkno < start || ep->b_blkno > last ||
                    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
                        continue;
                s = spl6();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        (void) splx(s);
                        goto loop;
                }
                (void) splx(s);
                /*
                 * What we would really like to do is kill this
                 * I/O since it is now useless.  We cannot do that
                 * so we force it to complete, so that it cannot
                 * over-write our useful data later.
                 */
                if (ep->b_flags & B_DELWRI) {
                        notavail(ep);
                        ep->b_flags |= B_ASYNC;
                        bwrite(ep);
                        goto loop;
                }
        }
allocit:
        /*
         * Here the buffer is already available, so all we
         * need to do is set the size.  Someday a better memory
         * management scheme will be implemented.
         */
        bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
        struct buf *bp;
{
        /*
         * Here the buffer does not change, so all we
         * need to do is set the size.  Someday a better memory
         * management scheme will be implemented.
         */
        bp->b_bcount = 0;
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
        register struct buf *bp;
{
        int s;

        s = spl6();
        while ((bp->b_flags&B_DONE) == 0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
        geterror(bp);
}

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
biodone(bp)
        register struct buf *bp;
{
        register int s;

        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_DIRTY) {
                if (bp->b_flags & B_ERROR)
                        panic("IO err in push");
                s = spl6();
                bp->av_forw = bclnlist;
                bp->b_bcount = swsize[bp - swbuf];
                bp->b_pfcent = swpf[bp - swbuf];
                cnt.v_pgout++;
                cnt.v_pgpgout += bp->b_bcount / NBPG;
                bclnlist = bp;
                if (bswlist.b_flags & B_WANTED)
                        wakeup((caddr_t)&proc[2]);
                splx(s);
                return;
        }
        if (bp->b_flags&B_ASYNC)
                brelse(bp);
        else {
                bp->b_flags &= ~B_WANTED;
                wakeup((caddr_t)bp);
        }
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct buf *flist;
        int s;

loop:
        s = spl6();
        for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
                for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                        if ((bp->b_flags & B_DELWRI) == 0)
                                continue;
                        if (dev == NODEV || dev == bp->b_dev) {
                                bp->b_flags |= B_ASYNC;
                                notavail(bp);
                                bwrite(bp);
                                goto loop;
                        }
                }
        splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
        register struct buf *bp;
{

        if (bp->b_flags&B_ERROR)
                if ((u.u_error = bp->b_error) == 0)
                        u.u_error = EIO;
}
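#ifdef notdef
/*
 * Hypothetical example, compiled out: the driver side of the completion
 * protocol that biowait/biodone/geterror implement.  A driver's interrupt
 * routine records any error in the buffer header, so geterror can later
 * hand it to the user, and then calls biodone to wake sleepers (or brelse
 * the buffer if the i/o was asynchronous).  The routine name and "failed"
 * are invented for illustration.
 */
exintr_example(bp, failed)
        register struct buf *bp;
        int failed;
{

        if (failed) {
                bp->b_flags |= B_ERROR;         /* geterror will see this */
                bp->b_error = EIO;              /* specific code, if known */
        }
        bp->b_resid = 0;                        /* whole transfer accounted for */
        biodone(bp);                            /* mark done, wake or release */
}
#endif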
/*
 * Invalidate in core blocks belonging to closed or umounted filesystems.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness.                                         ... kre
 */
binval(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct bufhd *hp;
#define dp ((struct buf *)hp)

        for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
                for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                        if (bp->b_dev == dev)
                                bp->b_flags |= B_INVAL;
}
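#ifdef notdef
/*
 * Hypothetical example, compiled out: the order of operations a caller
 * such as umount might use, per the comments on bflush and binval above -
 * first push out any delayed writes for the device, then invalidate
 * whatever remains cached for it.  The routine name is invented for
 * illustration.
 */
eumount_example(dev)
        dev_t dev;
{

        bflush(dev);            /* force out write-behind blocks */
        binval(dev);            /* then mark cached blocks invalid */
}
#endif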