/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vfs_bio.c	7.2 (Berkeley) 04/02/87
 */

#include "../machine/pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
#include "seg.h"
#include "vm.h"
#include "trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;

        if (size == 0)
                panic("bread: size 0");
        bp = getblk(dev, blkno, size);
        if (bp->b_flags&B_DONE) {
                trace(TR_BREADHIT, pack(dev, size), blkno);
                return (bp);
        }
        bp->b_flags |= B_READ;
        if (bp->b_bcount > bp->b_bufsize)
                panic("bread");
        (*bdevsw[major(dev)].d_strategy)(bp);
        trace(TR_BREADMISS, pack(dev, size), blkno);
        u.u_ru.ru_inblock++;            /* pay for read */
        biowait(bp);
        return (bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
        dev_t dev;
        daddr_t blkno; int size;
        daddr_t rablkno; int rabsize;
{
        register struct buf *bp, *rabp;

        bp = NULL;
        /*
         * If the block isn't in core, then allocate
         * a buffer and initiate i/o (getblk checks
         * for a cache hit).
         */
        if (!incore(dev, blkno)) {
                bp = getblk(dev, blkno, size);
                if ((bp->b_flags&B_DONE) == 0) {
                        bp->b_flags |= B_READ;
                        if (bp->b_bcount > bp->b_bufsize)
                                panic("breada");
                        (*bdevsw[major(dev)].d_strategy)(bp);
                        trace(TR_BREADMISS, pack(dev, size), blkno);
                        u.u_ru.ru_inblock++;            /* pay for read */
                } else
                        trace(TR_BREADHIT, pack(dev, size), blkno);
        }

        /*
         * If there's a read-ahead block, start i/o
         * on it also (as above).
         */
        if (rablkno && !incore(dev, rablkno)) {
                rabp = getblk(dev, rablkno, rabsize);
                if (rabp->b_flags & B_DONE) {
                        brelse(rabp);
                        trace(TR_BREADHITRA, pack(dev, rabsize), blkno);
                } else {
                        rabp->b_flags |= B_READ|B_ASYNC;
                        if (rabp->b_bcount > rabp->b_bufsize)
                                panic("breadrabp");
                        (*bdevsw[major(dev)].d_strategy)(rabp);
                        trace(TR_BREADMISSRA, pack(dev, rabsize), rablkno);
                        u.u_ru.ru_inblock++;            /* pay in advance */
                }
        }

        /*
         * If block was in core, let bread get it.
         * If block wasn't in core, then the read was started
         * above, and just wait for it.
         */
        if (bp == NULL)
                return (bread(dev, blkno, size));
        biowait(bp);
        return (bp);
}
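/*
 * Example (hypothetical; not part of the original interfaces above):
 * a caller that reads block "blkno", asks for read-ahead on the next
 * block of the same size, and releases the buffer when it is done
 * with the data.  The name "example_readnext" and its arguments are
 * invented purely to illustrate the breada()/brelse() convention; a
 * real caller would normally be a filesystem read routine.
 */
example_readnext(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp;
        int error;

        /* read blkno now; start asynchronous read-ahead on the block after it */
        bp = breada(dev, blkno, size, blkno + btodb(size), size);
        error = geterror(bp);
        /* while the buffer is held, the data is at bp->b_un.b_addr */
        brelse(bp);
        return (error);
}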
/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
        register struct buf *bp;
{
        register flag;

        flag = bp->b_flags;
        bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
        if ((flag&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* no one paid yet */
        trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
        if (bp->b_bcount > bp->b_bufsize)
                panic("bwrite");
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);

        /*
         * If the write was synchronous, then await i/o completion.
         * If the write was "delayed", then we put the buffer on
         * the queue of blocks awaiting i/o completion status.
         */
        if ((flag&B_ASYNC) == 0) {
                biowait(bp);
                brelse(bp);
        } else if (flag & B_DELWRI)
                bp->b_flags |= B_AGE;
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
        register struct buf *bp;
{

        if ((bp->b_flags&B_DELWRI) == 0)
                u.u_ru.ru_oublock++;            /* no one paid yet */
        if (bdevsw[major(bp->b_dev)].d_flags & B_TAPE)
                bawrite(bp);
        else {
                bp->b_flags |= B_DELWRI | B_DONE;
                brelse(bp);
        }
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_ASYNC;
        bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
        register struct buf *bp;
{
        register struct buf *flist;
        register s;

        trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
        /*
         * If someone's waiting for the buffer, or
         * is waiting for a free buffer, wake 'em up.
         */
        if (bp->b_flags&B_WANTED)
                wakeup((caddr_t)bp);
        if (bfreelist[0].b_flags&B_WANTED) {
                bfreelist[0].b_flags &= ~B_WANTED;
                wakeup((caddr_t)bfreelist);
        }
        if (bp->b_flags&B_ERROR)
                if (bp->b_flags & B_LOCKED)
                        bp->b_flags &= ~B_ERROR;        /* try again later */
                else
                        bp->b_dev = NODEV;              /* no assoc */

        /*
         * Stick the buffer back on a free list.
         */
        s = splbio();
        if (bp->b_bufsize <= 0) {
                /* block has no buffer ... put at front of unused buffer list */
                flist = &bfreelist[BQ_EMPTY];
                binsheadfree(bp, flist);
        } else if (bp->b_flags & (B_ERROR|B_INVAL)) {
                /* block has no info ... put at front of most free list */
                flist = &bfreelist[BQ_AGE];
                binsheadfree(bp, flist);
        } else {
                if (bp->b_flags & B_LOCKED)
                        flist = &bfreelist[BQ_LOCKED];
                else if (bp->b_flags & B_AGE)
                        flist = &bfreelist[BQ_AGE];
                else
                        flist = &bfreelist[BQ_LRU];
                binstailfree(bp, flist);
        }
        bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
        splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
        dev_t dev;
        daddr_t blkno;
{
        register struct buf *bp;
        register struct buf *dp;

        dp = BUFHASH(dev, blkno);
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                if (bp->b_blkno == blkno && bp->b_dev == dev &&
                    (bp->b_flags & B_INVAL) == 0)
                        return (1);
        return (0);
}

/*
 * If the block is already in the cache, read it and return the buffer;
 * otherwise return 0 without starting any I/O.
 */
struct buf *
baddr(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{

        if (incore(dev, blkno))
                return (bread(dev, blkno, size));
        return (0);
}
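/*
 * Example (hypothetical): the usual delayed-write sequence.  A caller
 * that rewrites only part of a block reads the current contents in,
 * patches the in-core copy, and uses bdwrite() so the physical write
 * is deferred until the buffer is reclaimed (or a sync pushes it out).
 * A caller that will not touch the block again soon would use
 * bawrite() or bwrite() instead.  "example_patch" and its arguments
 * are invented for illustration only.
 */
example_patch(dev, blkno, size, off, src, len)
        dev_t dev;
        daddr_t blkno;
        int size, off;
        caddr_t src;
        int len;
{
        register struct buf *bp;
        int error;

        bp = bread(dev, blkno, size);           /* get current contents */
        if (bp->b_flags & B_ERROR) {
                error = geterror(bp);
                brelse(bp);
                return (error);
        }
        bcopy(src, bp->b_un.b_addr + off, (unsigned)len);
        bdwrite(bp);            /* mark dirty and release; write happens later */
        return (0);
}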
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        int size;
{
        register struct buf *bp, *dp;
        int s;

        if (size > MAXBSIZE)
                panic("getblk: size too big");
        /*
         * To prevent overflow of 32-bit ints when converting block
         * numbers to byte offsets, blknos > 2^32 / DEV_BSIZE are set
         * to the maximum number that can be converted to a byte offset
         * without overflow.  This is historic code; what bug it fixed,
         * or whether it is still a reasonable thing to do is open to
         * dispute.  mkm 9/85
         */
        if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-DEV_BSHIFT))
                blkno = 1 << ((sizeof(int)*NBBY-DEV_BSHIFT) + 1);
        /*
         * Search the cache for the block.  If we hit, but
         * the buffer is in use for i/o, then we wait until
         * the i/o has completed.
         */
        dp = BUFHASH(dev, blkno);
loop:
        for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
                if (bp->b_blkno != blkno || bp->b_dev != dev ||
                    bp->b_flags&B_INVAL)
                        continue;
                s = splbio();
                if (bp->b_flags&B_BUSY) {
                        bp->b_flags |= B_WANTED;
                        sleep((caddr_t)bp, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(bp);
                if (bp->b_bcount != size && brealloc(bp, size) == 0)
                        goto loop;
                bp->b_flags |= B_CACHE;
                return (bp);
        }
        if (major(dev) >= nblkdev)
                panic("blkdev");
        bp = getnewbuf();
        bfree(bp);
        bremhash(bp);
        binshash(bp, dp);
        bp->b_dev = dev;
        bp->b_blkno = blkno;
        bp->b_error = 0;
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
        int size;
{
        register struct buf *bp, *flist;

        if (size > MAXBSIZE)
                panic("geteblk: size too big");
loop:
        bp = getnewbuf();
        bp->b_flags |= B_INVAL;
        bfree(bp);
        bremhash(bp);
        flist = &bfreelist[BQ_AGE];
        binshash(bp, flist);
        bp->b_dev = (dev_t)NODEV;
        bp->b_error = 0;
        if (brealloc(bp, size) == 0)
                goto loop;
        return (bp);
}
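/*
 * Example (hypothetical): using geteblk() for scratch space.  The
 * buffer that comes back is marked invalid and bound to NODEV, so its
 * contents are never pushed to a device; the caller just fills it,
 * uses it, and hands it back with brelse().  "example_scratch" is an
 * invented name for illustration only.
 */
example_scratch(size)
        int size;
{
        register struct buf *bp;

        bp = geteblk(size);                     /* private buffer, b_dev == NODEV */
        bzero(bp->b_un.b_addr, (unsigned)size); /* use it as temporary storage */
        /* ... */
        brelse(bp);                             /* return it to the free lists */
}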
/*
 * Allocate space associated with a buffer.
 * If the space cannot be obtained, the buffer is released.
 */
brealloc(bp, size)
        register struct buf *bp;
        int size;
{
        daddr_t start, last;
        register struct buf *ep;
        struct buf *dp;
        int s;

        /*
         * First need to make sure that all overlapping previous I/O
         * is disposed of.
         */
        if (size == bp->b_bcount)
                return (1);
        if (size < bp->b_bcount) {
                if (bp->b_flags & B_DELWRI) {
                        bwrite(bp);
                        return (0);
                }
                if (bp->b_flags & B_LOCKED)
                        panic("brealloc");
                return (allocbuf(bp, size));
        }
        bp->b_flags &= ~B_DONE;
        if (bp->b_dev == NODEV)
                return (allocbuf(bp, size));

        trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
        /*
         * Search cache for any buffers that overlap the one that we
         * are trying to allocate.  Overlapping buffers must be marked
         * invalid, after being written out if they are dirty (indicated
         * by B_DELWRI).  A disk block must be mapped by at most one buffer
         * at any point in time.  Care must be taken to avoid deadlocking
         * when two buffers are trying to get the same set of disk blocks.
         */
        start = bp->b_blkno;
        last = start + btodb(size) - 1;
        dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                    ep->b_blkno + btodb(ep->b_bcount) <= start)
                        continue;
                s = splbio();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                splx(s);
                notavail(ep);
                if (ep->b_flags & B_DELWRI) {
                        bwrite(ep);
                        goto loop;
                }
                ep->b_flags |= B_INVAL;
                brelse(ep);
        }
        return (allocbuf(bp, size));
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
        register struct buf *bp, *dp;
        int s;

loop:
        s = splbio();
        for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
                if (dp->av_forw != dp)
                        break;
        if (dp == bfreelist) {          /* no free blocks */
                dp->b_flags |= B_WANTED;
                sleep((caddr_t)dp, PRIBIO+1);
                splx(s);
                goto loop;
        }
        splx(s);
        bp = dp->av_forw;
        notavail(bp);
        if (bp->b_flags & B_DELWRI) {
                bp->b_flags |= B_ASYNC;
                bwrite(bp);
                goto loop;
        }
        trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
        bp->b_flags = B_BUSY;
        return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
        register struct buf *bp;
{
        int s;

        s = splbio();
        while ((bp->b_flags&B_DONE)==0)
                sleep((caddr_t)bp, PRIBIO);
        splx(s);
        if (u.u_error == 0)                     /* XXX */
                u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
        register struct buf *bp;
{

        if (bp->b_flags & B_DONE)
                panic("dup biodone");
        bp->b_flags |= B_DONE;
        if (bp->b_flags & B_CALL) {
                bp->b_flags &= ~B_CALL;
                (*bp->b_iodone)(bp);
                return;
        }
        if (bp->b_flags&B_ASYNC)
                brelse(bp);
        else {
                bp->b_flags &= ~B_WANTED;
                wakeup((caddr_t)bp);
        }
}
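/*
 * Example (hypothetical): asynchronous I/O with a completion callback.
 * A caller that cannot sleep sets B_CALL and b_iodone before handing
 * the buffer to the driver; biodone() above then calls the function at
 * interrupt time instead of waking a sleeper, and the callback is
 * responsible for releasing the buffer.  "example_done" and
 * "example_start_read" are invented names for illustration only.
 */
example_done(bp)
        register struct buf *bp;
{

        /* called from biodone() when the transfer finishes */
        brelse(bp);
}

example_start_read(bp)
        register struct buf *bp;
{

        bp->b_flags |= B_READ|B_ASYNC|B_CALL;
        bp->b_iodone = example_done;
        (*bdevsw[major(bp->b_dev)].d_strategy)(bp);
        /* no biowait(); example_done() will run when biodone() is called */
}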
/*
 * Insure that no part of a specified block is in an incore buffer.
#ifdef SECSIZE
 * "size" is given in device blocks (the units of b_blkno).
#endif SECSIZE
 */
blkflush(dev, blkno, size)
        dev_t dev;
        daddr_t blkno;
        long size;
{
        register struct buf *ep;
        struct buf *dp;
        daddr_t start, last;
        int s;

        start = blkno;
        last = start + btodb(size) - 1;
        dp = BUFHASH(dev, blkno);
loop:
        for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
                if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
                        continue;
                /* look for overlap */
                if (ep->b_bcount == 0 || ep->b_blkno > last ||
                    ep->b_blkno + btodb(ep->b_bcount) <= start)
                        continue;
                s = splbio();
                if (ep->b_flags&B_BUSY) {
                        ep->b_flags |= B_WANTED;
                        sleep((caddr_t)ep, PRIBIO+1);
                        splx(s);
                        goto loop;
                }
                if (ep->b_flags & B_DELWRI) {
                        splx(s);
                        notavail(ep);
                        bwrite(ep);
                        goto loop;
                }
                splx(s);
        }
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct buf *flist;
        int s;

loop:
        s = splbio();
        for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
        for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
                if ((bp->b_flags & B_DELWRI) == 0)
                        continue;
                if (dev == NODEV || dev == bp->b_dev) {
                        bp->b_flags |= B_ASYNC;
                        notavail(bp);
                        bwrite(bp);
                        splx(s);
                        goto loop;
                }
        }
        splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized code.
 */
geterror(bp)
        register struct buf *bp;
{
        int error = 0;

        if (bp->b_flags&B_ERROR)
                if ((error = bp->b_error)==0)
                        return (EIO);
        return (error);
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness.                         ... kre
 */
binval(dev)
        dev_t dev;
{
        register struct buf *bp;
        register struct bufhd *hp;
#define dp ((struct buf *)hp)

        for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
                for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
                        if (bp->b_dev == dev)
                                bp->b_flags |= B_INVAL;
}
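/*
 * Example (hypothetical): the order in which the routines above are
 * typically applied when a device is being taken out of service
 * (compare the "from umount and update" note on bflush above).  Dirty
 * delayed-write blocks are pushed out first, then anything still
 * cached for the device is invalidated so a later getblk() cannot
 * find stale data.  "example_shutdown" is an invented name.
 */
example_shutdown(dev)
        dev_t dev;
{

        bflush(dev);            /* start writes for all B_DELWRI blocks on dev */
        binval(dev);            /* then mark whatever is still cached invalid */
}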