/*
 * Copyright (c) 1982 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vfs_cluster.c	6.5 (Berkeley) 06/08/85
 */

#include "../machine/pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "buf.h"
#include "conf.h"
#include "proc.h"
#include "seg.h"
#include "vm.h"
#include "trace.h"

/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
		trace(TR_BREADHIT, pack(dev, size), blkno);
		return(bp);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	(*bdevsw[major(dev)].d_strategy)(bp);
	trace(TR_BREADMISS, pack(dev, size), blkno);
	u.u_ru.ru_inblock++;		/* pay for read */
	biowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, size, rablkno, rabsize)
	dev_t dev;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block isn't in core, then allocate
	 * a buffer and initiate i/o (getblk checks
	 * for a cache hit).
	 */
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			(*bdevsw[major(dev)].d_strategy)(bp);
			trace(TR_BREADMISS, pack(dev, size), blkno);
			u.u_ru.ru_inblock++;		/* pay for read */
		} else
			trace(TR_BREADHIT, pack(dev, size), blkno);
	}

	/*
	 * If there's a read-ahead block, start i/o
	 * on it also (as above).
	 */
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, rabsize);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(dev, rabsize), blkno);
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			(*bdevsw[major(dev)].d_strategy)(rabp);
			trace(TR_BREADMISSRA, pack(dev, rabsize), rablkno);
			u.u_ru.ru_inblock++;		/* pay in advance */
		}
	}

	/*
	 * If block was in core, let bread get it.
	 * If block wasn't in core, then the read was started
	 * above, and just wait for it.
	 */
	if (bp == NULL)
		return (bread(dev, blkno, size));
	biowait(bp);
	return (bp);
}

/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	trace(TR_BWRITE, pack(bp->b_dev, bp->b_bcount), bp->b_blkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);

	/*
	 * If the write was synchronous, then await i/o completion.
	 * If the write was "delayed", then we put the buffer on
	 * the q of blocks awaiting i/o completion status.
	 */
	if ((flag&B_ASYNC) == 0) {
		biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_ru.ru_oublock++;		/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	/*
	 * If someone's waiting for the buffer, or
	 * is waiting for a buffer, wake 'em up.
	 */
	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */

	/*
	 * Stick the buffer back on a free list.
	 */
	s = spl6();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))	/* XXX */
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	/*
	 * Search the cache for the block.  If we hit, but
	 * the buffer is in use for i/o, then we wait until
	 * the i/o has completed.
	 */
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(bp);
		if (bp->b_bcount != size && brealloc(bp, size) == 0)
			goto loop;
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	bp = getnewbuf();
	bfree(bp);
	bremhash(bp);
	binshash(bp, dp);
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}

/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

loop:
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bfree(bp);
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	binshash(bp, flist);
	bp->b_dev = (dev_t)NODEV;
	bp->b_error = 0;
	if (brealloc(bp, size) == 0)
		goto loop;
	return(bp);
}

/*
 * Allocate space associated with a buffer.
 * If we can't get space, the buffer is released.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is dispatched with.
	 */
	if (size == bp->b_bcount)
		return (1);
	if (size < bp->b_bcount) {
		if (bp->b_flags & B_DELWRI) {
			bwrite(bp);
			return (0);
		}
		if (bp->b_flags & B_LOCKED)
			panic("brealloc");
		return (allocbuf(bp, size));
	}
	bp->b_flags &= ~B_DONE;
	if (bp->b_dev == NODEV)
		return (allocbuf(bp, size));

	trace(TR_BREALLOC, pack(bp->b_dev, size), bp->b_blkno);
	/*
	 * Search cache for any buffers that overlap the one that we
	 * are trying to allocate.  Overlapping buffers must be marked
	 * invalid, after being written out if they are dirty (indicated
	 * by B_DELWRI).  A disk block must be mapped by at most one buffer
	 * at any point in time.  Care must be taken to avoid deadlocking
	 * when two buffers are trying to get the same set of disk blocks.
	 */
	start = bp->b_blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep == bp || ep->b_dev != bp->b_dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
		notavail(ep);
		if (ep->b_flags & B_DELWRI) {
			bwrite(ep);
			goto loop;
		}
		ep->b_flags |= B_INVAL;
		brelse(ep);
	}
	return (allocbuf(bp, size));
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_dev, bp->b_bufsize), bp->b_blkno);
	bp->b_flags = B_BUSY;
	return (bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if (u.u_error == 0)			/* XXX */
		u.u_error = geterror(bp);
}

/*
 * Mark I/O complete on a buffer.
 * If someone should be called, e.g. the pageout
 * daemon, do so.  Otherwise, wake up anyone
 * waiting for it.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Insure that no part of a specified block is in an incore buffer.
 */
blkflush(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	long size;
{
	register struct buf *ep;
	struct buf *dp;
	daddr_t start, last;
	int s;

	start = blkno;
	last = start + btodb(size) - 1;
	dp = BUFHASH(dev, blkno);
loop:
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_dev != dev || (ep->b_flags&B_INVAL))
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		if (ep->b_flags & B_DELWRI) {
			splx(s);
			notavail(ep);
			bwrite(ep);
			goto loop;
		}
		splx(s);
	}
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQ_EMPTY]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if ((bp->b_flags & B_DELWRI) == 0)
				continue;
			if (dev == NODEV || dev == bp->b_dev) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				splx(s);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0, set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{
	int error = 0;

	if (bp->b_flags&B_ERROR)
		if ((error = bp->b_error) == 0)
			return (EIO);
	return (error);
}

/*
 * Invalidate in core blocks belonging to a closed or umounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness.						... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp	((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
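
/*
 * Illustrative only: a minimal sketch of how a caller (e.g. a filesystem
 * routine) might use the interfaces above for a read-modify-write of one
 * block - bread for a synchronous read, bdwrite to schedule a delayed
 * write, brelse on error.  The routine name and arguments are hypothetical,
 * not part of this file, and the fragment is disabled so it is never
 * compiled.
 */
#ifdef notdef
example_update(dev, blkno, size, data, off, len)
	dev_t dev;
	daddr_t blkno;
	int size;
	caddr_t data;
	int off, len;
{
	register struct buf *bp;

	bp = bread(dev, blkno, size);	/* cache hit, or wait for the read */
	if (bp->b_flags & B_ERROR) {	/* biowait has posted the error */
		brelse(bp);
		return (geterror(bp));
	}
	bcopy(data, bp->b_un.b_addr + off, (unsigned)len);
	bdwrite(bp);			/* delayed write; releases the buffer */
	return (0);
}
#endif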