/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	7.40 (Berkeley) 05/08/91
 */

#include "param.h"
#include "proc.h"
#include "buf.h"
#include "vnode.h"
#include "specdev.h"
#include "mount.h"
#include "trace.h"
#include "resourcevar.h"

/*
 * Initialize buffers and hash links for buffers.
 */
bufinit()
{
	register int i;
	register struct buf *bp, *dp;
	register struct bufhd *hp;
	int base, residual;

	for (hp = bufhash, i = 0; i < BUFHSZ; i++, hp++)
		hp->b_forw = hp->b_back = (struct buf *)hp;

	for (dp = bfreelist; dp < &bfreelist[BQUEUES]; dp++) {
		dp->b_forw = dp->b_back = dp->av_forw = dp->av_back = dp;
		dp->b_flags = B_HEAD;
	}
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bp->b_dev = NODEV;
		bp->b_bcount = 0;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = 0;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		binshash(bp, &bfreelist[BQ_AGE]);
		bp->b_flags = B_BUSY|B_INVAL;
		brelse(bp);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
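
#ifdef notdef
/*
 * Illustrative sketch only (not part of this file's interface):
 * the common read pattern from a hypothetical caller's point of
 * view.  The block is brought into the cache with bread(), its
 * contents are examined through b_un.b_addr, and the buffer is
 * handed back to the cache with brelse().  The name example_read
 * and the copy destination are assumptions for the example.
 */
example_read(vp, lblkno, size, cred, dst)
	struct vnode *vp;
	daddr_t lblkno;
	int size;
	struct ucred *cred;
	caddr_t dst;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lblkno, size, cred, &bp)) {
		brelse(bp);
		return (error);
	}
	bcopy(bp->b_un.b_addr, dst, (unsigned)size);
	brelse(bp);			/* done with the cached copy */
	return (0);
}
#endif /* notdef */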

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(vp, size), blkno);
	}

	/*
	 * If there is a read-ahead block, start I/O on it too.
	 */
	if (!incore(vp, rablkno)) {
		rabp = getblk(vp, rablkno, rabsize);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	return (error);
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}
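
#ifdef notdef
/*
 * Illustrative sketch only: how a hypothetical caller might choose
 * among the three write interfaces above after modifying a cached
 * block.  bwrite() waits for the transfer, bawrite() only starts it,
 * and bdwrite() defers it in the expectation of further writes.
 * The names example_update, wait and partial are assumptions.
 */
example_update(vp, lblkno, size, cred, wait, partial)
	struct vnode *vp;
	daddr_t lblkno;
	int size, wait, partial;
	struct ucred *cred;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lblkno, size, cred, &bp)) {
		brelse(bp);
		return (error);
	}
	/* ... modify the data at bp->b_un.b_addr here ... */
	if (wait)
		return (bwrite(bp));	/* synchronous: wait for the disk */
	if (partial)
		bdwrite(bp);		/* expect more writes to this block soon */
	else
		bawrite(bp);		/* start I/O now, do not wait */
	return (0);
}
#endif /* notdef */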

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags & B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(vp, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    (bp->b_flags & B_INVAL))
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO + 1);
			splx(s);
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
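
#ifdef notdef
/*
 * Illustrative sketch only: a hypothetical caller allocating a block
 * whose previous contents do not matter (for example, a freshly
 * allocated block that will be written in full).  getblk() obtains
 * the buffer without reading from disk; the caller supplies the
 * contents and schedules a delayed write.  example_newblk is an
 * assumed name.
 */
example_newblk(vp, lblkno, size)
	struct vnode *vp;
	daddr_t lblkno;
	int size;
{
	struct buf *bp;

	bp = getblk(vp, lblkno, size);
	bzero(bp->b_un.b_addr, (unsigned)size);	/* caller fills in real data */
	bdwrite(bp);
	return (0);
}
#endif /* notdef */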

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		ep = bfreelist[BQ_EMPTY].av_forw;
		if (ep == &bfreelist[BQ_EMPTY])
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &bfreelist[BQ_EMPTY]);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO + 1);
		splx(s);
		goto loop;
	}
	bp = dp->av_forw;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}
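
#ifdef notdef
/*
 * Illustrative sketch only: geteblk() above hands out a buffer that
 * is not associated with any vnode, which makes it usable as
 * temporary kernel scratch storage.  Releasing it with brelse()
 * returns the space to the pool.  example_scratch is an assumed name.
 */
example_scratch(len)
	int len;
{
	register struct buf *bp;

	bp = geteblk(len);
	/* ... use the len bytes at bp->b_un.b_addr ... */
	brelse(bp);
	return (0);
}
#endif /* notdef */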

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
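
#ifdef notdef
/*
 * Illustrative sketch only: an asynchronous read whose completion is
 * delivered through the B_CALL/b_iodone hook handled by biodone()
 * above, instead of by sleeping in biowait().  Because biodone()
 * returns without releasing a B_CALL buffer, the callback must call
 * brelse() itself.  example_astart and example_adone are assumed
 * names.
 */
example_adone(bp)
	register struct buf *bp;
{

	/* called from biodone(); inspect bp->b_flags & B_ERROR here */
	brelse(bp);
}

example_astart(vp, lblkno, size, cred)
	struct vnode *vp;
	daddr_t lblkno;
	int size;
	struct ucred *cred;
{
	register struct buf *bp;

	bp = getblk(vp, lblkno, size);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {	/* already valid */
		brelse(bp);
		return (0);
	}
	bp->b_flags |= B_READ | B_ASYNC | B_CALL;
	bp->b_iodone = example_adone;
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	return (0);
}
#endif /* notdef */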