/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	7.39 (Berkeley) 05/07/91
 */

#include "param.h"
#include "proc.h"
#include "buf.h"
#include "vnode.h"
#include "specdev.h"
#include "mount.h"
#include "trace.h"
#include "resourcevar.h"

/*
 * Initialize buffers and hash links for buffers.
 */
bufinit()
{
	register int i;
	register struct buf *bp, *dp;
	register struct bufhd *hp;
	int base, residual;

	for (hp = bufhash, i = 0; i < BUFHSZ; i++, hp++)
		hp->b_forw = hp->b_back = (struct buf *)hp;

	for (dp = bfreelist; dp < &bfreelist[BQUEUES]; dp++) {
		dp->b_forw = dp->b_back = dp->av_forw = dp->av_back = dp;
		dp->b_flags = B_HEAD;
	}
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bp->b_dev = NODEV;
		bp->b_bcount = 0;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = 0;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		binshash(bp, &bfreelist[BQ_AGE]);
		bp->b_flags = B_BUSY|B_INVAL;
		brelse(bp);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
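
/*
 * Illustrative sketch (hypothetical helper, not part of the interface
 * above): a typical caller brings a block into the cache with bread(),
 * checks the error return, uses the data at b_un.b_addr, and then
 * releases the buffer with brelse() so that it can be reused.
 */
example_read(vp, lbn, size, cred)
	struct vnode *vp;
	daddr_t lbn;
	int size;
	struct ucred *cred;
{
	register struct buf *bp;
	int error;

	if (error = bread(vp, lbn, size, cred, &bp)) {
		brelse(bp);
		return (error);
	}
	/* ... examine size bytes at bp->b_un.b_addr ... */
	brelse(bp);
	return (0);
}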

/*
 * Operates like bread, but also starts I/O on the specified
 * read-ahead block.
 */
breada(vp, blkno, size, rablkno, rabsize, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno; int rabsize;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breada");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else
			trace(TR_BREADHIT, pack(vp, size), blkno);
	}

	/*
	 * If there is a read-ahead block, start I/O on it too.
	 */
	if (!incore(vp, rablkno)) {
		rabp = getblk(vp, rablkno, rabsize);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize), rablkno);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize), rablkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if ((flag & B_DELWRI) == 0)
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	else
		reassignbuf(bp, bp->b_vp);
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI) {
		bp->b_flags |= B_AGE;
		error = 0;
	} else
		error = 0;
	return (error);
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) bwrite(bp);
}
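
/*
 * Illustrative sketch (hypothetical helper, not part of the interface
 * above): once a cached block has been modified, the caller picks one
 * of the three write disciplines.  A write that must reach stable
 * storage before proceeding uses bwrite(); a block that is likely to
 * be written again soon uses bdwrite(); a finished block that need
 * not be waited for uses bawrite().
 */
example_update(bp, sync, willmodify)
	register struct buf *bp;
	int sync, willmodify;
{

	if (sync)
		return (bwrite(bp));		/* start I/O and wait */
	if (willmodify)
		bdwrite(bp);			/* mark dirty, write later */
	else
		bawrite(bp);			/* start I/O, do not wait */
	return (0);
}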

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags & B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	s = splbio();
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bfreelist[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(vp, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (1);
	return (0);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size)
	register struct vnode *vp;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp;
	int s;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block.  If the buffer is found,
	 * but it is currently busy, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp ||
		    (bp->b_flags & B_INVAL))
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO + 1);
			splx(s);
			goto loop;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			bwrite(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
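
/*
 * Illustrative sketch (hypothetical helper, not part of the interface
 * above): when an entire block is about to be overwritten there is no
 * need to read its old contents; getblk() assigns a buffer to the
 * block, the caller fills it, and bwrite() pushes it out.
 */
example_overwrite(vp, lbn, size, data)
	struct vnode *vp;
	daddr_t lbn;
	int size;
	caddr_t data;
{
	register struct buf *bp;

	bp = getblk(vp, lbn, size);
	bcopy(data, bp->b_un.b_addr, (unsigned)size);
	return (bwrite(bp));
}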

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *flist;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	bp = getnewbuf();
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	flist = &bfreelist[BQ_AGE];
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, flist);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		ep = bfreelist[BQ_EMPTY].av_forw;
		if (ep == &bfreelist[BQ_EMPTY])
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		bp = getnewbuf();
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &bfreelist[BQ_EMPTY]);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf()
{
	register struct buf *bp, *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bfreelist[BQ_AGE]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO + 1);
		splx(s);
		goto loop;
	}
	bp = dp->av_forw;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	return (bp);
}
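
/*
 * Illustrative sketch (hypothetical helper, not part of the interface
 * above): geteblk() supplies a buffer that is not associated with any
 * vnode; because it is marked B_INVAL, releasing it with brelse() puts
 * it at the front of the age list rather than caching its contents,
 * which makes it suitable for short-lived scratch storage of up to
 * MAXBSIZE bytes.
 */
example_scratch(size)
	int size;
{
	register struct buf *bp;

	bp = geteblk(size);
	/* ... use bp->b_un.b_addr as temporary storage ... */
	brelse(bp);
	return (0);
}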

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
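
/*
 * Illustrative sketch (hypothetical helpers, not part of the interface
 * above): a caller that wants notification of completion rather than a
 * wakeup, as the pageout daemon does, sets B_CALL and supplies an
 * iodone handler.  biodone() clears B_CALL and invokes the handler,
 * which is then responsible for releasing the buffer itself.
 */
int	example_iodone();

example_startwrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC | B_CALL;
	bp->b_iodone = example_iodone;
	(void) bwrite(bp);
}

example_iodone(bp)
	register struct buf *bp;
{

	/* the I/O is finished; free the buffer for reuse */
	brelse(bp);
}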