149589Sbostic /*- 249589Sbostic * Copyright (c) 1982, 1986, 1989 The Regents of the University of California. 337736Smckusick * All rights reserved. 423395Smckusick * 549618Smckusick * This module is believed to contain source code proprietary to AT&T. 649618Smckusick * Use and redistribution is subject to the Berkeley Software License 749618Smckusick * Agreement and your Software Agreement with AT&T (Western Electric). 837736Smckusick * 9*56395Smckusick * @(#)vfs_cluster.c 7.54 (Berkeley) 10/02/92 1023395Smckusick */ 118Sbill 1251455Sbostic #include <sys/param.h> 1351455Sbostic #include <sys/proc.h> 1451455Sbostic #include <sys/buf.h> 1551455Sbostic #include <sys/vnode.h> 1651455Sbostic #include <sys/mount.h> 1751455Sbostic #include <sys/trace.h> 1851455Sbostic #include <sys/resourcevar.h> 19*56395Smckusick #include <sys/malloc.h> 20*56395Smckusick #include <libkern/libkern.h> 218Sbill 2291Sbill /* 23*56395Smckusick * Definitions for the buffer hash lists. 24*56395Smckusick */ 25*56395Smckusick #define BUFHASH(dvp, lbn) \ 26*56395Smckusick (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash]) 27*56395Smckusick struct buf **bufhashtbl, *invalhash; 28*56395Smckusick u_long bufhash; 29*56395Smckusick 30*56395Smckusick /* 31*56395Smckusick * Insq/Remq for the buffer hash lists. 32*56395Smckusick */ 33*56395Smckusick #define bremhash(bp) { \ 34*56395Smckusick struct buf *bq; \ 35*56395Smckusick if (bq = (bp)->b_forw) \ 36*56395Smckusick bq->b_back = (bp)->b_back; \ 37*56395Smckusick *(bp)->b_back = bq; \ 38*56395Smckusick } 39*56395Smckusick #define binshash(bp, dp) { \ 40*56395Smckusick struct buf *bq; \ 41*56395Smckusick if (bq = *(dp)) \ 42*56395Smckusick bq->b_back = &(bp)->b_forw; \ 43*56395Smckusick (bp)->b_forw = bq; \ 44*56395Smckusick (bp)->b_back = (dp); \ 45*56395Smckusick *(dp) = (bp); \ 46*56395Smckusick } 47*56395Smckusick 48*56395Smckusick /* 49*56395Smckusick * Definitions for the buffer free lists. 50*56395Smckusick */ 51*56395Smckusick #define BQUEUES 4 /* number of free buffer queues */ 52*56395Smckusick 53*56395Smckusick #define BQ_LOCKED 0 /* super-blocks &c */ 54*56395Smckusick #define BQ_LRU 1 /* lru, useful buffers */ 55*56395Smckusick #define BQ_AGE 2 /* rubbish */ 56*56395Smckusick #define BQ_EMPTY 3 /* buffer headers with no memory */ 57*56395Smckusick 58*56395Smckusick struct bufqueue { 59*56395Smckusick struct buf *buffreehead; /* head of available list */ 60*56395Smckusick struct buf **buffreetail; /* tail of available list */ 61*56395Smckusick } bufqueues[BQUEUES]; 62*56395Smckusick int needbuffer; 63*56395Smckusick 64*56395Smckusick /* 65*56395Smckusick * Insq/Remq for the buffer free lists. 66*56395Smckusick */ 67*56395Smckusick void 68*56395Smckusick bremfree(bp) 69*56395Smckusick struct buf *bp; 70*56395Smckusick { 71*56395Smckusick struct buf *bq; 72*56395Smckusick struct bufqueue *dp; 73*56395Smckusick 74*56395Smckusick if (bq = bp->b_actf) { 75*56395Smckusick bq->b_actb = bp->b_actb; 76*56395Smckusick } else { 77*56395Smckusick for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 78*56395Smckusick if (dp->buffreetail == &bp->b_actf) 79*56395Smckusick break; 80*56395Smckusick if (dp == &bufqueues[BQUEUES]) 81*56395Smckusick panic("bremfree: lost tail"); 82*56395Smckusick dp->buffreetail = bp->b_actb; 83*56395Smckusick } 84*56395Smckusick *bp->b_actb = bq; 85*56395Smckusick } 86*56395Smckusick 87*56395Smckusick #define binsheadfree(bp, dp) { \ 88*56395Smckusick struct buf *bq; \ 89*56395Smckusick if (bq = (dp)->buffreehead) \ 90*56395Smckusick bq->b_actb = &(bp)->b_actf; \ 91*56395Smckusick else \ 92*56395Smckusick (dp)->buffreetail = &(bp)->b_actf; \ 93*56395Smckusick (dp)->buffreehead = (bp); \ 94*56395Smckusick (bp)->b_actf = bq; \ 95*56395Smckusick (bp)->b_actb = &(dp)->buffreehead; \ 96*56395Smckusick } 97*56395Smckusick #define binstailfree(bp, dp) { \ 98*56395Smckusick (bp)->b_actf = NULL; \ 99*56395Smckusick (bp)->b_actb = (dp)->buffreetail; \ 100*56395Smckusick *(dp)->buffreetail = (bp); \ 101*56395Smckusick (dp)->buffreetail = &(bp)->b_actf; \ 102*56395Smckusick } 103*56395Smckusick 104*56395Smckusick /* 10549280Skarels * Initialize buffers and hash links for buffers. 10649280Skarels */ 10751455Sbostic void 10849280Skarels bufinit() 10949280Skarels { 110*56395Smckusick register struct buf *bp; 111*56395Smckusick struct bufqueue *dp; 11249280Skarels register int i; 11349280Skarels int base, residual; 11449280Skarels 115*56395Smckusick for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 116*56395Smckusick dp->buffreetail = &dp->buffreehead; 117*56395Smckusick bufhashtbl = (struct buf **)hashinit(nbuf, M_CACHE, &bufhash); 11849280Skarels base = bufpages / nbuf; 11949280Skarels residual = bufpages % nbuf; 12049280Skarels for (i = 0; i < nbuf; i++) { 12149280Skarels bp = &buf[i]; 122*56395Smckusick bzero((char *)bp, sizeof *bp); 12349280Skarels bp->b_dev = NODEV; 12449280Skarels bp->b_rcred = NOCRED; 12549280Skarels bp->b_wcred = NOCRED; 12649280Skarels bp->b_un.b_addr = buffers + i * MAXBSIZE; 12749280Skarels if (i < residual) 12849280Skarels bp->b_bufsize = (base + 1) * CLBYTES; 12949280Skarels else 13049280Skarels bp->b_bufsize = base * CLBYTES; 13152413Storek bp->b_flags = B_INVAL; 132*56395Smckusick dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY]; 13352413Storek binsheadfree(bp, dp); 134*56395Smckusick binshash(bp, &invalhash); 13549280Skarels } 13649280Skarels } 13749280Skarels 13849280Skarels /* 13946151Smckusick * Find the block in the buffer pool. 14046151Smckusick * If the buffer is not present, allocate a new buffer and load 14146151Smckusick * its contents according to the filesystem fill routine. 1428Sbill */ 14338776Smckusick bread(vp, blkno, size, cred, bpp) 14437736Smckusick struct vnode *vp; 1456563Smckusic daddr_t blkno; 1466563Smckusic int size; 14738776Smckusick struct ucred *cred; 14837736Smckusick struct buf **bpp; 1498Sbill { 15047545Skarels struct proc *p = curproc; /* XXX */ 1518Sbill register struct buf *bp; 1528Sbill 1538670S if (size == 0) 1548670S panic("bread: size 0"); 15537736Smckusick *bpp = bp = getblk(vp, blkno, size); 15646151Smckusick if (bp->b_flags & (B_DONE | B_DELWRI)) { 15740341Smckusick trace(TR_BREADHIT, pack(vp, size), blkno); 15837736Smckusick return (0); 1598Sbill } 1608Sbill bp->b_flags |= B_READ; 1618670S if (bp->b_bcount > bp->b_bufsize) 1628670S panic("bread"); 16338776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 16438776Smckusick crhold(cred); 16538776Smckusick bp->b_rcred = cred; 16638776Smckusick } 16737736Smckusick VOP_STRATEGY(bp); 16840341Smckusick trace(TR_BREADMISS, pack(vp, size), blkno); 16947545Skarels p->p_stats->p_ru.ru_inblock++; /* pay for read */ 17037736Smckusick return (biowait(bp)); 1718Sbill } 1728Sbill 1738Sbill /* 17452189Smckusick * Operates like bread, but also starts I/O on the N specified 17552189Smckusick * read-ahead blocks. 1768Sbill */ 17752189Smckusick breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp) 17837736Smckusick struct vnode *vp; 1797114Smckusick daddr_t blkno; int size; 18052189Smckusick daddr_t rablkno[]; int rabsize[]; 18152189Smckusick int num; 18238776Smckusick struct ucred *cred; 18337736Smckusick struct buf **bpp; 1848Sbill { 18547545Skarels struct proc *p = curproc; /* XXX */ 1868Sbill register struct buf *bp, *rabp; 18752189Smckusick register int i; 1888Sbill 1898Sbill bp = NULL; 1907015Smckusick /* 19146151Smckusick * If the block is not memory resident, 19246151Smckusick * allocate a buffer and start I/O. 1937015Smckusick */ 19437736Smckusick if (!incore(vp, blkno)) { 19537736Smckusick *bpp = bp = getblk(vp, blkno, size); 19646151Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { 1978Sbill bp->b_flags |= B_READ; 1988670S if (bp->b_bcount > bp->b_bufsize) 19952189Smckusick panic("breadn"); 20038776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 20138776Smckusick crhold(cred); 20238776Smckusick bp->b_rcred = cred; 20338776Smckusick } 20437736Smckusick VOP_STRATEGY(bp); 20540341Smckusick trace(TR_BREADMISS, pack(vp, size), blkno); 20647545Skarels p->p_stats->p_ru.ru_inblock++; /* pay for read */ 20754342Smckusick } else { 20840341Smckusick trace(TR_BREADHIT, pack(vp, size), blkno); 20954342Smckusick } 2108Sbill } 2117015Smckusick 2127015Smckusick /* 21352189Smckusick * If there's read-ahead block(s), start I/O 21452189Smckusick * on them also (as above). 2157015Smckusick */ 21652189Smckusick for (i = 0; i < num; i++) { 21752189Smckusick if (incore(vp, rablkno[i])) 21852189Smckusick continue; 21952189Smckusick rabp = getblk(vp, rablkno[i], rabsize[i]); 22046151Smckusick if (rabp->b_flags & (B_DONE | B_DELWRI)) { 2218Sbill brelse(rabp); 22252189Smckusick trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]); 2232045Swnj } else { 22446151Smckusick rabp->b_flags |= B_ASYNC | B_READ; 2258670S if (rabp->b_bcount > rabp->b_bufsize) 2268670S panic("breadrabp"); 22738880Smckusick if (rabp->b_rcred == NOCRED && cred != NOCRED) { 22838776Smckusick crhold(cred); 22938880Smckusick rabp->b_rcred = cred; 23038776Smckusick } 23137736Smckusick VOP_STRATEGY(rabp); 23252189Smckusick trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]); 23347545Skarels p->p_stats->p_ru.ru_inblock++; /* pay in advance */ 2348Sbill } 2358Sbill } 2367015Smckusick 2377015Smckusick /* 23846151Smckusick * If block was memory resident, let bread get it. 23946151Smckusick * If block was not memory resident, the read was 24046151Smckusick * started above, so just wait for the read to complete. 2417015Smckusick */ 2427114Smckusick if (bp == NULL) 24338776Smckusick return (bread(vp, blkno, size, cred, bpp)); 24437736Smckusick return (biowait(bp)); 2458Sbill } 2468Sbill 2478Sbill /* 24846151Smckusick * Synchronous write. 24946151Smckusick * Release buffer on completion. 2508Sbill */ 2518Sbill bwrite(bp) 2527015Smckusick register struct buf *bp; 2538Sbill { 25447545Skarels struct proc *p = curproc; /* XXX */ 25537736Smckusick register int flag; 25652413Storek int s, error = 0; 2578Sbill 2588Sbill flag = bp->b_flags; 2599857Ssam bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 26049459Smckusick if (flag & B_ASYNC) { 26149459Smckusick if ((flag & B_DELWRI) == 0) 26249459Smckusick p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ 26349459Smckusick else 26449459Smckusick reassignbuf(bp, bp->b_vp); 26549459Smckusick } 26640341Smckusick trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno); 2678670S if (bp->b_bcount > bp->b_bufsize) 2688670S panic("bwrite"); 26940226Smckusick s = splbio(); 27039882Smckusick bp->b_vp->v_numoutput++; 27140226Smckusick splx(s); 27237736Smckusick VOP_STRATEGY(bp); 2737015Smckusick 2747015Smckusick /* 27546151Smckusick * If the write was synchronous, then await I/O completion. 2767015Smckusick * If the write was "delayed", then we put the buffer on 27746151Smckusick * the queue of blocks awaiting I/O completion status. 2787015Smckusick */ 27946151Smckusick if ((flag & B_ASYNC) == 0) { 28037736Smckusick error = biowait(bp); 28149459Smckusick if ((flag&B_DELWRI) == 0) 28249459Smckusick p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ 28349459Smckusick else 28449459Smckusick reassignbuf(bp, bp->b_vp); 2858Sbill brelse(bp); 28637736Smckusick } else if (flag & B_DELWRI) { 28752413Storek s = splbio(); 2888Sbill bp->b_flags |= B_AGE; 28952413Storek splx(s); 29037736Smckusick } 29137736Smckusick return (error); 2928Sbill } 2938Sbill 29453578Sheideman int 29553578Sheideman vn_bwrite(ap) 29653578Sheideman struct vop_bwrite_args *ap; 29753578Sheideman { 298*56395Smckusick return (bwrite(ap->a_bp)); 29953578Sheideman } 30053578Sheideman 30153578Sheideman 3028Sbill /* 30346151Smckusick * Delayed write. 30446151Smckusick * 30546151Smckusick * The buffer is marked dirty, but is not queued for I/O. 30646151Smckusick * This routine should be used when the buffer is expected 30746151Smckusick * to be modified again soon, typically a small write that 30846151Smckusick * partially fills a buffer. 30946151Smckusick * 31046151Smckusick * NB: magnetic tapes cannot be delayed; they must be 31146151Smckusick * written in the order that the writes are requested. 3128Sbill */ 3138Sbill bdwrite(bp) 3147015Smckusick register struct buf *bp; 3158Sbill { 31647545Skarels struct proc *p = curproc; /* XXX */ 3178Sbill 31839882Smckusick if ((bp->b_flags & B_DELWRI) == 0) { 31939882Smckusick bp->b_flags |= B_DELWRI; 32039882Smckusick reassignbuf(bp, bp->b_vp); 32147545Skarels p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ 32239882Smckusick } 32337736Smckusick /* 32439668Smckusick * If this is a tape drive, the write must be initiated. 32537736Smckusick */ 32648360Smckusick if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) { 3278Sbill bawrite(bp); 32839668Smckusick } else { 32946151Smckusick bp->b_flags |= (B_DONE | B_DELWRI); 3308Sbill brelse(bp); 3318Sbill } 3328Sbill } 3338Sbill 3348Sbill /* 33546151Smckusick * Asynchronous write. 33646151Smckusick * Start I/O on a buffer, but do not wait for it to complete. 33746151Smckusick * The buffer is released when the I/O completes. 3388Sbill */ 3398Sbill bawrite(bp) 3407015Smckusick register struct buf *bp; 3418Sbill { 3428Sbill 34346151Smckusick /* 34446151Smckusick * Setting the ASYNC flag causes bwrite to return 34546151Smckusick * after starting the I/O. 34646151Smckusick */ 3478Sbill bp->b_flags |= B_ASYNC; 34837736Smckusick (void) bwrite(bp); 3498Sbill } 3508Sbill 3518Sbill /* 35246151Smckusick * Release a buffer. 35346151Smckusick * Even if the buffer is dirty, no I/O is started. 3548Sbill */ 3558Sbill brelse(bp) 3567015Smckusick register struct buf *bp; 3578Sbill { 358*56395Smckusick register struct bufqueue *flist; 35946151Smckusick int s; 3608Sbill 36140341Smckusick trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); 3627015Smckusick /* 36339668Smckusick * If a process is waiting for the buffer, or 36439668Smckusick * is waiting for a free buffer, awaken it. 3657015Smckusick */ 36646151Smckusick if (bp->b_flags & B_WANTED) 3678Sbill wakeup((caddr_t)bp); 368*56395Smckusick if (needbuffer) { 369*56395Smckusick needbuffer = 0; 370*56395Smckusick wakeup((caddr_t)&needbuffer); 3718Sbill } 37239668Smckusick /* 37339668Smckusick * Retry I/O for locked buffers rather than invalidating them. 37439668Smckusick */ 37552413Storek s = splbio(); 37639668Smckusick if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED)) 37739668Smckusick bp->b_flags &= ~B_ERROR; 37839668Smckusick /* 37939668Smckusick * Disassociate buffers that are no longer valid. 38039668Smckusick */ 38146151Smckusick if (bp->b_flags & (B_NOCACHE | B_ERROR)) 38237736Smckusick bp->b_flags |= B_INVAL; 38346151Smckusick if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) { 38439668Smckusick if (bp->b_vp) 38539668Smckusick brelvp(bp); 38639668Smckusick bp->b_flags &= ~B_DELWRI; 38737736Smckusick } 3887015Smckusick /* 3897015Smckusick * Stick the buffer back on a free list. 3907015Smckusick */ 3918670S if (bp->b_bufsize <= 0) { 3928670S /* block has no buffer ... put at front of unused buffer list */ 393*56395Smckusick flist = &bufqueues[BQ_EMPTY]; 3948670S binsheadfree(bp, flist); 39546151Smckusick } else if (bp->b_flags & (B_ERROR | B_INVAL)) { 3962325Swnj /* block has no info ... put at front of most free list */ 397*56395Smckusick flist = &bufqueues[BQ_AGE]; 3987015Smckusick binsheadfree(bp, flist); 3998Sbill } else { 4002325Swnj if (bp->b_flags & B_LOCKED) 401*56395Smckusick flist = &bufqueues[BQ_LOCKED]; 4022325Swnj else if (bp->b_flags & B_AGE) 403*56395Smckusick flist = &bufqueues[BQ_AGE]; 4042325Swnj else 405*56395Smckusick flist = &bufqueues[BQ_LRU]; 4067015Smckusick binstailfree(bp, flist); 4078Sbill } 40846151Smckusick bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE); 4098Sbill splx(s); 4108Sbill } 4118Sbill 4128Sbill /* 41346151Smckusick * Check to see if a block is currently memory resident. 4148Sbill */ 41537736Smckusick incore(vp, blkno) 41637736Smckusick struct vnode *vp; 4177015Smckusick daddr_t blkno; 4188Sbill { 4198Sbill register struct buf *bp; 4208Sbill 421*56395Smckusick for (bp = *BUFHASH(vp, blkno); bp; bp = bp->b_forw) 42239668Smckusick if (bp->b_lblkno == blkno && bp->b_vp == vp && 4237015Smckusick (bp->b_flags & B_INVAL) == 0) 42491Sbill return (1); 42591Sbill return (0); 4268Sbill } 4278Sbill 42839668Smckusick /* 42946151Smckusick * Check to see if a block is currently memory resident. 43046151Smckusick * If it is resident, return it. If it is not resident, 43146151Smckusick * allocate a new buffer and assign it to the block. 43239668Smckusick */ 4338Sbill struct buf * 43437736Smckusick getblk(vp, blkno, size) 43537736Smckusick register struct vnode *vp; 4366563Smckusic daddr_t blkno; 4376563Smckusic int size; 4388Sbill { 439*56395Smckusick register struct buf *bp, **dp; 4405424Swnj int s; 4418Sbill 44225255Smckusick if (size > MAXBSIZE) 44325255Smckusick panic("getblk: size too big"); 4447015Smckusick /* 44546151Smckusick * Search the cache for the block. If the buffer is found, 44646151Smckusick * but it is currently locked, the we must wait for it to 44746151Smckusick * become available. 4487015Smckusick */ 44937736Smckusick dp = BUFHASH(vp, blkno); 4507015Smckusick loop: 451*56395Smckusick for (bp = *dp; bp; bp = bp->b_forw) { 45239668Smckusick if (bp->b_lblkno != blkno || bp->b_vp != vp || 45346151Smckusick (bp->b_flags & B_INVAL)) 4548Sbill continue; 45526271Skarels s = splbio(); 45646151Smckusick if (bp->b_flags & B_BUSY) { 4578Sbill bp->b_flags |= B_WANTED; 45846151Smckusick sleep((caddr_t)bp, PRIBIO + 1); 4595424Swnj splx(s); 4608Sbill goto loop; 4618Sbill } 46239882Smckusick bremfree(bp); 46339882Smckusick bp->b_flags |= B_BUSY; 4645424Swnj splx(s); 46532608Smckusick if (bp->b_bcount != size) { 46639668Smckusick printf("getblk: stray size"); 46739668Smckusick bp->b_flags |= B_INVAL; 46839668Smckusick bwrite(bp); 46939668Smckusick goto loop; 47032608Smckusick } 4718Sbill bp->b_flags |= B_CACHE; 47226271Skarels return (bp); 4738Sbill } 4748670S bp = getnewbuf(); 4757015Smckusick bremhash(bp); 47639668Smckusick bgetvp(vp, bp); 47745116Smckusick bp->b_bcount = 0; 47839668Smckusick bp->b_lblkno = blkno; 4796563Smckusic bp->b_blkno = blkno; 4808670S bp->b_error = 0; 48137736Smckusick bp->b_resid = 0; 48237736Smckusick binshash(bp, dp); 48345116Smckusick allocbuf(bp, size); 48426271Skarels return (bp); 4858Sbill } 4868Sbill 4878Sbill /* 48846151Smckusick * Allocate a buffer. 48946151Smckusick * The caller will assign it to a block. 4908Sbill */ 4918Sbill struct buf * 4926563Smckusic geteblk(size) 4936563Smckusic int size; 4948Sbill { 495*56395Smckusick register struct buf *bp; 4968Sbill 49725255Smckusick if (size > MAXBSIZE) 49825255Smckusick panic("geteblk: size too big"); 4998670S bp = getnewbuf(); 5008670S bp->b_flags |= B_INVAL; 5017015Smckusick bremhash(bp); 502*56395Smckusick binshash(bp, &invalhash); 50345116Smckusick bp->b_bcount = 0; 50437736Smckusick bp->b_error = 0; 50537736Smckusick bp->b_resid = 0; 50645116Smckusick allocbuf(bp, size); 50726271Skarels return (bp); 5088Sbill } 5098Sbill 5108Sbill /* 51145116Smckusick * Expand or contract the actual memory allocated to a buffer. 51246151Smckusick * If no memory is available, release buffer and take error exit. 5136563Smckusic */ 51445116Smckusick allocbuf(tp, size) 51545116Smckusick register struct buf *tp; 5166563Smckusic int size; 5176563Smckusic { 51845116Smckusick register struct buf *bp, *ep; 51945116Smckusick int sizealloc, take, s; 5206563Smckusic 52145116Smckusick sizealloc = roundup(size, CLBYTES); 52245116Smckusick /* 52345116Smckusick * Buffer size does not change 52445116Smckusick */ 52545116Smckusick if (sizealloc == tp->b_bufsize) 52645116Smckusick goto out; 52745116Smckusick /* 52845116Smckusick * Buffer size is shrinking. 52945116Smckusick * Place excess space in a buffer header taken from the 53045116Smckusick * BQ_EMPTY buffer list and placed on the "most free" list. 53145116Smckusick * If no extra buffer headers are available, leave the 53245116Smckusick * extra space in the present buffer. 53345116Smckusick */ 53445116Smckusick if (sizealloc < tp->b_bufsize) { 535*56395Smckusick if ((ep = bufqueues[BQ_EMPTY].buffreehead) == NULL) 53645116Smckusick goto out; 53745116Smckusick s = splbio(); 53845116Smckusick bremfree(ep); 53945116Smckusick ep->b_flags |= B_BUSY; 54045116Smckusick splx(s); 54145116Smckusick pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr, 54245116Smckusick (int)tp->b_bufsize - sizealloc); 54345116Smckusick ep->b_bufsize = tp->b_bufsize - sizealloc; 54445116Smckusick tp->b_bufsize = sizealloc; 54545116Smckusick ep->b_flags |= B_INVAL; 54645116Smckusick ep->b_bcount = 0; 54745116Smckusick brelse(ep); 54845116Smckusick goto out; 54945116Smckusick } 55045116Smckusick /* 55145116Smckusick * More buffer space is needed. Get it out of buffers on 55245116Smckusick * the "most free" list, placing the empty headers on the 55345116Smckusick * BQ_EMPTY buffer header list. 55445116Smckusick */ 55545116Smckusick while (tp->b_bufsize < sizealloc) { 55645116Smckusick take = sizealloc - tp->b_bufsize; 55745116Smckusick bp = getnewbuf(); 55845116Smckusick if (take >= bp->b_bufsize) 55945116Smckusick take = bp->b_bufsize; 56045116Smckusick pagemove(&bp->b_un.b_addr[bp->b_bufsize - take], 56145116Smckusick &tp->b_un.b_addr[tp->b_bufsize], take); 56245116Smckusick tp->b_bufsize += take; 56345116Smckusick bp->b_bufsize = bp->b_bufsize - take; 56445116Smckusick if (bp->b_bcount > bp->b_bufsize) 56545116Smckusick bp->b_bcount = bp->b_bufsize; 56645116Smckusick if (bp->b_bufsize <= 0) { 56745116Smckusick bremhash(bp); 568*56395Smckusick binshash(bp, &invalhash); 56946151Smckusick bp->b_dev = NODEV; 57045116Smckusick bp->b_error = 0; 57145116Smckusick bp->b_flags |= B_INVAL; 57245116Smckusick } 57345116Smckusick brelse(bp); 57445116Smckusick } 57545116Smckusick out: 57645116Smckusick tp->b_bcount = size; 57745116Smckusick return (1); 5788670S } 5798670S 5808670S /* 5818670S * Find a buffer which is available for use. 5828670S * Select something from a free list. 5838670S * Preference is to AGE list, then LRU list. 5848670S */ 5858670S struct buf * 5868670S getnewbuf() 5878670S { 588*56395Smckusick register struct buf *bp; 589*56395Smckusick register struct bufqueue *dp; 59038776Smckusick register struct ucred *cred; 5918670S int s; 5928670S 5938670S loop: 59426271Skarels s = splbio(); 595*56395Smckusick for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--) 596*56395Smckusick if (dp->buffreehead) 5978670S break; 598*56395Smckusick if (dp == bufqueues) { /* no free blocks */ 599*56395Smckusick needbuffer = 1; 600*56395Smckusick sleep((caddr_t)&needbuffer, PRIBIO + 1); 60112170Ssam splx(s); 6028670S goto loop; 6038670S } 604*56395Smckusick bp = dp->buffreehead; 60539882Smckusick bremfree(bp); 60639882Smckusick bp->b_flags |= B_BUSY; 6078670S splx(s); 6088670S if (bp->b_flags & B_DELWRI) { 60938614Smckusick (void) bawrite(bp); 6108670S goto loop; 6118670S } 61240341Smckusick trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); 61339668Smckusick if (bp->b_vp) 61439668Smckusick brelvp(bp); 61538776Smckusick if (bp->b_rcred != NOCRED) { 61638776Smckusick cred = bp->b_rcred; 61738776Smckusick bp->b_rcred = NOCRED; 61838776Smckusick crfree(cred); 61938776Smckusick } 62038776Smckusick if (bp->b_wcred != NOCRED) { 62138776Smckusick cred = bp->b_wcred; 62238776Smckusick bp->b_wcred = NOCRED; 62338776Smckusick crfree(cred); 62438776Smckusick } 6258670S bp->b_flags = B_BUSY; 62646989Smckusick bp->b_dirtyoff = bp->b_dirtyend = 0; 62752189Smckusick bp->b_validoff = bp->b_validend = 0; 6288670S return (bp); 6298670S } 6308670S 6318670S /* 63246151Smckusick * Wait for I/O to complete. 63346151Smckusick * 63446151Smckusick * Extract and return any errors associated with the I/O. 63546151Smckusick * If the error flag is set, but no specific error is 63646151Smckusick * given, return EIO. 6378Sbill */ 6387015Smckusick biowait(bp) 6396563Smckusic register struct buf *bp; 6408Sbill { 6415431Sroot int s; 6428Sbill 64326271Skarels s = splbio(); 64438776Smckusick while ((bp->b_flags & B_DONE) == 0) 6458Sbill sleep((caddr_t)bp, PRIBIO); 6465431Sroot splx(s); 64737736Smckusick if ((bp->b_flags & B_ERROR) == 0) 64837736Smckusick return (0); 64937736Smckusick if (bp->b_error) 65037736Smckusick return (bp->b_error); 65137736Smckusick return (EIO); 6528Sbill } 6538Sbill 6548Sbill /* 65513128Ssam * Mark I/O complete on a buffer. 65646151Smckusick * 65746151Smckusick * If a callback has been requested, e.g. the pageout 65846151Smckusick * daemon, do so. Otherwise, awaken waiting processes. 6598Sbill */ 66051455Sbostic void 6617015Smckusick biodone(bp) 6627015Smckusick register struct buf *bp; 6638Sbill { 6648Sbill 665420Sbill if (bp->b_flags & B_DONE) 6667015Smckusick panic("dup biodone"); 6678Sbill bp->b_flags |= B_DONE; 66849232Smckusick if ((bp->b_flags & B_READ) == 0) 66949232Smckusick vwakeup(bp); 6709763Ssam if (bp->b_flags & B_CALL) { 6719763Ssam bp->b_flags &= ~B_CALL; 6729763Ssam (*bp->b_iodone)(bp); 6739763Ssam return; 6749763Ssam } 67546151Smckusick if (bp->b_flags & B_ASYNC) 6768Sbill brelse(bp); 6778Sbill else { 6788Sbill bp->b_flags &= ~B_WANTED; 6798Sbill wakeup((caddr_t)bp); 6808Sbill } 6818Sbill } 68256356Smckusick 68356356Smckusick #ifdef DIAGNOSTIC 68456356Smckusick /* 68556356Smckusick * Print out statistics on the current allocation of the buffer pool. 68656356Smckusick * Can be enabled to print out on every ``sync'' by setting "syncprt" 68756356Smckusick * above. 68856356Smckusick */ 68956356Smckusick void 69056356Smckusick vfs_bufstats() 69156356Smckusick { 69256356Smckusick int s, i, j, count; 693*56395Smckusick register struct buf *bp; 694*56395Smckusick register struct bufqueue *dp; 69556356Smckusick int counts[MAXBSIZE/CLBYTES+1]; 69656356Smckusick static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" }; 69756356Smckusick 698*56395Smckusick for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { 69956356Smckusick count = 0; 70056356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 70156356Smckusick counts[j] = 0; 70256356Smckusick s = splbio(); 703*56395Smckusick for (bp = dp->buffreehead; bp; bp = bp->b_actf) { 70456356Smckusick counts[bp->b_bufsize/CLBYTES]++; 70556356Smckusick count++; 70656356Smckusick } 70756356Smckusick splx(s); 70856356Smckusick printf("%s: total-%d", bname[i], count); 70956356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 71056356Smckusick if (counts[j] != 0) 71156356Smckusick printf(", %d-%d", j * CLBYTES, counts[j]); 71256356Smckusick printf("\n"); 71356356Smckusick } 71456356Smckusick } 71556356Smckusick #endif /* DIAGNOSTIC */ 716