149589Sbostic /*- 264437Sbostic * Copyright (c) 1986, 1989, 1993 The Regents of the University of California. 364437Sbostic * All rights reserved. 423395Smckusick * 564437Sbostic * This code is derived from software contributed to Berkeley by 664437Sbostic * Berkeley Software Design Inc. 737736Smckusick * 864437Sbostic * %sccs.include.redist.c% 964437Sbostic * 10*65552Smckusick * @(#)vfs_bio.c 8.5 (Berkeley) 01/06/94 1123395Smckusick */ 128Sbill 1351455Sbostic #include <sys/param.h> 1465256Smckusick #include <sys/systm.h> 1551455Sbostic #include <sys/proc.h> 1651455Sbostic #include <sys/buf.h> 1751455Sbostic #include <sys/vnode.h> 1851455Sbostic #include <sys/mount.h> 1951455Sbostic #include <sys/trace.h> 2059879Smckusick #include <sys/malloc.h> 2151455Sbostic #include <sys/resourcevar.h> 228Sbill 2391Sbill /* 2456395Smckusick * Definitions for the buffer hash lists. 2556395Smckusick */ 2656395Smckusick #define BUFHASH(dvp, lbn) \ 2756395Smckusick (&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash]) 2865256Smckusick LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash; 2956395Smckusick u_long bufhash; 3056395Smckusick 3156395Smckusick /* 3256395Smckusick * Insq/Remq for the buffer hash lists. 3356395Smckusick */ 3465256Smckusick #define binshash(bp, dp) LIST_INSERT_HEAD(dp, bp, b_hash) 3565256Smckusick #define bremhash(bp) LIST_REMOVE(bp, b_hash) 3656395Smckusick 3756395Smckusick /* 3856395Smckusick * Definitions for the buffer free lists. 3956395Smckusick */ 4056395Smckusick #define BQUEUES 4 /* number of free buffer queues */ 4156395Smckusick 4256395Smckusick #define BQ_LOCKED 0 /* super-blocks &c */ 4356395Smckusick #define BQ_LRU 1 /* lru, useful buffers */ 4456395Smckusick #define BQ_AGE 2 /* rubbish */ 4556395Smckusick #define BQ_EMPTY 3 /* buffer headers with no memory */ 4656395Smckusick 4765256Smckusick TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES]; 4856395Smckusick int needbuffer; 4956395Smckusick 5056395Smckusick /* 5156395Smckusick * Insq/Remq for the buffer free lists. 5256395Smckusick */ 5365256Smckusick #define binsheadfree(bp, dp) TAILQ_INSERT_HEAD(dp, bp, b_freelist) 5465256Smckusick #define binstailfree(bp, dp) TAILQ_INSERT_TAIL(dp, bp, b_freelist) 5556607Smckusick 5656395Smckusick void 5756395Smckusick bremfree(bp) 5856395Smckusick struct buf *bp; 5956395Smckusick { 6065256Smckusick struct bqueues *dp = NULL; 6156395Smckusick 6256607Smckusick /* 6356607Smckusick * We only calculate the head of the freelist when removing 6456607Smckusick * the last element of the list as that is the only time that 6556607Smckusick * it is needed (e.g. to reset the tail pointer). 6665256Smckusick * 6765256Smckusick * NB: This makes an assumption about how tailq's are implemented. 6856607Smckusick */ 6965256Smckusick if (bp->b_freelist.tqe_next == NULL) { 7056395Smckusick for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 7165256Smckusick if (dp->tqh_last == &bp->b_freelist.tqe_next) 7256395Smckusick break; 7356395Smckusick if (dp == &bufqueues[BQUEUES]) 7456395Smckusick panic("bremfree: lost tail"); 7556395Smckusick } 7665256Smckusick TAILQ_REMOVE(dp, bp, b_freelist); 7756395Smckusick } 7856395Smckusick 7956395Smckusick /* 8049280Skarels * Initialize buffers and hash links for buffers. 8149280Skarels */ 8251455Sbostic void 8349280Skarels bufinit() 8449280Skarels { 8556395Smckusick register struct buf *bp; 8665256Smckusick struct bqueues *dp; 8749280Skarels register int i; 8849280Skarels int base, residual; 8949280Skarels 9056395Smckusick for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 9165256Smckusick TAILQ_INIT(dp); 9265256Smckusick bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); 9349280Skarels base = bufpages / nbuf; 9449280Skarels residual = bufpages % nbuf; 9549280Skarels for (i = 0; i < nbuf; i++) { 9649280Skarels bp = &buf[i]; 9756395Smckusick bzero((char *)bp, sizeof *bp); 9849280Skarels bp->b_dev = NODEV; 9949280Skarels bp->b_rcred = NOCRED; 10049280Skarels bp->b_wcred = NOCRED; 101*65552Smckusick bp->b_vnbufs.le_next = NOLIST; 10264536Sbostic bp->b_data = buffers + i * MAXBSIZE; 10349280Skarels if (i < residual) 10449280Skarels bp->b_bufsize = (base + 1) * CLBYTES; 10549280Skarels else 10649280Skarels bp->b_bufsize = base * CLBYTES; 10752413Storek bp->b_flags = B_INVAL; 10856395Smckusick dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY]; 10952413Storek binsheadfree(bp, dp); 11056395Smckusick binshash(bp, &invalhash); 11149280Skarels } 11249280Skarels } 11349280Skarels 11449280Skarels /* 11546151Smckusick * Find the block in the buffer pool. 11646151Smckusick * If the buffer is not present, allocate a new buffer and load 11746151Smckusick * its contents according to the filesystem fill routine. 1188Sbill */ 11938776Smckusick bread(vp, blkno, size, cred, bpp) 12037736Smckusick struct vnode *vp; 1216563Smckusic daddr_t blkno; 1226563Smckusic int size; 12338776Smckusick struct ucred *cred; 12437736Smckusick struct buf **bpp; 1258Sbill { 12647545Skarels struct proc *p = curproc; /* XXX */ 1278Sbill register struct buf *bp; 1288Sbill 1298670S if (size == 0) 1308670S panic("bread: size 0"); 13157797Smckusick *bpp = bp = getblk(vp, blkno, size, 0, 0); 13246151Smckusick if (bp->b_flags & (B_DONE | B_DELWRI)) { 13340341Smckusick trace(TR_BREADHIT, pack(vp, size), blkno); 13437736Smckusick return (0); 1358Sbill } 1368Sbill bp->b_flags |= B_READ; 1378670S if (bp->b_bcount > bp->b_bufsize) 1388670S panic("bread"); 13938776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 14038776Smckusick crhold(cred); 14138776Smckusick bp->b_rcred = cred; 14238776Smckusick } 14337736Smckusick VOP_STRATEGY(bp); 14440341Smckusick trace(TR_BREADMISS, pack(vp, size), blkno); 14547545Skarels p->p_stats->p_ru.ru_inblock++; /* pay for read */ 14637736Smckusick return (biowait(bp)); 1478Sbill } 1488Sbill 1498Sbill /* 15052189Smckusick * Operates like bread, but also starts I/O on the N specified 15152189Smckusick * read-ahead blocks. 1528Sbill */ 15352189Smckusick breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp) 15437736Smckusick struct vnode *vp; 1557114Smckusick daddr_t blkno; int size; 15652189Smckusick daddr_t rablkno[]; int rabsize[]; 15752189Smckusick int num; 15838776Smckusick struct ucred *cred; 15937736Smckusick struct buf **bpp; 1608Sbill { 16147545Skarels struct proc *p = curproc; /* XXX */ 1628Sbill register struct buf *bp, *rabp; 16352189Smckusick register int i; 1648Sbill 1658Sbill bp = NULL; 1667015Smckusick /* 16746151Smckusick * If the block is not memory resident, 16846151Smckusick * allocate a buffer and start I/O. 1697015Smckusick */ 17037736Smckusick if (!incore(vp, blkno)) { 17157797Smckusick *bpp = bp = getblk(vp, blkno, size, 0, 0); 17246151Smckusick if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { 1738Sbill bp->b_flags |= B_READ; 1748670S if (bp->b_bcount > bp->b_bufsize) 17552189Smckusick panic("breadn"); 17638776Smckusick if (bp->b_rcred == NOCRED && cred != NOCRED) { 17738776Smckusick crhold(cred); 17838776Smckusick bp->b_rcred = cred; 17938776Smckusick } 18037736Smckusick VOP_STRATEGY(bp); 18140341Smckusick trace(TR_BREADMISS, pack(vp, size), blkno); 18247545Skarels p->p_stats->p_ru.ru_inblock++; /* pay for read */ 18354342Smckusick } else { 18440341Smckusick trace(TR_BREADHIT, pack(vp, size), blkno); 18554342Smckusick } 1868Sbill } 1877015Smckusick 1887015Smckusick /* 18952189Smckusick * If there's read-ahead block(s), start I/O 19052189Smckusick * on them also (as above). 1917015Smckusick */ 19252189Smckusick for (i = 0; i < num; i++) { 19352189Smckusick if (incore(vp, rablkno[i])) 19452189Smckusick continue; 19557797Smckusick rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0); 19646151Smckusick if (rabp->b_flags & (B_DONE | B_DELWRI)) { 1978Sbill brelse(rabp); 19852189Smckusick trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]); 1992045Swnj } else { 20046151Smckusick rabp->b_flags |= B_ASYNC | B_READ; 2018670S if (rabp->b_bcount > rabp->b_bufsize) 2028670S panic("breadrabp"); 20338880Smckusick if (rabp->b_rcred == NOCRED && cred != NOCRED) { 20438776Smckusick crhold(cred); 20538880Smckusick rabp->b_rcred = cred; 20638776Smckusick } 20737736Smckusick VOP_STRATEGY(rabp); 20852189Smckusick trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]); 20947545Skarels p->p_stats->p_ru.ru_inblock++; /* pay in advance */ 2108Sbill } 2118Sbill } 2127015Smckusick 2137015Smckusick /* 21446151Smckusick * If block was memory resident, let bread get it. 21546151Smckusick * If block was not memory resident, the read was 21646151Smckusick * started above, so just wait for the read to complete. 2177015Smckusick */ 2187114Smckusick if (bp == NULL) 21938776Smckusick return (bread(vp, blkno, size, cred, bpp)); 22037736Smckusick return (biowait(bp)); 2218Sbill } 2228Sbill 2238Sbill /* 22446151Smckusick * Synchronous write. 22546151Smckusick * Release buffer on completion. 2268Sbill */ 2278Sbill bwrite(bp) 2287015Smckusick register struct buf *bp; 2298Sbill { 23047545Skarels struct proc *p = curproc; /* XXX */ 23137736Smckusick register int flag; 23252413Storek int s, error = 0; 2338Sbill 2348Sbill flag = bp->b_flags; 2359857Ssam bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 23649459Smckusick if (flag & B_ASYNC) { 23749459Smckusick if ((flag & B_DELWRI) == 0) 23849459Smckusick p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ 23949459Smckusick else 24049459Smckusick reassignbuf(bp, bp->b_vp); 24149459Smckusick } 24240341Smckusick trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno); 2438670S if (bp->b_bcount > bp->b_bufsize) 2448670S panic("bwrite"); 24540226Smckusick s = splbio(); 24639882Smckusick bp->b_vp->v_numoutput++; 24757797Smckusick bp->b_flags |= B_WRITEINPROG; 24840226Smckusick splx(s); 24937736Smckusick VOP_STRATEGY(bp); 2507015Smckusick 2517015Smckusick /* 25246151Smckusick * If the write was synchronous, then await I/O completion. 2537015Smckusick * If the write was "delayed", then we put the buffer on 25446151Smckusick * the queue of blocks awaiting I/O completion status. 2557015Smckusick */ 25646151Smckusick if ((flag & B_ASYNC) == 0) { 25737736Smckusick error = biowait(bp); 25849459Smckusick if ((flag&B_DELWRI) == 0) 25949459Smckusick p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ 26049459Smckusick else 26149459Smckusick reassignbuf(bp, bp->b_vp); 26257797Smckusick if (bp->b_flags & B_EINTR) { 26357797Smckusick bp->b_flags &= ~B_EINTR; 26457797Smckusick error = EINTR; 26557797Smckusick } 2668Sbill brelse(bp); 26737736Smckusick } else if (flag & B_DELWRI) { 26852413Storek s = splbio(); 2698Sbill bp->b_flags |= B_AGE; 27052413Storek splx(s); 27137736Smckusick } 27237736Smckusick return (error); 2738Sbill } 2748Sbill 27553578Sheideman int 27653578Sheideman vn_bwrite(ap) 27753578Sheideman struct vop_bwrite_args *ap; 27853578Sheideman { 27956395Smckusick return (bwrite(ap->a_bp)); 28053578Sheideman } 28153578Sheideman 28253578Sheideman 2838Sbill /* 28446151Smckusick * Delayed write. 28546151Smckusick * 28646151Smckusick * The buffer is marked dirty, but is not queued for I/O. 28746151Smckusick * This routine should be used when the buffer is expected 28846151Smckusick * to be modified again soon, typically a small write that 28946151Smckusick * partially fills a buffer. 29046151Smckusick * 29146151Smckusick * NB: magnetic tapes cannot be delayed; they must be 29246151Smckusick * written in the order that the writes are requested. 2938Sbill */ 2948Sbill bdwrite(bp) 2957015Smckusick register struct buf *bp; 2968Sbill { 29747545Skarels struct proc *p = curproc; /* XXX */ 2988Sbill 29939882Smckusick if ((bp->b_flags & B_DELWRI) == 0) { 30039882Smckusick bp->b_flags |= B_DELWRI; 30139882Smckusick reassignbuf(bp, bp->b_vp); 30247545Skarels p->p_stats->p_ru.ru_oublock++; /* no one paid yet */ 30339882Smckusick } 30437736Smckusick /* 30539668Smckusick * If this is a tape drive, the write must be initiated. 30637736Smckusick */ 30748360Smckusick if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) { 3088Sbill bawrite(bp); 30939668Smckusick } else { 31046151Smckusick bp->b_flags |= (B_DONE | B_DELWRI); 3118Sbill brelse(bp); 3128Sbill } 3138Sbill } 3148Sbill 3158Sbill /* 31646151Smckusick * Asynchronous write. 31746151Smckusick * Start I/O on a buffer, but do not wait for it to complete. 31846151Smckusick * The buffer is released when the I/O completes. 3198Sbill */ 3208Sbill bawrite(bp) 3217015Smckusick register struct buf *bp; 3228Sbill { 3238Sbill 32446151Smckusick /* 32546151Smckusick * Setting the ASYNC flag causes bwrite to return 32646151Smckusick * after starting the I/O. 32746151Smckusick */ 3288Sbill bp->b_flags |= B_ASYNC; 32957797Smckusick (void) VOP_BWRITE(bp); 3308Sbill } 3318Sbill 3328Sbill /* 33346151Smckusick * Release a buffer. 33446151Smckusick * Even if the buffer is dirty, no I/O is started. 3358Sbill */ 3368Sbill brelse(bp) 3377015Smckusick register struct buf *bp; 3388Sbill { 33965256Smckusick register struct bqueues *flist; 34046151Smckusick int s; 3418Sbill 34240341Smckusick trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); 3437015Smckusick /* 34439668Smckusick * If a process is waiting for the buffer, or 34539668Smckusick * is waiting for a free buffer, awaken it. 3467015Smckusick */ 34746151Smckusick if (bp->b_flags & B_WANTED) 3488Sbill wakeup((caddr_t)bp); 34956395Smckusick if (needbuffer) { 35056395Smckusick needbuffer = 0; 35156395Smckusick wakeup((caddr_t)&needbuffer); 3528Sbill } 35339668Smckusick /* 35439668Smckusick * Retry I/O for locked buffers rather than invalidating them. 35539668Smckusick */ 35652413Storek s = splbio(); 35739668Smckusick if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED)) 35839668Smckusick bp->b_flags &= ~B_ERROR; 35939668Smckusick /* 36039668Smckusick * Disassociate buffers that are no longer valid. 36139668Smckusick */ 36246151Smckusick if (bp->b_flags & (B_NOCACHE | B_ERROR)) 36337736Smckusick bp->b_flags |= B_INVAL; 36446151Smckusick if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) { 36539668Smckusick if (bp->b_vp) 36639668Smckusick brelvp(bp); 36739668Smckusick bp->b_flags &= ~B_DELWRI; 36837736Smckusick } 3697015Smckusick /* 3707015Smckusick * Stick the buffer back on a free list. 3717015Smckusick */ 3728670S if (bp->b_bufsize <= 0) { 3738670S /* block has no buffer ... put at front of unused buffer list */ 37456395Smckusick flist = &bufqueues[BQ_EMPTY]; 3758670S binsheadfree(bp, flist); 37646151Smckusick } else if (bp->b_flags & (B_ERROR | B_INVAL)) { 3772325Swnj /* block has no info ... put at front of most free list */ 37856395Smckusick flist = &bufqueues[BQ_AGE]; 3797015Smckusick binsheadfree(bp, flist); 3808Sbill } else { 3812325Swnj if (bp->b_flags & B_LOCKED) 38256395Smckusick flist = &bufqueues[BQ_LOCKED]; 3832325Swnj else if (bp->b_flags & B_AGE) 38456395Smckusick flist = &bufqueues[BQ_AGE]; 3852325Swnj else 38656395Smckusick flist = &bufqueues[BQ_LRU]; 3877015Smckusick binstailfree(bp, flist); 3888Sbill } 38946151Smckusick bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE); 3908Sbill splx(s); 3918Sbill } 3928Sbill 3938Sbill /* 39446151Smckusick * Check to see if a block is currently memory resident. 3958Sbill */ 39657797Smckusick struct buf * 39737736Smckusick incore(vp, blkno) 39837736Smckusick struct vnode *vp; 3997015Smckusick daddr_t blkno; 4008Sbill { 4018Sbill register struct buf *bp; 4028Sbill 40365256Smckusick for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next) 40439668Smckusick if (bp->b_lblkno == blkno && bp->b_vp == vp && 4057015Smckusick (bp->b_flags & B_INVAL) == 0) 40657797Smckusick return (bp); 40757797Smckusick return (NULL); 4088Sbill } 4098Sbill 41039668Smckusick /* 41146151Smckusick * Check to see if a block is currently memory resident. 41246151Smckusick * If it is resident, return it. If it is not resident, 41346151Smckusick * allocate a new buffer and assign it to the block. 41439668Smckusick */ 4158Sbill struct buf * 41657797Smckusick getblk(vp, blkno, size, slpflag, slptimeo) 41737736Smckusick register struct vnode *vp; 4186563Smckusic daddr_t blkno; 41957797Smckusick int size, slpflag, slptimeo; 4208Sbill { 42156607Smckusick register struct buf *bp; 42265256Smckusick struct bufhashhdr *dp; 42357797Smckusick int s, error; 4248Sbill 42525255Smckusick if (size > MAXBSIZE) 42625255Smckusick panic("getblk: size too big"); 4277015Smckusick /* 42846151Smckusick * Search the cache for the block. If the buffer is found, 42946151Smckusick * but it is currently locked, the we must wait for it to 43046151Smckusick * become available. 4317015Smckusick */ 43237736Smckusick dp = BUFHASH(vp, blkno); 4337015Smckusick loop: 43465256Smckusick for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) { 43557797Smckusick if (bp->b_lblkno != blkno || bp->b_vp != vp) 4368Sbill continue; 43726271Skarels s = splbio(); 43846151Smckusick if (bp->b_flags & B_BUSY) { 4398Sbill bp->b_flags |= B_WANTED; 44057797Smckusick error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), 44157797Smckusick "getblk", slptimeo); 4425424Swnj splx(s); 44357797Smckusick if (error) 44457797Smckusick return (NULL); 4458Sbill goto loop; 4468Sbill } 44757797Smckusick /* 44857797Smckusick * The test for B_INVAL is moved down here, since there 44957797Smckusick * are cases where B_INVAL is set before VOP_BWRITE() is 45057797Smckusick * called and for NFS, the process cannot be allowed to 45157797Smckusick * allocate a new buffer for the same block until the write 45257797Smckusick * back to the server has been completed. (ie. B_BUSY clears) 45357797Smckusick */ 45457797Smckusick if (bp->b_flags & B_INVAL) { 45557797Smckusick splx(s); 45657797Smckusick continue; 45757797Smckusick } 45839882Smckusick bremfree(bp); 45939882Smckusick bp->b_flags |= B_BUSY; 4605424Swnj splx(s); 46132608Smckusick if (bp->b_bcount != size) { 46239668Smckusick printf("getblk: stray size"); 46339668Smckusick bp->b_flags |= B_INVAL; 46457797Smckusick VOP_BWRITE(bp); 46539668Smckusick goto loop; 46632608Smckusick } 4678Sbill bp->b_flags |= B_CACHE; 46826271Skarels return (bp); 4698Sbill } 47057797Smckusick /* 47157797Smckusick * The loop back to the top when getnewbuf() fails is because 47257797Smckusick * stateless filesystems like NFS have no node locks. Thus, 47357797Smckusick * there is a slight chance that more than one process will 47457797Smckusick * try and getnewbuf() for the same block concurrently when 47557797Smckusick * the first sleeps in getnewbuf(). So after a sleep, go back 47657797Smckusick * up to the top to check the hash lists again. 47757797Smckusick */ 47857797Smckusick if ((bp = getnewbuf(slpflag, slptimeo)) == 0) 47957797Smckusick goto loop; 4807015Smckusick bremhash(bp); 48139668Smckusick bgetvp(vp, bp); 48245116Smckusick bp->b_bcount = 0; 48339668Smckusick bp->b_lblkno = blkno; 4846563Smckusic bp->b_blkno = blkno; 4858670S bp->b_error = 0; 48637736Smckusick bp->b_resid = 0; 48737736Smckusick binshash(bp, dp); 48845116Smckusick allocbuf(bp, size); 48926271Skarels return (bp); 4908Sbill } 4918Sbill 4928Sbill /* 49346151Smckusick * Allocate a buffer. 49446151Smckusick * The caller will assign it to a block. 4958Sbill */ 4968Sbill struct buf * 4976563Smckusic geteblk(size) 4986563Smckusic int size; 4998Sbill { 50056395Smckusick register struct buf *bp; 5018Sbill 50225255Smckusick if (size > MAXBSIZE) 50325255Smckusick panic("geteblk: size too big"); 50457797Smckusick while ((bp = getnewbuf(0, 0)) == NULL) 50557797Smckusick /* void */; 5068670S bp->b_flags |= B_INVAL; 5077015Smckusick bremhash(bp); 50856395Smckusick binshash(bp, &invalhash); 50945116Smckusick bp->b_bcount = 0; 51037736Smckusick bp->b_error = 0; 51137736Smckusick bp->b_resid = 0; 51245116Smckusick allocbuf(bp, size); 51326271Skarels return (bp); 5148Sbill } 5158Sbill 5168Sbill /* 51745116Smckusick * Expand or contract the actual memory allocated to a buffer. 51846151Smckusick * If no memory is available, release buffer and take error exit. 5196563Smckusic */ 52045116Smckusick allocbuf(tp, size) 52145116Smckusick register struct buf *tp; 5226563Smckusic int size; 5236563Smckusic { 52445116Smckusick register struct buf *bp, *ep; 52545116Smckusick int sizealloc, take, s; 5266563Smckusic 52745116Smckusick sizealloc = roundup(size, CLBYTES); 52845116Smckusick /* 52945116Smckusick * Buffer size does not change 53045116Smckusick */ 53145116Smckusick if (sizealloc == tp->b_bufsize) 53245116Smckusick goto out; 53345116Smckusick /* 53445116Smckusick * Buffer size is shrinking. 53545116Smckusick * Place excess space in a buffer header taken from the 53645116Smckusick * BQ_EMPTY buffer list and placed on the "most free" list. 53745116Smckusick * If no extra buffer headers are available, leave the 53845116Smckusick * extra space in the present buffer. 53945116Smckusick */ 54045116Smckusick if (sizealloc < tp->b_bufsize) { 54165256Smckusick if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL) 54245116Smckusick goto out; 54345116Smckusick s = splbio(); 54445116Smckusick bremfree(ep); 54545116Smckusick ep->b_flags |= B_BUSY; 54645116Smckusick splx(s); 54764536Sbostic pagemove((char *)tp->b_data + sizealloc, ep->b_data, 54845116Smckusick (int)tp->b_bufsize - sizealloc); 54945116Smckusick ep->b_bufsize = tp->b_bufsize - sizealloc; 55045116Smckusick tp->b_bufsize = sizealloc; 55145116Smckusick ep->b_flags |= B_INVAL; 55245116Smckusick ep->b_bcount = 0; 55345116Smckusick brelse(ep); 55445116Smckusick goto out; 55545116Smckusick } 55645116Smckusick /* 55745116Smckusick * More buffer space is needed. Get it out of buffers on 55845116Smckusick * the "most free" list, placing the empty headers on the 55945116Smckusick * BQ_EMPTY buffer header list. 56045116Smckusick */ 56145116Smckusick while (tp->b_bufsize < sizealloc) { 56245116Smckusick take = sizealloc - tp->b_bufsize; 56357797Smckusick while ((bp = getnewbuf(0, 0)) == NULL) 56457797Smckusick /* void */; 56545116Smckusick if (take >= bp->b_bufsize) 56645116Smckusick take = bp->b_bufsize; 56764536Sbostic pagemove(&((char *)bp->b_data)[bp->b_bufsize - take], 56864536Sbostic &((char *)tp->b_data)[tp->b_bufsize], take); 56945116Smckusick tp->b_bufsize += take; 57045116Smckusick bp->b_bufsize = bp->b_bufsize - take; 57145116Smckusick if (bp->b_bcount > bp->b_bufsize) 57245116Smckusick bp->b_bcount = bp->b_bufsize; 57345116Smckusick if (bp->b_bufsize <= 0) { 57445116Smckusick bremhash(bp); 57556395Smckusick binshash(bp, &invalhash); 57646151Smckusick bp->b_dev = NODEV; 57745116Smckusick bp->b_error = 0; 57845116Smckusick bp->b_flags |= B_INVAL; 57945116Smckusick } 58045116Smckusick brelse(bp); 58145116Smckusick } 58245116Smckusick out: 58345116Smckusick tp->b_bcount = size; 58445116Smckusick return (1); 5858670S } 5868670S 5878670S /* 5888670S * Find a buffer which is available for use. 5898670S * Select something from a free list. 5908670S * Preference is to AGE list, then LRU list. 5918670S */ 5928670S struct buf * 59357797Smckusick getnewbuf(slpflag, slptimeo) 59457797Smckusick int slpflag, slptimeo; 5958670S { 59656395Smckusick register struct buf *bp; 59765256Smckusick register struct bqueues *dp; 59838776Smckusick register struct ucred *cred; 5998670S int s; 6008670S 6018670S loop: 60226271Skarels s = splbio(); 60359879Smckusick for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--) 60465256Smckusick if (dp->tqh_first) 60559879Smckusick break; 60656395Smckusick if (dp == bufqueues) { /* no free blocks */ 60756395Smckusick needbuffer = 1; 60857797Smckusick (void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1), 60957797Smckusick "getnewbuf", slptimeo); 61012170Ssam splx(s); 61157797Smckusick return (NULL); 6128670S } 61365256Smckusick bp = dp->tqh_first; 61439882Smckusick bremfree(bp); 61539882Smckusick bp->b_flags |= B_BUSY; 6168670S splx(s); 6178670S if (bp->b_flags & B_DELWRI) { 61838614Smckusick (void) bawrite(bp); 6198670S goto loop; 6208670S } 62140341Smckusick trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno); 62239668Smckusick if (bp->b_vp) 62339668Smckusick brelvp(bp); 62438776Smckusick if (bp->b_rcred != NOCRED) { 62538776Smckusick cred = bp->b_rcred; 62638776Smckusick bp->b_rcred = NOCRED; 62738776Smckusick crfree(cred); 62838776Smckusick } 62938776Smckusick if (bp->b_wcred != NOCRED) { 63038776Smckusick cred = bp->b_wcred; 63138776Smckusick bp->b_wcred = NOCRED; 63238776Smckusick crfree(cred); 63338776Smckusick } 6348670S bp->b_flags = B_BUSY; 63546989Smckusick bp->b_dirtyoff = bp->b_dirtyend = 0; 63652189Smckusick bp->b_validoff = bp->b_validend = 0; 6378670S return (bp); 6388670S } 6398670S 6408670S /* 64146151Smckusick * Wait for I/O to complete. 64246151Smckusick * 64346151Smckusick * Extract and return any errors associated with the I/O. 64446151Smckusick * If the error flag is set, but no specific error is 64546151Smckusick * given, return EIO. 6468Sbill */ 6477015Smckusick biowait(bp) 6486563Smckusic register struct buf *bp; 6498Sbill { 6505431Sroot int s; 6518Sbill 65226271Skarels s = splbio(); 65338776Smckusick while ((bp->b_flags & B_DONE) == 0) 6548Sbill sleep((caddr_t)bp, PRIBIO); 6555431Sroot splx(s); 65637736Smckusick if ((bp->b_flags & B_ERROR) == 0) 65737736Smckusick return (0); 65837736Smckusick if (bp->b_error) 65937736Smckusick return (bp->b_error); 66037736Smckusick return (EIO); 6618Sbill } 6628Sbill 6638Sbill /* 66413128Ssam * Mark I/O complete on a buffer. 66546151Smckusick * 66646151Smckusick * If a callback has been requested, e.g. the pageout 66746151Smckusick * daemon, do so. Otherwise, awaken waiting processes. 6688Sbill */ 66951455Sbostic void 6707015Smckusick biodone(bp) 6717015Smckusick register struct buf *bp; 6728Sbill { 6738Sbill 674420Sbill if (bp->b_flags & B_DONE) 6757015Smckusick panic("dup biodone"); 6768Sbill bp->b_flags |= B_DONE; 67749232Smckusick if ((bp->b_flags & B_READ) == 0) 67849232Smckusick vwakeup(bp); 6799763Ssam if (bp->b_flags & B_CALL) { 6809763Ssam bp->b_flags &= ~B_CALL; 6819763Ssam (*bp->b_iodone)(bp); 6829763Ssam return; 6839763Ssam } 68446151Smckusick if (bp->b_flags & B_ASYNC) 6858Sbill brelse(bp); 6868Sbill else { 6878Sbill bp->b_flags &= ~B_WANTED; 6888Sbill wakeup((caddr_t)bp); 6898Sbill } 6908Sbill } 69156356Smckusick 69257035Smargo int 69357035Smargo count_lock_queue() 69457035Smargo { 69557035Smargo register struct buf *bp; 69657035Smargo register int ret; 69757035Smargo 69865256Smckusick for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].tqh_first; 69965256Smckusick bp; bp = (struct buf *)bp->b_freelist.tqe_next) 70057035Smargo ++ret; 70157035Smargo return(ret); 70257035Smargo } 70357035Smargo 70456356Smckusick #ifdef DIAGNOSTIC 70556356Smckusick /* 70656356Smckusick * Print out statistics on the current allocation of the buffer pool. 70756356Smckusick * Can be enabled to print out on every ``sync'' by setting "syncprt" 70859879Smckusick * in vfs_syscalls.c using sysctl. 70956356Smckusick */ 71056356Smckusick void 71156356Smckusick vfs_bufstats() 71256356Smckusick { 71356356Smckusick int s, i, j, count; 71456395Smckusick register struct buf *bp; 71565256Smckusick register struct bqueues *dp; 71656356Smckusick int counts[MAXBSIZE/CLBYTES+1]; 71756356Smckusick static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" }; 71856356Smckusick 71956395Smckusick for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { 72056356Smckusick count = 0; 72156356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 72256356Smckusick counts[j] = 0; 72356356Smckusick s = splbio(); 72465256Smckusick for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) { 72556356Smckusick counts[bp->b_bufsize/CLBYTES]++; 72656356Smckusick count++; 72756356Smckusick } 72856356Smckusick splx(s); 72956356Smckusick printf("%s: total-%d", bname[i], count); 73056356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 73156356Smckusick if (counts[j] != 0) 73256356Smckusick printf(", %d-%d", j * CLBYTES, counts[j]); 73356356Smckusick printf("\n"); 73456356Smckusick } 73556356Smckusick } 73656356Smckusick #endif /* DIAGNOSTIC */ 737