/*-
 * Copyright (c) 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * This code is derived from software contributed to Berkeley by
 * Berkeley Software Design Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_bio.c	8.7 (Berkeley) 01/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
u_long	bufhash;

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_hash)
#define	bremhash(bp)		LIST_REMOVE(bp, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp)	TAILQ_INSERT_HEAD(dp, bp, b_freelist)
#define	binstailfree(bp, dp)	TAILQ_INSERT_TAIL(dp, bp, b_freelist)

/*
 * Remove a buffer from the free list that it is on.
 */
void
bremfree(bp)
	struct buf *bp;
{
	struct bqueues *dp = NULL;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 *
	 * NB: This makes an assumption about how tailq's are implemented.
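	 *
	 * In the <sys/queue.h> representation, tqh_last points at the
	 * tqe_next field of the final element (or back at tqh_first when
	 * the queue is empty), so the test
	 *
	 *	dp->tqh_last == &bp->b_freelist.tqe_next
	 *
	 * holds exactly for the queue whose tail is bp.  (Illustration
	 * only; this layout is an implementation detail of the macros.)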
	 */
	if (bp->b_freelist.tqe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->tqh_last == &bp->b_freelist.tqe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	TAILQ_REMOVE(dp, bp, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit()
{
	register struct buf *bp;
	struct bqueues *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		TAILQ_INIT(dp);
	bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}

/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
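 *
 * A minimal caller sketch (illustrative only; "bsize" and the single
 * read-ahead block "bn + 1" are assumptions, not fixed by this code):
 *
 *	daddr_t rabn = bn + 1;
 *	int rasize = bsize, error;
 *	struct buf *bp;
 *
 *	if (error = breadn(vp, bn, bsize, &rabn, &rasize, 1, cred, &bp))
 *		return (error);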
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there are read-ahead blocks, start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadn: rabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
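 *
 * If the caller set B_ASYNC (see bawrite() below), bwrite() instead
 * returns as soon as the I/O has been started; only the synchronous
 * case sleeps in biowait() and hands any resulting error back.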
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	if (bp->b_vp && (bp->b_vp->v_mount->mnt_flag & MNT_ASYNC))
		bp->b_flags |= B_ASYNC;
	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

/*
 * Default vnode bwrite operation; simply hands the buffer to bwrite().
 */
int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{

	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct bqueues *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size, slpflag, slptimeo)
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct bufhashhdr *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed (i.e., until
		 * B_BUSY clears).
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
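 *
 * The buffer is marked B_INVAL and kept on the invalid hash chain, so
 * incore() and getblk() never find it; it is suited to transfers that
 * are not associated with any particular file block.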
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change.
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove((char *)tp->b_data + sizealloc, ep->b_data,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
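	 *
	 * For example (figures illustrative only): growing a 4K buffer
	 * to sizealloc == 8K moves up to "take" bytes per iteration from
	 * a donor buffer with pagemove(); a donor drained to zero size
	 * is rehashed onto invalhash and lands on BQ_EMPTY via brelse().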
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&((char *)bp->b_data)[bp->b_bufsize - take],
		    &((char *)tp->b_data)[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct bqueues *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->tqh_first)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->tqh_first;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
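 *
 * The usual calling pattern (as in bread() above) is
 *
 *	VOP_STRATEGY(bp);
 *	...
 *	return (biowait(bp));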
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Return the number of buffers on the "locked" queue.
 */
int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = bufqueues[BQ_LOCKED].tqh_first;
	    bp; bp = bp->b_freelist.tqe_next)
		++ret;
	return (ret);
}

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct bqueues *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
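
/*
 * A vfs_bufstats() output line gives the queue name, the number of
 * buffers on that queue, and a "bytes-count" pair for each buffer
 * size present, e.g. (counts invented for illustration):
 *
 *	LRU: total-37, 4096-12, 8192-25
 */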