/*-
 * Copyright (c) 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * This code is derived from software contributed to Berkeley by
 * Berkeley Software Design Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_bio.c	8.8 (Berkeley) 01/24/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
u_long	bufhash;

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_hash)
#define	bremhash(bp)		LIST_REMOVE(bp, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4	/* number of free buffer queues */

#define	BQ_LOCKED	0	/* super-blocks &c */
#define	BQ_LRU		1	/* lru, useful buffers */
#define	BQ_AGE		2	/* rubbish */
#define	BQ_EMPTY	3	/* buffer headers with no memory */

TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp)	TAILQ_INSERT_HEAD(dp, bp, b_freelist)
#define	binstailfree(bp, dp)	TAILQ_INSERT_TAIL(dp, bp, b_freelist)
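
/*
 * Illustrative sketch, never compiled (guarded by "notdef"): bremfree()
 * below leans on a detail of the 4.4BSD tailq implementation.  For the
 * last buffer on a free queue, the queue head's tqh_last points at that
 * buffer's b_freelist.tqe_next field, which is why the owning queue can
 * be recovered by scanning the four queue heads.  The function name is
 * hypothetical.
 */
#ifdef notdef
int
example_is_last(dp, bp)
	struct bqueues *dp;
	struct buf *bp;
{

	/* True only when bp is the last buffer on queue dp. */
	return (dp->tqh_last == &bp->b_freelist.tqe_next);
}
#endif /* notdef */
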
void
bremfree(bp)
	struct buf *bp;
{
	struct bqueues *dp = NULL;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 *
	 * NB: This makes an assumption about how tailq's are implemented.
	 */
	if (bp->b_freelist.tqe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->tqh_last == &bp->b_freelist.tqe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	TAILQ_REMOVE(dp, bp, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit()
{
	register struct buf *bp;
	struct bqueues *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		TAILQ_INIT(dp);
	bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
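
/*
 * Illustrative sketch, never compiled: the usual calling convention for
 * bread().  On error the buffer must still be released; on success the
 * caller holds the buffer busy until brelse().  The function name is
 * hypothetical.
 */
#ifdef notdef
int
example_blkinspect(vp, lbn, bsize)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
	/* ... examine bsize bytes at bp->b_data ... */
	brelse(bp);
	return (0);
}
#endif /* notdef */
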
/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there are read-ahead blocks, start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}
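
/*
 * Illustrative sketch, never compiled: breadn() takes parallel arrays
 * of read-ahead block numbers and sizes.  Here one read-ahead block,
 * lbn + 1, of the same size is requested; the function name is
 * hypothetical.
 */
#ifdef notdef
int
example_readahead(vp, lbn, bsize, cred, bpp)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
	struct ucred *cred;
	struct buf **bpp;
{
	daddr_t rablkno[1];
	int rabsize[1];

	rablkno[0] = lbn + 1;
	rabsize[0] = bsize;
	return (breadn(vp, lbn, bsize, rablkno, rabsize, 1, cred, bpp));
}
#endif /* notdef */
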
/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	if ((bp->b_flags & B_ASYNC) == 0 &&
	    bp->b_vp && (bp->b_vp->v_mount->mnt_flag & MNT_ASYNC)) {
		bdwrite(bp);
		return (0);
	}
	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{

	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}
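
/*
 * Illustrative sketch, never compiled: a partial-block update is the
 * intended client of bdwrite().  The block is read in full, part of it
 * is overwritten, and the buffer is marked dirty without starting I/O,
 * on the expectation that it will be modified again soon.  The function
 * name is hypothetical.
 */
#ifdef notdef
int
example_partial_write(vp, lbn, bsize, off, len, base)
	struct vnode *vp;
	daddr_t lbn;
	int bsize, off, len;
	char *base;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lbn, bsize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
	bcopy(base, (char *)bp->b_data + off, (u_int)len);
	bdwrite(bp);
	return (0);
}
#endif /* notdef */
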
/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct bqueues *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}
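
/*
 * Illustrative sketch, never compiled: a caller that does not expect to
 * reuse a buffer soon can set B_AGE before brelse(); the buffer is then
 * queued at the tail of BQ_AGE rather than BQ_LRU, making it an early
 * candidate for reuse by getnewbuf().  The function name is
 * hypothetical.
 */
#ifdef notdef
void
example_release_aged(bp)
	struct buf *bp;
{

	bp->b_flags |= B_AGE;
	brelse(bp);
}
#endif /* notdef */
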
/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size, slpflag, slptimeo)
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct bufhashhdr *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (i.e. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}
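
/*
 * Illustrative sketch, never compiled: slpflag and slptimeo are handed
 * through to tsleep(), so a caller in the style of an interruptible NFS
 * request can pass PCATCH; a NULL return then means the sleep was
 * broken by a signal or timed out, not that the block is unavailable.
 * The function name is hypothetical.
 */
#ifdef notdef
int
example_intr_getblk(vp, lbn, bsize, bpp)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
	struct buf **bpp;
{

	if ((*bpp = getblk(vp, lbn, bsize, PCATCH, 0)) == NULL)
		return (EINTR);
	return (0);
}
#endif /* notdef */
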
/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove((char *)tp->b_data + sizealloc, ep->b_data,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&((char *)bp->b_data)[bp->b_bufsize - take],
		    &((char *)tp->b_data)[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}
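
/*
 * Worked example for allocbuf() above (illustrative; assumes CLBYTES is
 * 1024).  Shrinking a buffer with b_bufsize 3072 to a requested size of
 * 1024 rounds sizealloc to 1024, pagemove()s the trailing 2048 bytes
 * into a header taken from BQ_EMPTY, and releases that header as
 * B_INVAL with b_bufsize 2048.  Growing instead steals space from
 * buffers returned by getnewbuf() until the requested size is reached,
 * putting any header drained to zero size back on BQ_EMPTY.
 */
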
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct bqueues *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->tqh_first)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->tqh_first;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}
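
/*
 * Illustrative sketch, never compiled: the error convention biowait()
 * decodes.  A strategy routine reporting a failure sets B_ERROR and
 * optionally a specific errno in b_error before completing the buffer
 * with biodone() (below); biowait() then returns b_error, or EIO when
 * no specific error was supplied.  The function name is hypothetical.
 */
#ifdef notdef
void
example_fail_io(bp)
	struct buf *bp;
{

	bp->b_flags |= B_ERROR;
	bp->b_error = ENXIO;	/* omit to have biowait() return EIO */
	biodone(bp);
}
#endif /* notdef */
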
/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].tqh_first;
	    bp; bp = (struct buf *)bp->b_freelist.tqe_next)
		++ret;
	return (ret);
}

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct bqueues *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
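
/*
 * Illustrative sketch, never compiled: requesting a completion callback
 * from biodone() above, as a caller such as the pageout daemon does.
 * With B_CALL set, biodone() clears the flag and invokes b_iodone once
 * instead of waking sleepers.  Both function names are hypothetical.
 */
#ifdef notdef
void example_iodone();

void
example_async_callback(bp)
	struct buf *bp;
{

	bp->b_flags |= B_CALL | B_ASYNC;
	bp->b_iodone = example_iodone;
	VOP_STRATEGY(bp);
}
#endif /* notdef */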