/*-
 * Copyright (c) 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Berkeley Software Design Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_bio.c	8.6 (Berkeley) 01/11/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
u_long	bufhash;

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_hash)
#define	bremhash(bp)		LIST_REMOVE(bp, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp)	TAILQ_INSERT_HEAD(dp, bp, b_freelist)
#define	binstailfree(bp, dp)	TAILQ_INSERT_TAIL(dp, bp, b_freelist)

void
bremfree(bp)
	struct buf *bp;
{
	struct bqueues *dp = NULL;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 *
	 * NB: This makes an assumption about how tailq's are implemented.
	 */
	if (bp->b_freelist.tqe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->tqh_last == &bp->b_freelist.tqe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	TAILQ_REMOVE(dp, bp, b_freelist);
}
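/*
 * Illustrative sketch (not part of the original source): how the
 * free-list primitives above are meant to be used together.  A buffer
 * is unlinked from whatever queue it is on with bremfree() and then
 * requeued, all at splbio() so the lists cannot change underfoot.
 * The choice of the LRU queue here is arbitrary.
 */
#ifdef notdef
void
example_requeue(bp)
	struct buf *bp;
{
	int s;

	s = splbio();
	bremfree(bp);			/* scans for the tail only if last */
	binstailfree(bp, &bufqueues[BQ_LRU]);	/* back of the LRU queue */
	splx(s);
}
#endif /* notdef */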
/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit()
{
	register struct buf *bp;
	struct bqueues *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		TAILQ_INIT(dp);
	bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_vnbufs.le_next = NOLIST;
		bp->b_data = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}
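/*
 * Illustrative sketch (not part of the original source): the typical
 * way a filesystem consumes bread().  The vnode, block number, and
 * block size here are stand-ins; a real caller gets them from its own
 * metadata.  On success the buffer comes back busy and must be
 * released with brelse() (or written with bwrite()) when done.
 */
#ifdef notdef
int
example_read_block(vp, lbn, bsize)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
{
	struct buf *bp;
	int error;

	if (error = bread(vp, lbn, bsize, NOCRED, &bp))
		return (error);
	/* ... inspect bsize bytes at bp->b_data ... */
	brelse(bp);
	return (0);
}
#endif /* notdef */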
/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there are read-ahead blocks, start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}
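/*
 * Illustrative sketch (not part of the original source): a sequential
 * reader primes the cache by handing breadn() the next couple of
 * logical blocks as read-ahead.  The two-block window is arbitrary.
 */
#ifdef notdef
int
example_read_ahead(vp, lbn, bsize, bpp)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
	struct buf **bpp;
{
	daddr_t ra[2];
	int rasize[2];

	ra[0] = lbn + 1;
	ra[1] = lbn + 2;
	rasize[0] = rasize[1] = bsize;
	/* Waits only for lbn; the read-ahead completes asynchronously. */
	return (breadn(vp, lbn, bsize, ra, rasize, 2, NOCRED, bpp));
}
#endif /* notdef */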
/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	if (bp->b_vp && (bp->b_vp->v_mount->mnt_flag & MNT_ASYNC))
		bp->b_flags |= B_ASYNC;
	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{

	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}
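/*
 * Illustrative sketch (not part of the original source): the choice
 * among the three write interfaces above.  A caller that must know
 * the outcome uses bwrite(); one that expects to dirty the block
 * again soon uses bdwrite(); one that merely wants the I/O started
 * uses bawrite().
 */
#ifdef notdef
int
example_write_back(bp, mustsync, willreuse)
	struct buf *bp;
	int mustsync, willreuse;
{

	if (mustsync)
		return (bwrite(bp));	/* wait for, and return, the result */
	if (willreuse)
		bdwrite(bp);		/* mark dirty, stay in the cache */
	else
		bawrite(bp);		/* start I/O, release on completion */
	return (0);
}
#endif /* notdef */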
/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct bqueues *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}
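/*
 * Illustrative sketch (not part of the original source): incore()
 * only answers "is a valid buffer assigned to this block?"; it does
 * not reserve the buffer, so the answer is advisory.  A caller that
 * actually needs the buffer must go through getblk(), as below.
 */
#ifdef notdef
struct buf *
example_probe(vp, lbn, bsize)
	struct vnode *vp;
	daddr_t lbn;
	int bsize;
{

	if (incore(vp, lbn) == NULL)
		return (NULL);		/* not resident; caller may bread() */
	/* Resident now, though it may still be claimed by someone else
	 * before getblk() wins it; getblk() handles that race. */
	return (getblk(vp, lbn, bsize, 0, 0));
}
#endif /* notdef */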
/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size, slpflag, slptimeo)
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct bufhashhdr *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (ie. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try to getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}
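/*
 * Illustrative sketch (not part of the original source): geteblk()
 * hands back an anonymous buffer on the invalid hash chain; it is
 * useful as scratch storage for I/O not tied to any vnode block.
 * The size used here is arbitrary (it must be <= MAXBSIZE).
 */
#ifdef notdef
void
example_scratch()
{
	struct buf *bp;

	bp = geteblk(8192);
	/* ... use bp->b_data as an 8192-byte scratch area ... */
	bp->b_flags |= B_INVAL;		/* do not cache the contents */
	brelse(bp);
}
#endif /* notdef */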
/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove((char *)tp->b_data + sizealloc, ep->b_data,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&((char *)bp->b_data)[bp->b_bufsize - take],
		    &((char *)tp->b_data)[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}
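/*
 * Illustrative sketch (not part of the original source): a filesystem
 * growing a fragment into a full block resizes the cached buffer in
 * place with allocbuf().  The existing contents stay at the start of
 * b_data; the newly stolen pages are cleared here before use.  The
 * size parameters are stand-ins.
 */
#ifdef notdef
void
example_grow(bp, oldsize, newsize)
	struct buf *bp;
	int oldsize, newsize;
{

	allocbuf(bp, newsize);		/* steals pages from other buffers */
	if (newsize > oldsize)
		bzero((char *)bp->b_data + oldsize, newsize - oldsize);
}
#endif /* notdef */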
/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct bqueues *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->tqh_first)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->tqh_first;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}
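/*
 * Illustrative sketch (not part of the original source): the split
 * between starting I/O and waiting for it.  A caller that already
 * holds a busy buffer set up for transfer can queue the request, do
 * other work, and only then pay for biowait().
 */
#ifdef notdef
int
example_overlapped_read(bp)
	struct buf *bp;
{

	bp->b_flags |= B_READ;
	VOP_STRATEGY(bp);		/* queue the I/O; returns at once */
	/* ... overlap computation with the transfer here ... */
	return (biowait(bp));		/* sleep until biodone() fires */
}
#endif /* notdef */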
/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Return the number of buffers on the locked queue.
 */
int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = bufqueues[BQ_LOCKED].tqh_first;
	    bp; bp = bp->b_freelist.tqe_next)
		++ret;
	return (ret);
}

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct bqueues *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
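/*
 * Illustrative sketch (not part of the original source): how an
 * asynchronous consumer arranges a completion callback instead of
 * sleeping in biowait().  biodone() above sees B_CALL and invokes
 * b_iodone at interrupt level, so the handler must be brief and is
 * responsible for releasing the buffer itself.
 */
#ifdef notdef
void example_done();

void
example_start(bp)
	struct buf *bp;
{

	bp->b_flags |= B_READ | B_ASYNC | B_CALL;
	bp->b_iodone = example_done;
	VOP_STRATEGY(bp);
}

void
example_done(bp)
	struct buf *bp;
{

	/* Runs at interrupt time; note the error and release the buffer. */
	if (bp->b_flags & B_ERROR)
		printf("example_done: error %d\n", bp->b_error);
	brelse(bp);
}
#endif /* notdef */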