/*-
 * Copyright (c) 1986, 1989, 1993 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Berkeley Software Design Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_bio.c	8.2 (Berkeley) 09/05/93
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

/*
 * Definitions for the buffer hash lists.
 *
 * A cached buffer is found by hashing its vnode pointer and logical
 * block number into bufhashtbl; invalhash chains the buffers whose
 * contents are invalid.  bufhash is the mask applied to the computed
 * hash (it is filled in by hashinit() in bufinit()).
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	list_entry *bufhashtbl, invalhash;
u_long	bufhash;

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	list_enter_head(dp, bp, struct buf *, b_hash)
#define	bremhash(bp)	list_remove(bp, struct buf *, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

struct queue_entry bufqueues[BQUEUES];
int needbuffer;		/* set when a process is asleep waiting for a free buffer */

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp) \
	queue_enter_head(dp, bp, struct buf *, b_freelist)
#define	binstailfree(bp, dp) \
	queue_enter_tail(dp, bp, struct buf *, b_freelist)

/*
 * Remove a buffer from the free list that it is currently on.
 */
void
bremfree(bp)
	struct buf *bp;
{
	struct queue_entry *dp;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 *
	 * NOTE(review): dp is deliberately left uninitialized when bp is
	 * not the tail of its queue; this relies on queue_remove() using
	 * the head argument only in the tail case — confirm against the
	 * queue macro definitions before reworking this.
	 */
	if (bp->b_freelist.qe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->qe_prev == &bp->b_freelist.qe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	queue_remove(dp, bp, struct buf *, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
 */
void
bufinit()
{
	register struct buf *bp;
	struct queue_entry *dp;
	register int i;
	int base, residual;

	/* Start with every free queue empty and an empty hash table. */
	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		queue_init(dp);
	bufhashtbl = (struct list_entry *)hashinit(nbuf, M_CACHE, &bufhash);
	/*
	 * Divide the bufpages pages of buffer memory evenly among the
	 * nbuf headers; the first "residual" headers each get one
	 * extra page.
	 */
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		/* Headers that got no memory go on BQ_EMPTY, others on BQ_AGE. */
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
	/* Cache hit: the data is already valid (done, or dirty-awaiting-write). */
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	/* Remember the credentials under which the read was issued. */
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}

/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).  Read-ahead buffers are released
	 * immediately on a hit, and marked B_ASYNC on a miss so that
	 * biodone() releases them when the I/O completes.
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	/* Snapshot the flags; B_DELWRI/B_ASYNC decisions below use the
	 * pre-clear values. */
	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag&B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		/* An interrupted write is reported as EINTR to the caller. */
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

/*
 * Default vnode bwrite operation: hand the buffer to bwrite().
 */
int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{

	return (bwrite(ap->a_bp));
}


/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 * (VOP_IOCTL returning 0 for the B_TAPE query identifies a tape.)
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct queue_entry *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 * Returns the buffer if it is resident and valid, else NULL.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->le_next; bp; bp = bp->b_hash.qe_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size, slpflag, slptimeo)
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct list_entry *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, the we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->le_next; bp; bp = bp->b_hash.qe_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			/* A nonzero tsleep() result (signal/timeout) aborts
			 * the lookup; callers must cope with a NULL return. */
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (ie. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/* A cached buffer of the wrong size is pushed out and
		 * the lookup retried. */
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	/* Spin until a free buffer header can be obtained. */
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	/* Allocation is done in whole clusters (CLBYTES). */
	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].qe_next) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		/* Move the trailing pages of tp into the spare header ep. */
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		/* Steal "take" bytes of pages from the tail of bp. */
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		/* A fully-drained header is invalidated and rehashed. */
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct queue_entry *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	/*
	 * Scan from BQ_AGE down to (but not including) BQ_LOCKED;
	 * i.e. prefer the AGE queue, then the LRU queue.  BQ_EMPTY
	 * is never considered here.
	 */
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->qe_next)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		/* Sleep until brelse() signals a buffer has been freed;
		 * return NULL and let the caller retry. */
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->qe_next;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	/* Dirty buffers are pushed to disk and the scan restarted. */
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	/* Drop any credentials still attached from the previous owner. */
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	/* Sleep until biodone() sets B_DONE and wakes us. */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	/* Completed writes credit the vnode's pending-output count. */
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Return the number of buffers on the BQ_LOCKED free list.
 */
int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].qe_next;
	    bp; bp = (struct buf *)bp->b_freelist.qe_next)
		++ret;
	return(ret);
}

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
70656356Smckusick * Can be enabled to print out on every ``sync'' by setting "syncprt" 70759879Smckusick * in vfs_syscalls.c using sysctl. 70856356Smckusick */ 70956356Smckusick void 71056356Smckusick vfs_bufstats() 71156356Smckusick { 71256356Smckusick int s, i, j, count; 71356395Smckusick register struct buf *bp; 71456607Smckusick register struct queue_entry *dp; 71556356Smckusick int counts[MAXBSIZE/CLBYTES+1]; 71656356Smckusick static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" }; 71756356Smckusick 71856395Smckusick for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { 71956356Smckusick count = 0; 72056356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 72156356Smckusick counts[j] = 0; 72256356Smckusick s = splbio(); 72356607Smckusick for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) { 72456356Smckusick counts[bp->b_bufsize/CLBYTES]++; 72556356Smckusick count++; 72656356Smckusick } 72756356Smckusick splx(s); 72856356Smckusick printf("%s: total-%d", bname[i], count); 72956356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 73056356Smckusick if (counts[j] != 0) 73156356Smckusick printf(", %d-%d", j * CLBYTES, counts[j]); 73256356Smckusick printf("\n"); 73356356Smckusick } 73456356Smckusick } 73556356Smckusick #endif /* DIAGNOSTIC */ 736