/*-
 * Copyright (c) 1982, 1986, 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This module is believed to contain source code proprietary to AT&T.
 * Use and redistribution is subject to the Berkeley Software License
 * Agreement and your Software Agreement with AT&T (Western Electric).
 *
 *	@(#)vfs_bio.c	7.60 (Berkeley) 05/10/93
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

/*
 * Definitions for the buffer hash lists.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
struct	list_entry *bufhashtbl, invalhash;
u_long	bufhash;

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	list_enter_head(dp, bp, struct buf *, b_hash)
#define	bremhash(bp)		list_remove(bp, struct buf *, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

struct queue_entry bufqueues[BQUEUES];
int needbuffer;

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp) \
	queue_enter_head(dp, bp, struct buf *, b_freelist)
#define	binstailfree(bp, dp) \
	queue_enter_tail(dp, bp, struct buf *, b_freelist)

void
bremfree(bp)
	struct buf *bp;
{
	struct queue_entry *dp;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 */
	if (bp->b_freelist.qe_next == NULL) {
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->qe_prev == &bp->b_freelist.qe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	queue_remove(dp, bp, struct buf *, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
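 *
 * After initialization every buffer header is marked B_INVAL, hangs
 * on the "invalhash" chain, and sits on the BQ_AGE free list if it
 * was assigned memory or on BQ_EMPTY if it was not.  An illustrative
 * sketch (not part of this file) of how a later lookup walks the
 * hash chain selected by BUFHASH():
 *
 *	struct buf *bp;
 *
 *	for (bp = BUFHASH(vp, lbn)->le_next; bp; bp = bp->b_hash.qe_next)
 *		if (bp->b_lblkno == lbn && bp->b_vp == vp)
 *			break;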
 */
void
bufinit()
{
	register struct buf *bp;
	struct queue_entry *dp;
	register int i;
	int base, residual;

	for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
		queue_init(dp);
	bufhashtbl = (struct list_entry *)hashinit(nbuf, M_CACHE, &bufhash);
	base = bufpages / nbuf;
	residual = bufpages % nbuf;
	for (i = 0; i < nbuf; i++) {
		bp = &buf[i];
		bzero((char *)bp, sizeof *bp);
		bp->b_dev = NODEV;
		bp->b_rcred = NOCRED;
		bp->b_wcred = NOCRED;
		bp->b_un.b_addr = buffers + i * MAXBSIZE;
		if (i < residual)
			bp->b_bufsize = (base + 1) * CLBYTES;
		else
			bp->b_bufsize = base * CLBYTES;
		bp->b_flags = B_INVAL;
		dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY];
		binsheadfree(bp, dp);
		binshash(bp, &invalhash);
	}
}

/*
 * Find the block in the buffer pool.
 * If the buffer is not present, allocate a new buffer and load
 * its contents according to the filesystem fill routine.
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}

/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
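	 * (When the block is already resident, bp stays NULL and the
	 * tail of this routine simply hands the request to bread().)
	 *
	 * An illustrative call from a hypothetical filesystem read
	 * routine; "lbn" and "fs_bsize" are assumptions, not names
	 * from this file:
	 *
	 *	daddr_t ra = lbn + 1;
	 *	int rasize = fs_bsize;
	 *
	 *	error = breadn(vp, lbn, fs_bsize, &ra, &rasize, 1,
	 *	    NOCRED, &bp);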
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
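	 * (An asynchronous buffer is instead released by biodone()
	 * when the I/O completes; a formerly-delayed one is marked
	 * B_AGE below so that its memory is reclaimed early.)
	 *
	 * An illustrative synchronous update by a hypothetical caller
	 * ("data", "off" and "len" are assumptions, sketch only):
	 *
	 *	if (error = bread(vp, lbn, size, NOCRED, &bp)) {
	 *		brelse(bp);
	 *		return (error);
	 *	}
	 *	bcopy(data, bp->b_un.b_addr + off, len);
	 *	error = bwrite(bp);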
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{
	return (bwrite(ap->a_bp));
}

/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct queue_entry *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
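	 * (A locked buffer, e.g. a superblock on the BQ_LOCKED queue,
	 * must keep its contents valid, so the error is cleared here
	 * and the write will be attempted again later.)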
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->le_next; bp; bp = bp->b_hash.qe_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 */
struct buf *
getblk(vp, blkno, size, slpflag, slptimeo)
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct list_entry *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, then we must wait for it to
	 * become available.
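	 *
	 * An illustrative use by a hypothetical caller that fills a
	 * newly allocated block itself instead of reading it (sketch
	 * only; "lbn" and "fs_bsize" are assumptions):
	 *
	 *	bp = getblk(vp, lbn, fs_bsize, 0, 0);
	 *	bzero(bp->b_un.b_addr, fs_bsize);
	 *	bawrite(bp);
	 *
	 * With a zero slpflag and slptimeo the sleeps below cannot
	 * fail, so such a caller need not check for a NULL return.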
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->le_next; bp; bp = bp->b_hash.qe_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (ie. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			printf("getblk: stray size");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
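 *
 * Space moves in CLBYTES units: the requested size is rounded up
 * with roundup(size, CLBYTES), surplus pages are handed to a header
 * taken from the BQ_EMPTY list, and any shortfall is taken from
 * buffers on the free lists.  As illustrative arithmetic only (a
 * CLBYTES of 1024 is an assumption): growing a 2048-byte buffer to
 * a size of 3000 rounds up to 3072 and steals 1024 bytes from some
 * other buffer.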
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].qe_next) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		pagemove(tp->b_un.b_addr + sizealloc, ep->b_un.b_addr,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		pagemove(&bp->b_un.b_addr[bp->b_bufsize - take],
		    &tp->b_un.b_addr[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
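 *
 * A NULL return means that no buffer was free and the routine slept
 * waiting for one; callers are expected to retry.  The retry idiom
 * used elsewhere in this file:
 *
 *	while ((bp = getnewbuf(0, 0)) == NULL)
 *		;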
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct queue_entry *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->qe_next)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->qe_next;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
 * If the error flag is set, but no specific error is
 * given, return EIO.
 */
biowait(bp)
	register struct buf *bp;
{
	int s;

	s = splbio();
	while ((bp->b_flags & B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	if ((bp->b_flags & B_ERROR) == 0)
		return (0);
	if (bp->b_error)
		return (bp->b_error);
	return (EIO);
}

/*
 * Mark I/O complete on a buffer.
 *
 * If a callback has been requested, e.g. the pageout
 * daemon, do so. Otherwise, awaken waiting processes.
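 *
 * An illustrative asynchronous consumer ("mydone" is an assumption,
 * not a routine in this file) arranges for a callback like this:
 *
 *	bp->b_flags |= B_CALL;
 *	bp->b_iodone = mydone;
 *	VOP_STRATEGY(bp);
 *
 * biodone() then calls mydone(bp), typically from interrupt level,
 * and the callback becomes responsible for releasing the buffer.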
 */
void
biodone(bp)
	register struct buf *bp;
{

	if (bp->b_flags & B_DONE)
		panic("dup biodone");
	bp->b_flags |= B_DONE;
	if ((bp->b_flags & B_READ) == 0)
		vwakeup(bp);
	if (bp->b_flags & B_CALL) {
		bp->b_flags &= ~B_CALL;
		(*bp->b_iodone)(bp);
		return;
	}
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Return the number of buffers on the BQ_LOCKED free list.
 */
int
count_lock_queue()
{
	register struct buf *bp;
	register int ret;

	for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].qe_next;
	    bp; bp = (struct buf *)bp->b_freelist.qe_next)
		++ret;
	return (ret);
}

#ifdef DIAGNOSTIC
/*
 * Print out statistics on the current allocation of the buffer pool.
 * Can be enabled to print out on every ``sync'' by setting "syncprt"
 * in vfs_syscalls.c using sysctl.
 */
void
vfs_bufstats()
{
	int s, i, j, count;
	register struct buf *bp;
	register struct queue_entry *dp;
	int counts[MAXBSIZE/CLBYTES+1];
	static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" };

	for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) {
		count = 0;
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			counts[j] = 0;
		s = splbio();
		for (bp = dp->qe_next; bp; bp = bp->b_freelist.qe_next) {
			counts[bp->b_bufsize/CLBYTES]++;
			count++;
		}
		splx(s);
		printf("%s: total-%d", bname[i], count);
		for (j = 0; j <= MAXBSIZE/CLBYTES; j++)
			if (counts[j] != 0)
				printf(", %d-%d", j * CLBYTES, counts[j]);
		printf("\n");
	}
}
#endif /* DIAGNOSTIC */
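/*
 * A hypothetical line of vfs_bufstats() output, as illustration only
 * (a CLBYTES of 1024 is an assumption): an LRU queue holding thirty
 * 4096-byte buffers and five 8192-byte buffers prints as
 *
 *	LRU: total-35, 4096-30, 8192-5
 */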