/*-
 * Copyright (c) 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * This code is derived from software contributed to Berkeley by
 * Berkeley Software Design Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_bio.c	8.10 (Berkeley) 02/04/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>

/*
 * Definitions for the buffer hash lists.
 *
 * A buffer is hashed by the pair (vnode pointer, logical block number);
 * bufhash is used as a bit mask over the combined value, so the table
 * size is expected to be a power of two (it is allocated by hashinit()
 * in bufinit() below).  Buffers not associated with any valid block are
 * kept on the separate "invalhash" list.
 */
#define	BUFHASH(dvp, lbn)	\
	(&bufhashtbl[((int)(dvp) / sizeof(*(dvp)) + (int)(lbn)) & bufhash])
LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
u_long	bufhash;		/* mask applied by BUFHASH() */

/*
 * Insq/Remq for the buffer hash lists.
 */
#define	binshash(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_hash)
#define	bremhash(bp)		LIST_REMOVE(bp, b_hash)

/*
 * Definitions for the buffer free lists.
 */
#define	BQUEUES		4		/* number of free buffer queues */

#define	BQ_LOCKED	0		/* super-blocks &c */
#define	BQ_LRU		1		/* lru, useful buffers */
#define	BQ_AGE		2		/* rubbish */
#define	BQ_EMPTY	3		/* buffer headers with no memory */

TAILQ_HEAD(bqueues, buf) bufqueues[BQUEUES];
int needbuffer;			/* nonzero when a process waits for a free buf */

/*
 * Insq/Remq for the buffer free lists.
 */
#define	binsheadfree(bp, dp)	TAILQ_INSERT_HEAD(dp, bp, b_freelist)
#define	binstailfree(bp, dp)	TAILQ_INSERT_TAIL(dp, bp, b_freelist)

/*
 * Remove bp from whichever free list it is currently on.
 * The buffer must be on some free list when this is called.
 */
void
bremfree(bp)
	struct buf *bp;
{
	struct bqueues *dp = NULL;

	/*
	 * We only calculate the head of the freelist when removing
	 * the last element of the list as that is the only time that
	 * it is needed (e.g. to reset the tail pointer).
	 *
	 * NB: This makes an assumption about how tailq's are implemented:
	 * TAILQ_REMOVE only dereferences the head argument when the
	 * element being removed is the last one, so dp may legitimately
	 * remain NULL in the common (non-tail) case.
	 */
	if (bp->b_freelist.tqe_next == NULL) {
		/* Find the queue whose tail pointer references bp. */
		for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++)
			if (dp->tqh_last == &bp->b_freelist.tqe_next)
				break;
		if (dp == &bufqueues[BQUEUES])
			panic("bremfree: lost tail");
	}
	TAILQ_REMOVE(dp, bp, b_freelist);
}

/*
 * Initialize buffers and hash links for buffers.
8649280Skarels */ 8751455Sbostic void 8849280Skarels bufinit() 8949280Skarels { 9056395Smckusick register struct buf *bp; 9165256Smckusick struct bqueues *dp; 9249280Skarels register int i; 9349280Skarels int base, residual; 9449280Skarels 9556395Smckusick for (dp = bufqueues; dp < &bufqueues[BQUEUES]; dp++) 9665256Smckusick TAILQ_INIT(dp); 9765256Smckusick bufhashtbl = hashinit(nbuf, M_CACHE, &bufhash); 9849280Skarels base = bufpages / nbuf; 9949280Skarels residual = bufpages % nbuf; 10049280Skarels for (i = 0; i < nbuf; i++) { 10149280Skarels bp = &buf[i]; 10256395Smckusick bzero((char *)bp, sizeof *bp); 10349280Skarels bp->b_dev = NODEV; 10449280Skarels bp->b_rcred = NOCRED; 10549280Skarels bp->b_wcred = NOCRED; 10665552Smckusick bp->b_vnbufs.le_next = NOLIST; 10764536Sbostic bp->b_data = buffers + i * MAXBSIZE; 10849280Skarels if (i < residual) 10949280Skarels bp->b_bufsize = (base + 1) * CLBYTES; 11049280Skarels else 11149280Skarels bp->b_bufsize = base * CLBYTES; 11252413Storek bp->b_flags = B_INVAL; 11356395Smckusick dp = bp->b_bufsize ? &bufqueues[BQ_AGE] : &bufqueues[BQ_EMPTY]; 11452413Storek binsheadfree(bp, dp); 11556395Smckusick binshash(bp, &invalhash); 11649280Skarels } 11749280Skarels } 11849280Skarels 11949280Skarels /* 12046151Smckusick * Find the block in the buffer pool. 12146151Smckusick * If the buffer is not present, allocate a new buffer and load 12246151Smckusick * its contents according to the filesystem fill routine. 
 */
/*
 * Read a block from the given vnode into the cache.
 * On return *bpp always refers to the (busy) buffer; the return value
 * is 0 on success or an errno obtained from biowait().
 */
bread(vp, blkno, size, cred, bpp)
	struct vnode *vp;
	daddr_t blkno;
	int size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp;

	if (size == 0)
		panic("bread: size 0");
	*bpp = bp = getblk(vp, blkno, size, 0, 0);
	/* Cache hit: a done or delayed-write buffer already holds the data. */
	if (bp->b_flags & (B_DONE | B_DELWRI)) {
		trace(TR_BREADHIT, pack(vp, size), blkno);
		return (0);
	}
	bp->b_flags |= B_READ;
	if (bp->b_bcount > bp->b_bufsize)
		panic("bread");
	/* Remember the credentials used for the read, for later re-reads. */
	if (bp->b_rcred == NOCRED && cred != NOCRED) {
		crhold(cred);
		bp->b_rcred = cred;
	}
	VOP_STRATEGY(bp);
	trace(TR_BREADMISS, pack(vp, size), blkno);
	p->p_stats->p_ru.ru_inblock++;		/* pay for read */
	return (biowait(bp));
}

/*
 * Operates like bread, but also starts I/O on the N specified
 * read-ahead blocks.  The read-ahead I/O is asynchronous; only the
 * primary block is waited for.
 */
breadn(vp, blkno, size, rablkno, rabsize, num, cred, bpp)
	struct vnode *vp;
	daddr_t blkno; int size;
	daddr_t rablkno[]; int rabsize[];
	int num;
	struct ucred *cred;
	struct buf **bpp;
{
	struct proc *p = curproc;		/* XXX */
	register struct buf *bp, *rabp;
	register int i;

	bp = NULL;
	/*
	 * If the block is not memory resident,
	 * allocate a buffer and start I/O.
	 */
	if (!incore(vp, blkno)) {
		*bpp = bp = getblk(vp, blkno, size, 0, 0);
		if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
			bp->b_flags |= B_READ;
			if (bp->b_bcount > bp->b_bufsize)
				panic("breadn");
			if (bp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				bp->b_rcred = cred;
			}
			VOP_STRATEGY(bp);
			trace(TR_BREADMISS, pack(vp, size), blkno);
			p->p_stats->p_ru.ru_inblock++;	/* pay for read */
		} else {
			trace(TR_BREADHIT, pack(vp, size), blkno);
		}
	}

	/*
	 * If there's read-ahead block(s), start I/O
	 * on them also (as above).
	 */
	for (i = 0; i < num; i++) {
		if (incore(vp, rablkno[i]))
			continue;
		rabp = getblk(vp, rablkno[i], rabsize[i], 0, 0);
		if (rabp->b_flags & (B_DONE | B_DELWRI)) {
			/* Already valid: nothing to start, just release. */
			brelse(rabp);
			trace(TR_BREADHITRA, pack(vp, rabsize[i]), rablkno[i]);
		} else {
			/* B_ASYNC: biodone() will release the buffer. */
			rabp->b_flags |= B_ASYNC | B_READ;
			if (rabp->b_bcount > rabp->b_bufsize)
				panic("breadrabp");
			if (rabp->b_rcred == NOCRED && cred != NOCRED) {
				crhold(cred);
				rabp->b_rcred = cred;
			}
			VOP_STRATEGY(rabp);
			trace(TR_BREADMISSRA, pack(vp, rabsize[i]), rablkno[i]);
			p->p_stats->p_ru.ru_inblock++;	/* pay in advance */
		}
	}

	/*
	 * If block was memory resident, let bread get it.
	 * If block was not memory resident, the read was
	 * started above, so just wait for the read to complete.
	 */
	if (bp == NULL)
		return (bread(vp, blkno, size, cred, bpp));
	return (biowait(bp));
}

/*
 * Synchronous write.
 * Release buffer on completion.
 * Returns 0, or an errno from biowait() (EINTR if the wait was
 * interrupted, as signalled by B_EINTR).
 */
bwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */
	register int flag;
	int s, error = 0;

	/*
	 * On a filesystem mounted MNT_ASYNC, convert a synchronous
	 * write into a delayed write instead.
	 */
	if ((bp->b_flags & B_ASYNC) == 0 &&
	    bp->b_vp && bp->b_vp->v_mount &&
	    (bp->b_vp->v_mount->mnt_flag & MNT_ASYNC)) {
		bdwrite(bp);
		return (0);
	}
	/* Snapshot the flags; B_DELWRI/B_ASYNC decide accounting below. */
	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
	if (flag & B_ASYNC) {
		if ((flag & B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
	}
	trace(TR_BWRITE, pack(bp->b_vp, bp->b_bcount), bp->b_lblkno);
	if (bp->b_bcount > bp->b_bufsize)
		panic("bwrite");
	s = splbio();
	bp->b_vp->v_numoutput++;
	bp->b_flags |= B_WRITEINPROG;
	splx(s);
	VOP_STRATEGY(bp);

	/*
	 * If the write was synchronous, then await I/O completion.
	 * If the write was "delayed", then we put the buffer on
	 * the queue of blocks awaiting I/O completion status.
	 */
	if ((flag & B_ASYNC) == 0) {
		error = biowait(bp);
		if ((flag&B_DELWRI) == 0)
			p->p_stats->p_ru.ru_oublock++;	/* no one paid yet */
		else
			reassignbuf(bp, bp->b_vp);
		if (bp->b_flags & B_EINTR) {
			bp->b_flags &= ~B_EINTR;
			error = EINTR;
		}
		brelse(bp);
	} else if (flag & B_DELWRI) {
		s = splbio();
		bp->b_flags |= B_AGE;
		splx(s);
	}
	return (error);
}

/*
 * Default VOP_BWRITE implementation: simply a synchronous bwrite().
 */
int
vn_bwrite(ap)
	struct vop_bwrite_args *ap;
{

	return (bwrite(ap->a_bp));
}


/*
 * Delayed write.
 *
 * The buffer is marked dirty, but is not queued for I/O.
 * This routine should be used when the buffer is expected
 * to be modified again soon, typically a small write that
 * partially fills a buffer.
 *
 * NB: magnetic tapes cannot be delayed; they must be
 * written in the order that the writes are requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	struct proc *p = curproc;		/* XXX */

	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
		p->p_stats->p_ru.ru_oublock++;		/* no one paid yet */
	}
	/*
	 * If this is a tape drive, the write must be initiated.
	 * (B_TAPE ioctl returning 0 identifies a tape device.)
	 */
	if (VOP_IOCTL(bp->b_vp, 0, (caddr_t)B_TAPE, 0, NOCRED, p) == 0) {
		bawrite(bp);
	} else {
		bp->b_flags |= (B_DONE | B_DELWRI);
		brelse(bp);
	}
}

/*
 * Asynchronous write.
 * Start I/O on a buffer, but do not wait for it to complete.
 * The buffer is released when the I/O completes.
 */
bawrite(bp)
	register struct buf *bp;
{

	/*
	 * Setting the ASYNC flag causes bwrite to return
	 * after starting the I/O.
	 */
	bp->b_flags |= B_ASYNC;
	(void) VOP_BWRITE(bp);
}

/*
 * Release a buffer.
 * Even if the buffer is dirty, no I/O is started.
 * The buffer is placed back on an appropriate free list and
 * anyone waiting for a buffer is awakened.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct bqueues *flist;
	int s;

	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	/*
	 * If a process is waiting for the buffer, or
	 * is waiting for a free buffer, awaken it.
	 */
	if (bp->b_flags & B_WANTED)
		wakeup((caddr_t)bp);
	if (needbuffer) {
		needbuffer = 0;
		wakeup((caddr_t)&needbuffer);
	}
	/*
	 * Retry I/O for locked buffers rather than invalidating them.
	 */
	s = splbio();
	if ((bp->b_flags & B_ERROR) && (bp->b_flags & B_LOCKED))
		bp->b_flags &= ~B_ERROR;
	/*
	 * Disassociate buffers that are no longer valid.
	 */
	if (bp->b_flags & (B_NOCACHE | B_ERROR))
		bp->b_flags |= B_INVAL;
	if ((bp->b_bufsize <= 0) || (bp->b_flags & (B_ERROR | B_INVAL))) {
		if (bp->b_vp)
			brelvp(bp);
		bp->b_flags &= ~B_DELWRI;
	}
	/*
	 * Stick the buffer back on a free list.
	 */
	if (bp->b_bufsize <= 0) {
		/* block has no buffer ... put at front of unused buffer list */
		flist = &bufqueues[BQ_EMPTY];
		binsheadfree(bp, flist);
	} else if (bp->b_flags & (B_ERROR | B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bufqueues[BQ_AGE];
		binsheadfree(bp, flist);
	} else {
		/* Valid data: queue at the tail so it is reclaimed last. */
		if (bp->b_flags & B_LOCKED)
			flist = &bufqueues[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bufqueues[BQ_AGE];
		else
			flist = &bufqueues[BQ_LRU];
		binstailfree(bp, flist);
	}
	bp->b_flags &= ~(B_WANTED | B_BUSY | B_ASYNC | B_AGE | B_NOCACHE);
	splx(s);
}

/*
 * Check to see if a block is currently memory resident.
 * Returns the buffer if found (even if busy), NULL otherwise;
 * invalidated buffers never match.
 */
struct buf *
incore(vp, blkno)
	struct vnode *vp;
	daddr_t blkno;
{
	register struct buf *bp;

	for (bp = BUFHASH(vp, blkno)->lh_first; bp; bp = bp->b_hash.le_next)
		if (bp->b_lblkno == blkno && bp->b_vp == vp &&
		    (bp->b_flags & B_INVAL) == 0)
			return (bp);
	return (NULL);
}

/*
 * Check to see if a block is currently memory resident.
 * If it is resident, return it. If it is not resident,
 * allocate a new buffer and assign it to the block.
 *
 * slpflag/slptimeo are passed through to tsleep(); a NULL return
 * means the sleep was interrupted or timed out.  The returned
 * buffer is marked B_BUSY.
 */
struct buf *
getblk(vp, blkno, size, slpflag, slptimeo)
	register struct vnode *vp;
	daddr_t blkno;
	int size, slpflag, slptimeo;
{
	register struct buf *bp;
	struct bufhashhdr *dp;
	int s, error;

	if (size > MAXBSIZE)
		panic("getblk: size too big");
	/*
	 * Search the cache for the block. If the buffer is found,
	 * but it is currently locked, the we must wait for it to
	 * become available.
	 */
	dp = BUFHASH(vp, blkno);
loop:
	for (bp = dp->lh_first; bp; bp = bp->b_hash.le_next) {
		if (bp->b_lblkno != blkno || bp->b_vp != vp)
			continue;
		s = splbio();
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
				"getblk", slptimeo);
			splx(s);
			if (error)
				return (NULL);
			/* Buffer may have changed while we slept; rescan. */
			goto loop;
		}
		/*
		 * The test for B_INVAL is moved down here, since there
		 * are cases where B_INVAL is set before VOP_BWRITE() is
		 * called and for NFS, the process cannot be allowed to
		 * allocate a new buffer for the same block until the write
		 * back to the server has been completed. (ie. B_BUSY clears)
		 */
		if (bp->b_flags & B_INVAL) {
			splx(s);
			continue;
		}
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		if (bp->b_bcount != size) {
			/* Size mismatch: flush the stale buffer and retry. */
			printf("getblk: stray size\n");
			bp->b_flags |= B_INVAL;
			VOP_BWRITE(bp);
			goto loop;
		}
		bp->b_flags |= B_CACHE;
		return (bp);
	}
	/*
	 * The loop back to the top when getnewbuf() fails is because
	 * stateless filesystems like NFS have no node locks. Thus,
	 * there is a slight chance that more than one process will
	 * try and getnewbuf() for the same block concurrently when
	 * the first sleeps in getnewbuf(). So after a sleep, go back
	 * up to the top to check the hash lists again.
	 */
	if ((bp = getnewbuf(slpflag, slptimeo)) == 0)
		goto loop;
	/* Bind the fresh buffer to (vp, blkno) and size it. */
	bremhash(bp);
	bgetvp(vp, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = blkno;
	bp->b_blkno = blkno;
	bp->b_error = 0;
	bp->b_resid = 0;
	binshash(bp, dp);
	allocbuf(bp, size);
	return (bp);
}

/*
 * Allocate a buffer.
 * The caller will assign it to a block.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp;

	if (size > MAXBSIZE)
		panic("geteblk: size too big");
	/* getnewbuf() with no sleep flags retries until a buffer appears. */
	while ((bp = getnewbuf(0, 0)) == NULL)
		/* void */;
	bp->b_flags |= B_INVAL;
	bremhash(bp);
	binshash(bp, &invalhash);
	bp->b_bcount = 0;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	return (bp);
}

/*
 * Expand or contract the actual memory allocated to a buffer.
 * If no memory is available, release buffer and take error exit.
 *
 * Memory is shuffled between buffer headers with pagemove();
 * the total amount of buffer memory in the system is constant.
 * Always returns 1 and sets tp->b_bcount to the requested size.
 */
allocbuf(tp, size)
	register struct buf *tp;
	int size;
{
	register struct buf *bp, *ep;
	int sizealloc, take, s;

	sizealloc = roundup(size, CLBYTES);
	/*
	 * Buffer size does not change
	 */
	if (sizealloc == tp->b_bufsize)
		goto out;
	/*
	 * Buffer size is shrinking.
	 * Place excess space in a buffer header taken from the
	 * BQ_EMPTY buffer list and placed on the "most free" list.
	 * If no extra buffer headers are available, leave the
	 * extra space in the present buffer.
	 */
	if (sizealloc < tp->b_bufsize) {
		if ((ep = bufqueues[BQ_EMPTY].tqh_first) == NULL)
			goto out;
		s = splbio();
		bremfree(ep);
		ep->b_flags |= B_BUSY;
		splx(s);
		/* Move the trailing excess pages into the spare header. */
		pagemove((char *)tp->b_data + sizealloc, ep->b_data,
		    (int)tp->b_bufsize - sizealloc);
		ep->b_bufsize = tp->b_bufsize - sizealloc;
		tp->b_bufsize = sizealloc;
		ep->b_flags |= B_INVAL;
		ep->b_bcount = 0;
		brelse(ep);
		goto out;
	}
	/*
	 * More buffer space is needed. Get it out of buffers on
	 * the "most free" list, placing the empty headers on the
	 * BQ_EMPTY buffer header list.
	 */
	while (tp->b_bufsize < sizealloc) {
		take = sizealloc - tp->b_bufsize;
		while ((bp = getnewbuf(0, 0)) == NULL)
			/* void */;
		if (take >= bp->b_bufsize)
			take = bp->b_bufsize;
		/* Steal "take" bytes from the tail of bp into tp. */
		pagemove(&((char *)bp->b_data)[bp->b_bufsize - take],
		    &((char *)tp->b_data)[tp->b_bufsize], take);
		tp->b_bufsize += take;
		bp->b_bufsize = bp->b_bufsize - take;
		if (bp->b_bcount > bp->b_bufsize)
			bp->b_bcount = bp->b_bufsize;
		if (bp->b_bufsize <= 0) {
			/* Fully drained: header moves to the invalid hash. */
			bremhash(bp);
			binshash(bp, &invalhash);
			bp->b_dev = NODEV;
			bp->b_error = 0;
			bp->b_flags |= B_INVAL;
		}
		brelse(bp);
	}
out:
	tp->b_bcount = size;
	return (1);
}

/*
 * Find a buffer which is available
 * for use.
 * Select something from a free list.
 * Preference is to AGE list, then LRU list.
 *
 * Returns a B_BUSY buffer stripped of its old identity and
 * credentials, or NULL if the tsleep() waiting for a free buffer
 * was interrupted or timed out (slpflag/slptimeo as for tsleep()).
 */
struct buf *
getnewbuf(slpflag, slptimeo)
	int slpflag, slptimeo;
{
	register struct buf *bp;
	register struct bqueues *dp;
	register struct ucred *cred;
	int s;

loop:
	s = splbio();
	/* Scan AGE first, then LRU; BQ_LOCKED (index 0) is never taken. */
	for (dp = &bufqueues[BQ_AGE]; dp > bufqueues; dp--)
		if (dp->tqh_first)
			break;
	if (dp == bufqueues) {		/* no free blocks */
		needbuffer = 1;
		(void) tsleep((caddr_t)&needbuffer, slpflag | (PRIBIO + 1),
			"getnewbuf", slptimeo);
		splx(s);
		return (NULL);
	}
	bp = dp->tqh_first;
	bremfree(bp);
	bp->b_flags |= B_BUSY;
	splx(s);
	if (bp->b_flags & B_DELWRI) {
		/* Dirty: push it out (async release) and pick another. */
		(void) bawrite(bp);
		goto loop;
	}
	trace(TR_BRELSE, pack(bp->b_vp, bp->b_bufsize), bp->b_lblkno);
	if (bp->b_vp)
		brelvp(bp);
	/* Drop any read/write credentials held from the old identity. */
	if (bp->b_rcred != NOCRED) {
		cred = bp->b_rcred;
		bp->b_rcred = NOCRED;
		crfree(cred);
	}
	if (bp->b_wcred != NOCRED) {
		cred = bp->b_wcred;
		bp->b_wcred = NOCRED;
		crfree(cred);
	}
	bp->b_flags = B_BUSY;
	bp->b_dirtyoff = bp->b_dirtyend = 0;
	bp->b_validoff = bp->b_validend = 0;
	return (bp);
}

/*
 * Wait for I/O to complete.
 *
 * Extract and return any errors associated with the I/O.
65646151Smckusick * If the error flag is set, but no specific error is 65746151Smckusick * given, return EIO. 6588Sbill */ 6597015Smckusick biowait(bp) 6606563Smckusic register struct buf *bp; 6618Sbill { 6625431Sroot int s; 6638Sbill 66426271Skarels s = splbio(); 66538776Smckusick while ((bp->b_flags & B_DONE) == 0) 6668Sbill sleep((caddr_t)bp, PRIBIO); 6675431Sroot splx(s); 66837736Smckusick if ((bp->b_flags & B_ERROR) == 0) 66937736Smckusick return (0); 67037736Smckusick if (bp->b_error) 67137736Smckusick return (bp->b_error); 67237736Smckusick return (EIO); 6738Sbill } 6748Sbill 6758Sbill /* 67613128Ssam * Mark I/O complete on a buffer. 67746151Smckusick * 67846151Smckusick * If a callback has been requested, e.g. the pageout 67946151Smckusick * daemon, do so. Otherwise, awaken waiting processes. 6808Sbill */ 68151455Sbostic void 6827015Smckusick biodone(bp) 6837015Smckusick register struct buf *bp; 6848Sbill { 6858Sbill 686420Sbill if (bp->b_flags & B_DONE) 6877015Smckusick panic("dup biodone"); 6888Sbill bp->b_flags |= B_DONE; 68949232Smckusick if ((bp->b_flags & B_READ) == 0) 69049232Smckusick vwakeup(bp); 6919763Ssam if (bp->b_flags & B_CALL) { 6929763Ssam bp->b_flags &= ~B_CALL; 6939763Ssam (*bp->b_iodone)(bp); 6949763Ssam return; 6959763Ssam } 69646151Smckusick if (bp->b_flags & B_ASYNC) 6978Sbill brelse(bp); 6988Sbill else { 6998Sbill bp->b_flags &= ~B_WANTED; 7008Sbill wakeup((caddr_t)bp); 7018Sbill } 7028Sbill } 70356356Smckusick 70457035Smargo int 70557035Smargo count_lock_queue() 70657035Smargo { 70757035Smargo register struct buf *bp; 70857035Smargo register int ret; 70957035Smargo 71065256Smckusick for (ret = 0, bp = (struct buf *)bufqueues[BQ_LOCKED].tqh_first; 71165256Smckusick bp; bp = (struct buf *)bp->b_freelist.tqe_next) 71257035Smargo ++ret; 71357035Smargo return(ret); 71457035Smargo } 71557035Smargo 71656356Smckusick #ifdef DIAGNOSTIC 71756356Smckusick /* 71856356Smckusick * Print out statistics on the current allocation of the buffer pool. 
71956356Smckusick * Can be enabled to print out on every ``sync'' by setting "syncprt" 72059879Smckusick * in vfs_syscalls.c using sysctl. 72156356Smckusick */ 72256356Smckusick void 72356356Smckusick vfs_bufstats() 72456356Smckusick { 72556356Smckusick int s, i, j, count; 72656395Smckusick register struct buf *bp; 72765256Smckusick register struct bqueues *dp; 72856356Smckusick int counts[MAXBSIZE/CLBYTES+1]; 72956356Smckusick static char *bname[BQUEUES] = { "LOCKED", "LRU", "AGE", "EMPTY" }; 73056356Smckusick 73156395Smckusick for (dp = bufqueues, i = 0; dp < &bufqueues[BQUEUES]; dp++, i++) { 73256356Smckusick count = 0; 73356356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 73456356Smckusick counts[j] = 0; 73556356Smckusick s = splbio(); 73665256Smckusick for (bp = dp->tqh_first; bp; bp = bp->b_freelist.tqe_next) { 73756356Smckusick counts[bp->b_bufsize/CLBYTES]++; 73856356Smckusick count++; 73956356Smckusick } 74056356Smckusick splx(s); 74156356Smckusick printf("%s: total-%d", bname[i], count); 74256356Smckusick for (j = 0; j <= MAXBSIZE/CLBYTES; j++) 74356356Smckusick if (counts[j] != 0) 74456356Smckusick printf(", %d-%d", j * CLBYTES, counts[j]); 74556356Smckusick printf("\n"); 74656356Smckusick } 74756356Smckusick } 74856356Smckusick #endif /* DIAGNOSTIC */ 749