/*	vfs_cluster.c	4.29	82/04/19	*/
/* merged into kernel:	@(#)bio.c	2.3	4/8/82 */

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer
 * to the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

struct	buf bfreelist[BQUEUES];
struct	buf bswlist, *bclnlist;

#define	BUFHSZ	63
#define	RND	(MAXBSIZE/DEV_BSIZE)
struct	bufhd bufhash[BUFHSZ];
#define	BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(((int)(dblkno))/RND)) % BUFHSZ])

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[64];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf *swbuf;
short	*swsize;		/* CAN WE JUST USE B_BCOUNT? */
int	*swpf;

#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int x = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(x); \
}
#endif
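/*
 * Illustrative sketch only, not from the original source: the typical
 * get/modify/release cycle the comment above describes.  The device
 * number "dev", block number "blkno", and size "bsize" are hypothetical
 * stand-ins supplied by a caller.
 */
#ifdef notdef
example(dev, blkno, bsize)
	dev_t dev;
	daddr_t blkno;
	int bsize;
{
	register struct buf *bp;

	bp = bread(dev, blkno, bsize);	/* marked busy; contents valid */
	if (u.u_error) {		/* bread posts errors via geterror */
		brelse(bp);
		return;
	}
	bp->b_un.b_words[0] = 0;	/* modify the block in core */
	bdwrite(bp);			/* release; write back lazily */
}
#endif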
/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp;

	bp = getblk(dev, blkno, size);
	if (bp->b_flags&B_DONE) {
#ifdef	TRACE
		trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
	trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno, size)
	dev_t dev;
	daddr_t blkno, rablkno;
	int size;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno, size);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
			trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	TRACE
		else
			trace(TR_BREADHIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno, size);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	TRACE
			trace(TR_BREADHITRA, dev, blkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	TRACE
			trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return(bread(dev, blkno, size));
	iowait(bp);
	return(bp);
}
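/*
 * Illustrative sketch only: sequential reading in the style breada is
 * meant for, passing the next logical block as the read-ahead block.
 * The names "dev", "lbn", "nblocks", and "bsize" are hypothetical.
 */
#ifdef notdef
	for (lbn = 0; lbn < nblocks; lbn++) {
		bp = breada(dev, lbn, lbn + 1, bsize);
		/* ... consume bp->b_un.b_addr ... */
		brelse(bp);
	}
#endif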
/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
#ifdef	TRACE
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}
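/*
 * Illustrative sketch only: how a caller chooses among the three write
 * interfaces above.  "bp" is a hypothetical buffer obtained from bread
 * or getblk.
 */
#ifdef notdef
	bwrite(bp);	/* synchronous: start I/O, wait, then release */
	bawrite(bp);	/* asynchronous: start I/O now, don't wait */
	bdwrite(bp);	/* delayed: just mark dirty; written when the
			 * buffer is reclaimed or when bflush runs;
			 * good for partial blocks written piecemeal */
#endif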
/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;

	dp = BUFHASH(dev, blkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == blkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}

struct buf *
baddr(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno, size));
	return (0);
}
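/*
 * Illustrative sketch only: baddr lets a caller use a block only if it
 * is already cached, without ever starting a read.  The names "dev",
 * "blkno", and "bsize" are hypothetical.
 */
#ifdef notdef
	if (bp = baddr(dev, blkno, bsize)) {
		/* ... use the cached copy ... */
		brelse(bp);
	}
#endif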
/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno, size)
	dev_t dev;
	daddr_t blkno;
	int size;
{
	register struct buf *bp, *dp, *ep;
#ifdef	DISKMON
	register int i;
#endif
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dp = BUFHASH(dev, blkno);
loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != blkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while ((dp->b_flags & B_HEAD) == 0) {
			i++;
			dp = dp->av_forw;
		}
		if (i < 64)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		brealloc(bp, size);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY;
	bfree(bp);
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = blkno;
	brealloc(bp, size);
	return(bp);
}
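/*
 * Illustrative sketch only: when a block is about to be completely
 * overwritten there is no need to read it first, so getblk plus clrbuf
 * (defined below) is the usual pattern for allocating a fresh block.
 * The names "dev", "newblkno", and "bsize" are hypothetical.
 */
#ifdef notdef
	bp = getblk(dev, newblkno, bsize);	/* no read is started */
	clrbuf(bp);				/* zero its contents */
	bdwrite(bp);				/* release; written later */
#endif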
/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk(size)
	int size;
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_bcount = size;
	return(bp);
}

/*
 * Allocate space associated with a buffer.
 */
brealloc(bp, size)
	register struct buf *bp;
	int size;
{
	daddr_t start, last;
	register struct buf *ep;
	struct buf *dp;
	int s;

	/*
	 * First need to make sure that all overlapping previous I/O
	 * is disposed of.
	 */
	if (size == bp->b_bcount)
		return;
	if (size < bp->b_bcount) {
		bp->b_bcount = size;
		return;
	}
	start = bp->b_blkno + (bp->b_bcount / DEV_BSIZE);
	last = bp->b_blkno + (size / DEV_BSIZE) - 1;
	if (bp->b_bcount == 0) {
		start++;
		if (start == last)
			goto allocit;
	}
	dp = BUFHASH(bp->b_dev, bp->b_blkno);
loop:
	(void) spl0();
	for (ep = dp->b_forw; ep != dp; ep = ep->b_forw) {
		if (ep->b_blkno < start || ep->b_blkno > last ||
		    ep->b_dev != bp->b_dev || ep->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (ep->b_flags&B_BUSY) {
			ep->b_flags |= B_WANTED;
			sleep((caddr_t)ep, PRIBIO+1);
			splx(s);
			goto loop;
		}
		(void) spl0();
		/*
		 * What we would really like to do is kill this
		 * I/O since it is now useless.  We cannot do that
		 * so we force it to complete, so that it cannot
		 * over-write our useful data later.
		 */
		if (ep->b_flags & B_DELWRI) {
			notavail(ep);
			ep->b_flags |= B_ASYNC;
			bwrite(ep);
			goto loop;
		}
	}
allocit:
	/*
	 * Here the buffer is already available, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = size;
}

/*
 * Release space associated with a buffer.
 */
bfree(bp)
	struct buf *bp;
{
	/*
	 * Here the buffer does not change, so all we
	 * need to do is set the size.  Someday a better memory
	 * management scheme will be implemented.
	 */
	bp->b_bcount = 0;
}
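/*
 * Illustrative sketch only: growing a buffer with brealloc.  If a
 * fragment was cached at 512 bytes and the file is later extended,
 * the same buffer is regrown, and any delayed writes covering the
 * newly spanned device blocks are forced out first.  The names and
 * sizes here are hypothetical.
 */
#ifdef notdef
	bp = getblk(dev, blkno, 512);	/* cache a one-sector fragment */
	/* ... file grows ... */
	brealloc(bp, 1024);		/* grow it to two sectors */
#endif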
/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

#ifdef	UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
	register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
	struct buf *bp;
{
	register int *p;
	register int c;

	p = bp->b_un.b_words;
	c = bp->b_bcount/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}
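/*
 * Illustrative sketch only: iodone is what a block device driver calls
 * from its interrupt routine when a transfer finishes.  The "xx"
 * driver, its queue head "xxtab", and xxerror() are hypothetical.
 */
#ifdef notdef
xxintr()
{
	register struct buf *bp;

	bp = xxtab.b_actf;		/* transfer at head of queue */
	if (xxerror())
		bp->b_flags |= B_ERROR;
	bp->b_resid = 0;
	iodone(bp);			/* wake the waiter or brelse it */
}
#endif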
/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i'th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;

	s = spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
#ifdef	TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		s = spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	s = spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
}
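/*
 * Illustrative sketch only: how callers are expected to invoke swap().
 * A synchronous page-in waits inside swap(); a dirty page push
 * (B_DIRTY) returns as soon as the I/O is queued.  All argument
 * values here are hypothetical.
 */
#ifdef notdef
	/* swap in nbytes of a process image, waiting for completion */
	swap(p, dblkno, addr, nbytes, B_READ, B_PGIN, swapdev, 0);

	/* queue a dirty page push for the pageout daemon; no wait */
	swap(p, dblkno, addr, ctob(CLSIZE), B_WRITE, B_DIRTY, swapdev, pfcent);
#endif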
/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{
	char *mesg;

	printf("pid %d: ", p->p_pid);
	if (rout)
		printf(mesg = "killed due to no swap space\n");
	else
		printf(mesg = "killed on swap error\n");
	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 * (and temporarily pagein)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if (bp->b_flags&B_DELWRI &&
			    (dev == NODEV || dev == bp->b_dev)) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above
 * pages are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
	int (*strat)();
	register struct buf *bp;
	dev_t dev;
	int rw;
	unsigned (*mincnt)();
{
	register int c;
	char *a;
	int s;

	if (useracc(u.u_base, u.u_count, rw==B_READ ? B_WRITE : B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	s = spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	splx(s);
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		splx(s);
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
		if (bp->b_flags&B_ERROR)
			break;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}

/*ARGSUSED*/
unsigned
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > 63 * 1024)
		bp->b_bcount = 63 * 1024;
}
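/*
 * Illustrative sketch only: the conventional raw-device read/write
 * entries a disk driver builds on physio.  The "xx" driver, its
 * strategy routine, and its private buffer header "rxxbuf" are
 * hypothetical.
 */
#ifdef notdef
xxread(dev)
	dev_t dev;
{

	physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
}

xxwrite(dev)
	dev_t dev;
{

	physio(xxstrategy, &rxxbuf, dev, B_WRITE, minphys);
}
#endif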
/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty, then
 * properly flush the queues.  Until that happy day, this suffices for
 * correctness. ... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}
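/*
 * Illustrative sketch only: when a filesystem is unmounted, the cache
 * is first flushed of delayed writes and then invalidated, roughly:
 */
#ifdef notdef
	bflush(dev);		/* push out write-behind blocks */
	binval(dev);		/* then drop the stale cache entries */
#endif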