/*	vfs_cluster.c	4.26	82/03/13	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

struct buf bfreelist[BQUEUES];
struct buf bswlist, *bclnlist;

#define BUFHSZ	63
struct bufhd bufhash[BUFHSZ];
#define BUFHASH(dev, dblkno)	\
	((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ])

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;
	register struct bufhd *bp;

	for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++)
		bp->b_forw = bp->b_back = (struct buf *)bp;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[64];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in one of three
 * different lists.  When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct buf *swbuf;
short *swsize;		/* CAN WE JUST USE B_BCOUNT? */
int *swpf;

#ifndef	UNFAST
#define	notavail(bp) \
{ \
	int x = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(x); \
}
#endif
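/*
 * Usage sketch (hypothetical caller, for illustration): the
 * allocate/release discipline described above.  A block is read,
 * modified in core, and released with a delayed write so the change
 * is written back before the buffer can be reassigned:
 *
 *	register struct buf *bp;
 *
 *	bp = bread(dev, blkno);			buffer comes back B_BUSY
 *	if (bp->b_flags & B_ERROR) {
 *		brelse(bp);			error already in u.u_error
 *		return;
 *	}
 *	... modify the data at bp->b_un.b_addr ...
 *	bdwrite(bp);				release, marked B_DELWRI
 */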
/*
 * Read in (if necessary) the block and return a buffer pointer.
 */
struct buf *
bread(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;

	bp = getblk(dev, blkno);
	if (bp->b_flags&B_DONE) {
#ifdef	TRACE
		trace(TR_BREADHIT, dev, blkno);
#endif
#ifdef	DISKMON
		io_info.ncache++;
#endif
		return(bp);
	}
	bp->b_flags |= B_READ;
	bp->b_bcount = BSIZE;
	(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
	trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
	io_info.nread++;
#endif
	u.u_vm.vm_inblk++;		/* pay for read */
	iowait(bp);
	return(bp);
}

/*
 * Read in the block, like bread, but also start I/O on the
 * read-ahead block (which is not allocated to the caller).
 */
struct buf *
breada(dev, blkno, rablkno)
	dev_t dev;
	daddr_t blkno, rablkno;
{
	register struct buf *bp, *rabp;

	bp = NULL;
	if (!incore(dev, blkno)) {
		bp = getblk(dev, blkno);
		if ((bp->b_flags&B_DONE) == 0) {
			bp->b_flags |= B_READ;
			bp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(bp);
#ifdef	TRACE
			trace(TR_BREADMISS, dev, blkno);
#endif
#ifdef	DISKMON
			io_info.nread++;
#endif
			u.u_vm.vm_inblk++;	/* pay for read */
		}
#ifdef	TRACE
		else
			trace(TR_BREADHIT, dev, blkno);
#endif
	}
	if (rablkno && !incore(dev, rablkno)) {
		rabp = getblk(dev, rablkno);
		if (rabp->b_flags & B_DONE) {
			brelse(rabp);
#ifdef	TRACE
			trace(TR_BREADHITRA, dev, rablkno);
#endif
		} else {
			rabp->b_flags |= B_READ|B_ASYNC;
			rabp->b_bcount = BSIZE;
			(*bdevsw[major(dev)].d_strategy)(rabp);
#ifdef	TRACE
			trace(TR_BREADMISSRA, dev, rablkno);
#endif
#ifdef	DISKMON
			io_info.nreada++;
#endif
			u.u_vm.vm_inblk++;	/* pay in advance */
		}
	}
	if (bp == NULL)
		return(bread(dev, blkno));
	iowait(bp);
	return(bp);
}
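/*
 * Usage sketch (hypothetical caller): a sequential reader primes the
 * cache one block ahead; the "bmap" logical-to-physical mapping and
 * the inode pointer "ip" are assumed here for illustration:
 *
 *	bn = bmap(ip, lbn);
 *	rabn = bmap(ip, lbn + 1);
 *	bp = breada(ip->i_dev, bn, rabn);
 *
 * Passing a read-ahead block number of 0 suppresses the read-ahead.
 */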
/*
 * Write the buffer, waiting for completion.
 * Then release the buffer.
 */
bwrite(bp)
	register struct buf *bp;
{
	register flag;

	flag = bp->b_flags;
	bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE);
	bp->b_bcount = BSIZE;
#ifdef	DISKMON
	io_info.nwrite++;
#endif
	if ((flag&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
#ifdef	TRACE
	trace(TR_BWRITE, bp->b_dev, bp->b_blkno);
#endif
	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
	if ((flag&B_ASYNC) == 0) {
		iowait(bp);
		brelse(bp);
	} else if (flag & B_DELWRI)
		bp->b_flags |= B_AGE;
	else
		geterror(bp);
}

/*
 * Release the buffer, marking it so that if it is grabbed
 * for another purpose it will be written out before being
 * given up (e.g. when writing a partial block where it is
 * assumed that another write for the same block will soon follow).
 * This can't be done for magtape, since writes must be done
 * in the same order as requested.
 */
bdwrite(bp)
	register struct buf *bp;
{
	register int flags;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;	/* no one paid yet */
	flags = bdevsw[major(bp->b_dev)].d_flags;
	if (flags & B_TAPE)
		bawrite(bp);
	else {
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
	register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * Release the buffer, with no I/O implied.
 */
brelse(bp)
	register struct buf *bp;
{
	register struct buf *flist;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist[0].b_flags&B_WANTED) {
		bfreelist[0].b_flags &= ~B_WANTED;
		wakeup((caddr_t)bfreelist);
	}
	if (bp->b_flags&B_ERROR)
		if (bp->b_flags & B_LOCKED)
			bp->b_flags &= ~B_ERROR;	/* try again later */
		else
			bp->b_dev = NODEV;		/* no assoc */
	s = spl6();
	if (bp->b_flags & (B_ERROR|B_INVAL)) {
		/* block has no info ... put at front of most-free list */
		flist = &bfreelist[BQUEUES-1];
		flist->av_forw->av_back = bp;
		bp->av_forw = flist->av_forw;
		flist->av_forw = bp;
		bp->av_back = flist;
	} else {
		if (bp->b_flags & B_LOCKED)
			flist = &bfreelist[BQ_LOCKED];
		else if (bp->b_flags & B_AGE)
			flist = &bfreelist[BQ_AGE];
		else
			flist = &bfreelist[BQ_LRU];
		flist->av_back->av_forw = bp;
		bp->av_back = flist->av_back;
		flist->av_back = bp;
		bp->av_forw = flist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada).
 */
incore(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp;
	register struct buf *dp;
	register int dblkno = fsbtodb(blkno);

	dp = BUFHASH(dev, dblkno);
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
		if (bp->b_blkno == dblkno && bp->b_dev == dev &&
		    !(bp->b_flags & B_INVAL))
			return (1);
	return (0);
}
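/*
 * Worked example of the BUFHASH computation (numbers for illustration
 * only): with BUFHSZ = 63, a device number of 0x0401 (major 4,
 * minor 1, i.e. 1025) and a device block number of 100 hash to
 * bucket (1025 + 100) % 63 = 1125 % 63 = 54, so incore() and
 * getblk() need only search the chain rooted at bufhash[54].
 */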
struct buf *
baddr(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 *
 * We use splx here because this routine may be called
 * on the interrupt stack during a dump, and we don't
 * want to lower the ipl back to 0.
 */
struct buf *
getblk(dev, blkno)
	dev_t dev;
	daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int dblkno = fsbtodb(blkno);
#ifdef	DISKMON
	register int i;
#endif
	int s;

	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
	dp = BUFHASH(dev, dblkno);
    loop:
	for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev ||
		    bp->b_flags&B_INVAL)
			continue;
		s = spl6();
		if (bp->b_flags&B_BUSY) {
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			splx(s);
			goto loop;
		}
		splx(s);
#ifdef	DISKMON
		i = 0;
		dp = bp->av_forw;
		while ((dp->b_flags & B_HEAD) == 0) {
			i++;
			dp = dp->av_forw;
		}
		if (i < 64)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	s = spl6();
	for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--)
		if (ep->av_forw != ep)
			break;
	if (ep == bfreelist) {		/* no free blocks at all */
		ep->b_flags |= B_WANTED;
		sleep((caddr_t)ep, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = ep->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	return(bp);
}
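/*
 * Usage sketch (hypothetical caller): when a brand-new disk block is
 * allocated, its previous contents are irrelevant, so the caller can
 * skip the read, zero the core copy, and schedule a delayed write:
 *
 *	bp = getblk(dev, bn);		no I/O; may reassign an old buffer
 *	clrbuf(bp);			zero the data, clear b_resid
 *	bdwrite(bp);
 */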
/*
 * Get an empty block,
 * not assigned to any particular device.
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;
	int s;

loop:
	s = spl6();
	for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--)
		if (dp->av_forw != dp)
			break;
	if (dp == bfreelist) {		/* no free blocks */
		dp->b_flags |= B_WANTED;
		sleep((caddr_t)dp, PRIBIO+1);
		splx(s);
		goto loop;
	}
	splx(s);
	bp = dp->av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
#ifdef	TRACE
	trace(TR_BRELSE, bp->b_dev, bp->b_blkno);
#endif
	bp->b_flags = B_BUSY|B_INVAL;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	return(bp);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
	register struct buf *bp;
{
	int s;

	s = spl6();
	while ((bp->b_flags&B_DONE) == 0)
		sleep((caddr_t)bp, PRIBIO);
	splx(s);
	geterror(bp);
}

#ifdef	UNFAST
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
	register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer.  If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon.  Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
	register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		bp->av_forw = bclnlist;
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		cnt.v_pgout++;
		cnt.v_pgpgout += bp->b_bcount / NBPG;
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
	struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}
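/*
 * Sketch of the completion side (hypothetical "xx" driver, for
 * illustration): a block driver's interrupt routine posts any error
 * in the buffer header and hands the buffer back through iodone(),
 * which either queues a dirty page push on the cleaned list or
 * releases the buffer and wakes the waiter, as above:
 *
 *	xxintr()
 *	{
 *		register struct buf *bp = xxtab.b_actf;
 *
 *		if (the transfer failed)
 *			bp->b_flags |= B_ERROR;
 *		xxtab.b_actf = bp->av_forw;	dequeue
 *		iodone(bp);
 *	}
 */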
/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i-th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion.  When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	int nbytes, rdflg, flag;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;
	int s;

	s = spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	splx(s);

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
#ifdef	TRACE
		trace(TR_SWAPIO, dev, bp->b_blkno);
#endif
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			if (c < nbytes)
				panic("big push");
			return;
		}
		s = spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PSWP);
		splx(s);
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	s = spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	splx(s);
}
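/*
 * Note on the transfer loop above (sizes assume the VAX, where a
 * click NBPG is 512 bytes): each pass moves at most ctob(120) =
 * 120 * 512 = 61440 bytes, so a large request that is not a dirty
 * page push is split into successive synchronous transfers, with
 * b_un.b_addr advanced by c bytes and dblkno by btoc(c) blocks per
 * pass.  A dirty page push must fit in a single pass, hence the
 * "big push" panic.
 */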
/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{
	char *mesg;

	printf("pid %d: ", p->p_pid);
	if (rout)
		printf(mesg = "killed due to no swap space\n");
	else
		printf(mesg = "killed on swap error\n");
	uprintf("sorry, pid %d was %s", p->p_pid, mesg);
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so no one will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * Make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
 * (from umount and update)
 */
bflush(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct buf *flist;
	int s;

loop:
	s = spl6();
	for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++)
		for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) {
			if (bp->b_flags&B_DELWRI &&
			    (dev == NODEV || dev == bp->b_dev)) {
				bp->b_flags |= B_ASYNC;
				notavail(bp);
				bwrite(bp);
				goto loop;
			}
		}
	splx(s);
}

/*
 * Raw I/O.  The arguments are
 *	The strategy routine for the device
 *	A buffer, which will always be a special buffer
 *	  header owned exclusively by the device for this purpose
 *	The device number
 *	Read/write flag
 * Essentially all the work is computing physical addresses and
 * validating them.
 * If the user has the proper access privileges, the process is
 * marked 'delayed unlock' and the pages involved in the I/O are
 * faulted and locked.  After the completion of the I/O, the above
 * pages are unlocked.
 */
physio(strat, bp, dev, rw, mincnt)
	int (*strat)();
	register struct buf *bp;
	dev_t dev;
	int rw;
	unsigned (*mincnt)();
{
	register int c;
	char *a;
	int s;

	if (useracc(u.u_base, u.u_count, rw==B_READ?B_WRITE:B_READ) == NULL) {
		u.u_error = EFAULT;
		return;
	}
	s = spl6();
	while (bp->b_flags&B_BUSY) {
		bp->b_flags |= B_WANTED;
		sleep((caddr_t)bp, PRIBIO+1);
	}
	bp->b_error = 0;
	bp->b_proc = u.u_procp;
	bp->b_un.b_addr = u.u_base;
	while (u.u_count != 0) {
		bp->b_flags = B_BUSY | B_PHYS | rw;
		bp->b_dev = dev;
		bp->b_blkno = u.u_offset >> PGSHIFT;
		bp->b_bcount = u.u_count;
		(*mincnt)(bp);
		c = bp->b_bcount;
		u.u_procp->p_flag |= SPHYSIO;
		vslock(a = bp->b_un.b_addr, c);
		(*strat)(bp);
		(void) spl6();
		while ((bp->b_flags&B_DONE) == 0)
			sleep((caddr_t)bp, PRIBIO);
		vsunlock(a, c, rw);
		u.u_procp->p_flag &= ~SPHYSIO;
		if (bp->b_flags&B_WANTED)
			wakeup((caddr_t)bp);
		splx(s);
		bp->b_un.b_addr += c;
		u.u_count -= c;
		u.u_offset += c;
		if (bp->b_flags&B_ERROR)
			break;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS);
	u.u_count = bp->b_resid;
	geterror(bp);
}

/*ARGSUSED*/
unsigned
minphys(bp)
	struct buf *bp;
{

	if (bp->b_bcount > 60 * 1024)
		bp->b_bcount = 60 * 1024;
}
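/*
 * Usage sketch (hypothetical "xx" driver): a raw (character) device
 * read entry typically wires straight through physio, using minphys
 * (or a driver-specific mincnt routine) to clamp each transfer:
 *
 *	xxread(dev)
 *		dev_t dev;
 *	{
 *
 *		physio(xxstrategy, &rxxbuf, dev, B_READ, minphys);
 *	}
 */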
/*
 * Pick up the device's error number and pass it to the user;
 * if there is an error but the number is 0 set a generalized
 * code.  Actually the latter is always true because devices
 * don't yet return specific errors.
 */
geterror(bp)
	register struct buf *bp;
{

	if (bp->b_flags&B_ERROR)
		if ((u.u_error = bp->b_error) == 0)
			u.u_error = EIO;
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * This is not nicely done at all - the buffer ought to be removed from the
 * hash chains & have its dev/blkno fields clobbered, but unfortunately we
 * can't do that here, as it is quite possible that the block is still
 * being used for i/o.  Eventually, all disc drivers should be forced to
 * have a close routine, which ought to ensure that the queue is empty,
 * then properly flush the queues.  Until that happy day, this suffices
 * for correctness. ... kre
 */
binval(dev)
	dev_t dev;
{
	register struct buf *bp;
	register struct bufhd *hp;
#define	dp ((struct buf *)hp)

	for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++)
		for (bp = dp->b_forw; bp != dp; bp = bp->b_forw)
			if (bp->b_dev == dev)
				bp->b_flags |= B_INVAL;
}