/*	vfs_cluster.c	4.4	12/26/80	*/

#include "../h/param.h"
#include "../h/systm.h"
#include "../h/dir.h"
#include "../h/user.h"
#include "../h/buf.h"
#include "../h/conf.h"
#include "../h/proc.h"
#include "../h/seg.h"
#include "../h/pte.h"
#include "../h/vm.h"
#include "../h/trace.h"

/*
 * The following several routines allocate and free
 * buffers with various side effects.  In general the
 * arguments to an allocate routine are a device and
 * a block number, and the value is a pointer to
 * to the buffer header; the buffer is marked "busy"
 * so that no one else can touch it.  If the block was
 * already in core, no I/O need be done; if it is
 * already busy, the process waits until it becomes free.
 * The following routines allocate a buffer:
 *	getblk
 *	bread
 *	breada
 *	baddr	(if it is incore)
 * Eventually the buffer must be released, possibly with the
 * side effect of writing it out, by using one of
 *	bwrite
 *	bdwrite
 *	bawrite
 *	brelse
 */

/*
 * Buffer-cache hash table: bufhash[h] holds the index into buf[]
 * of the first buffer on chain h; chains are threaded through the
 * b_hlink index field and terminated by -1 (see bhinit below).
 * BUFHASH hashes a file system block number to a chain.
 */
#define	BUFHSZ	63
#define	BUFHASH(blkno)	(blkno % BUFHSZ)
short	bufhash[BUFHSZ];

/*
 * Initialize hash links for buffers.
 */
bhinit()
{
	register int i;

	/* -1 (not 0) marks an empty chain, since buf[0] is a valid buffer */
	for (i = 0; i < BUFHSZ; i++)
		bufhash[i] = -1;
}

/* #define	DISKMON	1 */

#ifdef	DISKMON
/* Buffer-cache activity counters, kept only when DISKMON is enabled. */
struct {
	int	nbuf;
	long	nread;
	long	nreada;
	long	ncache;
	long	nwrite;
	long	bufcount[NBUF];
} io_info;
#endif

/*
 * Swap IO headers -
 * They contain the necessary information for the swap I/O.
 * At any given time, a swap header can be in three
 * different lists. When free it is in the free list,
 * when allocated and the I/O queued, it is on the swap
 * device list, and finally, if the operation was a dirty
 * page push, when the I/O completes, it is inserted
 * in a list of cleaned pages to be processed by the pageout daemon.
 */
struct	buf swbuf[NSWBUF];
short	swsize[NSWBUF];		/* CAN WE JUST USE B_BCOUNT? */
int	swpf[NSWBUF];


#ifdef	FASTVAX
/*
 * Unlink a buffer from the available (free) list and mark it busy.
 * Macro expansion of the function version at the bottom of this
 * file; the list splice runs at spl6 so interrupt-level code
 * cannot see a half-unlinked buffer.
 */
#define	notavail(bp) \
{ \
	int s = spl6(); \
	(bp)->av_back->av_forw = (bp)->av_forw; \
	(bp)->av_forw->av_back = (bp)->av_back; \
	(bp)->b_flags |= B_BUSY; \
	splx(s); \
}
#endif

/*
 * Read in (if necessary) the block and return a buffer pointer.
938Sbill */ 948Sbill struct buf * 958Sbill bread(dev, blkno) 968Sbill dev_t dev; 978Sbill daddr_t blkno; 988Sbill { 998Sbill register struct buf *bp; 1008Sbill 1018Sbill bp = getblk(dev, blkno); 1028Sbill if (bp->b_flags&B_DONE) { 103*2045Swnj #ifdef EPAWNJ 104*2045Swnj trace(TR_BREAD|TR_HIT, dev, blkno); 105*2045Swnj #endif 1068Sbill #ifdef DISKMON 1078Sbill io_info.ncache++; 1088Sbill #endif 1098Sbill return(bp); 1108Sbill } 1118Sbill bp->b_flags |= B_READ; 1128Sbill bp->b_bcount = BSIZE; 1138Sbill (*bdevsw[major(dev)].d_strategy)(bp); 114*2045Swnj #ifdef EPAWNJ 115*2045Swnj trace(TR_BREAD|TR_MISS, dev, blkno); 116*2045Swnj #endif 1178Sbill #ifdef DISKMON 1188Sbill io_info.nread++; 1198Sbill #endif 1208Sbill u.u_vm.vm_inblk++; /* pay for read */ 1218Sbill iowait(bp); 1228Sbill return(bp); 1238Sbill } 1248Sbill 1258Sbill /* 1268Sbill * Read in the block, like bread, but also start I/O on the 1278Sbill * read-ahead block (which is not allocated to the caller) 1288Sbill */ 1298Sbill struct buf * 1308Sbill breada(dev, blkno, rablkno) 1318Sbill dev_t dev; 1328Sbill daddr_t blkno, rablkno; 1338Sbill { 1348Sbill register struct buf *bp, *rabp; 1358Sbill 1368Sbill bp = NULL; 1378Sbill if (!incore(dev, blkno)) { 1388Sbill bp = getblk(dev, blkno); 1398Sbill if ((bp->b_flags&B_DONE) == 0) { 1408Sbill bp->b_flags |= B_READ; 1418Sbill bp->b_bcount = BSIZE; 1428Sbill (*bdevsw[major(dev)].d_strategy)(bp); 143*2045Swnj #ifdef EPAWNJ 144*2045Swnj trace(TR_BREAD|TR_MISS, dev, blkno); 145*2045Swnj #endif 1468Sbill #ifdef DISKMON 1478Sbill io_info.nread++; 1488Sbill #endif 1498Sbill u.u_vm.vm_inblk++; /* pay for read */ 1508Sbill } 151*2045Swnj #ifdef EPAWNJ 152*2045Swnj else 153*2045Swnj trace(TR_BREAD|TR_HIT, dev, blkno); 154*2045Swnj #endif 1558Sbill } 1568Sbill if (rablkno && !incore(dev, rablkno)) { 1578Sbill rabp = getblk(dev, rablkno); 158*2045Swnj if (rabp->b_flags & B_DONE) { 1598Sbill brelse(rabp); 160*2045Swnj #ifdef EPAWNJ 161*2045Swnj trace(TR_BREAD|TR_HIT|TR_RA, dev, 
blkno); 162*2045Swnj #endif 163*2045Swnj } else { 1648Sbill rabp->b_flags |= B_READ|B_ASYNC; 1658Sbill rabp->b_bcount = BSIZE; 1668Sbill (*bdevsw[major(dev)].d_strategy)(rabp); 167*2045Swnj #ifdef EPAWNJ 168*2045Swnj trace(TR_BREAD|TR_MISS|TR_RA, dev, rablock); 169*2045Swnj #endif 1708Sbill #ifdef DISKMON 1718Sbill io_info.nreada++; 1728Sbill #endif 1738Sbill u.u_vm.vm_inblk++; /* pay in advance */ 1748Sbill } 1758Sbill } 1768Sbill if(bp == NULL) 1778Sbill return(bread(dev, blkno)); 1788Sbill iowait(bp); 1798Sbill return(bp); 1808Sbill } 1818Sbill 1828Sbill /* 1838Sbill * Write the buffer, waiting for completion. 1848Sbill * Then release the buffer. 1858Sbill */ 1868Sbill bwrite(bp) 1878Sbill register struct buf *bp; 1888Sbill { 1898Sbill register flag; 1908Sbill 1918Sbill flag = bp->b_flags; 1928Sbill bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); 1938Sbill bp->b_bcount = BSIZE; 1948Sbill #ifdef DISKMON 1958Sbill io_info.nwrite++; 1968Sbill #endif 1978Sbill if ((flag&B_DELWRI) == 0) 1988Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 199*2045Swnj #ifdef EPAWNJ 200*2045Swnj trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno)); 201*2045Swnj #endif 2028Sbill (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2038Sbill if ((flag&B_ASYNC) == 0) { 2048Sbill iowait(bp); 2058Sbill brelse(bp); 2068Sbill } else if (flag & B_DELWRI) 2078Sbill bp->b_flags |= B_AGE; 2088Sbill else 2098Sbill geterror(bp); 2108Sbill } 2118Sbill 2128Sbill /* 2138Sbill * Release the buffer, marking it so that if it is grabbed 2148Sbill * for another purpose it will be written out before being 2158Sbill * given up (e.g. when writing a partial block where it is 2168Sbill * assumed that another write for the same block will soon follow). 2178Sbill * This can't be done for magtape, since writes must be done 2188Sbill * in the same order as requested. 
 */
bdwrite(bp)
register struct buf *bp;
{
	register struct buf *dp;

	if ((bp->b_flags&B_DELWRI) == 0)
		u.u_vm.vm_oublk++;		/* noone paid yet */
	dp = bdevsw[major(bp->b_dev)].d_tab;
	/* magtape must preserve write ordering: write it out now */
	if (dp->b_flags & B_TAPE)
		bawrite(bp);
	else {
		/* mark dirty + valid and let a later reclaim write it */
		bp->b_flags |= B_DELWRI | B_DONE;
		brelse(bp);
	}
}

/*
 * Release the buffer, start I/O on it, but don't wait for completion.
 */
bawrite(bp)
register struct buf *bp;
{

	bp->b_flags |= B_ASYNC;
	bwrite(bp);
}

/*
 * release the buffer, with no I/O implied.
 * Wakes any waiters on this buffer or on the free list, and
 * requeues the buffer on the free list: at the head (reused
 * first) if aged or in error, else at the tail (LRU).
 */
brelse(bp)
register struct buf *bp;
{
	register struct buf **backp;
	register s;

	if (bp->b_flags&B_WANTED)
		wakeup((caddr_t)bp);
	if (bfreelist.b_flags&B_WANTED) {
		bfreelist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bfreelist);
	}
	/* an errored buffer's contents are invalid: dissociate it */
	if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) {
		bunhash(bp);
		bp->b_dev = NODEV;  /* no assoc. on error */
	}
	s = spl6();	/* free list is also touched at interrupt level */
	if (bp->b_flags & (B_AGE|B_ERROR)) {
		/* insert at head of free list */
		backp = &bfreelist.av_forw;
		(*backp)->av_back = bp;
		bp->av_forw = *backp;
		*backp = bp;
		bp->av_back = &bfreelist;
	} else {
		/* insert at tail of free list */
		backp = &bfreelist.av_back;
		(*backp)->av_forw = bp;
		bp->av_back = *backp;
		*backp = bp;
		bp->av_forw = &bfreelist;
	}
	bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE);
	splx(s);
}

/*
 * See if the block is associated with some buffer
 * (mainly to avoid getting hung up on a wait in breada)
 * Returns 1 if (dev, blkno) is on its hash chain, else 0;
 * does not examine or set B_BUSY.
 */
incore(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp;
	register int dblkno = fsbtodb(blkno);

	/* &buf[-1] is the chain terminator (b_hlink == -1) */
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink])
		if (bp->b_blkno == dblkno && bp->b_dev == dev)
			return (1);
	return (0);
}

/*
 * Return the block's buffer if it is in core, reading it in
 * (a cheap cache hit) via bread; otherwise return 0 without
 * doing any I/O.
 */
struct buf *
baddr(dev, blkno)
dev_t dev;
daddr_t blkno;
{

	if (incore(dev, blkno))
		return (bread(dev, blkno));
	return (0);
}

/*
 * Assign a buffer for the given block.  If the appropriate
 * block is already associated, return it; otherwise search
 * for the oldest non-busy buffer and reassign it.
 */
struct buf *
getblk(dev, blkno)
dev_t dev;
daddr_t blkno;
{
	register struct buf *bp, *dp, *ep;
	register int i, x, dblkno;

	/* clamp insane block numbers so fsbtodb below cannot overflow */
	if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT))
		blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1);
	dblkno = fsbtodb(blkno);
    loop:
	(void) spl0();
	/* first: search the hash chain for an existing association */
	for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1];
	    bp = &buf[bp->b_hlink]) {
		if (bp->b_blkno != dblkno || bp->b_dev != dev)
			continue;
		(void) spl6();
		if (bp->b_flags&B_BUSY) {
			/* someone else has it: wait and rescan from scratch */
			bp->b_flags |= B_WANTED;
			sleep((caddr_t)bp, PRIBIO+1);
			goto loop;
		}
		(void) spl0();
#ifdef	DISKMON
		/* record how deep in the free list the hit was found */
		i = 0;
		dp = bp->av_forw;
		while (dp != &bfreelist) {
			i++;
			dp = dp->av_forw;
		}
		if (i<NBUF)
			io_info.bufcount[i]++;
#endif
		notavail(bp);
		bp->b_flags |= B_CACHE;
		return(bp);
	}
	if (major(dev) >= nblkdev)
		panic("blkdev");
	dp = bdevsw[major(dev)].d_tab;
	if (dp == NULL)
		panic("devtab");
	/* miss: take the oldest free buffer, sleeping if none available */
	(void) spl6();
	if (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
		goto loop;
	}
	(void) spl0();
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		/* dirty victim: push it out first, then start over */
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev == NODEV)
		goto done;
	/* INLINE EXPANSION OF bunhash(bp) */
#ifdef	EPAWNJ
	trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
	(void) spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		/* victim is the chain head */
		bufhash[i] = bp->b_hlink;
	} else {
		/* walk the chain for the predecessor and unlink */
		for (ep = &buf[bufhash[i]]; ep != &buf[-1];
		    ep = &buf[ep->b_hlink])
			if (ep->b_hlink == x) {
				ep->b_hlink = bp->b_hlink;
				goto done;
			}
		panic("getblk");
	}
done:
	(void) spl0();
	/* END INLINE EXPANSION */
	/* move to the target device's queue and rehash under (dev, blkno) */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = dev;
	bp->b_blkno = dblkno;
	i = BUFHASH(blkno);
	bp->b_hlink = bufhash[i];
	bufhash[i] = bp - buf;
	return(bp);
}

/*
 * get an empty block,
 * not assigned to any particular device
 */
struct buf *
geteblk()
{
	register struct buf *bp, *dp;

loop:
	(void) spl6();
	while (bfreelist.av_forw == &bfreelist) {
		bfreelist.b_flags |= B_WANTED;
		sleep((caddr_t)&bfreelist, PRIBIO+1);
	}
	(void) spl0();
	dp = &bfreelist;
	bp = bfreelist.av_forw;
	notavail(bp);
	if (bp->b_flags & B_DELWRI) {
		/* dirty victim: push it out first, then start over */
		bp->b_flags |= B_ASYNC;
		bwrite(bp);
		goto loop;
	}
	if (bp->b_dev != NODEV) {
#ifdef	EPAWNJ
		trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno));
#endif
		bunhash(bp);
	}
	/* requeue on the free-list header's device chain, dissociated */
	bp->b_flags = B_BUSY;
	bp->b_back->b_forw = bp->b_forw;
	bp->b_forw->b_back = bp->b_back;
	bp->b_forw = dp->b_forw;
	bp->b_back = dp;
	dp->b_forw->b_back = bp;
	dp->b_forw = bp;
	bp->b_dev = (dev_t)NODEV;
	bp->b_hlink = -1;
	return(bp);
}

/*
 * Remove a buffer from its hash chain (no-op if it has no
 * device association).  Panics if the buffer claims a device
 * but is not found on its chain.
 */
bunhash(bp)
register struct buf *bp;
{
	register struct buf *ep;
	register int i, x, s;

	if (bp->b_dev == NODEV)
		return;
	s = spl6();
	i = BUFHASH(dbtofsb(bp->b_blkno));
	x = bp - buf;
	if (bufhash[i] == x) {
		bufhash[i] = bp->b_hlink;
		goto ret;
	}
	for (ep = &buf[bufhash[i]]; ep != &buf[-1];
	    ep = &buf[ep->b_hlink])
		if (ep->b_hlink == x) {
			ep->b_hlink = bp->b_hlink;
			goto ret;
		}
	panic("bunhash");
ret:
	splx(s);
}

/*
 * Wait for I/O completion on the buffer; return errors
 * to the user.
 */
iowait(bp)
register struct buf *bp;
{

	(void) spl6();
	while ((bp->b_flags&B_DONE)==0)
		sleep((caddr_t)bp, PRIBIO);
	(void) spl0();
	geterror(bp);
}

#ifndef FASTVAX
/*
 * Unlink a buffer from the available list and mark it busy.
 * (internal interface)
 */
notavail(bp)
register struct buf *bp;
{
	register s;

	s = spl6();
	bp->av_back->av_forw = bp->av_forw;
	bp->av_forw->av_back = bp->av_back;
	bp->b_flags |= B_BUSY;
	splx(s);
}
#endif

/*
 * Mark I/O complete on a buffer. If the header
 * indicates a dirty page push completion, the
 * header is inserted into the ``cleaned'' list
 * to be processed by the pageout daemon. Otherwise
 * release it if I/O is asynchronous, and wake
 * up anyone waiting for it.
 */
iodone(bp)
register struct buf *bp;
{
	register int s;

	if (bp->b_flags & B_DONE)
		panic("dup iodone");
	bp->b_flags |= B_DONE;
	if (bp->b_flags & B_DIRTY) {
		/* completed dirty-page push: hand to the pageout daemon */
		if (bp->b_flags & B_ERROR)
			panic("IO err in push");
		s = spl6();
		cnt.v_pgout++;
		bp->av_forw = bclnlist;
		/* restore count/page-frame info stashed by swap() */
		bp->b_bcount = swsize[bp - swbuf];
		bp->b_pfcent = swpf[bp - swbuf];
		bclnlist = bp;
		if (bswlist.b_flags & B_WANTED)
			wakeup((caddr_t)&proc[2]);	/* pageout daemon */
		splx(s);
		return;
	}
	if (bp->b_flags&B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Zero the core associated with a buffer.
 */
clrbuf(bp)
struct buf *bp;
{
	register *p;
	register c;

	p = bp->b_un.b_words;
	c = BSIZE/sizeof(int);
	do
		*p++ = 0;
	while (--c);
	bp->b_resid = 0;
}

/*
 * swap I/O -
 *
 * If the flag indicates a dirty page push initiated
 * by the pageout daemon, we map the page into the i th
 * virtual page of process 2 (the daemon itself) where i is
 * the index of the swap header that has been allocated.
 * We simply initialize the header and queue the I/O but
 * do not wait for completion. When the I/O completes,
 * iodone() will link the header to a list of cleaned
 * pages to be processed by the pageout daemon.
 */
swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent)
	struct proc *p;
	swblk_t dblkno;
	caddr_t addr;
	/*
	 * NOTE(review): rdflg (B_READ or B_WRITE, per the uses below)
	 * is missing from the declaration list and defaults to int --
	 * harmless here, but worth declaring explicitly.
	 */
	int flag, nbytes;
	dev_t dev;
	unsigned pfcent;
{
	register struct buf *bp;
	register int c;
	int p2dp;
	register struct pte *dpte, *vpte;

	/* allocate a swap I/O header, sleeping until one is free */
	(void) spl6();
	while (bswlist.av_forw == NULL) {
		bswlist.b_flags |= B_WANTED;
		sleep((caddr_t)&bswlist, PSWP+1);
	}
	bp = bswlist.av_forw;
	bswlist.av_forw = bp->av_forw;
	(void) spl0();

	bp->b_flags = B_BUSY | B_PHYS | rdflg | flag;
	if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0)
		if (rdflg == B_READ)
			sum.v_pswpin += btoc(nbytes);
		else
			sum.v_pswpout += btoc(nbytes);
	bp->b_proc = p;
	if (flag & B_DIRTY) {
		/*
		 * Dirty page push: map the page(s) into process 2's
		 * address space via the pte copies, indexed by which
		 * swap header we got.
		 */
		p2dp = ((bp - swbuf) * CLSIZE) * KLMAX;
		dpte = dptopte(&proc[2], p2dp);
		vpte = vtopte(p, btop(addr));
		for (c = 0; c < nbytes; c += NBPG) {
			if (vpte->pg_pfnum == 0 || vpte->pg_fod)
				panic("swap bad pte");
			*dpte++ = *vpte++;
		}
		bp->b_un.b_addr = (caddr_t)ctob(p2dp);
	} else
		bp->b_un.b_addr = addr;
	while (nbytes > 0) {
		/* at most 120 clicks per transfer */
		c = imin(ctob(120), nbytes);
		bp->b_bcount = c;
		bp->b_blkno = dblkno;
		bp->b_dev = dev;
		if (flag & B_DIRTY) {
			/* stash info for iodone() to restore */
			swpf[bp - swbuf] = pfcent;
			swsize[bp - swbuf] = nbytes;
		}
		(*bdevsw[major(dev)].d_strategy)(bp);
		if (flag & B_DIRTY) {
			/* dirty push is async: iodone() finishes the job */
			if (c < nbytes)
				panic("big push");
			return;
		}
		(void) spl6();
		while((bp->b_flags&B_DONE)==0)
			sleep((caddr_t)bp, PSWP);
		(void) spl0();
		bp->b_un.b_addr += c;
		bp->b_flags &= ~B_DONE;
		if (bp->b_flags & B_ERROR) {
			/* only a pageable-region swap-in error is survivable */
			if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE)
				panic("hard IO err in swap");
			swkill(p, (char *)0);
		}
		nbytes -= c;
		dblkno += btoc(c);
	}
	/* done: return the header to the free list and wake waiters */
	(void) spl6();
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		wakeup((caddr_t)&bswlist);
		wakeup((caddr_t)&proc[2]);
	}
	(void) spl0();
}

/*
 * If rout == 0 then killed on swap error, else
 * rout is the name of the routine where we ran out of
 * swap space.
 */
swkill(p, rout)
	struct proc *p;
	char *rout;
{

	printf("%d: ", p->p_pid);
	if (rout)
		printf("out of swap space in %s\n", rout);
	else
		printf("killed on swap error\n");
	/*
	 * To be sure no looping (e.g. in vmsched trying to
	 * swap out) mark process locked in core (as though
	 * done by user) after killing it so noone will try
	 * to swap it out.
	 */
	psignal(p, SIGKILL);
	p->p_flag |= SULOCK;
}

/*
 * make sure all write-behind blocks
 * on dev (or NODEV for all)
 * are flushed out.
6928Sbill * (from umount and update) 6938Sbill */ 6948Sbill bflush(dev) 6958Sbill dev_t dev; 6968Sbill { 6978Sbill register struct buf *bp; 6988Sbill 6998Sbill loop: 700124Sbill (void) spl6(); 7018Sbill for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) { 7028Sbill if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { 7038Sbill bp->b_flags |= B_ASYNC; 7048Sbill notavail(bp); 7058Sbill bwrite(bp); 7068Sbill goto loop; 7078Sbill } 7088Sbill } 709124Sbill (void) spl0(); 7108Sbill } 7118Sbill 7128Sbill /* 7138Sbill * Raw I/O. The arguments are 7148Sbill * The strategy routine for the device 7158Sbill * A buffer, which will always be a special buffer 7168Sbill * header owned exclusively by the device for this purpose 7178Sbill * The device number 7188Sbill * Read/write flag 7198Sbill * Essentially all the work is computing physical addresses and 7208Sbill * validating them. 7218Sbill * If the user has the proper access privilidges, the process is 7228Sbill * marked 'delayed unlock' and the pages involved in the I/O are 7238Sbill * faulted and locked. After the completion of the I/O, the above pages 7248Sbill * are unlocked. 
7258Sbill */ 7268Sbill physio(strat, bp, dev, rw, mincnt) 7278Sbill int (*strat)(); 7288Sbill register struct buf *bp; 7298Sbill unsigned (*mincnt)(); 7308Sbill { 7318Sbill register int c; 7328Sbill char *a; 7338Sbill 7348Sbill if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { 7358Sbill u.u_error = EFAULT; 7368Sbill return; 7378Sbill } 738124Sbill (void) spl6(); 7398Sbill while (bp->b_flags&B_BUSY) { 7408Sbill bp->b_flags |= B_WANTED; 7418Sbill sleep((caddr_t)bp, PRIBIO+1); 7428Sbill } 7438Sbill bp->b_error = 0; 7448Sbill bp->b_proc = u.u_procp; 7458Sbill bp->b_un.b_addr = u.u_base; 7468Sbill while (u.u_count != 0 && bp->b_error==0) { 7478Sbill bp->b_flags = B_BUSY | B_PHYS | rw; 7488Sbill bp->b_dev = dev; 7498Sbill bp->b_blkno = u.u_offset >> PGSHIFT; 7508Sbill bp->b_bcount = u.u_count; 7518Sbill (*mincnt)(bp); 7528Sbill c = bp->b_bcount; 7538Sbill u.u_procp->p_flag |= SPHYSIO; 7548Sbill vslock(a = bp->b_un.b_addr, c); 7558Sbill (*strat)(bp); 756124Sbill (void) spl6(); 7578Sbill while ((bp->b_flags&B_DONE) == 0) 7588Sbill sleep((caddr_t)bp, PRIBIO); 7598Sbill vsunlock(a, c, rw); 7608Sbill u.u_procp->p_flag &= ~SPHYSIO; 7618Sbill if (bp->b_flags&B_WANTED) 7628Sbill wakeup((caddr_t)bp); 763124Sbill (void) spl0(); 7648Sbill bp->b_un.b_addr += c; 7658Sbill u.u_count -= c; 7668Sbill u.u_offset += c; 7678Sbill } 7688Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); 7698Sbill u.u_count = bp->b_resid; 7708Sbill geterror(bp); 7718Sbill } 7728Sbill 7738Sbill /*ARGSUSED*/ 7748Sbill unsigned 7758Sbill minphys(bp) 7768Sbill struct buf *bp; 7778Sbill { 7788Sbill 7798Sbill if (bp->b_bcount > 60 * 1024) 7808Sbill bp->b_bcount = 60 * 1024; 7818Sbill } 7828Sbill 7838Sbill /* 7848Sbill * Pick up the device's error number and pass it to the user; 7858Sbill * if there is an error but the number is 0 set a generalized 7868Sbill * code. Actually the latter is always true because devices 7878Sbill * don't yet return specific errors. 
7888Sbill */ 7898Sbill geterror(bp) 7908Sbill register struct buf *bp; 7918Sbill { 7928Sbill 7938Sbill if (bp->b_flags&B_ERROR) 7948Sbill if ((u.u_error = bp->b_error)==0) 7958Sbill u.u_error = EIO; 7968Sbill } 797