1*1831Sbill /* vfs_cluster.c 4.3 11/24/80 */ 28Sbill 38Sbill #include "../h/param.h" 48Sbill #include "../h/systm.h" 58Sbill #include "../h/dir.h" 68Sbill #include "../h/user.h" 78Sbill #include "../h/buf.h" 88Sbill #include "../h/conf.h" 98Sbill #include "../h/proc.h" 108Sbill #include "../h/seg.h" 118Sbill #include "../h/pte.h" 128Sbill #include "../h/vm.h" 138Sbill 1491Sbill /* 1591Sbill * The following several routines allocate and free 1691Sbill * buffers with various side effects. In general the 1791Sbill * arguments to an allocate routine are a device and 1891Sbill * a block number, and the value is a pointer to 1991Sbill * to the buffer header; the buffer is marked "busy" 2091Sbill * so that no one else can touch it. If the block was 2191Sbill * already in core, no I/O need be done; if it is 2291Sbill * already busy, the process waits until it becomes free. 2391Sbill * The following routines allocate a buffer: 2491Sbill * getblk 2591Sbill * bread 2691Sbill * breada 2791Sbill * baddr (if it is incore) 2891Sbill * Eventually the buffer must be released, possibly with the 2991Sbill * side effect of writing it out, by using one of 3091Sbill * bwrite 3191Sbill * bdwrite 3291Sbill * bawrite 3391Sbill * brelse 3491Sbill */ 3591Sbill 3691Sbill #define BUFHSZ 63 3791Sbill #define BUFHASH(blkno) (blkno % BUFHSZ) 3891Sbill short bufhash[BUFHSZ]; 3991Sbill 4091Sbill /* 4191Sbill * Initialize hash links for buffers. 4291Sbill */ 4391Sbill bhinit() 4491Sbill { 4591Sbill register int i; 4691Sbill 4791Sbill for (i = 0; i < BUFHSZ; i++) 4891Sbill bufhash[i] = -1; 4991Sbill } 5091Sbill 518Sbill /* #define DISKMON 1 */ 528Sbill 538Sbill #ifdef DISKMON 548Sbill struct { 558Sbill int nbuf; 568Sbill long nread; 578Sbill long nreada; 588Sbill long ncache; 598Sbill long nwrite; 608Sbill long bufcount[NBUF]; 618Sbill } io_info; 628Sbill #endif 638Sbill 648Sbill /* 658Sbill * Swap IO headers - 668Sbill * They contain the necessary information for the swap I/O. 678Sbill * At any given time, a swap header can be in three 688Sbill * different lists. When free it is in the free list, 698Sbill * when allocated and the I/O queued, it is on the swap 708Sbill * device list, and finally, if the operation was a dirty 718Sbill * page push, when the I/O completes, it is inserted 728Sbill * in a list of cleaned pages to be processed by the pageout daemon. 738Sbill */ 748Sbill struct buf swbuf[NSWBUF]; 758Sbill short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */ 768Sbill int swpf[NSWBUF]; 778Sbill 788Sbill 798Sbill #ifdef FASTVAX 808Sbill #define notavail(bp) \ 818Sbill { \ 828Sbill int s = spl6(); \ 838Sbill (bp)->av_back->av_forw = (bp)->av_forw; \ 848Sbill (bp)->av_forw->av_back = (bp)->av_back; \ 858Sbill (bp)->b_flags |= B_BUSY; \ 868Sbill splx(s); \ 878Sbill } 888Sbill #endif 898Sbill 908Sbill /* 918Sbill * Read in (if necessary) the block and return a buffer pointer. 928Sbill */ 938Sbill struct buf * 948Sbill bread(dev, blkno) 958Sbill dev_t dev; 968Sbill daddr_t blkno; 978Sbill { 988Sbill register struct buf *bp; 998Sbill 1008Sbill bp = getblk(dev, blkno); 1018Sbill if (bp->b_flags&B_DONE) { 1028Sbill #ifdef DISKMON 1038Sbill io_info.ncache++; 1048Sbill #endif 1058Sbill return(bp); 1068Sbill } 1078Sbill bp->b_flags |= B_READ; 1088Sbill bp->b_bcount = BSIZE; 1098Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1108Sbill #ifdef DISKMON 1118Sbill io_info.nread++; 1128Sbill #endif 1138Sbill u.u_vm.vm_inblk++; /* pay for read */ 1148Sbill iowait(bp); 1158Sbill return(bp); 1168Sbill } 1178Sbill 1188Sbill /* 1198Sbill * Read in the block, like bread, but also start I/O on the 1208Sbill * read-ahead block (which is not allocated to the caller) 1218Sbill */ 1228Sbill struct buf * 1238Sbill breada(dev, blkno, rablkno) 1248Sbill dev_t dev; 1258Sbill daddr_t blkno, rablkno; 1268Sbill { 1278Sbill register struct buf *bp, *rabp; 1288Sbill 1298Sbill bp = NULL; 1308Sbill if (!incore(dev, blkno)) { 1318Sbill bp = getblk(dev, blkno); 1328Sbill if ((bp->b_flags&B_DONE) == 0) { 1338Sbill bp->b_flags |= B_READ; 1348Sbill bp->b_bcount = BSIZE; 1358Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1368Sbill #ifdef DISKMON 1378Sbill io_info.nread++; 1388Sbill #endif 1398Sbill u.u_vm.vm_inblk++; /* pay for read */ 1408Sbill } 1418Sbill } 1428Sbill if (rablkno && !incore(dev, rablkno)) { 1438Sbill rabp = getblk(dev, rablkno); 1448Sbill if (rabp->b_flags & B_DONE) 1458Sbill brelse(rabp); 1468Sbill else { 1478Sbill rabp->b_flags |= B_READ|B_ASYNC; 1488Sbill rabp->b_bcount = BSIZE; 1498Sbill (*bdevsw[major(dev)].d_strategy)(rabp); 1508Sbill #ifdef DISKMON 1518Sbill io_info.nreada++; 1528Sbill #endif 1538Sbill u.u_vm.vm_inblk++; /* pay in advance */ 1548Sbill } 1558Sbill } 1568Sbill if(bp == NULL) 1578Sbill return(bread(dev, blkno)); 1588Sbill iowait(bp); 1598Sbill return(bp); 1608Sbill } 1618Sbill 1628Sbill /* 1638Sbill * Write the buffer, waiting for completion. 1648Sbill * Then release the buffer. 1658Sbill */ 1668Sbill bwrite(bp) 1678Sbill register struct buf *bp; 1688Sbill { 1698Sbill register flag; 1708Sbill 1718Sbill flag = bp->b_flags; 1728Sbill bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); 1738Sbill bp->b_bcount = BSIZE; 1748Sbill #ifdef DISKMON 1758Sbill io_info.nwrite++; 1768Sbill #endif 1778Sbill if ((flag&B_DELWRI) == 0) 1788Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 1798Sbill (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 1808Sbill if ((flag&B_ASYNC) == 0) { 1818Sbill iowait(bp); 1828Sbill brelse(bp); 1838Sbill } else if (flag & B_DELWRI) 1848Sbill bp->b_flags |= B_AGE; 1858Sbill else 1868Sbill geterror(bp); 1878Sbill } 1888Sbill 1898Sbill /* 1908Sbill * Release the buffer, marking it so that if it is grabbed 1918Sbill * for another purpose it will be written out before being 1928Sbill * given up (e.g. when writing a partial block where it is 1938Sbill * assumed that another write for the same block will soon follow). 1948Sbill * This can't be done for magtape, since writes must be done 1958Sbill * in the same order as requested. 1968Sbill */ 1978Sbill bdwrite(bp) 1988Sbill register struct buf *bp; 1998Sbill { 2008Sbill register struct buf *dp; 2018Sbill 2028Sbill if ((bp->b_flags&B_DELWRI) == 0) 2038Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 2048Sbill dp = bdevsw[major(bp->b_dev)].d_tab; 2058Sbill if(dp->b_flags & B_TAPE) 2068Sbill bawrite(bp); 2078Sbill else { 2088Sbill bp->b_flags |= B_DELWRI | B_DONE; 2098Sbill brelse(bp); 2108Sbill } 2118Sbill } 2128Sbill 2138Sbill /* 2148Sbill * Release the buffer, start I/O on it, but don't wait for completion. 2158Sbill */ 2168Sbill bawrite(bp) 2178Sbill register struct buf *bp; 2188Sbill { 2198Sbill 2208Sbill bp->b_flags |= B_ASYNC; 2218Sbill bwrite(bp); 2228Sbill } 2238Sbill 2248Sbill /* 2258Sbill * release the buffer, with no I/O implied. 2268Sbill */ 2278Sbill brelse(bp) 2288Sbill register struct buf *bp; 2298Sbill { 2308Sbill register struct buf **backp; 2318Sbill register s; 2328Sbill 2338Sbill if (bp->b_flags&B_WANTED) 2348Sbill wakeup((caddr_t)bp); 2358Sbill if (bfreelist.b_flags&B_WANTED) { 2368Sbill bfreelist.b_flags &= ~B_WANTED; 2378Sbill wakeup((caddr_t)&bfreelist); 2388Sbill } 23991Sbill if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) { 24091Sbill bunhash(bp); 2418Sbill bp->b_dev = NODEV; /* no assoc. on error */ 24291Sbill } 2438Sbill s = spl6(); 2448Sbill if(bp->b_flags & (B_AGE|B_ERROR)) { 2458Sbill backp = &bfreelist.av_forw; 2468Sbill (*backp)->av_back = bp; 2478Sbill bp->av_forw = *backp; 2488Sbill *backp = bp; 2498Sbill bp->av_back = &bfreelist; 2508Sbill } else { 2518Sbill backp = &bfreelist.av_back; 2528Sbill (*backp)->av_forw = bp; 2538Sbill bp->av_back = *backp; 2548Sbill *backp = bp; 2558Sbill bp->av_forw = &bfreelist; 2568Sbill } 2578Sbill bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE); 2588Sbill splx(s); 2598Sbill } 2608Sbill 2618Sbill /* 2628Sbill * See if the block is associated with some buffer 2638Sbill * (mainly to avoid getting hung up on a wait in breada) 2648Sbill */ 2658Sbill incore(dev, blkno) 2668Sbill dev_t dev; 2678Sbill daddr_t blkno; 2688Sbill { 2698Sbill register struct buf *bp; 2708Sbill register int dblkno = fsbtodb(blkno); 2718Sbill 27291Sbill for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; 27391Sbill bp = &buf[bp->b_hlink]) 27495Sbill if (bp->b_blkno == dblkno && bp->b_dev == dev) 27591Sbill return (1); 27691Sbill return (0); 2778Sbill } 2788Sbill 2798Sbill struct buf * 2808Sbill baddr(dev, blkno) 2818Sbill dev_t dev; 2828Sbill daddr_t blkno; 2838Sbill { 2848Sbill 2858Sbill if (incore(dev, blkno)) 2868Sbill return (bread(dev, blkno)); 2878Sbill return (0); 2888Sbill } 2898Sbill 2908Sbill /* 2918Sbill * Assign a buffer for the given block. If the appropriate 2928Sbill * block is already associated, return it; otherwise search 2938Sbill * for the oldest non-busy buffer and reassign it. 2948Sbill */ 2958Sbill struct buf * 2968Sbill getblk(dev, blkno) 2978Sbill dev_t dev; 2988Sbill daddr_t blkno; 2998Sbill { 30091Sbill register struct buf *bp, *dp, *ep; 301*1831Sbill register int i, x, dblkno; 3028Sbill 303*1831Sbill if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) 304*1831Sbill blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); 305*1831Sbill dblkno = fsbtodb(blkno); 3068Sbill loop: 307124Sbill (void) spl0(); 30891Sbill for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; 30991Sbill bp = &buf[bp->b_hlink]) { 31091Sbill if (bp->b_blkno != dblkno || bp->b_dev != dev) 3118Sbill continue; 312124Sbill (void) spl6(); 3138Sbill if (bp->b_flags&B_BUSY) { 3148Sbill bp->b_flags |= B_WANTED; 3158Sbill sleep((caddr_t)bp, PRIBIO+1); 3168Sbill goto loop; 3178Sbill } 318124Sbill (void) spl0(); 3198Sbill #ifdef DISKMON 3208Sbill i = 0; 3218Sbill dp = bp->av_forw; 3228Sbill while (dp != &bfreelist) { 3238Sbill i++; 3248Sbill dp = dp->av_forw; 3258Sbill } 3268Sbill if (i<NBUF) 3278Sbill io_info.bufcount[i]++; 3288Sbill #endif 3298Sbill notavail(bp); 3308Sbill bp->b_flags |= B_CACHE; 3318Sbill return(bp); 3328Sbill } 33391Sbill if (major(dev) >= nblkdev) 33491Sbill panic("blkdev"); 33591Sbill dp = bdevsw[major(dev)].d_tab; 33691Sbill if (dp == NULL) 33791Sbill panic("devtab"); 338124Sbill (void) spl6(); 3398Sbill if (bfreelist.av_forw == &bfreelist) { 3408Sbill bfreelist.b_flags |= B_WANTED; 3418Sbill sleep((caddr_t)&bfreelist, PRIBIO+1); 3428Sbill goto loop; 3438Sbill } 3441792Sbill (void) spl0(); 3458Sbill bp = bfreelist.av_forw; 3468Sbill notavail(bp); 3478Sbill if (bp->b_flags & B_DELWRI) { 3488Sbill bp->b_flags |= B_ASYNC; 3498Sbill bwrite(bp); 3508Sbill goto loop; 3518Sbill } 35291Sbill if (bp->b_dev == NODEV) 35391Sbill goto done; 35491Sbill /* INLINE EXPANSION OF bunhash(bp) */ 355884Sbill (void) spl6(); 35691Sbill i = BUFHASH(dbtofsb(bp->b_blkno)); 35791Sbill x = bp - buf; 35891Sbill if (bufhash[i] == x) { 35991Sbill bufhash[i] = bp->b_hlink; 36091Sbill } else { 36191Sbill for (ep = &buf[bufhash[i]]; ep != &buf[-1]; 36291Sbill ep = &buf[ep->b_hlink]) 36391Sbill if (ep->b_hlink == x) { 36491Sbill ep->b_hlink = bp->b_hlink; 36591Sbill goto done; 36691Sbill } 36791Sbill panic("getblk"); 36891Sbill } 36991Sbill done: 370884Sbill (void) spl0(); 37191Sbill /* END INLINE EXPANSION */ 3728Sbill bp->b_flags = B_BUSY; 3738Sbill bp->b_back->b_forw = bp->b_forw; 3748Sbill bp->b_forw->b_back = bp->b_back; 3758Sbill bp->b_forw = dp->b_forw; 3768Sbill bp->b_back = dp; 3778Sbill dp->b_forw->b_back = bp; 3788Sbill dp->b_forw = bp; 3798Sbill bp->b_dev = dev; 3808Sbill bp->b_blkno = dblkno; 38191Sbill i = BUFHASH(blkno); 38291Sbill bp->b_hlink = bufhash[i]; 38391Sbill bufhash[i] = bp - buf; 3848Sbill return(bp); 3858Sbill } 3868Sbill 3878Sbill /* 3888Sbill * get an empty block, 3898Sbill * not assigned to any particular device 3908Sbill */ 3918Sbill struct buf * 3928Sbill geteblk() 3938Sbill { 394182Sbill register struct buf *bp, *dp; 3958Sbill 3968Sbill loop: 397124Sbill (void) spl6(); 3988Sbill while (bfreelist.av_forw == &bfreelist) { 3998Sbill bfreelist.b_flags |= B_WANTED; 4008Sbill sleep((caddr_t)&bfreelist, PRIBIO+1); 4018Sbill } 402124Sbill (void) spl0(); 4038Sbill dp = &bfreelist; 4048Sbill bp = bfreelist.av_forw; 4058Sbill notavail(bp); 4068Sbill if (bp->b_flags & B_DELWRI) { 4078Sbill bp->b_flags |= B_ASYNC; 4088Sbill bwrite(bp); 4098Sbill goto loop; 4108Sbill } 41191Sbill if (bp->b_dev != NODEV) 41291Sbill bunhash(bp); 4138Sbill bp->b_flags = B_BUSY; 4148Sbill bp->b_back->b_forw = bp->b_forw; 4158Sbill bp->b_forw->b_back = bp->b_back; 4168Sbill bp->b_forw = dp->b_forw; 4178Sbill bp->b_back = dp; 4188Sbill dp->b_forw->b_back = bp; 4198Sbill dp->b_forw = bp; 4208Sbill bp->b_dev = (dev_t)NODEV; 42191Sbill bp->b_hlink = -1; 4228Sbill return(bp); 4238Sbill } 4248Sbill 42591Sbill bunhash(bp) 42691Sbill register struct buf *bp; 42791Sbill { 42891Sbill register struct buf *ep; 429884Sbill register int i, x, s; 43091Sbill 43191Sbill if (bp->b_dev == NODEV) 43291Sbill return; 433884Sbill s = spl6(); 43491Sbill i = BUFHASH(dbtofsb(bp->b_blkno)); 43591Sbill x = bp - buf; 43691Sbill if (bufhash[i] == x) { 43791Sbill bufhash[i] = bp->b_hlink; 438884Sbill goto ret; 43991Sbill } 44091Sbill for (ep = &buf[bufhash[i]]; ep != &buf[-1]; 44191Sbill ep = &buf[ep->b_hlink]) 44291Sbill if (ep->b_hlink == x) { 44391Sbill ep->b_hlink = bp->b_hlink; 444884Sbill goto ret; 44591Sbill } 44691Sbill panic("bunhash"); 447884Sbill ret: 448884Sbill splx(s); 44991Sbill } 45091Sbill 4518Sbill /* 4528Sbill * Wait for I/O completion on the buffer; return errors 4538Sbill * to the user. 4548Sbill */ 4558Sbill iowait(bp) 4568Sbill register struct buf *bp; 4578Sbill { 4588Sbill 459124Sbill (void) spl6(); 4608Sbill while ((bp->b_flags&B_DONE)==0) 4618Sbill sleep((caddr_t)bp, PRIBIO); 462124Sbill (void) spl0(); 4638Sbill geterror(bp); 4648Sbill } 4658Sbill 4668Sbill #ifndef FASTVAX 4678Sbill /* 4688Sbill * Unlink a buffer from the available list and mark it busy. 4698Sbill * (internal interface) 4708Sbill */ 4718Sbill notavail(bp) 4728Sbill register struct buf *bp; 4738Sbill { 4748Sbill register s; 4758Sbill 4768Sbill s = spl6(); 4778Sbill bp->av_back->av_forw = bp->av_forw; 4788Sbill bp->av_forw->av_back = bp->av_back; 4798Sbill bp->b_flags |= B_BUSY; 4808Sbill splx(s); 4818Sbill } 4828Sbill #endif 4838Sbill 4848Sbill /* 4858Sbill * Mark I/O complete on a buffer. If the header 4868Sbill * indicates a dirty page push completion, the 4878Sbill * header is inserted into the ``cleaned'' list 4888Sbill * to be processed by the pageout daemon. Otherwise 4898Sbill * release it if I/O is asynchronous, and wake 4908Sbill * up anyone waiting for it. 4918Sbill */ 4928Sbill iodone(bp) 4938Sbill register struct buf *bp; 4948Sbill { 4958Sbill register int s; 4968Sbill 497420Sbill if (bp->b_flags & B_DONE) 498420Sbill panic("dup iodone"); 4998Sbill bp->b_flags |= B_DONE; 5008Sbill if (bp->b_flags & B_DIRTY) { 5018Sbill if (bp->b_flags & B_ERROR) 5028Sbill panic("IO err in push"); 5038Sbill s = spl6(); 5048Sbill cnt.v_pgout++; 5058Sbill bp->av_forw = bclnlist; 5068Sbill bp->b_bcount = swsize[bp - swbuf]; 5078Sbill bp->b_pfcent = swpf[bp - swbuf]; 5088Sbill bclnlist = bp; 5098Sbill if (bswlist.b_flags & B_WANTED) 5108Sbill wakeup((caddr_t)&proc[2]); 5118Sbill splx(s); 512383Sbill return; 5138Sbill } 5148Sbill if (bp->b_flags&B_ASYNC) 5158Sbill brelse(bp); 5168Sbill else { 5178Sbill bp->b_flags &= ~B_WANTED; 5188Sbill wakeup((caddr_t)bp); 5198Sbill } 5208Sbill } 5218Sbill 5228Sbill /* 5238Sbill * Zero the core associated with a buffer. 5248Sbill */ 5258Sbill clrbuf(bp) 5268Sbill struct buf *bp; 5278Sbill { 5288Sbill register *p; 5298Sbill register c; 5308Sbill 5318Sbill p = bp->b_un.b_words; 5328Sbill c = BSIZE/sizeof(int); 5338Sbill do 5348Sbill *p++ = 0; 5358Sbill while (--c); 5368Sbill bp->b_resid = 0; 5378Sbill } 5388Sbill 5398Sbill /* 5408Sbill * swap I/O - 5418Sbill * 5428Sbill * If the flag indicates a dirty page push initiated 5438Sbill * by the pageout daemon, we map the page into the i th 5448Sbill * virtual page of process 2 (the daemon itself) where i is 5458Sbill * the index of the swap header that has been allocated. 5468Sbill * We simply initialize the header and queue the I/O but 5478Sbill * do not wait for completion. When the I/O completes, 5488Sbill * iodone() will link the header to a list of cleaned 5498Sbill * pages to be processed by the pageout daemon. 5508Sbill */ 5518Sbill swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) 5528Sbill struct proc *p; 5538Sbill swblk_t dblkno; 5548Sbill caddr_t addr; 5558Sbill int flag, nbytes; 5568Sbill dev_t dev; 5578Sbill unsigned pfcent; 5588Sbill { 5598Sbill register struct buf *bp; 5608Sbill register int c; 5618Sbill int p2dp; 5628Sbill register struct pte *dpte, *vpte; 5638Sbill 564124Sbill (void) spl6(); 5658Sbill while (bswlist.av_forw == NULL) { 5668Sbill bswlist.b_flags |= B_WANTED; 5678Sbill sleep((caddr_t)&bswlist, PSWP+1); 5688Sbill } 5698Sbill bp = bswlist.av_forw; 5708Sbill bswlist.av_forw = bp->av_forw; 571124Sbill (void) spl0(); 5728Sbill 5738Sbill bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; 5748Sbill if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) 5758Sbill if (rdflg == B_READ) 5768Sbill sum.v_pswpin += btoc(nbytes); 5778Sbill else 5788Sbill sum.v_pswpout += btoc(nbytes); 5798Sbill bp->b_proc = p; 5808Sbill if (flag & B_DIRTY) { 5818Sbill p2dp = ((bp - swbuf) * CLSIZE) * KLMAX; 5828Sbill dpte = dptopte(&proc[2], p2dp); 5838Sbill vpte = vtopte(p, btop(addr)); 5848Sbill for (c = 0; c < nbytes; c += NBPG) { 5858Sbill if (vpte->pg_pfnum == 0 || vpte->pg_fod) 5868Sbill panic("swap bad pte"); 5878Sbill *dpte++ = *vpte++; 5888Sbill } 5898Sbill bp->b_un.b_addr = (caddr_t)ctob(p2dp); 5908Sbill } else 5918Sbill bp->b_un.b_addr = addr; 5928Sbill while (nbytes > 0) { 5938Sbill c = imin(ctob(120), nbytes); 5948Sbill bp->b_bcount = c; 5958Sbill bp->b_blkno = dblkno; 5968Sbill bp->b_dev = dev; 597718Sbill if (flag & B_DIRTY) { 598718Sbill swpf[bp - swbuf] = pfcent; 599718Sbill swsize[bp - swbuf] = nbytes; 600718Sbill } 6018Sbill (*bdevsw[major(dev)].d_strategy)(bp); 6028Sbill if (flag & B_DIRTY) { 6038Sbill if (c < nbytes) 6048Sbill panic("big push"); 6058Sbill return; 6068Sbill } 607124Sbill (void) spl6(); 6088Sbill while((bp->b_flags&B_DONE)==0) 6098Sbill sleep((caddr_t)bp, PSWP); 610124Sbill (void) spl0(); 6118Sbill bp->b_un.b_addr += c; 6128Sbill bp->b_flags &= ~B_DONE; 6138Sbill if (bp->b_flags & B_ERROR) { 6148Sbill if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) 6158Sbill panic("hard IO err in swap"); 6168Sbill swkill(p, (char *)0); 6178Sbill } 6188Sbill nbytes -= c; 6198Sbill dblkno += btoc(c); 6208Sbill } 621124Sbill (void) spl6(); 6228Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 6238Sbill bp->av_forw = bswlist.av_forw; 6248Sbill bswlist.av_forw = bp; 6258Sbill if (bswlist.b_flags & B_WANTED) { 6268Sbill bswlist.b_flags &= ~B_WANTED; 6278Sbill wakeup((caddr_t)&bswlist); 6288Sbill wakeup((caddr_t)&proc[2]); 6298Sbill } 630124Sbill (void) spl0(); 6318Sbill } 6328Sbill 6338Sbill /* 6348Sbill * If rout == 0 then killed on swap error, else 6358Sbill * rout is the name of the routine where we ran out of 6368Sbill * swap space. 6378Sbill */ 6388Sbill swkill(p, rout) 6398Sbill struct proc *p; 6408Sbill char *rout; 6418Sbill { 6428Sbill 6438Sbill printf("%d: ", p->p_pid); 6448Sbill if (rout) 6458Sbill printf("out of swap space in %s\n", rout); 6468Sbill else 6478Sbill printf("killed on swap error\n"); 6488Sbill /* 6498Sbill * To be sure no looping (e.g. in vmsched trying to 6508Sbill * swap out) mark process locked in core (as though 6518Sbill * done by user) after killing it so noone will try 6528Sbill * to swap it out. 6538Sbill */ 654165Sbill psignal(p, SIGKILL); 6558Sbill p->p_flag |= SULOCK; 6568Sbill } 6578Sbill 6588Sbill /* 6598Sbill * make sure all write-behind blocks 6608Sbill * on dev (or NODEV for all) 6618Sbill * are flushed out. 6628Sbill * (from umount and update) 6638Sbill */ 6648Sbill bflush(dev) 6658Sbill dev_t dev; 6668Sbill { 6678Sbill register struct buf *bp; 6688Sbill 6698Sbill loop: 670124Sbill (void) spl6(); 6718Sbill for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) { 6728Sbill if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { 6738Sbill bp->b_flags |= B_ASYNC; 6748Sbill notavail(bp); 6758Sbill bwrite(bp); 6768Sbill goto loop; 6778Sbill } 6788Sbill } 679124Sbill (void) spl0(); 6808Sbill } 6818Sbill 6828Sbill /* 6838Sbill * Raw I/O. The arguments are 6848Sbill * The strategy routine for the device 6858Sbill * A buffer, which will always be a special buffer 6868Sbill * header owned exclusively by the device for this purpose 6878Sbill * The device number 6888Sbill * Read/write flag 6898Sbill * Essentially all the work is computing physical addresses and 6908Sbill * validating them. 6918Sbill * If the user has the proper access privilidges, the process is 6928Sbill * marked 'delayed unlock' and the pages involved in the I/O are 6938Sbill * faulted and locked. After the completion of the I/O, the above pages 6948Sbill * are unlocked. 6958Sbill */ 6968Sbill physio(strat, bp, dev, rw, mincnt) 6978Sbill int (*strat)(); 6988Sbill register struct buf *bp; 6998Sbill unsigned (*mincnt)(); 7008Sbill { 7018Sbill register int c; 7028Sbill char *a; 7038Sbill 7048Sbill if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { 7058Sbill u.u_error = EFAULT; 7068Sbill return; 7078Sbill } 708124Sbill (void) spl6(); 7098Sbill while (bp->b_flags&B_BUSY) { 7108Sbill bp->b_flags |= B_WANTED; 7118Sbill sleep((caddr_t)bp, PRIBIO+1); 7128Sbill } 7138Sbill bp->b_error = 0; 7148Sbill bp->b_proc = u.u_procp; 7158Sbill bp->b_un.b_addr = u.u_base; 7168Sbill while (u.u_count != 0 && bp->b_error==0) { 7178Sbill bp->b_flags = B_BUSY | B_PHYS | rw; 7188Sbill bp->b_dev = dev; 7198Sbill bp->b_blkno = u.u_offset >> PGSHIFT; 7208Sbill bp->b_bcount = u.u_count; 7218Sbill (*mincnt)(bp); 7228Sbill c = bp->b_bcount; 7238Sbill u.u_procp->p_flag |= SPHYSIO; 7248Sbill vslock(a = bp->b_un.b_addr, c); 7258Sbill (*strat)(bp); 726124Sbill (void) spl6(); 7278Sbill while ((bp->b_flags&B_DONE) == 0) 7288Sbill sleep((caddr_t)bp, PRIBIO); 7298Sbill vsunlock(a, c, rw); 7308Sbill u.u_procp->p_flag &= ~SPHYSIO; 7318Sbill if (bp->b_flags&B_WANTED) 7328Sbill wakeup((caddr_t)bp); 733124Sbill (void) spl0(); 7348Sbill bp->b_un.b_addr += c; 7358Sbill u.u_count -= c; 7368Sbill u.u_offset += c; 7378Sbill } 7388Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); 7398Sbill u.u_count = bp->b_resid; 7408Sbill geterror(bp); 7418Sbill } 7428Sbill 7438Sbill /*ARGSUSED*/ 7448Sbill unsigned 7458Sbill minphys(bp) 7468Sbill struct buf *bp; 7478Sbill { 7488Sbill 7498Sbill if (bp->b_bcount > 60 * 1024) 7508Sbill bp->b_bcount = 60 * 1024; 7518Sbill } 7528Sbill 7538Sbill /* 7548Sbill * Pick up the device's error number and pass it to the user; 7558Sbill * if there is an error but the number is 0 set a generalized 7568Sbill * code. Actually the latter is always true because devices 7578Sbill * don't yet return specific errors. 7588Sbill */ 7598Sbill geterror(bp) 7608Sbill register struct buf *bp; 7618Sbill { 7628Sbill 7638Sbill if (bp->b_flags&B_ERROR) 7648Sbill if ((u.u_error = bp->b_error)==0) 7658Sbill u.u_error = EIO; 7668Sbill } 767