1*91Sbill /* vfs_cluster.c 3.2 10/14/12 */ 28Sbill 3*91Sbill int distrust = 1; /* TEST */ 48Sbill #include "../h/param.h" 58Sbill #include "../h/systm.h" 68Sbill #include "../h/dir.h" 78Sbill #include "../h/user.h" 88Sbill #include "../h/buf.h" 98Sbill #include "../h/conf.h" 108Sbill #include "../h/proc.h" 118Sbill #include "../h/seg.h" 128Sbill #include "../h/pte.h" 138Sbill #include "../h/vm.h" 148Sbill 15*91Sbill /* 16*91Sbill * The following several routines allocate and free 17*91Sbill * buffers with various side effects. In general the 18*91Sbill * arguments to an allocate routine are a device and 19*91Sbill * a block number, and the value is a pointer to 20*91Sbill * to the buffer header; the buffer is marked "busy" 21*91Sbill * so that no one else can touch it. If the block was 22*91Sbill * already in core, no I/O need be done; if it is 23*91Sbill * already busy, the process waits until it becomes free. 24*91Sbill * The following routines allocate a buffer: 25*91Sbill * getblk 26*91Sbill * bread 27*91Sbill * breada 28*91Sbill * baddr (if it is incore) 29*91Sbill * Eventually the buffer must be released, possibly with the 30*91Sbill * side effect of writing it out, by using one of 31*91Sbill * bwrite 32*91Sbill * bdwrite 33*91Sbill * bawrite 34*91Sbill * brelse 35*91Sbill */ 36*91Sbill 37*91Sbill #define BUFHSZ 63 38*91Sbill #define BUFHASH(blkno) (blkno % BUFHSZ) 39*91Sbill short bufhash[BUFHSZ]; 40*91Sbill 41*91Sbill /* 42*91Sbill * Initialize hash links for buffers. 43*91Sbill */ 44*91Sbill bhinit() 45*91Sbill { 46*91Sbill register int i; 47*91Sbill 48*91Sbill for (i = 0; i < BUFHSZ; i++) 49*91Sbill bufhash[i] = -1; 50*91Sbill } 51*91Sbill 528Sbill /* #define DISKMON 1 */ 538Sbill 548Sbill #ifdef DISKMON 558Sbill struct { 568Sbill int nbuf; 578Sbill long nread; 588Sbill long nreada; 598Sbill long ncache; 608Sbill long nwrite; 618Sbill long bufcount[NBUF]; 628Sbill } io_info; 638Sbill #endif 648Sbill 658Sbill /* 668Sbill * Swap IO headers - 678Sbill * They contain the necessary information for the swap I/O. 688Sbill * At any given time, a swap header can be in three 698Sbill * different lists. When free it is in the free list, 708Sbill * when allocated and the I/O queued, it is on the swap 718Sbill * device list, and finally, if the operation was a dirty 728Sbill * page push, when the I/O completes, it is inserted 738Sbill * in a list of cleaned pages to be processed by the pageout daemon. 748Sbill */ 758Sbill struct buf swbuf[NSWBUF]; 768Sbill short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */ 778Sbill int swpf[NSWBUF]; 788Sbill 798Sbill 808Sbill #ifdef FASTVAX 818Sbill #define notavail(bp) \ 828Sbill { \ 838Sbill int s = spl6(); \ 848Sbill (bp)->av_back->av_forw = (bp)->av_forw; \ 858Sbill (bp)->av_forw->av_back = (bp)->av_back; \ 868Sbill (bp)->b_flags |= B_BUSY; \ 878Sbill splx(s); \ 888Sbill } 898Sbill #endif 908Sbill 918Sbill /* 928Sbill * Read in (if necessary) the block and return a buffer pointer. 938Sbill */ 948Sbill struct buf * 958Sbill bread(dev, blkno) 968Sbill dev_t dev; 978Sbill daddr_t blkno; 988Sbill { 998Sbill register struct buf *bp; 1008Sbill 1018Sbill bp = getblk(dev, blkno); 1028Sbill if (bp->b_flags&B_DONE) { 1038Sbill #ifdef DISKMON 1048Sbill io_info.ncache++; 1058Sbill #endif 1068Sbill return(bp); 1078Sbill } 1088Sbill bp->b_flags |= B_READ; 1098Sbill bp->b_bcount = BSIZE; 1108Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1118Sbill #ifdef DISKMON 1128Sbill io_info.nread++; 1138Sbill #endif 1148Sbill u.u_vm.vm_inblk++; /* pay for read */ 1158Sbill iowait(bp); 1168Sbill return(bp); 1178Sbill } 1188Sbill 1198Sbill /* 1208Sbill * Read in the block, like bread, but also start I/O on the 1218Sbill * read-ahead block (which is not allocated to the caller) 1228Sbill */ 1238Sbill struct buf * 1248Sbill breada(dev, blkno, rablkno) 1258Sbill dev_t dev; 1268Sbill daddr_t blkno, rablkno; 1278Sbill { 1288Sbill register struct buf *bp, *rabp; 1298Sbill 1308Sbill bp = NULL; 1318Sbill if (!incore(dev, blkno)) { 1328Sbill bp = getblk(dev, blkno); 1338Sbill if ((bp->b_flags&B_DONE) == 0) { 1348Sbill bp->b_flags |= B_READ; 1358Sbill bp->b_bcount = BSIZE; 1368Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1378Sbill #ifdef DISKMON 1388Sbill io_info.nread++; 1398Sbill #endif 1408Sbill u.u_vm.vm_inblk++; /* pay for read */ 1418Sbill } 1428Sbill } 1438Sbill if (rablkno && !incore(dev, rablkno)) { 1448Sbill rabp = getblk(dev, rablkno); 1458Sbill if (rabp->b_flags & B_DONE) 1468Sbill brelse(rabp); 1478Sbill else { 1488Sbill rabp->b_flags |= B_READ|B_ASYNC; 1498Sbill rabp->b_bcount = BSIZE; 1508Sbill (*bdevsw[major(dev)].d_strategy)(rabp); 1518Sbill #ifdef DISKMON 1528Sbill io_info.nreada++; 1538Sbill #endif 1548Sbill u.u_vm.vm_inblk++; /* pay in advance */ 1558Sbill } 1568Sbill } 1578Sbill if(bp == NULL) 1588Sbill return(bread(dev, blkno)); 1598Sbill iowait(bp); 1608Sbill return(bp); 1618Sbill } 1628Sbill 1638Sbill /* 1648Sbill * Write the buffer, waiting for completion. 1658Sbill * Then release the buffer. 1668Sbill */ 1678Sbill bwrite(bp) 1688Sbill register struct buf *bp; 1698Sbill { 1708Sbill register flag; 1718Sbill 1728Sbill flag = bp->b_flags; 1738Sbill bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); 1748Sbill bp->b_bcount = BSIZE; 1758Sbill #ifdef DISKMON 1768Sbill io_info.nwrite++; 1778Sbill #endif 1788Sbill if ((flag&B_DELWRI) == 0) 1798Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 1808Sbill (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 1818Sbill if ((flag&B_ASYNC) == 0) { 1828Sbill iowait(bp); 1838Sbill brelse(bp); 1848Sbill } else if (flag & B_DELWRI) 1858Sbill bp->b_flags |= B_AGE; 1868Sbill else 1878Sbill geterror(bp); 1888Sbill } 1898Sbill 1908Sbill /* 1918Sbill * Release the buffer, marking it so that if it is grabbed 1928Sbill * for another purpose it will be written out before being 1938Sbill * given up (e.g. when writing a partial block where it is 1948Sbill * assumed that another write for the same block will soon follow). 1958Sbill * This can't be done for magtape, since writes must be done 1968Sbill * in the same order as requested. 1978Sbill */ 1988Sbill bdwrite(bp) 1998Sbill register struct buf *bp; 2008Sbill { 2018Sbill register struct buf *dp; 2028Sbill 2038Sbill if ((bp->b_flags&B_DELWRI) == 0) 2048Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 2058Sbill dp = bdevsw[major(bp->b_dev)].d_tab; 2068Sbill if(dp->b_flags & B_TAPE) 2078Sbill bawrite(bp); 2088Sbill else { 2098Sbill bp->b_flags |= B_DELWRI | B_DONE; 2108Sbill brelse(bp); 2118Sbill } 2128Sbill } 2138Sbill 2148Sbill /* 2158Sbill * Release the buffer, start I/O on it, but don't wait for completion. 2168Sbill */ 2178Sbill bawrite(bp) 2188Sbill register struct buf *bp; 2198Sbill { 2208Sbill 2218Sbill bp->b_flags |= B_ASYNC; 2228Sbill bwrite(bp); 2238Sbill } 2248Sbill 2258Sbill /* 2268Sbill * release the buffer, with no I/O implied. 2278Sbill */ 2288Sbill brelse(bp) 2298Sbill register struct buf *bp; 2308Sbill { 2318Sbill register struct buf **backp; 2328Sbill register s; 2338Sbill 2348Sbill if (bp->b_flags&B_WANTED) 2358Sbill wakeup((caddr_t)bp); 2368Sbill if (bfreelist.b_flags&B_WANTED) { 2378Sbill bfreelist.b_flags &= ~B_WANTED; 2388Sbill wakeup((caddr_t)&bfreelist); 2398Sbill } 240*91Sbill if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) { 241*91Sbill bunhash(bp); 2428Sbill bp->b_dev = NODEV; /* no assoc. on error */ 243*91Sbill } 2448Sbill s = spl6(); 2458Sbill if(bp->b_flags & (B_AGE|B_ERROR)) { 2468Sbill backp = &bfreelist.av_forw; 2478Sbill (*backp)->av_back = bp; 2488Sbill bp->av_forw = *backp; 2498Sbill *backp = bp; 2508Sbill bp->av_back = &bfreelist; 2518Sbill } else { 2528Sbill backp = &bfreelist.av_back; 2538Sbill (*backp)->av_forw = bp; 2548Sbill bp->av_back = *backp; 2558Sbill *backp = bp; 2568Sbill bp->av_forw = &bfreelist; 2578Sbill } 2588Sbill bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE); 2598Sbill splx(s); 2608Sbill } 2618Sbill 262*91Sbill /* HASHING IS A GUN LIKE CHANGE, THIS IS THE SAFETY */ 263*91Sbill struct buf * 264*91Sbill oincore(dev, blkno) 265*91Sbill dev_t dev; 266*91Sbill daddr_t blkno; 267*91Sbill { 268*91Sbill register struct buf *bp; 269*91Sbill register struct buf *dp; 270*91Sbill register int dblkno = fsbtodb(blkno); 271*91Sbill 272*91Sbill dp = bdevsw[major(dev)].d_tab; 273*91Sbill for (bp=dp->b_forw; bp != dp; bp = bp->b_forw) 274*91Sbill if (bp->b_blkno==dblkno && bp->b_dev==dev && 275*91Sbill bp >= buf && bp < &buf[NBUF]) 276*91Sbill return (bp); 277*91Sbill return ((struct buf *)0); 278*91Sbill } 279*91Sbill 2808Sbill /* 2818Sbill * See if the block is associated with some buffer 2828Sbill * (mainly to avoid getting hung up on a wait in breada) 2838Sbill */ 2848Sbill incore(dev, blkno) 2858Sbill dev_t dev; 2868Sbill daddr_t blkno; 2878Sbill { 2888Sbill register struct buf *bp; 2898Sbill register int dblkno = fsbtodb(blkno); 2908Sbill 291*91Sbill for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; 292*91Sbill bp = &buf[bp->b_hlink]) 293*91Sbill if (bp->b_blkno == dblkno && bp->b_dev == dev) { 294*91Sbill if (distrust) 295*91Sbill if (oincore(dev, blkno) != bp) /* TEST */ 296*91Sbill panic("incore 1"); /* TEST */ 297*91Sbill return (1); 298*91Sbill } 299*91Sbill if (distrust) 300*91Sbill if (oincore(dev, blkno)) /* TEST */ 301*91Sbill panic("incore 2"); /* TEST */ 302*91Sbill return (0); 3038Sbill } 3048Sbill 3058Sbill struct buf * 3068Sbill baddr(dev, blkno) 3078Sbill dev_t dev; 3088Sbill daddr_t blkno; 3098Sbill { 3108Sbill 3118Sbill if (incore(dev, blkno)) 3128Sbill return (bread(dev, blkno)); 3138Sbill return (0); 3148Sbill } 3158Sbill 3168Sbill /* 3178Sbill * Assign a buffer for the given block. If the appropriate 3188Sbill * block is already associated, return it; otherwise search 3198Sbill * for the oldest non-busy buffer and reassign it. 3208Sbill */ 3218Sbill struct buf * 3228Sbill getblk(dev, blkno) 3238Sbill dev_t dev; 3248Sbill daddr_t blkno; 3258Sbill { 326*91Sbill register struct buf *bp, *dp, *ep; 327*91Sbill register int i, x; 3288Sbill register int dblkno = fsbtodb(blkno); 3298Sbill 3308Sbill loop: 3318Sbill VOID spl0(); 332*91Sbill for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; 333*91Sbill bp = &buf[bp->b_hlink]) { 334*91Sbill if (bp->b_blkno != dblkno || bp->b_dev != dev) 3358Sbill continue; 336*91Sbill if (distrust) 337*91Sbill if (bp != oincore(dev, blkno)) /* TEST */ 338*91Sbill panic("getblk 1"); /* TEST */ 3398Sbill VOID spl6(); 3408Sbill if (bp->b_flags&B_BUSY) { 3418Sbill bp->b_flags |= B_WANTED; 3428Sbill sleep((caddr_t)bp, PRIBIO+1); 3438Sbill goto loop; 3448Sbill } 3458Sbill VOID spl0(); 3468Sbill #ifdef DISKMON 3478Sbill i = 0; 3488Sbill dp = bp->av_forw; 3498Sbill while (dp != &bfreelist) { 3508Sbill i++; 3518Sbill dp = dp->av_forw; 3528Sbill } 3538Sbill if (i<NBUF) 3548Sbill io_info.bufcount[i]++; 3558Sbill #endif 3568Sbill notavail(bp); 3578Sbill bp->b_flags |= B_CACHE; 3588Sbill return(bp); 3598Sbill } 360*91Sbill if (distrust) 361*91Sbill if (oincore(dev, blkno)) /* TEST */ 362*91Sbill panic("getblk 2"); /* TEST */ 363*91Sbill if (major(dev) >= nblkdev) 364*91Sbill panic("blkdev"); 365*91Sbill dp = bdevsw[major(dev)].d_tab; 366*91Sbill if (dp == NULL) 367*91Sbill panic("devtab"); 3688Sbill VOID spl6(); 3698Sbill if (bfreelist.av_forw == &bfreelist) { 3708Sbill bfreelist.b_flags |= B_WANTED; 3718Sbill sleep((caddr_t)&bfreelist, PRIBIO+1); 3728Sbill goto loop; 3738Sbill } 3748Sbill spl0(); 3758Sbill bp = bfreelist.av_forw; 3768Sbill notavail(bp); 3778Sbill if (bp->b_flags & B_DELWRI) { 3788Sbill bp->b_flags |= B_ASYNC; 3798Sbill bwrite(bp); 3808Sbill goto loop; 3818Sbill } 382*91Sbill if (bp->b_dev == NODEV) 383*91Sbill goto done; 384*91Sbill /* INLINE EXPANSION OF bunhash(bp) */ 385*91Sbill i = BUFHASH(dbtofsb(bp->b_blkno)); 386*91Sbill x = bp - buf; 387*91Sbill if (bufhash[i] == x) { 388*91Sbill bufhash[i] = bp->b_hlink; 389*91Sbill } else { 390*91Sbill for (ep = &buf[bufhash[i]]; ep != &buf[-1]; 391*91Sbill ep = &buf[ep->b_hlink]) 392*91Sbill if (ep->b_hlink == x) { 393*91Sbill ep->b_hlink = bp->b_hlink; 394*91Sbill goto done; 395*91Sbill } 396*91Sbill panic("getblk"); 397*91Sbill } 398*91Sbill done: 399*91Sbill /* END INLINE EXPANSION */ 4008Sbill bp->b_flags = B_BUSY; 4018Sbill bp->b_back->b_forw = bp->b_forw; 4028Sbill bp->b_forw->b_back = bp->b_back; 4038Sbill bp->b_forw = dp->b_forw; 4048Sbill bp->b_back = dp; 4058Sbill dp->b_forw->b_back = bp; 4068Sbill dp->b_forw = bp; 4078Sbill bp->b_dev = dev; 4088Sbill bp->b_blkno = dblkno; 409*91Sbill i = BUFHASH(blkno); 410*91Sbill bp->b_hlink = bufhash[i]; 411*91Sbill bufhash[i] = bp - buf; 4128Sbill return(bp); 4138Sbill } 4148Sbill 4158Sbill /* 4168Sbill * get an empty block, 4178Sbill * not assigned to any particular device 4188Sbill */ 4198Sbill struct buf * 4208Sbill geteblk() 4218Sbill { 422*91Sbill register struct buf *bp, *dp, *ep; 423*91Sbill register int i, x; 4248Sbill 4258Sbill loop: 4268Sbill VOID spl6(); 4278Sbill while (bfreelist.av_forw == &bfreelist) { 4288Sbill bfreelist.b_flags |= B_WANTED; 4298Sbill sleep((caddr_t)&bfreelist, PRIBIO+1); 4308Sbill } 4318Sbill VOID spl0(); 4328Sbill dp = &bfreelist; 4338Sbill bp = bfreelist.av_forw; 4348Sbill notavail(bp); 4358Sbill if (bp->b_flags & B_DELWRI) { 4368Sbill bp->b_flags |= B_ASYNC; 4378Sbill bwrite(bp); 4388Sbill goto loop; 4398Sbill } 440*91Sbill if (bp->b_dev != NODEV) 441*91Sbill bunhash(bp); 4428Sbill bp->b_flags = B_BUSY; 4438Sbill bp->b_back->b_forw = bp->b_forw; 4448Sbill bp->b_forw->b_back = bp->b_back; 4458Sbill bp->b_forw = dp->b_forw; 4468Sbill bp->b_back = dp; 4478Sbill dp->b_forw->b_back = bp; 4488Sbill dp->b_forw = bp; 4498Sbill bp->b_dev = (dev_t)NODEV; 450*91Sbill bp->b_hlink = -1; 4518Sbill return(bp); 4528Sbill } 4538Sbill 454*91Sbill bunhash(bp) 455*91Sbill register struct buf *bp; 456*91Sbill { 457*91Sbill register struct buf *ep; 458*91Sbill register int i, x; 459*91Sbill 460*91Sbill if (bp->b_dev == NODEV) 461*91Sbill return; 462*91Sbill i = BUFHASH(dbtofsb(bp->b_blkno)); 463*91Sbill x = bp - buf; 464*91Sbill if (bufhash[i] == x) { 465*91Sbill bufhash[i] = bp->b_hlink; 466*91Sbill return; 467*91Sbill } 468*91Sbill for (ep = &buf[bufhash[i]]; ep != &buf[-1]; 469*91Sbill ep = &buf[ep->b_hlink]) 470*91Sbill if (ep->b_hlink == x) { 471*91Sbill ep->b_hlink = bp->b_hlink; 472*91Sbill return; 473*91Sbill } 474*91Sbill panic("bunhash"); 475*91Sbill } 476*91Sbill 4778Sbill /* 4788Sbill * Wait for I/O completion on the buffer; return errors 4798Sbill * to the user. 4808Sbill */ 4818Sbill iowait(bp) 4828Sbill register struct buf *bp; 4838Sbill { 4848Sbill 4858Sbill VOID spl6(); 4868Sbill while ((bp->b_flags&B_DONE)==0) 4878Sbill sleep((caddr_t)bp, PRIBIO); 4888Sbill VOID spl0(); 4898Sbill geterror(bp); 4908Sbill } 4918Sbill 4928Sbill #ifndef FASTVAX 4938Sbill /* 4948Sbill * Unlink a buffer from the available list and mark it busy. 4958Sbill * (internal interface) 4968Sbill */ 4978Sbill notavail(bp) 4988Sbill register struct buf *bp; 4998Sbill { 5008Sbill register s; 5018Sbill 5028Sbill s = spl6(); 5038Sbill bp->av_back->av_forw = bp->av_forw; 5048Sbill bp->av_forw->av_back = bp->av_back; 5058Sbill bp->b_flags |= B_BUSY; 5068Sbill splx(s); 5078Sbill } 5088Sbill #endif 5098Sbill 5108Sbill /* 5118Sbill * Mark I/O complete on a buffer. If the header 5128Sbill * indicates a dirty page push completion, the 5138Sbill * header is inserted into the ``cleaned'' list 5148Sbill * to be processed by the pageout daemon. Otherwise 5158Sbill * release it if I/O is asynchronous, and wake 5168Sbill * up anyone waiting for it. 5178Sbill */ 5188Sbill iodone(bp) 5198Sbill register struct buf *bp; 5208Sbill { 5218Sbill register int s; 5228Sbill 5238Sbill bp->b_flags |= B_DONE; 5248Sbill if (bp->b_flags & B_DIRTY) { 5258Sbill if (bp->b_flags & B_ERROR) 5268Sbill panic("IO err in push"); 5278Sbill s = spl6(); 5288Sbill cnt.v_pgout++; 5298Sbill bp->av_forw = bclnlist; 5308Sbill bp->b_bcount = swsize[bp - swbuf]; 5318Sbill bp->b_pfcent = swpf[bp - swbuf]; 5328Sbill bclnlist = bp; 5338Sbill if (bswlist.b_flags & B_WANTED) 5348Sbill wakeup((caddr_t)&proc[2]); 5358Sbill splx(s); 5368Sbill } 5378Sbill if (bp->b_flags&B_ASYNC) 5388Sbill brelse(bp); 5398Sbill else { 5408Sbill bp->b_flags &= ~B_WANTED; 5418Sbill wakeup((caddr_t)bp); 5428Sbill } 5438Sbill } 5448Sbill 5458Sbill /* 5468Sbill * Zero the core associated with a buffer. 5478Sbill */ 5488Sbill clrbuf(bp) 5498Sbill struct buf *bp; 5508Sbill { 5518Sbill register *p; 5528Sbill register c; 5538Sbill 5548Sbill p = bp->b_un.b_words; 5558Sbill c = BSIZE/sizeof(int); 5568Sbill do 5578Sbill *p++ = 0; 5588Sbill while (--c); 5598Sbill bp->b_resid = 0; 5608Sbill } 5618Sbill 5628Sbill /* 5638Sbill * swap I/O - 5648Sbill * 5658Sbill * If the flag indicates a dirty page push initiated 5668Sbill * by the pageout daemon, we map the page into the i th 5678Sbill * virtual page of process 2 (the daemon itself) where i is 5688Sbill * the index of the swap header that has been allocated. 5698Sbill * We simply initialize the header and queue the I/O but 5708Sbill * do not wait for completion. When the I/O completes, 5718Sbill * iodone() will link the header to a list of cleaned 5728Sbill * pages to be processed by the pageout daemon. 5738Sbill */ 5748Sbill swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) 5758Sbill struct proc *p; 5768Sbill swblk_t dblkno; 5778Sbill caddr_t addr; 5788Sbill int flag, nbytes; 5798Sbill dev_t dev; 5808Sbill unsigned pfcent; 5818Sbill { 5828Sbill register struct buf *bp; 5838Sbill register int c; 5848Sbill int p2dp; 5858Sbill register struct pte *dpte, *vpte; 5868Sbill 5878Sbill VOID spl6(); 5888Sbill while (bswlist.av_forw == NULL) { 5898Sbill bswlist.b_flags |= B_WANTED; 5908Sbill sleep((caddr_t)&bswlist, PSWP+1); 5918Sbill } 5928Sbill bp = bswlist.av_forw; 5938Sbill bswlist.av_forw = bp->av_forw; 5948Sbill VOID spl0(); 5958Sbill 5968Sbill bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; 5978Sbill if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) 5988Sbill if (rdflg == B_READ) 5998Sbill sum.v_pswpin += btoc(nbytes); 6008Sbill else 6018Sbill sum.v_pswpout += btoc(nbytes); 6028Sbill bp->b_proc = p; 6038Sbill if (flag & B_DIRTY) { 6048Sbill p2dp = ((bp - swbuf) * CLSIZE) * KLMAX; 6058Sbill dpte = dptopte(&proc[2], p2dp); 6068Sbill vpte = vtopte(p, btop(addr)); 6078Sbill for (c = 0; c < nbytes; c += NBPG) { 6088Sbill if (vpte->pg_pfnum == 0 || vpte->pg_fod) 6098Sbill panic("swap bad pte"); 6108Sbill *dpte++ = *vpte++; 6118Sbill } 6128Sbill bp->b_un.b_addr = (caddr_t)ctob(p2dp); 6138Sbill } else 6148Sbill bp->b_un.b_addr = addr; 6158Sbill while (nbytes > 0) { 6168Sbill c = imin(ctob(120), nbytes); 6178Sbill bp->b_bcount = c; 6188Sbill bp->b_blkno = dblkno; 6198Sbill bp->b_dev = dev; 6208Sbill if (dev == swapdev) 6218Sbill bp->b_blkno += swplo; 6228Sbill (*bdevsw[major(dev)].d_strategy)(bp); 6238Sbill if (flag & B_DIRTY) { 6248Sbill if (c < nbytes) 6258Sbill panic("big push"); 6268Sbill swsize[bp - swbuf] = nbytes; 6278Sbill swpf[bp - swbuf] = pfcent; 6288Sbill return; 6298Sbill } 6308Sbill VOID spl6(); 6318Sbill while((bp->b_flags&B_DONE)==0) 6328Sbill sleep((caddr_t)bp, PSWP); 6338Sbill VOID spl0(); 6348Sbill bp->b_un.b_addr += c; 6358Sbill bp->b_flags &= ~B_DONE; 6368Sbill if (bp->b_flags & B_ERROR) { 6378Sbill if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) 6388Sbill panic("hard IO err in swap"); 6398Sbill swkill(p, (char *)0); 6408Sbill } 6418Sbill nbytes -= c; 6428Sbill dblkno += btoc(c); 6438Sbill } 6448Sbill VOID spl6(); 6458Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 6468Sbill bp->av_forw = bswlist.av_forw; 6478Sbill bswlist.av_forw = bp; 6488Sbill if (bswlist.b_flags & B_WANTED) { 6498Sbill bswlist.b_flags &= ~B_WANTED; 6508Sbill wakeup((caddr_t)&bswlist); 6518Sbill wakeup((caddr_t)&proc[2]); 6528Sbill } 6538Sbill VOID spl0(); 6548Sbill } 6558Sbill 6568Sbill /* 6578Sbill * If rout == 0 then killed on swap error, else 6588Sbill * rout is the name of the routine where we ran out of 6598Sbill * swap space. 6608Sbill */ 6618Sbill swkill(p, rout) 6628Sbill struct proc *p; 6638Sbill char *rout; 6648Sbill { 6658Sbill 6668Sbill printf("%d: ", p->p_pid); 6678Sbill if (rout) 6688Sbill printf("out of swap space in %s\n", rout); 6698Sbill else 6708Sbill printf("killed on swap error\n"); 6718Sbill /* 6728Sbill * To be sure no looping (e.g. in vmsched trying to 6738Sbill * swap out) mark process locked in core (as though 6748Sbill * done by user) after killing it so noone will try 6758Sbill * to swap it out. 6768Sbill */ 6778Sbill psignal(p, SIGKIL); 6788Sbill p->p_flag |= SULOCK; 6798Sbill } 6808Sbill 6818Sbill /* 6828Sbill * make sure all write-behind blocks 6838Sbill * on dev (or NODEV for all) 6848Sbill * are flushed out. 6858Sbill * (from umount and update) 6868Sbill */ 6878Sbill bflush(dev) 6888Sbill dev_t dev; 6898Sbill { 6908Sbill register struct buf *bp; 6918Sbill 6928Sbill loop: 6938Sbill VOID spl6(); 6948Sbill for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) { 6958Sbill if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { 6968Sbill bp->b_flags |= B_ASYNC; 6978Sbill notavail(bp); 6988Sbill bwrite(bp); 6998Sbill goto loop; 7008Sbill } 7018Sbill } 7028Sbill VOID spl0(); 7038Sbill } 7048Sbill 7058Sbill /* 7068Sbill * Raw I/O. The arguments are 7078Sbill * The strategy routine for the device 7088Sbill * A buffer, which will always be a special buffer 7098Sbill * header owned exclusively by the device for this purpose 7108Sbill * The device number 7118Sbill * Read/write flag 7128Sbill * Essentially all the work is computing physical addresses and 7138Sbill * validating them. 7148Sbill * If the user has the proper access privilidges, the process is 7158Sbill * marked 'delayed unlock' and the pages involved in the I/O are 7168Sbill * faulted and locked. After the completion of the I/O, the above pages 7178Sbill * are unlocked. 7188Sbill */ 7198Sbill physio(strat, bp, dev, rw, mincnt) 7208Sbill int (*strat)(); 7218Sbill register struct buf *bp; 7228Sbill unsigned (*mincnt)(); 7238Sbill { 7248Sbill register int c; 7258Sbill char *a; 7268Sbill 7278Sbill if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { 7288Sbill u.u_error = EFAULT; 7298Sbill return; 7308Sbill } 7318Sbill VOID spl6(); 7328Sbill while (bp->b_flags&B_BUSY) { 7338Sbill bp->b_flags |= B_WANTED; 7348Sbill sleep((caddr_t)bp, PRIBIO+1); 7358Sbill } 7368Sbill bp->b_error = 0; 7378Sbill bp->b_proc = u.u_procp; 7388Sbill bp->b_un.b_addr = u.u_base; 7398Sbill while (u.u_count != 0 && bp->b_error==0) { 7408Sbill bp->b_flags = B_BUSY | B_PHYS | rw; 7418Sbill bp->b_dev = dev; 7428Sbill bp->b_blkno = u.u_offset >> PGSHIFT; 7438Sbill bp->b_bcount = u.u_count; 7448Sbill (*mincnt)(bp); 7458Sbill c = bp->b_bcount; 7468Sbill u.u_procp->p_flag |= SPHYSIO; 7478Sbill vslock(a = bp->b_un.b_addr, c); 7488Sbill (*strat)(bp); 7498Sbill VOID spl6(); 7508Sbill while ((bp->b_flags&B_DONE) == 0) 7518Sbill sleep((caddr_t)bp, PRIBIO); 7528Sbill vsunlock(a, c, rw); 7538Sbill u.u_procp->p_flag &= ~SPHYSIO; 7548Sbill if (bp->b_flags&B_WANTED) 7558Sbill wakeup((caddr_t)bp); 7568Sbill VOID spl0(); 7578Sbill bp->b_un.b_addr += c; 7588Sbill u.u_count -= c; 7598Sbill u.u_offset += c; 7608Sbill } 7618Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); 7628Sbill u.u_count = bp->b_resid; 7638Sbill geterror(bp); 7648Sbill } 7658Sbill 7668Sbill /*ARGSUSED*/ 7678Sbill unsigned 7688Sbill minphys(bp) 7698Sbill struct buf *bp; 7708Sbill { 7718Sbill 7728Sbill if (bp->b_bcount > 60 * 1024) 7738Sbill bp->b_bcount = 60 * 1024; 7748Sbill } 7758Sbill 7768Sbill /* 7778Sbill * Pick up the device's error number and pass it to the user; 7788Sbill * if there is an error but the number is 0 set a generalized 7798Sbill * code. Actually the latter is always true because devices 7808Sbill * don't yet return specific errors. 7818Sbill */ 7828Sbill geterror(bp) 7838Sbill register struct buf *bp; 7848Sbill { 7858Sbill 7868Sbill if (bp->b_flags&B_ERROR) 7878Sbill if ((u.u_error = bp->b_error)==0) 7888Sbill u.u_error = EIO; 7898Sbill } 790