1*2299Skre /* vfs_cluster.c 4.5 01/28/81 */ 28Sbill 38Sbill #include "../h/param.h" 48Sbill #include "../h/systm.h" 58Sbill #include "../h/dir.h" 68Sbill #include "../h/user.h" 78Sbill #include "../h/buf.h" 88Sbill #include "../h/conf.h" 98Sbill #include "../h/proc.h" 108Sbill #include "../h/seg.h" 118Sbill #include "../h/pte.h" 128Sbill #include "../h/vm.h" 132045Swnj #include "../h/trace.h" 148Sbill 1591Sbill /* 1691Sbill * The following several routines allocate and free 1791Sbill * buffers with various side effects. In general the 1891Sbill * arguments to an allocate routine are a device and 1991Sbill * a block number, and the value is a pointer to 2091Sbill * to the buffer header; the buffer is marked "busy" 2191Sbill * so that no one else can touch it. If the block was 2291Sbill * already in core, no I/O need be done; if it is 2391Sbill * already busy, the process waits until it becomes free. 2491Sbill * The following routines allocate a buffer: 2591Sbill * getblk 2691Sbill * bread 2791Sbill * breada 2891Sbill * baddr (if it is incore) 2991Sbill * Eventually the buffer must be released, possibly with the 3091Sbill * side effect of writing it out, by using one of 3191Sbill * bwrite 3291Sbill * bdwrite 3391Sbill * bawrite 3491Sbill * brelse 3591Sbill */ 3691Sbill 3791Sbill #define BUFHSZ 63 3891Sbill #define BUFHASH(blkno) (blkno % BUFHSZ) 3991Sbill short bufhash[BUFHSZ]; 4091Sbill 4191Sbill /* 4291Sbill * Initialize hash links for buffers. 4391Sbill */ 4491Sbill bhinit() 4591Sbill { 4691Sbill register int i; 4791Sbill 4891Sbill for (i = 0; i < BUFHSZ; i++) 4991Sbill bufhash[i] = -1; 5091Sbill } 5191Sbill 528Sbill /* #define DISKMON 1 */ 538Sbill 548Sbill #ifdef DISKMON 558Sbill struct { 568Sbill int nbuf; 578Sbill long nread; 588Sbill long nreada; 598Sbill long ncache; 608Sbill long nwrite; 618Sbill long bufcount[NBUF]; 628Sbill } io_info; 638Sbill #endif 648Sbill 658Sbill /* 668Sbill * Swap IO headers - 678Sbill * They contain the necessary information for the swap I/O. 688Sbill * At any given time, a swap header can be in three 698Sbill * different lists. When free it is in the free list, 708Sbill * when allocated and the I/O queued, it is on the swap 718Sbill * device list, and finally, if the operation was a dirty 728Sbill * page push, when the I/O completes, it is inserted 738Sbill * in a list of cleaned pages to be processed by the pageout daemon. 748Sbill */ 758Sbill struct buf swbuf[NSWBUF]; 768Sbill short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */ 778Sbill int swpf[NSWBUF]; 788Sbill 798Sbill 808Sbill #ifdef FASTVAX 818Sbill #define notavail(bp) \ 828Sbill { \ 838Sbill int s = spl6(); \ 848Sbill (bp)->av_back->av_forw = (bp)->av_forw; \ 858Sbill (bp)->av_forw->av_back = (bp)->av_back; \ 868Sbill (bp)->b_flags |= B_BUSY; \ 878Sbill splx(s); \ 888Sbill } 898Sbill #endif 908Sbill 918Sbill /* 928Sbill * Read in (if necessary) the block and return a buffer pointer. 938Sbill */ 948Sbill struct buf * 958Sbill bread(dev, blkno) 968Sbill dev_t dev; 978Sbill daddr_t blkno; 988Sbill { 998Sbill register struct buf *bp; 1008Sbill 1018Sbill bp = getblk(dev, blkno); 1028Sbill if (bp->b_flags&B_DONE) { 1032045Swnj #ifdef EPAWNJ 1042045Swnj trace(TR_BREAD|TR_HIT, dev, blkno); 1052045Swnj #endif 1068Sbill #ifdef DISKMON 1078Sbill io_info.ncache++; 1088Sbill #endif 1098Sbill return(bp); 1108Sbill } 1118Sbill bp->b_flags |= B_READ; 1128Sbill bp->b_bcount = BSIZE; 1138Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1142045Swnj #ifdef EPAWNJ 1152045Swnj trace(TR_BREAD|TR_MISS, dev, blkno); 1162045Swnj #endif 1178Sbill #ifdef DISKMON 1188Sbill io_info.nread++; 1198Sbill #endif 1208Sbill u.u_vm.vm_inblk++; /* pay for read */ 1218Sbill iowait(bp); 1228Sbill return(bp); 1238Sbill } 1248Sbill 1258Sbill /* 1268Sbill * Read in the block, like bread, but also start I/O on the 1278Sbill * read-ahead block (which is not allocated to the caller) 1288Sbill */ 1298Sbill struct buf * 1308Sbill breada(dev, blkno, rablkno) 1318Sbill dev_t dev; 1328Sbill daddr_t blkno, rablkno; 1338Sbill { 1348Sbill register struct buf *bp, *rabp; 1358Sbill 1368Sbill bp = NULL; 1378Sbill if (!incore(dev, blkno)) { 1388Sbill bp = getblk(dev, blkno); 1398Sbill if ((bp->b_flags&B_DONE) == 0) { 1408Sbill bp->b_flags |= B_READ; 1418Sbill bp->b_bcount = BSIZE; 1428Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1432045Swnj #ifdef EPAWNJ 1442045Swnj trace(TR_BREAD|TR_MISS, dev, blkno); 1452045Swnj #endif 1468Sbill #ifdef DISKMON 1478Sbill io_info.nread++; 1488Sbill #endif 1498Sbill u.u_vm.vm_inblk++; /* pay for read */ 1508Sbill } 1512045Swnj #ifdef EPAWNJ 1522045Swnj else 1532045Swnj trace(TR_BREAD|TR_HIT, dev, blkno); 1542045Swnj #endif 1558Sbill } 1568Sbill if (rablkno && !incore(dev, rablkno)) { 1578Sbill rabp = getblk(dev, rablkno); 1582045Swnj if (rabp->b_flags & B_DONE) { 1598Sbill brelse(rabp); 1602045Swnj #ifdef EPAWNJ 1612045Swnj trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno); 1622045Swnj #endif 1632045Swnj } else { 1648Sbill rabp->b_flags |= B_READ|B_ASYNC; 1658Sbill rabp->b_bcount = BSIZE; 1668Sbill (*bdevsw[major(dev)].d_strategy)(rabp); 1672045Swnj #ifdef EPAWNJ 1682045Swnj trace(TR_BREAD|TR_MISS|TR_RA, dev, rablock); 1692045Swnj #endif 1708Sbill #ifdef DISKMON 1718Sbill io_info.nreada++; 1728Sbill #endif 1738Sbill u.u_vm.vm_inblk++; /* pay in advance */ 1748Sbill } 1758Sbill } 1768Sbill if(bp == NULL) 1778Sbill return(bread(dev, blkno)); 1788Sbill iowait(bp); 1798Sbill return(bp); 1808Sbill } 1818Sbill 1828Sbill /* 1838Sbill * Write the buffer, waiting for completion. 1848Sbill * Then release the buffer. 1858Sbill */ 1868Sbill bwrite(bp) 1878Sbill register struct buf *bp; 1888Sbill { 1898Sbill register flag; 1908Sbill 1918Sbill flag = bp->b_flags; 1928Sbill bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); 1938Sbill bp->b_bcount = BSIZE; 1948Sbill #ifdef DISKMON 1958Sbill io_info.nwrite++; 1968Sbill #endif 1978Sbill if ((flag&B_DELWRI) == 0) 1988Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 1992045Swnj #ifdef EPAWNJ 2002045Swnj trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno)); 2012045Swnj #endif 2028Sbill (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2038Sbill if ((flag&B_ASYNC) == 0) { 2048Sbill iowait(bp); 2058Sbill brelse(bp); 2068Sbill } else if (flag & B_DELWRI) 2078Sbill bp->b_flags |= B_AGE; 2088Sbill else 2098Sbill geterror(bp); 2108Sbill } 2118Sbill 2128Sbill /* 2138Sbill * Release the buffer, marking it so that if it is grabbed 2148Sbill * for another purpose it will be written out before being 2158Sbill * given up (e.g. when writing a partial block where it is 2168Sbill * assumed that another write for the same block will soon follow). 2178Sbill * This can't be done for magtape, since writes must be done 2188Sbill * in the same order as requested. 2198Sbill */ 2208Sbill bdwrite(bp) 2218Sbill register struct buf *bp; 2228Sbill { 2238Sbill register struct buf *dp; 2248Sbill 2258Sbill if ((bp->b_flags&B_DELWRI) == 0) 2268Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 2278Sbill dp = bdevsw[major(bp->b_dev)].d_tab; 2288Sbill if(dp->b_flags & B_TAPE) 2298Sbill bawrite(bp); 2308Sbill else { 2318Sbill bp->b_flags |= B_DELWRI | B_DONE; 2328Sbill brelse(bp); 2338Sbill } 2348Sbill } 2358Sbill 2368Sbill /* 2378Sbill * Release the buffer, start I/O on it, but don't wait for completion. 2388Sbill */ 2398Sbill bawrite(bp) 2408Sbill register struct buf *bp; 2418Sbill { 2428Sbill 2438Sbill bp->b_flags |= B_ASYNC; 2448Sbill bwrite(bp); 2458Sbill } 2468Sbill 2478Sbill /* 2488Sbill * release the buffer, with no I/O implied. 2498Sbill */ 2508Sbill brelse(bp) 2518Sbill register struct buf *bp; 2528Sbill { 2538Sbill register struct buf **backp; 2548Sbill register s; 2558Sbill 2568Sbill if (bp->b_flags&B_WANTED) 2578Sbill wakeup((caddr_t)bp); 2588Sbill if (bfreelist.b_flags&B_WANTED) { 2598Sbill bfreelist.b_flags &= ~B_WANTED; 2608Sbill wakeup((caddr_t)&bfreelist); 2618Sbill } 26291Sbill if ((bp->b_flags&B_ERROR) && bp->b_dev != NODEV) { 26391Sbill bunhash(bp); 2648Sbill bp->b_dev = NODEV; /* no assoc. on error */ 26591Sbill } 2668Sbill s = spl6(); 2678Sbill if(bp->b_flags & (B_AGE|B_ERROR)) { 2688Sbill backp = &bfreelist.av_forw; 2698Sbill (*backp)->av_back = bp; 2708Sbill bp->av_forw = *backp; 2718Sbill *backp = bp; 2728Sbill bp->av_back = &bfreelist; 2738Sbill } else { 2748Sbill backp = &bfreelist.av_back; 2758Sbill (*backp)->av_forw = bp; 2768Sbill bp->av_back = *backp; 2778Sbill *backp = bp; 2788Sbill bp->av_forw = &bfreelist; 2798Sbill } 2808Sbill bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE); 2818Sbill splx(s); 2828Sbill } 2838Sbill 2848Sbill /* 2858Sbill * See if the block is associated with some buffer 2868Sbill * (mainly to avoid getting hung up on a wait in breada) 2878Sbill */ 2888Sbill incore(dev, blkno) 2898Sbill dev_t dev; 2908Sbill daddr_t blkno; 2918Sbill { 2928Sbill register struct buf *bp; 2938Sbill register int dblkno = fsbtodb(blkno); 2948Sbill 29591Sbill for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; 29691Sbill bp = &buf[bp->b_hlink]) 297*2299Skre if (bp->b_blkno == dblkno && bp->b_dev == dev 298*2299Skre && !(bp->b_flags & B_INVAL)) 29991Sbill return (1); 30091Sbill return (0); 3018Sbill } 3028Sbill 3038Sbill struct buf * 3048Sbill baddr(dev, blkno) 3058Sbill dev_t dev; 3068Sbill daddr_t blkno; 3078Sbill { 3088Sbill 3098Sbill if (incore(dev, blkno)) 3108Sbill return (bread(dev, blkno)); 3118Sbill return (0); 3128Sbill } 3138Sbill 3148Sbill /* 3158Sbill * Assign a buffer for the given block. If the appropriate 3168Sbill * block is already associated, return it; otherwise search 3178Sbill * for the oldest non-busy buffer and reassign it. 3188Sbill */ 3198Sbill struct buf * 3208Sbill getblk(dev, blkno) 3218Sbill dev_t dev; 3228Sbill daddr_t blkno; 3238Sbill { 32491Sbill register struct buf *bp, *dp, *ep; 3251831Sbill register int i, x, dblkno; 3268Sbill 3271831Sbill if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) 3281831Sbill blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); 3291831Sbill dblkno = fsbtodb(blkno); 3308Sbill loop: 331124Sbill (void) spl0(); 33291Sbill for (bp = &buf[bufhash[BUFHASH(blkno)]]; bp != &buf[-1]; 33391Sbill bp = &buf[bp->b_hlink]) { 334*2299Skre if (bp->b_blkno != dblkno || bp->b_dev != dev 335*2299Skre || bp->b_flags & B_INVAL) 3368Sbill continue; 337124Sbill (void) spl6(); 3388Sbill if (bp->b_flags&B_BUSY) { 3398Sbill bp->b_flags |= B_WANTED; 3408Sbill sleep((caddr_t)bp, PRIBIO+1); 3418Sbill goto loop; 3428Sbill } 343124Sbill (void) spl0(); 3448Sbill #ifdef DISKMON 3458Sbill i = 0; 3468Sbill dp = bp->av_forw; 3478Sbill while (dp != &bfreelist) { 3488Sbill i++; 3498Sbill dp = dp->av_forw; 3508Sbill } 3518Sbill if (i<NBUF) 3528Sbill io_info.bufcount[i]++; 3538Sbill #endif 3548Sbill notavail(bp); 3558Sbill bp->b_flags |= B_CACHE; 3568Sbill return(bp); 3578Sbill } 35891Sbill if (major(dev) >= nblkdev) 35991Sbill panic("blkdev"); 36091Sbill dp = bdevsw[major(dev)].d_tab; 36191Sbill if (dp == NULL) 36291Sbill panic("devtab"); 363124Sbill (void) spl6(); 3648Sbill if (bfreelist.av_forw == &bfreelist) { 3658Sbill bfreelist.b_flags |= B_WANTED; 3668Sbill sleep((caddr_t)&bfreelist, PRIBIO+1); 3678Sbill goto loop; 3688Sbill } 3691792Sbill (void) spl0(); 3708Sbill bp = bfreelist.av_forw; 3718Sbill notavail(bp); 3728Sbill if (bp->b_flags & B_DELWRI) { 3738Sbill bp->b_flags |= B_ASYNC; 3748Sbill bwrite(bp); 3758Sbill goto loop; 3768Sbill } 37791Sbill if (bp->b_dev == NODEV) 37891Sbill goto done; 37991Sbill /* INLINE EXPANSION OF bunhash(bp) */ 3802045Swnj #ifdef EPAWNJ 3812045Swnj trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); 3822045Swnj #endif 383884Sbill (void) spl6(); 38491Sbill i = BUFHASH(dbtofsb(bp->b_blkno)); 38591Sbill x = bp - buf; 38691Sbill if (bufhash[i] == x) { 38791Sbill bufhash[i] = bp->b_hlink; 38891Sbill } else { 38991Sbill for (ep = &buf[bufhash[i]]; ep != &buf[-1]; 39091Sbill ep = &buf[ep->b_hlink]) 39191Sbill if (ep->b_hlink == x) { 39291Sbill ep->b_hlink = bp->b_hlink; 39391Sbill goto done; 39491Sbill } 39591Sbill panic("getblk"); 39691Sbill } 39791Sbill done: 398884Sbill (void) spl0(); 39991Sbill /* END INLINE EXPANSION */ 4008Sbill bp->b_flags = B_BUSY; 4018Sbill bp->b_back->b_forw = bp->b_forw; 4028Sbill bp->b_forw->b_back = bp->b_back; 4038Sbill bp->b_forw = dp->b_forw; 4048Sbill bp->b_back = dp; 4058Sbill dp->b_forw->b_back = bp; 4068Sbill dp->b_forw = bp; 4078Sbill bp->b_dev = dev; 4088Sbill bp->b_blkno = dblkno; 40991Sbill i = BUFHASH(blkno); 41091Sbill bp->b_hlink = bufhash[i]; 41191Sbill bufhash[i] = bp - buf; 4128Sbill return(bp); 4138Sbill } 4148Sbill 4158Sbill /* 4168Sbill * get an empty block, 4178Sbill * not assigned to any particular device 4188Sbill */ 4198Sbill struct buf * 4208Sbill geteblk() 4218Sbill { 422182Sbill register struct buf *bp, *dp; 4238Sbill 4248Sbill loop: 425124Sbill (void) spl6(); 4268Sbill while (bfreelist.av_forw == &bfreelist) { 4278Sbill bfreelist.b_flags |= B_WANTED; 4288Sbill sleep((caddr_t)&bfreelist, PRIBIO+1); 4298Sbill } 430124Sbill (void) spl0(); 4318Sbill dp = &bfreelist; 4328Sbill bp = bfreelist.av_forw; 4338Sbill notavail(bp); 4348Sbill if (bp->b_flags & B_DELWRI) { 4358Sbill bp->b_flags |= B_ASYNC; 4368Sbill bwrite(bp); 4378Sbill goto loop; 4388Sbill } 4392045Swnj if (bp->b_dev != NODEV) { 4402045Swnj #ifdef EPAWNJ 4412045Swnj trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); 4422045Swnj #endif 44391Sbill bunhash(bp); 4442045Swnj } 4458Sbill bp->b_flags = B_BUSY; 4468Sbill bp->b_back->b_forw = bp->b_forw; 4478Sbill bp->b_forw->b_back = bp->b_back; 4488Sbill bp->b_forw = dp->b_forw; 4498Sbill bp->b_back = dp; 4508Sbill dp->b_forw->b_back = bp; 4518Sbill dp->b_forw = bp; 4528Sbill bp->b_dev = (dev_t)NODEV; 45391Sbill bp->b_hlink = -1; 4548Sbill return(bp); 4558Sbill } 4568Sbill 45791Sbill bunhash(bp) 45891Sbill register struct buf *bp; 45991Sbill { 46091Sbill register struct buf *ep; 461884Sbill register int i, x, s; 46291Sbill 46391Sbill if (bp->b_dev == NODEV) 46491Sbill return; 465884Sbill s = spl6(); 46691Sbill i = BUFHASH(dbtofsb(bp->b_blkno)); 46791Sbill x = bp - buf; 46891Sbill if (bufhash[i] == x) { 46991Sbill bufhash[i] = bp->b_hlink; 470884Sbill goto ret; 47191Sbill } 47291Sbill for (ep = &buf[bufhash[i]]; ep != &buf[-1]; 47391Sbill ep = &buf[ep->b_hlink]) 47491Sbill if (ep->b_hlink == x) { 47591Sbill ep->b_hlink = bp->b_hlink; 476884Sbill goto ret; 47791Sbill } 47891Sbill panic("bunhash"); 479884Sbill ret: 480884Sbill splx(s); 48191Sbill } 48291Sbill 4838Sbill /* 4848Sbill * Wait for I/O completion on the buffer; return errors 4858Sbill * to the user. 4868Sbill */ 4878Sbill iowait(bp) 4888Sbill register struct buf *bp; 4898Sbill { 4908Sbill 491124Sbill (void) spl6(); 4928Sbill while ((bp->b_flags&B_DONE)==0) 4938Sbill sleep((caddr_t)bp, PRIBIO); 494124Sbill (void) spl0(); 4958Sbill geterror(bp); 4968Sbill } 4978Sbill 4988Sbill #ifndef FASTVAX 4998Sbill /* 5008Sbill * Unlink a buffer from the available list and mark it busy. 5018Sbill * (internal interface) 5028Sbill */ 5038Sbill notavail(bp) 5048Sbill register struct buf *bp; 5058Sbill { 5068Sbill register s; 5078Sbill 5088Sbill s = spl6(); 5098Sbill bp->av_back->av_forw = bp->av_forw; 5108Sbill bp->av_forw->av_back = bp->av_back; 5118Sbill bp->b_flags |= B_BUSY; 5128Sbill splx(s); 5138Sbill } 5148Sbill #endif 5158Sbill 5168Sbill /* 5178Sbill * Mark I/O complete on a buffer. If the header 5188Sbill * indicates a dirty page push completion, the 5198Sbill * header is inserted into the ``cleaned'' list 5208Sbill * to be processed by the pageout daemon. Otherwise 5218Sbill * release it if I/O is asynchronous, and wake 5228Sbill * up anyone waiting for it. 5238Sbill */ 5248Sbill iodone(bp) 5258Sbill register struct buf *bp; 5268Sbill { 5278Sbill register int s; 5288Sbill 529420Sbill if (bp->b_flags & B_DONE) 530420Sbill panic("dup iodone"); 5318Sbill bp->b_flags |= B_DONE; 5328Sbill if (bp->b_flags & B_DIRTY) { 5338Sbill if (bp->b_flags & B_ERROR) 5348Sbill panic("IO err in push"); 5358Sbill s = spl6(); 5368Sbill cnt.v_pgout++; 5378Sbill bp->av_forw = bclnlist; 5388Sbill bp->b_bcount = swsize[bp - swbuf]; 5398Sbill bp->b_pfcent = swpf[bp - swbuf]; 5408Sbill bclnlist = bp; 5418Sbill if (bswlist.b_flags & B_WANTED) 5428Sbill wakeup((caddr_t)&proc[2]); 5438Sbill splx(s); 544383Sbill return; 5458Sbill } 5468Sbill if (bp->b_flags&B_ASYNC) 5478Sbill brelse(bp); 5488Sbill else { 5498Sbill bp->b_flags &= ~B_WANTED; 5508Sbill wakeup((caddr_t)bp); 5518Sbill } 5528Sbill } 5538Sbill 5548Sbill /* 5558Sbill * Zero the core associated with a buffer. 5568Sbill */ 5578Sbill clrbuf(bp) 5588Sbill struct buf *bp; 5598Sbill { 5608Sbill register *p; 5618Sbill register c; 5628Sbill 5638Sbill p = bp->b_un.b_words; 5648Sbill c = BSIZE/sizeof(int); 5658Sbill do 5668Sbill *p++ = 0; 5678Sbill while (--c); 5688Sbill bp->b_resid = 0; 5698Sbill } 5708Sbill 5718Sbill /* 5728Sbill * swap I/O - 5738Sbill * 5748Sbill * If the flag indicates a dirty page push initiated 5758Sbill * by the pageout daemon, we map the page into the i th 5768Sbill * virtual page of process 2 (the daemon itself) where i is 5778Sbill * the index of the swap header that has been allocated. 5788Sbill * We simply initialize the header and queue the I/O but 5798Sbill * do not wait for completion. When the I/O completes, 5808Sbill * iodone() will link the header to a list of cleaned 5818Sbill * pages to be processed by the pageout daemon. 5828Sbill */ 5838Sbill swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) 5848Sbill struct proc *p; 5858Sbill swblk_t dblkno; 5868Sbill caddr_t addr; 5878Sbill int flag, nbytes; 5888Sbill dev_t dev; 5898Sbill unsigned pfcent; 5908Sbill { 5918Sbill register struct buf *bp; 5928Sbill register int c; 5938Sbill int p2dp; 5948Sbill register struct pte *dpte, *vpte; 5958Sbill 596124Sbill (void) spl6(); 5978Sbill while (bswlist.av_forw == NULL) { 5988Sbill bswlist.b_flags |= B_WANTED; 5998Sbill sleep((caddr_t)&bswlist, PSWP+1); 6008Sbill } 6018Sbill bp = bswlist.av_forw; 6028Sbill bswlist.av_forw = bp->av_forw; 603124Sbill (void) spl0(); 6048Sbill 6058Sbill bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; 6068Sbill if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) 6078Sbill if (rdflg == B_READ) 6088Sbill sum.v_pswpin += btoc(nbytes); 6098Sbill else 6108Sbill sum.v_pswpout += btoc(nbytes); 6118Sbill bp->b_proc = p; 6128Sbill if (flag & B_DIRTY) { 6138Sbill p2dp = ((bp - swbuf) * CLSIZE) * KLMAX; 6148Sbill dpte = dptopte(&proc[2], p2dp); 6158Sbill vpte = vtopte(p, btop(addr)); 6168Sbill for (c = 0; c < nbytes; c += NBPG) { 6178Sbill if (vpte->pg_pfnum == 0 || vpte->pg_fod) 6188Sbill panic("swap bad pte"); 6198Sbill *dpte++ = *vpte++; 6208Sbill } 6218Sbill bp->b_un.b_addr = (caddr_t)ctob(p2dp); 6228Sbill } else 6238Sbill bp->b_un.b_addr = addr; 6248Sbill while (nbytes > 0) { 6258Sbill c = imin(ctob(120), nbytes); 6268Sbill bp->b_bcount = c; 6278Sbill bp->b_blkno = dblkno; 6288Sbill bp->b_dev = dev; 629718Sbill if (flag & B_DIRTY) { 630718Sbill swpf[bp - swbuf] = pfcent; 631718Sbill swsize[bp - swbuf] = nbytes; 632718Sbill } 6338Sbill (*bdevsw[major(dev)].d_strategy)(bp); 6348Sbill if (flag & B_DIRTY) { 6358Sbill if (c < nbytes) 6368Sbill panic("big push"); 6378Sbill return; 6388Sbill } 639124Sbill (void) spl6(); 6408Sbill while((bp->b_flags&B_DONE)==0) 6418Sbill sleep((caddr_t)bp, PSWP); 642124Sbill (void) spl0(); 6438Sbill bp->b_un.b_addr += c; 6448Sbill bp->b_flags &= ~B_DONE; 6458Sbill if (bp->b_flags & B_ERROR) { 6468Sbill if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) 6478Sbill panic("hard IO err in swap"); 6488Sbill swkill(p, (char *)0); 6498Sbill } 6508Sbill nbytes -= c; 6518Sbill dblkno += btoc(c); 6528Sbill } 653124Sbill (void) spl6(); 6548Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 6558Sbill bp->av_forw = bswlist.av_forw; 6568Sbill bswlist.av_forw = bp; 6578Sbill if (bswlist.b_flags & B_WANTED) { 6588Sbill bswlist.b_flags &= ~B_WANTED; 6598Sbill wakeup((caddr_t)&bswlist); 6608Sbill wakeup((caddr_t)&proc[2]); 6618Sbill } 662124Sbill (void) spl0(); 6638Sbill } 6648Sbill 6658Sbill /* 6668Sbill * If rout == 0 then killed on swap error, else 6678Sbill * rout is the name of the routine where we ran out of 6688Sbill * swap space. 6698Sbill */ 6708Sbill swkill(p, rout) 6718Sbill struct proc *p; 6728Sbill char *rout; 6738Sbill { 6748Sbill 6758Sbill printf("%d: ", p->p_pid); 6768Sbill if (rout) 6778Sbill printf("out of swap space in %s\n", rout); 6788Sbill else 6798Sbill printf("killed on swap error\n"); 6808Sbill /* 6818Sbill * To be sure no looping (e.g. in vmsched trying to 6828Sbill * swap out) mark process locked in core (as though 6838Sbill * done by user) after killing it so noone will try 6848Sbill * to swap it out. 6858Sbill */ 686165Sbill psignal(p, SIGKILL); 6878Sbill p->p_flag |= SULOCK; 6888Sbill } 6898Sbill 6908Sbill /* 6918Sbill * make sure all write-behind blocks 6928Sbill * on dev (or NODEV for all) 6938Sbill * are flushed out. 6948Sbill * (from umount and update) 6958Sbill */ 6968Sbill bflush(dev) 6978Sbill dev_t dev; 6988Sbill { 6998Sbill register struct buf *bp; 7008Sbill 7018Sbill loop: 702124Sbill (void) spl6(); 7038Sbill for (bp = bfreelist.av_forw; bp != &bfreelist; bp = bp->av_forw) { 7048Sbill if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { 7058Sbill bp->b_flags |= B_ASYNC; 7068Sbill notavail(bp); 7078Sbill bwrite(bp); 7088Sbill goto loop; 7098Sbill } 7108Sbill } 711124Sbill (void) spl0(); 7128Sbill } 7138Sbill 7148Sbill /* 7158Sbill * Raw I/O. The arguments are 7168Sbill * The strategy routine for the device 7178Sbill * A buffer, which will always be a special buffer 7188Sbill * header owned exclusively by the device for this purpose 7198Sbill * The device number 7208Sbill * Read/write flag 7218Sbill * Essentially all the work is computing physical addresses and 7228Sbill * validating them. 7238Sbill * If the user has the proper access privilidges, the process is 7248Sbill * marked 'delayed unlock' and the pages involved in the I/O are 7258Sbill * faulted and locked. After the completion of the I/O, the above pages 7268Sbill * are unlocked. 7278Sbill */ 7288Sbill physio(strat, bp, dev, rw, mincnt) 7298Sbill int (*strat)(); 7308Sbill register struct buf *bp; 7318Sbill unsigned (*mincnt)(); 7328Sbill { 7338Sbill register int c; 7348Sbill char *a; 7358Sbill 7368Sbill if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { 7378Sbill u.u_error = EFAULT; 7388Sbill return; 7398Sbill } 740124Sbill (void) spl6(); 7418Sbill while (bp->b_flags&B_BUSY) { 7428Sbill bp->b_flags |= B_WANTED; 7438Sbill sleep((caddr_t)bp, PRIBIO+1); 7448Sbill } 7458Sbill bp->b_error = 0; 7468Sbill bp->b_proc = u.u_procp; 7478Sbill bp->b_un.b_addr = u.u_base; 7488Sbill while (u.u_count != 0 && bp->b_error==0) { 7498Sbill bp->b_flags = B_BUSY | B_PHYS | rw; 7508Sbill bp->b_dev = dev; 7518Sbill bp->b_blkno = u.u_offset >> PGSHIFT; 7528Sbill bp->b_bcount = u.u_count; 7538Sbill (*mincnt)(bp); 7548Sbill c = bp->b_bcount; 7558Sbill u.u_procp->p_flag |= SPHYSIO; 7568Sbill vslock(a = bp->b_un.b_addr, c); 7578Sbill (*strat)(bp); 758124Sbill (void) spl6(); 7598Sbill while ((bp->b_flags&B_DONE) == 0) 7608Sbill sleep((caddr_t)bp, PRIBIO); 7618Sbill vsunlock(a, c, rw); 7628Sbill u.u_procp->p_flag &= ~SPHYSIO; 7638Sbill if (bp->b_flags&B_WANTED) 7648Sbill wakeup((caddr_t)bp); 765124Sbill (void) spl0(); 7668Sbill bp->b_un.b_addr += c; 7678Sbill u.u_count -= c; 7688Sbill u.u_offset += c; 7698Sbill } 7708Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); 7718Sbill u.u_count = bp->b_resid; 7728Sbill geterror(bp); 7738Sbill } 7748Sbill 7758Sbill /*ARGSUSED*/ 7768Sbill unsigned 7778Sbill minphys(bp) 7788Sbill struct buf *bp; 7798Sbill { 7808Sbill 7818Sbill if (bp->b_bcount > 60 * 1024) 7828Sbill bp->b_bcount = 60 * 1024; 7838Sbill } 7848Sbill 7858Sbill /* 7868Sbill * Pick up the device's error number and pass it to the user; 7878Sbill * if there is an error but the number is 0 set a generalized 7888Sbill * code. Actually the latter is always true because devices 7898Sbill * don't yet return specific errors. 7908Sbill */ 7918Sbill geterror(bp) 7928Sbill register struct buf *bp; 7938Sbill { 7948Sbill 7958Sbill if (bp->b_flags&B_ERROR) 7968Sbill if ((u.u_error = bp->b_error)==0) 7978Sbill u.u_error = EIO; 7988Sbill } 799*2299Skre 800*2299Skre /* 801*2299Skre * Invalidate in core blocks belonging to closed or umounted filesystem 802*2299Skre * 803*2299Skre * This is not nicely done at all - the buffer ought to be removed from the 804*2299Skre * hash chains & have its dev/blkno fields clobbered, but unfortunately we 805*2299Skre * can't do that here, as it is quite possible that the block is still 806*2299Skre * being used for i/o. Eventually, all disc drivers should be forced to 807*2299Skre * have a close routine, which ought ensure that the queue is empty, then 808*2299Skre * properly flush the queues. Until that happy day, this suffices for 809*2299Skre * correctness. ... kre 810*2299Skre */ 811*2299Skre binval(dev) 812*2299Skre dev_t dev; 813*2299Skre { 814*2299Skre register struct buf *bp, *dp; 815*2299Skre 816*2299Skre dp = bdevsw[major(dev)].d_tab; 817*2299Skre 818*2299Skre for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) 819*2299Skre if (bp->b_dev == dev) 820*2299Skre bp->b_flags |= B_INVAL; 821*2299Skre } 822