1*2683Swnj /* vfs_cluster.c 4.12 02/25/81 */ 28Sbill 38Sbill #include "../h/param.h" 48Sbill #include "../h/systm.h" 58Sbill #include "../h/dir.h" 68Sbill #include "../h/user.h" 78Sbill #include "../h/buf.h" 88Sbill #include "../h/conf.h" 98Sbill #include "../h/proc.h" 108Sbill #include "../h/seg.h" 118Sbill #include "../h/pte.h" 128Sbill #include "../h/vm.h" 132045Swnj #include "../h/trace.h" 148Sbill 1591Sbill /* 1691Sbill * The following several routines allocate and free 1791Sbill * buffers with various side effects. In general the 1891Sbill * arguments to an allocate routine are a device and 1991Sbill * a block number, and the value is a pointer to 2091Sbill * to the buffer header; the buffer is marked "busy" 2191Sbill * so that no one else can touch it. If the block was 2291Sbill * already in core, no I/O need be done; if it is 2391Sbill * already busy, the process waits until it becomes free. 2491Sbill * The following routines allocate a buffer: 2591Sbill * getblk 2691Sbill * bread 2791Sbill * breada 2891Sbill * baddr (if it is incore) 2991Sbill * Eventually the buffer must be released, possibly with the 3091Sbill * side effect of writing it out, by using one of 3191Sbill * bwrite 3291Sbill * bdwrite 3391Sbill * bawrite 3491Sbill * brelse 3591Sbill */ 3691Sbill 3791Sbill #define BUFHSZ 63 382325Swnj struct bufhd bufhash[BUFHSZ]; 392328Swnj #define BUFHASH(dev, dblkno) \ 402328Swnj ((struct buf *)&bufhash[((int)(dev)+(int)(dblkno)) % BUFHSZ]) 4191Sbill 4291Sbill /* 4391Sbill * Initialize hash links for buffers. 4491Sbill */ 4591Sbill bhinit() 4691Sbill { 4791Sbill register int i; 482325Swnj register struct bufhd *bp; 4991Sbill 502325Swnj for (bp = bufhash, i = 0; i < BUFHSZ; i++, bp++) 512325Swnj bp->b_forw = bp->b_back = (struct buf *)bp; 5291Sbill } 5391Sbill 548Sbill /* #define DISKMON 1 */ 558Sbill 568Sbill #ifdef DISKMON 578Sbill struct { 588Sbill int nbuf; 598Sbill long nread; 608Sbill long nreada; 618Sbill long ncache; 628Sbill long nwrite; 638Sbill long bufcount[NBUF]; 648Sbill } io_info; 658Sbill #endif 668Sbill 678Sbill /* 688Sbill * Swap IO headers - 698Sbill * They contain the necessary information for the swap I/O. 708Sbill * At any given time, a swap header can be in three 718Sbill * different lists. When free it is in the free list, 728Sbill * when allocated and the I/O queued, it is on the swap 738Sbill * device list, and finally, if the operation was a dirty 748Sbill * page push, when the I/O completes, it is inserted 758Sbill * in a list of cleaned pages to be processed by the pageout daemon. 768Sbill */ 778Sbill struct buf swbuf[NSWBUF]; 788Sbill short swsize[NSWBUF]; /* CAN WE JUST USE B_BCOUNT? */ 798Sbill int swpf[NSWBUF]; 808Sbill 818Sbill 828Sbill #ifdef FASTVAX 838Sbill #define notavail(bp) \ 848Sbill { \ 858Sbill int s = spl6(); \ 868Sbill (bp)->av_back->av_forw = (bp)->av_forw; \ 878Sbill (bp)->av_forw->av_back = (bp)->av_back; \ 888Sbill (bp)->b_flags |= B_BUSY; \ 898Sbill splx(s); \ 908Sbill } 918Sbill #endif 928Sbill 938Sbill /* 948Sbill * Read in (if necessary) the block and return a buffer pointer. 958Sbill */ 968Sbill struct buf * 978Sbill bread(dev, blkno) 988Sbill dev_t dev; 998Sbill daddr_t blkno; 1008Sbill { 1018Sbill register struct buf *bp; 1028Sbill 1038Sbill bp = getblk(dev, blkno); 1048Sbill if (bp->b_flags&B_DONE) { 1052045Swnj #ifdef EPAWNJ 1062045Swnj trace(TR_BREAD|TR_HIT, dev, blkno); 1072045Swnj #endif 1088Sbill #ifdef DISKMON 1098Sbill io_info.ncache++; 1108Sbill #endif 1118Sbill return(bp); 1128Sbill } 1138Sbill bp->b_flags |= B_READ; 1148Sbill bp->b_bcount = BSIZE; 1158Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1162045Swnj #ifdef EPAWNJ 1172045Swnj trace(TR_BREAD|TR_MISS, dev, blkno); 1182045Swnj #endif 1198Sbill #ifdef DISKMON 1208Sbill io_info.nread++; 1218Sbill #endif 1228Sbill u.u_vm.vm_inblk++; /* pay for read */ 1238Sbill iowait(bp); 1248Sbill return(bp); 1258Sbill } 1268Sbill 1278Sbill /* 1288Sbill * Read in the block, like bread, but also start I/O on the 1298Sbill * read-ahead block (which is not allocated to the caller) 1308Sbill */ 1318Sbill struct buf * 1328Sbill breada(dev, blkno, rablkno) 1338Sbill dev_t dev; 1348Sbill daddr_t blkno, rablkno; 1358Sbill { 1368Sbill register struct buf *bp, *rabp; 1378Sbill 1388Sbill bp = NULL; 1398Sbill if (!incore(dev, blkno)) { 1408Sbill bp = getblk(dev, blkno); 1418Sbill if ((bp->b_flags&B_DONE) == 0) { 1428Sbill bp->b_flags |= B_READ; 1438Sbill bp->b_bcount = BSIZE; 1448Sbill (*bdevsw[major(dev)].d_strategy)(bp); 1452045Swnj #ifdef EPAWNJ 1462045Swnj trace(TR_BREAD|TR_MISS, dev, blkno); 1472045Swnj #endif 1488Sbill #ifdef DISKMON 1498Sbill io_info.nread++; 1508Sbill #endif 1518Sbill u.u_vm.vm_inblk++; /* pay for read */ 1528Sbill } 1532045Swnj #ifdef EPAWNJ 1542045Swnj else 1552045Swnj trace(TR_BREAD|TR_HIT, dev, blkno); 1562045Swnj #endif 1578Sbill } 1588Sbill if (rablkno && !incore(dev, rablkno)) { 1598Sbill rabp = getblk(dev, rablkno); 1602045Swnj if (rabp->b_flags & B_DONE) { 1618Sbill brelse(rabp); 1622045Swnj #ifdef EPAWNJ 1632045Swnj trace(TR_BREAD|TR_HIT|TR_RA, dev, blkno); 1642045Swnj #endif 1652045Swnj } else { 1668Sbill rabp->b_flags |= B_READ|B_ASYNC; 1678Sbill rabp->b_bcount = BSIZE; 1688Sbill (*bdevsw[major(dev)].d_strategy)(rabp); 1692045Swnj #ifdef EPAWNJ 1702045Swnj trace(TR_BREAD|TR_MISS|TR_RA, dev, rablock); 1712045Swnj #endif 1728Sbill #ifdef DISKMON 1738Sbill io_info.nreada++; 1748Sbill #endif 1758Sbill u.u_vm.vm_inblk++; /* pay in advance */ 1768Sbill } 1778Sbill } 1788Sbill if(bp == NULL) 1798Sbill return(bread(dev, blkno)); 1808Sbill iowait(bp); 1818Sbill return(bp); 1828Sbill } 1838Sbill 1848Sbill /* 1858Sbill * Write the buffer, waiting for completion. 1868Sbill * Then release the buffer. 1878Sbill */ 1888Sbill bwrite(bp) 1898Sbill register struct buf *bp; 1908Sbill { 1918Sbill register flag; 1928Sbill 1938Sbill flag = bp->b_flags; 1948Sbill bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI | B_AGE); 1958Sbill bp->b_bcount = BSIZE; 1968Sbill #ifdef DISKMON 1978Sbill io_info.nwrite++; 1988Sbill #endif 1998Sbill if ((flag&B_DELWRI) == 0) 2008Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 2012045Swnj #ifdef EPAWNJ 2022045Swnj trace(TR_BWRITE, bp->b_dev, dbtofsb(bp->b_blkno)); 2032045Swnj #endif 2048Sbill (*bdevsw[major(bp->b_dev)].d_strategy)(bp); 2058Sbill if ((flag&B_ASYNC) == 0) { 2068Sbill iowait(bp); 2078Sbill brelse(bp); 2088Sbill } else if (flag & B_DELWRI) 2098Sbill bp->b_flags |= B_AGE; 2108Sbill else 2118Sbill geterror(bp); 2128Sbill } 2138Sbill 2148Sbill /* 2158Sbill * Release the buffer, marking it so that if it is grabbed 2168Sbill * for another purpose it will be written out before being 2178Sbill * given up (e.g. when writing a partial block where it is 2188Sbill * assumed that another write for the same block will soon follow). 2198Sbill * This can't be done for magtape, since writes must be done 2208Sbill * in the same order as requested. 2218Sbill */ 2228Sbill bdwrite(bp) 2238Sbill register struct buf *bp; 2248Sbill { 2252403Skre register int flags; 2268Sbill 2278Sbill if ((bp->b_flags&B_DELWRI) == 0) 2288Sbill u.u_vm.vm_oublk++; /* noone paid yet */ 2292403Skre flags = bdevsw[major(bp->b_dev)].d_flags; 2302403Skre if(flags & B_TAPE) 2318Sbill bawrite(bp); 2328Sbill else { 2338Sbill bp->b_flags |= B_DELWRI | B_DONE; 2348Sbill brelse(bp); 2358Sbill } 2368Sbill } 2378Sbill 2388Sbill /* 2398Sbill * Release the buffer, start I/O on it, but don't wait for completion. 2408Sbill */ 2418Sbill bawrite(bp) 2428Sbill register struct buf *bp; 2438Sbill { 2448Sbill 2458Sbill bp->b_flags |= B_ASYNC; 2468Sbill bwrite(bp); 2478Sbill } 2488Sbill 2498Sbill /* 2508Sbill * release the buffer, with no I/O implied. 2518Sbill */ 2528Sbill brelse(bp) 2538Sbill register struct buf *bp; 2548Sbill { 2552325Swnj register struct buf *flist; 2568Sbill register s; 2578Sbill 2588Sbill if (bp->b_flags&B_WANTED) 2598Sbill wakeup((caddr_t)bp); 2602325Swnj if (bfreelist[0].b_flags&B_WANTED) { 2612325Swnj bfreelist[0].b_flags &= ~B_WANTED; 2622325Swnj wakeup((caddr_t)bfreelist); 2638Sbill } 264*2683Swnj if (bp->b_flags&B_ERROR) 265*2683Swnj if (bp->b_flags & B_LOCKED) 266*2683Swnj bp->b_flags &= ~B_ERROR; /* try again later */ 267*2683Swnj else 268*2683Swnj bp->b_dev = NODEV; /* no assoc */ 2698Sbill s = spl6(); 2702325Swnj if (bp->b_flags & (B_ERROR|B_INVAL)) { 2712325Swnj /* block has no info ... put at front of most free list */ 2722325Swnj flist = &bfreelist[BQUEUES-1]; 2732325Swnj flist->av_forw->av_back = bp; 2742325Swnj bp->av_forw = flist->av_forw; 2752325Swnj flist->av_forw = bp; 2762325Swnj bp->av_back = flist; 2778Sbill } else { 2782325Swnj if (bp->b_flags & B_LOCKED) 2792325Swnj flist = &bfreelist[BQ_LOCKED]; 2802325Swnj else if (bp->b_flags & B_AGE) 2812325Swnj flist = &bfreelist[BQ_AGE]; 2822325Swnj else 2832325Swnj flist = &bfreelist[BQ_LRU]; 2842325Swnj flist->av_back->av_forw = bp; 2852325Swnj bp->av_back = flist->av_back; 2862325Swnj flist->av_back = bp; 2872325Swnj bp->av_forw = flist; 2888Sbill } 2898Sbill bp->b_flags &= ~(B_WANTED|B_BUSY|B_ASYNC|B_AGE); 2908Sbill splx(s); 2918Sbill } 2928Sbill 2938Sbill /* 2948Sbill * See if the block is associated with some buffer 2958Sbill * (mainly to avoid getting hung up on a wait in breada) 2968Sbill */ 2978Sbill incore(dev, blkno) 2988Sbill dev_t dev; 2998Sbill daddr_t blkno; 3008Sbill { 3018Sbill register struct buf *bp; 3022325Swnj register struct buf *dp; 3038Sbill register int dblkno = fsbtodb(blkno); 3048Sbill 3052328Swnj dp = BUFHASH(dev, dblkno); 3062325Swnj for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) 3072325Swnj if (bp->b_blkno == dblkno && bp->b_dev == dev && 3082325Swnj !(bp->b_flags & B_INVAL)) 30991Sbill return (1); 31091Sbill return (0); 3118Sbill } 3128Sbill 3138Sbill struct buf * 3148Sbill baddr(dev, blkno) 3158Sbill dev_t dev; 3168Sbill daddr_t blkno; 3178Sbill { 3188Sbill 3198Sbill if (incore(dev, blkno)) 3208Sbill return (bread(dev, blkno)); 3218Sbill return (0); 3228Sbill } 3238Sbill 3248Sbill /* 3258Sbill * Assign a buffer for the given block. If the appropriate 3268Sbill * block is already associated, return it; otherwise search 3278Sbill * for the oldest non-busy buffer and reassign it. 3288Sbill */ 3298Sbill struct buf * 3308Sbill getblk(dev, blkno) 3318Sbill dev_t dev; 3328Sbill daddr_t blkno; 3338Sbill { 33491Sbill register struct buf *bp, *dp, *ep; 3352325Swnj register int dblkno = fsbtodb(blkno); 3362423Skre #ifdef DISKMON 3372423Skre register int i; 3382423Skre #endif 3398Sbill 3401831Sbill if ((unsigned)blkno >= 1 << (sizeof(int)*NBBY-PGSHIFT)) 3411831Sbill blkno = 1 << ((sizeof(int)*NBBY-PGSHIFT) + 1); 3421831Sbill dblkno = fsbtodb(blkno); 3432325Swnj dp = BUFHASH(dev, dblkno); 3448Sbill loop: 345124Sbill (void) spl0(); 3462325Swnj for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) { 3472325Swnj if (bp->b_blkno != dblkno || bp->b_dev != dev || 3482325Swnj bp->b_flags&B_INVAL) 3498Sbill continue; 350124Sbill (void) spl6(); 3518Sbill if (bp->b_flags&B_BUSY) { 3528Sbill bp->b_flags |= B_WANTED; 3538Sbill sleep((caddr_t)bp, PRIBIO+1); 3548Sbill goto loop; 3558Sbill } 356124Sbill (void) spl0(); 3578Sbill #ifdef DISKMON 3588Sbill i = 0; 3598Sbill dp = bp->av_forw; 3602325Swnj while ((dp->b_flags & B_HEAD) == 0) { 3618Sbill i++; 3628Sbill dp = dp->av_forw; 3638Sbill } 3648Sbill if (i<NBUF) 3658Sbill io_info.bufcount[i]++; 3668Sbill #endif 3678Sbill notavail(bp); 3688Sbill bp->b_flags |= B_CACHE; 3698Sbill return(bp); 3708Sbill } 37191Sbill if (major(dev) >= nblkdev) 37291Sbill panic("blkdev"); 373124Sbill (void) spl6(); 3742325Swnj for (ep = &bfreelist[BQUEUES-1]; ep > bfreelist; ep--) 3752325Swnj if (ep->av_forw != ep) 3762325Swnj break; 3772325Swnj if (ep == bfreelist) { /* no free blocks at all */ 3782325Swnj ep->b_flags |= B_WANTED; 3792325Swnj sleep((caddr_t)ep, PRIBIO+1); 3808Sbill goto loop; 3818Sbill } 3821792Sbill (void) spl0(); 3832325Swnj bp = ep->av_forw; 3848Sbill notavail(bp); 3858Sbill if (bp->b_flags & B_DELWRI) { 3868Sbill bp->b_flags |= B_ASYNC; 3878Sbill bwrite(bp); 3888Sbill goto loop; 3898Sbill } 3902045Swnj #ifdef EPAWNJ 3912045Swnj trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); 3922045Swnj #endif 3938Sbill bp->b_flags = B_BUSY; 3948Sbill bp->b_back->b_forw = bp->b_forw; 3958Sbill bp->b_forw->b_back = bp->b_back; 3968Sbill bp->b_forw = dp->b_forw; 3978Sbill bp->b_back = dp; 3988Sbill dp->b_forw->b_back = bp; 3998Sbill dp->b_forw = bp; 4008Sbill bp->b_dev = dev; 4018Sbill bp->b_blkno = dblkno; 4028Sbill return(bp); 4038Sbill } 4048Sbill 4058Sbill /* 4068Sbill * get an empty block, 4078Sbill * not assigned to any particular device 4088Sbill */ 4098Sbill struct buf * 4108Sbill geteblk() 4118Sbill { 412182Sbill register struct buf *bp, *dp; 4138Sbill 4148Sbill loop: 415124Sbill (void) spl6(); 4162325Swnj for (dp = &bfreelist[BQUEUES-1]; dp > bfreelist; dp--) 4172325Swnj if (dp->av_forw != dp) 4182325Swnj break; 4192325Swnj if (dp == bfreelist) { /* no free blocks */ 4202325Swnj dp->b_flags |= B_WANTED; 4212325Swnj sleep((caddr_t)dp, PRIBIO+1); 4222325Swnj goto loop; 4238Sbill } 424124Sbill (void) spl0(); 4252325Swnj bp = dp->av_forw; 4268Sbill notavail(bp); 4278Sbill if (bp->b_flags & B_DELWRI) { 4288Sbill bp->b_flags |= B_ASYNC; 4298Sbill bwrite(bp); 4308Sbill goto loop; 4318Sbill } 4322045Swnj #ifdef EPAWNJ 4332325Swnj trace(TR_BRELSE, bp->b_dev, dbtofsb(bp->b_blkno)); 4342045Swnj #endif 4352325Swnj bp->b_flags = B_BUSY|B_INVAL; 4368Sbill bp->b_back->b_forw = bp->b_forw; 4378Sbill bp->b_forw->b_back = bp->b_back; 4388Sbill bp->b_forw = dp->b_forw; 4398Sbill bp->b_back = dp; 4408Sbill dp->b_forw->b_back = bp; 4418Sbill dp->b_forw = bp; 4428Sbill bp->b_dev = (dev_t)NODEV; 4438Sbill return(bp); 4448Sbill } 4458Sbill 4468Sbill /* 4478Sbill * Wait for I/O completion on the buffer; return errors 4488Sbill * to the user. 4498Sbill */ 4508Sbill iowait(bp) 4518Sbill register struct buf *bp; 4528Sbill { 4538Sbill 454124Sbill (void) spl6(); 4558Sbill while ((bp->b_flags&B_DONE)==0) 4568Sbill sleep((caddr_t)bp, PRIBIO); 457124Sbill (void) spl0(); 4588Sbill geterror(bp); 4598Sbill } 4608Sbill 4618Sbill #ifndef FASTVAX 4628Sbill /* 4638Sbill * Unlink a buffer from the available list and mark it busy. 4648Sbill * (internal interface) 4658Sbill */ 4668Sbill notavail(bp) 4678Sbill register struct buf *bp; 4688Sbill { 4698Sbill register s; 4708Sbill 4718Sbill s = spl6(); 4728Sbill bp->av_back->av_forw = bp->av_forw; 4738Sbill bp->av_forw->av_back = bp->av_back; 4748Sbill bp->b_flags |= B_BUSY; 4758Sbill splx(s); 4768Sbill } 4778Sbill #endif 4788Sbill 4798Sbill /* 4808Sbill * Mark I/O complete on a buffer. If the header 4818Sbill * indicates a dirty page push completion, the 4828Sbill * header is inserted into the ``cleaned'' list 4838Sbill * to be processed by the pageout daemon. Otherwise 4848Sbill * release it if I/O is asynchronous, and wake 4858Sbill * up anyone waiting for it. 4868Sbill */ 4878Sbill iodone(bp) 4888Sbill register struct buf *bp; 4898Sbill { 4908Sbill register int s; 4918Sbill 492420Sbill if (bp->b_flags & B_DONE) 493420Sbill panic("dup iodone"); 4948Sbill bp->b_flags |= B_DONE; 4958Sbill if (bp->b_flags & B_DIRTY) { 4968Sbill if (bp->b_flags & B_ERROR) 4978Sbill panic("IO err in push"); 4988Sbill s = spl6(); 4998Sbill cnt.v_pgout++; 5008Sbill bp->av_forw = bclnlist; 5018Sbill bp->b_bcount = swsize[bp - swbuf]; 5028Sbill bp->b_pfcent = swpf[bp - swbuf]; 5038Sbill bclnlist = bp; 5048Sbill if (bswlist.b_flags & B_WANTED) 5058Sbill wakeup((caddr_t)&proc[2]); 5068Sbill splx(s); 507383Sbill return; 5088Sbill } 5098Sbill if (bp->b_flags&B_ASYNC) 5108Sbill brelse(bp); 5118Sbill else { 5128Sbill bp->b_flags &= ~B_WANTED; 5138Sbill wakeup((caddr_t)bp); 5148Sbill } 5158Sbill } 5168Sbill 5178Sbill /* 5188Sbill * Zero the core associated with a buffer. 5198Sbill */ 5208Sbill clrbuf(bp) 5218Sbill struct buf *bp; 5228Sbill { 5238Sbill register *p; 5248Sbill register c; 5258Sbill 5268Sbill p = bp->b_un.b_words; 5278Sbill c = BSIZE/sizeof(int); 5288Sbill do 5298Sbill *p++ = 0; 5308Sbill while (--c); 5318Sbill bp->b_resid = 0; 5328Sbill } 5338Sbill 5348Sbill /* 5358Sbill * swap I/O - 5368Sbill * 5378Sbill * If the flag indicates a dirty page push initiated 5388Sbill * by the pageout daemon, we map the page into the i th 5398Sbill * virtual page of process 2 (the daemon itself) where i is 5408Sbill * the index of the swap header that has been allocated. 5418Sbill * We simply initialize the header and queue the I/O but 5428Sbill * do not wait for completion. When the I/O completes, 5438Sbill * iodone() will link the header to a list of cleaned 5448Sbill * pages to be processed by the pageout daemon. 5458Sbill */ 5468Sbill swap(p, dblkno, addr, nbytes, rdflg, flag, dev, pfcent) 5478Sbill struct proc *p; 5488Sbill swblk_t dblkno; 5498Sbill caddr_t addr; 5508Sbill int flag, nbytes; 5518Sbill dev_t dev; 5528Sbill unsigned pfcent; 5538Sbill { 5548Sbill register struct buf *bp; 5558Sbill register int c; 5568Sbill int p2dp; 5578Sbill register struct pte *dpte, *vpte; 5588Sbill 559124Sbill (void) spl6(); 5608Sbill while (bswlist.av_forw == NULL) { 5618Sbill bswlist.b_flags |= B_WANTED; 5628Sbill sleep((caddr_t)&bswlist, PSWP+1); 5638Sbill } 5648Sbill bp = bswlist.av_forw; 5658Sbill bswlist.av_forw = bp->av_forw; 566124Sbill (void) spl0(); 5678Sbill 5688Sbill bp->b_flags = B_BUSY | B_PHYS | rdflg | flag; 5698Sbill if ((bp->b_flags & (B_DIRTY|B_PGIN)) == 0) 5708Sbill if (rdflg == B_READ) 5718Sbill sum.v_pswpin += btoc(nbytes); 5728Sbill else 5738Sbill sum.v_pswpout += btoc(nbytes); 5748Sbill bp->b_proc = p; 5758Sbill if (flag & B_DIRTY) { 5768Sbill p2dp = ((bp - swbuf) * CLSIZE) * KLMAX; 5778Sbill dpte = dptopte(&proc[2], p2dp); 5788Sbill vpte = vtopte(p, btop(addr)); 5798Sbill for (c = 0; c < nbytes; c += NBPG) { 5808Sbill if (vpte->pg_pfnum == 0 || vpte->pg_fod) 5818Sbill panic("swap bad pte"); 5828Sbill *dpte++ = *vpte++; 5838Sbill } 5848Sbill bp->b_un.b_addr = (caddr_t)ctob(p2dp); 5858Sbill } else 5868Sbill bp->b_un.b_addr = addr; 5878Sbill while (nbytes > 0) { 5888Sbill c = imin(ctob(120), nbytes); 5898Sbill bp->b_bcount = c; 5908Sbill bp->b_blkno = dblkno; 5918Sbill bp->b_dev = dev; 592718Sbill if (flag & B_DIRTY) { 593718Sbill swpf[bp - swbuf] = pfcent; 594718Sbill swsize[bp - swbuf] = nbytes; 595718Sbill } 5968Sbill (*bdevsw[major(dev)].d_strategy)(bp); 5978Sbill if (flag & B_DIRTY) { 5988Sbill if (c < nbytes) 5998Sbill panic("big push"); 6008Sbill return; 6018Sbill } 602124Sbill (void) spl6(); 6038Sbill while((bp->b_flags&B_DONE)==0) 6048Sbill sleep((caddr_t)bp, PSWP); 605124Sbill (void) spl0(); 6068Sbill bp->b_un.b_addr += c; 6078Sbill bp->b_flags &= ~B_DONE; 6088Sbill if (bp->b_flags & B_ERROR) { 6098Sbill if ((flag & (B_UAREA|B_PAGET)) || rdflg == B_WRITE) 6108Sbill panic("hard IO err in swap"); 6118Sbill swkill(p, (char *)0); 6128Sbill } 6138Sbill nbytes -= c; 6148Sbill dblkno += btoc(c); 6158Sbill } 616124Sbill (void) spl6(); 6178Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 6188Sbill bp->av_forw = bswlist.av_forw; 6198Sbill bswlist.av_forw = bp; 6208Sbill if (bswlist.b_flags & B_WANTED) { 6218Sbill bswlist.b_flags &= ~B_WANTED; 6228Sbill wakeup((caddr_t)&bswlist); 6238Sbill wakeup((caddr_t)&proc[2]); 6248Sbill } 625124Sbill (void) spl0(); 6268Sbill } 6278Sbill 6288Sbill /* 6298Sbill * If rout == 0 then killed on swap error, else 6308Sbill * rout is the name of the routine where we ran out of 6318Sbill * swap space. 6328Sbill */ 6338Sbill swkill(p, rout) 6348Sbill struct proc *p; 6358Sbill char *rout; 6368Sbill { 6378Sbill 6388Sbill printf("%d: ", p->p_pid); 6398Sbill if (rout) 6408Sbill printf("out of swap space in %s\n", rout); 6418Sbill else 6428Sbill printf("killed on swap error\n"); 6438Sbill /* 6448Sbill * To be sure no looping (e.g. in vmsched trying to 6458Sbill * swap out) mark process locked in core (as though 6468Sbill * done by user) after killing it so noone will try 6478Sbill * to swap it out. 6488Sbill */ 649165Sbill psignal(p, SIGKILL); 6508Sbill p->p_flag |= SULOCK; 6518Sbill } 6528Sbill 6538Sbill /* 6548Sbill * make sure all write-behind blocks 6558Sbill * on dev (or NODEV for all) 6568Sbill * are flushed out. 6578Sbill * (from umount and update) 6588Sbill */ 6598Sbill bflush(dev) 6608Sbill dev_t dev; 6618Sbill { 6628Sbill register struct buf *bp; 6632325Swnj register struct buf *flist; 6648Sbill 6658Sbill loop: 666124Sbill (void) spl6(); 6672325Swnj for (flist = bfreelist; flist < &bfreelist[BQUEUES]; flist++) 6682325Swnj for (bp = flist->av_forw; bp != flist; bp = bp->av_forw) { 6698Sbill if (bp->b_flags&B_DELWRI && (dev == NODEV||dev==bp->b_dev)) { 6708Sbill bp->b_flags |= B_ASYNC; 6718Sbill notavail(bp); 6728Sbill bwrite(bp); 6738Sbill goto loop; 6748Sbill } 6758Sbill } 676124Sbill (void) spl0(); 6778Sbill } 6788Sbill 6798Sbill /* 6808Sbill * Raw I/O. The arguments are 6818Sbill * The strategy routine for the device 6828Sbill * A buffer, which will always be a special buffer 6838Sbill * header owned exclusively by the device for this purpose 6848Sbill * The device number 6858Sbill * Read/write flag 6868Sbill * Essentially all the work is computing physical addresses and 6878Sbill * validating them. 6888Sbill * If the user has the proper access privilidges, the process is 6898Sbill * marked 'delayed unlock' and the pages involved in the I/O are 6908Sbill * faulted and locked. After the completion of the I/O, the above pages 6918Sbill * are unlocked. 6928Sbill */ 6938Sbill physio(strat, bp, dev, rw, mincnt) 6948Sbill int (*strat)(); 6958Sbill register struct buf *bp; 6968Sbill unsigned (*mincnt)(); 6978Sbill { 6988Sbill register int c; 6998Sbill char *a; 7008Sbill 7018Sbill if (useracc(u.u_base,u.u_count,rw==B_READ?B_WRITE:B_READ) == NULL) { 7028Sbill u.u_error = EFAULT; 7038Sbill return; 7048Sbill } 705124Sbill (void) spl6(); 7068Sbill while (bp->b_flags&B_BUSY) { 7078Sbill bp->b_flags |= B_WANTED; 7088Sbill sleep((caddr_t)bp, PRIBIO+1); 7098Sbill } 7108Sbill bp->b_error = 0; 7118Sbill bp->b_proc = u.u_procp; 7128Sbill bp->b_un.b_addr = u.u_base; 7138Sbill while (u.u_count != 0 && bp->b_error==0) { 7148Sbill bp->b_flags = B_BUSY | B_PHYS | rw; 7158Sbill bp->b_dev = dev; 7168Sbill bp->b_blkno = u.u_offset >> PGSHIFT; 7178Sbill bp->b_bcount = u.u_count; 7188Sbill (*mincnt)(bp); 7198Sbill c = bp->b_bcount; 7208Sbill u.u_procp->p_flag |= SPHYSIO; 7218Sbill vslock(a = bp->b_un.b_addr, c); 7228Sbill (*strat)(bp); 723124Sbill (void) spl6(); 7248Sbill while ((bp->b_flags&B_DONE) == 0) 7258Sbill sleep((caddr_t)bp, PRIBIO); 7268Sbill vsunlock(a, c, rw); 7278Sbill u.u_procp->p_flag &= ~SPHYSIO; 7288Sbill if (bp->b_flags&B_WANTED) 7298Sbill wakeup((caddr_t)bp); 730124Sbill (void) spl0(); 7318Sbill bp->b_un.b_addr += c; 7328Sbill u.u_count -= c; 7338Sbill u.u_offset += c; 7348Sbill } 7358Sbill bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS); 7368Sbill u.u_count = bp->b_resid; 7378Sbill geterror(bp); 7388Sbill } 7398Sbill 7408Sbill /*ARGSUSED*/ 7418Sbill unsigned 7428Sbill minphys(bp) 7438Sbill struct buf *bp; 7448Sbill { 7458Sbill 7468Sbill if (bp->b_bcount > 60 * 1024) 7478Sbill bp->b_bcount = 60 * 1024; 7488Sbill } 7498Sbill 7508Sbill /* 7518Sbill * Pick up the device's error number and pass it to the user; 7528Sbill * if there is an error but the number is 0 set a generalized 7538Sbill * code. Actually the latter is always true because devices 7548Sbill * don't yet return specific errors. 7558Sbill */ 7568Sbill geterror(bp) 7578Sbill register struct buf *bp; 7588Sbill { 7598Sbill 7608Sbill if (bp->b_flags&B_ERROR) 7618Sbill if ((u.u_error = bp->b_error)==0) 7628Sbill u.u_error = EIO; 7638Sbill } 7642299Skre 7652299Skre /* 7662299Skre * Invalidate in core blocks belonging to closed or umounted filesystem 7672299Skre * 7682299Skre * This is not nicely done at all - the buffer ought to be removed from the 7692299Skre * hash chains & have its dev/blkno fields clobbered, but unfortunately we 7702299Skre * can't do that here, as it is quite possible that the block is still 7712299Skre * being used for i/o. Eventually, all disc drivers should be forced to 7722299Skre * have a close routine, which ought ensure that the queue is empty, then 7732299Skre * properly flush the queues. Until that happy day, this suffices for 7742299Skre * correctness. ... kre 7752299Skre */ 7762299Skre binval(dev) 7772299Skre dev_t dev; 7782299Skre { 7792361Skre register struct buf *bp; 7802361Skre register struct bufhd *hp; 7812361Skre #define dp ((struct buf *)hp) 7822299Skre 7832361Skre for (hp = bufhash; hp < &bufhash[BUFHSZ]; hp++) 7842361Skre for (bp = dp->b_forw; bp != dp; bp = bp->b_forw) 7852361Skre if (bp->b_dev == dev) 7862361Skre bp->b_flags |= B_INVAL; 7872299Skre } 788