145749Smckusick /* 245749Smckusick * Copyright (c) 1990 University of Utah. 363379Sbostic * Copyright (c) 1991, 1993 463379Sbostic * The Regents of the University of California. All rights reserved. 545749Smckusick * 645749Smckusick * This code is derived from software contributed to Berkeley by 745749Smckusick * the Systems Programming Group of the University of Utah Computer 845749Smckusick * Science Department. 945749Smckusick * 1045749Smckusick * %sccs.include.redist.c% 1145749Smckusick * 1249289Shibler * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$ 1349289Shibler * 14*64827Storek * @(#)swap_pager.c 8.3 (Berkeley) 11/10/93 1545749Smckusick */ 1645749Smckusick 1745749Smckusick /* 1845749Smckusick * Quick hack to page to dedicated partition(s). 1945749Smckusick * TODO: 2045749Smckusick * Add multiprocessor locks 2145749Smckusick * Deal with async writes in a better fashion 2245749Smckusick */ 2345749Smckusick 2453341Sbostic #include <sys/param.h> 2553341Sbostic #include <sys/systm.h> 2653341Sbostic #include <sys/proc.h> 2753341Sbostic #include <sys/buf.h> 2853341Sbostic #include <sys/map.h> 2953496Sheideman #include <sys/vnode.h> 3053341Sbostic #include <sys/malloc.h> 3145749Smckusick 3255051Spendry #include <miscfs/specfs/specdev.h> 3355051Spendry 3453341Sbostic #include <vm/vm.h> 3553341Sbostic #include <vm/vm_page.h> 3653341Sbostic #include <vm/vm_pageout.h> 3753341Sbostic #include <vm/swap_pager.h> 3845749Smckusick 3945749Smckusick #define NSWSIZES 16 /* size of swtab */ 4045749Smckusick #define NPENDINGIO 64 /* max # of pending cleans */ 4145749Smckusick #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ 4245749Smckusick 4345749Smckusick #ifdef DEBUG 4445749Smckusick int swpagerdebug = 0x100; 4545749Smckusick #define SDB_FOLLOW 0x001 4645749Smckusick #define SDB_INIT 0x002 4745749Smckusick #define SDB_ALLOC 0x004 4845749Smckusick #define SDB_IO 0x008 4945749Smckusick #define SDB_WRITE 0x010 5045749Smckusick #define SDB_FAIL 0x020 5145749Smckusick #define SDB_ALLOCBLK 0x040 5245749Smckusick #define SDB_FULL 0x080 5345749Smckusick #define SDB_ANOM 0x100 5445749Smckusick #define SDB_ANOMPANIC 0x200 5545749Smckusick #endif 5645749Smckusick 5745749Smckusick struct swpagerclean { 5845749Smckusick queue_head_t spc_list; 5945749Smckusick int spc_flags; 6045749Smckusick struct buf *spc_bp; 6145749Smckusick sw_pager_t spc_swp; 6245749Smckusick vm_offset_t spc_kva; 6345749Smckusick vm_page_t spc_m; 6445749Smckusick } swcleanlist[NPENDINGIO]; 6553341Sbostic typedef struct swpagerclean *swp_clean_t; 6645749Smckusick 6753341Sbostic 6845749Smckusick /* spc_flags values */ 6945749Smckusick #define SPC_FREE 0x00 7045749Smckusick #define SPC_BUSY 0x01 7145749Smckusick #define SPC_DONE 0x02 7245749Smckusick #define SPC_ERROR 0x04 7345749Smckusick #define SPC_DIRTY 0x08 7445749Smckusick 7545749Smckusick struct swtab { 7645749Smckusick vm_size_t st_osize; /* size of object (bytes) */ 7745749Smckusick int st_bsize; /* vs. size of swap block (DEV_BSIZE units) */ 7845749Smckusick #ifdef DEBUG 7945749Smckusick u_long st_inuse; /* number in this range in use */ 8045749Smckusick u_long st_usecnt; /* total used of this size */ 8145749Smckusick #endif 8245749Smckusick } swtab[NSWSIZES+1]; 8345749Smckusick 8445749Smckusick #ifdef DEBUG 8545749Smckusick int swap_pager_pendingio; /* max pending async "clean" ops */ 8645749Smckusick int swap_pager_poip; /* pageouts in progress */ 8745749Smckusick int swap_pager_piip; /* pageins in progress */ 8845749Smckusick #endif 8945749Smckusick 9045749Smckusick queue_head_t swap_pager_inuse; /* list of pending page cleans */ 9145749Smckusick queue_head_t swap_pager_free; /* list of free pager clean structs */ 9245749Smckusick queue_head_t swap_pager_list; /* list of "named" anon regions */ 9345749Smckusick 9453341Sbostic static int swap_pager_finish __P((swp_clean_t)); 9553341Sbostic static void swap_pager_init __P((void)); 96*64827Storek static vm_pager_t swap_pager_alloc 97*64827Storek __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t)); 9853341Sbostic static boolean_t swap_pager_clean __P((vm_page_t, int)); 9953341Sbostic static void swap_pager_dealloc __P((vm_pager_t)); 10053341Sbostic static int swap_pager_getpage 10153341Sbostic __P((vm_pager_t, vm_page_t, boolean_t)); 10253341Sbostic static boolean_t swap_pager_haspage __P((vm_pager_t, vm_offset_t)); 10353341Sbostic static int swap_pager_io __P((sw_pager_t, vm_page_t, int)); 10453341Sbostic static void swap_pager_iodone __P((struct buf *)); 10553341Sbostic static int swap_pager_putpage 10653341Sbostic __P((vm_pager_t, vm_page_t, boolean_t)); 10753341Sbostic 10853341Sbostic struct pagerops swappagerops = { 10953341Sbostic swap_pager_init, 11053341Sbostic swap_pager_alloc, 11153341Sbostic swap_pager_dealloc, 11253341Sbostic swap_pager_getpage, 11353341Sbostic swap_pager_putpage, 11453341Sbostic swap_pager_haspage 11553341Sbostic }; 11653341Sbostic 11753341Sbostic static void 11845749Smckusick swap_pager_init() 11945749Smckusick { 12045749Smckusick register swp_clean_t spc; 12145749Smckusick register int i, bsize; 12245749Smckusick extern int dmmin, dmmax; 12345749Smckusick int maxbsize; 12445749Smckusick 12545749Smckusick #ifdef DEBUG 12645749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) 12745749Smckusick printf("swpg_init()\n"); 12845749Smckusick #endif 12945749Smckusick dfltpagerops = &swappagerops; 13045749Smckusick queue_init(&swap_pager_list); 13145749Smckusick 13245749Smckusick /* 13345749Smckusick * Initialize clean lists 13445749Smckusick */ 13545749Smckusick queue_init(&swap_pager_inuse); 13645749Smckusick queue_init(&swap_pager_free); 13745749Smckusick for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) { 13845749Smckusick queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); 13945749Smckusick spc->spc_flags = SPC_FREE; 14045749Smckusick } 14145749Smckusick 14245749Smckusick /* 14345749Smckusick * Calculate the swap allocation constants. 14445749Smckusick */ 14545749Smckusick if (dmmin == 0) { 14645749Smckusick dmmin = DMMIN; 14745749Smckusick if (dmmin < CLBYTES/DEV_BSIZE) 14845749Smckusick dmmin = CLBYTES/DEV_BSIZE; 14945749Smckusick } 15045749Smckusick if (dmmax == 0) 15145749Smckusick dmmax = DMMAX; 15245749Smckusick 15345749Smckusick /* 15445749Smckusick * Fill in our table of object size vs. allocation size 15545749Smckusick */ 15645749Smckusick bsize = btodb(PAGE_SIZE); 15745749Smckusick if (bsize < dmmin) 15845749Smckusick bsize = dmmin; 15945749Smckusick maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); 16045749Smckusick if (maxbsize > dmmax) 16145749Smckusick maxbsize = dmmax; 16245749Smckusick for (i = 0; i < NSWSIZES; i++) { 16345749Smckusick swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); 16445749Smckusick swtab[i].st_bsize = bsize; 16545749Smckusick #ifdef DEBUG 16645749Smckusick if (swpagerdebug & SDB_INIT) 16745749Smckusick printf("swpg_init: ix %d, size %x, bsize %x\n", 16845749Smckusick i, swtab[i].st_osize, swtab[i].st_bsize); 16945749Smckusick #endif 17045749Smckusick if (bsize >= maxbsize) 17145749Smckusick break; 17245749Smckusick bsize *= 2; 17345749Smckusick } 17445749Smckusick swtab[i].st_osize = 0; 17545749Smckusick swtab[i].st_bsize = bsize; 17645749Smckusick } 17745749Smckusick 17845749Smckusick /* 17945749Smckusick * Allocate a pager structure and associated resources. 18045749Smckusick * Note that if we are called from the pageout daemon (handle == NULL) 18145749Smckusick * we should not wait for memory as it could resulting in deadlock. 18245749Smckusick */ 18353341Sbostic static vm_pager_t 184*64827Storek swap_pager_alloc(handle, size, prot, foff) 18545749Smckusick caddr_t handle; 18645749Smckusick register vm_size_t size; 18745749Smckusick vm_prot_t prot; 188*64827Storek vm_offset_t foff; 18945749Smckusick { 19045749Smckusick register vm_pager_t pager; 19145749Smckusick register sw_pager_t swp; 19245749Smckusick struct swtab *swt; 19345749Smckusick int waitok; 19445749Smckusick 19545749Smckusick #ifdef DEBUG 19645749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 19745749Smckusick printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); 19845749Smckusick #endif 19945749Smckusick /* 20045749Smckusick * If this is a "named" anonymous region, look it up and 20145749Smckusick * return the appropriate pager if it exists. 20245749Smckusick */ 20345749Smckusick if (handle) { 20445749Smckusick pager = vm_pager_lookup(&swap_pager_list, handle); 20548386Skarels if (pager != NULL) { 20645749Smckusick /* 20745749Smckusick * Use vm_object_lookup to gain a reference 20845749Smckusick * to the object and also to remove from the 20945749Smckusick * object cache. 21045749Smckusick */ 21148386Skarels if (vm_object_lookup(pager) == NULL) 21245749Smckusick panic("swap_pager_alloc: bad object"); 21345749Smckusick return(pager); 21445749Smckusick } 21545749Smckusick } 21645749Smckusick /* 21745749Smckusick * Pager doesn't exist, allocate swap management resources 21845749Smckusick * and initialize. 21945749Smckusick */ 22045749Smckusick waitok = handle ? M_WAITOK : M_NOWAIT; 22145749Smckusick pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 22248386Skarels if (pager == NULL) 22348386Skarels return(NULL); 22445749Smckusick swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 22545749Smckusick if (swp == NULL) { 22645749Smckusick #ifdef DEBUG 22745749Smckusick if (swpagerdebug & SDB_FAIL) 22845749Smckusick printf("swpg_alloc: swpager malloc failed\n"); 22945749Smckusick #endif 23045749Smckusick free((caddr_t)pager, M_VMPAGER); 23148386Skarels return(NULL); 23245749Smckusick } 23345749Smckusick size = round_page(size); 23445749Smckusick for (swt = swtab; swt->st_osize; swt++) 23545749Smckusick if (size <= swt->st_osize) 23645749Smckusick break; 23745749Smckusick #ifdef DEBUG 23845749Smckusick swt->st_inuse++; 23945749Smckusick swt->st_usecnt++; 24045749Smckusick #endif 24145749Smckusick swp->sw_osize = size; 24245749Smckusick swp->sw_bsize = swt->st_bsize; 24345749Smckusick swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; 24445749Smckusick swp->sw_blocks = (sw_blk_t) 24545749Smckusick malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 24645749Smckusick M_VMPGDATA, M_NOWAIT); 24745749Smckusick if (swp->sw_blocks == NULL) { 24845749Smckusick free((caddr_t)swp, M_VMPGDATA); 24945749Smckusick free((caddr_t)pager, M_VMPAGER); 25045749Smckusick #ifdef DEBUG 25145749Smckusick if (swpagerdebug & SDB_FAIL) 25245749Smckusick printf("swpg_alloc: sw_blocks malloc failed\n"); 25345749Smckusick swt->st_inuse--; 25445749Smckusick swt->st_usecnt--; 25545749Smckusick #endif 25645749Smckusick return(FALSE); 25745749Smckusick } 25845749Smckusick bzero((caddr_t)swp->sw_blocks, 25945749Smckusick swp->sw_nblocks * sizeof(*swp->sw_blocks)); 26045749Smckusick swp->sw_poip = 0; 26145749Smckusick if (handle) { 26245749Smckusick vm_object_t object; 26345749Smckusick 26445749Smckusick swp->sw_flags = SW_NAMED; 26545749Smckusick queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list); 26645749Smckusick /* 26745749Smckusick * Consistant with other pagers: return with object 26845749Smckusick * referenced. Can't do this with handle == NULL 26945749Smckusick * since it might be the pageout daemon calling. 27045749Smckusick */ 27145749Smckusick object = vm_object_allocate(size); 27245749Smckusick vm_object_enter(object, pager); 27345749Smckusick vm_object_setpager(object, pager, 0, FALSE); 27445749Smckusick } else { 27545749Smckusick swp->sw_flags = 0; 27645749Smckusick queue_init(&pager->pg_list); 27745749Smckusick } 27845749Smckusick pager->pg_handle = handle; 27945749Smckusick pager->pg_ops = &swappagerops; 28045749Smckusick pager->pg_type = PG_SWAP; 28145749Smckusick pager->pg_data = (caddr_t)swp; 28245749Smckusick 28345749Smckusick #ifdef DEBUG 28445749Smckusick if (swpagerdebug & SDB_ALLOC) 28545749Smckusick printf("swpg_alloc: pg_data %x, %x of %x at %x\n", 28645749Smckusick swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); 28745749Smckusick #endif 28845749Smckusick return(pager); 28945749Smckusick } 29045749Smckusick 29153341Sbostic static void 29245749Smckusick swap_pager_dealloc(pager) 29345749Smckusick vm_pager_t pager; 29445749Smckusick { 29545749Smckusick register int i; 29645749Smckusick register sw_blk_t bp; 29745749Smckusick register sw_pager_t swp; 29845749Smckusick struct swtab *swt; 29945749Smckusick int s; 30045749Smckusick 30145749Smckusick #ifdef DEBUG 30245749Smckusick /* save panic time state */ 30345749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 30445749Smckusick return; 30545749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 30645749Smckusick printf("swpg_dealloc(%x)\n", pager); 30745749Smckusick #endif 30845749Smckusick /* 30945749Smckusick * Remove from list right away so lookups will fail if we 31045749Smckusick * block for pageout completion. 31145749Smckusick */ 31245749Smckusick swp = (sw_pager_t) pager->pg_data; 31345749Smckusick if (swp->sw_flags & SW_NAMED) { 31445749Smckusick queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list); 31545749Smckusick swp->sw_flags &= ~SW_NAMED; 31645749Smckusick } 31745749Smckusick #ifdef DEBUG 31845749Smckusick for (swt = swtab; swt->st_osize; swt++) 31945749Smckusick if (swp->sw_osize <= swt->st_osize) 32045749Smckusick break; 32145749Smckusick swt->st_inuse--; 32245749Smckusick #endif 32345749Smckusick 32445749Smckusick /* 32545749Smckusick * Wait for all pageouts to finish and remove 32645749Smckusick * all entries from cleaning list. 32745749Smckusick */ 32845749Smckusick s = splbio(); 32945749Smckusick while (swp->sw_poip) { 33045749Smckusick swp->sw_flags |= SW_WANTED; 33153341Sbostic assert_wait((int)swp, 0); 33245749Smckusick thread_block(); 33345749Smckusick } 33445749Smckusick splx(s); 33548386Skarels (void) swap_pager_clean(NULL, B_WRITE); 33645749Smckusick 33745749Smckusick /* 33845749Smckusick * Free left over swap blocks 33945749Smckusick */ 34045749Smckusick for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) 34145749Smckusick if (bp->swb_block) { 34245749Smckusick #ifdef DEBUG 34345749Smckusick if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) 34445749Smckusick printf("swpg_dealloc: blk %x\n", 34545749Smckusick bp->swb_block); 34645749Smckusick #endif 34745749Smckusick rmfree(swapmap, swp->sw_bsize, bp->swb_block); 34845749Smckusick } 34945749Smckusick /* 35045749Smckusick * Free swap management resources 35145749Smckusick */ 35245749Smckusick free((caddr_t)swp->sw_blocks, M_VMPGDATA); 35345749Smckusick free((caddr_t)swp, M_VMPGDATA); 35445749Smckusick free((caddr_t)pager, M_VMPAGER); 35545749Smckusick } 35645749Smckusick 35753341Sbostic static int 35845749Smckusick swap_pager_getpage(pager, m, sync) 35945749Smckusick vm_pager_t pager; 36045749Smckusick vm_page_t m; 36145749Smckusick boolean_t sync; 36245749Smckusick { 36345749Smckusick #ifdef DEBUG 36445749Smckusick if (swpagerdebug & SDB_FOLLOW) 36545749Smckusick printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync); 36645749Smckusick #endif 36745749Smckusick return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ)); 36845749Smckusick } 36945749Smckusick 37053341Sbostic static int 37145749Smckusick swap_pager_putpage(pager, m, sync) 37245749Smckusick vm_pager_t pager; 37345749Smckusick vm_page_t m; 37445749Smckusick boolean_t sync; 37545749Smckusick { 37645749Smckusick int flags; 37745749Smckusick 37845749Smckusick #ifdef DEBUG 37945749Smckusick if (swpagerdebug & SDB_FOLLOW) 38045749Smckusick printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync); 38145749Smckusick #endif 38248386Skarels if (pager == NULL) { 38348386Skarels (void) swap_pager_clean(NULL, B_WRITE); 38454817Storek return (VM_PAGER_OK); /* ??? */ 38545749Smckusick } 38645749Smckusick flags = B_WRITE; 38745749Smckusick if (!sync) 38845749Smckusick flags |= B_ASYNC; 38945749Smckusick return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags)); 39045749Smckusick } 39145749Smckusick 39253341Sbostic static boolean_t 39345749Smckusick swap_pager_haspage(pager, offset) 39445749Smckusick vm_pager_t pager; 39545749Smckusick vm_offset_t offset; 39645749Smckusick { 39745749Smckusick register sw_pager_t swp; 39845749Smckusick register sw_blk_t swb; 39945749Smckusick int ix; 40045749Smckusick 40145749Smckusick #ifdef DEBUG 40245749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 40345749Smckusick printf("swpg_haspage(%x, %x) ", pager, offset); 40445749Smckusick #endif 40545749Smckusick swp = (sw_pager_t) pager->pg_data; 40645749Smckusick ix = offset / dbtob(swp->sw_bsize); 40745749Smckusick if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 40845749Smckusick #ifdef DEBUG 40945749Smckusick if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) 41045749Smckusick printf("swpg_haspage: %x bad offset %x, ix %x\n", 41145749Smckusick swp->sw_blocks, offset, ix); 41245749Smckusick #endif 41345749Smckusick return(FALSE); 41445749Smckusick } 41545749Smckusick swb = &swp->sw_blocks[ix]; 41645749Smckusick if (swb->swb_block) 41745749Smckusick ix = atop(offset % dbtob(swp->sw_bsize)); 41845749Smckusick #ifdef DEBUG 41945749Smckusick if (swpagerdebug & SDB_ALLOCBLK) 42045749Smckusick printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); 42145749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 42245749Smckusick printf("-> %c\n", 42345749Smckusick "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); 42445749Smckusick #endif 42545749Smckusick if (swb->swb_block && (swb->swb_mask & (1 << ix))) 42645749Smckusick return(TRUE); 42745749Smckusick return(FALSE); 42845749Smckusick } 42945749Smckusick 43045749Smckusick /* 43145749Smckusick * Scaled down version of swap(). 43245749Smckusick * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. 43345749Smckusick * BOGUS: lower level IO routines expect a KVA so we have to map our 43445749Smckusick * provided physical page into the KVA to keep them happy. 43545749Smckusick */ 43653341Sbostic static int 43745749Smckusick swap_pager_io(swp, m, flags) 43845749Smckusick register sw_pager_t swp; 43945749Smckusick vm_page_t m; 44045749Smckusick int flags; 44145749Smckusick { 44245749Smckusick register struct buf *bp; 44345749Smckusick register sw_blk_t swb; 44445749Smckusick register int s; 44545749Smckusick int ix; 44645749Smckusick boolean_t rv; 44745749Smckusick vm_offset_t kva, off; 44845749Smckusick swp_clean_t spc; 44945749Smckusick 45045749Smckusick #ifdef DEBUG 45145749Smckusick /* save panic time state */ 45245749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 45353341Sbostic return (VM_PAGER_FAIL); /* XXX: correct return? */ 45445749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) 45545749Smckusick printf("swpg_io(%x, %x, %x)\n", swp, m, flags); 45645749Smckusick #endif 45745749Smckusick 45845749Smckusick /* 45945749Smckusick * For reads (pageins) and synchronous writes, we clean up 46049289Shibler * all completed async pageouts. 46145749Smckusick */ 46245749Smckusick if ((flags & B_ASYNC) == 0) { 46345749Smckusick s = splbio(); 46449289Shibler #ifdef DEBUG 46549289Shibler /* 46649289Shibler * Check to see if this page is currently being cleaned. 46749289Shibler * If it is, we just wait til the operation is done before 46849289Shibler * continuing. 46949289Shibler */ 47045749Smckusick while (swap_pager_clean(m, flags&B_READ)) { 47149289Shibler if (swpagerdebug & SDB_ANOM) 47249289Shibler printf("swap_pager_io: page %x cleaning\n", m); 47349289Shibler 47445749Smckusick swp->sw_flags |= SW_WANTED; 47553341Sbostic assert_wait((int)swp, 0); 47645749Smckusick thread_block(); 47745749Smckusick } 47849289Shibler #else 47949289Shibler (void) swap_pager_clean(m, flags&B_READ); 48049289Shibler #endif 48145749Smckusick splx(s); 48245749Smckusick } 48345749Smckusick /* 48445749Smckusick * For async writes (pageouts), we cleanup completed pageouts so 48545749Smckusick * that all available resources are freed. Also tells us if this 48645749Smckusick * page is already being cleaned. If it is, or no resources 48745749Smckusick * are available, we try again later. 48845749Smckusick */ 48949289Shibler else if (swap_pager_clean(m, B_WRITE) || 49049289Shibler queue_empty(&swap_pager_free)) { 49149289Shibler #ifdef DEBUG 49249289Shibler if ((swpagerdebug & SDB_ANOM) && 49349289Shibler !queue_empty(&swap_pager_free)) 49449289Shibler printf("swap_pager_io: page %x already cleaning\n", m); 49549289Shibler #endif 49645749Smckusick return(VM_PAGER_FAIL); 49749289Shibler } 49845749Smckusick 49945749Smckusick /* 50045749Smckusick * Determine swap block and allocate as necessary. 50145749Smckusick */ 50245749Smckusick off = m->offset + m->object->paging_offset; 50345749Smckusick ix = off / dbtob(swp->sw_bsize); 50445749Smckusick if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 50545749Smckusick #ifdef DEBUG 50645749Smckusick if (swpagerdebug & SDB_FAIL) 50745749Smckusick printf("swpg_io: bad offset %x+%x(%d) in %x\n", 50845749Smckusick m->offset, m->object->paging_offset, 50945749Smckusick ix, swp->sw_blocks); 51045749Smckusick #endif 51145749Smckusick return(VM_PAGER_FAIL); 51245749Smckusick } 51345749Smckusick swb = &swp->sw_blocks[ix]; 51445749Smckusick off = off % dbtob(swp->sw_bsize); 51545749Smckusick if (flags & B_READ) { 51645749Smckusick if (swb->swb_block == 0 || 51745749Smckusick (swb->swb_mask & (1 << atop(off))) == 0) { 51845749Smckusick #ifdef DEBUG 51945749Smckusick if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL)) 52045749Smckusick printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n", 52145749Smckusick swp->sw_blocks, 52245749Smckusick swb->swb_block, atop(off), 52345749Smckusick swb->swb_mask, 52445749Smckusick m->offset, m->object->paging_offset); 52545749Smckusick #endif 52645749Smckusick /* XXX: should we zero page here?? */ 52745749Smckusick return(VM_PAGER_FAIL); 52845749Smckusick } 52945749Smckusick } else if (swb->swb_block == 0) { 53045749Smckusick swb->swb_block = rmalloc(swapmap, swp->sw_bsize); 53145749Smckusick if (swb->swb_block == 0) { 53245749Smckusick #ifdef DEBUG 53345749Smckusick if (swpagerdebug & SDB_FAIL) 53445749Smckusick printf("swpg_io: rmalloc of %x failed\n", 53545749Smckusick swp->sw_bsize); 53645749Smckusick #endif 53745749Smckusick return(VM_PAGER_FAIL); 53845749Smckusick } 53945749Smckusick #ifdef DEBUG 54045749Smckusick if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) 54145749Smckusick printf("swpg_io: %x alloc blk %x at ix %x\n", 54245749Smckusick swp->sw_blocks, swb->swb_block, ix); 54345749Smckusick #endif 54445749Smckusick } 54545749Smckusick 54645749Smckusick /* 54745749Smckusick * Allocate a kernel virtual address and initialize so that PTE 54845749Smckusick * is available for lower level IO drivers. 54945749Smckusick */ 55045749Smckusick kva = vm_pager_map_page(m); 55145749Smckusick 55245749Smckusick /* 55345749Smckusick * Get a swap buffer header and perform the IO 55445749Smckusick */ 55545749Smckusick s = splbio(); 55656393Smckusick while (bswlist.b_actf == NULL) { 55745749Smckusick #ifdef DEBUG 55845749Smckusick if (swpagerdebug & SDB_ANOM) 55949289Shibler printf("swap_pager_io: wait on swbuf for %x (%d)\n", 56045749Smckusick m, flags); 56145749Smckusick #endif 56245749Smckusick bswlist.b_flags |= B_WANTED; 56345749Smckusick sleep((caddr_t)&bswlist, PSWP+1); 56445749Smckusick } 56556393Smckusick bp = bswlist.b_actf; 56656393Smckusick bswlist.b_actf = bp->b_actf; 56745749Smckusick splx(s); 56845749Smckusick bp->b_flags = B_BUSY | (flags & B_READ); 56948386Skarels bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */ 57064546Sbostic bp->b_data = (caddr_t)kva; 57145749Smckusick bp->b_blkno = swb->swb_block + btodb(off); 57245749Smckusick VHOLD(swapdev_vp); 57345749Smckusick bp->b_vp = swapdev_vp; 57446985Smckusick if (swapdev_vp->v_type == VBLK) 57546985Smckusick bp->b_dev = swapdev_vp->v_rdev; 57645749Smckusick bp->b_bcount = PAGE_SIZE; 57753213Smckusick if ((bp->b_flags & B_READ) == 0) { 57853213Smckusick bp->b_dirtyoff = 0; 57953213Smckusick bp->b_dirtyend = PAGE_SIZE; 58045749Smckusick swapdev_vp->v_numoutput++; 58153213Smckusick } 58245749Smckusick 58345749Smckusick /* 58445749Smckusick * If this is an async write we set up additional buffer fields 58545749Smckusick * and place a "cleaning" entry on the inuse queue. 58645749Smckusick */ 58745749Smckusick if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 58845749Smckusick #ifdef DEBUG 58945749Smckusick if (queue_empty(&swap_pager_free)) 59045749Smckusick panic("swpg_io: lost spc"); 59145749Smckusick #endif 59245749Smckusick queue_remove_first(&swap_pager_free, 59345749Smckusick spc, swp_clean_t, spc_list); 59445749Smckusick #ifdef DEBUG 59545749Smckusick if (spc->spc_flags != SPC_FREE) 59645749Smckusick panic("swpg_io: bad free spc"); 59745749Smckusick #endif 59845749Smckusick spc->spc_flags = SPC_BUSY; 59945749Smckusick spc->spc_bp = bp; 60045749Smckusick spc->spc_swp = swp; 60145749Smckusick spc->spc_kva = kva; 60245749Smckusick spc->spc_m = m; 60345749Smckusick bp->b_flags |= B_CALL; 60445749Smckusick bp->b_iodone = swap_pager_iodone; 60545749Smckusick s = splbio(); 60645749Smckusick swp->sw_poip++; 60745749Smckusick queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); 60845749Smckusick 60945749Smckusick #ifdef DEBUG 61045749Smckusick swap_pager_poip++; 61145749Smckusick if (swpagerdebug & SDB_WRITE) 61245749Smckusick printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n", 61345749Smckusick bp, swp, spc, swp->sw_poip); 61445749Smckusick if ((swpagerdebug & SDB_ALLOCBLK) && 61545749Smckusick (swb->swb_mask & (1 << atop(off))) == 0) 61645749Smckusick printf("swpg_io: %x write blk %x+%x\n", 61745749Smckusick swp->sw_blocks, swb->swb_block, atop(off)); 61845749Smckusick #endif 61945749Smckusick swb->swb_mask |= (1 << atop(off)); 62045749Smckusick splx(s); 62145749Smckusick } 62245749Smckusick #ifdef DEBUG 62345749Smckusick if (swpagerdebug & SDB_IO) 62445749Smckusick printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", 62545749Smckusick bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); 62645749Smckusick #endif 62745749Smckusick VOP_STRATEGY(bp); 62845749Smckusick if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 62945749Smckusick #ifdef DEBUG 63045749Smckusick if (swpagerdebug & SDB_IO) 63145749Smckusick printf("swpg_io: IO started: bp %x\n", bp); 63245749Smckusick #endif 63345749Smckusick return(VM_PAGER_PEND); 63445749Smckusick } 63545749Smckusick s = splbio(); 63645749Smckusick #ifdef DEBUG 63745749Smckusick if (flags & B_READ) 63845749Smckusick swap_pager_piip++; 63945749Smckusick else 64045749Smckusick swap_pager_poip++; 64145749Smckusick #endif 64245749Smckusick while ((bp->b_flags & B_DONE) == 0) { 64353341Sbostic assert_wait((int)bp, 0); 64445749Smckusick thread_block(); 64545749Smckusick } 64645749Smckusick #ifdef DEBUG 64745749Smckusick if (flags & B_READ) 64845749Smckusick --swap_pager_piip; 64945749Smckusick else 65045749Smckusick --swap_pager_poip; 65145749Smckusick #endif 65256320Shibler rv = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; 65345749Smckusick bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 65456393Smckusick bp->b_actf = bswlist.b_actf; 65556393Smckusick bswlist.b_actf = bp; 65645749Smckusick if (bp->b_vp) 65745749Smckusick brelvp(bp); 65845749Smckusick if (bswlist.b_flags & B_WANTED) { 65945749Smckusick bswlist.b_flags &= ~B_WANTED; 66045749Smckusick thread_wakeup((int)&bswlist); 66145749Smckusick } 66245749Smckusick if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { 66356382Smckusick m->flags |= PG_CLEAN; 66445749Smckusick pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 66545749Smckusick } 66645749Smckusick splx(s); 66745749Smckusick #ifdef DEBUG 66845749Smckusick if (swpagerdebug & SDB_IO) 66945749Smckusick printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); 67056320Shibler if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_ERROR) 67145749Smckusick printf("swpg_io: IO error\n"); 67245749Smckusick #endif 67345749Smckusick vm_pager_unmap_page(kva); 67445749Smckusick return(rv); 67545749Smckusick } 67645749Smckusick 67753341Sbostic static boolean_t 67845749Smckusick swap_pager_clean(m, rw) 67945749Smckusick vm_page_t m; 68045749Smckusick int rw; 68145749Smckusick { 68245749Smckusick register swp_clean_t spc, tspc; 68345749Smckusick register int s; 68445749Smckusick 68545749Smckusick #ifdef DEBUG 68645749Smckusick /* save panic time state */ 68745749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 68854817Storek return (FALSE); /* ??? */ 68945749Smckusick if (swpagerdebug & SDB_FOLLOW) 69045749Smckusick printf("swpg_clean(%x, %d)\n", m, rw); 69145749Smckusick #endif 69248386Skarels tspc = NULL; 69345749Smckusick for (;;) { 69445749Smckusick /* 69545749Smckusick * Look up and removal from inuse list must be done 69645749Smckusick * at splbio() to avoid conflicts with swap_pager_iodone. 69745749Smckusick */ 69845749Smckusick s = splbio(); 69945749Smckusick spc = (swp_clean_t) queue_first(&swap_pager_inuse); 70045749Smckusick while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { 70145749Smckusick if ((spc->spc_flags & SPC_DONE) && 70245749Smckusick swap_pager_finish(spc)) { 70345749Smckusick queue_remove(&swap_pager_inuse, spc, 70445749Smckusick swp_clean_t, spc_list); 70545749Smckusick break; 70645749Smckusick } 70745749Smckusick if (m && m == spc->spc_m) { 70845749Smckusick #ifdef DEBUG 70945749Smckusick if (swpagerdebug & SDB_ANOM) 71049289Shibler printf("swap_pager_clean: page %x on list, flags %x\n", 71145749Smckusick m, spc->spc_flags); 71245749Smckusick #endif 71345749Smckusick tspc = spc; 71445749Smckusick } 71545749Smckusick spc = (swp_clean_t) queue_next(&spc->spc_list); 71645749Smckusick } 71745749Smckusick 71845749Smckusick /* 71945749Smckusick * No operations done, thats all we can do for now. 72045749Smckusick */ 72145749Smckusick if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) 72245749Smckusick break; 72345749Smckusick splx(s); 72445749Smckusick 72545749Smckusick /* 72645749Smckusick * The desired page was found to be busy earlier in 72745749Smckusick * the scan but has since completed. 72845749Smckusick */ 72945749Smckusick if (tspc && tspc == spc) { 73045749Smckusick #ifdef DEBUG 73145749Smckusick if (swpagerdebug & SDB_ANOM) 73249289Shibler printf("swap_pager_clean: page %x done while looking\n", 73345749Smckusick m); 73445749Smckusick #endif 73548386Skarels tspc = NULL; 73645749Smckusick } 73745749Smckusick spc->spc_flags = SPC_FREE; 73845749Smckusick vm_pager_unmap_page(spc->spc_kva); 73945749Smckusick queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); 74045749Smckusick #ifdef DEBUG 74145749Smckusick if (swpagerdebug & SDB_WRITE) 74245749Smckusick printf("swpg_clean: free spc %x\n", spc); 74345749Smckusick #endif 74445749Smckusick } 74549289Shibler #ifdef DEBUG 74645749Smckusick /* 74745749Smckusick * If we found that the desired page is already being cleaned 74845749Smckusick * mark it so that swap_pager_iodone() will not set the clean 74945749Smckusick * flag before the pageout daemon has another chance to clean it. 75045749Smckusick */ 75145749Smckusick if (tspc && rw == B_WRITE) { 75245749Smckusick if (swpagerdebug & SDB_ANOM) 75349289Shibler printf("swap_pager_clean: page %x on clean list\n", 75449289Shibler tspc); 75545749Smckusick tspc->spc_flags |= SPC_DIRTY; 75645749Smckusick } 75749289Shibler #endif 75845749Smckusick splx(s); 75945749Smckusick 76045749Smckusick #ifdef DEBUG 76145749Smckusick if (swpagerdebug & SDB_WRITE) 76245749Smckusick printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE); 76345749Smckusick if ((swpagerdebug & SDB_ANOM) && tspc) 76445749Smckusick printf("swpg_clean: %s of cleaning page %x\n", 76545749Smckusick rw == B_READ ? "get" : "put", m); 76645749Smckusick #endif 76745749Smckusick return(tspc ? TRUE : FALSE); 76845749Smckusick } 76945749Smckusick 77053341Sbostic static int 77145749Smckusick swap_pager_finish(spc) 77245749Smckusick register swp_clean_t spc; 77345749Smckusick { 77445749Smckusick vm_object_t object = spc->spc_m->object; 77545749Smckusick 77645749Smckusick /* 77745749Smckusick * Mark the paging operation as done. 77845749Smckusick * (XXX) If we cannot get the lock, leave it til later. 77945749Smckusick * (XXX) Also we are assuming that an async write is a 78045749Smckusick * pageout operation that has incremented the counter. 78145749Smckusick */ 78245749Smckusick if (!vm_object_lock_try(object)) 78345749Smckusick return(0); 78445749Smckusick 78545749Smckusick if (--object->paging_in_progress == 0) 78645749Smckusick thread_wakeup((int) object); 78745749Smckusick 78849289Shibler #ifdef DEBUG 78945749Smckusick /* 79045749Smckusick * XXX: this isn't even close to the right thing to do, 79145749Smckusick * introduces a variety of race conditions. 79245749Smckusick * 79345749Smckusick * If dirty, vm_pageout() has attempted to clean the page 79445749Smckusick * again. In this case we do not do anything as we will 79549289Shibler * see the page again shortly. 79645749Smckusick */ 79749289Shibler if (spc->spc_flags & SPC_DIRTY) { 79849289Shibler if (swpagerdebug & SDB_ANOM) 79949289Shibler printf("swap_pager_finish: page %x dirty again\n", 80049289Shibler spc->spc_m); 80156382Smckusick spc->spc_m->flags &= ~PG_BUSY; 80249289Shibler PAGE_WAKEUP(spc->spc_m); 80349289Shibler vm_object_unlock(object); 80449289Shibler return(1); 80545749Smckusick } 80649289Shibler #endif 80745749Smckusick /* 80849289Shibler * If no error mark as clean and inform the pmap system. 80949289Shibler * If error, mark as dirty so we will try again. 81049289Shibler * (XXX could get stuck doing this, should give up after awhile) 81145749Smckusick */ 81249289Shibler if (spc->spc_flags & SPC_ERROR) { 81349289Shibler printf("swap_pager_finish: clean of page %x failed\n", 81449289Shibler VM_PAGE_TO_PHYS(spc->spc_m)); 81556382Smckusick spc->spc_m->flags |= PG_LAUNDRY; 81649289Shibler } else { 81756382Smckusick spc->spc_m->flags |= PG_CLEAN; 81849289Shibler pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m)); 81949289Shibler } 82056382Smckusick spc->spc_m->flags &= ~PG_BUSY; 82145749Smckusick PAGE_WAKEUP(spc->spc_m); 82245749Smckusick 82345749Smckusick vm_object_unlock(object); 82445749Smckusick return(1); 82545749Smckusick } 82645749Smckusick 82753341Sbostic static void 82845749Smckusick swap_pager_iodone(bp) 82945749Smckusick register struct buf *bp; 83045749Smckusick { 83145749Smckusick register swp_clean_t spc; 83245749Smckusick daddr_t blk; 83345749Smckusick int s; 83445749Smckusick 83545749Smckusick #ifdef DEBUG 83645749Smckusick /* save panic time state */ 83745749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 83845749Smckusick return; 83945749Smckusick if (swpagerdebug & SDB_FOLLOW) 84045749Smckusick printf("swpg_iodone(%x)\n", bp); 84145749Smckusick #endif 84245749Smckusick s = splbio(); 84345749Smckusick spc = (swp_clean_t) queue_first(&swap_pager_inuse); 84445749Smckusick while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { 84545749Smckusick if (spc->spc_bp == bp) 84645749Smckusick break; 84745749Smckusick spc = (swp_clean_t) queue_next(&spc->spc_list); 84845749Smckusick } 84945749Smckusick #ifdef DEBUG 85045749Smckusick if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) 85149289Shibler panic("swap_pager_iodone: bp not found"); 85245749Smckusick #endif 85345749Smckusick 85445749Smckusick spc->spc_flags &= ~SPC_BUSY; 85545749Smckusick spc->spc_flags |= SPC_DONE; 85645749Smckusick if (bp->b_flags & B_ERROR) 85745749Smckusick spc->spc_flags |= SPC_ERROR; 85845749Smckusick spc->spc_bp = NULL; 85945749Smckusick blk = bp->b_blkno; 86045749Smckusick 86145749Smckusick #ifdef DEBUG 86245749Smckusick --swap_pager_poip; 86345749Smckusick if (swpagerdebug & SDB_WRITE) 86445749Smckusick printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n", 86545749Smckusick bp, spc->spc_swp, spc->spc_swp->sw_flags, 86645749Smckusick spc, spc->spc_swp->sw_poip); 86745749Smckusick #endif 86845749Smckusick 86945749Smckusick spc->spc_swp->sw_poip--; 87045749Smckusick if (spc->spc_swp->sw_flags & SW_WANTED) { 87145749Smckusick spc->spc_swp->sw_flags &= ~SW_WANTED; 87245749Smckusick thread_wakeup((int)spc->spc_swp); 87345749Smckusick } 87445749Smckusick 87545749Smckusick bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 87656393Smckusick bp->b_actf = bswlist.b_actf; 87756393Smckusick bswlist.b_actf = bp; 87845749Smckusick if (bp->b_vp) 87945749Smckusick brelvp(bp); 88045749Smckusick if (bswlist.b_flags & B_WANTED) { 88145749Smckusick bswlist.b_flags &= ~B_WANTED; 88245749Smckusick thread_wakeup((int)&bswlist); 88345749Smckusick } 88456917Shibler /* 88556917Shibler * Only kick the pageout daemon if we are really hurting 88656917Shibler * for pages, otherwise this page will be picked up later. 88756917Shibler */ 88856917Shibler if (cnt.v_free_count < cnt.v_free_min) 88956917Shibler thread_wakeup((int) &vm_pages_needed); 89045749Smckusick splx(s); 89145749Smckusick } 892