/*
 * Copyright (c) 1990 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)swap_pager.c	7.2 (Berkeley) 03/04/91
 */

/*
 * Quick hack to page to dedicated partition(s).
 * TODO:
 *	Add multiprocessor locks
 *	Deal with async writes in a better fashion
 */

#include "swappager.h"
#if NSWAPPAGER > 0

#include "param.h"
#include "user.h"
#include "proc.h"
#include "buf.h"
#include "map.h"
#include "systm.h"
#include "specdev.h"
#include "vnode.h"
#include "malloc.h"
#include "queue.h"

#include "../vm/vm_param.h"
#include "../vm/vm_pager.h"
#include "../vm/vm_page.h"
#include "../vm/vm_pageout.h"
#include "../vm/swap_pager.h"

#define NSWSIZES	16	/* size of swtab */
#define NPENDINGIO	64	/* max # of pending cleans */
#define MAXDADDRS	64	/* max # of disk addrs for fixed allocations */

#ifdef DEBUG
int	swpagerdebug = 0x100;
#define SDB_FOLLOW	0x001
#define SDB_INIT	0x002
#define SDB_ALLOC	0x004
#define SDB_IO		0x008
#define SDB_WRITE	0x010
#define SDB_FAIL	0x020
#define SDB_ALLOCBLK	0x040
#define SDB_FULL	0x080
#define SDB_ANOM	0x100
#define SDB_ANOMPANIC	0x200
#endif

struct swpagerclean {
        queue_head_t    spc_list;
        int             spc_flags;
        struct buf      *spc_bp;
        sw_pager_t      spc_swp;
        vm_offset_t     spc_kva;
        vm_page_t       spc_m;
} swcleanlist[NPENDINGIO];
typedef struct swpagerclean *swp_clean_t;

#define SWP_CLEAN_NULL	((swp_clean_t)0)

/* spc_flags values */
#define SPC_FREE	0x00
#define SPC_BUSY	0x01
#define SPC_DONE	0x02
#define SPC_ERROR	0x04
#define SPC_DIRTY	0x08

struct swtab {
        vm_size_t st_osize;	/* size of object (bytes) */
        int       st_bsize;	/* vs. size of swap block (DEV_BSIZE units) */
#ifdef DEBUG
        u_long    st_inuse;	/* number in this range in use */
        u_long    st_usecnt;	/* total used of this size */
#endif
} swtab[NSWSIZES+1];

#ifdef DEBUG
int	swap_pager_pendingio;	/* max pending async "clean" ops */
int	swap_pager_poip;	/* pageouts in progress */
int	swap_pager_piip;	/* pageins in progress */
#endif

queue_head_t	swap_pager_inuse;	/* list of pending page cleans */
queue_head_t	swap_pager_free;	/* list of free pager clean structs */
queue_head_t	swap_pager_list;	/* list of "named" anon regions */

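/*
 * Initialize the swap pager: install it as the default pager,
 * set up the inuse and free clean lists and build the table of
 * object size vs. swap block allocation size.
 */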
void
swap_pager_init()
{
        register swp_clean_t spc;
        register int i, bsize;
        extern int dmmin, dmmax;
        int maxbsize;

#ifdef DEBUG
        if (swpagerdebug & (SDB_FOLLOW|SDB_INIT))
                printf("swpg_init()\n");
#endif
        dfltpagerops = &swappagerops;
        queue_init(&swap_pager_list);

        /*
         * Initialize clean lists
         */
        queue_init(&swap_pager_inuse);
        queue_init(&swap_pager_free);
        for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) {
                queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
                spc->spc_flags = SPC_FREE;
        }

        /*
         * Calculate the swap allocation constants.
         */
        if (dmmin == 0) {
                dmmin = DMMIN;
                if (dmmin < CLBYTES/DEV_BSIZE)
                        dmmin = CLBYTES/DEV_BSIZE;
        }
        if (dmmax == 0)
                dmmax = DMMAX;

        /*
         * Fill in our table of object size vs. allocation size
         */
        bsize = btodb(PAGE_SIZE);
        if (bsize < dmmin)
                bsize = dmmin;
        maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE);
        if (maxbsize > dmmax)
                maxbsize = dmmax;
        for (i = 0; i < NSWSIZES; i++) {
                swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize));
                swtab[i].st_bsize = bsize;
#ifdef DEBUG
                if (swpagerdebug & SDB_INIT)
                        printf("swpg_init: ix %d, size %x, bsize %x\n",
                            i, swtab[i].st_osize, swtab[i].st_bsize);
#endif
                if (bsize >= maxbsize)
                        break;
                bsize *= 2;
        }
        swtab[i].st_osize = 0;
        swtab[i].st_bsize = bsize;
}

/*
 * Allocate a pager structure and associated resources.
 * Note that if we are called from the pageout daemon (handle == NULL)
 * we should not wait for memory as it could result in deadlock.
 */
vm_pager_t
swap_pager_alloc(handle, size, prot)
        caddr_t handle;
        register vm_size_t size;
        vm_prot_t prot;
{
        register vm_pager_t pager;
        register sw_pager_t swp;
        struct swtab *swt;
        int waitok;

#ifdef DEBUG
        if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
                printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
        /*
         * If this is a "named" anonymous region, look it up and
         * return the appropriate pager if it exists.
         */
        if (handle) {
                pager = vm_pager_lookup(&swap_pager_list, handle);
                if (pager != VM_PAGER_NULL) {
                        /*
                         * Use vm_object_lookup to gain a reference
                         * to the object and also to remove from the
                         * object cache.
                         */
                        if (vm_object_lookup(pager) == VM_OBJECT_NULL)
                                panic("swap_pager_alloc: bad object");
                        return(pager);
                }
        }
        /*
         * Pager doesn't exist, allocate swap management resources
         * and initialize.
         */
        waitok = handle ? M_WAITOK : M_NOWAIT;
        pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok);
        if (pager == VM_PAGER_NULL)
                return(VM_PAGER_NULL);
        swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok);
        if (swp == NULL) {
#ifdef DEBUG
                if (swpagerdebug & SDB_FAIL)
                        printf("swpg_alloc: swpager malloc failed\n");
#endif
                free((caddr_t)pager, M_VMPAGER);
                return(VM_PAGER_NULL);
        }
        size = round_page(size);
        for (swt = swtab; swt->st_osize; swt++)
                if (size <= swt->st_osize)
                        break;
#ifdef DEBUG
        swt->st_inuse++;
        swt->st_usecnt++;
#endif
        swp->sw_osize = size;
        swp->sw_bsize = swt->st_bsize;
        swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize;
        swp->sw_blocks = (sw_blk_t)
                malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks),
                    M_VMPGDATA, M_NOWAIT);
        if (swp->sw_blocks == NULL) {
                free((caddr_t)swp, M_VMPGDATA);
                free((caddr_t)pager, M_VMPAGER);
#ifdef DEBUG
                if (swpagerdebug & SDB_FAIL)
                        printf("swpg_alloc: sw_blocks malloc failed\n");
                swt->st_inuse--;
                swt->st_usecnt--;
#endif
                return(FALSE);
        }
        bzero((caddr_t)swp->sw_blocks,
            swp->sw_nblocks * sizeof(*swp->sw_blocks));
        swp->sw_poip = 0;
        if (handle) {
                vm_object_t object;

                swp->sw_flags = SW_NAMED;
                queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list);
                /*
                 * Consistent with other pagers: return with object
                 * referenced.  Can't do this with handle == NULL
                 * since it might be the pageout daemon calling.
                 */
                object = vm_object_allocate(size);
                vm_object_enter(object, pager);
                vm_object_setpager(object, pager, 0, FALSE);
        } else {
                swp->sw_flags = 0;
                queue_init(&pager->pg_list);
        }
        pager->pg_handle = handle;
        pager->pg_ops = &swappagerops;
        pager->pg_type = PG_SWAP;
        pager->pg_data = (caddr_t)swp;

#ifdef DEBUG
        if (swpagerdebug & SDB_ALLOC)
                printf("swpg_alloc: pg_data %x, %x of %x at %x\n",
                    swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks);
#endif
        return(pager);
}

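/*
 * Deallocate a pager: wait for all pending pageouts on it to finish,
 * release the swap blocks still allocated to the object and free the
 * swap management structures.
 */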
void
swap_pager_dealloc(pager)
        vm_pager_t pager;
{
        register int i;
        register sw_blk_t bp;
        register sw_pager_t swp;
        struct swtab *swt;
        int s;

#ifdef DEBUG
        /* save panic time state */
        if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
                return;
        if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC))
                printf("swpg_dealloc(%x)\n", pager);
#endif
        /*
         * Remove from list right away so lookups will fail if we
         * block for pageout completion.
         */
        swp = (sw_pager_t) pager->pg_data;
        if (swp->sw_flags & SW_NAMED) {
                queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list);
                swp->sw_flags &= ~SW_NAMED;
        }
#ifdef DEBUG
        for (swt = swtab; swt->st_osize; swt++)
                if (swp->sw_osize <= swt->st_osize)
                        break;
        swt->st_inuse--;
#endif

        /*
         * Wait for all pageouts to finish and remove
         * all entries from cleaning list.
         */
        s = splbio();
        while (swp->sw_poip) {
                swp->sw_flags |= SW_WANTED;
                assert_wait((int)swp);
                thread_block();
        }
        splx(s);
        (void) swap_pager_clean(VM_PAGE_NULL, B_WRITE);

        /*
         * Free left over swap blocks
         */
        for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++)
                if (bp->swb_block) {
#ifdef DEBUG
                        if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL))
                                printf("swpg_dealloc: blk %x\n",
                                    bp->swb_block);
#endif
                        rmfree(swapmap, swp->sw_bsize, bp->swb_block);
                }
        /*
         * Free swap management resources
         */
        free((caddr_t)swp->sw_blocks, M_VMPGDATA);
        free((caddr_t)swp, M_VMPGDATA);
        free((caddr_t)pager, M_VMPAGER);
}

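/*
 * Read the given page in from swap (synchronously).
 */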
swap_pager_getpage(pager, m, sync)
        vm_pager_t pager;
        vm_page_t m;
        boolean_t sync;
{
#ifdef DEBUG
        if (swpagerdebug & SDB_FOLLOW)
                printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync);
#endif
        return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ));
}

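/*
 * Write the given page out to swap.  A null pager simply drains
 * completed page cleans; otherwise the write is asynchronous
 * unless "sync" is set.
 */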
swap_pager_putpage(pager, m, sync)
        vm_pager_t pager;
        vm_page_t m;
        boolean_t sync;
{
        int flags;

#ifdef DEBUG
        if (swpagerdebug & SDB_FOLLOW)
                printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync);
#endif
        if (pager == VM_PAGER_NULL) {
                (void) swap_pager_clean(VM_PAGE_NULL, B_WRITE);
                return;
        }
        flags = B_WRITE;
        if (!sync)
                flags |= B_ASYNC;
        return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags));
}

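/*
 * Return TRUE if the pager has swap space allocated for the page at
 * the given offset and the page has actually been written to swap,
 * FALSE otherwise.
 */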
boolean_t
swap_pager_haspage(pager, offset)
        vm_pager_t pager;
        vm_offset_t offset;
{
        register sw_pager_t swp;
        register sw_blk_t swb;
        int ix;

#ifdef DEBUG
        if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
                printf("swpg_haspage(%x, %x) ", pager, offset);
#endif
        swp = (sw_pager_t) pager->pg_data;
        ix = offset / dbtob(swp->sw_bsize);
        if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
                if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK))
                        printf("swpg_haspage: %x bad offset %x, ix %x\n",
                            swp->sw_blocks, offset, ix);
#endif
                return(FALSE);
        }
        swb = &swp->sw_blocks[ix];
        if (swb->swb_block)
                ix = atop(offset % dbtob(swp->sw_bsize));
#ifdef DEBUG
        if (swpagerdebug & SDB_ALLOCBLK)
                printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix);
        if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK))
                printf("-> %c\n",
                    "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]);
#endif
        if (swb->swb_block && (swb->swb_mask & (1 << ix)))
                return(TRUE);
        return(FALSE);
}

/*
 * Scaled down version of swap().
 * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed.
 * BOGUS: lower level IO routines expect a KVA so we have to map our
 * provided physical page into the KVA to keep them happy.
 */
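/*
 * Returns VM_PAGER_OK on success, VM_PAGER_PEND if an asynchronous
 * write has been started, or VM_PAGER_FAIL on error or when the
 * needed resources are unavailable.
 */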
swap_pager_io(swp, m, flags)
        register sw_pager_t swp;
        vm_page_t m;
        int flags;
{
        register struct buf *bp;
        register sw_blk_t swb;
        register int s;
        int ix;
        boolean_t rv;
        vm_offset_t kva, off;
        swp_clean_t spc;

#ifdef DEBUG
        /* save panic time state */
        if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
                return;
        if (swpagerdebug & (SDB_FOLLOW|SDB_IO))
                printf("swpg_io(%x, %x, %x)\n", swp, m, flags);
#endif

        /*
         * For reads (pageins) and synchronous writes, we clean up
         * all completed async pageouts and check to see if this
         * page is currently being cleaned.  If it is, we just wait
         * until the operation is done before continuing.
         */
        if ((flags & B_ASYNC) == 0) {
                s = splbio();
                while (swap_pager_clean(m, flags&B_READ)) {
                        swp->sw_flags |= SW_WANTED;
                        assert_wait((int)swp);
                        thread_block();
                }
                splx(s);
        }
        /*
         * For async writes (pageouts), we clean up completed pageouts so
         * that all available resources are freed.  Also tells us if this
         * page is already being cleaned.  If it is, or no resources
         * are available, we try again later.
         */
        else if (swap_pager_clean(m, B_WRITE) || queue_empty(&swap_pager_free))
                return(VM_PAGER_FAIL);

        /*
         * Determine swap block and allocate as necessary.
         */
        off = m->offset + m->object->paging_offset;
        ix = off / dbtob(swp->sw_bsize);
        if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) {
#ifdef DEBUG
                if (swpagerdebug & SDB_FAIL)
                        printf("swpg_io: bad offset %x+%x(%d) in %x\n",
                            m->offset, m->object->paging_offset,
                            ix, swp->sw_blocks);
#endif
                return(VM_PAGER_FAIL);
        }
        swb = &swp->sw_blocks[ix];
        off = off % dbtob(swp->sw_bsize);
        if (flags & B_READ) {
                if (swb->swb_block == 0 ||
                    (swb->swb_mask & (1 << atop(off))) == 0) {
#ifdef DEBUG
                        if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL))
                                printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n",
                                    swp->sw_blocks,
                                    swb->swb_block, atop(off),
                                    swb->swb_mask,
                                    m->offset, m->object->paging_offset);
#endif
                        /* XXX: should we zero page here?? */
                        return(VM_PAGER_FAIL);
                }
        } else if (swb->swb_block == 0) {
                swb->swb_block = rmalloc(swapmap, swp->sw_bsize);
                if (swb->swb_block == 0) {
#ifdef DEBUG
                        if (swpagerdebug & SDB_FAIL)
                                printf("swpg_io: rmalloc of %x failed\n",
                                    swp->sw_bsize);
#endif
                        return(VM_PAGER_FAIL);
                }
#ifdef DEBUG
                if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK))
                        printf("swpg_io: %x alloc blk %x at ix %x\n",
                            swp->sw_blocks, swb->swb_block, ix);
#endif
        }

        /*
         * Allocate a kernel virtual address and initialize so that PTE
         * is available for lower level IO drivers.
         */
        kva = vm_pager_map_page(m);

        /*
         * Get a swap buffer header and perform the IO
         */
        s = splbio();
        while (bswlist.av_forw == NULL) {
#ifdef DEBUG
                if (swpagerdebug & SDB_ANOM)
                        printf("swpg_io: wait on swbuf for %x (%d)\n",
                            m, flags);
#endif
                bswlist.b_flags |= B_WANTED;
                sleep((caddr_t)&bswlist, PSWP+1);
        }
        bp = bswlist.av_forw;
        bswlist.av_forw = bp->av_forw;
        splx(s);
        bp->b_flags = B_BUSY | (flags & B_READ);
        bp->b_proc = &proc[0];	/* XXX (but without B_PHYS set this is ok) */
        bp->b_un.b_addr = (caddr_t)kva;
        bp->b_blkno = swb->swb_block + btodb(off);
        VHOLD(swapdev_vp);
        bp->b_vp = swapdev_vp;
        if (swapdev_vp->v_type == VBLK)
                bp->b_dev = swapdev_vp->v_rdev;
        bp->b_bcount = PAGE_SIZE;
        if ((bp->b_flags & B_READ) == 0)
                swapdev_vp->v_numoutput++;

        /*
         * If this is an async write we set up additional buffer fields
         * and place a "cleaning" entry on the inuse queue.
         */
        if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
                if (queue_empty(&swap_pager_free))
                        panic("swpg_io: lost spc");
#endif
                queue_remove_first(&swap_pager_free,
                    spc, swp_clean_t, spc_list);
#ifdef DEBUG
                if (spc->spc_flags != SPC_FREE)
                        panic("swpg_io: bad free spc");
#endif
                spc->spc_flags = SPC_BUSY;
                spc->spc_bp = bp;
                spc->spc_swp = swp;
                spc->spc_kva = kva;
                spc->spc_m = m;
#ifdef DEBUG
                m->pagerowned = 1;
#endif
                bp->b_flags |= B_CALL;
                bp->b_iodone = swap_pager_iodone;
                s = splbio();
                swp->sw_poip++;
                queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list);

#ifdef DEBUG
                swap_pager_poip++;
                if (swpagerdebug & SDB_WRITE)
                        printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n",
                            bp, swp, spc, swp->sw_poip);
                if ((swpagerdebug & SDB_ALLOCBLK) &&
                    (swb->swb_mask & (1 << atop(off))) == 0)
                        printf("swpg_io: %x write blk %x+%x\n",
                            swp->sw_blocks, swb->swb_block, atop(off));
#endif
                swb->swb_mask |= (1 << atop(off));
                /*
                 * XXX: Block write faults until we are done.
                 */
                m->page_lock = VM_PROT_WRITE;
                m->unlock_request = VM_PROT_ALL;
                pmap_copy_on_write(VM_PAGE_TO_PHYS(m));
                splx(s);
        }
#ifdef DEBUG
        if (swpagerdebug & SDB_IO)
                printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n",
                    bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m));
#endif
        VOP_STRATEGY(bp);
        if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) {
#ifdef DEBUG
                if (swpagerdebug & SDB_IO)
                        printf("swpg_io: IO started: bp %x\n", bp);
#endif
                return(VM_PAGER_PEND);
        }
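        /*
         * Synchronous operation: wait for the I/O to complete,
         * return the buffer header to the free list and, for a
         * successful write, mark the page clean.
         */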
        s = splbio();
#ifdef DEBUG
        if (flags & B_READ)
                swap_pager_piip++;
        else
                swap_pager_poip++;
#endif
        while ((bp->b_flags & B_DONE) == 0) {
                assert_wait((int)bp);
                thread_block();
        }
#ifdef DEBUG
        if (flags & B_READ)
                --swap_pager_piip;
        else
                --swap_pager_poip;
#endif
        rv = (bp->b_flags & B_ERROR) ? VM_PAGER_FAIL : VM_PAGER_OK;
        bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
        bp->av_forw = bswlist.av_forw;
        bswlist.av_forw = bp;
        if (bp->b_vp)
                brelvp(bp);
        if (bswlist.b_flags & B_WANTED) {
                bswlist.b_flags &= ~B_WANTED;
                thread_wakeup((int)&bswlist);
        }
        if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) {
                m->clean = 1;
                pmap_clear_modify(VM_PAGE_TO_PHYS(m));
        }
        splx(s);
#ifdef DEBUG
        if (swpagerdebug & SDB_IO)
                printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv);
        if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL)
                printf("swpg_io: IO error\n");
#endif
        vm_pager_unmap_page(kva);
        return(rv);
}

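/*
 * Reclaim completed async pageouts from the inuse list, returning
 * their clean structures to the free list.  Returns TRUE if the
 * given page is itself still being cleaned, FALSE otherwise.
 */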
boolean_t
swap_pager_clean(m, rw)
        vm_page_t m;
        int rw;
{
        register swp_clean_t spc, tspc;
        register int s;

#ifdef DEBUG
        /* save panic time state */
        if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
                return;
        if (swpagerdebug & SDB_FOLLOW)
                printf("swpg_clean(%x, %d)\n", m, rw);
#endif
        tspc = SWP_CLEAN_NULL;
        for (;;) {
                /*
                 * Look up and removal from inuse list must be done
                 * at splbio() to avoid conflicts with swap_pager_iodone.
                 */
                s = splbio();
                spc = (swp_clean_t) queue_first(&swap_pager_inuse);
                while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
                        if ((spc->spc_flags & SPC_DONE) &&
                            swap_pager_finish(spc)) {
                                queue_remove(&swap_pager_inuse, spc,
                                    swp_clean_t, spc_list);
                                break;
                        }
                        if (m && m == spc->spc_m) {
#ifdef DEBUG
                                if (swpagerdebug & SDB_ANOM)
                                        printf("swpg_clean: %x on list, flags %x\n",
                                            m, spc->spc_flags);
#endif
                                tspc = spc;
                        }
                        spc = (swp_clean_t) queue_next(&spc->spc_list);
                }

                /*
                 * No operations done, that's all we can do for now.
                 */
                if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
                        break;
                splx(s);

                /*
                 * The desired page was found to be busy earlier in
                 * the scan but has since completed.
                 */
                if (tspc && tspc == spc) {
#ifdef DEBUG
                        if (swpagerdebug & SDB_ANOM)
                                printf("swpg_clean: %x done while looking\n",
                                    m);
#endif
                        tspc = SWP_CLEAN_NULL;
                }
                spc->spc_flags = SPC_FREE;
                vm_pager_unmap_page(spc->spc_kva);
                queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list);
#ifdef DEBUG
                if (swpagerdebug & SDB_WRITE)
                        printf("swpg_clean: free spc %x\n", spc);
#endif
        }
        /*
         * If we found that the desired page is already being cleaned
         * mark it so that swap_pager_iodone() will not set the clean
         * flag before the pageout daemon has another chance to clean it.
         */
        if (tspc && rw == B_WRITE) {
#ifdef DEBUG
                if (swpagerdebug & SDB_ANOM)
                        printf("swpg_clean: %x on clean list\n", tspc);
#endif
                tspc->spc_flags |= SPC_DIRTY;
        }
        splx(s);

#ifdef DEBUG
        if (swpagerdebug & SDB_WRITE)
                printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE);
        if ((swpagerdebug & SDB_ANOM) && tspc)
                printf("swpg_clean: %s of cleaning page %x\n",
                    rw == B_READ ? "get" : "put", m);
#endif
        return(tspc ? TRUE : FALSE);
}

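/*
 * Complete a finished pageout: update the object's paging count and
 * the page's clean/laundry state.  Returns 0 if the object lock could
 * not be acquired (try again later), 1 on success.
 */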
swap_pager_finish(spc)
        register swp_clean_t spc;
{
        vm_object_t object = spc->spc_m->object;

        /*
         * Mark the paging operation as done.
         * (XXX) If we cannot get the lock, leave it until later.
         * (XXX) Also we are assuming that an async write is a
         *       pageout operation that has incremented the counter.
         */
        if (!vm_object_lock_try(object))
                return(0);

#ifdef DEBUG
        spc->spc_m->pagerowned = 0;
#endif

        if (--object->paging_in_progress == 0)
                thread_wakeup((int) object);

        /*
         * XXX: this isn't even close to the right thing to do,
         * introduces a variety of race conditions.
         *
         * If dirty, vm_pageout() has attempted to clean the page
         * again.  In this case we do not do anything as we will
         * see the page again shortly.  Otherwise, if no error mark
         * as clean and inform the pmap system.  If error, mark as
         * dirty so we will try again (XXX: could get stuck doing
         * this, should give up after a while).
         */
        if ((spc->spc_flags & SPC_DIRTY) == 0) {
                if (spc->spc_flags & SPC_ERROR) {
                        printf("swap_pager: clean of %x failed\n",
                            VM_PAGE_TO_PHYS(spc->spc_m));
                        spc->spc_m->laundry = TRUE;
                } else {
                        spc->spc_m->clean = TRUE;
                        pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
                }
        }
        /*
         * XXX: allow blocked write faults to continue
         */
        spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
        PAGE_WAKEUP(spc->spc_m);

        vm_object_unlock(object);
        return(1);
}

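/*
 * I/O completion routine for async pageouts (set via B_CALL/b_iodone):
 * mark the associated clean structure done, return the buffer header
 * to the free list and wake up anyone waiting on the pager, the swap
 * buffer list or the pageout daemon.
 */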
swap_pager_iodone(bp)
        register struct buf *bp;
{
        register swp_clean_t spc;
        daddr_t blk;
        int s;

#ifdef DEBUG
        /* save panic time state */
        if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
                return;
        if (swpagerdebug & SDB_FOLLOW)
                printf("swpg_iodone(%x)\n", bp);
#endif
        s = splbio();
        spc = (swp_clean_t) queue_first(&swap_pager_inuse);
        while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
                if (spc->spc_bp == bp)
                        break;
                spc = (swp_clean_t) queue_next(&spc->spc_list);
        }
#ifdef DEBUG
        if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
                panic("swpg_iodone: bp not found");
#endif

        spc->spc_flags &= ~SPC_BUSY;
        spc->spc_flags |= SPC_DONE;
        if (bp->b_flags & B_ERROR)
                spc->spc_flags |= SPC_ERROR;
        spc->spc_bp = NULL;
        blk = bp->b_blkno;

#ifdef DEBUG
        --swap_pager_poip;
        if (swpagerdebug & SDB_WRITE)
                printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
                    bp, spc->spc_swp, spc->spc_swp->sw_flags,
                    spc, spc->spc_swp->sw_poip);
#endif

        spc->spc_swp->sw_poip--;
        if (spc->spc_swp->sw_flags & SW_WANTED) {
                spc->spc_swp->sw_flags &= ~SW_WANTED;
                thread_wakeup((int)spc->spc_swp);
        }

        bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
        bp->av_forw = bswlist.av_forw;
        bswlist.av_forw = bp;
        if (bp->b_vp)
                brelvp(bp);
        if (bswlist.b_flags & B_WANTED) {
                bswlist.b_flags &= ~B_WANTED;
                thread_wakeup((int)&bswlist);
        }
#if 0
        /*
         * XXX: this isn't even close to the right thing to do,
         * introduces a variety of race conditions.
         *
         * If dirty, vm_pageout() has attempted to clean the page
         * again.  In this case we do not do anything as we will
         * see the page again shortly.  Otherwise, if no error mark
         * as clean and inform the pmap system.  If error, mark as
         * dirty so we will try again (XXX: could get stuck doing
         * this, should give up after a while).
         */
        if ((spc->spc_flags & SPC_DIRTY) == 0) {
                if (spc->spc_flags & SPC_ERROR) {
                        printf("swap_pager: clean of %x (block %x) failed\n",
                            VM_PAGE_TO_PHYS(spc->spc_m), blk);
                        spc->spc_m->laundry = TRUE;
                } else {
                        spc->spc_m->clean = TRUE;
                        pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
                }
        }
        /*
         * XXX: allow blocked write faults to continue
         */
        spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
        PAGE_WAKEUP(spc->spc_m);
#endif

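        /*
         * Wake the pageout daemon in case it is waiting for this
         * pageout to complete.
         */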
        thread_wakeup((int) &vm_pages_needed);
        splx(s);
}
#endif