1*45749Smckusick /* 2*45749Smckusick * Copyright (c) 1990 University of Utah. 3*45749Smckusick * Copyright (c) 1991 The Regents of the University of California. 4*45749Smckusick * All rights reserved. 5*45749Smckusick * 6*45749Smckusick * This code is derived from software contributed to Berkeley by 7*45749Smckusick * the Systems Programming Group of the University of Utah Computer 8*45749Smckusick * Science Department. 9*45749Smckusick * 10*45749Smckusick * %sccs.include.redist.c% 11*45749Smckusick * 12*45749Smckusick * @(#)swap_pager.c 7.1 (Berkeley) 12/05/90 13*45749Smckusick */ 14*45749Smckusick 15*45749Smckusick /* 16*45749Smckusick * Quick hack to page to dedicated partition(s). 17*45749Smckusick * TODO: 18*45749Smckusick * Add multiprocessor locks 19*45749Smckusick * Deal with async writes in a better fashion 20*45749Smckusick */ 21*45749Smckusick 22*45749Smckusick #include "swappager.h" 23*45749Smckusick #if NSWAPPAGER > 0 24*45749Smckusick 25*45749Smckusick #include "param.h" 26*45749Smckusick #include "user.h" 27*45749Smckusick #include "proc.h" 28*45749Smckusick #include "buf.h" 29*45749Smckusick #include "map.h" 30*45749Smckusick #include "systm.h" 31*45749Smckusick #include "specdev.h" 32*45749Smckusick #include "vnode.h" 33*45749Smckusick #include "malloc.h" 34*45749Smckusick #include "queue.h" 35*45749Smckusick 36*45749Smckusick #include "../vm/vm_param.h" 37*45749Smckusick #include "../vm/vm_pager.h" 38*45749Smckusick #include "../vm/vm_page.h" 39*45749Smckusick #include "../vm/vm_pageout.h" 40*45749Smckusick #include "../vm/swap_pager.h" 41*45749Smckusick 42*45749Smckusick #define NSWSIZES 16 /* size of swtab */ 43*45749Smckusick #define NPENDINGIO 64 /* max # of pending cleans */ 44*45749Smckusick #define MAXDADDRS 64 /* max # of disk addrs for fixed allocations */ 45*45749Smckusick 46*45749Smckusick #ifdef DEBUG 47*45749Smckusick int swpagerdebug = 0x100; 48*45749Smckusick #define SDB_FOLLOW 0x001 49*45749Smckusick #define SDB_INIT 0x002 
50*45749Smckusick #define SDB_ALLOC 0x004 51*45749Smckusick #define SDB_IO 0x008 52*45749Smckusick #define SDB_WRITE 0x010 53*45749Smckusick #define SDB_FAIL 0x020 54*45749Smckusick #define SDB_ALLOCBLK 0x040 55*45749Smckusick #define SDB_FULL 0x080 56*45749Smckusick #define SDB_ANOM 0x100 57*45749Smckusick #define SDB_ANOMPANIC 0x200 58*45749Smckusick #endif 59*45749Smckusick 60*45749Smckusick struct swpagerclean { 61*45749Smckusick queue_head_t spc_list; 62*45749Smckusick int spc_flags; 63*45749Smckusick struct buf *spc_bp; 64*45749Smckusick sw_pager_t spc_swp; 65*45749Smckusick vm_offset_t spc_kva; 66*45749Smckusick vm_page_t spc_m; 67*45749Smckusick } swcleanlist[NPENDINGIO]; 68*45749Smckusick typedef struct swpagerclean *swp_clean_t; 69*45749Smckusick 70*45749Smckusick #define SWP_CLEAN_NULL ((swp_clean_t)0) 71*45749Smckusick 72*45749Smckusick /* spc_flags values */ 73*45749Smckusick #define SPC_FREE 0x00 74*45749Smckusick #define SPC_BUSY 0x01 75*45749Smckusick #define SPC_DONE 0x02 76*45749Smckusick #define SPC_ERROR 0x04 77*45749Smckusick #define SPC_DIRTY 0x08 78*45749Smckusick 79*45749Smckusick struct swtab { 80*45749Smckusick vm_size_t st_osize; /* size of object (bytes) */ 81*45749Smckusick int st_bsize; /* vs. 
size of swap block (DEV_BSIZE units) */ 82*45749Smckusick #ifdef DEBUG 83*45749Smckusick u_long st_inuse; /* number in this range in use */ 84*45749Smckusick u_long st_usecnt; /* total used of this size */ 85*45749Smckusick #endif 86*45749Smckusick } swtab[NSWSIZES+1]; 87*45749Smckusick 88*45749Smckusick #ifdef DEBUG 89*45749Smckusick int swap_pager_pendingio; /* max pending async "clean" ops */ 90*45749Smckusick int swap_pager_poip; /* pageouts in progress */ 91*45749Smckusick int swap_pager_piip; /* pageins in progress */ 92*45749Smckusick #endif 93*45749Smckusick 94*45749Smckusick queue_head_t swap_pager_inuse; /* list of pending page cleans */ 95*45749Smckusick queue_head_t swap_pager_free; /* list of free pager clean structs */ 96*45749Smckusick queue_head_t swap_pager_list; /* list of "named" anon regions */ 97*45749Smckusick 98*45749Smckusick void 99*45749Smckusick swap_pager_init() 100*45749Smckusick { 101*45749Smckusick register swp_clean_t spc; 102*45749Smckusick register int i, bsize; 103*45749Smckusick extern int dmmin, dmmax; 104*45749Smckusick int maxbsize; 105*45749Smckusick 106*45749Smckusick #ifdef DEBUG 107*45749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_INIT)) 108*45749Smckusick printf("swpg_init()\n"); 109*45749Smckusick #endif 110*45749Smckusick dfltpagerops = &swappagerops; 111*45749Smckusick queue_init(&swap_pager_list); 112*45749Smckusick 113*45749Smckusick /* 114*45749Smckusick * Initialize clean lists 115*45749Smckusick */ 116*45749Smckusick queue_init(&swap_pager_inuse); 117*45749Smckusick queue_init(&swap_pager_free); 118*45749Smckusick for (i = 0, spc = swcleanlist; i < NPENDINGIO; i++, spc++) { 119*45749Smckusick queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); 120*45749Smckusick spc->spc_flags = SPC_FREE; 121*45749Smckusick } 122*45749Smckusick 123*45749Smckusick /* 124*45749Smckusick * Calculate the swap allocation constants. 
125*45749Smckusick */ 126*45749Smckusick if (dmmin == 0) { 127*45749Smckusick dmmin = DMMIN; 128*45749Smckusick if (dmmin < CLBYTES/DEV_BSIZE) 129*45749Smckusick dmmin = CLBYTES/DEV_BSIZE; 130*45749Smckusick } 131*45749Smckusick if (dmmax == 0) 132*45749Smckusick dmmax = DMMAX; 133*45749Smckusick 134*45749Smckusick /* 135*45749Smckusick * Fill in our table of object size vs. allocation size 136*45749Smckusick */ 137*45749Smckusick bsize = btodb(PAGE_SIZE); 138*45749Smckusick if (bsize < dmmin) 139*45749Smckusick bsize = dmmin; 140*45749Smckusick maxbsize = btodb(sizeof(sw_bm_t) * NBBY * PAGE_SIZE); 141*45749Smckusick if (maxbsize > dmmax) 142*45749Smckusick maxbsize = dmmax; 143*45749Smckusick for (i = 0; i < NSWSIZES; i++) { 144*45749Smckusick swtab[i].st_osize = (vm_size_t) (MAXDADDRS * dbtob(bsize)); 145*45749Smckusick swtab[i].st_bsize = bsize; 146*45749Smckusick #ifdef DEBUG 147*45749Smckusick if (swpagerdebug & SDB_INIT) 148*45749Smckusick printf("swpg_init: ix %d, size %x, bsize %x\n", 149*45749Smckusick i, swtab[i].st_osize, swtab[i].st_bsize); 150*45749Smckusick #endif 151*45749Smckusick if (bsize >= maxbsize) 152*45749Smckusick break; 153*45749Smckusick bsize *= 2; 154*45749Smckusick } 155*45749Smckusick swtab[i].st_osize = 0; 156*45749Smckusick swtab[i].st_bsize = bsize; 157*45749Smckusick } 158*45749Smckusick 159*45749Smckusick /* 160*45749Smckusick * Allocate a pager structure and associated resources. 161*45749Smckusick * Note that if we are called from the pageout daemon (handle == NULL) 162*45749Smckusick * we should not wait for memory as it could resulting in deadlock. 
163*45749Smckusick */ 164*45749Smckusick vm_pager_t 165*45749Smckusick swap_pager_alloc(handle, size, prot) 166*45749Smckusick caddr_t handle; 167*45749Smckusick register vm_size_t size; 168*45749Smckusick vm_prot_t prot; 169*45749Smckusick { 170*45749Smckusick register vm_pager_t pager; 171*45749Smckusick register sw_pager_t swp; 172*45749Smckusick struct swtab *swt; 173*45749Smckusick int waitok; 174*45749Smckusick 175*45749Smckusick #ifdef DEBUG 176*45749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 177*45749Smckusick printf("swpg_alloc(%x, %x, %x)\n", handle, size, prot); 178*45749Smckusick #endif 179*45749Smckusick /* 180*45749Smckusick * If this is a "named" anonymous region, look it up and 181*45749Smckusick * return the appropriate pager if it exists. 182*45749Smckusick */ 183*45749Smckusick if (handle) { 184*45749Smckusick pager = vm_pager_lookup(&swap_pager_list, handle); 185*45749Smckusick if (pager != VM_PAGER_NULL) { 186*45749Smckusick /* 187*45749Smckusick * Use vm_object_lookup to gain a reference 188*45749Smckusick * to the object and also to remove from the 189*45749Smckusick * object cache. 190*45749Smckusick */ 191*45749Smckusick if (vm_object_lookup(pager) == VM_OBJECT_NULL) 192*45749Smckusick panic("swap_pager_alloc: bad object"); 193*45749Smckusick return(pager); 194*45749Smckusick } 195*45749Smckusick } 196*45749Smckusick /* 197*45749Smckusick * Pager doesn't exist, allocate swap management resources 198*45749Smckusick * and initialize. 199*45749Smckusick */ 200*45749Smckusick waitok = handle ? 
M_WAITOK : M_NOWAIT; 201*45749Smckusick pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); 202*45749Smckusick if (pager == VM_PAGER_NULL) 203*45749Smckusick return(VM_PAGER_NULL); 204*45749Smckusick swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); 205*45749Smckusick if (swp == NULL) { 206*45749Smckusick #ifdef DEBUG 207*45749Smckusick if (swpagerdebug & SDB_FAIL) 208*45749Smckusick printf("swpg_alloc: swpager malloc failed\n"); 209*45749Smckusick #endif 210*45749Smckusick free((caddr_t)pager, M_VMPAGER); 211*45749Smckusick return(VM_PAGER_NULL); 212*45749Smckusick } 213*45749Smckusick size = round_page(size); 214*45749Smckusick for (swt = swtab; swt->st_osize; swt++) 215*45749Smckusick if (size <= swt->st_osize) 216*45749Smckusick break; 217*45749Smckusick #ifdef DEBUG 218*45749Smckusick swt->st_inuse++; 219*45749Smckusick swt->st_usecnt++; 220*45749Smckusick #endif 221*45749Smckusick swp->sw_osize = size; 222*45749Smckusick swp->sw_bsize = swt->st_bsize; 223*45749Smckusick swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; 224*45749Smckusick swp->sw_blocks = (sw_blk_t) 225*45749Smckusick malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), 226*45749Smckusick M_VMPGDATA, M_NOWAIT); 227*45749Smckusick if (swp->sw_blocks == NULL) { 228*45749Smckusick free((caddr_t)swp, M_VMPGDATA); 229*45749Smckusick free((caddr_t)pager, M_VMPAGER); 230*45749Smckusick #ifdef DEBUG 231*45749Smckusick if (swpagerdebug & SDB_FAIL) 232*45749Smckusick printf("swpg_alloc: sw_blocks malloc failed\n"); 233*45749Smckusick swt->st_inuse--; 234*45749Smckusick swt->st_usecnt--; 235*45749Smckusick #endif 236*45749Smckusick return(FALSE); 237*45749Smckusick } 238*45749Smckusick bzero((caddr_t)swp->sw_blocks, 239*45749Smckusick swp->sw_nblocks * sizeof(*swp->sw_blocks)); 240*45749Smckusick swp->sw_poip = 0; 241*45749Smckusick if (handle) { 242*45749Smckusick vm_object_t object; 243*45749Smckusick 244*45749Smckusick swp->sw_flags = SW_NAMED; 245*45749Smckusick 
queue_enter(&swap_pager_list, pager, vm_pager_t, pg_list); 246*45749Smckusick /* 247*45749Smckusick * Consistant with other pagers: return with object 248*45749Smckusick * referenced. Can't do this with handle == NULL 249*45749Smckusick * since it might be the pageout daemon calling. 250*45749Smckusick */ 251*45749Smckusick object = vm_object_allocate(size); 252*45749Smckusick vm_object_enter(object, pager); 253*45749Smckusick vm_object_setpager(object, pager, 0, FALSE); 254*45749Smckusick } else { 255*45749Smckusick swp->sw_flags = 0; 256*45749Smckusick queue_init(&pager->pg_list); 257*45749Smckusick } 258*45749Smckusick pager->pg_handle = handle; 259*45749Smckusick pager->pg_ops = &swappagerops; 260*45749Smckusick pager->pg_type = PG_SWAP; 261*45749Smckusick pager->pg_data = (caddr_t)swp; 262*45749Smckusick 263*45749Smckusick #ifdef DEBUG 264*45749Smckusick if (swpagerdebug & SDB_ALLOC) 265*45749Smckusick printf("swpg_alloc: pg_data %x, %x of %x at %x\n", 266*45749Smckusick swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); 267*45749Smckusick #endif 268*45749Smckusick return(pager); 269*45749Smckusick } 270*45749Smckusick 271*45749Smckusick void 272*45749Smckusick swap_pager_dealloc(pager) 273*45749Smckusick vm_pager_t pager; 274*45749Smckusick { 275*45749Smckusick register int i; 276*45749Smckusick register sw_blk_t bp; 277*45749Smckusick register sw_pager_t swp; 278*45749Smckusick struct swtab *swt; 279*45749Smckusick int s; 280*45749Smckusick 281*45749Smckusick #ifdef DEBUG 282*45749Smckusick /* save panic time state */ 283*45749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 284*45749Smckusick return; 285*45749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOC)) 286*45749Smckusick printf("swpg_dealloc(%x)\n", pager); 287*45749Smckusick #endif 288*45749Smckusick /* 289*45749Smckusick * Remove from list right away so lookups will fail if we 290*45749Smckusick * block for pageout completion. 
291*45749Smckusick */ 292*45749Smckusick swp = (sw_pager_t) pager->pg_data; 293*45749Smckusick if (swp->sw_flags & SW_NAMED) { 294*45749Smckusick queue_remove(&swap_pager_list, pager, vm_pager_t, pg_list); 295*45749Smckusick swp->sw_flags &= ~SW_NAMED; 296*45749Smckusick } 297*45749Smckusick #ifdef DEBUG 298*45749Smckusick for (swt = swtab; swt->st_osize; swt++) 299*45749Smckusick if (swp->sw_osize <= swt->st_osize) 300*45749Smckusick break; 301*45749Smckusick swt->st_inuse--; 302*45749Smckusick #endif 303*45749Smckusick 304*45749Smckusick /* 305*45749Smckusick * Wait for all pageouts to finish and remove 306*45749Smckusick * all entries from cleaning list. 307*45749Smckusick */ 308*45749Smckusick s = splbio(); 309*45749Smckusick while (swp->sw_poip) { 310*45749Smckusick swp->sw_flags |= SW_WANTED; 311*45749Smckusick assert_wait((int)swp); 312*45749Smckusick thread_block(); 313*45749Smckusick } 314*45749Smckusick splx(s); 315*45749Smckusick (void) swap_pager_clean(VM_PAGE_NULL, B_WRITE); 316*45749Smckusick 317*45749Smckusick /* 318*45749Smckusick * Free left over swap blocks 319*45749Smckusick */ 320*45749Smckusick for (i = 0, bp = swp->sw_blocks; i < swp->sw_nblocks; i++, bp++) 321*45749Smckusick if (bp->swb_block) { 322*45749Smckusick #ifdef DEBUG 323*45749Smckusick if (swpagerdebug & (SDB_ALLOCBLK|SDB_FULL)) 324*45749Smckusick printf("swpg_dealloc: blk %x\n", 325*45749Smckusick bp->swb_block); 326*45749Smckusick #endif 327*45749Smckusick rmfree(swapmap, swp->sw_bsize, bp->swb_block); 328*45749Smckusick } 329*45749Smckusick /* 330*45749Smckusick * Free swap management resources 331*45749Smckusick */ 332*45749Smckusick free((caddr_t)swp->sw_blocks, M_VMPGDATA); 333*45749Smckusick free((caddr_t)swp, M_VMPGDATA); 334*45749Smckusick free((caddr_t)pager, M_VMPAGER); 335*45749Smckusick } 336*45749Smckusick 337*45749Smckusick swap_pager_getpage(pager, m, sync) 338*45749Smckusick vm_pager_t pager; 339*45749Smckusick vm_page_t m; 340*45749Smckusick boolean_t sync; 
341*45749Smckusick { 342*45749Smckusick #ifdef DEBUG 343*45749Smckusick if (swpagerdebug & SDB_FOLLOW) 344*45749Smckusick printf("swpg_getpage(%x, %x, %d)\n", pager, m, sync); 345*45749Smckusick #endif 346*45749Smckusick return(swap_pager_io((sw_pager_t)pager->pg_data, m, B_READ)); 347*45749Smckusick } 348*45749Smckusick 349*45749Smckusick swap_pager_putpage(pager, m, sync) 350*45749Smckusick vm_pager_t pager; 351*45749Smckusick vm_page_t m; 352*45749Smckusick boolean_t sync; 353*45749Smckusick { 354*45749Smckusick int flags; 355*45749Smckusick 356*45749Smckusick #ifdef DEBUG 357*45749Smckusick if (swpagerdebug & SDB_FOLLOW) 358*45749Smckusick printf("swpg_putpage(%x, %x, %d)\n", pager, m, sync); 359*45749Smckusick #endif 360*45749Smckusick if (pager == VM_PAGER_NULL) { 361*45749Smckusick (void) swap_pager_clean(VM_PAGE_NULL, B_WRITE); 362*45749Smckusick return; 363*45749Smckusick } 364*45749Smckusick flags = B_WRITE; 365*45749Smckusick if (!sync) 366*45749Smckusick flags |= B_ASYNC; 367*45749Smckusick return(swap_pager_io((sw_pager_t)pager->pg_data, m, flags)); 368*45749Smckusick } 369*45749Smckusick 370*45749Smckusick boolean_t 371*45749Smckusick swap_pager_haspage(pager, offset) 372*45749Smckusick vm_pager_t pager; 373*45749Smckusick vm_offset_t offset; 374*45749Smckusick { 375*45749Smckusick register sw_pager_t swp; 376*45749Smckusick register sw_blk_t swb; 377*45749Smckusick int ix; 378*45749Smckusick 379*45749Smckusick #ifdef DEBUG 380*45749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 381*45749Smckusick printf("swpg_haspage(%x, %x) ", pager, offset); 382*45749Smckusick #endif 383*45749Smckusick swp = (sw_pager_t) pager->pg_data; 384*45749Smckusick ix = offset / dbtob(swp->sw_bsize); 385*45749Smckusick if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 386*45749Smckusick #ifdef DEBUG 387*45749Smckusick if (swpagerdebug & (SDB_FAIL|SDB_FOLLOW|SDB_ALLOCBLK)) 388*45749Smckusick printf("swpg_haspage: %x bad offset %x, ix %x\n", 389*45749Smckusick 
swp->sw_blocks, offset, ix); 390*45749Smckusick #endif 391*45749Smckusick return(FALSE); 392*45749Smckusick } 393*45749Smckusick swb = &swp->sw_blocks[ix]; 394*45749Smckusick if (swb->swb_block) 395*45749Smckusick ix = atop(offset % dbtob(swp->sw_bsize)); 396*45749Smckusick #ifdef DEBUG 397*45749Smckusick if (swpagerdebug & SDB_ALLOCBLK) 398*45749Smckusick printf("%x blk %x+%x ", swp->sw_blocks, swb->swb_block, ix); 399*45749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_ALLOCBLK)) 400*45749Smckusick printf("-> %c\n", 401*45749Smckusick "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); 402*45749Smckusick #endif 403*45749Smckusick if (swb->swb_block && (swb->swb_mask & (1 << ix))) 404*45749Smckusick return(TRUE); 405*45749Smckusick return(FALSE); 406*45749Smckusick } 407*45749Smckusick 408*45749Smckusick /* 409*45749Smckusick * Scaled down version of swap(). 410*45749Smckusick * Assumes that PAGE_SIZE < MAXPHYS; i.e. only one operation needed. 411*45749Smckusick * BOGUS: lower level IO routines expect a KVA so we have to map our 412*45749Smckusick * provided physical page into the KVA to keep them happy. 
413*45749Smckusick */ 414*45749Smckusick swap_pager_io(swp, m, flags) 415*45749Smckusick register sw_pager_t swp; 416*45749Smckusick vm_page_t m; 417*45749Smckusick int flags; 418*45749Smckusick { 419*45749Smckusick register struct buf *bp; 420*45749Smckusick register sw_blk_t swb; 421*45749Smckusick register int s; 422*45749Smckusick int ix; 423*45749Smckusick boolean_t rv; 424*45749Smckusick vm_offset_t kva, off; 425*45749Smckusick swp_clean_t spc; 426*45749Smckusick 427*45749Smckusick #ifdef DEBUG 428*45749Smckusick /* save panic time state */ 429*45749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 430*45749Smckusick return; 431*45749Smckusick if (swpagerdebug & (SDB_FOLLOW|SDB_IO)) 432*45749Smckusick printf("swpg_io(%x, %x, %x)\n", swp, m, flags); 433*45749Smckusick #endif 434*45749Smckusick 435*45749Smckusick /* 436*45749Smckusick * For reads (pageins) and synchronous writes, we clean up 437*45749Smckusick * all completed async pageouts and check to see if this 438*45749Smckusick * page is currently being cleaned. If it is, we just wait 439*45749Smckusick * til the operation is done before continuing. 440*45749Smckusick */ 441*45749Smckusick if ((flags & B_ASYNC) == 0) { 442*45749Smckusick s = splbio(); 443*45749Smckusick while (swap_pager_clean(m, flags&B_READ)) { 444*45749Smckusick swp->sw_flags |= SW_WANTED; 445*45749Smckusick assert_wait((int)swp); 446*45749Smckusick thread_block(); 447*45749Smckusick } 448*45749Smckusick splx(s); 449*45749Smckusick } 450*45749Smckusick /* 451*45749Smckusick * For async writes (pageouts), we cleanup completed pageouts so 452*45749Smckusick * that all available resources are freed. Also tells us if this 453*45749Smckusick * page is already being cleaned. If it is, or no resources 454*45749Smckusick * are available, we try again later. 
455*45749Smckusick */ 456*45749Smckusick else if (swap_pager_clean(m, B_WRITE) || queue_empty(&swap_pager_free)) 457*45749Smckusick return(VM_PAGER_FAIL); 458*45749Smckusick 459*45749Smckusick /* 460*45749Smckusick * Determine swap block and allocate as necessary. 461*45749Smckusick */ 462*45749Smckusick off = m->offset + m->object->paging_offset; 463*45749Smckusick ix = off / dbtob(swp->sw_bsize); 464*45749Smckusick if (swp->sw_blocks == NULL || ix >= swp->sw_nblocks) { 465*45749Smckusick #ifdef DEBUG 466*45749Smckusick if (swpagerdebug & SDB_FAIL) 467*45749Smckusick printf("swpg_io: bad offset %x+%x(%d) in %x\n", 468*45749Smckusick m->offset, m->object->paging_offset, 469*45749Smckusick ix, swp->sw_blocks); 470*45749Smckusick #endif 471*45749Smckusick return(VM_PAGER_FAIL); 472*45749Smckusick } 473*45749Smckusick swb = &swp->sw_blocks[ix]; 474*45749Smckusick off = off % dbtob(swp->sw_bsize); 475*45749Smckusick if (flags & B_READ) { 476*45749Smckusick if (swb->swb_block == 0 || 477*45749Smckusick (swb->swb_mask & (1 << atop(off))) == 0) { 478*45749Smckusick #ifdef DEBUG 479*45749Smckusick if (swpagerdebug & (SDB_ALLOCBLK|SDB_FAIL)) 480*45749Smckusick printf("swpg_io: %x bad read: blk %x+%x, mask %x, off %x+%x\n", 481*45749Smckusick swp->sw_blocks, 482*45749Smckusick swb->swb_block, atop(off), 483*45749Smckusick swb->swb_mask, 484*45749Smckusick m->offset, m->object->paging_offset); 485*45749Smckusick #endif 486*45749Smckusick /* XXX: should we zero page here?? 
*/ 487*45749Smckusick return(VM_PAGER_FAIL); 488*45749Smckusick } 489*45749Smckusick } else if (swb->swb_block == 0) { 490*45749Smckusick swb->swb_block = rmalloc(swapmap, swp->sw_bsize); 491*45749Smckusick if (swb->swb_block == 0) { 492*45749Smckusick #ifdef DEBUG 493*45749Smckusick if (swpagerdebug & SDB_FAIL) 494*45749Smckusick printf("swpg_io: rmalloc of %x failed\n", 495*45749Smckusick swp->sw_bsize); 496*45749Smckusick #endif 497*45749Smckusick return(VM_PAGER_FAIL); 498*45749Smckusick } 499*45749Smckusick #ifdef DEBUG 500*45749Smckusick if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) 501*45749Smckusick printf("swpg_io: %x alloc blk %x at ix %x\n", 502*45749Smckusick swp->sw_blocks, swb->swb_block, ix); 503*45749Smckusick #endif 504*45749Smckusick } 505*45749Smckusick 506*45749Smckusick /* 507*45749Smckusick * Allocate a kernel virtual address and initialize so that PTE 508*45749Smckusick * is available for lower level IO drivers. 509*45749Smckusick */ 510*45749Smckusick kva = vm_pager_map_page(m); 511*45749Smckusick 512*45749Smckusick /* 513*45749Smckusick * Get a swap buffer header and perform the IO 514*45749Smckusick */ 515*45749Smckusick s = splbio(); 516*45749Smckusick while (bswlist.av_forw == NULL) { 517*45749Smckusick #ifdef DEBUG 518*45749Smckusick if (swpagerdebug & SDB_ANOM) 519*45749Smckusick printf("swpg_io: wait on swbuf for %x (%d)\n", 520*45749Smckusick m, flags); 521*45749Smckusick #endif 522*45749Smckusick bswlist.b_flags |= B_WANTED; 523*45749Smckusick sleep((caddr_t)&bswlist, PSWP+1); 524*45749Smckusick } 525*45749Smckusick bp = bswlist.av_forw; 526*45749Smckusick bswlist.av_forw = bp->av_forw; 527*45749Smckusick splx(s); 528*45749Smckusick bp->b_flags = B_BUSY | (flags & B_READ); 529*45749Smckusick bp->b_proc = &proc[0]; /* XXX (but without B_PHYS set this is ok) */ 530*45749Smckusick bp->b_un.b_addr = (caddr_t)kva; 531*45749Smckusick bp->b_blkno = swb->swb_block + btodb(off); 532*45749Smckusick VHOLD(swapdev_vp); 533*45749Smckusick bp->b_vp 
= swapdev_vp; 534*45749Smckusick bp->b_dev = swapdev_vp->v_rdev; 535*45749Smckusick bp->b_bcount = PAGE_SIZE; 536*45749Smckusick if ((bp->b_flags & B_READ) == 0) 537*45749Smckusick swapdev_vp->v_numoutput++; 538*45749Smckusick 539*45749Smckusick /* 540*45749Smckusick * If this is an async write we set up additional buffer fields 541*45749Smckusick * and place a "cleaning" entry on the inuse queue. 542*45749Smckusick */ 543*45749Smckusick if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 544*45749Smckusick #ifdef DEBUG 545*45749Smckusick if (queue_empty(&swap_pager_free)) 546*45749Smckusick panic("swpg_io: lost spc"); 547*45749Smckusick #endif 548*45749Smckusick queue_remove_first(&swap_pager_free, 549*45749Smckusick spc, swp_clean_t, spc_list); 550*45749Smckusick #ifdef DEBUG 551*45749Smckusick if (spc->spc_flags != SPC_FREE) 552*45749Smckusick panic("swpg_io: bad free spc"); 553*45749Smckusick #endif 554*45749Smckusick spc->spc_flags = SPC_BUSY; 555*45749Smckusick spc->spc_bp = bp; 556*45749Smckusick spc->spc_swp = swp; 557*45749Smckusick spc->spc_kva = kva; 558*45749Smckusick spc->spc_m = m; 559*45749Smckusick #ifdef DEBUG 560*45749Smckusick m->pagerowned = 1; 561*45749Smckusick #endif 562*45749Smckusick bp->b_flags |= B_CALL; 563*45749Smckusick bp->b_iodone = swap_pager_iodone; 564*45749Smckusick s = splbio(); 565*45749Smckusick swp->sw_poip++; 566*45749Smckusick queue_enter(&swap_pager_inuse, spc, swp_clean_t, spc_list); 567*45749Smckusick 568*45749Smckusick #ifdef DEBUG 569*45749Smckusick swap_pager_poip++; 570*45749Smckusick if (swpagerdebug & SDB_WRITE) 571*45749Smckusick printf("swpg_io: write: bp=%x swp=%x spc=%x poip=%d\n", 572*45749Smckusick bp, swp, spc, swp->sw_poip); 573*45749Smckusick if ((swpagerdebug & SDB_ALLOCBLK) && 574*45749Smckusick (swb->swb_mask & (1 << atop(off))) == 0) 575*45749Smckusick printf("swpg_io: %x write blk %x+%x\n", 576*45749Smckusick swp->sw_blocks, swb->swb_block, atop(off)); 577*45749Smckusick #endif 578*45749Smckusick 
swb->swb_mask |= (1 << atop(off)); 579*45749Smckusick /* 580*45749Smckusick * XXX: Block write faults til we are done. 581*45749Smckusick */ 582*45749Smckusick m->page_lock = VM_PROT_WRITE; 583*45749Smckusick m->unlock_request = VM_PROT_ALL; 584*45749Smckusick pmap_copy_on_write(VM_PAGE_TO_PHYS(m)); 585*45749Smckusick splx(s); 586*45749Smckusick } 587*45749Smckusick #ifdef DEBUG 588*45749Smckusick if (swpagerdebug & SDB_IO) 589*45749Smckusick printf("swpg_io: IO start: bp %x, db %x, va %x, pa %x\n", 590*45749Smckusick bp, swb->swb_block+btodb(off), kva, VM_PAGE_TO_PHYS(m)); 591*45749Smckusick #endif 592*45749Smckusick VOP_STRATEGY(bp); 593*45749Smckusick if ((flags & (B_READ|B_ASYNC)) == B_ASYNC) { 594*45749Smckusick #ifdef DEBUG 595*45749Smckusick if (swpagerdebug & SDB_IO) 596*45749Smckusick printf("swpg_io: IO started: bp %x\n", bp); 597*45749Smckusick #endif 598*45749Smckusick return(VM_PAGER_PEND); 599*45749Smckusick } 600*45749Smckusick s = splbio(); 601*45749Smckusick #ifdef DEBUG 602*45749Smckusick if (flags & B_READ) 603*45749Smckusick swap_pager_piip++; 604*45749Smckusick else 605*45749Smckusick swap_pager_poip++; 606*45749Smckusick #endif 607*45749Smckusick while ((bp->b_flags & B_DONE) == 0) { 608*45749Smckusick assert_wait((int)bp); 609*45749Smckusick thread_block(); 610*45749Smckusick } 611*45749Smckusick #ifdef DEBUG 612*45749Smckusick if (flags & B_READ) 613*45749Smckusick --swap_pager_piip; 614*45749Smckusick else 615*45749Smckusick --swap_pager_poip; 616*45749Smckusick #endif 617*45749Smckusick rv = (bp->b_flags & B_ERROR) ? 
VM_PAGER_FAIL : VM_PAGER_OK; 618*45749Smckusick bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY); 619*45749Smckusick bp->av_forw = bswlist.av_forw; 620*45749Smckusick bswlist.av_forw = bp; 621*45749Smckusick if (bp->b_vp) 622*45749Smckusick brelvp(bp); 623*45749Smckusick if (bswlist.b_flags & B_WANTED) { 624*45749Smckusick bswlist.b_flags &= ~B_WANTED; 625*45749Smckusick thread_wakeup((int)&bswlist); 626*45749Smckusick } 627*45749Smckusick if ((flags & B_READ) == 0 && rv == VM_PAGER_OK) { 628*45749Smckusick m->clean = 1; 629*45749Smckusick pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 630*45749Smckusick } 631*45749Smckusick splx(s); 632*45749Smckusick #ifdef DEBUG 633*45749Smckusick if (swpagerdebug & SDB_IO) 634*45749Smckusick printf("swpg_io: IO done: bp %x, rv %d\n", bp, rv); 635*45749Smckusick if ((swpagerdebug & SDB_FAIL) && rv == VM_PAGER_FAIL) 636*45749Smckusick printf("swpg_io: IO error\n"); 637*45749Smckusick #endif 638*45749Smckusick vm_pager_unmap_page(kva); 639*45749Smckusick return(rv); 640*45749Smckusick } 641*45749Smckusick 642*45749Smckusick boolean_t 643*45749Smckusick swap_pager_clean(m, rw) 644*45749Smckusick vm_page_t m; 645*45749Smckusick int rw; 646*45749Smckusick { 647*45749Smckusick register swp_clean_t spc, tspc; 648*45749Smckusick register int s; 649*45749Smckusick 650*45749Smckusick #ifdef DEBUG 651*45749Smckusick /* save panic time state */ 652*45749Smckusick if ((swpagerdebug & SDB_ANOMPANIC) && panicstr) 653*45749Smckusick return; 654*45749Smckusick if (swpagerdebug & SDB_FOLLOW) 655*45749Smckusick printf("swpg_clean(%x, %d)\n", m, rw); 656*45749Smckusick #endif 657*45749Smckusick tspc = SWP_CLEAN_NULL; 658*45749Smckusick for (;;) { 659*45749Smckusick /* 660*45749Smckusick * Look up and removal from inuse list must be done 661*45749Smckusick * at splbio() to avoid conflicts with swap_pager_iodone. 
662*45749Smckusick */ 663*45749Smckusick s = splbio(); 664*45749Smckusick spc = (swp_clean_t) queue_first(&swap_pager_inuse); 665*45749Smckusick while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) { 666*45749Smckusick if ((spc->spc_flags & SPC_DONE) && 667*45749Smckusick swap_pager_finish(spc)) { 668*45749Smckusick queue_remove(&swap_pager_inuse, spc, 669*45749Smckusick swp_clean_t, spc_list); 670*45749Smckusick break; 671*45749Smckusick } 672*45749Smckusick if (m && m == spc->spc_m) { 673*45749Smckusick #ifdef DEBUG 674*45749Smckusick if (swpagerdebug & SDB_ANOM) 675*45749Smckusick printf("swpg_clean: %x on list, flags %x\n", 676*45749Smckusick m, spc->spc_flags); 677*45749Smckusick #endif 678*45749Smckusick tspc = spc; 679*45749Smckusick } 680*45749Smckusick spc = (swp_clean_t) queue_next(&spc->spc_list); 681*45749Smckusick } 682*45749Smckusick 683*45749Smckusick /* 684*45749Smckusick * No operations done, thats all we can do for now. 685*45749Smckusick */ 686*45749Smckusick if (queue_end(&swap_pager_inuse, (queue_entry_t)spc)) 687*45749Smckusick break; 688*45749Smckusick splx(s); 689*45749Smckusick 690*45749Smckusick /* 691*45749Smckusick * The desired page was found to be busy earlier in 692*45749Smckusick * the scan but has since completed. 
693*45749Smckusick */ 694*45749Smckusick if (tspc && tspc == spc) { 695*45749Smckusick #ifdef DEBUG 696*45749Smckusick if (swpagerdebug & SDB_ANOM) 697*45749Smckusick printf("swpg_clean: %x done while looking\n", 698*45749Smckusick m); 699*45749Smckusick #endif 700*45749Smckusick tspc = SWP_CLEAN_NULL; 701*45749Smckusick } 702*45749Smckusick spc->spc_flags = SPC_FREE; 703*45749Smckusick vm_pager_unmap_page(spc->spc_kva); 704*45749Smckusick queue_enter(&swap_pager_free, spc, swp_clean_t, spc_list); 705*45749Smckusick #ifdef DEBUG 706*45749Smckusick if (swpagerdebug & SDB_WRITE) 707*45749Smckusick printf("swpg_clean: free spc %x\n", spc); 708*45749Smckusick #endif 709*45749Smckusick } 710*45749Smckusick /* 711*45749Smckusick * If we found that the desired page is already being cleaned 712*45749Smckusick * mark it so that swap_pager_iodone() will not set the clean 713*45749Smckusick * flag before the pageout daemon has another chance to clean it. 714*45749Smckusick */ 715*45749Smckusick if (tspc && rw == B_WRITE) { 716*45749Smckusick #ifdef DEBUG 717*45749Smckusick if (swpagerdebug & SDB_ANOM) 718*45749Smckusick printf("swpg_clean: %x on clean list\n", tspc); 719*45749Smckusick #endif 720*45749Smckusick tspc->spc_flags |= SPC_DIRTY; 721*45749Smckusick } 722*45749Smckusick splx(s); 723*45749Smckusick 724*45749Smckusick #ifdef DEBUG 725*45749Smckusick if (swpagerdebug & SDB_WRITE) 726*45749Smckusick printf("swpg_clean: return %d\n", tspc ? TRUE : FALSE); 727*45749Smckusick if ((swpagerdebug & SDB_ANOM) && tspc) 728*45749Smckusick printf("swpg_clean: %s of cleaning page %x\n", 729*45749Smckusick rw == B_READ ? "get" : "put", m); 730*45749Smckusick #endif 731*45749Smckusick return(tspc ? 
							TRUE : FALSE);
}

/*
 * Complete a swap pageout for the given clean-list entry: drop the
 * object's paging_in_progress count, update the page's clean/laundry
 * state according to the I/O result, and unblock waiters on the page.
 * Returns 1 on success, 0 if the object lock could not be taken
 * (caller is expected to retry later).
 */
swap_pager_finish(spc)
	register swp_clean_t spc;
{
	vm_object_t object = spc->spc_m->object;

	/*
	 * Mark the paging operation as done.
	 * (XXX) If we cannot get the lock, leave it til later.
	 * (XXX) Also we are assuming that an async write is a
	 *	 pageout operation that has incremented the counter.
	 */
	if (!vm_object_lock_try(object))
		return(0);

#ifdef DEBUG
	/* page is no longer in the pager's hands */
	spc->spc_m->pagerowned = 0;
#endif

	/* last outstanding paging op on this object: wake any waiters */
	if (--object->paging_in_progress == 0)
		thread_wakeup((int) object);

	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.  Otherwise, if no error mark
	 * as clean and inform the pmap system.  If error, mark as
	 * dirty so we will try again (XXX: could get stuck doing
	 * this, should give up after awhile).
	 */
	if ((spc->spc_flags & SPC_DIRTY) == 0) {
		if (spc->spc_flags & SPC_ERROR) {
			printf("swap_pager: clean of %x failed\n",
			       VM_PAGE_TO_PHYS(spc->spc_m));
			/* leave in the laundry so pageout retries it */
			spc->spc_m->laundry = TRUE;
		} else {
			spc->spc_m->clean = TRUE;
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
		}
	}
	/*
	 * XXX: allow blocked write faults to continue
	 */
	spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
	PAGE_WAKEUP(spc->spc_m);

	vm_object_unlock(object);
	return(1);
}

/*
 * Biodone handler for swap pager I/O.  Runs at interrupt time:
 * locates the clean-list entry for the completed buffer, records
 * completion/error status, returns the buffer to the swap buffer
 * free list (bswlist), and wakes anyone waiting on the swap device
 * or the buffer list.  Finally pokes the pageout daemon.
 */
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	daddr_t blk;
	int s;

#ifdef DEBUG
	/* save panic time state */
	if ((swpagerdebug & SDB_ANOMPANIC) && panicstr)
		return;
	if (swpagerdebug & SDB_FOLLOW)
		printf("swpg_iodone(%x)\n", bp);
#endif
	/* block other bio interrupts while we walk/modify the in-use list */
	s = splbio();
	spc = (swp_clean_t) queue_first(&swap_pager_inuse);
	while (!queue_end(&swap_pager_inuse, (queue_entry_t)spc)) {
		if (spc->spc_bp == bp)
			break;
		spc = (swp_clean_t) queue_next(&spc->spc_list);
	}
#ifdef DEBUG
	if (queue_end(&swap_pager_inuse, (queue_entry_t)spc))
		panic("swpg_iodone: bp not found");
#endif

	spc->spc_flags &= ~SPC_BUSY;
	spc->spc_flags |= SPC_DONE;
	if (bp->b_flags & B_ERROR)
		spc->spc_flags |= SPC_ERROR;
	spc->spc_bp = NULL;
	blk = bp->b_blkno;

#ifdef DEBUG
	--swap_pager_poip;
	if (swpagerdebug & SDB_WRITE)
		printf("swpg_iodone: bp=%x swp=%x flags=%x spc=%x poip=%x\n",
		       bp, spc->spc_swp, spc->spc_swp->sw_flags,
		       spc, spc->spc_swp->sw_poip);
#endif

	/* one less paging op in progress on this swap device */
	spc->spc_swp->sw_poip--;
	if (spc->spc_swp->sw_flags & SW_WANTED) {
		spc->spc_swp->sw_flags &= ~SW_WANTED;
		thread_wakeup((int)spc->spc_swp);
	}

	/* return the buffer to the head of the swap buffer free list */
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY);
	bp->av_forw = bswlist.av_forw;
	bswlist.av_forw = bp;
	if (bp->b_vp)
		brelvp(bp);
	if (bswlist.b_flags & B_WANTED) {
		bswlist.b_flags &= ~B_WANTED;
		thread_wakeup((int)&bswlist);
	}
#if 0
	/*
	 * XXX: this isn't even close to the right thing to do,
	 * introduces a variety of race conditions.
	 *
	 * If dirty, vm_pageout() has attempted to clean the page
	 * again.  In this case we do not do anything as we will
	 * see the page again shortly.  Otherwise, if no error mark
	 * as clean and inform the pmap system.  If error, mark as
	 * dirty so we will try again (XXX: could get stuck doing
	 * this, should give up after awhile).
	 */
	if ((spc->spc_flags & SPC_DIRTY) == 0) {
		if (spc->spc_flags & SPC_ERROR) {
			printf("swap_pager: clean of %x (block %x) failed\n",
			       VM_PAGE_TO_PHYS(spc->spc_m), blk);
			spc->spc_m->laundry = TRUE;
		} else {
			spc->spc_m->clean = TRUE;
			pmap_clear_modify(VM_PAGE_TO_PHYS(spc->spc_m));
		}
	}
	/*
	 * XXX: allow blocked write faults to continue
	 */
	spc->spc_m->page_lock = spc->spc_m->unlock_request = VM_PROT_NONE;
	PAGE_WAKEUP(spc->spc_m);
#endif

	/* let the pageout daemon know a clean slot may be available */
	thread_wakeup((int) &vm_pages_needed);
	splx(s);
}
#endif