/*
 * Copyright (c) 1985, Avadis Tevanian, Jr., Michael Wayne Young
 * Copyright (c) 1987 Carnegie-Mellon University
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * The CMU software License Agreement specifies the terms and conditions
 * for use and redistribution.
 *
 *	@(#)vm_fault.c	7.1 (Berkeley) 12/05/90
 */

/*
 * Page fault handling module.
 */

#include "param.h"
#include "../vm/vm_param.h"
#include "../vm/vm_map.h"
#include "../vm/vm_object.h"
#include "../vm/vm_page.h"
#include "../vm/pmap.h"
#include "../vm/vm_statistics.h"
#include "../vm/vm_pageout.h"

/*
 * vm_fault:
 *
 * Handle a page fault occurring at the given address,
 * requiring the given permissions, in the map specified.
 * If successful, the page is inserted into the
 * associated physical map.
 *
 * NOTE: the given address should be truncated to the
 * proper page address.
 *
 * KERN_SUCCESS is returned if the page fault is handled; otherwise,
 * a standard error specifying why the fault is fatal is returned.
 *
 * The map in question must be referenced, and remains so.
 * Caller may hold no locks.
 */
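/*
 * Overview of the flow below, added as a reader's roadmap (it only
 * summarizes the code that follows):
 *
 * 1.  Look up the faulting address in the map to find the top-level
 *     object, offset, and permitted protection.
 * 2.  Walk the shadow chain from the top object until the page is
 *     found resident, paged in from a pager, or zero-filled at the
 *     end of the chain.
 * 3.  If the page lives below the top object and this is a write
 *     fault, copy it up into the top object (copy-on-write).
 * 4.  If the top object has a copy-object, push the original page
 *     to it before allowing the write.
 * 5.  Revalidate the map lookup, pmap_enter the page, and put it
 *     back on the appropriate paging queue.
 */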
vm_fault(map, vaddr, fault_type, change_wiring)
    vm_map_t    map;
    vm_offset_t vaddr;
    vm_prot_t   fault_type;
    boolean_t   change_wiring;
{
    vm_object_t     first_object;
    vm_offset_t     first_offset;
    vm_map_entry_t  entry;
    register vm_object_t    object;
    register vm_offset_t    offset;
    register vm_page_t      m;
    vm_page_t       first_m;
    vm_prot_t       prot;
    int             result;
    boolean_t       wired;
    boolean_t       su;
    boolean_t       lookup_still_valid;
    boolean_t       page_exists;
    vm_page_t       old_m;
    vm_object_t     next_object;

    vm_stat.faults++;           /* needs lock XXX */

    /*
     * Recovery actions
     */
#define FREE_PAGE(m) {                              \
    PAGE_WAKEUP(m);                                 \
    vm_page_lock_queues();                          \
    vm_page_free(m);                                \
    vm_page_unlock_queues();                        \
}

#define RELEASE_PAGE(m) {                           \
    PAGE_WAKEUP(m);                                 \
    vm_page_lock_queues();                          \
    vm_page_activate(m);                            \
    vm_page_unlock_queues();                        \
}

#define UNLOCK_MAP {                                \
    if (lookup_still_valid) {                       \
        vm_map_lookup_done(map, entry);             \
        lookup_still_valid = FALSE;                 \
    }                                               \
}

#define UNLOCK_THINGS {                             \
    object->paging_in_progress--;                   \
    vm_object_unlock(object);                       \
    if (object != first_object) {                   \
        vm_object_lock(first_object);               \
        FREE_PAGE(first_m);                         \
        first_object->paging_in_progress--;         \
        vm_object_unlock(first_object);             \
    }                                               \
    UNLOCK_MAP;                                     \
}

#define UNLOCK_AND_DEALLOCATE {                     \
    UNLOCK_THINGS;                                  \
    vm_object_deallocate(first_object);             \
}
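    /*
     * These macros unwind partial state on the failure and retry
     * paths: FREE_PAGE discards a busy page we no longer want,
     * RELEASE_PAGE puts one back on the active queue, UNLOCK_THINGS
     * backs out the paging_in_progress counts (and the placeholder
     * page first_m, if we have walked down a shadow chain), and
     * UNLOCK_AND_DEALLOCATE additionally drops the reference taken
     * on first_object below.
     */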
 RetryFault: ;

    /*
     * Find the backing store object and offset into
     * it to begin the search.
     */

    if ((result = vm_map_lookup(&map, vaddr, fault_type, &entry,
            &first_object, &first_offset,
            &prot, &wired, &su)) != KERN_SUCCESS) {
        return(result);
    }
    lookup_still_valid = TRUE;

    if (wired)
        fault_type = prot;

    first_m = VM_PAGE_NULL;

    /*
     * Make a reference to this object to
     * prevent its disposal while we are messing with
     * it.  Once we have the reference, the map is free
     * to be diddled.  Since objects reference their
     * shadows (and copies), they will stay around as well.
     */

    vm_object_lock(first_object);

    first_object->ref_count++;
    first_object->paging_in_progress++;

    /*
     * INVARIANTS (through entire routine):
     *
     * 1)  At all times, we must either have the object
     *     lock or a busy page in some object to prevent
     *     some other thread from trying to bring in
     *     the same page.
     *
     *     Note that we cannot hold any locks during the
     *     pager access or when waiting for memory, so
     *     we use a busy page then.
     *
     *     Note also that we aren't as concerned about
     *     more than one thread attempting to pager_data_unlock
     *     the same page at once, so we don't hold the page
     *     as busy then, but do record the highest unlock
     *     value so far.  [Unlock requests may also be delivered
     *     out of order.]
     *
     * 2)  Once we have a busy page, we must remove it from
     *     the pageout queues, so that the pageout daemon
     *     will not grab it away.
     *
     * 3)  To prevent another thread from racing us down the
     *     shadow chain and entering a new page in the top
     *     object before we do, we must keep a busy page in
     *     the top object while following the shadow chain.
     *
     * 4)  We must increment paging_in_progress on any object
     *     for which we have a busy page, to prevent
     *     vm_object_collapse from removing the busy page
     *     without our noticing.
     */

    /*
     * Search for the page at object/offset.
     */

    object = first_object;
    offset = first_offset;

    /*
     * See whether this page is resident
     */

    while (TRUE) {
        m = vm_page_lookup(object, offset);
        if (m != VM_PAGE_NULL) {
            /*
             * If the page is being brought in,
             * wait for it and then retry.
             */
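            /*
             * A note on the waits below: the second argument to
             * PAGE_ASSERT_WAIT is presumably the "interruptible"
             * flag, so waits on behalf of a wiring change are not
             * interruptible.  Under DOTHREADS an interrupted wait
             * shows up as a wait_result other than THREAD_AWAKENED.
             */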
            if (m->busy) {
#ifdef DOTHREADS
                int wait_result;

                PAGE_ASSERT_WAIT(m, !change_wiring);
                UNLOCK_THINGS;
                thread_block();
                wait_result = current_thread()->wait_result;
                vm_object_deallocate(first_object);
                if (wait_result != THREAD_AWAKENED)
                    return(KERN_SUCCESS);
                goto RetryFault;
#else
                PAGE_ASSERT_WAIT(m, !change_wiring);
                UNLOCK_THINGS;
                thread_block();
                vm_object_deallocate(first_object);
                goto RetryFault;
#endif
            }

            if (m->absent)
                panic("vm_fault: absent");

            /*
             * If the desired access to this page has
             * been locked out, request that it be unlocked.
             */

            if (fault_type & m->page_lock) {
#ifdef DOTHREADS
                int wait_result;

                if ((fault_type & m->unlock_request) != fault_type)
                    panic("vm_fault: pager_data_unlock");

                PAGE_ASSERT_WAIT(m, !change_wiring);
                UNLOCK_THINGS;
                thread_block();
                wait_result = current_thread()->wait_result;
                vm_object_deallocate(first_object);
                if (wait_result != THREAD_AWAKENED)
                    return(KERN_SUCCESS);
                goto RetryFault;
#else
                if ((fault_type & m->unlock_request) != fault_type)
                    panic("vm_fault: pager_data_unlock");

                PAGE_ASSERT_WAIT(m, !change_wiring);
                UNLOCK_THINGS;
                thread_block();
                vm_object_deallocate(first_object);
                goto RetryFault;
#endif
            }

            /*
             * Remove the page from the pageout daemon's
             * reach while we play with it.
             */

            vm_page_lock_queues();
            if (m->inactive) {
                queue_remove(&vm_page_queue_inactive, m,
                        vm_page_t, pageq);
                m->inactive = FALSE;
                vm_page_inactive_count--;
                vm_stat.reactivations++;
            }

            if (m->active) {
                queue_remove(&vm_page_queue_active, m,
                        vm_page_t, pageq);
                m->active = FALSE;
                vm_page_active_count--;
            }
            vm_page_unlock_queues();

            /*
             * Mark page busy for other threads.
             */
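            /*
             * (The busy bit, together with paging_in_progress, is
             * what lets us drop the object lock later without
             * violating invariant 1 above.)
             */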
            m->busy = TRUE;
            m->absent = FALSE;
            break;
        }

        if (((object->pager != vm_pager_null) &&
                (!change_wiring || wired))
            || (object == first_object)) {

            /*
             * Allocate a new page for this object/offset
             * pair.
             */

            m = vm_page_alloc(object, offset);

            if (m == VM_PAGE_NULL) {
                UNLOCK_AND_DEALLOCATE;
                VM_WAIT;
                goto RetryFault;
            }
        }

        if ((object->pager != vm_pager_null) &&
                (!change_wiring || wired)) {
            int rv;

            /*
             * Now that we have a busy page, we can
             * release the object lock.
             */
            vm_object_unlock(object);

            /*
             * Call the pager to retrieve the data, if any,
             * after releasing the lock on the map.
             */
            UNLOCK_MAP;

            rv = vm_pager_get(object->pager, m, TRUE);
            if (rv == VM_PAGER_OK) {
                /*
                 * Found the page.
                 * Leave it busy while we play with it.
                 */
                vm_object_lock(object);

                /*
                 * Relookup in case pager changed page.
                 * Pager is responsible for disposition
                 * of old page if moved.
                 */
                m = vm_page_lookup(object, offset);

                vm_stat.pageins++;
                m->fake = FALSE;
                pmap_clear_modify(VM_PAGE_TO_PHYS(m));
                break;
            }

            /*
             * Remove the bogus page (which does not
             * exist at this object/offset); before
             * doing so, we must get back our object
             * lock to preserve our invariant.
             *
             * Also wake up any other thread that may want
             * to bring in this page.
             *
             * If this is the top-level object, we must
             * leave the busy page to prevent another
             * thread from rushing past us, and inserting
             * the page in that object at the same time
             * that we are.
             */

            vm_object_lock(object);
            /*
             * Data outside the range of the pager; an error
             */
            if (rv == VM_PAGER_BAD) {
                FREE_PAGE(m);
                UNLOCK_AND_DEALLOCATE;
                return(KERN_PROTECTION_FAILURE);    /* XXX */
            }
            if (object != first_object) {
                FREE_PAGE(m);
                /*
                 * XXX - we cannot just fall out at this
                 * point, m has been freed and is invalid!
                 */
            }
        }
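        /*
         * A sketch of the shadow-chain walk (a hypothetical
         * three-object chain; the names are illustrative only):
         *
         *      first_object --shadow--> obj2 --shadow--> obj3
         *        [first_m]              (no page)         [m]
         *
         * Each hop below adds the object's shadow_offset so that
         * "offset" keeps naming the same datum, and first_m stays
         * busy in the top object to hold our place (invariant 3).
         */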
        /*
         * We get here if the object has no pager (or unwiring)
         * or the pager doesn't have the page.
         */
        if (object == first_object)
            first_m = m;

        /*
         * Move on to the next object.  Lock the next
         * object before unlocking the current one.
         */

        offset += object->shadow_offset;
        next_object = object->shadow;
        if (next_object == VM_OBJECT_NULL) {
            /*
             * If there's no object left, fill the page
             * in the top object with zeros.
             */
            if (object != first_object) {
                object->paging_in_progress--;
                vm_object_unlock(object);

                object = first_object;
                offset = first_offset;
                m = first_m;
                vm_object_lock(object);
            }
            first_m = VM_PAGE_NULL;

            vm_page_zero_fill(m);
            vm_stat.zero_fill_count++;
            m->fake = FALSE;
            m->absent = FALSE;
            break;
        }
        else {
            vm_object_lock(next_object);
            if (object != first_object)
                object->paging_in_progress--;
            vm_object_unlock(object);
            object = next_object;
            object->paging_in_progress++;
        }
    }

    if (m->absent || m->active || m->inactive || !m->busy)
        panic("vm_fault: absent or active or inactive or not busy after main loop");

    /*
     * PAGE HAS BEEN FOUND.
     * [Loop invariant still holds -- the object lock
     * is held.]
     */

    old_m = m;      /* save page that would be copied */

    /*
     * If the page is being written, but isn't
     * already owned by the top-level object,
     * we have to copy it into a new page owned
     * by the top-level object.
     */

    if (object != first_object) {
        /*
         * We only really need to copy if we
         * want to write it.
         */

        if (fault_type & VM_PROT_WRITE) {

            /*
             * If we try to collapse first_object at this
             * point, we may deadlock when we try to get
             * the lock on an intermediate object (since we
             * have the bottom object locked).  We can't
             * unlock the bottom object, because the page
             * we found may move (by collapse) if we do.
             *
             * Instead, we first copy the page.  Then, when
             * we have no more use for the bottom object,
             * we unlock it and try to collapse.
             *
             * Note that we copy the page even if we didn't
             * need to... that's the breaks.
             */
            /*
             * We already have an empty page in
             * first_object - use it.
             */

            vm_page_copy(m, first_m);
            first_m->fake = FALSE;
            first_m->absent = FALSE;

            /*
             * If another map is truly sharing this
             * page with us, we have to flush all
             * uses of the original page, since we
             * can't distinguish those which want the
             * original from those which need the
             * new copy.
             */

            vm_page_lock_queues();
            if (!su) {
                /*
                 * Also, once it's no longer in
                 * use by any maps, move it to
                 * the inactive queue instead.
                 */

                vm_page_deactivate(m);
                pmap_remove_all(VM_PAGE_TO_PHYS(m));
            }
            else {
                /*
                 * Old page is only (possibly)
                 * in use by faulting map.  We
                 * should do a pmap_remove on
                 * that mapping, but we know
                 * that pmap_enter will remove
                 * the old mapping before
                 * inserting the new one.
                 */
                vm_page_activate(m);
            }
            vm_page_unlock_queues();

            /*
             * We no longer need the old page or object.
             */
            PAGE_WAKEUP(m);
            object->paging_in_progress--;
            vm_object_unlock(object);

            /*
             * Only use the new page below...
             */

            vm_stat.cow_faults++;
            m = first_m;
            object = first_object;
            offset = first_offset;

            /*
             * Now that we've gotten the copy out of the
             * way, let's try to collapse the top object.
             */
            vm_object_lock(object);
            /*
             * But we have to play ugly games with
             * paging_in_progress to do that...
             */
            object->paging_in_progress--;
            vm_object_collapse(object);
            object->paging_in_progress++;
        }
        else {
            prot &= (~VM_PROT_WRITE);
            m->copy_on_write = TRUE;
        }
    }

    if (m->active || m->inactive)
        panic("vm_fault: active or inactive before copy object handling");

    /*
     * If the page is being written, but hasn't been
     * copied to the copy-object, we have to copy it there.
     */
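    /*
     * (first_object->copy, when set, is an object that must receive
     * the original contents of our pages before we modify them; the
     * block below pushes the page there unless a copy already
     * exists, either resident or in the copy-object's pager.)
     */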
 RetryCopy:
    if (first_object->copy != VM_OBJECT_NULL) {
        vm_object_t copy_object = first_object->copy;
        vm_offset_t copy_offset;
        vm_page_t   copy_m;

        /*
         * We only need to copy if we want to write it.
         */
        if ((fault_type & VM_PROT_WRITE) == 0) {
            prot &= ~VM_PROT_WRITE;
            m->copy_on_write = TRUE;
        }
        else {
            /*
             * Try to get the lock on the copy_object.
             */
            if (!vm_object_lock_try(copy_object)) {
                vm_object_unlock(object);
                /* should spin a bit here... */
                vm_object_lock(object);
                goto RetryCopy;
            }

            /*
             * Make another reference to the copy-object,
             * to keep it from disappearing during the
             * copy.
             */
            copy_object->ref_count++;

            /*
             * Does the page exist in the copy?
             */
            copy_offset = first_offset
                    - copy_object->shadow_offset;
            copy_m = vm_page_lookup(copy_object, copy_offset);
            if (page_exists = (copy_m != VM_PAGE_NULL)) {
                if (copy_m->busy) {
#ifdef DOTHREADS
                    int wait_result;

                    /*
                     * If the page is being brought
                     * in, wait for it and then retry.
                     */
                    PAGE_ASSERT_WAIT(copy_m, !change_wiring);
                    RELEASE_PAGE(m);
                    copy_object->ref_count--;
                    vm_object_unlock(copy_object);
                    UNLOCK_THINGS;
                    thread_block();
                    wait_result = current_thread()->wait_result;
                    vm_object_deallocate(first_object);
                    if (wait_result != THREAD_AWAKENED)
                        return(KERN_SUCCESS);
                    goto RetryFault;
#else
                    /*
                     * If the page is being brought
                     * in, wait for it and then retry.
                     */
                    PAGE_ASSERT_WAIT(copy_m, !change_wiring);
                    RELEASE_PAGE(m);
                    copy_object->ref_count--;
                    vm_object_unlock(copy_object);
                    UNLOCK_THINGS;
                    thread_block();
                    vm_object_deallocate(first_object);
                    goto RetryFault;
#endif
                }
            }

            /*
             * If the page is not in memory (in the object)
             * and the object has a pager, we have to check
             * if the pager has the data in secondary
             * storage.
             */
            if (!page_exists) {

                /*
                 * If we don't allocate a (blank) page
                 * here... another thread could try
                 * to page it in, allocate a page, and
                 * then block on the busy page in its
                 * shadow (first_object).  Then we'd
                 * trip over the busy page after we
                 * found that the copy_object's pager
                 * doesn't have the page...
                 */
                copy_m = vm_page_alloc(copy_object,
                        copy_offset);
                if (copy_m == VM_PAGE_NULL) {
                    /*
                     * Wait for a page, then retry.
                     */
                    RELEASE_PAGE(m);
                    copy_object->ref_count--;
                    vm_object_unlock(copy_object);
                    UNLOCK_AND_DEALLOCATE;
                    VM_WAIT;
                    goto RetryFault;
                }

                if (copy_object->pager != vm_pager_null) {
                    vm_object_unlock(object);
                    vm_object_unlock(copy_object);
                    UNLOCK_MAP;

                    page_exists = vm_pager_has_page(
                            copy_object->pager,
                            (copy_offset + copy_object->paging_offset));

                    vm_object_lock(copy_object);

                    /*
                     * Since the map is unlocked, someone
                     * else could have copied this object
                     * and put a different copy_object
                     * between the two.  Or, the last
                     * reference to the copy-object (other
                     * than the one we have) may have
                     * disappeared - if that has happened,
                     * we don't need to make the copy.
                     */
                    if (copy_object->shadow != object ||
                        copy_object->ref_count == 1) {
                        /*
                         * Gaah... start over!
                         */
                        FREE_PAGE(copy_m);
                        vm_object_unlock(copy_object);
                        vm_object_deallocate(copy_object);
                            /* may block */
                        vm_object_lock(object);
                        goto RetryCopy;
                    }
                    vm_object_lock(object);

                    if (page_exists) {
                        /*
                         * We didn't need the page
                         */
                        FREE_PAGE(copy_m);
                    }
                }
            }
            if (!page_exists) {
                /*
                 * Must copy page into copy-object.
                 */
                vm_page_copy(m, copy_m);
                copy_m->fake = FALSE;
                copy_m->absent = FALSE;

                /*
                 * Things to remember:
                 * 1. The copied page must be marked 'dirty'
                 *    so it will be paged out to the copy
                 *    object.
                 * 2. If the old page was in use by any users
                 *    of the copy-object, it must be removed
                 *    from all pmaps.  (We can't know which
                 *    pmaps use it.)
                 */
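                /*
                 * (Item 1 is the "copy_m->clean = FALSE" below -
                 * a page is dirty exactly when it is not clean;
                 * item 2 is the pmap_remove_all on old_m.)
                 */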
                vm_page_lock_queues();
                pmap_remove_all(VM_PAGE_TO_PHYS(old_m));
                copy_m->clean = FALSE;
                vm_page_activate(copy_m);   /* XXX */
                vm_page_unlock_queues();

                PAGE_WAKEUP(copy_m);
            }
            /*
             * The reference count on copy_object must be
             * at least 2: one for our extra reference,
             * and at least one from the outside world
             * (we checked that when we last locked
             * copy_object).
             */
            copy_object->ref_count--;
            vm_object_unlock(copy_object);
            m->copy_on_write = FALSE;
        }
    }

    if (m->active || m->inactive)
        panic("vm_fault: active or inactive before retrying lookup");

    /*
     * We must verify that the maps have not changed
     * since our last lookup.
     */

    if (!lookup_still_valid) {
        vm_object_t retry_object;
        vm_offset_t retry_offset;
        vm_prot_t   retry_prot;

        /*
         * Since map entries may be pageable, make sure we can
         * take a page fault on them.
         */
        vm_object_unlock(object);

        /*
         * To avoid trying to write_lock the map while another
         * thread has it read_locked (in vm_map_pageable), we
         * do not try for write permission.  If the page is
         * still writable, we will get write permission.  If it
         * is not, or has been marked needs_copy, we enter the
         * mapping without write permission, and will merely
         * take another fault.
         */
        result = vm_map_lookup(&map, vaddr,
                fault_type & ~VM_PROT_WRITE, &entry,
                &retry_object, &retry_offset, &retry_prot,
                &wired, &su);

        vm_object_lock(object);

        /*
         * If we don't need the page any longer, put it on the
         * active list (the easiest thing to do here).  If no
         * one needs it, pageout will grab it eventually.
         */

        if (result != KERN_SUCCESS) {
            RELEASE_PAGE(m);
            UNLOCK_AND_DEALLOCATE;
            return(result);
        }

        lookup_still_valid = TRUE;

        if ((retry_object != first_object) ||
                (retry_offset != first_offset)) {
            RELEASE_PAGE(m);
            UNLOCK_AND_DEALLOCATE;
            goto RetryFault;
        }

        /*
         * Check whether the protection has changed or the object
         * has been copied while we left the map unlocked.
         * Changing from read to write permission is OK - we leave
         * the page write-protected, and catch the write fault.
         * Changing from write to read permission means that we
         * can't mark the page write-enabled after all.
         */
        prot &= retry_prot;
        if (m->copy_on_write)
            prot &= ~VM_PROT_WRITE;
    }
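    /*
     * (On a mismatch, the goto RetryFault above restarts the entire
     * fault rather than patching things up, since everything built
     * so far - the placeholder page, any copies, the
     * paging_in_progress counts - is anchored to first_object and
     * first_offset from the original lookup.)
     */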
    /*
     * (the various bits we're fiddling with here are locked by
     * the object's lock)
     */

    /* XXX This distorts the meaning of the copy_on_write bit */

    if (prot & VM_PROT_WRITE)
        m->copy_on_write = FALSE;

    /*
     * It's critically important that a wired-down page be faulted
     * only once in each map for which it is wired.
     */

    if (m->active || m->inactive)
        panic("vm_fault: active or inactive before pmap_enter");

    vm_object_unlock(object);

    /*
     * Put this page into the physical map.
     * We had to do the unlock above because pmap_enter
     * may cause other faults.  We don't put the
     * page back on the active queue until later so
     * that the page-out daemon won't find us (yet).
     */

    pmap_enter(map->pmap, vaddr, VM_PAGE_TO_PHYS(m),
            prot & ~(m->page_lock), wired);

    /*
     * If the page is not wired down, then put it where the
     * pageout daemon can find it.
     */
    vm_object_lock(object);
    vm_page_lock_queues();
    if (change_wiring) {
        if (wired)
            vm_page_wire(m);
        else
            vm_page_unwire(m);
    }
    else
        vm_page_activate(m);
    vm_page_unlock_queues();

    /*
     * Unlock everything, and return
     */

    PAGE_WAKEUP(m);
    UNLOCK_AND_DEALLOCATE;

    return(KERN_SUCCESS);
}

/*
 * vm_fault_wire:
 *
 * Wire down a range of virtual addresses in a map.
 */
void vm_fault_wire(map, start, end)
    vm_map_t    map;
    vm_offset_t start, end;
{
    register vm_offset_t    va;
    register pmap_t         pmap;

    pmap = vm_map_pmap(map);

    /*
     * Inform the physical mapping system that the
     * range of addresses may not fault, so that
     * page tables and such can be locked down as well.
     */

    pmap_pageable(pmap, start, end, FALSE);

    /*
     * We simulate a fault to get the page and enter it
     * in the physical map.
     */
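    /*
     * (change_wiring is passed as TRUE and fault_type as
     * VM_PROT_NONE: vm_fault takes the real protection and wired
     * status from the map entry itself, per "if (wired)
     * fault_type = prot" above.)
     */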
    for (va = start; va < end; va += PAGE_SIZE) {
        (void) vm_fault(map, va, VM_PROT_NONE, TRUE);
    }
}

/*
 * vm_fault_unwire:
 *
 * Unwire a range of virtual addresses in a map.
 */
void vm_fault_unwire(map, start, end)
    vm_map_t    map;
    vm_offset_t start, end;
{
    register vm_offset_t    va, pa;
    register pmap_t         pmap;

    pmap = vm_map_pmap(map);

    /*
     * Since the pages are wired down, we must be able to
     * get their mappings from the physical map system.
     */

    vm_page_lock_queues();

    for (va = start; va < end; va += PAGE_SIZE) {
        pa = pmap_extract(pmap, va);
        if (pa == (vm_offset_t) 0) {
            panic("unwire: page not in pmap");
        }
        pmap_change_wiring(pmap, va, FALSE);
        vm_page_unwire(PHYS_TO_VM_PAGE(pa));
    }
    vm_page_unlock_queues();

    /*
     * Inform the physical mapping system that the range
     * of addresses may fault, so that page tables and
     * such may be unwired themselves.
     */

    pmap_pageable(pmap, start, end, TRUE);
}

/*
 * Routine:
 *      vm_fault_copy_entry
 * Function:
 *      Copy all of the pages from a wired-down map entry to another.
 *
 * In/out conditions:
 *      The source and destination maps must be locked for write.
 *      The source map entry must be wired down (or be a sharing map
 *      entry corresponding to a main map entry that is wired down).
 */

void vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
    vm_map_t        dst_map;
    vm_map_t        src_map;
    vm_map_entry_t  dst_entry;
    vm_map_entry_t  src_entry;
{
    vm_object_t dst_object;
    vm_object_t src_object;
    vm_offset_t dst_offset;
    vm_offset_t src_offset;
    vm_prot_t   prot;
    vm_offset_t vaddr;
    vm_page_t   dst_m;
    vm_page_t   src_m;

#ifdef lint
    src_map++;
#endif lint

    src_object = src_entry->object.vm_object;
    src_offset = src_entry->offset;

    /*
     * Create the top-level object for the destination entry.
     * (Doesn't actually shadow anything - we copy the pages
     * directly.)
     */
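    /*
     * (dst_entry->offset is reset to zero just below, so in the
     * loop that follows dst_offset serves both as the offset into
     * dst_object and as the relative offset added to src_offset on
     * the source side.)
     */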
    dst_object = vm_object_allocate(
            (vm_size_t) (dst_entry->end - dst_entry->start));

    dst_entry->object.vm_object = dst_object;
    dst_entry->offset = 0;

    prot = dst_entry->max_protection;

    /*
     * Loop through all of the pages in the entry's range, copying
     * each one from the source object (it should be there) to the
     * destination object.
     */
    for (vaddr = dst_entry->start, dst_offset = 0;
         vaddr < dst_entry->end;
         vaddr += PAGE_SIZE, dst_offset += PAGE_SIZE) {

        /*
         * Allocate a page in the destination object
         */
        vm_object_lock(dst_object);
        do {
            dst_m = vm_page_alloc(dst_object, dst_offset);
            if (dst_m == VM_PAGE_NULL) {
                vm_object_unlock(dst_object);
                VM_WAIT;
                vm_object_lock(dst_object);
            }
        } while (dst_m == VM_PAGE_NULL);

        /*
         * Find the page in the source object, and copy it in.
         * (Because the source is wired down, the page will be
         * in memory.)
         */
        vm_object_lock(src_object);
        src_m = vm_page_lookup(src_object, dst_offset + src_offset);
        if (src_m == VM_PAGE_NULL)
            panic("vm_fault_copy_wired: page missing");

        vm_page_copy(src_m, dst_m);

        /*
         * Enter it in the pmap...
         */
        vm_object_unlock(src_object);
        vm_object_unlock(dst_object);

        pmap_enter(dst_map->pmap, vaddr, VM_PAGE_TO_PHYS(dst_m),
                prot, FALSE);

        /*
         * Mark it no longer busy, and put it on the active list.
         */
        vm_object_lock(dst_object);
        vm_page_lock_queues();
        vm_page_activate(dst_m);
        vm_page_unlock_queues();
        PAGE_WAKEUP(dst_m);
        vm_object_unlock(dst_object);
    }
}