1 /* $NetBSD: uvm_fault.c,v 1.9 1998/03/26 21:50:14 chuck Exp $ */ 2 3 /* 4 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! 5 * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< 6 */ 7 /* 8 * 9 * Copyright (c) 1997 Charles D. Cranor and Washington University. 10 * All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by Charles D. Cranor and 23 * Washington University. 24 * 4. The name of the author may not be used to endorse or promote products 25 * derived from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 28 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 29 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 30 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 31 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 32 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 36 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 * 38 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp 39 */ 40 41 #include "opt_uvmhist.h" 42 43 /* 44 * uvm_fault.c: fault handler 45 */ 46 47 #include <sys/param.h> 48 #include <sys/systm.h> 49 #include <sys/kernel.h> 50 #include <sys/mount.h> 51 #include <sys/proc.h> 52 #include <sys/malloc.h> 53 #include <sys/mman.h> 54 #include <sys/user.h> 55 56 #include <vm/vm.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_kern.h> 59 60 #include <sys/syscallargs.h> 61 62 #include <uvm/uvm.h> 63 64 /* 65 * 66 * a word on page faults: 67 * 68 * types of page faults we handle: 69 * 70 * CASE 1: upper layer faults CASE 2: lower layer faults 71 * 72 * CASE 1A CASE 1B CASE 2A CASE 2B 73 * read/write1 write>1 read/write +-cow_write/zero 74 * | | | | 75 * +--|--+ +--|--+ +-----+ + | + | +-----+ 76 * amap | V | | ----------->new| | | | ^ | 77 * +-----+ +-----+ +-----+ + | + | +--|--+ 78 * | | | 79 * +-----+ +-----+ +--|--+ | +--|--+ 80 * uobj | d/c | | d/c | | V | +----| | 81 * +-----+ +-----+ +-----+ +-----+ 82 * 83 * d/c = don't care 84 * 85 * case [0]: layerless fault 86 * no amap or uobj is present. this is an error. 87 * 88 * case [1]: upper layer fault [anon active] 89 * 1A: [read] or [write with anon->an_ref == 1] 90 * I/O takes place in top level anon and uobj is not touched. 91 * 1B: [write with anon->an_ref > 1] 92 * new anon is alloc'd and data is copied off ["COW"] 93 * 94 * case [2]: lower layer fault [uobj] 95 * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] 96 * I/O takes place directly in object. 
97 * 2B: [write to copy_on_write] or [read on NULL uobj]
98 * data is "promoted" from uobj to a new anon.
99 * if uobj is null, then we zero fill.
100 *
101 * we follow the standard UVM locking protocol ordering:
102 *
103 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
104 * we hold a PG_BUSY page if we unlock for I/O
105 *
106 *
107 * the code is structured as follows:
108 *
109 * - init the "IN" params in the ufi structure
110 * ReFault:
111 * - do lookups [locks maps], check protection, handle needs_copy
112 * - check for case 0 fault (error)
113 * - establish "range" of fault
114 * - if we have an amap lock it and extract the anons
115 * - if sequential advice deactivate pages behind us
116 * - at the same time check pmap for unmapped areas and anon for pages
117 * that we could map in (and map them in if found)
118 * - check object for resident pages that we could map in
119 * - if (case 2) goto Case2
120 * - >>> handle case 1
121 * - ensure source anon is resident in RAM
122 * - if case 1B alloc new anon and copy from source
123 * - map the correct page in
124 * Case2:
125 * - >>> handle case 2
126 * - ensure source page is resident (if uobj)
127 * - if case 2B alloc new anon and copy from source (could be zero
128 * fill if uobj == NULL)
129 * - map the correct page in
130 * - done!
131 *
132 * note on paging:
133 * if we have to do I/O we place a PG_BUSY page in the correct object,
134 * unlock everything, and do the I/O. when I/O is done we must reverify
135 * the state of the world before assuming that our data structures are
136 * valid. [because mappings could change while the map is unlocked]
137 *
138 * alternative 1: unbusy the page in question and restart the page fault
139 * from the top (ReFault). this is easy but does not take advantage
140 * of the information that we already have from our previous lookup,
141 * although it is possible that the "hints" in the vm_map will help here.
142 *
143 * alternative 2: the system already keeps track of a "version" number of
144 * a map. [i.e. every time you write-lock a map (e.g. to change a
145 * mapping) you bump the version number up by one...] so, we can save
146 * the version number of the map before we release the lock and start I/O.
147 * then when I/O is done we can relock and check the version numbers
148 * to see if anything changed. this might save us some work over alternative 1
149 * because we don't have to unbusy the page and there may be fewer compares(?).
150 *
151 * alternative 3: put in backpointers or a way to "hold" part of a map
152 * in place while I/O is in progress. this could be complex to
153 * implement (especially with structures like amap that can be referenced
154 * by multiple map entries, and figuring out what should wait could be
155 * complex as well...).
156 *
157 * given that we are not currently multiprocessor or multithreaded we might
158 * as well choose alternative 2 now. maybe alternative 3 would be useful
159 * in the future. XXX keep in mind for future consideration/rechecking.
160 */
161
162 /*
163 * local data structures
164 */
165
166 struct uvm_advice {
167 int advice;
168 int nback;
169 int nforw;
170 };
171
172 /*
173 * page range array:
174 * note: index in array must match "advice" value
175 * XXX: borrowed numbers from freebsd. do they work well for us?
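 *
 * (for reference: MADV_NORMAL maps in up to 3 pages back and 4 pages
 * forward of the faulting page, MADV_RANDOM none, and MADV_SEQUENTIAL up
 * to 8 back and 7 forward, all clipped to the current map entry. the
 * largest case, 8 + 7 + 1 = 16 pages, is what UVM_MAXRANGE must cover.)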
176 */ 177 178 static struct uvm_advice uvmadvice[] = { 179 { MADV_NORMAL, 3, 4 }, 180 { MADV_RANDOM, 0, 0 }, 181 { MADV_SEQUENTIAL, 8, 7}, 182 }; 183 184 #define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */ 185 186 /* 187 * private prototypes 188 */ 189 190 static void uvmfault_amapcopy __P((struct uvm_faultinfo *)); 191 static __inline void uvmfault_anonflush __P((struct vm_anon **, int)); 192 193 /* 194 * inline functions 195 */ 196 197 /* 198 * uvmfault_anonflush: try and deactivate pages in specified anons 199 * 200 * => does not have to deactivate page if it is busy 201 */ 202 203 static __inline void 204 uvmfault_anonflush(anons, n) 205 struct vm_anon **anons; 206 int n; 207 { 208 int lcv; 209 struct vm_page *pg; 210 211 for (lcv = 0 ; lcv < n ; lcv++) { 212 if (anons[lcv] == NULL) 213 continue; 214 simple_lock(&anons[lcv]->an_lock); 215 pg = anons[lcv]->u.an_page; 216 if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) { 217 uvm_lock_pageq(); 218 if (pg->wire_count == 0) { 219 pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); 220 uvm_pagedeactivate(pg); 221 } 222 uvm_unlock_pageq(); 223 } 224 simple_unlock(&anons[lcv]->an_lock); 225 } 226 } 227 228 /* 229 * normal functions 230 */ 231 232 /* 233 * uvmfault_amapcopy: clear "needs_copy" in a map. 234 * 235 * => called with VM data structures unlocked (usually, see below) 236 * => we get a write lock on the maps and clear needs_copy for a VA 237 * => if we are out of RAM we sleep (waiting for more) 238 */ 239 240 static void 241 uvmfault_amapcopy(ufi) 242 struct uvm_faultinfo *ufi; 243 { 244 245 /* 246 * while we haven't done the job 247 */ 248 249 while (1) { 250 251 /* 252 * no mapping? give up. 253 */ 254 255 if (uvmfault_lookup(ufi, TRUE) == FALSE) 256 return; 257 258 /* 259 * copy if needed. 260 */ 261 262 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) 263 amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, ufi->rvaddr, 264 ufi->rvaddr + 1); 265 266 /* 267 * didn't work? must be out of RAM. unlock and sleep. 268 */ 269 270 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 271 uvmfault_unlockmaps(ufi, TRUE); 272 uvm_wait("fltamapcopy"); 273 continue; 274 } 275 276 /* 277 * got it! unlock and return. 278 */ 279 280 uvmfault_unlockmaps(ufi, TRUE); 281 return; 282 } 283 /*NOTREACHED*/ 284 } 285 286 /* 287 * uvmfault_anonget: get data in an anon into a non-busy, non-released 288 * page in that anon. 289 * 290 * => maps, amap, and anon locked by caller. 291 * => if we fail (result != VM_PAGER_OK) we unlock everything. 292 * => if we are successful, we return with everything still locked. 293 * => we don't move the page on the queues [gets moved later] 294 * => if we allocate a new page [we_own], it gets put on the queues. 295 * either way, the result is that the page is on the queues at return time 296 * => for pages which are on loan from a uvm_object (and thus are not 297 * owned by the anon): if successful, we return with the owning object 298 * locked. the caller must unlock this object when it unlocks everything 299 * else. 300 */ 301 302 int uvmfault_anonget(ufi, amap, anon) 303 struct uvm_faultinfo *ufi; 304 struct vm_amap *amap; 305 struct vm_anon *anon; 306 { 307 boolean_t we_own; /* we own anon's page? */ 308 boolean_t locked; /* did we relock? 
*/ 309 struct vm_page *pg; 310 int result; 311 UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); 312 313 result = 0; /* XXX shut up gcc */ 314 uvmexp.fltanget++; 315 /* bump rusage counters */ 316 if (anon->u.an_page) 317 curproc->p_addr->u_stats.p_ru.ru_minflt++; 318 else 319 curproc->p_addr->u_stats.p_ru.ru_majflt++; 320 321 /* 322 * loop until we get it, or fail. 323 */ 324 325 while (1) { 326 327 we_own = FALSE; /* TRUE if we set PG_BUSY on a page */ 328 pg = anon->u.an_page; 329 330 /* 331 * if there is a resident page and it is loaned, then anon 332 * may not own it. call out to uvm_anon_lockpage() to ensure 333 * the real owner of the page has been identified and locked. 334 */ 335 336 if (pg && pg->loan_count) 337 pg = uvm_anon_lockloanpg(anon); 338 339 /* 340 * page there? make sure it is not busy/released. 341 */ 342 343 if (pg) { 344 345 /* 346 * at this point, if the page has a uobject [meaning 347 * we have it on loan], then that uobject is locked 348 * by us! if the page is busy, we drop all the 349 * locks (including uobject) and try again. 350 */ 351 352 if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) { 353 UVMHIST_LOG(maphist, "<- OK",0,0,0,0); 354 return(VM_PAGER_OK); 355 } 356 pg->flags |= PG_WANTED; 357 uvmexp.fltpgwait++; 358 359 /* 360 * the last unlock must be an atomic unlock+wait on 361 * the owner of page 362 */ 363 if (pg->uobject) { /* owner is uobject ? */ 364 uvmfault_unlockall(ufi, amap, NULL, anon); 365 UVMHIST_LOG(maphist, " unlock+wait on uobj",0, 366 0,0,0); 367 UVM_UNLOCK_AND_WAIT(pg, 368 &pg->uobject->vmobjlock, 369 FALSE, "anonget1",0); 370 } else { 371 /* anon owns page */ 372 uvmfault_unlockall(ufi, amap, NULL, NULL); 373 UVMHIST_LOG(maphist, " unlock+wait on anon",0, 374 0,0,0); 375 UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0, 376 "anonget2",0); 377 } 378 /* ready to relock and try again */ 379 380 } else { 381 382 /* 383 * no page, we must try and bring it in. 384 */ 385 pg = uvm_pagealloc(NULL, 0, anon); 386 387 if (pg == NULL) { /* out of RAM. */ 388 389 uvmfault_unlockall(ufi, amap, NULL, anon); 390 uvmexp.fltnoram++; 391 UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0, 392 0,0,0); 393 uvm_wait("flt_noram1"); 394 /* ready to relock and try again */ 395 396 } else { 397 398 /* we set the PG_BUSY bit */ 399 we_own = TRUE; 400 uvmfault_unlockall(ufi, amap, NULL, anon); 401 402 /* 403 * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN 404 * page into the uvm_swap_get function with 405 * all data structures unlocked. 406 */ 407 uvmexp.pageins++; 408 result = uvm_swap_get(pg, anon->an_swslot, 409 PGO_SYNCIO); 410 411 /* 412 * we clean up after the i/o below in the 413 * "we_own" case 414 */ 415 /* ready to relock and try again */ 416 } 417 } 418 419 /* 420 * now relock and try again 421 */ 422 423 locked = uvmfault_relock(ufi); 424 if (locked) { 425 simple_lock(&amap->am_l); 426 } 427 if (locked || we_own) 428 simple_lock(&anon->an_lock); 429 430 /* 431 * if we own the page (i.e. we set PG_BUSY), then we need 432 * to clean up after the I/O. there are three cases to 433 * consider: 434 * [1] page released during I/O: free anon and ReFault. 435 * [2] I/O not OK. free the page and cause the fault 436 * to fail. 437 * [3] I/O OK! activate the page and sync with the 438 * non-we_own case (i.e. drop anon lock if not locked). 439 */ 440 441 if (we_own) { 442 443 if (pg->flags & PG_WANTED) { 444 /* still holding object lock */ 445 thread_wakeup(pg); 446 } 447 /* un-busy! 
*/ 448 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); 449 UVM_PAGE_OWN(pg, NULL); 450 451 /* 452 * if we were RELEASED during I/O, then our anon is 453 * no longer part of an amap. we need to free the 454 * anon and try again. 455 */ 456 if (pg->flags & PG_RELEASED) { 457 pmap_page_protect(PMAP_PGARG(pg), 458 VM_PROT_NONE); /* to be safe */ 459 uvm_anfree(anon); /* frees page for us */ 460 if (locked) 461 uvmfault_unlockall(ufi, amap, NULL, NULL); 462 uvmexp.fltpgrele++; 463 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 464 return(VM_PAGER_REFAULT); /* refault! */ 465 } 466 467 if (result != VM_PAGER_OK) { 468 #ifdef DIAGNOSTIC 469 if (result == VM_PAGER_PEND) 470 panic("uvmfault_anonget: got PENDING for non-async I/O"); 471 #endif 472 /* remove page from anon */ 473 anon->u.an_page = NULL; 474 475 /* 476 * note: page was never !PG_BUSY, so it 477 * can't be mapped and thus no need to 478 * pmap_page_protect it... 479 */ 480 uvm_lock_pageq(); 481 uvm_pagefree(pg); 482 uvm_unlock_pageq(); 483 484 if (locked) 485 uvmfault_unlockall(ufi, amap, NULL, 486 anon); 487 else 488 simple_unlock(&anon->an_lock); 489 UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); 490 return(VM_PAGER_ERROR); 491 } 492 493 /* 494 * must be OK, clear modify (already PG_CLEAN) 495 * and activate 496 */ 497 pmap_clear_modify(PMAP_PGARG(pg)); 498 uvm_lock_pageq(); 499 uvm_pageactivate(pg); 500 uvm_unlock_pageq(); 501 if (!locked) 502 simple_unlock(&anon->an_lock); 503 } 504 505 /* 506 * we were not able to relock. restart fault. 507 */ 508 509 if (!locked) { 510 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 511 return(VM_PAGER_REFAULT); 512 } 513 514 /* 515 * verify no one has touched the amap and moved the anon on us. 516 */ 517 518 if (amap_lookup(&ufi->entry->aref, 519 ufi->rvaddr - ufi->entry->start) != anon) { 520 521 uvmfault_unlockall(ufi, amap, NULL, anon); 522 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 523 return(VM_PAGER_REFAULT); 524 } 525 526 /* 527 * try it again! 528 */ 529 530 uvmexp.fltanretry++; 531 continue; 532 533 } /* while (1) */ 534 535 /*NOTREACHED*/ 536 } 537 538 /* 539 * F A U L T - m a i n e n t r y p o i n t 540 */ 541 542 /* 543 * uvm_fault: page fault handler 544 * 545 * => called from MD code to resolve a page fault 546 * => VM data structures usually should be unlocked. however, it is 547 * possible to call here with the main map locked if the caller 548 * gets a write lock, sets it recusive, and then calls us (c.f. 549 * uvm_map_pageable). this should be avoided because it keeps 550 * the map locked off during I/O. 551 */ 552 553 int 554 uvm_fault(orig_map, vaddr, fault_type, access_type) 555 vm_map_t orig_map; 556 vm_offset_t vaddr; 557 vm_fault_t fault_type; 558 vm_prot_t access_type; 559 { 560 struct uvm_faultinfo ufi; 561 vm_prot_t enter_prot; 562 boolean_t wired, narrow, promote, locked, shadowed; 563 int npages, nback, nforw, centeridx, result, lcv, gotpages; 564 vm_offset_t orig_startva, startva, objaddr, currva, pa, offset; 565 struct vm_amap *amap; 566 struct uvm_object *uobj; 567 struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon; 568 struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage; 569 UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist); 570 571 UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, ft=%d, at=%d)", 572 orig_map, vaddr, fault_type, access_type); 573 574 anon = NULL; /* XXX: shut up gcc */ 575 576 uvmexp.faults++; /* XXX: locking? 
*/ 577 578 /* 579 * init the IN parameters in the ufi 580 */ 581 582 ufi.orig_map = orig_map; 583 ufi.orig_rvaddr = trunc_page(vaddr); 584 ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */ 585 if (fault_type == VM_FAULT_WIRE) 586 narrow = TRUE; /* don't look for neighborhood 587 * pages on wire */ 588 else 589 narrow = FALSE; /* normal fault */ 590 591 /* 592 * "goto ReFault" means restart the page fault from ground zero. 593 */ 594 ReFault: 595 596 /* 597 * lookup and lock the maps 598 */ 599 600 if (uvmfault_lookup(&ufi, FALSE) == FALSE) { 601 UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0); 602 return(KERN_INVALID_ADDRESS); 603 } 604 /* locked: maps(read) */ 605 606 /* 607 * check protection 608 */ 609 610 if ((ufi.entry->protection & access_type) != access_type) { 611 UVMHIST_LOG(maphist, 612 "<- protection failure (prot=0x%x, access=0x%x)", 613 ufi.entry->protection, access_type, 0, 0); 614 uvmfault_unlockmaps(&ufi, FALSE); 615 return(KERN_PROTECTION_FAILURE); 616 } 617 618 /* 619 * "enter_prot" is the protection we want to enter the page in at. 620 * for certain pages (e.g. copy-on-write pages) this protection can 621 * be more strict than ufi.entry->protection. "wired" means either 622 * the entry is wired or we are fault-wiring the pg. 623 */ 624 625 enter_prot = ufi.entry->protection; 626 wired = (ufi.entry->wired_count != 0) || (fault_type == VM_FAULT_WIRE); 627 if (wired) 628 access_type = enter_prot; /* full access for wired */ 629 630 /* 631 * handle "needs_copy" case. if we need to copy the amap we will 632 * have to drop our readlock and relock it with a write lock. (we 633 * need a write lock to change anything in a map entry [e.g. 634 * needs_copy]). 635 */ 636 637 if (UVM_ET_ISNEEDSCOPY(ufi.entry)) { 638 if ((access_type & VM_PROT_WRITE) || 639 (ufi.entry->object.uvm_obj == NULL)) { 640 /* need to clear */ 641 UVMHIST_LOG(maphist, 642 " need to clear needs_copy and refault",0,0,0,0); 643 uvmfault_unlockmaps(&ufi, FALSE); 644 uvmfault_amapcopy(&ufi); 645 uvmexp.fltamcopy++; 646 goto ReFault; 647 648 } else { 649 650 /* 651 * ensure that we pmap_enter page R/O since 652 * needs_copy is still true 653 */ 654 enter_prot = enter_prot & ~VM_PROT_WRITE; 655 656 } 657 } 658 659 /* 660 * identify the players 661 */ 662 663 amap = ufi.entry->aref.ar_amap; /* top layer */ 664 uobj = ufi.entry->object.uvm_obj; /* bottom layer */ 665 666 /* 667 * check for a case 0 fault. if nothing backing the entry then 668 * error now. 669 */ 670 671 if (amap == NULL && uobj == NULL) { 672 uvmfault_unlockmaps(&ufi, FALSE); 673 UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); 674 return(KERN_INVALID_ADDRESS); 675 } 676 677 /* 678 * establish range of interest based on advice from mapper 679 * and then clip to fit map entry. note that we only want 680 * to do this the first time through the fault. if we 681 * ReFault we will disable this by setting "narrow" to true. 
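 *
 * (a worked example: with MADV_NORMAL advice and a fault two pages past
 * the start of the entry, nback is clipped from 3 to 2, so with enough
 * room ahead of the fault npages = 2 + 4 + 1 = 7 and centeridx = 2.)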
682 */
683
684 if (narrow == FALSE) {
685
686 /* wide fault (!narrow) */
687 #ifdef DIAGNOSTIC
688 if (uvmadvice[ufi.entry->advice].advice != ufi.entry->advice)
689 panic("fault: advice mismatch!");
690 #endif
691 nback = min(uvmadvice[ufi.entry->advice].nback,
692 (ufi.rvaddr - ufi.entry->start) / PAGE_SIZE);
693 startva = ufi.rvaddr - (nback * PAGE_SIZE);
694 orig_startva = ufi.orig_rvaddr - (nback * PAGE_SIZE);
695 nforw = min(uvmadvice[ufi.entry->advice].nforw,
696 ((ufi.entry->end - ufi.rvaddr) / PAGE_SIZE) - 1);
697 /*
698 * note: "-1" because we don't want to count the
699 * faulting page as forw
700 */
701 npages = nback + nforw + 1;
702 centeridx = nback;
703
704 narrow = TRUE; /* ensure only once per-fault */
705
706 } else {
707
708 /* narrow fault! */
709 nback = nforw = 0;
710 startva = ufi.rvaddr;
711 orig_startva = ufi.orig_rvaddr;
712 npages = 1;
713 centeridx = 0;
714
715 }
716
717 /* locked: maps(read) */
718 UVMHIST_LOG(maphist, " narrow=%d, back=%d, forw=%d, orig_startva=0x%x",
719 narrow, nback, nforw, orig_startva);
720 UVMHIST_LOG(maphist, " entry=0x%x, amap=0x%x, obj=0x%x", ufi.entry,
721 amap, uobj, 0);
722
723 /*
724 * if we've got an amap, lock it and extract current anons.
725 */
726
727 if (amap) {
728 simple_lock(&amap->am_l);
729 anons = anons_store;
730 amap_lookups(&ufi.entry->aref, startva - ufi.entry->start,
731 anons, npages);
732 } else {
733 anons = NULL; /* to be safe */
734 }
735
736 /* locked: maps(read), amap(if there) */
737
738 /*
739 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
740 * now and then forget about them (for the rest of the fault).
741 */
742
743 if (ufi.entry->advice == MADV_SEQUENTIAL) {
744
745 UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages",
746 0,0,0,0);
747 /* flush back-page anons? */
748 if (amap)
749 uvmfault_anonflush(anons, nback);
750
751 /* flush object? */
752 if (uobj) {
753 objaddr =
754 (startva - ufi.entry->start) + ufi.entry->offset;
755 simple_lock(&uobj->vmobjlock);
756 (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr +
757 (nback * PAGE_SIZE), PGO_DEACTIVATE);
758 simple_unlock(&uobj->vmobjlock);
759 }
760
761 /* now forget about the backpages */
762 if (amap)
763 anons += nback;
764 startva = startva + (nback * PAGE_SIZE);
765 orig_startva = orig_startva + (nback * PAGE_SIZE);
766 npages -= nback;
767 nback = centeridx = 0;
768 }
769
770 /* locked: maps(read), amap(if there) */
771
772 /*
773 * map in the backpages and frontpages we found in the amap in hopes
774 * of preventing future faults. we also init the pages[] array as
775 * we go.
776 */
777
778 currva = orig_startva;
779 shadowed = FALSE;
780 for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) {
781
782 /*
783 * don't play with VAs that are already mapped
784 * (except for center)
785 * XXX: return value of pmap_extract disallows PA 0
786 */
787 if (lcv != centeridx) {
788 pa = pmap_extract(ufi.orig_map->pmap, currva);
789 if (pa != NULL) {
790 pages[lcv] = PGO_DONTCARE;
791 continue;
792 }
793 }
794
795 /*
796 * unmapped or center page. check if any anon at this level.
797 */
798 if (amap == NULL || anons[lcv] == NULL) {
799 pages[lcv] = NULL;
800 continue;
801 }
802
803 /*
804 * check for present page and map if possible. re-activate it.
805 */
806
807 pages[lcv] = PGO_DONTCARE;
808 if (lcv == centeridx) { /* save center for later!
*/ 809 shadowed = TRUE; 810 continue; 811 } 812 anon = anons[lcv]; 813 simple_lock(&anon->an_lock); 814 /* ignore loaned pages */ 815 if (anon->u.an_page && anon->u.an_page->loan_count == 0 && 816 (anon->u.an_page->flags & (PG_RELEASED|PG_BUSY)) == 0) { 817 uvm_lock_pageq(); 818 uvm_pageactivate(anon->u.an_page); /* reactivate */ 819 uvm_unlock_pageq(); 820 UVMHIST_LOG(maphist, 821 " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x", 822 ufi.orig_map->pmap, currva, anon->u.an_page, 0); 823 uvmexp.fltnamap++; 824 pmap_enter(ufi.orig_map->pmap, currva, 825 VM_PAGE_TO_PHYS(anon->u.an_page), 826 (anon->an_ref > 1) ? VM_PROT_READ : enter_prot, 827 (ufi.entry->wired_count != 0)); 828 } 829 simple_unlock(&anon->an_lock); 830 } 831 832 /* locked: maps(read), amap(if there) */ 833 /* (shadowed == TRUE) if there is an anon at the faulting address */ 834 UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, 835 (uobj && shadowed == FALSE),0,0); 836 837 /* 838 * note that if we are really short of RAM we could sleep in the above 839 * call to pmap_enter with everything locked. bad? 840 * XXXCDC: this is fixed in PMAP_NEW (no sleep alloc's in pmap) 841 */ 842 843 /* 844 * if the desired page is not shadowed by the amap and we have a 845 * backing object, then we check to see if the backing object would 846 * prefer to handle the fault itself (rather than letting us do it 847 * with the usual pgo_get hook). the backing object signals this by 848 * providing a pgo_fault routine. 849 * 850 * note: pgo_fault can obtain the correct VA for pmap_enter by using: 851 * real_va = [ ufi->orig_rvaddr + (startva - ufi->rvaddr)] 852 */ 853 854 if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) { 855 856 simple_lock(&uobj->vmobjlock); 857 858 /* locked: maps(read), amap (if there), uobj */ 859 result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, 860 centeridx, fault_type, access_type, 861 PGO_LOCKED); 862 /* locked: nothing, pgo_fault has unlocked everything */ 863 864 if (result == VM_PAGER_OK) 865 return(KERN_SUCCESS); /* pgo_fault did pmap enter */ 866 else if (result == VM_PAGER_REFAULT) 867 goto ReFault; /* try again! */ 868 else 869 return(KERN_PROTECTION_FAILURE); 870 } 871 872 /* 873 * now, if the desired page is not shadowed by the amap and we have 874 * a backing object that does not have a special fault routine, then 875 * we ask (with pgo_get) the object for resident pages that we care 876 * about and attempt to map them in. we do not let pgo_get block 877 * (PGO_LOCKED). 878 * 879 * ("get" has the option of doing a pmap_enter for us) 880 */ 881 882 if (uobj && shadowed == FALSE) { 883 simple_lock(&uobj->vmobjlock); 884 885 /* locked (!shadowed): maps(read), amap (if there), uobj */ 886 /* 887 * the following call to pgo_get does _not_ change locking state 888 */ 889 890 uvmexp.fltlget++; 891 gotpages = npages; 892 result = uobj->pgops->pgo_get(uobj, ufi.entry->offset + 893 (startva - ufi.entry->start), 894 pages, &gotpages, centeridx, 895 UVM_ET_ISCOPYONWRITE(ufi.entry) ? 896 VM_PROT_READ : access_type, 897 ufi.entry->advice, PGO_LOCKED); 898 899 /* 900 * check for pages to map, if we got any 901 */ 902 903 uobjpage = NULL; 904 905 if (gotpages) { 906 currva = orig_startva; 907 for (lcv = 0 ; lcv < npages ; 908 lcv++, currva += PAGE_SIZE) { 909 910 if (pages[lcv] == NULL || 911 pages[lcv] == PGO_DONTCARE) 912 continue; 913 914 #ifdef DIAGNOSTIC 915 /* 916 * pager sanity check: pgo_get with 917 * PGO_LOCKED should never return a 918 * released page to us. 
919 */ 920 if (pages[lcv]->flags & PG_RELEASED) 921 panic("uvm_fault: pgo_get PGO_LOCKED gave us a RELEASED page"); 922 #endif 923 924 /* 925 * if center page is resident and not 926 * PG_BUSY|PG_RELEASED then pgo_get 927 * made it PG_BUSY for us and gave 928 * us a handle to it. remember this 929 * page as "uobjpage." (for later use). 930 */ 931 932 if (lcv == centeridx) { 933 uobjpage = pages[lcv]; 934 UVMHIST_LOG(maphist, " got uobjpage (0x%x) with locked get", 935 uobjpage, 0,0,0); 936 continue; 937 } 938 939 /* 940 * note: calling pgo_get with locked data 941 * structures returns us pages which are 942 * neither busy nor released, so we don't 943 * need to check for this. we can just 944 * directly enter the page (after moving it 945 * to the head of the active queue [useful?]). 946 */ 947 948 uvm_lock_pageq(); 949 uvm_pageactivate(pages[lcv]); /* reactivate */ 950 uvm_unlock_pageq(); 951 UVMHIST_LOG(maphist, 952 " MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x", 953 ufi.orig_map->pmap, currva, pages[lcv], 0); 954 uvmexp.fltnomap++; 955 pmap_enter(ufi.orig_map->pmap, currva, 956 VM_PAGE_TO_PHYS(pages[lcv]), 957 UVM_ET_ISCOPYONWRITE(ufi.entry) ? 958 VM_PROT_READ : enter_prot, wired); 959 960 /* 961 * NOTE: page can't be PG_WANTED or PG_RELEASED 962 * because we've held the lock the whole time 963 * we've had the handle. 964 */ 965 pages[lcv]->flags &= ~(PG_BUSY); /* un-busy! */ 966 UVM_PAGE_OWN(pages[lcv], NULL); 967 968 /* done! */ 969 } /* for "lcv" loop */ 970 } /* "gotpages" != 0 */ 971 972 /* note: object still _locked_ */ 973 } else { 974 975 uobjpage = NULL; 976 977 } 978 979 /* locked (shadowed): maps(read), amap */ 980 /* locked (!shadowed): maps(read), amap(if there), 981 uobj(if !null), uobjpage(if !null) */ 982 983 /* 984 * note that at this point we are done with any front or back pages. 985 * we are now going to focus on the center page (i.e. the one we've 986 * faulted on). if we have faulted on the top (anon) layer 987 * [i.e. case 1], then the anon we want is anons[centeridx] (we have 988 * not touched it yet). if we have faulted on the bottom (uobj) 989 * layer [i.e. case 2] and the page was both present and available, 990 * then we've got a pointer to it as "uobjpage" and we've already 991 * made it BUSY. 992 */ 993 994 /* 995 * there are four possible cases we must address: 1A, 1B, 2A, and 2B 996 */ 997 998 /* 999 * redirect case 2: if we are not shadowed, go to case 2. 1000 */ 1001 1002 if (shadowed == FALSE) 1003 goto Case2; 1004 1005 /* locked: maps(read), amap */ 1006 1007 /* 1008 * handle case 1: fault on an anon in our amap 1009 */ 1010 1011 anon = anons[centeridx]; 1012 UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0); 1013 simple_lock(&anon->an_lock); 1014 1015 /* locked: maps(read), amap, anon */ 1016 1017 /* 1018 * no matter if we have case 1A or case 1B we are going to need to 1019 * have the anon's memory resident. ensure that now. 1020 */ 1021 1022 /* 1023 * let uvmfault_anonget do the dirty work. if it fails (!OK) it will 1024 * unlock for us. if it is OK, locks are still valid and locked. 1025 * also, if it is OK, then the anon's page is on the queues. 1026 * if the page is on loan from a uvm_object, then anonget will 1027 * lock that object for us if it does not fail. 
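 *
 * (the result is checked just below: VM_PAGER_OK continues, VM_PAGER_REFAULT
 * and VM_PAGER_AGAIN restart the fault [the latter after a short sleep], and
 * anything else fails the fault.)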
1028 */ 1029 1030 result = uvmfault_anonget(&ufi, amap, anon); 1031 1032 if (result == VM_PAGER_REFAULT) 1033 goto ReFault; 1034 1035 if (result == VM_PAGER_AGAIN) { 1036 tsleep((caddr_t)&lbolt, PVM, "fltagain1", 0); 1037 goto ReFault; 1038 } 1039 1040 if (result != VM_PAGER_OK) 1041 return(KERN_PROTECTION_FAILURE); /* XXX??? */ 1042 1043 /* 1044 * uobj is non null if the page is on loan from an object (i.e. uobj) 1045 */ 1046 1047 uobj = anon->u.an_page->uobject; /* locked by anonget if !NULL */ 1048 1049 /* locked: maps(read), amap, anon, uobj(if one) */ 1050 1051 /* 1052 * special handling for loaned pages 1053 */ 1054 if (anon->u.an_page->loan_count) { 1055 1056 if ((access_type & VM_PROT_WRITE) == 0) { 1057 1058 /* 1059 * for read faults on loaned pages we just cap the 1060 * protection at read-only. 1061 */ 1062 1063 enter_prot = enter_prot & ~VM_PROT_WRITE; 1064 1065 } else { 1066 /* 1067 * note that we can't allow writes into a loaned page! 1068 * 1069 * if we have a write fault on a loaned page in an 1070 * anon then we need to look at the anon's ref count. 1071 * if it is greater than one then we are going to do 1072 * a normal copy-on-write fault into a new anon (this 1073 * is not a problem). however, if the reference count 1074 * is one (a case where we would normally allow a 1075 * write directly to the page) then we need to kill 1076 * the loan before we continue. 1077 */ 1078 1079 /* >1 case is already ok */ 1080 if (anon->an_ref == 1) { 1081 1082 /* get new un-owned replacement page */ 1083 pg = uvm_pagealloc(NULL, 0, NULL); 1084 if (pg == NULL) { 1085 uvmfault_unlockall(&ufi, amap, uobj, 1086 anon); 1087 uvm_wait("flt_noram2"); 1088 goto ReFault; 1089 } 1090 1091 /* 1092 * copy data, kill loan, and drop uobj lock 1093 * (if any) 1094 */ 1095 /* copy old -> new */ 1096 uvm_pagecopy(anon->u.an_page, pg); 1097 1098 /* force reload */ 1099 pmap_page_protect(PMAP_PGARG(anon->u.an_page), 1100 VM_PROT_NONE); 1101 uvm_lock_pageq(); /* KILL loan */ 1102 if (uobj) 1103 /* if we were loaning */ 1104 anon->u.an_page->loan_count--; 1105 anon->u.an_page->uanon = NULL; 1106 /* in case we owned */ 1107 anon->u.an_page->pqflags &= ~PQ_ANON; 1108 uvm_unlock_pageq(); 1109 if (uobj) { 1110 simple_unlock(&uobj->vmobjlock); 1111 uobj = NULL; 1112 } 1113 1114 /* install new page in anon */ 1115 anon->u.an_page = pg; 1116 pg->uanon = anon; 1117 pg->pqflags |= PQ_ANON; 1118 pg->flags &= ~(PG_BUSY|PG_FAKE); 1119 UVM_PAGE_OWN(pg, NULL); 1120 1121 /* done! */ 1122 } /* ref == 1 */ 1123 } /* write fault */ 1124 } /* loan count */ 1125 1126 /* 1127 * if we are case 1B then we will need to allocate a new blank 1128 * anon to transfer the data into. note that we have a lock 1129 * on anon, so no one can busy or release the page until we are done. 1130 * also note that the ref count can't drop to zero here because 1131 * it is > 1 and we are only dropping one ref. 1132 * 1133 * in the (hopefully very rare) case that we are out of RAM we 1134 * will unlock, wait for more RAM, and refault. 1135 * 1136 * if we are out of anon VM we kill the process (XXX: could wait?). 
1137 */ 1138 1139 if ((access_type & VM_PROT_WRITE) != 0 && anon->an_ref > 1) { 1140 1141 UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0); 1142 uvmexp.flt_acow++; 1143 oanon = anon; /* oanon = old, locked anon */ 1144 anon = uvm_analloc(); 1145 if (anon) 1146 pg = uvm_pagealloc(NULL, 0, anon); 1147 #if defined(sparc) 1148 else 1149 pg = NULL; /* XXX: gcc */ 1150 #endif 1151 1152 /* check for out of RAM */ 1153 if (anon == NULL || pg == NULL) { 1154 if (anon) 1155 uvm_anfree(anon); 1156 uvmfault_unlockall(&ufi, amap, uobj, oanon); 1157 if (anon == NULL) { 1158 UVMHIST_LOG(maphist, 1159 "<- failed. out of VM",0,0,0,0); 1160 uvmexp.fltnoanon++; 1161 /* XXX: OUT OF VM, ??? */ 1162 return(KERN_RESOURCE_SHORTAGE); 1163 } 1164 uvmexp.fltnoram++; 1165 uvm_wait("flt_noram3"); /* out of RAM, wait for more */ 1166 goto ReFault; 1167 } 1168 1169 /* got all resources, replace anon with nanon */ 1170 1171 uvm_pagecopy(oanon->u.an_page, pg); /* pg now !PG_CLEAN */ 1172 pg->flags &= ~(PG_BUSY|PG_FAKE); /* un-busy! new page */ 1173 UVM_PAGE_OWN(pg, NULL); 1174 amap_add(&ufi.entry->aref, ufi.rvaddr - ufi.entry->start, 1175 anon, 1); 1176 1177 /* deref: can not drop to zero here by defn! */ 1178 oanon->an_ref--; 1179 1180 /* 1181 * note: oanon still locked. anon is _not_ locked, but we 1182 * have the sole references to in from amap which _is_ locked. 1183 * thus, no one can get at it until we are done with it. 1184 */ 1185 1186 } else { 1187 1188 uvmexp.flt_anon++; 1189 oanon = anon; /* old, locked anon is same as anon */ 1190 pg = anon->u.an_page; 1191 if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ 1192 enter_prot = enter_prot & ~VM_PROT_WRITE; 1193 1194 } 1195 1196 /* locked: maps(read), amap, anon */ 1197 1198 /* 1199 * now map the page in ... 1200 * XXX: old fault unlocks object before pmap_enter. this seems 1201 * suspect since some other thread could blast the page out from 1202 * under us between the unlock and the pmap_enter. 1203 */ 1204 1205 UVMHIST_LOG(maphist, " MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x", 1206 ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0); 1207 pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), 1208 enter_prot, wired); 1209 1210 /* 1211 * ... and update the page queues. 1212 */ 1213 1214 uvm_lock_pageq(); 1215 1216 if (fault_type == VM_FAULT_WIRE) { 1217 uvm_pagewire(pg); 1218 } else { 1219 /* activate it */ 1220 uvm_pageactivate(pg); 1221 1222 } 1223 1224 uvm_unlock_pageq(); 1225 1226 /* 1227 * done case 1! finish up by unlocking everything and returning success 1228 */ 1229 1230 uvmfault_unlockall(&ufi, amap, uobj, oanon); 1231 return(KERN_SUCCESS); 1232 1233 1234 Case2: 1235 /* 1236 * handle case 2: faulting on backing object or zero fill 1237 */ 1238 1239 /* 1240 * locked: 1241 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) 1242 */ 1243 1244 /* 1245 * note that uobjpage can not be PGO_DONTCARE at this point. we now 1246 * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we 1247 * have a backing object, check and see if we are going to promote 1248 * the data up to an anon during the fault. 
1249 */
1250
1251 if (uobj == NULL) {
1252 uobjpage = PGO_DONTCARE;
1253 promote = TRUE; /* always need anon here */
1254 } else {
1255 /* assert(uobjpage != PGO_DONTCARE) */
1256 promote = (access_type & VM_PROT_WRITE) &&
1257 UVM_ET_ISCOPYONWRITE(ufi.entry);
1258 }
1259 UVMHIST_LOG(maphist, " case 2 fault: promote=%d, zfill=%d",
1260 promote, (uobj == NULL), 0,0);
1261
1262 /*
1263 * if uobjpage is not null then we do not need to do I/O to get the
1264 * uobjpage.
1265 *
1266 * if uobjpage is null, then we need to unlock and ask the pager to
1267 * get the data for us. once we have the data, we need to reverify
1268 * the state of the world. we are currently not holding any resources.
1269 */
1270
1271 if (uobjpage) {
1272 /* update rusage counters */
1273 curproc->p_addr->u_stats.p_ru.ru_minflt++;
1274 } else {
1275 /* update rusage counters */
1276 curproc->p_addr->u_stats.p_ru.ru_majflt++;
1277
1278 /* locked: maps(read), amap(if there), uobj */
1279 uvmfault_unlockall(&ufi, amap, NULL, NULL);
1280 /* locked: uobj */
1281
1282 uvmexp.fltget++;
1283 gotpages = 1;
1284 result = uobj->pgops->pgo_get(uobj,
1285 (ufi.rvaddr - ufi.entry->start) + ufi.entry->offset,
1286 &uobjpage, &gotpages, 0,
1287 UVM_ET_ISCOPYONWRITE(ufi.entry) ?
1288 VM_PROT_READ : access_type,
1289 ufi.entry->advice, 0);
1290
1291 /* locked: uobjpage(if result OK) */
1292
1293 /*
1294 * recover from I/O
1295 */
1296
1297 if (result != VM_PAGER_OK) {
1298
1299 #ifdef DIAGNOSTIC
1300 if (result == VM_PAGER_PEND)
1301 panic("uvm_fault: pgo_get got PENDing on non-async I/O");
1302 #endif
1303
1304 if (result == VM_PAGER_AGAIN) {
1305 UVMHIST_LOG(maphist, " pgo_get says TRY AGAIN!",0,0,0,0);
1306 tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0);
1307 goto ReFault;
1308 }
1309
1310 UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)",
1311 result, 0,0,0);
1312 return(KERN_PROTECTION_FAILURE); /* XXX i/o error */
1313 }
1314
1315 /* locked: uobjpage */
1316
1317 /*
1318 * re-verify the state of the world by first trying to relock
1319 * the maps. always relock the object.
1320 */
1321
1322 locked = uvmfault_relock(&ufi);
1323 if (locked && amap)
1324 simple_lock(&amap->am_l);
1325 simple_lock(&uobj->vmobjlock);
1326
1327 /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */
1328 /* locked(!locked): uobj, uobjpage */
1329
1330 /*
1331 * verify that the page has not been released and re-verify
1332 * that amap slot is still free. if there is a problem,
1333 * we unlock and clean up.
1334 */
1335
1336 if ((uobjpage->flags & PG_RELEASED) != 0 ||
1337 (locked && amap &&
1338 amap_lookup(&ufi.entry->aref,
1339 ufi.rvaddr - ufi.entry->start))) {
1340 if (locked)
1341 uvmfault_unlockall(&ufi, amap, NULL, NULL);
1342 locked = FALSE;
1343 }
1344
1345 /*
1346 * didn't get the lock? release the page and retry.
1347 */ 1348 1349 if (locked == FALSE) { 1350 1351 UVMHIST_LOG(maphist, 1352 " wasn't able to relock after fault: retry", 1353 0,0,0,0); 1354 if (uobjpage->flags & PG_WANTED) 1355 /* still holding object lock */ 1356 thread_wakeup(uobjpage); 1357 1358 if (uobjpage->flags & PG_RELEASED) { 1359 uvmexp.fltpgrele++; 1360 #ifdef DIAGNOSTIC 1361 if (uobj->pgops->pgo_releasepg == NULL) 1362 panic("uvm_fault: object has no releasepg function"); 1363 #endif 1364 /* frees page */ 1365 if (uobj->pgops->pgo_releasepg(uobjpage,NULL)) 1366 /* unlock if still alive */ 1367 simple_unlock(&uobj->vmobjlock); 1368 goto ReFault; 1369 } 1370 1371 uvm_lock_pageq(); 1372 /* make sure it is in queues */ 1373 uvm_pageactivate(uobjpage); 1374 1375 uvm_unlock_pageq(); 1376 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1377 UVM_PAGE_OWN(uobjpage, NULL); 1378 simple_unlock(&uobj->vmobjlock); 1379 goto ReFault; 1380 1381 } 1382 1383 /* 1384 * we have the data in uobjpage which is PG_BUSY and 1385 * !PG_RELEASED. we are holding object lock (so the page 1386 * can't be released on us). 1387 */ 1388 1389 /* locked: maps(read), amap(if !null), uobj, uobjpage */ 1390 1391 } 1392 1393 /* 1394 * locked: 1395 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1396 */ 1397 1398 /* 1399 * notes: 1400 * - at this point uobjpage can not be NULL 1401 * - at this point uobjpage can not be PG_RELEASED (since we checked 1402 * for it above) 1403 * - at this point uobjpage could be PG_WANTED (handle later) 1404 */ 1405 1406 if (promote == FALSE) { 1407 1408 /* 1409 * we are not promoting. if the mapping is COW ensure that we 1410 * don't give more access than we should (e.g. when doing a read 1411 * fault on a COPYONWRITE mapping we want to map the COW page in 1412 * R/O even though the entry protection could be R/W). 1413 * 1414 * set "pg" to the page we want to map in (uobjpage, usually) 1415 */ 1416 1417 uvmexp.flt_obj++; 1418 if (UVM_ET_ISCOPYONWRITE(ufi.entry)) 1419 enter_prot = enter_prot & ~VM_PROT_WRITE; 1420 pg = uobjpage; /* map in the actual object */ 1421 1422 /* assert(uobjpage != PGO_DONTCARE) */ 1423 1424 /* 1425 * we are faulting directly on the page. be careful 1426 * about writing to loaned pages... 1427 */ 1428 if (uobjpage->loan_count) { 1429 1430 if ((access_type & VM_PROT_WRITE) == 0) { 1431 /* read fault: cap the protection at readonly */ 1432 /* cap! */ 1433 enter_prot = enter_prot & ~VM_PROT_WRITE; 1434 } else { 1435 /* write fault: must break the loan here */ 1436 1437 /* alloc new un-owned page */ 1438 pg = uvm_pagealloc(NULL, 0, NULL); 1439 1440 if (pg == NULL) { 1441 /* 1442 * drop ownership of page, it can't 1443 * be released 1444 * */ 1445 if (uobjpage->flags & PG_WANTED) 1446 thread_wakeup(uobjpage); 1447 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1448 UVM_PAGE_OWN(uobjpage, NULL); 1449 1450 uvm_lock_pageq(); 1451 /* activate: we will need it later */ 1452 uvm_pageactivate(uobjpage); 1453 1454 uvm_unlock_pageq(); 1455 uvmfault_unlockall(&ufi, amap, uobj, 1456 NULL); 1457 UVMHIST_LOG(maphist, 1458 " out of RAM breaking loan, waiting", 0,0,0,0); 1459 uvmexp.fltnoram++; 1460 uvm_wait("flt_noram4"); 1461 goto ReFault; 1462 } 1463 1464 /* 1465 * copy the data from the old page to the new 1466 * one and clear the fake/clean flags on the 1467 * new page (keep it busy). force a reload 1468 * of the old page by clearing it from all 1469 * pmaps. then lock the page queues to 1470 * rename the pages. 
1471 */
1472 uvm_pagecopy(uobjpage, pg); /* old -> new */
1473 pg->flags &= ~(PG_FAKE|PG_CLEAN);
1474 pmap_page_protect(PMAP_PGARG(uobjpage),
1475 VM_PROT_NONE);
1476 if (uobjpage->flags & PG_WANTED)
1477 thread_wakeup(uobjpage);
1478 /* uobj still locked */
1479 uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
1480 UVM_PAGE_OWN(uobjpage, NULL);
1481
1482 uvm_lock_pageq();
1483 offset = uobjpage->offset;
1484 /* remove old page */
1485 uvm_pagerealloc(uobjpage, NULL, 0);
1486
1487 /*
1488 * at this point we have absolutely no
1489 * control over uobjpage
1490 */
1491 /* install new page */
1492 uvm_pagerealloc(pg, uobj, offset);
1493 uvm_unlock_pageq();
1494
1495 /*
1496 * done! loan is broken and "pg" is
1497 * PG_BUSY. it can now replace uobjpage.
1498 */
1499
1500 uobjpage = pg;
1501
1502 } /* write fault case */
1503 } /* if loan_count */
1504
1505 } else {
1506
1507 /*
1508 * if we are going to promote the data to an anon we
1509 * allocate a blank anon here and plug it into our amap.
1510 */
1511 #ifdef DIAGNOSTIC
1512 if (amap == NULL)
1513 panic("uvm_fault: want to promote data, but no anon");
1514 #endif
1515
1516 anon = uvm_analloc();
1517 if (anon)
1518 pg = uvm_pagealloc(NULL, 0, anon); /* BUSY+CLEAN+FAKE */
1519 #if defined(sparc)
1520 else
1521 pg = NULL; /* XXX: gcc */
1522 #endif
1523
1524 /*
1525 * out of memory resources?
1526 */
1527 if (anon == NULL || pg == NULL) {
1528
1529 /*
1530 * arg! must unbusy our page and fail or sleep.
1531 */
1532 if (uobjpage != PGO_DONTCARE) {
1533 if (uobjpage->flags & PG_WANTED)
1534 /* still holding object lock */
1535 thread_wakeup(uobjpage);
1536
1537 uvm_lock_pageq();
1538 /* make sure it is in queues */
1539 uvm_pageactivate(uobjpage);
1540 uvm_unlock_pageq();
1541 /* un-busy! (still locked) */
1542 uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
1543 UVM_PAGE_OWN(uobjpage, NULL);
1544 }
1545
1546 /* unlock and fail ... */
1547 uvmfault_unlockall(&ufi, amap, uobj, NULL);
1548 if (anon == NULL) {
1549 UVMHIST_LOG(maphist, " promote: out of VM",
1550 0,0,0,0);
1551 uvmexp.fltnoanon++;
1552 /* XXX: out of VM */
1553 return(KERN_RESOURCE_SHORTAGE);
1554 }
1555 UVMHIST_LOG(maphist, " out of RAM, waiting for more",
1556 0,0,0,0);
1557 uvm_anfree(anon);
1558 uvmexp.fltnoram++;
1559 uvm_wait("flt_noram5");
1560 goto ReFault;
1561 }
1562
1563 /*
1564 * fill in the data
1565 */
1566
1567 if (uobjpage != PGO_DONTCARE) {
1568 uvmexp.flt_prcopy++;
1569 /* copy page [pg now dirty] */
1570 uvm_pagecopy(uobjpage, pg);
1571
1572 /*
1573 * promote to shared amap? make sure all sharing
1574 * procs see it
1575 */
1576 if ((amap->am_flags & AMAP_SHARED) != 0) {
1577 pmap_page_protect(PMAP_PGARG(uobjpage),
1578 VM_PROT_NONE);
1579 }
1580
1581 /*
1582 * dispose of uobjpage. it can't be PG_RELEASED
1583 * since we still hold the object lock. drop
1584 * handle to uobj as well.
1585 */ 1586 1587 if (uobjpage->flags & PG_WANTED) 1588 /* still have the obj lock */ 1589 thread_wakeup(uobjpage); 1590 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1591 UVM_PAGE_OWN(uobjpage, NULL); 1592 uvm_lock_pageq(); 1593 uvm_pageactivate(uobjpage); /* put it back */ 1594 uvm_unlock_pageq(); 1595 simple_unlock(&uobj->vmobjlock); 1596 uobj = NULL; 1597 UVMHIST_LOG(maphist, 1598 " promote uobjpage 0x%x to anon/page 0x%x/0x%x", 1599 uobjpage, anon, pg, 0); 1600 1601 } else { 1602 uvmexp.flt_przero++; 1603 uvm_pagezero(pg); /* zero page [pg now dirty] */ 1604 UVMHIST_LOG(maphist," zero fill anon/page 0x%x/0%x", 1605 anon, pg, 0, 0); 1606 } 1607 1608 amap_add(&ufi.entry->aref, ufi.rvaddr - ufi.entry->start, 1609 anon, 0); 1610 1611 } 1612 1613 /* 1614 * locked: 1615 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1616 * 1617 * note: pg is either the uobjpage or the new page in the new anon 1618 */ 1619 1620 /* 1621 * all resources are present. we can now map it in and free our 1622 * resources. 1623 */ 1624 1625 UVMHIST_LOG(maphist, 1626 " MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d", 1627 ufi.orig_map->pmap, ufi.orig_rvaddr, pg, promote); 1628 pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), 1629 enter_prot, wired); 1630 1631 uvm_lock_pageq(); 1632 1633 if (fault_type == VM_FAULT_WIRE) { 1634 uvm_pagewire(pg); 1635 } else { 1636 1637 /* activate it */ 1638 uvm_pageactivate(pg); 1639 1640 } 1641 1642 uvm_unlock_pageq(); 1643 1644 if (pg->flags & PG_WANTED) 1645 thread_wakeup(pg); /* lock still held */ 1646 1647 /* 1648 * note that pg can't be PG_RELEASED since we did not drop the object 1649 * lock since the last time we checked. 1650 */ 1651 1652 pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); 1653 UVM_PAGE_OWN(pg, NULL); 1654 uvmfault_unlockall(&ufi, amap, uobj, NULL); 1655 1656 UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); 1657 return(KERN_SUCCESS); 1658 } 1659 1660 1661 /* 1662 * uvm_fault_wire: wire down a range of virtual addresses in a map. 1663 * 1664 * => map should be locked by caller? If so how can we call 1665 * uvm_fault? WRONG. 1666 * => XXXCDC: locking here is all screwed up!!! start with 1667 * uvm_map_pageable and fix it. 1668 */ 1669 1670 int 1671 uvm_fault_wire(map, start, end) 1672 vm_map_t map; 1673 vm_offset_t start, end; 1674 { 1675 vm_offset_t va; 1676 pmap_t pmap; 1677 int rv; 1678 1679 pmap = vm_map_pmap(map); 1680 1681 /* 1682 * call pmap pageable: this tells the pmap layer to lock down these 1683 * page tables. 1684 */ 1685 1686 pmap_pageable(pmap, start, end, FALSE); 1687 1688 /* 1689 * now fault it in page at a time. if the fault fails then we have 1690 * to undo what we have done. note that in uvm_fault VM_PROT_NONE 1691 * is replaced with the max protection if fault_type is VM_FAULT_WIRE. 1692 */ 1693 1694 for (va = start ; va < end ; va += PAGE_SIZE) { 1695 rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE); 1696 if (rv) { 1697 if (va != start) { 1698 uvm_fault_unwire(map->pmap, start, va); 1699 } 1700 return(rv); 1701 } 1702 } 1703 1704 return(KERN_SUCCESS); 1705 } 1706 1707 /* 1708 * uvm_fault_unwire(): unwire range of virtual space. 1709 * 1710 * => caller holds reference to pmap (via its map) 1711 */ 1712 1713 void 1714 uvm_fault_unwire(pmap, start, end) 1715 struct pmap *pmap; 1716 vm_offset_t start, end; 1717 { 1718 vm_offset_t va, pa; 1719 struct vm_page *pg; 1720 1721 /* 1722 * we assume that the area we are unwiring has actually been wired 1723 * in the first place. 
this means that we should be able to extract 1724 * the PAs from the pmap. we also lock out the page daemon so that 1725 * we can call uvm_pageunwire. 1726 */ 1727 1728 uvm_lock_pageq(); 1729 1730 for (va = start; va < end ; va += PAGE_SIZE) { 1731 pa = pmap_extract(pmap, va); 1732 1733 /* XXX: assumes PA 0 cannot be in map */ 1734 if (pa == (vm_offset_t) 0) { 1735 panic("uvm_fault_unwire: unwiring non-wired memory"); 1736 } 1737 pmap_change_wiring(pmap, va, FALSE); /* tell the pmap */ 1738 pg = PHYS_TO_VM_PAGE(pa); 1739 if (pg) 1740 uvm_pageunwire(pg); 1741 } 1742 1743 uvm_unlock_pageq(); 1744 1745 /* 1746 * now we call pmap_pageable to let the pmap know that the page tables 1747 * in this space no longer need to be wired. 1748 */ 1749 1750 pmap_pageable(pmap, start, end, TRUE); 1751 1752 } 1753
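
/*
 * usage sketch (hypothetical caller, for illustration only): a routine
 * in the style of uvm_map_pageable() might wire, use, and later unwire
 * a page-aligned range roughly like this:
 *
 *	int error;
 *
 *	error = uvm_fault_wire(map, start, end);
 *	if (error != KERN_SUCCESS)
 *		return(error);
 *	... the range [start, end) is now wired and safe to touch ...
 *	uvm_fault_unwire(vm_map_pmap(map), start, end);
 */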