/*	$OpenBSD: uvm_fault.c,v 1.2 1999/02/26 05:32:06 art Exp $	*/
/*	$NetBSD: uvm_fault.c,v 1.19 1999/01/24 23:53:15 chuck Exp $	*/

/*
 * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
 *         >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
 */
/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
 */

/*
 * uvm_fault.c: fault handler
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

#include <uvm/uvm.h>

/*
 *
 * a word on page faults:
 *
 * types of page faults we handle:
 *
 * CASE 1: upper layer faults                   CASE 2: lower layer faults
 *
 *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
 *    read/write1     write>1                  read/write   +-cow_write/zero
 *         |             |                         |        |
 *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
 * amap |  V  |       |  ----------->new|          |        | |  ^  |
 *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
 *                                                 |        |    |
 *      +-----+       +-----+                   +--|--+     | +--|--+
 * uobj | d/c |       | d/c |                   |  V  |     +----|  |
 *      +-----+       +-----+                   +-----+       +-----+
 *
 * d/c = don't care
 *
 * case [0]: layerless fault
 *	no amap or uobj is present.   this is an error.
 *
 * case [1]: upper layer fault [anon active]
 *   1A: [read] or [write with anon->an_ref == 1]
 *	I/O takes place in top level anon and uobj is not touched.
 *   1B: [write with anon->an_ref > 1]
 *	new anon is alloc'd and data is copied off ["COW"]
 *
 * case [2]: lower layer fault [uobj]
 *   2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
 *	I/O takes place directly in object.
 *   2B: [write to copy_on_write] or [read on NULL uobj]
 *	data is "promoted" from uobj to a new anon.
 *	if uobj is null, then we zero fill.
 *
 * we follow the standard UVM locking protocol ordering:
 *
 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
 * we hold a PG_BUSY page if we unlock for I/O
 *
 *
 * the code is structured as follows:
 *
 *     - init the "IN" params in the ufi structure
 *   ReFault:
 *     - do lookups [locks maps], check protection, handle needs_copy
 *     - check for case 0 fault (error)
 *     - establish "range" of fault
 *     - if we have an amap lock it and extract the anons
 *     - if sequential advice deactivate pages behind us
 *     - at the same time check pmap for unmapped areas and anon for pages
 *	 that we could map in (and map them in if found)
 *     - check object for resident pages that we could map in
 *     - if (case 2) goto Case2
 *     - >>> handle case 1
 *           - ensure source anon is resident in RAM
 *           - if case 1B alloc new anon and copy from source
 *           - map the correct page in
 *   Case2:
 *     - >>> handle case 2
 *           - ensure source page is resident (if uobj)
 *           - if case 2B alloc new anon and copy from source (could be zero
 *             fill if uobj == NULL)
 *           - map the correct page in
 *     - done!
 *
 * note on paging:
 *   if we have to do I/O we place a PG_BUSY page in the correct object,
 * unlock everything, and do the I/O.   when I/O is done we must reverify
 * the state of the world before assuming that our data structures are
 * valid.   [because mappings could change while the map is unlocked]
 *
 * alternative 1: unbusy the page in question and restart the page fault
 *    from the top (ReFault).   this is easy but does not take advantage
 *    of the information that we already have from our previous lookup,
 *    although it is possible that the "hints" in the vm_map will help here.
 *
 * alternative 2: the system already keeps track of a "version" number of
 *    a map.   [i.e. every time you write-lock a map (e.g. to change a
 *    mapping) you bump the version number up by one...]   so, we can save
 *    the version number of the map before we release the lock and start I/O.
 *    then when I/O is done we can relock and check the version numbers
 *    to see if anything changed.   this might save us something over
 *    alternative 1 because we don't have to unbusy the page and it may
 *    mean fewer compares(?).
 *
 * alternative 3: put in backpointers or a way to "hold" part of a map
 *    in place while I/O is in progress.   this could be complex to
 *    implement (especially with structures like amap that can be referenced
 *    by multiple map entries, and figuring out what should wait could be
 *    complex as well...).
 *
 * given that we are not currently multiprocessor or multithreaded we might
 * as well choose alternative 2 now.   maybe alternative 3 would be useful
 * in the future.    XXX keep in mind for future consideration//rechecking.
 */

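/*
 * illustrative sketch (not part of the original code, never compiled): the
 * "alternative 2" relock pattern described above, assuming the vm_map
 * "timestamp" field is bumped on every write-lock.  uvmfault_relock()
 * (see uvm_fault_i.h) is the real helper used below; this only shows the
 * shape of the check.
 */
#if 0
static boolean_t
example_version_relock(struct uvm_faultinfo *ufi)
{
	unsigned int saved_version = ufi->map->timestamp; /* save, then unlock */

	/* ... drop locks, start I/O, wait for it to complete ... */

	vm_map_lock_read(ufi->map);		/* relock the map */
	if (ufi->map->timestamp != saved_version) {
		vm_map_unlock_read(ufi->map);	/* world changed: ReFault */
		return (FALSE);
	}
	return (TRUE);				/* our lookup is still valid */
}
#endif
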
/*
 * local data structures
 */

struct uvm_advice {
	int advice;
	int nback;
	int nforw;
};

/*
 * page range array:
 * note: index in array must match "advice" value
 * XXX: borrowed numbers from freebsd.   do they work well for us?
 */

static struct uvm_advice uvmadvice[] = {
	{ MADV_NORMAL, 3, 4 },
	{ MADV_RANDOM, 0, 0 },
	{ MADV_SEQUENTIAL, 8, 7},
};

#define UVM_MAXRANGE 16	/* must be max() of nback+nforw+1 */

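/*
 * worked example (illustrative; the numbers follow from the table above
 * and the "wide fault" setup in uvm_fault below): for a MADV_NORMAL fault
 * the code tries to map up to nback = 3 pages behind and nforw = 4 pages
 * in front of the faulting page, each clipped to the map entry, so at
 * most npages = nback + nforw + 1 = 8 pages are considered.
 * MADV_SEQUENTIAL gives 8 + 7 + 1 = 16, which is why UVM_MAXRANGE is 16.
 */
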
/*
 * private prototypes
 */

static void uvmfault_amapcopy __P((struct uvm_faultinfo *));
static __inline void uvmfault_anonflush __P((struct vm_anon **, int));

/*
 * inline functions
 */

/*
 * uvmfault_anonflush: try and deactivate pages in specified anons
 *
 * => does not have to deactivate page if it is busy
 */

static __inline void
uvmfault_anonflush(anons, n)
	struct vm_anon **anons;
	int n;
{
	int lcv;
	struct vm_page *pg;

	for (lcv = 0 ; lcv < n ; lcv++) {
		if (anons[lcv] == NULL)
			continue;
		simple_lock(&anons[lcv]->an_lock);
		pg = anons[lcv]->u.an_page;
		if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
			uvm_lock_pageq();
			if (pg->wire_count == 0) {
				pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
				uvm_pagedeactivate(pg);
			}
			uvm_unlock_pageq();
		}
		simple_unlock(&anons[lcv]->an_lock);
	}
}

/*
 * normal functions
 */

/*
 * uvmfault_amapcopy: clear "needs_copy" in a map.
 *
 * => called with VM data structures unlocked (usually, see below)
 * => we get a write lock on the maps and clear needs_copy for a VA
 * => if we are out of RAM we sleep (waiting for more)
 */

static void
uvmfault_amapcopy(ufi)
	struct uvm_faultinfo *ufi;
{

	/*
	 * while we haven't done the job
	 */

	while (1) {

		/*
		 * no mapping?   give up.
		 */

		if (uvmfault_lookup(ufi, TRUE) == FALSE)
			return;

		/*
		 * copy if needed.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry))
			amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE,
			    ufi->orig_rvaddr, ufi->orig_rvaddr + 1);

		/*
		 * didn't work?   must be out of RAM.   unlock and sleep.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
			uvmfault_unlockmaps(ufi, TRUE);
			uvm_wait("fltamapcopy");
			continue;
		}

		/*
		 * got it!   unlock and return.
		 */

		uvmfault_unlockmaps(ufi, TRUE);
		return;
	}
	/*NOTREACHED*/
}

/*
 * uvmfault_anonget: get data in an anon into a non-busy, non-released
 * page in that anon.
 *
 * => maps, amap, and anon locked by caller.
 * => if we fail (result != VM_PAGER_OK) we unlock everything.
 * => if we are successful, we return with everything still locked.
 * => we don't move the page on the queues [gets moved later]
 * => if we allocate a new page [we_own], it gets put on the queues.
 *    either way, the result is that the page is on the queues at return time
 * => for pages which are on loan from a uvm_object (and thus are not
 *    owned by the anon): if successful, we return with the owning object
 *    locked.   the caller must unlock this object when it unlocks everything
 *    else.
 */

int
uvmfault_anonget(ufi, amap, anon)
	struct uvm_faultinfo *ufi;
	struct vm_amap *amap;
	struct vm_anon *anon;
{
	boolean_t we_own;	/* we own anon's page? */
	boolean_t locked;	/* did we relock? */
	struct vm_page *pg;
	int result;
	UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist);

	result = 0;		/* XXX shut up gcc */
	uvmexp.fltanget++;
	/* bump rusage counters */
	if (anon->u.an_page)
		curproc->p_addr->u_stats.p_ru.ru_minflt++;
	else
		curproc->p_addr->u_stats.p_ru.ru_majflt++;

	/*
	 * loop until we get it, or fail.
	 */

	while (1) {

		we_own = FALSE;		/* TRUE if we set PG_BUSY on a page */
		pg = anon->u.an_page;

		/*
		 * if there is a resident page and it is loaned, then anon
		 * may not own it.   call out to uvm_anon_lockloanpg() to
		 * ensure the real owner of the page has been identified
		 * and locked.
		 */

		if (pg && pg->loan_count)
			pg = uvm_anon_lockloanpg(anon);

		/*
		 * page there?   make sure it is not busy/released.
		 */

		if (pg) {

			/*
			 * at this point, if the page has a uobject [meaning
			 * we have it on loan], then that uobject is locked
			 * by us!   if the page is busy, we drop all the
			 * locks (including uobject) and try again.
			 */

			if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) {
				UVMHIST_LOG(maphist, "<- OK",0,0,0,0);
				return (VM_PAGER_OK);
			}
			pg->flags |= PG_WANTED;
			uvmexp.fltpgwait++;

			/*
			 * the last unlock must be an atomic unlock+wait on
			 * the owner of page
			 */
			if (pg->uobject) {	/* owner is uobject ? */
				uvmfault_unlockall(ufi, amap, NULL, anon);
				UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
				    0,0,0);
				UVM_UNLOCK_AND_WAIT(pg,
				    &pg->uobject->vmobjlock,
				    FALSE, "anonget1",0);
			} else {
				/* anon owns page */
				uvmfault_unlockall(ufi, amap, NULL, NULL);
				UVMHIST_LOG(maphist, " unlock+wait on anon",0,
				    0,0,0);
				UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0,
				    "anonget2",0);
			}
			/* ready to relock and try again */

		} else {

			/*
			 * no page, we must try and bring it in.
			 */
			pg = uvm_pagealloc(NULL, 0, anon);

			if (pg == NULL) {		/* out of RAM.  */

				uvmfault_unlockall(ufi, amap, NULL, anon);
				uvmexp.fltnoram++;
				UVMHIST_LOG(maphist, "  noram -- UVM_WAIT",0,
				    0,0,0);
				uvm_wait("flt_noram1");
				/* ready to relock and try again */

			} else {

				/* we set the PG_BUSY bit */
				we_own = TRUE;
				uvmfault_unlockall(ufi, amap, NULL, anon);

				/*
				 * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN
				 * page into the uvm_swap_get function with
				 * all data structures unlocked.  note that
				 * it is ok to read an_swslot here because
				 * we hold PG_BUSY on the page.
				 */
				uvmexp.pageins++;
				result = uvm_swap_get(pg, anon->an_swslot,
				    PGO_SYNCIO);

				/*
				 * we clean up after the i/o below in the
				 * "we_own" case
				 */
				/* ready to relock and try again */
			}
		}

		/*
		 * now relock and try again
		 */

		locked = uvmfault_relock(ufi);
		if (locked) {
			amap_lock(amap);
		}
		if (locked || we_own)
			simple_lock(&anon->an_lock);

		/*
		 * if we own the page (i.e. we set PG_BUSY), then we need
		 * to clean up after the I/O.  there are three cases to
		 * consider:
		 *   [1] page released during I/O: free anon and ReFault.
		 *   [2] I/O not OK.   free the page and cause the fault
		 *       to fail.
		 *   [3] I/O OK!   activate the page and sync with the
		 *       non-we_own case (i.e. drop anon lock if not locked).
		 */

		if (we_own) {

			if (pg->flags & PG_WANTED) {
				/* still holding object lock */
				thread_wakeup(pg);
			}
			/* un-busy! */
			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);

			/*
			 * if we were RELEASED during I/O, then our anon is
			 * no longer part of an amap.   we need to free the
			 * anon and try again.
			 */
			if (pg->flags & PG_RELEASED) {
				pmap_page_protect(PMAP_PGARG(pg),
				    VM_PROT_NONE);	/* to be safe */
				simple_unlock(&anon->an_lock);
				uvm_anfree(anon);	/* frees page for us */
				if (locked)
					uvmfault_unlockall(ufi, amap, NULL, NULL);
				uvmexp.fltpgrele++;
				UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
				return (VM_PAGER_REFAULT);	/* refault! */
			}

			if (result != VM_PAGER_OK) {
#ifdef DIAGNOSTIC
				if (result == VM_PAGER_PEND)
					panic("uvmfault_anonget: got PENDING for non-async I/O");
#endif
				/* remove page from anon */
				anon->u.an_page = NULL;

				/*
				 * note: page was never !PG_BUSY, so it
				 * can't be mapped and thus no need to
				 * pmap_page_protect it...
				 */
				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();

				if (locked)
					uvmfault_unlockall(ufi, amap, NULL,
					    anon);
				else
					simple_unlock(&anon->an_lock);
				UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0);
				return (VM_PAGER_ERROR);
			}

			/*
			 * must be OK, clear modify (already PG_CLEAN)
			 * and activate
			 */
			pmap_clear_modify(PMAP_PGARG(pg));
			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			if (!locked)
				simple_unlock(&anon->an_lock);
		}

		/*
		 * we were not able to relock.   restart fault.
		 */

		if (!locked) {
			UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
			return (VM_PAGER_REFAULT);
		}

		/*
		 * verify no one has touched the amap and moved the anon on us.
		 */

		if (amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start) != anon) {

			uvmfault_unlockall(ufi, amap, NULL, anon);
			UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
			return (VM_PAGER_REFAULT);
		}

		/*
		 * try it again!
		 */

		uvmexp.fltanretry++;
		continue;

	} /* while (1) */

	/*NOTREACHED*/
}

/*
 * F A U L T   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_fault: page fault handler
 *
 * => called from MD code to resolve a page fault
 * => VM data structures usually should be unlocked.   however, it is
 *	possible to call here with the main map locked if the caller
 *	gets a write lock, sets it recursive, and then calls us (c.f.
 *	uvm_map_pageable).   this should be avoided because it keeps
 *	the map locked during I/O.
 */

int
uvm_fault(orig_map, vaddr, fault_type, access_type)
	vm_map_t orig_map;
	vaddr_t vaddr;
	vm_fault_t fault_type;
	vm_prot_t access_type;
{
	struct uvm_faultinfo ufi;
	vm_prot_t enter_prot;
	boolean_t wired, narrow, promote, locked, shadowed;
	int npages, nback, nforw, centeridx, result, lcv, gotpages;
	vaddr_t startva, objaddr, currva, offset;
	paddr_t pa;
	struct vm_amap *amap;
	struct uvm_object *uobj;
	struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
	struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
	UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, ft=%d, at=%d)",
	    orig_map, vaddr, fault_type, access_type);

	anon = NULL;		/* XXX: shut up gcc */

	uvmexp.faults++;	/* XXX: locking? */

	/*
	 * init the IN parameters in the ufi
	 */

	ufi.orig_map = orig_map;
	ufi.orig_rvaddr = trunc_page(vaddr);
	ufi.orig_size = PAGE_SIZE;	/* can't get any smaller than this */
	if (fault_type == VM_FAULT_WIRE)
		narrow = TRUE;		/* don't look for neighborhood
					 * pages on wire */
	else
		narrow = FALSE;		/* normal fault */

	/*
	 * "goto ReFault" means restart the page fault from ground zero.
	 */
ReFault:

	/*
	 * lookup and lock the maps
	 */

	if (uvmfault_lookup(&ufi, FALSE) == FALSE) {
		UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0);
		return (KERN_INVALID_ADDRESS);
	}
	/* locked: maps(read) */

	/*
	 * check protection
	 */

	if ((ufi.entry->protection & access_type) != access_type) {
		UVMHIST_LOG(maphist,
		    "<- protection failure (prot=0x%x, access=0x%x)",
		    ufi.entry->protection, access_type, 0, 0);
		uvmfault_unlockmaps(&ufi, FALSE);
		return (KERN_PROTECTION_FAILURE);
	}

	/*
	 * "enter_prot" is the protection we want to enter the page in at.
	 * for certain pages (e.g. copy-on-write pages) this protection can
	 * be more strict than ufi.entry->protection.  "wired" means either
	 * the entry is wired or we are fault-wiring the page.
	 */

	enter_prot = ufi.entry->protection;
	wired = (ufi.entry->wired_count != 0) || (fault_type == VM_FAULT_WIRE);
	if (wired)
		access_type = enter_prot;	/* full access for wired */

	/*
	 * handle "needs_copy" case.   if we need to copy the amap we will
	 * have to drop our readlock and relock it with a write lock.   (we
	 * need a write lock to change anything in a map entry [e.g.
	 * needs_copy]).
	 */

	if (UVM_ET_ISNEEDSCOPY(ufi.entry)) {
		if ((access_type & VM_PROT_WRITE) ||
		    (ufi.entry->object.uvm_obj == NULL)) {
			/* need to clear */
			UVMHIST_LOG(maphist,
			    "  need to clear needs_copy and refault",0,0,0,0);
			uvmfault_unlockmaps(&ufi, FALSE);
			uvmfault_amapcopy(&ufi);
			uvmexp.fltamcopy++;
			goto ReFault;

		} else {

			/*
			 * ensure that we pmap_enter page R/O since
			 * needs_copy is still true
			 */
			enter_prot = enter_prot & ~VM_PROT_WRITE;

		}
	}

	/*
	 * identify the players
	 */

	amap = ufi.entry->aref.ar_amap;		/* top layer */
	uobj = ufi.entry->object.uvm_obj;	/* bottom layer */

	/*
	 * check for a case 0 fault.   if nothing backing the entry then
	 * error now.
	 */

	if (amap == NULL && uobj == NULL) {
		uvmfault_unlockmaps(&ufi, FALSE);
		UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0);
		return (KERN_INVALID_ADDRESS);
	}

	/*
	 * establish range of interest based on advice from mapper
	 * and then clip to fit map entry.   note that we only want
	 * to do this the first time through the fault.   if we
	 * ReFault we will disable this by setting "narrow" to true.
	 */

	if (narrow == FALSE) {

		/* wide fault (!narrow) */
#ifdef DIAGNOSTIC
		if (uvmadvice[ufi.entry->advice].advice != ufi.entry->advice)
			panic("fault: advice mismatch!");
#endif
		nback = min(uvmadvice[ufi.entry->advice].nback,
		    (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT);
		startva = ufi.orig_rvaddr - (nback << PAGE_SHIFT);
		nforw = min(uvmadvice[ufi.entry->advice].nforw,
		    ((ufi.entry->end - ufi.orig_rvaddr) >>
		    PAGE_SHIFT) - 1);
		/*
		 * note: "-1" because we don't want to count the
		 * faulting page as forw
		 */
		npages = nback + nforw + 1;
		centeridx = nback;

		narrow = TRUE;		/* ensure only once per-fault */

	} else {

		/* narrow fault! */
		nback = nforw = 0;
		startva = ufi.orig_rvaddr;
		npages = 1;
		centeridx = 0;

	}

	/* locked: maps(read) */
	UVMHIST_LOG(maphist, "  narrow=%d, back=%d, forw=%d, startva=0x%x",
	    narrow, nback, nforw, startva);
	UVMHIST_LOG(maphist, "  entry=0x%x, amap=0x%x, obj=0x%x", ufi.entry,
	    amap, uobj, 0);

	/*
	 * if we've got an amap, lock it and extract current anons.
	 */

	if (amap) {
		amap_lock(amap);
		anons = anons_store;
		amap_lookups(&ufi.entry->aref, startva - ufi.entry->start,
		    anons, npages);
	} else {
		anons = NULL;	/* to be safe */
	}

	/* locked: maps(read), amap(if there) */

	/*
	 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
	 * now and then forget about them (for the rest of the fault).
	 */

	if (ufi.entry->advice == MADV_SEQUENTIAL) {

		UVMHIST_LOG(maphist, "  MADV_SEQUENTIAL: flushing backpages",
		    0,0,0,0);
		/* flush back-page anons? */
		if (amap)
			uvmfault_anonflush(anons, nback);

		/* flush object? */
		if (uobj) {
			objaddr =
			    (startva - ufi.entry->start) + ufi.entry->offset;
			simple_lock(&uobj->vmobjlock);
			(void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr +
			    (nback << PAGE_SHIFT), PGO_DEACTIVATE);
			simple_unlock(&uobj->vmobjlock);
		}

		/* now forget about the backpages */
		if (amap)
			anons += nback;
		startva = startva + (nback << PAGE_SHIFT);
		npages -= nback;
		nback = centeridx = 0;
	}

	/* locked: maps(read), amap(if there) */

	/*
	 * map in the backpages and frontpages we found in the amap in hopes
	 * of preventing future faults.    we also init the pages[] array as
	 * we go.
	 */

	currva = startva;
	shadowed = FALSE;
	for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) {

		/*
		 * don't play with VAs that are already mapped
		 * (except for the center page)
		 * XXX: return value of pmap_extract disallows PA 0
		 */
		if (lcv != centeridx) {
			pa = pmap_extract(ufi.orig_map->pmap, currva);
			if (pa != NULL) {
				pages[lcv] = PGO_DONTCARE;
				continue;
			}
		}

		/*
		 * unmapped or center page.  check if any anon at this level.
		 */
		if (amap == NULL || anons[lcv] == NULL) {
			pages[lcv] = NULL;
			continue;
		}

		/*
		 * check for present page and map if possible.  re-activate it.
		 */

		pages[lcv] = PGO_DONTCARE;
		if (lcv == centeridx) {		/* save center for later! */
			shadowed = TRUE;
			continue;
		}
		anon = anons[lcv];
		simple_lock(&anon->an_lock);
		/* ignore loaned pages */
		if (anon->u.an_page && anon->u.an_page->loan_count == 0 &&
		    (anon->u.an_page->flags & (PG_RELEASED|PG_BUSY)) == 0) {
			uvm_lock_pageq();
			uvm_pageactivate(anon->u.an_page);	/* reactivate */
			uvm_unlock_pageq();
			UVMHIST_LOG(maphist,
			    "  MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x",
			    ufi.orig_map->pmap, currva, anon->u.an_page, 0);
			uvmexp.fltnamap++;
			pmap_enter(ufi.orig_map->pmap, currva,
			    VM_PAGE_TO_PHYS(anon->u.an_page),
			    (anon->an_ref > 1) ? VM_PROT_READ : enter_prot,
			    (ufi.entry->wired_count != 0));
		}
		simple_unlock(&anon->an_lock);
	}

	/* locked: maps(read), amap(if there) */
	/* (shadowed == TRUE) if there is an anon at the faulting address */
	UVMHIST_LOG(maphist, "  shadowed=%d, will_get=%d", shadowed,
	    (uobj && shadowed == FALSE),0,0);

	/*
	 * note that if we are really short of RAM we could sleep in the above
	 * call to pmap_enter with everything locked.  bad?
	 * XXXCDC: this is fixed in PMAP_NEW (no sleep alloc's in pmap)
	 */

	/*
	 * if the desired page is not shadowed by the amap and we have a
	 * backing object, then we check to see if the backing object would
	 * prefer to handle the fault itself (rather than letting us do it
	 * with the usual pgo_get hook).  the backing object signals this by
	 * providing a pgo_fault routine.
	 */

	if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) {

		simple_lock(&uobj->vmobjlock);

		/* locked: maps(read), amap (if there), uobj */
		result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages,
		    centeridx, fault_type, access_type,
		    PGO_LOCKED);
		/* locked: nothing, pgo_fault has unlocked everything */

		if (result == VM_PAGER_OK)
			return (KERN_SUCCESS);	/* pgo_fault did pmap enter */
		else if (result == VM_PAGER_REFAULT)
			goto ReFault;		/* try again! */
		else
			return (KERN_PROTECTION_FAILURE);
	}

	/*
	 * now, if the desired page is not shadowed by the amap and we have
	 * a backing object that does not have a special fault routine, then
	 * we ask (with pgo_get) the object for resident pages that we care
	 * about and attempt to map them in.  we do not let pgo_get block
	 * (PGO_LOCKED).
	 *
	 * ("get" has the option of doing a pmap_enter for us)
	 */

	if (uobj && shadowed == FALSE) {
		simple_lock(&uobj->vmobjlock);

		/* locked (!shadowed): maps(read), amap (if there), uobj */
		/*
		 * the following call to pgo_get does _not_ change locking state
		 */

		uvmexp.fltlget++;
		gotpages = npages;
		result = uobj->pgops->pgo_get(uobj, ufi.entry->offset +
		    (startva - ufi.entry->start),
		    pages, &gotpages, centeridx,
		    UVM_ET_ISCOPYONWRITE(ufi.entry) ?
		    VM_PROT_READ : access_type,
		    ufi.entry->advice, PGO_LOCKED);

		/*
		 * check for pages to map, if we got any
		 */

		uobjpage = NULL;

		if (gotpages) {
			currva = startva;
			for (lcv = 0 ; lcv < npages ;
			    lcv++, currva += PAGE_SIZE) {

				if (pages[lcv] == NULL ||
				    pages[lcv] == PGO_DONTCARE)
					continue;

#ifdef DIAGNOSTIC
				/*
				 * pager sanity check: pgo_get with
				 * PGO_LOCKED should never return a
				 * released page to us.
				 */
				if (pages[lcv]->flags & PG_RELEASED)
					panic("uvm_fault: pgo_get PGO_LOCKED gave us a RELEASED page");
#endif

				/*
				 * if center page is resident and not
				 * PG_BUSY|PG_RELEASED then pgo_get
				 * made it PG_BUSY for us and gave
				 * us a handle to it.   remember this
				 * page as "uobjpage." (for later use).
				 */

				if (lcv == centeridx) {
					uobjpage = pages[lcv];
					UVMHIST_LOG(maphist, "  got uobjpage (0x%x) with locked get",
					    uobjpage, 0,0,0);
					continue;
				}

				/*
				 * note: calling pgo_get with locked data
				 * structures returns us pages which are
				 * neither busy nor released, so we don't
				 * need to check for this.   we can just
				 * directly enter the page (after moving it
				 * to the head of the active queue [useful?]).
				 */

				uvm_lock_pageq();
				uvm_pageactivate(pages[lcv]);	/* reactivate */
				uvm_unlock_pageq();
				UVMHIST_LOG(maphist,
				    "  MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x",
				    ufi.orig_map->pmap, currva, pages[lcv], 0);
				uvmexp.fltnomap++;
				pmap_enter(ufi.orig_map->pmap, currva,
				    VM_PAGE_TO_PHYS(pages[lcv]),
				    UVM_ET_ISCOPYONWRITE(ufi.entry) ?
				    VM_PROT_READ : enter_prot, wired);

				/*
				 * NOTE: page can't be PG_WANTED or PG_RELEASED
				 * because we've held the lock the whole time
				 * we've had the handle.
				 */
				pages[lcv]->flags &= ~(PG_BUSY);   /* un-busy! */
				UVM_PAGE_OWN(pages[lcv], NULL);

				/* done! */
			}	/* for "lcv" loop */
		}		/* "gotpages" != 0 */

		/* note: object still _locked_ */
	} else {

		uobjpage = NULL;

	}

	/* locked (shadowed): maps(read), amap */
	/* locked (!shadowed): maps(read), amap(if there),
	   uobj(if !null), uobjpage(if !null) */

	/*
	 * note that at this point we are done with any front or back pages.
	 * we are now going to focus on the center page (i.e. the one we've
	 * faulted on).   if we have faulted on the top (anon) layer
	 * [i.e. case 1], then the anon we want is anons[centeridx] (we have
	 * not touched it yet).   if we have faulted on the bottom (uobj)
	 * layer [i.e. case 2] and the page was both present and available,
	 * then we've got a pointer to it as "uobjpage" and we've already
	 * made it BUSY.
	 */

	/*
	 * there are four possible cases we must address: 1A, 1B, 2A, and 2B
	 */

	/*
	 * redirect case 2: if we are not shadowed, go to case 2.
	 */

	if (shadowed == FALSE)
		goto Case2;

	/* locked: maps(read), amap */

	/*
	 * handle case 1: fault on an anon in our amap
	 */

	anon = anons[centeridx];
	UVMHIST_LOG(maphist, "  case 1 fault: anon=0x%x", anon, 0,0,0);
	simple_lock(&anon->an_lock);

	/* locked: maps(read), amap, anon */

	/*
	 * no matter if we have case 1A or case 1B we are going to need to
	 * have the anon's memory resident.   ensure that now.
	 */

	/*
	 * let uvmfault_anonget do the dirty work.   if it fails (!OK) it will
	 * unlock for us.   if it is OK, locks are still valid and locked.
	 * also, if it is OK, then the anon's page is on the queues.
	 * if the page is on loan from a uvm_object, then anonget will
	 * lock that object for us if it does not fail.
	 */

	result = uvmfault_anonget(&ufi, amap, anon);

	if (result == VM_PAGER_REFAULT)
		goto ReFault;

	if (result == VM_PAGER_AGAIN) {
		tsleep((caddr_t)&lbolt, PVM, "fltagain1", 0);
		goto ReFault;
	}

	if (result != VM_PAGER_OK)
		return (KERN_PROTECTION_FAILURE);	/* XXX??? */

	/*
	 * uobj is non null if the page is on loan from an object (i.e. uobj)
	 */

	uobj = anon->u.an_page->uobject;	/* locked by anonget if !NULL */

	/* locked: maps(read), amap, anon, uobj(if one) */

	/*
	 * special handling for loaned pages
	 */
	if (anon->u.an_page->loan_count) {

		if ((access_type & VM_PROT_WRITE) == 0) {

			/*
			 * for read faults on loaned pages we just cap the
			 * protection at read-only.
			 */

			enter_prot = enter_prot & ~VM_PROT_WRITE;

		} else {
			/*
			 * note that we can't allow writes into a loaned page!
			 *
			 * if we have a write fault on a loaned page in an
			 * anon then we need to look at the anon's ref count.
			 * if it is greater than one then we are going to do
			 * a normal copy-on-write fault into a new anon (this
			 * is not a problem).  however, if the reference count
			 * is one (a case where we would normally allow a
			 * write directly to the page) then we need to kill
			 * the loan before we continue.
			 */

			/* >1 case is already ok */
			if (anon->an_ref == 1) {

				/* get new un-owned replacement page */
				pg = uvm_pagealloc(NULL, 0, NULL);
				if (pg == NULL) {
					uvmfault_unlockall(&ufi, amap, uobj,
					    anon);
					uvm_wait("flt_noram2");
					goto ReFault;
				}

				/*
				 * copy data, kill loan, and drop uobj lock
				 * (if any)
				 */
				/* copy old -> new */
				uvm_pagecopy(anon->u.an_page, pg);

				/* force reload */
				pmap_page_protect(PMAP_PGARG(anon->u.an_page),
				    VM_PROT_NONE);
				uvm_lock_pageq();	/* KILL loan */
				if (uobj)
					/* if we were loaning */
					anon->u.an_page->loan_count--;
				anon->u.an_page->uanon = NULL;
				/* in case we owned */
				anon->u.an_page->pqflags &= ~PQ_ANON;
				uvm_unlock_pageq();
				if (uobj) {
					simple_unlock(&uobj->vmobjlock);
					uobj = NULL;
				}

				/* install new page in anon */
				anon->u.an_page = pg;
				pg->uanon = anon;
				pg->pqflags |= PQ_ANON;
				pg->flags &= ~(PG_BUSY|PG_FAKE);
				UVM_PAGE_OWN(pg, NULL);

				/* done! */
			}	/* ref == 1 */
		}		/* write fault */
	}			/* loan count */

	/*
	 * if we are case 1B then we will need to allocate a new blank
	 * anon to transfer the data into.   note that we have a lock
	 * on anon, so no one can busy or release the page until we are done.
	 * also note that the ref count can't drop to zero here because
	 * it is > 1 and we are only dropping one ref.
	 *
	 * in the (hopefully very rare) case that we are out of RAM we
	 * will unlock, wait for more RAM, and refault.
	 *
	 * if we are out of anon VM we kill the process (XXX: could wait?).
	 */

	if ((access_type & VM_PROT_WRITE) != 0 && anon->an_ref > 1) {

		UVMHIST_LOG(maphist, "  case 1B: COW fault",0,0,0,0);
		uvmexp.flt_acow++;
		oanon = anon;		/* oanon = old, locked anon */
		anon = uvm_analloc();
		if (anon)
			pg = uvm_pagealloc(NULL, 0, anon);
#ifdef __GNUC__
		else
			pg = NULL;	/* XXX: gcc */
#endif

		/* check for out of RAM */
		if (anon == NULL || pg == NULL) {
			if (anon)
				uvm_anfree(anon);
			uvmfault_unlockall(&ufi, amap, uobj, oanon);
			if (anon == NULL) {
				UVMHIST_LOG(maphist,
				    "<- failed.  out of VM",0,0,0,0);
				uvmexp.fltnoanon++;
				/* XXX: OUT OF VM, ??? */
				return (KERN_RESOURCE_SHORTAGE);
			}
			uvmexp.fltnoram++;
			uvm_wait("flt_noram3"); /* out of RAM, wait for more */
			goto ReFault;
		}

		/* got all resources, replace anon with nanon */

		uvm_pagecopy(oanon->u.an_page, pg);	/* pg now !PG_CLEAN */
		pg->flags &= ~(PG_BUSY|PG_FAKE);	/* un-busy! new page */
		UVM_PAGE_OWN(pg, NULL);
		amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
		    anon, 1);

		/* deref: can not drop to zero here by defn! */
		oanon->an_ref--;

		/*
		 * note: oanon still locked.   anon is _not_ locked, but we
		 * have the sole reference to it from the amap, which _is_
		 * locked.  thus, no one can get at it until we are done
		 * with it.
		 */

	} else {

		uvmexp.flt_anon++;
		oanon = anon;		/* old, locked anon is same as anon */
		pg = anon->u.an_page;
		if (anon->an_ref > 1)	/* disallow writes to ref > 1 anons */
			enter_prot = enter_prot & ~VM_PROT_WRITE;

	}

	/* locked: maps(read), amap, anon */

	/*
	 * now map the page in ...
	 * XXX: old fault unlocks object before pmap_enter.  this seems
	 * suspect since some other thread could blast the page out from
	 * under us between the unlock and the pmap_enter.
	 */

	UVMHIST_LOG(maphist, "  MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x",
	    ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0);
	pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
	    enter_prot, wired);

	/*
	 * ... and update the page queues.
	 */

	uvm_lock_pageq();

	if (fault_type == VM_FAULT_WIRE) {
		uvm_pagewire(pg);
	} else {
		/* activate it */
		uvm_pageactivate(pg);

	}

	uvm_unlock_pageq();

	/*
	 * done case 1!  finish up by unlocking everything and returning success
	 */

	uvmfault_unlockall(&ufi, amap, uobj, oanon);
	return (KERN_SUCCESS);


Case2:
	/*
	 * handle case 2: faulting on backing object or zero fill
	 */

	/*
	 * locked:
	 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
	 */

	/*
	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
	 * set uobjpage to PGO_DONTCARE if we are doing a zero fill.  if we
	 * have a backing object, check and see if we are going to promote
	 * the data up to an anon during the fault.
	 */

	if (uobj == NULL) {
		uobjpage = PGO_DONTCARE;
		promote = TRUE;		/* always need anon here */
	} else {
		/* assert(uobjpage != PGO_DONTCARE) */
		promote = (access_type & VM_PROT_WRITE) &&
		    UVM_ET_ISCOPYONWRITE(ufi.entry);
	}
	UVMHIST_LOG(maphist, "  case 2 fault: promote=%d, zfill=%d",
	    promote, (uobj == NULL), 0,0);

	/*
	 * if uobjpage is not null then we do not need to do I/O to get the
	 * uobjpage.
	 *
	 * if uobjpage is null, then we need to unlock and ask the pager to
	 * get the data for us.   once we have the data, we need to reverify
	 * the state of the world.  we are currently not holding any resources.
	 */

	if (uobjpage) {
		/* update rusage counters */
		curproc->p_addr->u_stats.p_ru.ru_minflt++;
	} else {
		/* update rusage counters */
		curproc->p_addr->u_stats.p_ru.ru_majflt++;

		/* locked: maps(read), amap(if there), uobj */
		uvmfault_unlockall(&ufi, amap, NULL, NULL);
		/* locked: uobj */

		uvmexp.fltget++;
		gotpages = 1;
		result = uobj->pgops->pgo_get(uobj,
		    (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset,
		    &uobjpage, &gotpages, 0,
		    UVM_ET_ISCOPYONWRITE(ufi.entry) ?
		    VM_PROT_READ : access_type,
		    ufi.entry->advice, 0);

		/* locked: uobjpage(if result OK) */

		/*
		 * recover from I/O
		 */

		if (result != VM_PAGER_OK) {

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_PEND)
				panic("uvm_fault: pgo_get got PENDing on non-async I/O");
#endif

			if (result == VM_PAGER_AGAIN) {
				UVMHIST_LOG(maphist,
				    "  pgo_get says TRY AGAIN!",0,0,0,0);
				tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0);
				goto ReFault;
			}

			UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)",
			    result, 0,0,0);
			return (KERN_PROTECTION_FAILURE); /* XXX i/o error */
		}

		/* locked: uobjpage */

		/*
		 * re-verify the state of the world by first trying to relock
		 * the maps.  always relock the object.
		 */

		locked = uvmfault_relock(&ufi);
		if (locked && amap)
			amap_lock(amap);
		simple_lock(&uobj->vmobjlock);

		/* locked(locked): maps(read), amap(if !null), uobj, uobjpage */
		/* locked(!locked): uobj, uobjpage */

		/*
		 * verify that the page has not been released and re-verify
		 * that amap slot is still free.   if there is a problem,
		 * we unlock and clean up.
		 */

		if ((uobjpage->flags & PG_RELEASED) != 0 ||
		    (locked && amap &&
		    amap_lookup(&ufi.entry->aref,
		    ufi.orig_rvaddr - ufi.entry->start))) {
			if (locked)
				uvmfault_unlockall(&ufi, amap, NULL, NULL);
			locked = FALSE;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == FALSE) {

			UVMHIST_LOG(maphist,
			    "  wasn't able to relock after fault: retry",
			    0,0,0,0);
			if (uobjpage->flags & PG_WANTED)
				/* still holding object lock */
				thread_wakeup(uobjpage);

			if (uobjpage->flags & PG_RELEASED) {
				uvmexp.fltpgrele++;
#ifdef DIAGNOSTIC
				if (uobj->pgops->pgo_releasepg == NULL)
					panic("uvm_fault: object has no releasepg function");
#endif
				/* frees page */
				if (uobj->pgops->pgo_releasepg(uobjpage,NULL))
					/* unlock if still alive */
					simple_unlock(&uobj->vmobjlock);
				goto ReFault;
			}

			uvm_lock_pageq();
			/* make sure it is in queues */
			uvm_pageactivate(uobjpage);

			uvm_unlock_pageq();
			uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(uobjpage, NULL);
			simple_unlock(&uobj->vmobjlock);
			goto ReFault;

		}

		/*
		 * we have the data in uobjpage which is PG_BUSY and
		 * !PG_RELEASED.  we are holding object lock (so the page
		 * can't be released on us).
		 */

		/* locked: maps(read), amap(if !null), uobj, uobjpage */

	}

	/*
	 * locked:
	 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
	 */

	/*
	 * notes:
	 *  - at this point uobjpage can not be NULL
	 *  - at this point uobjpage can not be PG_RELEASED (since we checked
	 *    for it above)
	 *  - at this point uobjpage could be PG_WANTED (handle later)
	 */

	if (promote == FALSE) {

		/*
		 * we are not promoting.   if the mapping is COW ensure that we
		 * don't give more access than we should (e.g. when doing a read
		 * fault on a COPYONWRITE mapping we want to map the COW page in
		 * R/O even though the entry protection could be R/W).
		 *
		 * set "pg" to the page we want to map in (uobjpage, usually)
		 */

		uvmexp.flt_obj++;
		if (UVM_ET_ISCOPYONWRITE(ufi.entry))
			enter_prot = enter_prot & ~VM_PROT_WRITE;
		pg = uobjpage;		/* map in the actual object */

		/* assert(uobjpage != PGO_DONTCARE) */

		/*
		 * we are faulting directly on the page.   be careful
		 * about writing to loaned pages...
		 */
		if (uobjpage->loan_count) {

			if ((access_type & VM_PROT_WRITE) == 0) {
				/* read fault: cap the protection at readonly */
				/* cap! */
				enter_prot = enter_prot & ~VM_PROT_WRITE;
			} else {
				/* write fault: must break the loan here */

				/* alloc new un-owned page */
				pg = uvm_pagealloc(NULL, 0, NULL);

				if (pg == NULL) {
					/*
					 * drop ownership of page, it can't
					 * be released
					 */
					if (uobjpage->flags & PG_WANTED)
						thread_wakeup(uobjpage);
					uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
					UVM_PAGE_OWN(uobjpage, NULL);

					uvm_lock_pageq();
					/* activate: we will need it later */
					uvm_pageactivate(uobjpage);

					uvm_unlock_pageq();
					uvmfault_unlockall(&ufi, amap, uobj,
					    NULL);
					UVMHIST_LOG(maphist,
					    "  out of RAM breaking loan, waiting", 0,0,0,0);
					uvmexp.fltnoram++;
					uvm_wait("flt_noram4");
					goto ReFault;
				}

				/*
				 * copy the data from the old page to the new
				 * one and clear the fake/clean flags on the
				 * new page (keep it busy).  force a reload
				 * of the old page by clearing it from all
				 * pmaps.  then lock the page queues to
				 * rename the pages.
				 */
				uvm_pagecopy(uobjpage, pg);	/* old -> new */
				pg->flags &= ~(PG_FAKE|PG_CLEAN);
				pmap_page_protect(PMAP_PGARG(uobjpage),
				    VM_PROT_NONE);
				if (uobjpage->flags & PG_WANTED)
					thread_wakeup(uobjpage);
				/* uobj still locked */
				uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
				UVM_PAGE_OWN(uobjpage, NULL);

				uvm_lock_pageq();
				offset = uobjpage->offset;
				/* remove old page */
				uvm_pagerealloc(uobjpage, NULL, 0);

				/*
				 * at this point we have absolutely no
				 * control over uobjpage
				 */
				/* install new page */
				uvm_pagerealloc(pg, uobj, offset);
				uvm_unlock_pageq();

				/*
				 * done!  loan is broken and "pg" is
				 * PG_BUSY.   it can now replace uobjpage.
				 */

				uobjpage = pg;

			}	/* write fault case */
		}		/* if loan_count */

	} else {

		/*
		 * if we are going to promote the data to an anon we
		 * allocate a blank anon here and plug it into our amap.
		 */
#ifdef DIAGNOSTIC
		if (amap == NULL)
			panic("uvm_fault: want to promote data, but no anon");
#endif

		anon = uvm_analloc();
		if (anon)
			pg = uvm_pagealloc(NULL, 0, anon); /* BUSY+CLEAN+FAKE */
#ifdef __GNUC__
		else
			pg = NULL;	/* XXX: gcc */
#endif

		/*
		 * out of memory resources?
		 */
		if (anon == NULL || pg == NULL) {

			/*
			 * arg!  must unbusy our page and fail or sleep.
			 */
			if (uobjpage != PGO_DONTCARE) {
				if (uobjpage->flags & PG_WANTED)
					/* still holding object lock */
					thread_wakeup(uobjpage);

				uvm_lock_pageq();
				/* make sure it is in queues */
				uvm_pageactivate(uobjpage);
				uvm_unlock_pageq();
				/* un-busy! (still locked) */
				uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
				UVM_PAGE_OWN(uobjpage, NULL);
			}

			/* unlock and fail ... */
			uvmfault_unlockall(&ufi, amap, uobj, NULL);
			if (anon == NULL) {
				UVMHIST_LOG(maphist, "  promote: out of VM",
				    0,0,0,0);
				uvmexp.fltnoanon++;
				/* XXX: out of VM */
				return (KERN_RESOURCE_SHORTAGE);
			}
			UVMHIST_LOG(maphist, "  out of RAM, waiting for more",
			    0,0,0,0);
			uvm_anfree(anon);
			uvmexp.fltnoram++;
			uvm_wait("flt_noram5");
			goto ReFault;
		}

		/*
		 * fill in the data
		 */

		if (uobjpage != PGO_DONTCARE) {
			uvmexp.flt_prcopy++;
			/* copy page [pg now dirty] */
			uvm_pagecopy(uobjpage, pg);

			/*
			 * promote to shared amap?  make sure all sharing
			 * procs see it
			 */
			if ((amap_flags(amap) & AMAP_SHARED) != 0) {
				pmap_page_protect(PMAP_PGARG(uobjpage),
				    VM_PROT_NONE);
			}

			/*
			 * dispose of uobjpage.  it can't be PG_RELEASED
			 * since we still hold the object lock.   drop
			 * handle to uobj as well.
			 */
1580 */ 1581 1582 if (uobjpage->flags & PG_WANTED) 1583 /* still have the obj lock */ 1584 thread_wakeup(uobjpage); 1585 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1586 UVM_PAGE_OWN(uobjpage, NULL); 1587 uvm_lock_pageq(); 1588 uvm_pageactivate(uobjpage); /* put it back */ 1589 uvm_unlock_pageq(); 1590 simple_unlock(&uobj->vmobjlock); 1591 uobj = NULL; 1592 UVMHIST_LOG(maphist, 1593 " promote uobjpage 0x%x to anon/page 0x%x/0x%x", 1594 uobjpage, anon, pg, 0); 1595 1596 } else { 1597 uvmexp.flt_przero++; 1598 uvm_pagezero(pg); /* zero page [pg now dirty] */ 1599 UVMHIST_LOG(maphist," zero fill anon/page 0x%x/0%x", 1600 anon, pg, 0, 0); 1601 } 1602 1603 amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start, 1604 anon, 0); 1605 1606 } 1607 1608 /* 1609 * locked: 1610 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1611 * 1612 * note: pg is either the uobjpage or the new page in the new anon 1613 */ 1614 1615 /* 1616 * all resources are present. we can now map it in and free our 1617 * resources. 1618 */ 1619 1620 UVMHIST_LOG(maphist, 1621 " MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d", 1622 ufi.orig_map->pmap, ufi.orig_rvaddr, pg, promote); 1623 pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), 1624 enter_prot, wired); 1625 1626 uvm_lock_pageq(); 1627 1628 if (fault_type == VM_FAULT_WIRE) { 1629 uvm_pagewire(pg); 1630 } else { 1631 1632 /* activate it */ 1633 uvm_pageactivate(pg); 1634 1635 } 1636 1637 uvm_unlock_pageq(); 1638 1639 if (pg->flags & PG_WANTED) 1640 thread_wakeup(pg); /* lock still held */ 1641 1642 /* 1643 * note that pg can't be PG_RELEASED since we did not drop the object 1644 * lock since the last time we checked. 1645 */ 1646 1647 pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); 1648 UVM_PAGE_OWN(pg, NULL); 1649 uvmfault_unlockall(&ufi, amap, uobj, NULL); 1650 1651 UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); 1652 return (KERN_SUCCESS); 1653 } 1654 1655 1656 /* 1657 * uvm_fault_wire: wire down a range of virtual addresses in a map. 1658 * 1659 * => map should be locked by caller? If so how can we call 1660 * uvm_fault? WRONG. 1661 * => XXXCDC: locking here is all screwed up!!! start with 1662 * uvm_map_pageable and fix it. 1663 */ 1664 1665 int 1666 uvm_fault_wire(map, start, end) 1667 vm_map_t map; 1668 vaddr_t start, end; 1669 { 1670 vaddr_t va; 1671 pmap_t pmap; 1672 int rv; 1673 1674 pmap = vm_map_pmap(map); 1675 1676 /* 1677 * call pmap pageable: this tells the pmap layer to lock down these 1678 * page tables. 1679 */ 1680 1681 pmap_pageable(pmap, start, end, FALSE); 1682 1683 /* 1684 * now fault it in page at a time. if the fault fails then we have 1685 * to undo what we have done. note that in uvm_fault VM_PROT_NONE 1686 * is replaced with the max protection if fault_type is VM_FAULT_WIRE. 1687 */ 1688 1689 for (va = start ; va < end ; va += PAGE_SIZE) { 1690 rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE); 1691 if (rv) { 1692 if (va != start) { 1693 uvm_fault_unwire(map->pmap, start, va); 1694 } 1695 return (rv); 1696 } 1697 } 1698 1699 return (KERN_SUCCESS); 1700 } 1701 1702 /* 1703 * uvm_fault_unwire(): unwire range of virtual space. 1704 * 1705 * => caller holds reference to pmap (via its map) 1706 */ 1707 1708 void 1709 uvm_fault_unwire(pmap, start, end) 1710 struct pmap *pmap; 1711 vaddr_t start, end; 1712 { 1713 vaddr_t va; 1714 paddr_t pa; 1715 struct vm_page *pg; 1716 1717 /* 1718 * we assume that the area we are unwiring has actually been wired 1719 * in the first place. 
	 * the PAs from the pmap.   we also lock out the page daemon so that
	 * we can call uvm_pageunwire.
	 */

	uvm_lock_pageq();

	for (va = start; va < end ; va += PAGE_SIZE) {
		pa = pmap_extract(pmap, va);

		/* XXX: assumes PA 0 cannot be in map */
		if (pa == (paddr_t) 0) {
			panic("uvm_fault_unwire: unwiring non-wired memory");
		}
		pmap_change_wiring(pmap, va, FALSE);	/* tell the pmap */
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg)
			uvm_pageunwire(pg);
	}

	uvm_unlock_pageq();

	/*
	 * now we call pmap_pageable to let the pmap know that the page tables
	 * in this space no longer need to be wired.
	 */

	pmap_pageable(pmap, start, end, TRUE);

}
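
/*
 * illustrative sketch (not part of the original code, never compiled):
 * roughly how machine-dependent trap code is expected to call into
 * uvm_fault() and act on its return value.  the helper name and the
 * signal-delivery details are assumptions made for illustration only;
 * see the real MD trap() routines for the specifics.
 */
#if 0
static void
example_handle_pagefault(struct proc *p, vm_map_t map, vaddr_t va,
    vm_prot_t access_type)
{
	int rv;

	rv = uvm_fault(map, va, VM_FAULT_INVALID, access_type);
	if (rv == KERN_SUCCESS)
		return;		/* mapping established, retry the instruction */

	/*
	 * KERN_INVALID_ADDRESS, KERN_PROTECTION_FAILURE, or
	 * KERN_RESOURCE_SHORTAGE: the fault could not be resolved, so the
	 * trap code would normally deliver SIGSEGV (or kill the process on
	 * a resource shortage) here.
	 */
}
#endif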