/*	$NetBSD: uvm_fault.c,v 1.69 2001/09/15 20:36:45 chs Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
 */

#include "opt_uvmhist.h"

/*
 * uvm_fault.c: fault handler
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/user.h>

#include <uvm/uvm.h>

/*
 *
 * a word on page faults:
 *
 * types of page faults we handle:
 *
 * CASE 1: upper layer faults                   CASE 2: lower layer faults
 *
 *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
 *    read/write1     write>1                  read/write   +-cow_write/zero
 *         |             |                         |        |
 *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
 * amap |  V  |       |  ----------->new|          |        | |  ^  |
 *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
 *                                                 |        |    |
 *      +-----+       +-----+                   +--|--+     | +--|--+
 * uobj | d/c |       | d/c |                   |  V  |     +----|  |
 *      +-----+       +-----+                   +-----+       +-----+
 *
 * d/c = don't care
 *
 *   case [0]: layerless fault
 *	no amap or uobj is present.   this is an error.
 *
 *   case [1]: upper layer fault [anon active]
 *     1A: [read] or [write with anon->an_ref == 1]
 *		I/O takes place in top level anon and uobj is not touched.
 *     1B: [write with anon->an_ref > 1]
 *		new anon is alloc'd and data is copied off ["COW"]
 *
 *   case [2]: lower layer fault [uobj]
 *     2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
 *		I/O takes place directly in object.
 *     2B: [write to copy_on_write] or [read on NULL uobj]
 *		data is "promoted" from uobj to a new anon.
 *		if uobj is null, then we zero fill.
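 *
 * as a concrete example: the first write to a page of a private
 * (copy-on-write) file mapping is a 2B fault -- the file page is copied
 * into a newly allocated anon and the write goes to that anon copy.   a
 * later write to the same page is then a 1A fault, and a write after a
 * fork has pushed the anon's reference count above one is a 1B fault.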
 *
 * we follow the standard UVM locking protocol ordering:
 *
 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
 * we hold a PG_BUSY page if we unlock for I/O
 *
 *
 * the code is structured as follows:
 *
 *     - init the "IN" params in the ufi structure
 *   ReFault:
 *     - do lookups [locks maps], check protection, handle needs_copy
 *     - check for case 0 fault (error)
 *     - establish "range" of fault
 *     - if we have an amap lock it and extract the anons
 *     - if sequential advice deactivate pages behind us
 *     - at the same time check pmap for unmapped areas and anon for pages
 *	 that we could map in (and do map it if found)
 *     - check object for resident pages that we could map in
 *     - if (case 2) goto Case2
 *     - >>> handle case 1
 *     - ensure source anon is resident in RAM
 *     - if case 1B alloc new anon and copy from source
 *     - map the correct page in
 *   Case2:
 *     - >>> handle case 2
 *     - ensure source page is resident (if uobj)
 *     - if case 2B alloc new anon and copy from source (could be zero
 *	 fill if uobj == NULL)
 *     - map the correct page in
 *     - done!
 *
 * note on paging:
 *   if we have to do I/O we place a PG_BUSY page in the correct object,
 * unlock everything, and do the I/O.   when I/O is done we must reverify
 * the state of the world before assuming that our data structures are
 * valid.   [because mappings could change while the map is unlocked]
 *
 * alternative 1: unbusy the page in question and restart the page fault
 *    from the top (ReFault).   this is easy but does not take advantage
 *    of the information that we already have from our previous lookup,
 *    although it is possible that the "hints" in the vm_map will help here.
 *
 * alternative 2: the system already keeps track of a "version" number of
 *    a map.   [i.e. every time you write-lock a map (e.g. to change a
 *    mapping) you bump the version number up by one...]   so, we can save
 *    the version number of the map before we release the lock and start I/O.
 *    then when I/O is done we can relock and check the version numbers
 *    to see if anything changed.   this might save us some over 1 because
 *    we don't have to unbusy the page and may be less compares(?).
 *
 * alternative 3: put in backpointers or a way to "hold" part of a map
 *    in place while I/O is in progress.   this could be complex to
 *    implement (especially with structures like amap that can be referenced
 *    by multiple map entries, and figuring out what should wait could be
 *    complex as well...).
 *
 * given that we are not currently multiprocessor or multithreaded we might
 * as well choose alternative 2 now.   maybe alternative 3 would be useful
 * in the future.    XXX keep in mind for future consideration//rechecking.
 */

/*
 * local data structures
 */

struct uvm_advice {
	int advice;
	int nback;
	int nforw;
};

/*
 * page range array:
 * note: index in array must match "advice" value
 * XXX: borrowed numbers from freebsd.   do they work well for us?
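 *
 * for example, with the values below a MADV_NORMAL fault considers up to
 * 3 pages behind and 4 pages ahead of the faulting page (8 pages total,
 * clipped to the map entry), MADV_RANDOM considers only the faulting page,
 * and MADV_SEQUENTIAL looks 8 back / 7 forward and later deactivates the
 * pages behind the fault.   UVM_MAXRANGE must remain >= nback + nforw + 1
 * for every entry.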
 */

static struct uvm_advice uvmadvice[] = {
	{ MADV_NORMAL, 3, 4 },
	{ MADV_RANDOM, 0, 0 },
	{ MADV_SEQUENTIAL, 8, 7},
};

#define UVM_MAXRANGE 16	/* must be MAX() of nback+nforw+1 */

/*
 * private prototypes
 */

static void uvmfault_amapcopy __P((struct uvm_faultinfo *));
static __inline void uvmfault_anonflush __P((struct vm_anon **, int));

/*
 * inline functions
 */

/*
 * uvmfault_anonflush: try and deactivate pages in specified anons
 *
 * => does not have to deactivate page if it is busy
 */

static __inline void
uvmfault_anonflush(anons, n)
	struct vm_anon **anons;
	int n;
{
	int lcv;
	struct vm_page *pg;

	for (lcv = 0 ; lcv < n ; lcv++) {
		if (anons[lcv] == NULL)
			continue;
		simple_lock(&anons[lcv]->an_lock);
		pg = anons[lcv]->u.an_page;
		if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
			uvm_lock_pageq();
			if (pg->wire_count == 0) {
				pmap_clear_reference(pg);
				uvm_pagedeactivate(pg);
			}
			uvm_unlock_pageq();
		}
		simple_unlock(&anons[lcv]->an_lock);
	}
}

/*
 * normal functions
 */

/*
 * uvmfault_amapcopy: clear "needs_copy" in a map.
 *
 * => called with VM data structures unlocked (usually, see below)
 * => we get a write lock on the maps and clear needs_copy for a VA
 * => if we are out of RAM we sleep (waiting for more)
 */

static void
uvmfault_amapcopy(ufi)
	struct uvm_faultinfo *ufi;
{
	for (;;) {

		/*
		 * no mapping?  give up.
		 */

		if (uvmfault_lookup(ufi, TRUE) == FALSE)
			return;

		/*
		 * copy if needed.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry))
			amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE,
			    ufi->orig_rvaddr, ufi->orig_rvaddr + 1);

		/*
		 * didn't work?  must be out of RAM.   unlock and sleep.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
			uvmfault_unlockmaps(ufi, TRUE);
			uvm_wait("fltamapcopy");
			continue;
		}

		/*
		 * got it!   unlock and return.
		 */

		uvmfault_unlockmaps(ufi, TRUE);
		return;
	}
	/*NOTREACHED*/
}

/*
 * uvmfault_anonget: get data in an anon into a non-busy, non-released
 * page in that anon.
 *
 * => maps, amap, and anon locked by caller.
 * => if we fail (result != 0) we unlock everything.
 * => if we are successful, we return with everything still locked.
 * => we don't move the page on the queues [gets moved later]
 * => if we allocate a new page [we_own], it gets put on the queues.
 *    either way, the result is that the page is on the queues at return time
 * => for pages which are on loan from a uvm_object (and thus are not
 *    owned by the anon): if successful, we return with the owning object
 *    locked.   the caller must unlock this object when it unlocks everything
 *    else.
 */

int
uvmfault_anonget(ufi, amap, anon)
	struct uvm_faultinfo *ufi;
	struct vm_amap *amap;
	struct vm_anon *anon;
{
	boolean_t we_own;	/* we own anon's page? */
	boolean_t locked;	/* did we relock?
*/ 294 struct vm_page *pg; 295 int error; 296 UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); 297 298 LOCK_ASSERT(simple_lock_held(&anon->an_lock)); 299 300 error = 0; 301 uvmexp.fltanget++; 302 /* bump rusage counters */ 303 if (anon->u.an_page) 304 curproc->p_addr->u_stats.p_ru.ru_minflt++; 305 else 306 curproc->p_addr->u_stats.p_ru.ru_majflt++; 307 308 /* 309 * loop until we get it, or fail. 310 */ 311 312 for (;;) { 313 we_own = FALSE; /* TRUE if we set PG_BUSY on a page */ 314 pg = anon->u.an_page; 315 316 /* 317 * if there is a resident page and it is loaned, then anon 318 * may not own it. call out to uvm_anon_lockpage() to ensure 319 * the real owner of the page has been identified and locked. 320 */ 321 322 if (pg && pg->loan_count) 323 pg = uvm_anon_lockloanpg(anon); 324 325 /* 326 * page there? make sure it is not busy/released. 327 */ 328 329 if (pg) { 330 331 /* 332 * at this point, if the page has a uobject [meaning 333 * we have it on loan], then that uobject is locked 334 * by us! if the page is busy, we drop all the 335 * locks (including uobject) and try again. 336 */ 337 338 if ((pg->flags & PG_BUSY) == 0) { 339 UVMHIST_LOG(maphist, "<- OK",0,0,0,0); 340 return (0); 341 } 342 pg->flags |= PG_WANTED; 343 uvmexp.fltpgwait++; 344 345 /* 346 * the last unlock must be an atomic unlock+wait on 347 * the owner of page 348 */ 349 350 if (pg->uobject) { /* owner is uobject ? */ 351 uvmfault_unlockall(ufi, amap, NULL, anon); 352 UVMHIST_LOG(maphist, " unlock+wait on uobj",0, 353 0,0,0); 354 UVM_UNLOCK_AND_WAIT(pg, 355 &pg->uobject->vmobjlock, 356 FALSE, "anonget1",0); 357 } else { 358 /* anon owns page */ 359 uvmfault_unlockall(ufi, amap, NULL, NULL); 360 UVMHIST_LOG(maphist, " unlock+wait on anon",0, 361 0,0,0); 362 UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0, 363 "anonget2",0); 364 } 365 } else { 366 367 /* 368 * no page, we must try and bring it in. 369 */ 370 371 pg = uvm_pagealloc(NULL, 0, anon, 0); 372 if (pg == NULL) { /* out of RAM. */ 373 uvmfault_unlockall(ufi, amap, NULL, anon); 374 uvmexp.fltnoram++; 375 UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0, 376 0,0,0); 377 uvm_wait("flt_noram1"); 378 } else { 379 /* we set the PG_BUSY bit */ 380 we_own = TRUE; 381 uvmfault_unlockall(ufi, amap, NULL, anon); 382 383 /* 384 * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN 385 * page into the uvm_swap_get function with 386 * all data structures unlocked. note that 387 * it is ok to read an_swslot here because 388 * we hold PG_BUSY on the page. 389 */ 390 uvmexp.pageins++; 391 error = uvm_swap_get(pg, anon->an_swslot, 392 PGO_SYNCIO); 393 394 /* 395 * we clean up after the i/o below in the 396 * "we_own" case 397 */ 398 } 399 } 400 401 /* 402 * now relock and try again 403 */ 404 405 locked = uvmfault_relock(ufi); 406 if (locked && amap != NULL) { 407 amap_lock(amap); 408 } 409 if (locked || we_own) 410 simple_lock(&anon->an_lock); 411 412 /* 413 * if we own the page (i.e. we set PG_BUSY), then we need 414 * to clean up after the I/O. there are three cases to 415 * consider: 416 * [1] page released during I/O: free anon and ReFault. 417 * [2] I/O not OK. free the page and cause the fault 418 * to fail. 419 * [3] I/O OK! activate the page and sync with the 420 * non-we_own case (i.e. drop anon lock if not locked). 421 */ 422 423 if (we_own) { 424 if (pg->flags & PG_WANTED) { 425 wakeup(pg); 426 } 427 if (error) { 428 /* remove page from anon */ 429 anon->u.an_page = NULL; 430 431 /* 432 * remove the swap slot from the anon 433 * and mark the anon as having no real slot. 
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */

				uvm_swap_markbad(anon->an_swslot, 1);
				anon->an_swslot = SWSLOT_BAD;

				/*
				 * note:  page was never !PG_BUSY, so it
				 * can't be mapped and thus no need to
				 * pmap_page_protect it...
				 */

				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();

				if (locked)
					uvmfault_unlockall(ufi, amap, NULL,
					    anon);
				else
					simple_unlock(&anon->an_lock);
				UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0);
				return error;
			}

			/*
			 * we've successfully read the page, activate it.
			 */

			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);
			if (!locked)
				simple_unlock(&anon->an_lock);
		}

		/*
		 * we were not able to relock.   restart fault.
		 */

		if (!locked) {
			UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
			return (ERESTART);
		}

		/*
		 * verify no one has touched the amap and moved the anon on us.
		 */

		if (ufi != NULL &&
		    amap_lookup(&ufi->entry->aref,
			ufi->orig_rvaddr - ufi->entry->start) != anon) {

			uvmfault_unlockall(ufi, amap, NULL, anon);
			UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
			return (ERESTART);
		}

		/*
		 * try it again!
		 */

		uvmexp.fltanretry++;
		continue;
	}
	/*NOTREACHED*/
}

/*
 * F A U L T   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_fault: page fault handler
 *
 * => called from MD code to resolve a page fault
 * => VM data structures usually should be unlocked.   however, it is
 *	possible to call here with the main map locked if the caller
 *	gets a write lock, sets it recursive, and then calls us (c.f.
 *	uvm_map_pageable).   this should be avoided because it keeps
 *	the map locked off during I/O.
 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT
 */

#define MASK(entry)	(UVM_ET_ISCOPYONWRITE(entry) ? \
			 ~VM_PROT_WRITE : VM_PROT_ALL)

int
uvm_fault(orig_map, vaddr, fault_type, access_type)
	struct vm_map *orig_map;
	vaddr_t vaddr;
	vm_fault_t fault_type;
	vm_prot_t access_type;
{
	struct uvm_faultinfo ufi;
	vm_prot_t enter_prot;
	boolean_t wired, narrow, promote, locked, shadowed;
	int npages, nback, nforw, centeridx, error, lcv, gotpages;
	vaddr_t startva, objaddr, currva, offset, uoff;
	paddr_t pa;
	struct vm_amap *amap;
	struct uvm_object *uobj;
	struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
	struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
	UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, ft=%d, at=%d)",
	      orig_map, vaddr, fault_type, access_type);

	anon = NULL;
	pg = NULL;

	uvmexp.faults++;	/* XXX: locking? */

	/*
	 * init the IN parameters in the ufi
	 */

	ufi.orig_map = orig_map;
	ufi.orig_rvaddr = trunc_page(vaddr);
	ufi.orig_size = PAGE_SIZE;	/* can't get any smaller than this */
	if (fault_type == VM_FAULT_WIRE)
		narrow = TRUE;		/* don't look for neighborhood
					 * pages on wire */
	else
		narrow = FALSE;		/* normal fault */

	/*
	 * "goto ReFault" means restart the page fault from ground zero.
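	 * every path that does "goto ReFault" must first drop whatever
	 * locks it still holds, since the code below re-runs the lookup
	 * and re-derives all of its state from scratch.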
566 */ 567 ReFault: 568 569 /* 570 * lookup and lock the maps 571 */ 572 573 if (uvmfault_lookup(&ufi, FALSE) == FALSE) { 574 UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0); 575 return (EFAULT); 576 } 577 /* locked: maps(read) */ 578 579 #ifdef DIAGNOSTIC 580 if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) { 581 printf("Page fault on non-pageable map:\n"); 582 printf("ufi.map = %p\n", ufi.map); 583 printf("ufi.orig_map = %p\n", ufi.orig_map); 584 printf("ufi.orig_rvaddr = 0x%lx\n", (u_long) ufi.orig_rvaddr); 585 panic("uvm_fault: (ufi.map->flags & VM_MAP_PAGEABLE) == 0"); 586 } 587 #endif 588 589 /* 590 * check protection 591 */ 592 593 if ((ufi.entry->protection & access_type) != access_type) { 594 UVMHIST_LOG(maphist, 595 "<- protection failure (prot=0x%x, access=0x%x)", 596 ufi.entry->protection, access_type, 0, 0); 597 uvmfault_unlockmaps(&ufi, FALSE); 598 return EACCES; 599 } 600 601 /* 602 * "enter_prot" is the protection we want to enter the page in at. 603 * for certain pages (e.g. copy-on-write pages) this protection can 604 * be more strict than ufi.entry->protection. "wired" means either 605 * the entry is wired or we are fault-wiring the pg. 606 */ 607 608 enter_prot = ufi.entry->protection; 609 wired = VM_MAPENT_ISWIRED(ufi.entry) || (fault_type == VM_FAULT_WIRE); 610 if (wired) 611 access_type = enter_prot; /* full access for wired */ 612 613 /* 614 * handle "needs_copy" case. if we need to copy the amap we will 615 * have to drop our readlock and relock it with a write lock. (we 616 * need a write lock to change anything in a map entry [e.g. 617 * needs_copy]). 618 */ 619 620 if (UVM_ET_ISNEEDSCOPY(ufi.entry)) { 621 if ((access_type & VM_PROT_WRITE) || 622 (ufi.entry->object.uvm_obj == NULL)) { 623 /* need to clear */ 624 UVMHIST_LOG(maphist, 625 " need to clear needs_copy and refault",0,0,0,0); 626 uvmfault_unlockmaps(&ufi, FALSE); 627 uvmfault_amapcopy(&ufi); 628 uvmexp.fltamcopy++; 629 goto ReFault; 630 631 } else { 632 633 /* 634 * ensure that we pmap_enter page R/O since 635 * needs_copy is still true 636 */ 637 enter_prot &= ~VM_PROT_WRITE; 638 639 } 640 } 641 642 /* 643 * identify the players 644 */ 645 646 amap = ufi.entry->aref.ar_amap; /* top layer */ 647 uobj = ufi.entry->object.uvm_obj; /* bottom layer */ 648 649 /* 650 * check for a case 0 fault. if nothing backing the entry then 651 * error now. 652 */ 653 654 if (amap == NULL && uobj == NULL) { 655 uvmfault_unlockmaps(&ufi, FALSE); 656 UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); 657 return (EFAULT); 658 } 659 660 /* 661 * establish range of interest based on advice from mapper 662 * and then clip to fit map entry. note that we only want 663 * to do this the first time through the fault. if we 664 * ReFault we will disable this by setting "narrow" to true. 665 */ 666 667 if (narrow == FALSE) { 668 669 /* wide fault (!narrow) */ 670 KASSERT(uvmadvice[ufi.entry->advice].advice == 671 ufi.entry->advice); 672 nback = MIN(uvmadvice[ufi.entry->advice].nback, 673 (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT); 674 startva = ufi.orig_rvaddr - (nback << PAGE_SHIFT); 675 nforw = MIN(uvmadvice[ufi.entry->advice].nforw, 676 ((ufi.entry->end - ufi.orig_rvaddr) >> 677 PAGE_SHIFT) - 1); 678 /* 679 * note: "-1" because we don't want to count the 680 * faulting page as forw 681 */ 682 npages = nback + nforw + 1; 683 centeridx = nback; 684 685 narrow = TRUE; /* ensure only once per-fault */ 686 687 } else { 688 689 /* narrow fault! 
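		 * (only the faulting page itself is handled here: this is
		 * the VM_FAULT_WIRE case, or any pass after the first one,
		 * since "narrow" is set TRUE above)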
*/ 690 nback = nforw = 0; 691 startva = ufi.orig_rvaddr; 692 npages = 1; 693 centeridx = 0; 694 695 } 696 697 /* locked: maps(read) */ 698 UVMHIST_LOG(maphist, " narrow=%d, back=%d, forw=%d, startva=0x%x", 699 narrow, nback, nforw, startva); 700 UVMHIST_LOG(maphist, " entry=0x%x, amap=0x%x, obj=0x%x", ufi.entry, 701 amap, uobj, 0); 702 703 /* 704 * if we've got an amap, lock it and extract current anons. 705 */ 706 707 if (amap) { 708 amap_lock(amap); 709 anons = anons_store; 710 amap_lookups(&ufi.entry->aref, startva - ufi.entry->start, 711 anons, npages); 712 } else { 713 anons = NULL; /* to be safe */ 714 } 715 716 /* locked: maps(read), amap(if there) */ 717 718 /* 719 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages 720 * now and then forget about them (for the rest of the fault). 721 */ 722 723 if (ufi.entry->advice == MADV_SEQUENTIAL) { 724 725 UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages", 726 0,0,0,0); 727 /* flush back-page anons? */ 728 if (amap) 729 uvmfault_anonflush(anons, nback); 730 731 /* flush object? */ 732 if (uobj) { 733 objaddr = 734 (startva - ufi.entry->start) + ufi.entry->offset; 735 simple_lock(&uobj->vmobjlock); 736 (void) (uobj->pgops->pgo_put)(uobj, objaddr, objaddr + 737 (nback << PAGE_SHIFT), PGO_DEACTIVATE); 738 } 739 740 /* now forget about the backpages */ 741 if (amap) 742 anons += nback; 743 startva += (nback << PAGE_SHIFT); 744 npages -= nback; 745 nback = centeridx = 0; 746 } 747 748 /* locked: maps(read), amap(if there) */ 749 750 /* 751 * map in the backpages and frontpages we found in the amap in hopes 752 * of preventing future faults. we also init the pages[] array as 753 * we go. 754 */ 755 756 currva = startva; 757 shadowed = FALSE; 758 for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) { 759 760 /* 761 * dont play with VAs that are already mapped 762 * except for center) 763 */ 764 if (lcv != centeridx && 765 pmap_extract(ufi.orig_map->pmap, currva, &pa)) { 766 pages[lcv] = PGO_DONTCARE; 767 continue; 768 } 769 770 /* 771 * unmapped or center page. check if any anon at this level. 772 */ 773 if (amap == NULL || anons[lcv] == NULL) { 774 pages[lcv] = NULL; 775 continue; 776 } 777 778 /* 779 * check for present page and map if possible. re-activate it. 780 */ 781 782 pages[lcv] = PGO_DONTCARE; 783 if (lcv == centeridx) { /* save center for later! */ 784 shadowed = TRUE; 785 continue; 786 } 787 anon = anons[lcv]; 788 simple_lock(&anon->an_lock); 789 /* ignore loaned pages */ 790 if (anon->u.an_page && anon->u.an_page->loan_count == 0 && 791 (anon->u.an_page->flags & PG_BUSY) == 0) { 792 uvm_lock_pageq(); 793 uvm_pageactivate(anon->u.an_page); 794 uvm_unlock_pageq(); 795 UVMHIST_LOG(maphist, 796 " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x", 797 ufi.orig_map->pmap, currva, anon->u.an_page, 0); 798 uvmexp.fltnamap++; 799 800 /* 801 * Since this isn't the page that's actually faulting, 802 * ignore pmap_enter() failures; it's not critical 803 * that we enter these right now. 804 */ 805 806 (void) pmap_enter(ufi.orig_map->pmap, currva, 807 VM_PAGE_TO_PHYS(anon->u.an_page), 808 (anon->an_ref > 1) ? (enter_prot & ~VM_PROT_WRITE) : 809 enter_prot, 810 PMAP_CANFAIL | 811 (VM_MAPENT_ISWIRED(ufi.entry) ? 
		    PMAP_WIRED : 0));
		}
		simple_unlock(&anon->an_lock);
		pmap_update(ufi.orig_map->pmap);
	}

	/* locked: maps(read), amap(if there) */
	/* (shadowed == TRUE) if there is an anon at the faulting address */
	UVMHIST_LOG(maphist, "  shadowed=%d, will_get=%d", shadowed,
	    (uobj && shadowed == FALSE),0,0);

	/*
	 * note that if we are really short of RAM we could sleep in the above
	 * call to pmap_enter with everything locked.  bad?
	 *
	 * XXX Actually, that is bad; pmap_enter() should just fail in that
	 * XXX case.  --thorpej
	 */

	/*
	 * if the desired page is not shadowed by the amap and we have a
	 * backing object, then we check to see if the backing object would
	 * prefer to handle the fault itself (rather than letting us do it
	 * with the usual pgo_get hook).  the backing object signals this by
	 * providing a pgo_fault routine.
	 */

	if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) {
		simple_lock(&uobj->vmobjlock);

		/* locked: maps(read), amap (if there), uobj */
		error = uobj->pgops->pgo_fault(&ufi, startva, pages, npages,
		    centeridx, fault_type, access_type, PGO_LOCKED|PGO_SYNCIO);

		/* locked: nothing, pgo_fault has unlocked everything */

		if (error == ERESTART)
			goto ReFault;		/* try again! */
		/*
		 * object fault routine responsible for pmap_update().
		 */
		return error;
	}

	/*
	 * now, if the desired page is not shadowed by the amap and we have
	 * a backing object that does not have a special fault routine, then
	 * we ask (with pgo_get) the object for resident pages that we care
	 * about and attempt to map them in.  we do not let pgo_get block
	 * (PGO_LOCKED).
	 */

	if (uobj && shadowed == FALSE) {
		simple_lock(&uobj->vmobjlock);

		/* locked (!shadowed): maps(read), amap (if there), uobj */
		/*
		 * the following call to pgo_get does _not_ change locking state
		 */

		uvmexp.fltlget++;
		gotpages = npages;
		(void) uobj->pgops->pgo_get(uobj, ufi.entry->offset +
				(startva - ufi.entry->start),
				pages, &gotpages, centeridx,
				access_type & MASK(ufi.entry),
				ufi.entry->advice, PGO_LOCKED);

		/*
		 * check for pages to map, if we got any
		 */

		uobjpage = NULL;

		if (gotpages) {
			currva = startva;
			for (lcv = 0; lcv < npages;
			     lcv++, currva += PAGE_SIZE) {
				if (pages[lcv] == NULL ||
				    pages[lcv] == PGO_DONTCARE) {
					continue;
				}

				/*
				 * if center page is resident and not
				 * PG_BUSY|PG_RELEASED then pgo_get
				 * made it PG_BUSY for us and gave
				 * us a handle to it.   remember this
				 * page as "uobjpage." (for later use).
				 */

				if (lcv == centeridx) {
					uobjpage = pages[lcv];
					UVMHIST_LOG(maphist, "  got uobjpage "
					    "(0x%x) with locked get",
					    uobjpage, 0,0,0);
					continue;
				}

				/*
				 * calling pgo_get with PGO_LOCKED returns us
				 * pages which are neither busy nor released,
				 * so we don't need to check for this.
				 * we can just directly enter the pages.
				 */

				uvm_lock_pageq();
				uvm_pageactivate(pages[lcv]);
				uvm_unlock_pageq();
				UVMHIST_LOG(maphist,
				  "  MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x",
				  ufi.orig_map->pmap, currva, pages[lcv], 0);
				uvmexp.fltnomap++;

				/*
				 * Since this page isn't the page that's
				 * actually faulting, ignore pmap_enter()
				 * failures; it's not critical that we
				 * enter these right now.
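				 * (PMAP_CANFAIL asks the pmap to return an
				 * error rather than sleep or panic when it
				 * is out of resources, which is why the
				 * return value can be ignored here.)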
930 */ 931 932 (void) pmap_enter(ufi.orig_map->pmap, currva, 933 VM_PAGE_TO_PHYS(pages[lcv]), 934 pages[lcv]->flags & PG_RDONLY ? 935 VM_PROT_READ : enter_prot & MASK(ufi.entry), 936 PMAP_CANFAIL | 937 (wired ? PMAP_WIRED : 0)); 938 939 /* 940 * NOTE: page can't be PG_WANTED or PG_RELEASED 941 * because we've held the lock the whole time 942 * we've had the handle. 943 */ 944 945 pages[lcv]->flags &= ~(PG_BUSY); 946 UVM_PAGE_OWN(pages[lcv], NULL); 947 } 948 pmap_update(ufi.orig_map->pmap); 949 } 950 } else { 951 uobjpage = NULL; 952 } 953 954 /* locked (shadowed): maps(read), amap */ 955 /* locked (!shadowed): maps(read), amap(if there), 956 uobj(if !null), uobjpage(if !null) */ 957 958 /* 959 * note that at this point we are done with any front or back pages. 960 * we are now going to focus on the center page (i.e. the one we've 961 * faulted on). if we have faulted on the top (anon) layer 962 * [i.e. case 1], then the anon we want is anons[centeridx] (we have 963 * not touched it yet). if we have faulted on the bottom (uobj) 964 * layer [i.e. case 2] and the page was both present and available, 965 * then we've got a pointer to it as "uobjpage" and we've already 966 * made it BUSY. 967 */ 968 969 /* 970 * there are four possible cases we must address: 1A, 1B, 2A, and 2B 971 */ 972 973 /* 974 * redirect case 2: if we are not shadowed, go to case 2. 975 */ 976 977 if (shadowed == FALSE) 978 goto Case2; 979 980 /* locked: maps(read), amap */ 981 982 /* 983 * handle case 1: fault on an anon in our amap 984 */ 985 986 anon = anons[centeridx]; 987 UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0); 988 simple_lock(&anon->an_lock); 989 990 /* locked: maps(read), amap, anon */ 991 992 /* 993 * no matter if we have case 1A or case 1B we are going to need to 994 * have the anon's memory resident. ensure that now. 995 */ 996 997 /* 998 * let uvmfault_anonget do the dirty work. 999 * if it fails (!OK) it will unlock everything for us. 1000 * if it succeeds, locks are still valid and locked. 1001 * also, if it is OK, then the anon's page is on the queues. 1002 * if the page is on loan from a uvm_object, then anonget will 1003 * lock that object for us if it does not fail. 1004 */ 1005 1006 error = uvmfault_anonget(&ufi, amap, anon); 1007 switch (error) { 1008 case 0: 1009 break; 1010 1011 case ERESTART: 1012 goto ReFault; 1013 1014 case EAGAIN: 1015 tsleep(&lbolt, PVM, "fltagain1", 0); 1016 goto ReFault; 1017 1018 default: 1019 return error; 1020 } 1021 1022 /* 1023 * uobj is non null if the page is on loan from an object (i.e. uobj) 1024 */ 1025 1026 uobj = anon->u.an_page->uobject; /* locked by anonget if !NULL */ 1027 1028 /* locked: maps(read), amap, anon, uobj(if one) */ 1029 1030 /* 1031 * special handling for loaned pages 1032 */ 1033 1034 if (anon->u.an_page->loan_count) { 1035 1036 if ((access_type & VM_PROT_WRITE) == 0) { 1037 1038 /* 1039 * for read faults on loaned pages we just cap the 1040 * protection at read-only. 1041 */ 1042 1043 enter_prot = enter_prot & ~VM_PROT_WRITE; 1044 1045 } else { 1046 /* 1047 * note that we can't allow writes into a loaned page! 1048 * 1049 * if we have a write fault on a loaned page in an 1050 * anon then we need to look at the anon's ref count. 1051 * if it is greater than one then we are going to do 1052 * a normal copy-on-write fault into a new anon (this 1053 * is not a problem). 
however, if the reference count 1054 * is one (a case where we would normally allow a 1055 * write directly to the page) then we need to kill 1056 * the loan before we continue. 1057 */ 1058 1059 /* >1 case is already ok */ 1060 if (anon->an_ref == 1) { 1061 1062 /* get new un-owned replacement page */ 1063 pg = uvm_pagealloc(NULL, 0, NULL, 0); 1064 if (pg == NULL) { 1065 uvmfault_unlockall(&ufi, amap, uobj, 1066 anon); 1067 uvm_wait("flt_noram2"); 1068 goto ReFault; 1069 } 1070 1071 /* 1072 * copy data, kill loan, and drop uobj lock 1073 * (if any) 1074 */ 1075 /* copy old -> new */ 1076 uvm_pagecopy(anon->u.an_page, pg); 1077 1078 /* force reload */ 1079 pmap_page_protect(anon->u.an_page, 1080 VM_PROT_NONE); 1081 uvm_lock_pageq(); /* KILL loan */ 1082 if (uobj) 1083 /* if we were loaning */ 1084 anon->u.an_page->loan_count--; 1085 anon->u.an_page->uanon = NULL; 1086 /* in case we owned */ 1087 anon->u.an_page->pqflags &= ~PQ_ANON; 1088 uvm_unlock_pageq(); 1089 if (uobj) { 1090 simple_unlock(&uobj->vmobjlock); 1091 uobj = NULL; 1092 } 1093 1094 /* install new page in anon */ 1095 anon->u.an_page = pg; 1096 pg->uanon = anon; 1097 pg->pqflags |= PQ_ANON; 1098 pg->flags &= ~(PG_BUSY|PG_FAKE); 1099 UVM_PAGE_OWN(pg, NULL); 1100 1101 /* done! */ 1102 } /* ref == 1 */ 1103 } /* write fault */ 1104 } /* loan count */ 1105 1106 /* 1107 * if we are case 1B then we will need to allocate a new blank 1108 * anon to transfer the data into. note that we have a lock 1109 * on anon, so no one can busy or release the page until we are done. 1110 * also note that the ref count can't drop to zero here because 1111 * it is > 1 and we are only dropping one ref. 1112 * 1113 * in the (hopefully very rare) case that we are out of RAM we 1114 * will unlock, wait for more RAM, and refault. 1115 * 1116 * if we are out of anon VM we kill the process (XXX: could wait?). 1117 */ 1118 1119 if ((access_type & VM_PROT_WRITE) != 0 && anon->an_ref > 1) { 1120 1121 UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0); 1122 uvmexp.flt_acow++; 1123 oanon = anon; /* oanon = old, locked anon */ 1124 anon = uvm_analloc(); 1125 if (anon) { 1126 /* new anon is locked! */ 1127 pg = uvm_pagealloc(NULL, 0, anon, 0); 1128 } 1129 1130 /* check for out of RAM */ 1131 if (anon == NULL || pg == NULL) { 1132 if (anon) { 1133 anon->an_ref--; 1134 simple_unlock(&anon->an_lock); 1135 uvm_anfree(anon); 1136 } 1137 uvmfault_unlockall(&ufi, amap, uobj, oanon); 1138 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 1139 if (anon == NULL || uvmexp.swpgonly == uvmexp.swpages) { 1140 UVMHIST_LOG(maphist, 1141 "<- failed. out of VM",0,0,0,0); 1142 uvmexp.fltnoanon++; 1143 return ENOMEM; 1144 } 1145 1146 uvmexp.fltnoram++; 1147 uvm_wait("flt_noram3"); /* out of RAM, wait for more */ 1148 goto ReFault; 1149 } 1150 1151 /* got all resources, replace anon with nanon */ 1152 uvm_pagecopy(oanon->u.an_page, pg); 1153 uvm_pageactivate(pg); 1154 pg->flags &= ~(PG_BUSY|PG_FAKE); 1155 UVM_PAGE_OWN(pg, NULL); 1156 amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start, 1157 anon, 1); 1158 1159 /* deref: can not drop to zero here by defn! */ 1160 oanon->an_ref--; 1161 1162 /* 1163 * note: oanon is still locked, as is the new anon. we 1164 * need to check for this later when we unlock oanon; if 1165 * oanon != anon, we'll have to unlock anon, too. 
1166 */ 1167 1168 } else { 1169 1170 uvmexp.flt_anon++; 1171 oanon = anon; /* old, locked anon is same as anon */ 1172 pg = anon->u.an_page; 1173 if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ 1174 enter_prot = enter_prot & ~VM_PROT_WRITE; 1175 1176 } 1177 1178 /* locked: maps(read), amap, oanon, anon (if different from oanon) */ 1179 1180 /* 1181 * now map the page in. 1182 */ 1183 1184 UVMHIST_LOG(maphist, " MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x", 1185 ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0); 1186 if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), 1187 enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) 1188 != 0) { 1189 1190 /* 1191 * No need to undo what we did; we can simply think of 1192 * this as the pmap throwing away the mapping information. 1193 * 1194 * We do, however, have to go through the ReFault path, 1195 * as the map may change while we're asleep. 1196 */ 1197 1198 if (anon != oanon) 1199 simple_unlock(&anon->an_lock); 1200 uvmfault_unlockall(&ufi, amap, uobj, oanon); 1201 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 1202 if (uvmexp.swpgonly == uvmexp.swpages) { 1203 UVMHIST_LOG(maphist, 1204 "<- failed. out of VM",0,0,0,0); 1205 /* XXX instrumentation */ 1206 return ENOMEM; 1207 } 1208 /* XXX instrumentation */ 1209 uvm_wait("flt_pmfail1"); 1210 goto ReFault; 1211 } 1212 1213 /* 1214 * ... update the page queues. 1215 */ 1216 1217 uvm_lock_pageq(); 1218 if (fault_type == VM_FAULT_WIRE) { 1219 uvm_pagewire(pg); 1220 1221 /* 1222 * since the now-wired page cannot be paged out, 1223 * release its swap resources for others to use. 1224 * since an anon with no swap cannot be PG_CLEAN, 1225 * clear its clean flag now. 1226 */ 1227 1228 pg->flags &= ~(PG_CLEAN); 1229 uvm_anon_dropswap(anon); 1230 } else { 1231 uvm_pageactivate(pg); 1232 } 1233 uvm_unlock_pageq(); 1234 1235 /* 1236 * done case 1! finish up by unlocking everything and returning success 1237 */ 1238 1239 if (anon != oanon) 1240 simple_unlock(&anon->an_lock); 1241 uvmfault_unlockall(&ufi, amap, uobj, oanon); 1242 pmap_update(ufi.orig_map->pmap); 1243 return 0; 1244 1245 Case2: 1246 /* 1247 * handle case 2: faulting on backing object or zero fill 1248 */ 1249 1250 /* 1251 * locked: 1252 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) 1253 */ 1254 1255 /* 1256 * note that uobjpage can not be PGO_DONTCARE at this point. we now 1257 * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we 1258 * have a backing object, check and see if we are going to promote 1259 * the data up to an anon during the fault. 1260 */ 1261 1262 if (uobj == NULL) { 1263 uobjpage = PGO_DONTCARE; 1264 promote = TRUE; /* always need anon here */ 1265 } else { 1266 KASSERT(uobjpage != PGO_DONTCARE); 1267 promote = (access_type & VM_PROT_WRITE) && 1268 UVM_ET_ISCOPYONWRITE(ufi.entry); 1269 } 1270 UVMHIST_LOG(maphist, " case 2 fault: promote=%d, zfill=%d", 1271 promote, (uobj == NULL), 0,0); 1272 1273 /* 1274 * if uobjpage is not null then we do not need to do I/O to get the 1275 * uobjpage. 1276 * 1277 * if uobjpage is null, then we need to unlock and ask the pager to 1278 * get the data for us. once we have the data, we need to reverify 1279 * the state the world. we are currently not holding any resources. 
1280 */ 1281 1282 if (uobjpage) { 1283 /* update rusage counters */ 1284 curproc->p_addr->u_stats.p_ru.ru_minflt++; 1285 } else { 1286 /* update rusage counters */ 1287 curproc->p_addr->u_stats.p_ru.ru_majflt++; 1288 1289 /* locked: maps(read), amap(if there), uobj */ 1290 uvmfault_unlockall(&ufi, amap, NULL, NULL); 1291 /* locked: uobj */ 1292 1293 uvmexp.fltget++; 1294 gotpages = 1; 1295 uoff = (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset; 1296 error = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages, 1297 0, access_type & MASK(ufi.entry), ufi.entry->advice, 1298 PGO_SYNCIO); 1299 /* locked: uobjpage(if no error) */ 1300 1301 /* 1302 * recover from I/O 1303 */ 1304 1305 if (error) { 1306 if (error == EAGAIN) { 1307 UVMHIST_LOG(maphist, 1308 " pgo_get says TRY AGAIN!",0,0,0,0); 1309 tsleep(&lbolt, PVM, "fltagain2", 0); 1310 goto ReFault; 1311 } 1312 1313 UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)", 1314 error, 0,0,0); 1315 return error; 1316 } 1317 1318 /* locked: uobjpage */ 1319 1320 uvm_lock_pageq(); 1321 uvm_pageactivate(uobjpage); 1322 uvm_unlock_pageq(); 1323 1324 /* 1325 * re-verify the state of the world by first trying to relock 1326 * the maps. always relock the object. 1327 */ 1328 1329 locked = uvmfault_relock(&ufi); 1330 if (locked && amap) 1331 amap_lock(amap); 1332 simple_lock(&uobj->vmobjlock); 1333 1334 /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */ 1335 /* locked(!locked): uobj, uobjpage */ 1336 1337 /* 1338 * verify that the page has not be released and re-verify 1339 * that amap slot is still free. if there is a problem, 1340 * we unlock and clean up. 1341 */ 1342 1343 if ((uobjpage->flags & PG_RELEASED) != 0 || 1344 (locked && amap && 1345 amap_lookup(&ufi.entry->aref, 1346 ufi.orig_rvaddr - ufi.entry->start))) { 1347 if (locked) 1348 uvmfault_unlockall(&ufi, amap, NULL, NULL); 1349 locked = FALSE; 1350 } 1351 1352 /* 1353 * didn't get the lock? release the page and retry. 1354 */ 1355 1356 if (locked == FALSE) { 1357 UVMHIST_LOG(maphist, 1358 " wasn't able to relock after fault: retry", 1359 0,0,0,0); 1360 if (uobjpage->flags & PG_WANTED) 1361 wakeup(uobjpage); 1362 if (uobjpage->flags & PG_RELEASED) { 1363 uvmexp.fltpgrele++; 1364 uvm_pagefree(uobjpage); 1365 goto ReFault; 1366 } 1367 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1368 UVM_PAGE_OWN(uobjpage, NULL); 1369 simple_unlock(&uobj->vmobjlock); 1370 goto ReFault; 1371 } 1372 1373 /* 1374 * we have the data in uobjpage which is busy and 1375 * not released. we are holding object lock (so the page 1376 * can't be released on us). 1377 */ 1378 1379 /* locked: maps(read), amap(if !null), uobj, uobjpage */ 1380 } 1381 1382 /* 1383 * locked: 1384 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1385 */ 1386 1387 /* 1388 * notes: 1389 * - at this point uobjpage can not be NULL 1390 * - at this point uobjpage can not be PG_RELEASED (since we checked 1391 * for it above) 1392 * - at this point uobjpage could be PG_WANTED (handle later) 1393 */ 1394 1395 if (promote == FALSE) { 1396 1397 /* 1398 * we are not promoting. if the mapping is COW ensure that we 1399 * don't give more access than we should (e.g. when doing a read 1400 * fault on a COPYONWRITE mapping we want to map the COW page in 1401 * R/O even though the entry protection could be R/W). 1402 * 1403 * set "pg" to the page we want to map in (uobjpage, usually) 1404 */ 1405 1406 /* no anon in this case. 
*/ 1407 anon = NULL; 1408 1409 uvmexp.flt_obj++; 1410 if (UVM_ET_ISCOPYONWRITE(ufi.entry)) 1411 enter_prot &= ~VM_PROT_WRITE; 1412 pg = uobjpage; /* map in the actual object */ 1413 1414 /* assert(uobjpage != PGO_DONTCARE) */ 1415 1416 /* 1417 * we are faulting directly on the page. be careful 1418 * about writing to loaned pages... 1419 */ 1420 1421 if (uobjpage->loan_count) { 1422 if ((access_type & VM_PROT_WRITE) == 0) { 1423 /* read fault: cap the protection at readonly */ 1424 /* cap! */ 1425 enter_prot = enter_prot & ~VM_PROT_WRITE; 1426 } else { 1427 /* write fault: must break the loan here */ 1428 1429 /* alloc new un-owned page */ 1430 pg = uvm_pagealloc(NULL, 0, NULL, 0); 1431 1432 if (pg == NULL) { 1433 1434 /* 1435 * drop ownership of page, it can't 1436 * be released 1437 */ 1438 1439 if (uobjpage->flags & PG_WANTED) 1440 wakeup(uobjpage); 1441 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1442 UVM_PAGE_OWN(uobjpage, NULL); 1443 1444 uvmfault_unlockall(&ufi, amap, uobj, 1445 NULL); 1446 UVMHIST_LOG(maphist, 1447 " out of RAM breaking loan, waiting", 1448 0,0,0,0); 1449 uvmexp.fltnoram++; 1450 uvm_wait("flt_noram4"); 1451 goto ReFault; 1452 } 1453 1454 /* 1455 * copy the data from the old page to the new 1456 * one and clear the fake/clean flags on the 1457 * new page (keep it busy). force a reload 1458 * of the old page by clearing it from all 1459 * pmaps. then lock the page queues to 1460 * rename the pages. 1461 */ 1462 1463 uvm_pagecopy(uobjpage, pg); /* old -> new */ 1464 pg->flags &= ~(PG_FAKE|PG_CLEAN); 1465 pmap_page_protect(uobjpage, VM_PROT_NONE); 1466 if (uobjpage->flags & PG_WANTED) 1467 wakeup(uobjpage); 1468 /* uobj still locked */ 1469 uobjpage->flags &= ~(PG_WANTED|PG_BUSY); 1470 UVM_PAGE_OWN(uobjpage, NULL); 1471 1472 uvm_lock_pageq(); 1473 offset = uobjpage->offset; 1474 uvm_pagerealloc(uobjpage, NULL, 0); 1475 1476 /* 1477 * at this point we have absolutely no 1478 * control over uobjpage 1479 */ 1480 1481 /* install new page */ 1482 uvm_pageactivate(pg); 1483 uvm_pagerealloc(pg, uobj, offset); 1484 uvm_unlock_pageq(); 1485 1486 /* 1487 * done! loan is broken and "pg" is 1488 * PG_BUSY. it can now replace uobjpage. 1489 */ 1490 1491 uobjpage = pg; 1492 } 1493 } 1494 } else { 1495 1496 /* 1497 * if we are going to promote the data to an anon we 1498 * allocate a blank anon here and plug it into our amap. 1499 */ 1500 #if DIAGNOSTIC 1501 if (amap == NULL) 1502 panic("uvm_fault: want to promote data, but no anon"); 1503 #endif 1504 1505 anon = uvm_analloc(); 1506 if (anon) { 1507 1508 /* 1509 * The new anon is locked. 1510 * 1511 * In `Fill in data...' below, if 1512 * uobjpage == PGO_DONTCARE, we want 1513 * a zero'd, dirty page, so have 1514 * uvm_pagealloc() do that for us. 1515 */ 1516 1517 pg = uvm_pagealloc(NULL, 0, anon, 1518 (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0); 1519 } 1520 1521 /* 1522 * out of memory resources? 1523 */ 1524 1525 if (anon == NULL || pg == NULL) { 1526 if (anon != NULL) { 1527 anon->an_ref--; 1528 simple_unlock(&anon->an_lock); 1529 uvm_anfree(anon); 1530 } 1531 1532 /* 1533 * arg! must unbusy our page and fail or sleep. 1534 */ 1535 1536 if (uobjpage != PGO_DONTCARE) { 1537 if (uobjpage->flags & PG_WANTED) 1538 /* still holding object lock */ 1539 wakeup(uobjpage); 1540 1541 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1542 UVM_PAGE_OWN(uobjpage, NULL); 1543 } 1544 1545 /* unlock and fail ... 
*/ 1546 uvmfault_unlockall(&ufi, amap, uobj, NULL); 1547 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 1548 if (anon == NULL || uvmexp.swpgonly == uvmexp.swpages) { 1549 UVMHIST_LOG(maphist, " promote: out of VM", 1550 0,0,0,0); 1551 uvmexp.fltnoanon++; 1552 return ENOMEM; 1553 } 1554 1555 UVMHIST_LOG(maphist, " out of RAM, waiting for more", 1556 0,0,0,0); 1557 uvmexp.fltnoram++; 1558 uvm_wait("flt_noram5"); 1559 goto ReFault; 1560 } 1561 1562 /* 1563 * fill in the data 1564 */ 1565 1566 if (uobjpage != PGO_DONTCARE) { 1567 uvmexp.flt_prcopy++; 1568 /* copy page [pg now dirty] */ 1569 uvm_pagecopy(uobjpage, pg); 1570 1571 /* 1572 * promote to shared amap? make sure all sharing 1573 * procs see it 1574 */ 1575 1576 if ((amap_flags(amap) & AMAP_SHARED) != 0) { 1577 pmap_page_protect(uobjpage, VM_PROT_NONE); 1578 /* 1579 * XXX: PAGE MIGHT BE WIRED! 1580 */ 1581 } 1582 1583 /* 1584 * dispose of uobjpage. it can't be PG_RELEASED 1585 * since we still hold the object lock. 1586 * drop handle to uobj as well. 1587 */ 1588 1589 if (uobjpage->flags & PG_WANTED) 1590 /* still have the obj lock */ 1591 wakeup(uobjpage); 1592 uobjpage->flags &= ~(PG_BUSY|PG_WANTED); 1593 UVM_PAGE_OWN(uobjpage, NULL); 1594 simple_unlock(&uobj->vmobjlock); 1595 uobj = NULL; 1596 1597 UVMHIST_LOG(maphist, 1598 " promote uobjpage 0x%x to anon/page 0x%x/0x%x", 1599 uobjpage, anon, pg, 0); 1600 1601 } else { 1602 uvmexp.flt_przero++; 1603 1604 /* 1605 * Page is zero'd and marked dirty by uvm_pagealloc() 1606 * above. 1607 */ 1608 1609 UVMHIST_LOG(maphist," zero fill anon/page 0x%x/0%x", 1610 anon, pg, 0, 0); 1611 } 1612 amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start, 1613 anon, 0); 1614 } 1615 1616 /* 1617 * locked: 1618 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj), 1619 * anon(if !null), pg(if anon) 1620 * 1621 * note: pg is either the uobjpage or the new page in the new anon 1622 */ 1623 1624 /* 1625 * all resources are present. we can now map it in and free our 1626 * resources. 1627 */ 1628 1629 UVMHIST_LOG(maphist, 1630 " MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d", 1631 ufi.orig_map->pmap, ufi.orig_rvaddr, pg, promote); 1632 KASSERT(access_type == VM_PROT_READ || (pg->flags & PG_RDONLY) == 0); 1633 if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), 1634 pg->flags & PG_RDONLY ? VM_PROT_READ : enter_prot, 1635 access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) != 0) { 1636 1637 /* 1638 * No need to undo what we did; we can simply think of 1639 * this as the pmap throwing away the mapping information. 1640 * 1641 * We do, however, have to go through the ReFault path, 1642 * as the map may change while we're asleep. 1643 */ 1644 1645 if (pg->flags & PG_WANTED) 1646 wakeup(pg); 1647 1648 /* 1649 * note that pg can't be PG_RELEASED since we did not drop 1650 * the object lock since the last time we checked. 1651 */ 1652 1653 pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); 1654 UVM_PAGE_OWN(pg, NULL); 1655 uvmfault_unlockall(&ufi, amap, uobj, anon); 1656 KASSERT(uvmexp.swpgonly <= uvmexp.swpages); 1657 if (uvmexp.swpgonly == uvmexp.swpages) { 1658 UVMHIST_LOG(maphist, 1659 "<- failed. 
out of VM",0,0,0,0);
			/* XXX instrumentation */
			return ENOMEM;
		}
		/* XXX instrumentation */
		uvm_wait("flt_pmfail2");
		goto ReFault;
	}

	uvm_lock_pageq();
	if (fault_type == VM_FAULT_WIRE) {
		uvm_pagewire(pg);
		if (pg->pqflags & PQ_AOBJ) {

			/*
			 * since the now-wired page cannot be paged out,
			 * release its swap resources for others to use.
			 * since an aobj page with no swap cannot be PG_CLEAN,
			 * clear its clean flag now.
			 */

			pg->flags &= ~(PG_CLEAN);
			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
		}
	} else {
		uvm_pageactivate(pg);
	}
	uvm_unlock_pageq();
	if (pg->flags & PG_WANTED)
		wakeup(pg);

	/*
	 * note that pg can't be PG_RELEASED since we did not drop the object
	 * lock since the last time we checked.
	 */

	pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
	UVM_PAGE_OWN(pg, NULL);
	uvmfault_unlockall(&ufi, amap, uobj, anon);
	pmap_update(ufi.orig_map->pmap);
	UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0);
	return 0;
}

/*
 * uvm_fault_wire: wire down a range of virtual addresses in a map.
 *
 * => map may be read-locked by caller, but MUST NOT be write-locked.
 * => if map is read-locked, any operations which may cause map to
 *	be write-locked in uvm_fault() must be taken care of by
 *	the caller.  See uvm_map_pageable().
 */

int
uvm_fault_wire(map, start, end, access_type)
	struct vm_map *map;
	vaddr_t start, end;
	vm_prot_t access_type;
{
	vaddr_t va;
	int error;

	/*
	 * now fault it in a page at a time.   if the fault fails then we have
	 * to undo what we have done.   note that in uvm_fault VM_PROT_NONE
	 * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
	 */

	/*
	 * XXX work around overflowing a vaddr_t.  this prevents us from
	 * wiring the last page in the address space, though.
	 */
	if (start > end) {
		return EFAULT;
	}

	for (va = start ; va < end ; va += PAGE_SIZE) {
		error = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
		if (error) {
			if (va != start) {
				uvm_fault_unwire(map, start, va);
			}
			return error;
		}
	}
	return 0;
}

/*
 * uvm_fault_unwire(): unwire range of virtual space.
 */

void
uvm_fault_unwire(map, start, end)
	struct vm_map *map;
	vaddr_t start, end;
{
	vm_map_lock_read(map);
	uvm_fault_unwire_locked(map, start, end);
	vm_map_unlock_read(map);
}

/*
 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
 *
 * => map must be at least read-locked.
 */

void
uvm_fault_unwire_locked(map, start, end)
	struct vm_map *map;
	vaddr_t start, end;
{
	struct vm_map_entry *entry;
	pmap_t pmap = vm_map_pmap(map);
	vaddr_t va;
	paddr_t pa;
	struct vm_page *pg;

	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * we assume that the area we are unwiring has actually been wired
	 * in the first place.   this means that we should be able to extract
	 * the PAs from the pmap.   we also lock out the page daemon so that
	 * we can call uvm_pageunwire.
	 */

	uvm_lock_pageq();

	/*
	 * find the beginning map entry for the region.
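	 * the loop below then walks the range one page at a time, advancing
	 * "entry" whenever "va" crosses into the next map entry, so the
	 * [start, end) range must be covered by contiguous map entries.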
	 */

	KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
	if (uvm_map_lookup_entry(map, start, &entry) == FALSE)
		panic("uvm_fault_unwire_locked: address not in map");

	for (va = start; va < end; va += PAGE_SIZE) {
		if (pmap_extract(pmap, va, &pa) == FALSE)
			panic("uvm_fault_unwire_locked: unwiring "
			    "non-wired memory");

		/*
		 * make sure the current entry is for the address we're
		 * dealing with.  if not, grab the next entry.
		 */

		KASSERT(va >= entry->start);
		if (va >= entry->end) {
			KASSERT(entry->next != &map->header &&
				entry->next->start <= entry->end);
			entry = entry->next;
		}

		/*
		 * if the entry is no longer wired, tell the pmap.
		 */
		if (VM_MAPENT_ISWIRED(entry) == 0)
			pmap_unwire(pmap, va);

		pg = PHYS_TO_VM_PAGE(pa);
		if (pg)
			uvm_pageunwire(pg);
	}

	uvm_unlock_pageq();
}
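
/*
 * example (illustrative only, not part of this file's interface):
 * a typical caller such as uvm_map_pageable() wires a range with
 * uvm_fault_wire() and later undoes it with uvm_fault_unwire(),
 * roughly:
 *
 *	error = uvm_fault_wire(map, start, end,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	if (error)
 *		return error;
 *	...
 *	uvm_fault_unwire(map, start, end);
 *
 * on failure uvm_fault_wire() has already unwired the pages it managed
 * to wire, so the caller needs no extra cleanup.   the access_type used
 * by real callers varies with the request.
 */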