/*	$NetBSD: uvm_fault.c,v 1.95 2005/06/27 02:19:48 thorpej Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
 */

/*
 * uvm_fault.c: fault handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.95 2005/06/27 02:19:48 thorpej Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/user.h>

#include <uvm/uvm.h>

/*
 *
 * a word on page faults:
 *
 * types of page faults we handle:
 *
 * CASE 1: upper layer faults                   CASE 2: lower layer faults
 *
 *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
 *    read/write1     write>1                  read/write   +-cow_write/zero
 *         |             |                         |        |
 *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
 * amap |  V  |       |  ----------->new|          |        | |  ^  |
 *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
 *                                                 |        |    |
 *      +-----+       +-----+                   +--|--+     | +--|--+
 * uobj | d/c |       | d/c |                   |  V  |     +----|  |
 *      +-----+       +-----+                   +-----+       +-----+
 *
 * d/c = don't care
 *
 * case [0]: layerless fault
 *	no amap or uobj is present.   this is an error.
 *
 * case [1]: upper layer fault [anon active]
 *     1A: [read] or [write with anon->an_ref == 1]
 *		I/O takes place in top level anon and uobj is not touched.
 *     1B: [write with anon->an_ref > 1]
 *		new anon is alloc'd and data is copied off ["COW"]
 *
 * case [2]: lower layer fault [uobj]
 *     2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
 *		I/O takes place directly in object.
 *     2B: [write to copy_on_write] or [read on NULL uobj]
 *		data is "promoted" from uobj to a new anon.
 *		if uobj is null, then we zero fill.
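 *
 * (a concrete illustration, consistent with the cases above: the first
 * write to a page of a copy-on-write mapping of a file is a 2B fault,
 * promoting the data from the file's uobj into a new anon; a later
 * write to that anon while it is still shared, i.e. an_ref > 1 such as
 * after a fork, is a 1B fault and copies the data into yet another anon.)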
 *
 * we follow the standard UVM locking protocol ordering:
 *
 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
 * we hold a PG_BUSY page if we unlock for I/O
 *
 *
 * the code is structured as follows:
 *
 *     - init the "IN" params in the ufi structure
 *   ReFault:
 *     - do lookups [locks maps], check protection, handle needs_copy
 *     - check for case 0 fault (error)
 *     - establish "range" of fault
 *     - if we have an amap lock it and extract the anons
 *     - if sequential advice deactivate pages behind us
 *     - at the same time check pmap for unmapped areas and anon for pages
 *	 that we could map in (and do map it if found)
 *     - check object for resident pages that we could map in
 *     - if (case 2) goto Case2
 *     - >>> handle case 1
 *           - ensure source anon is resident in RAM
 *           - if case 1B alloc new anon and copy from source
 *           - map the correct page in
 *   Case2:
 *     - >>> handle case 2
 *           - ensure source page is resident (if uobj)
 *           - if case 2B alloc new anon and copy from source (could be zero
 *		fill if uobj == NULL)
 *           - map the correct page in
 *     - done!
 *
 * note on paging:
 *   if we have to do I/O we place a PG_BUSY page in the correct object,
 * unlock everything, and do the I/O.   when I/O is done we must reverify
 * the state of the world before assuming that our data structures are
 * valid.   [because mappings could change while the map is unlocked]
 *
 * alternative 1: unbusy the page in question and restart the page fault
 *    from the top (ReFault).   this is easy but does not take advantage
 *    of the information that we already have from our previous lookup,
 *    although it is possible that the "hints" in the vm_map will help here.
 *
 * alternative 2: the system already keeps track of a "version" number of
 *    a map.   [i.e. every time you write-lock a map (e.g. to change a
 *    mapping) you bump the version number up by one...]   so, we can save
 *    the version number of the map before we release the lock and start I/O.
 *    then when I/O is done we can relock and check the version numbers
 *    to see if anything changed.   this might save us something over
 *    alternative 1 because we don't have to unbusy the page and there may
 *    be fewer compares.
 *
 * alternative 3: put in backpointers or a way to "hold" part of a map
 *    in place while I/O is in progress.   this could be complex to
 *    implement (especially with structures like amap that can be referenced
 *    by multiple map entries, and figuring out what should wait could be
 *    complex as well...).
 *
 * given that we are not currently multiprocessor or multithreaded we might
 * as well choose alternative 2 now.   maybe alternative 3 would be useful
 * in the future.   XXX keep in mind for future consideration//rechecking.
 */

/*
 * local data structures
 */

struct uvm_advice {
	int advice;
	int nback;
	int nforw;
};

/*
 * page range array:
 * note: index in array must match "advice" value
 * XXX: borrowed numbers from freebsd.   do they work well for us?
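 *
 * a quick check of the numbers below (see the range computation in
 * uvm_fault() for how nback/nforw are actually clipped at entry edges):
 *   MADV_NORMAL:     up to 3 pages behind + the faulting page + 4 ahead
 *                    = at most 8 pages examined per fault
 *   MADV_RANDOM:     the faulting page only (0 + 1 + 0)
 *   MADV_SEQUENTIAL: 8 + 1 + 7 = 16 pages, which is why UVM_MAXRANGE
 *                    below must be at least nback + nforw + 1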
 */

static const struct uvm_advice uvmadvice[] = {
	{ MADV_NORMAL, 3, 4 },
	{ MADV_RANDOM, 0, 0 },
	{ MADV_SEQUENTIAL, 8, 7},
};

#define UVM_MAXRANGE 16	/* must be MAX() of nback+nforw+1 */

/*
 * private prototypes
 */

/*
 * inline functions
 */

/*
 * uvmfault_anonflush: try and deactivate pages in specified anons
 *
 * => does not have to deactivate page if it is busy
 */

static __inline void
uvmfault_anonflush(struct vm_anon **anons, int n)
{
	int lcv;
	struct vm_page *pg;

	for (lcv = 0 ; lcv < n ; lcv++) {
		if (anons[lcv] == NULL)
			continue;
		simple_lock(&anons[lcv]->an_lock);
		pg = anons[lcv]->an_page;
		if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
			uvm_lock_pageq();
			if (pg->wire_count == 0) {
				pmap_clear_reference(pg);
				uvm_pagedeactivate(pg);
			}
			uvm_unlock_pageq();
		}
		simple_unlock(&anons[lcv]->an_lock);
	}
}

/*
 * normal functions
 */

/*
 * uvmfault_amapcopy: clear "needs_copy" in a map.
 *
 * => called with VM data structures unlocked (usually, see below)
 * => we get a write lock on the maps and clear needs_copy for a VA
 * => if we are out of RAM we sleep (waiting for more)
 */

static void
uvmfault_amapcopy(struct uvm_faultinfo *ufi)
{
	for (;;) {

		/*
		 * no mapping?  give up.
		 */

		if (uvmfault_lookup(ufi, TRUE) == FALSE)
			return;

		/*
		 * copy if needed.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry))
			amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE,
			    ufi->orig_rvaddr, ufi->orig_rvaddr + 1);

		/*
		 * didn't work?  must be out of RAM.   unlock and sleep.
		 */

		if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
			uvmfault_unlockmaps(ufi, TRUE);
			uvm_wait("fltamapcopy");
			continue;
		}

		/*
		 * got it!   unlock and return.
		 */

		uvmfault_unlockmaps(ufi, TRUE);
		return;
	}
	/*NOTREACHED*/
}

/*
 * uvmfault_anonget: get data in an anon into a non-busy, non-released
 * page in that anon.
 *
 * => maps, amap, and anon locked by caller.
 * => if we fail (result != 0) we unlock everything.
 * => if we are successful, we return with everything still locked.
 * => we don't move the page on the queues [gets moved later]
 * => if we allocate a new page [we_own], it gets put on the queues.
 *    either way, the result is that the page is on the queues at return time
 * => for pages which are on loan from a uvm_object (and thus are not
 *    owned by the anon): if successful, we return with the owning object
 *    locked.   the caller must unlock this object when it unlocks everything
 *    else.
 */

int
uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
    struct vm_anon *anon)
{
	boolean_t we_own;	/* we own anon's page? */
	boolean_t locked;	/* did we relock? */
	struct vm_page *pg;
	int error;
	UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist);

	LOCK_ASSERT(simple_lock_held(&anon->an_lock));

	error = 0;
	uvmexp.fltanget++;
	/* bump rusage counters */
	if (anon->an_page)
		curproc->p_stats->p_ru.ru_minflt++;
	else
		curproc->p_stats->p_ru.ru_majflt++;

	/*
	 * loop until we get it, or fail.
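	 *
	 * a rough sketch of the loop below (not literal code):
	 *
	 *	for (;;) {
	 *		if page resident and not PG_BUSY: return 0
	 *		if page resident but PG_BUSY: unlock all, sleep on
	 *			the page's owner (uobj or anon)
	 *		if no page: allocate one (PG_BUSY), unlock all,
	 *			and read it in from swap with uvm_swap_get()
	 *		relock; if we set PG_BUSY, clean up after the I/O
	 *		if relock failed or the anon moved: return ERESTART
	 *		otherwise try again
	 *	}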
	 */

	for (;;) {
		we_own = FALSE;		/* TRUE if we set PG_BUSY on a page */
		pg = anon->an_page;

		/*
		 * if there is a resident page and it is loaned, then anon
		 * may not own it.   call out to uvm_anon_lockloanpg() to
		 * ensure the real owner of the page has been identified
		 * and locked.
		 */

		if (pg && pg->loan_count)
			pg = uvm_anon_lockloanpg(anon);

		/*
		 * page there?   make sure it is not busy/released.
		 */

		if (pg) {

			/*
			 * at this point, if the page has a uobject [meaning
			 * we have it on loan], then that uobject is locked
			 * by us!   if the page is busy, we drop all the
			 * locks (including uobject) and try again.
			 */

			if ((pg->flags & PG_BUSY) == 0) {
				UVMHIST_LOG(maphist, "<- OK",0,0,0,0);
				return (0);
			}
			pg->flags |= PG_WANTED;
			uvmexp.fltpgwait++;

			/*
			 * the last unlock must be an atomic unlock+wait on
			 * the owner of page
			 */

			if (pg->uobject) {	/* owner is uobject ? */
				uvmfault_unlockall(ufi, amap, NULL, anon);
				UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
				    0,0,0);
				UVM_UNLOCK_AND_WAIT(pg,
				    &pg->uobject->vmobjlock,
				    FALSE, "anonget1",0);
			} else {
				/* anon owns page */
				uvmfault_unlockall(ufi, amap, NULL, NULL);
				UVMHIST_LOG(maphist, " unlock+wait on anon",0,
				    0,0,0);
				UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0,
				    "anonget2",0);
			}
		} else {

			/*
			 * no page, we must try and bring it in.
			 */

			pg = uvm_pagealloc(NULL, 0, anon, 0);
			if (pg == NULL) {		/* out of RAM.  */
				uvmfault_unlockall(ufi, amap, NULL, anon);
				uvmexp.fltnoram++;
				UVMHIST_LOG(maphist, "  noram -- UVM_WAIT",0,
				    0,0,0);
				if (!uvm_reclaimable()) {
					return ENOMEM;
				}
				uvm_wait("flt_noram1");
			} else {
				/* we set the PG_BUSY bit */
				we_own = TRUE;
				uvmfault_unlockall(ufi, amap, NULL, anon);

				/*
				 * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN
				 * page into the uvm_swap_get function with
				 * all data structures unlocked.  note that
				 * it is ok to read an_swslot here because
				 * we hold PG_BUSY on the page.
				 */
				uvmexp.pageins++;
				error = uvm_swap_get(pg, anon->an_swslot,
				    PGO_SYNCIO);

				/*
				 * we clean up after the i/o below in the
				 * "we_own" case
				 */
			}
		}

		/*
		 * now relock and try again
		 */

		locked = uvmfault_relock(ufi);
		if (locked && amap != NULL) {
			amap_lock(amap);
		}
		if (locked || we_own)
			simple_lock(&anon->an_lock);

		/*
		 * if we own the page (i.e. we set PG_BUSY), then we need
		 * to clean up after the I/O. there are three cases to
		 * consider:
		 *   [1] page released during I/O: free anon and ReFault.
		 *   [2] I/O not OK.   free the page and cause the fault
		 *       to fail.
		 *   [3] I/O OK!   activate the page and sync with the
		 *       non-we_own case (i.e. drop anon lock if not locked).
		 */

		if (we_own) {
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (error) {

				/*
				 * remove the swap slot from the anon
				 * and mark the anon as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */

				if (anon->an_swslot > 0)
					uvm_swap_markbad(anon->an_swslot, 1);
				anon->an_swslot = SWSLOT_BAD;

				if ((pg->flags & PG_RELEASED) != 0)
					goto released;

				/*
				 * note: page was never !PG_BUSY, so it
				 * can't be mapped and thus no need to
				 * pmap_page_protect it...
				 */

				uvm_lock_pageq();
				uvm_pagefree(pg);
				uvm_unlock_pageq();

				if (locked)
					uvmfault_unlockall(ufi, amap, NULL,
					    anon);
				else
					simple_unlock(&anon->an_lock);
				UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0);
				return error;
			}

			if ((pg->flags & PG_RELEASED) != 0) {
released:
				KASSERT(anon->an_ref == 0);

				/*
				 * released while we unlocked amap.
				 */

				if (locked)
					uvmfault_unlockall(ufi, amap, NULL,
					    NULL);

				uvm_anon_release(anon);

				if (error) {
					UVMHIST_LOG(maphist,
					    "<- ERROR/RELEASED", 0,0,0,0);
					return error;
				}

				UVMHIST_LOG(maphist, "<- RELEASED", 0,0,0,0);
				return ERESTART;
			}

			/*
			 * we've successfully read the page, activate it.
			 */

			uvm_lock_pageq();
			uvm_pageactivate(pg);
			uvm_unlock_pageq();
			pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
			UVM_PAGE_OWN(pg, NULL);
			if (!locked)
				simple_unlock(&anon->an_lock);
		}

		/*
		 * we were not able to relock.   restart fault.
		 */

		if (!locked) {
			UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
			return (ERESTART);
		}

		/*
		 * verify no one has touched the amap and moved the anon on us.
		 */

		if (ufi != NULL &&
		    amap_lookup(&ufi->entry->aref,
		    ufi->orig_rvaddr - ufi->entry->start) != anon) {

			uvmfault_unlockall(ufi, amap, NULL, anon);
			UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
			return (ERESTART);
		}

		/*
		 * try it again!
		 */

		uvmexp.fltanretry++;
		continue;
	}
	/*NOTREACHED*/
}

/*
 * F A U L T   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_fault: page fault handler
 *
 * => called from MD code to resolve a page fault
 * => VM data structures usually should be unlocked.   however, it is
 *	possible to call here with the main map locked if the caller
 *	gets a write lock, sets it recursive, and then calls us (c.f.
 *	uvm_map_pageable).   this should be avoided because it keeps
 *	the map locked during I/O.
 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT
 */

#define MASK(entry)     (UVM_ET_ISCOPYONWRITE(entry) ? \
			 ~VM_PROT_WRITE : VM_PROT_ALL)

int
uvm_fault(struct vm_map *orig_map, vaddr_t vaddr, vm_fault_t fault_type,
    vm_prot_t access_type)
{
	struct uvm_faultinfo ufi;
	vm_prot_t enter_prot, check_prot;
	boolean_t wired, narrow, promote, locked, shadowed, wire_fault, cow_now;
	int npages, nback, nforw, centeridx, error, lcv, gotpages;
	vaddr_t startva, currva;
	voff_t uoff;
	struct vm_amap *amap;
	struct uvm_object *uobj;
	struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
	struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
	UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, ft=%d, at=%d)",
	    orig_map, vaddr, fault_type, access_type);

	anon = NULL;
	pg = NULL;

	uvmexp.faults++;	/* XXX: locking? */

	/*
	 * init the IN parameters in the ufi
	 */

	ufi.orig_map = orig_map;
	ufi.orig_rvaddr = trunc_page(vaddr);
	ufi.orig_size = PAGE_SIZE;	/* can't get any smaller than this */
	wire_fault = fault_type == VM_FAULT_WIRE ||
	    fault_type == VM_FAULT_WIREMAX;
	if (wire_fault)
		narrow = TRUE;		/* don't look for neighborhood
					 * pages on wire */
	else
		narrow = FALSE;		/* normal fault */

	/*
	 * "goto ReFault" means restart the page fault from ground zero.
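	 * (every "goto ReFault" below drops whatever locks it holds first,
	 * so the lookup is redone from scratch.)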
	 */
ReFault:

	/*
	 * lookup and lock the maps
	 */

	if (uvmfault_lookup(&ufi, FALSE) == FALSE) {
		UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0);
		return (EFAULT);
	}
	/* locked: maps(read) */

#ifdef DIAGNOSTIC
	if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) {
		printf("Page fault on non-pageable map:\n");
		printf("ufi.map = %p\n", ufi.map);
		printf("ufi.orig_map = %p\n", ufi.orig_map);
		printf("ufi.orig_rvaddr = 0x%lx\n", (u_long) ufi.orig_rvaddr);
		panic("uvm_fault: (ufi.map->flags & VM_MAP_PAGEABLE) == 0");
	}
#endif

	/*
	 * check protection
	 */

	check_prot = fault_type == VM_FAULT_WIREMAX ?
	    ufi.entry->max_protection : ufi.entry->protection;
	if ((check_prot & access_type) != access_type) {
		UVMHIST_LOG(maphist,
		    "<- protection failure (prot=0x%x, access=0x%x)",
		    ufi.entry->protection, access_type, 0, 0);
		uvmfault_unlockmaps(&ufi, FALSE);
		return EACCES;
	}

	/*
	 * "enter_prot" is the protection we want to enter the page in at.
	 * for certain pages (e.g. copy-on-write pages) this protection can
	 * be more strict than ufi.entry->protection.  "wired" means either
	 * the entry is wired or we are fault-wiring the pg.
	 */

	enter_prot = ufi.entry->protection;
	wired = VM_MAPENT_ISWIRED(ufi.entry) || wire_fault;
	if (wired) {
		access_type = enter_prot; /* full access for wired */
		cow_now = (check_prot & VM_PROT_WRITE) != 0;
	} else {
		cow_now = (access_type & VM_PROT_WRITE) != 0;
	}

	/*
	 * handle "needs_copy" case.   if we need to copy the amap we will
	 * have to drop our readlock and relock it with a write lock.  (we
	 * need a write lock to change anything in a map entry [e.g.
	 * needs_copy]).
	 */

	if (UVM_ET_ISNEEDSCOPY(ufi.entry)) {
		KASSERT(fault_type != VM_FAULT_WIREMAX);
		if (cow_now || (ufi.entry->object.uvm_obj == NULL)) {
			/* need to clear */
			UVMHIST_LOG(maphist,
			    "  need to clear needs_copy and refault",0,0,0,0);
			uvmfault_unlockmaps(&ufi, FALSE);
			uvmfault_amapcopy(&ufi);
			uvmexp.fltamcopy++;
			goto ReFault;

		} else {

			/*
			 * ensure that we pmap_enter page R/O since
			 * needs_copy is still true
			 */

			enter_prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * identify the players
	 */

	amap = ufi.entry->aref.ar_amap;		/* top layer */
	uobj = ufi.entry->object.uvm_obj;	/* bottom layer */

	/*
	 * check for a case 0 fault.    if nothing backing the entry then
	 * error now.
	 */

	if (amap == NULL && uobj == NULL) {
		uvmfault_unlockmaps(&ufi, FALSE);
		UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0);
		return (EFAULT);
	}

	/*
	 * establish range of interest based on advice from mapper
	 * and then clip to fit map entry.   note that we only want
	 * to do this the first time through the fault.   if we
	 * ReFault we will disable this by setting "narrow" to true.
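	 *
	 * (the clipping below keeps nback within the pages between
	 * entry->start and the faulting address, and nforw within the
	 * pages remaining before entry->end, minus one so the faulting
	 * page itself is not counted as a forward page.)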
	 */

	if (narrow == FALSE) {

		/* wide fault (!narrow) */
		KASSERT(uvmadvice[ufi.entry->advice].advice ==
		    ufi.entry->advice);
		nback = MIN(uvmadvice[ufi.entry->advice].nback,
		    (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT);
		startva = ufi.orig_rvaddr - (nback << PAGE_SHIFT);
		nforw = MIN(uvmadvice[ufi.entry->advice].nforw,
		    ((ufi.entry->end - ufi.orig_rvaddr) >>
		    PAGE_SHIFT) - 1);
		/*
		 * note: "-1" because we don't want to count the
		 * faulting page as forw
		 */
		npages = nback + nforw + 1;
		centeridx = nback;

		narrow = TRUE;	/* ensure only once per-fault */

	} else {

		/* narrow fault! */
		nback = nforw = 0;
		startva = ufi.orig_rvaddr;
		npages = 1;
		centeridx = 0;

	}

	/* locked: maps(read) */
	UVMHIST_LOG(maphist, "  narrow=%d, back=%d, forw=%d, startva=0x%x",
	    narrow, nback, nforw, startva);
	UVMHIST_LOG(maphist, "  entry=0x%x, amap=0x%x, obj=0x%x", ufi.entry,
	    amap, uobj, 0);

	/*
	 * if we've got an amap, lock it and extract current anons.
	 */

	if (amap) {
		amap_lock(amap);
		anons = anons_store;
		amap_lookups(&ufi.entry->aref, startva - ufi.entry->start,
		    anons, npages);
	} else {
		anons = NULL;	/* to be safe */
	}

	/* locked: maps(read), amap(if there) */

	/*
	 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
	 * now and then forget about them (for the rest of the fault).
	 */

	if (ufi.entry->advice == MADV_SEQUENTIAL && nback != 0) {

		UVMHIST_LOG(maphist, "  MADV_SEQUENTIAL: flushing backpages",
		    0,0,0,0);
		/* flush back-page anons? */
		if (amap)
			uvmfault_anonflush(anons, nback);

		/* flush object? */
		if (uobj) {
			uoff = (startva - ufi.entry->start) + ufi.entry->offset;
			simple_lock(&uobj->vmobjlock);
			(void) (uobj->pgops->pgo_put)(uobj, uoff, uoff +
			    (nback << PAGE_SHIFT), PGO_DEACTIVATE);
		}

		/* now forget about the backpages */
		if (amap)
			anons += nback;
		startva += (nback << PAGE_SHIFT);
		npages -= nback;
		nback = centeridx = 0;
	}

	/* locked: maps(read), amap(if there) */

	/*
	 * map in the backpages and frontpages we found in the amap in hopes
	 * of preventing future faults.    we also init the pages[] array as
	 * we go.
	 */

	currva = startva;
	shadowed = FALSE;
	for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) {

		/*
		 * don't play with VAs that are already mapped
		 * (except for center)
		 */
		if (lcv != centeridx &&
		    pmap_extract(ufi.orig_map->pmap, currva, NULL)) {
			pages[lcv] = PGO_DONTCARE;
			continue;
		}

		/*
		 * unmapped or center page.   check if any anon at this level.
		 */
		if (amap == NULL || anons[lcv] == NULL) {
			pages[lcv] = NULL;
			continue;
		}

		/*
		 * check for present page and map if possible.   re-activate it.
		 */

		pages[lcv] = PGO_DONTCARE;
		if (lcv == centeridx) {		/* save center for later! */
			shadowed = TRUE;
			continue;
		}
		anon = anons[lcv];
		simple_lock(&anon->an_lock);
		/* ignore loaned pages */
		if (anon->an_page && anon->an_page->loan_count == 0 &&
		    (anon->an_page->flags & PG_BUSY) == 0) {
			uvm_lock_pageq();
			uvm_pageactivate(anon->an_page);
			uvm_unlock_pageq();
			UVMHIST_LOG(maphist,
			    "  MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x",
			    ufi.orig_map->pmap, currva, anon->an_page, 0);
			uvmexp.fltnamap++;

			/*
			 * Since this isn't the page that's actually faulting,
			 * ignore pmap_enter() failures; it's not critical
			 * that we enter these right now.
			 */

			(void) pmap_enter(ufi.orig_map->pmap, currva,
			    VM_PAGE_TO_PHYS(anon->an_page),
			    (anon->an_ref > 1) ? (enter_prot & ~VM_PROT_WRITE) :
			    enter_prot,
			    PMAP_CANFAIL |
			     (VM_MAPENT_ISWIRED(ufi.entry) ? PMAP_WIRED : 0));
		}
		simple_unlock(&anon->an_lock);
		pmap_update(ufi.orig_map->pmap);
	}

	/* locked: maps(read), amap(if there) */
	/* (shadowed == TRUE) if there is an anon at the faulting address */
	UVMHIST_LOG(maphist, "  shadowed=%d, will_get=%d", shadowed,
	    (uobj && shadowed == FALSE),0,0);

	/*
	 * note that if we are really short of RAM we could sleep in the above
	 * call to pmap_enter with everything locked.   bad?
	 *
	 * XXX Actually, that is bad; pmap_enter() should just fail in that
	 * XXX case.  --thorpej
	 */

	/*
	 * if the desired page is not shadowed by the amap and we have a
	 * backing object, then we check to see if the backing object would
	 * prefer to handle the fault itself (rather than letting us do it
	 * with the usual pgo_get hook).  the backing object signals this by
	 * providing a pgo_fault routine.
	 */

	if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) {
		simple_lock(&uobj->vmobjlock);

		/* locked: maps(read), amap (if there), uobj */
		error = uobj->pgops->pgo_fault(&ufi, startva, pages, npages,
		    centeridx, fault_type, access_type, PGO_LOCKED|PGO_SYNCIO);

		/* locked: nothing, pgo_fault has unlocked everything */

		if (error == ERESTART)
			goto ReFault;		/* try again! */
		/*
		 * object fault routine responsible for pmap_update().
		 */
		return error;
	}

	/*
	 * now, if the desired page is not shadowed by the amap and we have
	 * a backing object that does not have a special fault routine, then
	 * we ask (with pgo_get) the object for resident pages that we care
	 * about and attempt to map them in.   we do not let pgo_get block
	 * (PGO_LOCKED).
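	 * with PGO_LOCKED the pager may only hand back pages that are
	 * already resident; it does not sleep or drop the object lock,
	 * and if it returns the center page it marks it PG_BUSY for us.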
	 */

	if (uobj && shadowed == FALSE) {
		simple_lock(&uobj->vmobjlock);

		/* locked (!shadowed): maps(read), amap (if there), uobj */
		/*
		 * the following call to pgo_get does _not_ change locking state
		 */

		uvmexp.fltlget++;
		gotpages = npages;
		(void) uobj->pgops->pgo_get(uobj, ufi.entry->offset +
		    (startva - ufi.entry->start),
		    pages, &gotpages, centeridx,
		    access_type & MASK(ufi.entry),
		    ufi.entry->advice, PGO_LOCKED);

		/*
		 * check for pages to map, if we got any
		 */

		uobjpage = NULL;

		if (gotpages) {
			currva = startva;
			for (lcv = 0; lcv < npages;
			     lcv++, currva += PAGE_SIZE) {
				struct vm_page *curpg;
				boolean_t readonly;

				curpg = pages[lcv];
				if (curpg == NULL || curpg == PGO_DONTCARE) {
					continue;
				}

				/*
				 * if center page is resident and not
				 * PG_BUSY|PG_RELEASED then pgo_get
				 * made it PG_BUSY for us and gave
				 * us a handle to it.   remember this
				 * page as "uobjpage." (for later use).
				 */

				if (lcv == centeridx) {
					uobjpage = curpg;
					UVMHIST_LOG(maphist, "  got uobjpage "
					    "(0x%x) with locked get",
					    uobjpage, 0,0,0);
					continue;
				}

				/*
				 * calling pgo_get with PGO_LOCKED returns us
				 * pages which are neither busy nor released,
				 * so we don't need to check for this.
				 * we can just directly enter the pages.
				 */

				uvm_lock_pageq();
				uvm_pageactivate(curpg);
				uvm_unlock_pageq();
				UVMHIST_LOG(maphist,
				    "  MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x",
				    ufi.orig_map->pmap, currva, curpg, 0);
				uvmexp.fltnomap++;

				/*
				 * Since this page isn't the page that's
				 * actually faulting, ignore pmap_enter()
				 * failures; it's not critical that we
				 * enter these right now.
				 */
				KASSERT((curpg->flags & PG_PAGEOUT) == 0);
				KASSERT((curpg->flags & PG_RELEASED) == 0);
				readonly = (curpg->flags & PG_RDONLY)
				    || (curpg->loan_count > 0);

				(void) pmap_enter(ufi.orig_map->pmap, currva,
				    VM_PAGE_TO_PHYS(curpg),
				    readonly ?
				    enter_prot & ~VM_PROT_WRITE :
				    enter_prot & MASK(ufi.entry),
				    PMAP_CANFAIL |
				     (wired ? PMAP_WIRED : 0));

				/*
				 * NOTE: page can't be PG_WANTED or PG_RELEASED
				 * because we've held the lock the whole time
				 * we've had the handle.
				 */

				curpg->flags &= ~(PG_BUSY);
				UVM_PAGE_OWN(curpg, NULL);
			}
			pmap_update(ufi.orig_map->pmap);
		}
	} else {
		uobjpage = NULL;
	}

	/* locked (shadowed): maps(read), amap */
	/* locked (!shadowed): maps(read), amap(if there),
		 uobj(if !null), uobjpage(if !null) */

	/*
	 * note that at this point we are done with any front or back pages.
	 * we are now going to focus on the center page (i.e. the one we've
	 * faulted on).   if we have faulted on the top (anon) layer
	 * [i.e. case 1], then the anon we want is anons[centeridx] (we have
	 * not touched it yet).   if we have faulted on the bottom (uobj)
	 * layer [i.e. case 2] and the page was both present and available,
	 * then we've got a pointer to it as "uobjpage" and we've already
	 * made it BUSY.
	 */

	/*
	 * there are four possible cases we must address: 1A, 1B, 2A, and 2B
	 */

	/*
	 * redirect case 2: if we are not shadowed, go to case 2.
	 */

	if (shadowed == FALSE)
		goto Case2;

	/* locked: maps(read), amap */

	/*
	 * handle case 1: fault on an anon in our amap
	 */

	anon = anons[centeridx];
	UVMHIST_LOG(maphist, "  case 1 fault: anon=0x%x", anon, 0,0,0);
	simple_lock(&anon->an_lock);

	/* locked: maps(read), amap, anon */

	/*
	 * no matter if we have case 1A or case 1B we are going to need to
	 * have the anon's memory resident.   ensure that now.
	 */

	/*
	 * let uvmfault_anonget do the dirty work.
	 * if it fails (!OK) it will unlock everything for us.
	 * if it succeeds, locks are still valid and locked.
	 * also, if it is OK, then the anon's page is on the queues.
	 * if the page is on loan from a uvm_object, then anonget will
	 * lock that object for us if it does not fail.
	 */

	error = uvmfault_anonget(&ufi, amap, anon);
	switch (error) {
	case 0:
		break;

	case ERESTART:
		goto ReFault;

	case EAGAIN:
		tsleep(&lbolt, PVM, "fltagain1", 0);
		goto ReFault;

	default:
		return error;
	}

	/*
	 * uobj is non null if the page is on loan from an object (i.e. uobj)
	 */

	uobj = anon->an_page->uobject;	/* locked by anonget if !NULL */

	/* locked: maps(read), amap, anon, uobj(if one) */

	/*
	 * special handling for loaned pages
	 */

	if (anon->an_page->loan_count) {

		if (!cow_now) {

			/*
			 * for read faults on loaned pages we just cap the
			 * protection at read-only.
			 */

			enter_prot = enter_prot & ~VM_PROT_WRITE;

		} else {
			/*
			 * note that we can't allow writes into a loaned page!
			 *
			 * if we have a write fault on a loaned page in an
			 * anon then we need to look at the anon's ref count.
			 * if it is greater than one then we are going to do
			 * a normal copy-on-write fault into a new anon (this
			 * is not a problem).  however, if the reference count
			 * is one (a case where we would normally allow a
			 * write directly to the page) then we need to kill
			 * the loan before we continue.
			 */

			/* >1 case is already ok */
			if (anon->an_ref == 1) {

				/* get new un-owned replacement page */
				pg = uvm_pagealloc(NULL, 0, NULL, 0);
				if (pg == NULL) {
					uvmfault_unlockall(&ufi, amap, uobj,
					    anon);
					uvm_wait("flt_noram2");
					goto ReFault;
				}

				/*
				 * copy data, kill loan, and drop uobj lock
				 * (if any)
				 */
				/* copy old -> new */
				uvm_pagecopy(anon->an_page, pg);

				/* force reload */
				pmap_page_protect(anon->an_page,
				    VM_PROT_NONE);
				uvm_lock_pageq();	  /* KILL loan */

				anon->an_page->uanon = NULL;
				/* in case we owned */
				anon->an_page->pqflags &= ~PQ_ANON;

				if (uobj) {
					/* if we were receiver of loan */
					anon->an_page->loan_count--;
				} else {
					/*
					 * we were the lender (A->K); need
					 * to remove the page from pageq's.
					 */
					uvm_pagedequeue(anon->an_page);
				}

				uvm_pageactivate(pg);
				uvm_unlock_pageq();
				if (uobj) {
					simple_unlock(&uobj->vmobjlock);
					uobj = NULL;
				}

				/* install new page in anon */
				anon->an_page = pg;
				pg->uanon = anon;
				pg->pqflags |= PQ_ANON;
				pg->flags &= ~(PG_BUSY|PG_FAKE);
				UVM_PAGE_OWN(pg, NULL);

				/* done! */
			}	/* ref == 1 */
		}	/* write fault */
	}	/* loan count */

	/*
	 * if we are case 1B then we will need to allocate a new blank
	 * anon to transfer the data into.   note that we have a lock
	 * on anon, so no one can busy or release the page until we are done.
	 * also note that the ref count can't drop to zero here because
	 * it is > 1 and we are only dropping one ref.
	 *
	 * in the (hopefully very rare) case that we are out of RAM we
	 * will unlock, wait for more RAM, and refault.
	 *
	 * if we are out of anon VM we kill the process (XXX: could wait?).
	 */

	if (cow_now && anon->an_ref > 1) {

		UVMHIST_LOG(maphist, "  case 1B: COW fault",0,0,0,0);
		uvmexp.flt_acow++;
		oanon = anon;		/* oanon = old, locked anon */
		anon = uvm_analloc();
		if (anon) {
			/* new anon is locked! */
			pg = uvm_pagealloc(NULL, 0, anon, 0);
		}

		/* check for out of RAM */
		if (anon == NULL || pg == NULL) {
			if (anon) {
				anon->an_ref--;
				simple_unlock(&anon->an_lock);
				uvm_anfree(anon);
			}
			uvmfault_unlockall(&ufi, amap, uobj, oanon);
			if (!uvm_reclaimable()) {
				UVMHIST_LOG(maphist,
				    "<- failed.  out of VM",0,0,0,0);
				uvmexp.fltnoanon++;
				return ENOMEM;
			}

			uvmexp.fltnoram++;
			uvm_wait("flt_noram3");	/* out of RAM, wait for more */
			goto ReFault;
		}

		/* got all resources, replace anon with nanon */
		uvm_pagecopy(oanon->an_page, pg);
		uvm_lock_pageq();
		uvm_pageactivate(pg);
		pg->flags &= ~(PG_BUSY|PG_FAKE);
		uvm_unlock_pageq();
		UVM_PAGE_OWN(pg, NULL);
		amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
		    anon, TRUE);

		/* deref: can not drop to zero here by defn! */
		oanon->an_ref--;

		/*
		 * note: oanon is still locked, as is the new anon.  we
		 * need to check for this later when we unlock oanon; if
		 * oanon != anon, we'll have to unlock anon, too.
		 */

	} else {

		uvmexp.flt_anon++;
		oanon = anon;		/* old, locked anon is same as anon */
		pg = anon->an_page;
		if (anon->an_ref > 1)     /* disallow writes to ref > 1 anons */
			enter_prot = enter_prot & ~VM_PROT_WRITE;

	}

	/* locked: maps(read), amap, oanon, anon (if different from oanon) */

	/*
	 * now map the page in.
	 */

	UVMHIST_LOG(maphist, "  MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x",
	    ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0);
	if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
	    enter_prot, access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0))
	    != 0) {

		/*
		 * No need to undo what we did; we can simply think of
		 * this as the pmap throwing away the mapping information.
		 *
		 * We do, however, have to go through the ReFault path,
		 * as the map may change while we're asleep.
		 */

		if (anon != oanon)
			simple_unlock(&anon->an_lock);
		uvmfault_unlockall(&ufi, amap, uobj, oanon);
		if (!uvm_reclaimable()) {
			UVMHIST_LOG(maphist,
			    "<- failed.  out of VM",0,0,0,0);
			/* XXX instrumentation */
			return ENOMEM;
		}
		/* XXX instrumentation */
		uvm_wait("flt_pmfail1");
		goto ReFault;
	}

	/*
	 * ... update the page queues.
	 */

	uvm_lock_pageq();
	if (wire_fault) {
		uvm_pagewire(pg);

		/*
		 * since the now-wired page cannot be paged out,
		 * release its swap resources for others to use.
		 * since an anon with no swap cannot be PG_CLEAN,
		 * clear its clean flag now.
		 */

		pg->flags &= ~(PG_CLEAN);
		uvm_anon_dropswap(anon);
	} else {
		uvm_pageactivate(pg);
	}
	uvm_unlock_pageq();

	/*
	 * done case 1!  finish up by unlocking everything and returning success
	 */

	if (anon != oanon)
		simple_unlock(&anon->an_lock);
	uvmfault_unlockall(&ufi, amap, uobj, oanon);
	pmap_update(ufi.orig_map->pmap);
	return 0;

Case2:
	/*
	 * handle case 2: faulting on backing object or zero fill
	 */

	/*
	 * locked:
	 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
	 */

	/*
	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
	 * set uobjpage to PGO_DONTCARE if we are doing a zero fill.  if we
	 * have a backing object, check and see if we are going to promote
	 * the data up to an anon during the fault.
	 */

	if (uobj == NULL) {
		uobjpage = PGO_DONTCARE;
		promote = TRUE;		/* always need anon here */
	} else {
		KASSERT(uobjpage != PGO_DONTCARE);
		promote = cow_now && UVM_ET_ISCOPYONWRITE(ufi.entry);
	}
	UVMHIST_LOG(maphist, "  case 2 fault: promote=%d, zfill=%d",
	    promote, (uobj == NULL), 0,0);

	/*
	 * if uobjpage is not null then we do not need to do I/O to get the
	 * uobjpage.
	 *
	 * if uobjpage is null, then we need to unlock and ask the pager to
	 * get the data for us.   once we have the data, we need to reverify
	 * the state of the world.  we are currently not holding any resources.
	 */

	if (uobjpage) {
		/* update rusage counters */
		curproc->p_stats->p_ru.ru_minflt++;
	} else {
		/* update rusage counters */
		curproc->p_stats->p_ru.ru_majflt++;

		/* locked: maps(read), amap(if there), uobj */
		uvmfault_unlockall(&ufi, amap, NULL, NULL);
		/* locked: uobj */

		uvmexp.fltget++;
		gotpages = 1;
		uoff = (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset;
		error = uobj->pgops->pgo_get(uobj, uoff, &uobjpage, &gotpages,
		    0, access_type & MASK(ufi.entry), ufi.entry->advice,
		    PGO_SYNCIO);
		/* locked: uobjpage(if no error) */

		/*
		 * recover from I/O
		 */

		if (error) {
			if (error == EAGAIN) {
				UVMHIST_LOG(maphist,
				    "  pgo_get says TRY AGAIN!",0,0,0,0);
				tsleep(&lbolt, PVM, "fltagain2", 0);
				goto ReFault;
			}

			UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)",
			    error, 0,0,0);
			return error;
		}

		/* locked: uobjpage */

		uvm_lock_pageq();
		uvm_pageactivate(uobjpage);
		uvm_unlock_pageq();

		/*
		 * re-verify the state of the world by first trying to relock
		 * the maps.  always relock the object.
		 */

		locked = uvmfault_relock(&ufi);
		if (locked && amap)
			amap_lock(amap);
		simple_lock(&uobj->vmobjlock);

		/* locked(locked): maps(read), amap(if !null), uobj, uobjpage */
		/* locked(!locked): uobj, uobjpage */

		/*
		 * verify that the page has not been released and re-verify
		 * that amap slot is still free.   if there is a problem,
		 * we unlock and clean up.
		 */

		if ((uobjpage->flags & PG_RELEASED) != 0 ||
		    (locked && amap &&
		    amap_lookup(&ufi.entry->aref,
		      ufi.orig_rvaddr - ufi.entry->start))) {
			if (locked)
				uvmfault_unlockall(&ufi, amap, NULL, NULL);
			locked = FALSE;
		}

		/*
		 * didn't get the lock?   release the page and retry.
		 */

		if (locked == FALSE) {
			UVMHIST_LOG(maphist,
			    "  wasn't able to relock after fault: retry",
			    0,0,0,0);
			if (uobjpage->flags & PG_WANTED)
				wakeup(uobjpage);
			if (uobjpage->flags & PG_RELEASED) {
				uvmexp.fltpgrele++;
				uvm_pagefree(uobjpage);
				goto ReFault;
			}
			uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(uobjpage, NULL);
			simple_unlock(&uobj->vmobjlock);
			goto ReFault;
		}

		/*
		 * we have the data in uobjpage which is busy and
		 * not released.  we are holding object lock (so the page
		 * can't be released on us).
		 */

		/* locked: maps(read), amap(if !null), uobj, uobjpage */
	}

	/*
	 * locked:
	 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
	 */

	/*
	 * notes:
	 *  - at this point uobjpage can not be NULL
	 *  - at this point uobjpage can not be PG_RELEASED (since we checked
	 *    for it above)
	 *  - at this point uobjpage could be PG_WANTED (handle later)
	 */

	if (promote == FALSE) {

		/*
		 * we are not promoting.   if the mapping is COW ensure that we
		 * don't give more access than we should (e.g. when doing a read
		 * fault on a COPYONWRITE mapping we want to map the COW page in
		 * R/O even though the entry protection could be R/W).
		 *
		 * set "pg" to the page we want to map in (uobjpage, usually)
		 */

		/* no anon in this case. */
		anon = NULL;

		uvmexp.flt_obj++;
		if (UVM_ET_ISCOPYONWRITE(ufi.entry))
			enter_prot &= ~VM_PROT_WRITE;
		pg = uobjpage;		/* map in the actual object */

		/* assert(uobjpage != PGO_DONTCARE) */

		/*
		 * we are faulting directly on the page.   be careful
		 * about writing to loaned pages...
		 */

		if (uobjpage->loan_count) {
			if (!cow_now) {
				/* read fault: cap the protection at readonly */
				/* cap! */
				enter_prot = enter_prot & ~VM_PROT_WRITE;
			} else {
				/* write fault: must break the loan here */

				pg = uvm_loanbreak(uobjpage);
				if (pg == NULL) {

					/*
					 * drop ownership of page, it can't
					 * be released
					 */

					if (uobjpage->flags & PG_WANTED)
						wakeup(uobjpage);
					uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
					UVM_PAGE_OWN(uobjpage, NULL);

					uvmfault_unlockall(&ufi, amap, uobj,
					    NULL);
					UVMHIST_LOG(maphist,
					    "  out of RAM breaking loan, waiting",
					    0,0,0,0);
					uvmexp.fltnoram++;
					uvm_wait("flt_noram4");
					goto ReFault;
				}
				uobjpage = pg;
			}
		}
	} else {

		/*
		 * if we are going to promote the data to an anon we
		 * allocate a blank anon here and plug it into our amap.
		 */
#if DIAGNOSTIC
		if (amap == NULL)
			panic("uvm_fault: want to promote data, but no anon");
#endif

		anon = uvm_analloc();
		if (anon) {

			/*
			 * The new anon is locked.
			 *
			 * In `Fill in data...' below, if
			 * uobjpage == PGO_DONTCARE, we want
			 * a zero'd, dirty page, so have
			 * uvm_pagealloc() do that for us.
			 */

			pg = uvm_pagealloc(NULL, 0, anon,
			    (uobjpage == PGO_DONTCARE) ?
			    UVM_PGA_ZERO : 0);
		}

		/*
		 * out of memory resources?
		 */

		if (anon == NULL || pg == NULL) {
			if (anon != NULL) {
				anon->an_ref--;
				simple_unlock(&anon->an_lock);
				uvm_anfree(anon);
			}

			/*
			 * arg!  must unbusy our page and fail or sleep.
			 */

			if (uobjpage != PGO_DONTCARE) {
				if (uobjpage->flags & PG_WANTED)
					/* still holding object lock */
					wakeup(uobjpage);

				uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
				UVM_PAGE_OWN(uobjpage, NULL);
			}

			/* unlock and fail ... */
			uvmfault_unlockall(&ufi, amap, uobj, NULL);
			if (!uvm_reclaimable()) {
				UVMHIST_LOG(maphist, "  promote: out of VM",
				    0,0,0,0);
				uvmexp.fltnoanon++;
				return ENOMEM;
			}

			UVMHIST_LOG(maphist, "  out of RAM, waiting for more",
			    0,0,0,0);
			uvmexp.fltnoram++;
			uvm_wait("flt_noram5");
			goto ReFault;
		}

		/*
		 * fill in the data
		 */

		if (uobjpage != PGO_DONTCARE) {
			uvmexp.flt_prcopy++;
			/* copy page [pg now dirty] */
			uvm_pagecopy(uobjpage, pg);

			/*
			 * promote to shared amap?  make sure all sharing
			 * procs see it
			 */

			if ((amap_flags(amap) & AMAP_SHARED) != 0) {
				pmap_page_protect(uobjpage, VM_PROT_NONE);
				/*
				 * XXX: PAGE MIGHT BE WIRED!
				 */
			}

			/*
			 * dispose of uobjpage.  it can't be PG_RELEASED
			 * since we still hold the object lock.
			 * drop handle to uobj as well.
			 */

			if (uobjpage->flags & PG_WANTED)
				/* still have the obj lock */
				wakeup(uobjpage);
			uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(uobjpage, NULL);
			simple_unlock(&uobj->vmobjlock);
			uobj = NULL;

			UVMHIST_LOG(maphist,
			    "  promote uobjpage 0x%x to anon/page 0x%x/0x%x",
			    uobjpage, anon, pg, 0);

		} else {
			uvmexp.flt_przero++;

			/*
			 * Page is zero'd and marked dirty by uvm_pagealloc()
			 * above.
			 */

			UVMHIST_LOG(maphist,"  zero fill anon/page 0x%x/0%x",
			    anon, pg, 0, 0);
		}
		amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
		    anon, FALSE);
	}

	/*
	 * locked:
	 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj),
	 *   anon(if !null), pg(if anon)
	 *
	 * note: pg is either the uobjpage or the new page in the new anon
	 */

	/*
	 * all resources are present.   we can now map it in and free our
	 * resources.
	 */

	UVMHIST_LOG(maphist,
	    "  MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d",
	    ufi.orig_map->pmap, ufi.orig_rvaddr, pg, promote);
	KASSERT((access_type & VM_PROT_WRITE) == 0 ||
		(pg->flags & PG_RDONLY) == 0);
	if (pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
	    pg->flags & PG_RDONLY ? enter_prot & ~VM_PROT_WRITE : enter_prot,
	    access_type | PMAP_CANFAIL | (wired ? PMAP_WIRED : 0)) != 0) {

		/*
		 * No need to undo what we did; we can simply think of
		 * this as the pmap throwing away the mapping information.
		 *
		 * We do, however, have to go through the ReFault path,
		 * as the map may change while we're asleep.
		 */

		if (pg->flags & PG_WANTED)
			wakeup(pg);

		/*
		 * note that pg can't be PG_RELEASED since we did not drop
		 * the object lock since the last time we checked.
1657 */ 1658 1659 pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); 1660 UVM_PAGE_OWN(pg, NULL); 1661 uvmfault_unlockall(&ufi, amap, uobj, anon); 1662 if (!uvm_reclaimable()) { 1663 UVMHIST_LOG(maphist, 1664 "<- failed. out of VM",0,0,0,0); 1665 /* XXX instrumentation */ 1666 return ENOMEM; 1667 } 1668 /* XXX instrumentation */ 1669 uvm_wait("flt_pmfail2"); 1670 goto ReFault; 1671 } 1672 1673 uvm_lock_pageq(); 1674 if (wire_fault) { 1675 uvm_pagewire(pg); 1676 if (pg->pqflags & PQ_AOBJ) { 1677 1678 /* 1679 * since the now-wired page cannot be paged out, 1680 * release its swap resources for others to use. 1681 * since an aobj page with no swap cannot be PG_CLEAN, 1682 * clear its clean flag now. 1683 */ 1684 1685 pg->flags &= ~(PG_CLEAN); 1686 uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); 1687 } 1688 } else { 1689 uvm_pageactivate(pg); 1690 } 1691 uvm_unlock_pageq(); 1692 if (pg->flags & PG_WANTED) 1693 wakeup(pg); 1694 1695 /* 1696 * note that pg can't be PG_RELEASED since we did not drop the object 1697 * lock since the last time we checked. 1698 */ 1699 1700 pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); 1701 UVM_PAGE_OWN(pg, NULL); 1702 uvmfault_unlockall(&ufi, amap, uobj, anon); 1703 pmap_update(ufi.orig_map->pmap); 1704 UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); 1705 return 0; 1706 } 1707 1708 /* 1709 * uvm_fault_wire: wire down a range of virtual addresses in a map. 1710 * 1711 * => map may be read-locked by caller, but MUST NOT be write-locked. 1712 * => if map is read-locked, any operations which may cause map to 1713 * be write-locked in uvm_fault() must be taken care of by 1714 * the caller. See uvm_map_pageable(). 1715 */ 1716 1717 int 1718 uvm_fault_wire(struct vm_map *map, vaddr_t start, vaddr_t end, 1719 vm_fault_t fault_type, vm_prot_t access_type) 1720 { 1721 vaddr_t va; 1722 int error; 1723 1724 /* 1725 * now fault it in a page at a time. if the fault fails then we have 1726 * to undo what we have done. note that in uvm_fault VM_PROT_NONE 1727 * is replaced with the max protection if fault_type is VM_FAULT_WIRE. 1728 */ 1729 1730 /* 1731 * XXX work around overflowing a vaddr_t. this prevents us from 1732 * wiring the last page in the address space, though. 1733 */ 1734 if (start > end) { 1735 return EFAULT; 1736 } 1737 1738 for (va = start ; va < end ; va += PAGE_SIZE) { 1739 error = uvm_fault(map, va, fault_type, access_type); 1740 if (error) { 1741 if (va != start) { 1742 uvm_fault_unwire(map, start, va); 1743 } 1744 return error; 1745 } 1746 } 1747 return 0; 1748 } 1749 1750 /* 1751 * uvm_fault_unwire(): unwire range of virtual space. 1752 */ 1753 1754 void 1755 uvm_fault_unwire(struct vm_map *map, vaddr_t start, vaddr_t end) 1756 { 1757 vm_map_lock_read(map); 1758 uvm_fault_unwire_locked(map, start, end); 1759 vm_map_unlock_read(map); 1760 } 1761 1762 /* 1763 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire(). 1764 * 1765 * => map must be at least read-locked. 1766 */ 1767 1768 void 1769 uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) 1770 { 1771 struct vm_map_entry *entry; 1772 pmap_t pmap = vm_map_pmap(map); 1773 vaddr_t va; 1774 paddr_t pa; 1775 struct vm_page *pg; 1776 1777 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0); 1778 1779 /* 1780 * we assume that the area we are unwiring has actually been wired 1781 * in the first place. this means that we should be able to extract 1782 * the PAs from the pmap. we also lock out the page daemon so that 1783 * we can call uvm_pageunwire. 
1784 */ 1785 1786 uvm_lock_pageq(); 1787 1788 /* 1789 * find the beginning map entry for the region. 1790 */ 1791 1792 KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map)); 1793 if (uvm_map_lookup_entry(map, start, &entry) == FALSE) 1794 panic("uvm_fault_unwire_locked: address not in map"); 1795 1796 for (va = start; va < end; va += PAGE_SIZE) { 1797 if (pmap_extract(pmap, va, &pa) == FALSE) 1798 continue; 1799 1800 /* 1801 * find the map entry for the current address. 1802 */ 1803 1804 KASSERT(va >= entry->start); 1805 while (va >= entry->end) { 1806 KASSERT(entry->next != &map->header && 1807 entry->next->start <= entry->end); 1808 entry = entry->next; 1809 } 1810 1811 /* 1812 * if the entry is no longer wired, tell the pmap. 1813 */ 1814 1815 if (VM_MAPENT_ISWIRED(entry) == 0) 1816 pmap_unwire(pmap, va); 1817 1818 pg = PHYS_TO_VM_PAGE(pa); 1819 if (pg) 1820 uvm_pageunwire(pg); 1821 } 1822 1823 uvm_unlock_pageq(); 1824 } 1825