/*	$NetBSD: uvm_fault.c,v 1.189 2011/07/05 13:47:24 yamt Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
 */

/*
 * uvm_fault.c: fault handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.189 2011/07/05 13:47:24 yamt Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>

/*
 *
 * a word on page faults:
 *
 * types of page faults we handle:
 *
 * CASE 1: upper layer faults                   CASE 2: lower layer faults
 *
 *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
 *    read/write1     write>1                  read/write   +-cow_write/zero
 *         |             |                         |        |
 *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
 * amap |  V  |       |  ---------> new |          |        | |  ^  |
 *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
 *                                                 |        |    |
 *      +-----+       +-----+                   +--|--+     | +--|--+
 * uobj | d/c |       | d/c |                   |  V  |     +----+  |
 *      +-----+       +-----+                   +-----+       +-----+
 *
 * d/c = don't care
 *
 *   case [0]: layerless fault
 *	no amap or uobj is present.   this is an error.
 *
 *   case [1]: upper layer fault [anon active]
 *     1A: [read] or [write with anon->an_ref == 1]
 *		I/O takes place in upper level anon and uobj is not touched.
 *     1B: [write with anon->an_ref > 1]
 *		new anon is alloc'd and data is copied off ["COW"]
 *
 *   case [2]: lower layer fault [uobj]
 *     2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
 *		I/O takes place directly in object.
 *     2B: [write to copy_on_write] or [read on NULL uobj]
 *		data is "promoted" from uobj to a new anon.
 *		if uobj is null, then we zero fill.
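 *
 * for example: the first write to a page of a MAP_PRIVATE file mapping is
 * a 2B fault (the object page is promoted into a new anon), while a later
 * write to that same page after fork(), when the anon is shared and
 * an_ref > 1, is a 1B fault (anon-to-anon copy).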
 *
 * we follow the standard UVM locking protocol ordering:
 *
 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
 * we hold a PG_BUSY page if we unlock for I/O
 *
 *
 * the code is structured as follows:
 *
 *     - init the "IN" params in the ufi structure
 *   ReFault: (ERESTART returned to the loop in uvm_fault_internal)
 *     - do lookups [locks maps], check protection, handle needs_copy
 *     - check for case 0 fault (error)
 *     - establish "range" of fault
 *     - if we have an amap lock it and extract the anons
 *     - if sequential advice deactivate pages behind us
 *     - at the same time check pmap for unmapped areas and anon for pages
 *	 that we could map in (and do map it if found)
 *     - check object for resident pages that we could map in
 *     - if (case 2) goto Case2
 *     - >>> handle case 1
 *           - ensure source anon is resident in RAM
 *           - if case 1B alloc new anon and copy from source
 *           - map the correct page in
 *   Case2:
 *     - >>> handle case 2
 *           - ensure source page is resident (if uobj)
 *           - if case 2B alloc new anon and copy from source (could be zero
 *		fill if uobj == NULL)
 *           - map the correct page in
 *     - done!
 *
 * note on paging:
 *   if we have to do I/O we place a PG_BUSY page in the correct object,
 * unlock everything, and do the I/O.   when I/O is done we must reverify
 * the state of the world before assuming that our data structures are
 * valid.   [because mappings could change while the map is unlocked]
 *
 *  alternative 1: unbusy the page in question and restart the page fault
 *    from the top (ReFault).   this is easy but does not take advantage
 *    of the information that we already have from our previous lookup,
 *    although it is possible that the "hints" in the vm_map will help here.
 *
 *  alternative 2: the system already keeps track of a "version" number of
 *    a map.   [i.e. every time you write-lock a map (e.g. to change a
 *    mapping) you bump the version number up by one...]   so, we can save
 *    the version number of the map before we release the lock and start I/O.
 *    then when I/O is done we can relock and check the version numbers
 *    to see if anything changed.   this might save us something over
 *    alternative 1 because we don't have to unbusy the page and there may
 *    be fewer compares(?).
 *
 *  alternative 3: put in backpointers or a way to "hold" part of a map
 *    in place while I/O is in progress.   this could be complex to
 *    implement (especially with structures like amap that can be referenced
 *    by multiple map entries, and figuring out what should wait could be
 *    complex as well...).
 *
 * we use alternative 2.  given that we are multi-threaded now we may want
 * to reconsider the choice.
 */

/*
 * local data structures
 */

struct uvm_advice {
	int advice;
	int nback;
	int nforw;
};

/*
 * page range array:
 * note: index in array must match "advice" value
 * XXX: borrowed numbers from freebsd.   do they work well for us?
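 *
 * e.g. with UVM_ADV_NORMAL a fault tries to map up to 3 pages behind and
 * 4 pages ahead of the faulting page (clipped to the map entry), while
 * UVM_ADV_RANDOM disables this neighbor pre-mapping entirely.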
157 */ 158 159 static const struct uvm_advice uvmadvice[] = { 160 { UVM_ADV_NORMAL, 3, 4 }, 161 { UVM_ADV_RANDOM, 0, 0 }, 162 { UVM_ADV_SEQUENTIAL, 8, 7}, 163 }; 164 165 #define UVM_MAXRANGE 16 /* must be MAX() of nback+nforw+1 */ 166 167 /* 168 * private prototypes 169 */ 170 171 /* 172 * inline functions 173 */ 174 175 /* 176 * uvmfault_anonflush: try and deactivate pages in specified anons 177 * 178 * => does not have to deactivate page if it is busy 179 */ 180 181 static inline void 182 uvmfault_anonflush(struct vm_anon **anons, int n) 183 { 184 int lcv; 185 struct vm_page *pg; 186 187 for (lcv = 0; lcv < n; lcv++) { 188 if (anons[lcv] == NULL) 189 continue; 190 KASSERT(mutex_owned(anons[lcv]->an_lock)); 191 pg = anons[lcv]->an_page; 192 if (pg && (pg->flags & PG_BUSY) == 0) { 193 mutex_enter(&uvm_pageqlock); 194 if (pg->wire_count == 0) { 195 uvm_pagedeactivate(pg); 196 } 197 mutex_exit(&uvm_pageqlock); 198 } 199 } 200 } 201 202 /* 203 * normal functions 204 */ 205 206 /* 207 * uvmfault_amapcopy: clear "needs_copy" in a map. 208 * 209 * => called with VM data structures unlocked (usually, see below) 210 * => we get a write lock on the maps and clear needs_copy for a VA 211 * => if we are out of RAM we sleep (waiting for more) 212 */ 213 214 static void 215 uvmfault_amapcopy(struct uvm_faultinfo *ufi) 216 { 217 for (;;) { 218 219 /* 220 * no mapping? give up. 221 */ 222 223 if (uvmfault_lookup(ufi, true) == false) 224 return; 225 226 /* 227 * copy if needed. 228 */ 229 230 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) 231 amap_copy(ufi->map, ufi->entry, AMAP_COPY_NOWAIT, 232 ufi->orig_rvaddr, ufi->orig_rvaddr + 1); 233 234 /* 235 * didn't work? must be out of RAM. unlock and sleep. 236 */ 237 238 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 239 uvmfault_unlockmaps(ufi, true); 240 uvm_wait("fltamapcopy"); 241 continue; 242 } 243 244 /* 245 * got it! unlock and return. 246 */ 247 248 uvmfault_unlockmaps(ufi, true); 249 return; 250 } 251 /*NOTREACHED*/ 252 } 253 254 /* 255 * uvmfault_anonget: get data in an anon into a non-busy, non-released 256 * page in that anon. 257 * 258 * => Map, amap and thus anon should be locked by caller. 259 * => If we fail, we unlock everything and error is returned. 260 * => If we are successful, return with everything still locked. 261 * => We do not move the page on the queues [gets moved later]. If we 262 * allocate a new page [we_own], it gets put on the queues. Either way, 263 * the result is that the page is on the queues at return time 264 * => For pages which are on loan from a uvm_object (and thus are not owned 265 * by the anon): if successful, return with the owning object locked. 266 * The caller must unlock this object when it unlocks everything else. 267 */ 268 269 int 270 uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, 271 struct vm_anon *anon) 272 { 273 struct vm_page *pg; 274 int error; 275 276 UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); 277 KASSERT(mutex_owned(anon->an_lock)); 278 KASSERT(anon->an_lock == amap->am_lock); 279 280 /* Increment the counters.*/ 281 uvmexp.fltanget++; 282 if (anon->an_page) { 283 curlwp->l_ru.ru_minflt++; 284 } else { 285 curlwp->l_ru.ru_majflt++; 286 } 287 error = 0; 288 289 /* 290 * Loop until we get the anon data, or fail. 291 */ 292 293 for (;;) { 294 bool we_own, locked; 295 /* 296 * Note: 'we_own' will become true if we set PG_BUSY on a page. 297 */ 298 we_own = false; 299 pg = anon->an_page; 300 301 /* 302 * If there is a resident page and it is loaned, then anon 303 * may not own it. 
		 * may not own it.  Call out to uvm_anon_lockloanpg() to
		 * identify and lock the real owner of the page.
		 */

		if (pg && pg->loan_count)
			pg = uvm_anon_lockloanpg(anon);

		/*
		 * Is page resident?  Make sure it is not busy/released.
		 */

		if (pg) {

			/*
			 * at this point, if the page has a uobject [meaning
			 * we have it on loan], then that uobject is locked
			 * by us!   if the page is busy, we drop all the
			 * locks (including uobject) and try again.
			 */

			if ((pg->flags & PG_BUSY) == 0) {
				UVMHIST_LOG(maphist, "<- OK",0,0,0,0);
				return 0;
			}
			pg->flags |= PG_WANTED;
			uvmexp.fltpgwait++;

			/*
			 * The last unlock must be an atomic unlock and wait
			 * on the owner of page.
			 */

			if (pg->uobject) {
				/* Owner of page is UVM object. */
				uvmfault_unlockall(ufi, amap, NULL);
				UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
				    0,0,0);
				UVM_UNLOCK_AND_WAIT(pg,
				    pg->uobject->vmobjlock,
				    false, "anonget1", 0);
			} else {
				/* Owner of page is anon. */
				uvmfault_unlockall(ufi, NULL, NULL);
				UVMHIST_LOG(maphist, " unlock+wait on anon",0,
				    0,0,0);
				UVM_UNLOCK_AND_WAIT(pg, anon->an_lock,
				    false, "anonget2", 0);
			}
		} else {
#if defined(VMSWAP)
			/*
			 * No page, therefore allocate one.
			 */

			pg = uvm_pagealloc(NULL,
			    ufi != NULL ? ufi->orig_rvaddr : 0,
			    anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0);
			if (pg == NULL) {
				/* Out of memory.  Wait a little. */
				uvmfault_unlockall(ufi, amap, NULL);
				uvmexp.fltnoram++;
				UVMHIST_LOG(maphist, "  noram -- UVM_WAIT",0,
				    0,0,0);
				if (!uvm_reclaimable()) {
					return ENOMEM;
				}
				uvm_wait("flt_noram1");
			} else {
				/* PG_BUSY bit is set. */
				we_own = true;
				uvmfault_unlockall(ufi, amap, NULL);

				/*
				 * Pass a PG_BUSY+PG_FAKE+PG_CLEAN page into
				 * the uvm_swap_get() function with all data
				 * structures unlocked.  Note that it is OK
				 * to read an_swslot here, because we hold
				 * PG_BUSY on the page.
				 */
				uvmexp.pageins++;
				error = uvm_swap_get(pg, anon->an_swslot,
				    PGO_SYNCIO);

				/*
				 * We clean up after the I/O below in the
				 * 'we_own' case.
				 */
			}
#else
			panic("%s: no page", __func__);
#endif /* defined(VMSWAP) */
		}

		/*
		 * Re-lock the map and anon.
		 */

		locked = uvmfault_relock(ufi);
		if (locked || we_own) {
			mutex_enter(anon->an_lock);
		}

		/*
		 * If we own the page (i.e. we set PG_BUSY), then we need
		 * to clean up after the I/O.  There are three cases to
		 * consider:
		 *
		 * 1) Page was released during I/O: free anon and ReFault.
		 * 2) I/O not OK.  Free the page and cause the fault to fail.
		 * 3) I/O OK!  Activate the page and sync with the non-we_own
		 *    case (i.e. drop anon lock if not locked).
		 */

		if (we_own) {
#if defined(VMSWAP)
			if (pg->flags & PG_WANTED) {
				wakeup(pg);
			}
			if (error) {

				/*
				 * Remove the swap slot from the anon and
				 * mark the anon as having no real slot.
				 * Do not free the swap slot, thus preventing
				 * it from being used again.
				 */

				if (anon->an_swslot > 0) {
					uvm_swap_markbad(anon->an_swslot, 1);
				}
				anon->an_swslot = SWSLOT_BAD;

				if ((pg->flags & PG_RELEASED) != 0) {
					goto released;
				}

				/*
				 * Note: page was never !PG_BUSY, so it
				 * cannot be mapped and thus no need to
				 * pmap_page_protect() it.
443 */ 444 445 mutex_enter(&uvm_pageqlock); 446 uvm_pagefree(pg); 447 mutex_exit(&uvm_pageqlock); 448 449 if (locked) { 450 uvmfault_unlockall(ufi, NULL, NULL); 451 } 452 mutex_exit(anon->an_lock); 453 UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); 454 return error; 455 } 456 457 if ((pg->flags & PG_RELEASED) != 0) { 458 released: 459 KASSERT(anon->an_ref == 0); 460 461 /* 462 * Released while we had unlocked amap. 463 */ 464 465 if (locked) { 466 uvmfault_unlockall(ufi, NULL, NULL); 467 } 468 uvm_anon_release(anon); 469 470 if (error) { 471 UVMHIST_LOG(maphist, 472 "<- ERROR/RELEASED", 0,0,0,0); 473 return error; 474 } 475 476 UVMHIST_LOG(maphist, "<- RELEASED", 0,0,0,0); 477 return ERESTART; 478 } 479 480 /* 481 * We have successfully read the page, activate it. 482 */ 483 484 mutex_enter(&uvm_pageqlock); 485 uvm_pageactivate(pg); 486 mutex_exit(&uvm_pageqlock); 487 pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); 488 UVM_PAGE_OWN(pg, NULL); 489 #else 490 panic("%s: we_own", __func__); 491 #endif /* defined(VMSWAP) */ 492 } 493 494 /* 495 * We were not able to re-lock the map - restart the fault. 496 */ 497 498 if (!locked) { 499 if (we_own) { 500 mutex_exit(anon->an_lock); 501 } 502 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 503 return ERESTART; 504 } 505 506 /* 507 * Verify that no one has touched the amap and moved 508 * the anon on us. 509 */ 510 511 if (ufi != NULL && amap_lookup(&ufi->entry->aref, 512 ufi->orig_rvaddr - ufi->entry->start) != anon) { 513 514 uvmfault_unlockall(ufi, amap, NULL); 515 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 516 return ERESTART; 517 } 518 519 /* 520 * Retry.. 521 */ 522 523 uvmexp.fltanretry++; 524 continue; 525 } 526 /*NOTREACHED*/ 527 } 528 529 /* 530 * uvmfault_promote: promote data to a new anon. used for 1B and 2B. 531 * 532 * 1. allocate an anon and a page. 533 * 2. fill its contents. 534 * 3. put it into amap. 535 * 536 * => if we fail (result != 0) we unlock everything. 537 * => on success, return a new locked anon via 'nanon'. 538 * (*nanon)->an_page will be a resident, locked, dirty page. 539 * => it's caller's responsibility to put the promoted nanon->an_page to the 540 * page queue. 
541 */ 542 543 static int 544 uvmfault_promote(struct uvm_faultinfo *ufi, 545 struct vm_anon *oanon, 546 struct vm_page *uobjpage, 547 struct vm_anon **nanon, /* OUT: allocated anon */ 548 struct vm_anon **spare) 549 { 550 struct vm_amap *amap = ufi->entry->aref.ar_amap; 551 struct uvm_object *uobj; 552 struct vm_anon *anon; 553 struct vm_page *pg; 554 struct vm_page *opg; 555 int error; 556 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 557 558 if (oanon) { 559 /* anon COW */ 560 opg = oanon->an_page; 561 KASSERT(opg != NULL); 562 KASSERT(opg->uobject == NULL || opg->loan_count > 0); 563 } else if (uobjpage != PGO_DONTCARE) { 564 /* object-backed COW */ 565 opg = uobjpage; 566 } else { 567 /* ZFOD */ 568 opg = NULL; 569 } 570 if (opg != NULL) { 571 uobj = opg->uobject; 572 } else { 573 uobj = NULL; 574 } 575 576 KASSERT(amap != NULL); 577 KASSERT(uobjpage != NULL); 578 KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0); 579 KASSERT(mutex_owned(amap->am_lock)); 580 KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock); 581 KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); 582 583 if (*spare != NULL) { 584 anon = *spare; 585 *spare = NULL; 586 } else if (ufi->map != kernel_map) { 587 anon = uvm_analloc(); 588 } else { 589 UVMHIST_LOG(maphist, "kernel_map, unlock and retry", 0,0,0,0); 590 591 /* 592 * we can't allocate anons with kernel_map locked. 593 */ 594 595 uvm_page_unbusy(&uobjpage, 1); 596 uvmfault_unlockall(ufi, amap, uobj); 597 598 *spare = uvm_analloc(); 599 if (*spare == NULL) { 600 goto nomem; 601 } 602 KASSERT((*spare)->an_lock == NULL); 603 error = ERESTART; 604 goto done; 605 } 606 if (anon) { 607 608 /* 609 * The new anon is locked. 610 * 611 * if opg == NULL, we want a zero'd, dirty page, 612 * so have uvm_pagealloc() do that for us. 613 */ 614 615 KASSERT(anon->an_lock == NULL); 616 anon->an_lock = amap->am_lock; 617 pg = uvm_pagealloc(NULL, ufi->orig_rvaddr, anon, 618 UVM_FLAG_COLORMATCH | (opg == NULL ? UVM_PGA_ZERO : 0)); 619 if (pg == NULL) { 620 anon->an_lock = NULL; 621 } 622 } else { 623 pg = NULL; 624 } 625 626 /* 627 * out of memory resources? 628 */ 629 630 if (pg == NULL) { 631 /* save anon for the next try. */ 632 if (anon != NULL) { 633 *spare = anon; 634 } 635 636 /* unlock and fail ... */ 637 uvm_page_unbusy(&uobjpage, 1); 638 uvmfault_unlockall(ufi, amap, uobj); 639 nomem: 640 if (!uvm_reclaimable()) { 641 UVMHIST_LOG(maphist, "out of VM", 0,0,0,0); 642 uvmexp.fltnoanon++; 643 error = ENOMEM; 644 goto done; 645 } 646 647 UVMHIST_LOG(maphist, "out of RAM, waiting for more", 0,0,0,0); 648 uvmexp.fltnoram++; 649 uvm_wait("flt_noram5"); 650 error = ERESTART; 651 goto done; 652 } 653 654 /* copy page [pg now dirty] */ 655 if (opg) { 656 uvm_pagecopy(opg, pg); 657 } 658 659 amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon, 660 oanon != NULL); 661 662 *nanon = anon; 663 error = 0; 664 done: 665 return error; 666 } 667 668 669 /* 670 * F A U L T - m a i n e n t r y p o i n t 671 */ 672 673 /* 674 * uvm_fault: page fault handler 675 * 676 * => called from MD code to resolve a page fault 677 * => VM data structures usually should be unlocked. however, it is 678 * possible to call here with the main map locked if the caller 679 * gets a write lock, sets it recusive, and then calls us (c.f. 680 * uvm_map_pageable). this should be avoided because it keeps 681 * the map locked off during I/O. 682 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT 683 */ 684 685 #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? 
			    ~VM_PROT_WRITE : VM_PROT_ALL)

/* fault_flag values passed from uvm_fault_wire to uvm_fault_internal */
#define UVM_FAULT_WIRE		(1 << 0)
#define UVM_FAULT_MAXPROT	(1 << 1)

struct uvm_faultctx {
	vm_prot_t access_type;
	vm_prot_t enter_prot;
	vaddr_t startva;
	int npages;
	int centeridx;
	struct vm_anon *anon_spare;
	bool wire_mapping;
	bool narrow;
	bool wire_paging;
	bool cow_now;
	bool promote;
};

static inline int	uvm_fault_check(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon ***, bool);

static int		uvm_fault_upper(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon **);
static inline int	uvm_fault_upper_lookup(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_anon **, struct vm_page **);
static inline void	uvm_fault_upper_neighbor(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    vaddr_t, struct vm_page *, bool);
static inline int	uvm_fault_upper_loan(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon *, struct uvm_object **);
static inline int	uvm_fault_upper_promote(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *);
static inline int	uvm_fault_upper_direct(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *);
static int		uvm_fault_upper_enter(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *,
			    struct vm_page *, struct vm_anon *);
static inline void	uvm_fault_upper_done(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_anon *, struct vm_page *);

static int		uvm_fault_lower(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_page **);
static inline void	uvm_fault_lower_lookup(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_page **);
static inline void	uvm_fault_lower_neighbor(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    vaddr_t, struct vm_page *, bool);
static inline int	uvm_fault_lower_io(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object **, struct vm_page **);
static inline int	uvm_fault_lower_direct(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);
static inline int	uvm_fault_lower_direct_loan(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page **,
			    struct vm_page **);
static inline int	uvm_fault_lower_promote(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);
static int		uvm_fault_lower_enter(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *,
			    struct vm_anon *, struct vm_page *);
static inline void	uvm_fault_lower_done(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);

int
uvm_fault_internal(struct vm_map *orig_map, vaddr_t vaddr,
    vm_prot_t access_type, int fault_flag)
{
	struct uvm_faultinfo ufi;
	struct uvm_faultctx flt = {
		.access_type = access_type,

		/* don't look for neighborhood pages on "wire" fault */
		.narrow = (fault_flag & UVM_FAULT_WIRE) != 0,

		/* "wire" fault causes wiring of both mapping and paging */
		.wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0,
		.wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0,
	};
	const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0;
	struct vm_anon *anons_store[UVM_MAXRANGE], **anons;
	struct vm_page *pages_store[UVM_MAXRANGE], **pages;
	int error;
	UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, at=%d, ff=%d)",
	      orig_map, vaddr, access_type, fault_flag);

	curcpu()->ci_data.cpu_nfault++;

	/*
	 * init the IN parameters in the ufi
	 */

	ufi.orig_map = orig_map;
	ufi.orig_rvaddr = trunc_page(vaddr);
	ufi.orig_size = PAGE_SIZE;	/* can't get any smaller than this */

	error = ERESTART;
	while (error == ERESTART) { /* ReFault: */
		anons = anons_store;
		pages = pages_store;

		error = uvm_fault_check(&ufi, &flt, &anons, maxprot);
		if (error != 0)
			continue;

		error = uvm_fault_upper_lookup(&ufi, &flt, anons, pages);
		if (error != 0)
			continue;

		if (pages[flt.centeridx] == PGO_DONTCARE)
			error = uvm_fault_upper(&ufi, &flt, anons);
		else {
			struct uvm_object * const uobj =
			    ufi.entry->object.uvm_obj;

			if (uobj && uobj->pgops->pgo_fault != NULL) {
				/*
				 * invoke "special" fault routine.
				 */
				mutex_enter(uobj->vmobjlock);
				/* locked: maps(read), amap(if there), uobj */
				error = uobj->pgops->pgo_fault(&ufi,
				    flt.startva, pages, flt.npages,
				    flt.centeridx, flt.access_type,
				    PGO_LOCKED|PGO_SYNCIO);

				/*
				 * locked: nothing, pgo_fault has unlocked
				 * everything
				 */

				/*
				 * object fault routine responsible for
				 * pmap_update().
				 */
			} else {
				error = uvm_fault_lower(&ufi, &flt, pages);
			}
		}
	}

	if (flt.anon_spare != NULL) {
		flt.anon_spare->an_ref--;
		KASSERT(flt.anon_spare->an_ref == 0);
		KASSERT(flt.anon_spare->an_lock == NULL);
		uvm_anfree(flt.anon_spare);
	}
	return error;
}

/*
 * uvm_fault_check: check prot, handle needs-copy, etc.
 *
 *	1. lookup entry.
 *	2. check protection.
 *	3. adjust fault condition (mainly for simulated fault).
 *	4. handle needs-copy (lazy amap copy).
 *	5. establish range of interest for neighbor fault (aka pre-fault).
 *	6. look up anons (if amap exists).
 *	7. flush pages (if MADV_SEQUENTIAL)
 *
 *	=> called with nothing locked.
 *	=> if we fail (result != 0) we unlock everything.
 *	=> initialize/adjust many members of flt.
 */

static int
uvm_fault_check(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct vm_anon ***ranons, bool maxprot)
{
	struct vm_amap *amap;
	struct uvm_object *uobj;
	vm_prot_t check_prot;
	int nback, nforw;
	UVMHIST_FUNC("uvm_fault_check"); UVMHIST_CALLED(maphist);

	/*
	 * lookup and lock the maps
	 */

	if (uvmfault_lookup(ufi, false) == false) {
		UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", ufi->orig_rvaddr,
		    0,0,0);
		return EFAULT;
	}
	/* locked: maps(read) */

#ifdef DIAGNOSTIC
	if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0) {
		printf("Page fault on non-pageable map:\n");
		printf("ufi->map = %p\n", ufi->map);
		printf("ufi->orig_map = %p\n", ufi->orig_map);
		printf("ufi->orig_rvaddr = 0x%lx\n", (u_long) ufi->orig_rvaddr);
		panic("uvm_fault: (ufi->map->flags & VM_MAP_PAGEABLE) == 0");
	}
#endif

	/*
	 * check protection
	 */

	check_prot = maxprot ?
	    ufi->entry->max_protection : ufi->entry->protection;
	if ((check_prot & flt->access_type) != flt->access_type) {
		UVMHIST_LOG(maphist,
		    "<- protection failure (prot=0x%x, access=0x%x)",
		    ufi->entry->protection, flt->access_type, 0, 0);
		uvmfault_unlockmaps(ufi, false);
		return EACCES;
	}

	/*
	 * "enter_prot" is the protection we want to enter the page in at.
	 * for certain pages (e.g. copy-on-write pages) this protection can
	 * be more strict than ufi->entry->protection.  "wired" means either
	 * the entry is wired or we are fault-wiring the pg.
	 */

	flt->enter_prot = ufi->entry->protection;
	if (VM_MAPENT_ISWIRED(ufi->entry))
		flt->wire_mapping = true;

	if (flt->wire_mapping) {
		flt->access_type = flt->enter_prot; /* full access for wired */
		flt->cow_now = (check_prot & VM_PROT_WRITE) != 0;
	} else {
		flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0;
	}

	flt->promote = false;

	/*
	 * handle "needs_copy" case.   if we need to copy the amap we will
	 * have to drop our readlock and relock it with a write lock.  (we
	 * need a write lock to change anything in a map entry [e.g.
	 * needs_copy]).
	 */

	if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
		if (flt->cow_now || (ufi->entry->object.uvm_obj == NULL)) {
			KASSERT(!maxprot);
			/* need to clear */
			UVMHIST_LOG(maphist,
			    "  need to clear needs_copy and refault",0,0,0,0);
			uvmfault_unlockmaps(ufi, false);
			uvmfault_amapcopy(ufi);
			uvmexp.fltamcopy++;
			return ERESTART;

		} else {

			/*
			 * ensure that we pmap_enter page R/O since
			 * needs_copy is still true
			 */

			flt->enter_prot &= ~VM_PROT_WRITE;
		}
	}

	/*
	 * identify the players
	 */

	amap = ufi->entry->aref.ar_amap;	/* upper layer */
	uobj = ufi->entry->object.uvm_obj;	/* lower layer */

	/*
	 * check for a case 0 fault.   if nothing backing the entry then
	 * error now.
	 */

	if (amap == NULL && uobj == NULL) {
		uvmfault_unlockmaps(ufi, false);
		UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0);
		return EFAULT;
	}

	/*
	 * establish range of interest based on advice from mapper
	 * and then clip to fit map entry.   note that we only want
	 * to do this the first time through the fault.   if we
	 * ReFault we will disable this by setting "narrow" to true.
	 */

	if (flt->narrow == false) {

		/* wide fault (!narrow) */
		KASSERT(uvmadvice[ufi->entry->advice].advice ==
		    ufi->entry->advice);
		nback = MIN(uvmadvice[ufi->entry->advice].nback,
		    (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT);
		flt->startva = ufi->orig_rvaddr - (nback << PAGE_SHIFT);
		/*
		 * note: "-1" because we don't want to count the
		 * faulting page as forw
		 */
		nforw = MIN(uvmadvice[ufi->entry->advice].nforw,
		    ((ufi->entry->end - ufi->orig_rvaddr) >>
		    PAGE_SHIFT) - 1);
		flt->npages = nback + nforw + 1;
		flt->centeridx = nback;

		flt->narrow = true;	/* ensure only once per-fault */

	} else {

		/* narrow fault! */
		nback = nforw = 0;
		flt->startva = ufi->orig_rvaddr;
		flt->npages = 1;
		flt->centeridx = 0;

	}
	/* offset from entry's start to pgs' start */
	const voff_t eoff = flt->startva - ufi->entry->start;

	/* locked: maps(read) */
	UVMHIST_LOG(maphist, "  narrow=%d, back=%d, forw=%d, startva=0x%x",
	    flt->narrow, nback, nforw, flt->startva);
	UVMHIST_LOG(maphist, "  entry=0x%x, amap=0x%x, obj=0x%x", ufi->entry,
	    amap, uobj, 0);

	/*
	 * if we've got an amap, lock it and extract current anons.
	 */

	if (amap) {
		amap_lock(amap);
		amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages);
	} else {
		*ranons = NULL;	/* to be safe */
	}

	/* locked: maps(read), amap(if there) */
	KASSERT(amap == NULL || mutex_owned(amap->am_lock));

	/*
	 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
	 * now and then forget about them (for the rest of the fault).
	 */

	if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) {

		UVMHIST_LOG(maphist, "  MADV_SEQUENTIAL: flushing backpages",
		    0,0,0,0);
		/* flush back-page anons? */
		if (amap)
			uvmfault_anonflush(*ranons, nback);

		/* flush object? */
		if (uobj) {
			voff_t uoff;

			uoff = ufi->entry->offset + eoff;
			mutex_enter(uobj->vmobjlock);
			(void) (uobj->pgops->pgo_put)(uobj, uoff, uoff +
			    (nback << PAGE_SHIFT), PGO_DEACTIVATE);
		}

		/* now forget about the backpages */
		if (amap)
			*ranons += nback;
		flt->startva += (nback << PAGE_SHIFT);
		flt->npages -= nback;
		flt->centeridx = 0;
	}
	/*
	 * => startva is fixed
	 * => npages is fixed
	 */
	KASSERT(flt->startva <= ufi->orig_rvaddr);
	KASSERT(ufi->orig_rvaddr + ufi->orig_size <=
	    flt->startva + (flt->npages << PAGE_SHIFT));
	return 0;
}

/*
 * uvm_fault_upper_lookup: look up existing h/w mapping and amap.
 *
 * iterate range of interest:
 *	1. check if h/w mapping exists.  if yes, we don't care
 *	2. check if anon exists.  if not, page is lower.
 *	3. if anon exists, enter h/w mapping for neighbors.
 *
 * => called with amap locked (if exists).
 */

static int
uvm_fault_upper_lookup(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct vm_anon **anons, struct vm_page **pages)
{
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
	int lcv;
	vaddr_t currva;
	bool shadowed;
	UVMHIST_FUNC("uvm_fault_upper_lookup"); UVMHIST_CALLED(maphist);

	/* locked: maps(read), amap(if there) */
	KASSERT(amap == NULL || mutex_owned(amap->am_lock));

	/*
	 * map in the backpages and frontpages we found in the amap in hopes
	 * of preventing future faults.    we also init the pages[] array as
	 * we go.
	 */

	currva = flt->startva;
	shadowed = false;
	for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
		/*
		 * don't play with VAs that are already mapped
		 * (except for center)
		 */
		if (lcv != flt->centeridx &&
		    pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
			pages[lcv] = PGO_DONTCARE;
			continue;
		}

		/*
		 * unmapped or center page.  check if any anon at this level.
		 */
		if (amap == NULL || anons[lcv] == NULL) {
			pages[lcv] = NULL;
			continue;
		}

		/*
		 * check for present page and map if possible.  re-activate it.
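		 * (only pages that are neither busy nor loaned are entered
		 * here; the center anon is only noted via "shadowed" and is
		 * handled later by uvm_fault_upper().)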
1136 */ 1137 1138 pages[lcv] = PGO_DONTCARE; 1139 if (lcv == flt->centeridx) { /* save center for later! */ 1140 shadowed = true; 1141 continue; 1142 } 1143 1144 struct vm_anon *anon = anons[lcv]; 1145 struct vm_page *pg = anon->an_page; 1146 1147 KASSERT(anon->an_lock == amap->am_lock); 1148 1149 /* Ignore loaned and busy pages. */ 1150 if (pg && pg->loan_count == 0 && (pg->flags & PG_BUSY) == 0) { 1151 uvm_fault_upper_neighbor(ufi, flt, currva, 1152 pg, anon->an_ref > 1); 1153 } 1154 } 1155 1156 /* locked: maps(read), amap(if there) */ 1157 KASSERT(amap == NULL || mutex_owned(amap->am_lock)); 1158 /* (shadowed == true) if there is an anon at the faulting address */ 1159 UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, 1160 (ufi->entry->object.uvm_obj && shadowed != false),0,0); 1161 1162 /* 1163 * note that if we are really short of RAM we could sleep in the above 1164 * call to pmap_enter with everything locked. bad? 1165 * 1166 * XXX Actually, that is bad; pmap_enter() should just fail in that 1167 * XXX case. --thorpej 1168 */ 1169 1170 return 0; 1171 } 1172 1173 /* 1174 * uvm_fault_upper_neighbor: enter single lower neighbor page. 1175 * 1176 * => called with amap and anon locked. 1177 */ 1178 1179 static void 1180 uvm_fault_upper_neighbor( 1181 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1182 vaddr_t currva, struct vm_page *pg, bool readonly) 1183 { 1184 UVMHIST_FUNC("uvm_fault_upper_neighbor"); UVMHIST_CALLED(maphist); 1185 1186 /* locked: amap, anon */ 1187 1188 mutex_enter(&uvm_pageqlock); 1189 uvm_pageenqueue(pg); 1190 mutex_exit(&uvm_pageqlock); 1191 UVMHIST_LOG(maphist, 1192 " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x", 1193 ufi->orig_map->pmap, currva, pg, 0); 1194 uvmexp.fltnamap++; 1195 1196 /* 1197 * Since this page isn't the page that's actually faulting, 1198 * ignore pmap_enter() failures; it's not critical that we 1199 * enter these right now. 1200 */ 1201 1202 (void) pmap_enter(ufi->orig_map->pmap, currva, 1203 VM_PAGE_TO_PHYS(pg), 1204 readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : 1205 flt->enter_prot, 1206 PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)); 1207 1208 pmap_update(ufi->orig_map->pmap); 1209 } 1210 1211 /* 1212 * uvm_fault_upper: handle upper fault. 1213 * 1214 * 1. acquire anon lock. 1215 * 2. get anon. let uvmfault_anonget do the dirty work. 1216 * 3. handle loan. 1217 * 4. dispatch direct or promote handlers. 1218 */ 1219 1220 static int 1221 uvm_fault_upper( 1222 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1223 struct vm_anon **anons) 1224 { 1225 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1226 struct vm_anon * const anon = anons[flt->centeridx]; 1227 struct uvm_object *uobj; 1228 int error; 1229 UVMHIST_FUNC("uvm_fault_upper"); UVMHIST_CALLED(maphist); 1230 1231 /* locked: maps(read), amap, anon */ 1232 KASSERT(mutex_owned(amap->am_lock)); 1233 KASSERT(anon->an_lock == amap->am_lock); 1234 1235 /* 1236 * handle case 1: fault on an anon in our amap 1237 */ 1238 1239 UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0); 1240 1241 /* 1242 * no matter if we have case 1A or case 1B we are going to need to 1243 * have the anon's memory resident. ensure that now. 1244 */ 1245 1246 /* 1247 * let uvmfault_anonget do the dirty work. 1248 * if it fails (!OK) it will unlock everything for us. 1249 * if it succeeds, locks are still valid and locked. 1250 * also, if it is OK, then the anon's page is on the queues. 
	 * if the page is on loan from a uvm_object, then anonget will
	 * lock that object for us if it does not fail.
	 */

	error = uvmfault_anonget(ufi, amap, anon);
	switch (error) {
	case 0:
		break;

	case ERESTART:
		return ERESTART;

	case EAGAIN:
		kpause("fltagain1", false, hz/2, NULL);
		return ERESTART;

	default:
		return error;
	}

	/*
	 * uobj is non null if the page is on loan from an object (i.e. uobj)
	 */

	uobj = anon->an_page->uobject;	/* locked by anonget if !NULL */

	/* locked: maps(read), amap, anon, uobj(if one) */
	KASSERT(mutex_owned(amap->am_lock));
	KASSERT(anon->an_lock == amap->am_lock);
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/*
	 * special handling for loaned pages
	 */

	if (anon->an_page->loan_count) {
		error = uvm_fault_upper_loan(ufi, flt, anon, &uobj);
		if (error != 0)
			return error;
	}

	/*
	 * if we are case 1B then we will need to allocate a new blank
	 * anon to transfer the data into.   note that we have a lock
	 * on anon, so no one can busy or release the page until we are done.
	 * also note that the ref count can't drop to zero here because
	 * it is > 1 and we are only dropping one ref.
	 *
	 * in the (hopefully very rare) case that we are out of RAM we
	 * will unlock, wait for more RAM, and refault.
	 *
	 * if we are out of anon VM we kill the process (XXX: could wait?).
	 */

	if (flt->cow_now && anon->an_ref > 1) {
		flt->promote = true;
		error = uvm_fault_upper_promote(ufi, flt, uobj, anon);
	} else {
		error = uvm_fault_upper_direct(ufi, flt, uobj, anon);
	}
	return error;
}

/*
 * uvm_fault_upper_loan: handle loaned upper page.
 *
 *	1. if not cow'ing now, simply adjust flt->enter_prot.
 *	2. if cow'ing now, and if ref count is 1, break loan.
 */

static int
uvm_fault_upper_loan(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct vm_anon *anon, struct uvm_object **ruobj)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	int error = 0;
	UVMHIST_FUNC("uvm_fault_upper_loan"); UVMHIST_CALLED(maphist);

	if (!flt->cow_now) {

		/*
		 * for read faults on loaned pages we just cap the
		 * protection at read-only.
		 */

		flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;

	} else {
		/*
		 * note that we can't allow writes into a loaned page!
		 *
		 * if we have a write fault on a loaned page in an
		 * anon then we need to look at the anon's ref count.
		 * if it is greater than one then we are going to do
		 * a normal copy-on-write fault into a new anon (this
		 * is not a problem).  however, if the reference count
		 * is one (a case where we would normally allow a
		 * write directly to the page) then we need to kill
		 * the loan before we continue.
		 */

		/* >1 case is already ok */
		if (anon->an_ref == 1) {
			error = uvm_loanbreak_anon(anon, *ruobj);
			if (error != 0) {
				uvmfault_unlockall(ufi, amap, *ruobj);
				uvm_wait("flt_noram2");
				return ERESTART;
			}
			/* if we were a loan receiver uobj is gone */
			if (*ruobj)
				*ruobj = NULL;
		}
	}
	return error;
}

/*
 * uvm_fault_upper_promote: promote upper page.
 *
 *	1. call uvmfault_promote.
 *	2. enqueue page.
 *	3. deref.
 *	4. pass page to uvm_fault_upper_enter.
 */

static int
uvm_fault_upper_promote(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_anon *anon)
{
	struct vm_anon * const oanon = anon;
	struct vm_page *pg;
	int error;
	UVMHIST_FUNC("uvm_fault_upper_promote"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  case 1B: COW fault",0,0,0,0);
	uvmexp.flt_acow++;

	error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon,
	    &flt->anon_spare);
	switch (error) {
	case 0:
		break;
	case ERESTART:
		return ERESTART;
	default:
		return error;
	}

	KASSERT(anon == NULL || anon->an_lock == oanon->an_lock);

	pg = anon->an_page;
	mutex_enter(&uvm_pageqlock);
	uvm_pageenqueue(pg); /* uvm_fault_upper_done will activate the page */
	mutex_exit(&uvm_pageqlock);
	pg->flags &= ~(PG_BUSY|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/* deref: can not drop to zero here by defn! */
	KASSERT(oanon->an_ref > 1);
	oanon->an_ref--;

	/*
	 * note: oanon is still locked, as is the new anon.  we
	 * need to check for this later when we unlock oanon; if
	 * oanon != anon, we'll have to unlock anon, too.
	 */

	return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon);
}

/*
 * uvm_fault_upper_direct: handle direct fault.
 */

static int
uvm_fault_upper_direct(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_anon *anon)
{
	struct vm_anon * const oanon = anon;
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_fault_upper_direct"); UVMHIST_CALLED(maphist);

	uvmexp.flt_anon++;
	pg = anon->an_page;
	if (anon->an_ref > 1)     /* disallow writes to ref > 1 anons */
		flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;

	return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon);
}

/*
 * uvm_fault_upper_enter: enter h/w mapping of upper page.
 */

static int
uvm_fault_upper_enter(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_anon *anon, struct vm_page *pg,
	struct vm_anon *oanon)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	UVMHIST_FUNC("uvm_fault_upper_enter"); UVMHIST_CALLED(maphist);

	/* locked: maps(read), amap, oanon, anon(if different from oanon) */
	KASSERT(mutex_owned(amap->am_lock));
	KASSERT(anon->an_lock == amap->am_lock);
	KASSERT(oanon->an_lock == amap->am_lock);
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	/*
	 * now map the page in.
	 */

	UVMHIST_LOG(maphist,
	    " MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x, promote=%d",
	    ufi->orig_map->pmap, ufi->orig_rvaddr, pg, flt->promote);
	if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
	    VM_PAGE_TO_PHYS(pg),
	    flt->enter_prot, flt->access_type | PMAP_CANFAIL |
	    (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) {

		/*
		 * No need to undo what we did; we can simply think of
		 * this as the pmap throwing away the mapping information.
		 *
		 * We do, however, have to go through the ReFault path,
		 * as the map may change while we're asleep.
		 */

		uvmfault_unlockall(ufi, amap, uobj);
		if (!uvm_reclaimable()) {
			UVMHIST_LOG(maphist,
			    "<- failed.  out of VM",0,0,0,0);
			/* XXX instrumentation */
			return ENOMEM;
		}
		/* XXX instrumentation */
		uvm_wait("flt_pmfail1");
		return ERESTART;
	}

	uvm_fault_upper_done(ufi, flt, anon, pg);

	/*
	 * done case 1!  finish up by unlocking everything and returning success
	 */

	pmap_update(ufi->orig_map->pmap);
	uvmfault_unlockall(ufi, amap, uobj);
	return 0;
}

/*
 * uvm_fault_upper_done: queue upper center page.
 */

static void
uvm_fault_upper_done(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct vm_anon *anon, struct vm_page *pg)
{
	const bool wire_paging = flt->wire_paging;

	UVMHIST_FUNC("uvm_fault_upper_done"); UVMHIST_CALLED(maphist);

	/*
	 * ... update the page queues.
	 */

	mutex_enter(&uvm_pageqlock);
	if (wire_paging) {
		uvm_pagewire(pg);

		/*
		 * since the now-wired page cannot be paged out,
		 * release its swap resources for others to use.
		 * since an anon with no swap cannot be PG_CLEAN,
		 * clear its clean flag now.
		 */

		pg->flags &= ~(PG_CLEAN);

	} else {
		uvm_pageactivate(pg);
	}
	mutex_exit(&uvm_pageqlock);

	if (wire_paging) {
		uvm_anon_dropswap(anon);
	}
}

/*
 * uvm_fault_lower: handle lower fault.
 *
 *	1. check uobj
 *	1.1. if null, ZFOD.
 *	1.2. if not null, look up unmapped neighbor pages.
 *	2. for center page, check if promote.
 *	2.1. ZFOD always needs promotion.
 *	2.2. other uobjs, when entry is marked COW (usually MAP_PRIVATE vnode).
 *	3. if uobj is not ZFOD and page is not found, do i/o.
 *	4. dispatch either direct / promote fault.
 */

static int
uvm_fault_lower(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct vm_page **pages)
{
#ifdef DIAGNOSTIC
	struct vm_amap *amap = ufi->entry->aref.ar_amap;
#endif
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *uobjpage;
	int error;
	UVMHIST_FUNC("uvm_fault_lower"); UVMHIST_CALLED(maphist);

	/*
	 * now, if the desired page is not shadowed by the amap and we have
	 * a backing object that does not have a special fault routine, then
	 * we ask (with pgo_get) the object for resident pages that we care
	 * about and attempt to map them in.  we do not let pgo_get block
	 * (PGO_LOCKED).
	 */

	if (uobj == NULL) {
		/* zero fill; don't care about neighbor pages */
		uobjpage = NULL;
	} else {
		uvm_fault_lower_lookup(ufi, flt, pages);
		uobjpage = pages[flt->centeridx];
	}

	/*
	 * note that at this point we are done with any front or back pages.
	 * we are now going to focus on the center page (i.e. the one we've
	 * faulted on).  if we have faulted on the upper (anon) layer
	 * [i.e. case 1], then the anon we want is anons[centeridx] (we have
	 * not touched it yet).  if we have faulted on the bottom (uobj)
	 * layer [i.e. case 2] and the page was both present and available,
	 * then we've got a pointer to it as "uobjpage" and we've already
	 * made it BUSY.
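	 *
	 * in code terms, the promotion decision made just below is simply:
	 *
	 *	flt->promote = (uobj == NULL) ||
	 *	    (flt->cow_now && UVM_ET_ISCOPYONWRITE(ufi->entry));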
1598 */ 1599 1600 /* 1601 * locked: 1602 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) 1603 */ 1604 KASSERT(amap == NULL || mutex_owned(amap->am_lock)); 1605 KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); 1606 KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); 1607 1608 /* 1609 * note that uobjpage can not be PGO_DONTCARE at this point. we now 1610 * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we 1611 * have a backing object, check and see if we are going to promote 1612 * the data up to an anon during the fault. 1613 */ 1614 1615 if (uobj == NULL) { 1616 uobjpage = PGO_DONTCARE; 1617 flt->promote = true; /* always need anon here */ 1618 } else { 1619 KASSERT(uobjpage != PGO_DONTCARE); 1620 flt->promote = flt->cow_now && UVM_ET_ISCOPYONWRITE(ufi->entry); 1621 } 1622 UVMHIST_LOG(maphist, " case 2 fault: promote=%d, zfill=%d", 1623 flt->promote, (uobj == NULL), 0,0); 1624 1625 /* 1626 * if uobjpage is not null then we do not need to do I/O to get the 1627 * uobjpage. 1628 * 1629 * if uobjpage is null, then we need to unlock and ask the pager to 1630 * get the data for us. once we have the data, we need to reverify 1631 * the state the world. we are currently not holding any resources. 1632 */ 1633 1634 if (uobjpage) { 1635 /* update rusage counters */ 1636 curlwp->l_ru.ru_minflt++; 1637 } else { 1638 error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage); 1639 if (error != 0) 1640 return error; 1641 } 1642 1643 /* 1644 * locked: 1645 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1646 */ 1647 KASSERT(amap == NULL || mutex_owned(amap->am_lock)); 1648 KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock)); 1649 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); 1650 1651 /* 1652 * notes: 1653 * - at this point uobjpage can not be NULL 1654 * - at this point uobjpage can not be PG_RELEASED (since we checked 1655 * for it above) 1656 * - at this point uobjpage could be PG_WANTED (handle later) 1657 */ 1658 1659 KASSERT(uobjpage != NULL); 1660 KASSERT(uobj == NULL || uobj == uobjpage->uobject); 1661 KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) || 1662 (uobjpage->flags & PG_CLEAN) != 0); 1663 1664 if (!flt->promote) { 1665 error = uvm_fault_lower_direct(ufi, flt, uobj, uobjpage); 1666 } else { 1667 error = uvm_fault_lower_promote(ufi, flt, uobj, uobjpage); 1668 } 1669 return error; 1670 } 1671 1672 /* 1673 * uvm_fault_lower_lookup: look up on-memory uobj pages. 1674 * 1675 * 1. get on-memory pages. 1676 * 2. if failed, give up (get only center page later). 1677 * 3. if succeeded, enter h/w mapping of neighbor pages. 
1678 */ 1679 1680 static void 1681 uvm_fault_lower_lookup( 1682 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1683 struct vm_page **pages) 1684 { 1685 struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1686 int lcv, gotpages; 1687 vaddr_t currva; 1688 UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist); 1689 1690 mutex_enter(uobj->vmobjlock); 1691 /* Locked: maps(read), amap(if there), uobj */ 1692 1693 uvmexp.fltlget++; 1694 gotpages = flt->npages; 1695 (void) uobj->pgops->pgo_get(uobj, 1696 ufi->entry->offset + flt->startva - ufi->entry->start, 1697 pages, &gotpages, flt->centeridx, 1698 flt->access_type & MASK(ufi->entry), ufi->entry->advice, PGO_LOCKED); 1699 1700 KASSERT(mutex_owned(uobj->vmobjlock)); 1701 1702 /* 1703 * check for pages to map, if we got any 1704 */ 1705 1706 if (gotpages == 0) { 1707 pages[flt->centeridx] = NULL; 1708 return; 1709 } 1710 1711 currva = flt->startva; 1712 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { 1713 struct vm_page *curpg; 1714 1715 curpg = pages[lcv]; 1716 if (curpg == NULL || curpg == PGO_DONTCARE) { 1717 continue; 1718 } 1719 KASSERT(curpg->uobject == uobj); 1720 1721 /* 1722 * if center page is resident and not PG_BUSY|PG_RELEASED 1723 * then pgo_get made it PG_BUSY for us and gave us a handle 1724 * to it. 1725 */ 1726 1727 if (lcv == flt->centeridx) { 1728 UVMHIST_LOG(maphist, " got uobjpage " 1729 "(0x%x) with locked get", 1730 curpg, 0,0,0); 1731 } else { 1732 bool readonly = (curpg->flags & PG_RDONLY) 1733 || (curpg->loan_count > 0) 1734 || UVM_OBJ_NEEDS_WRITEFAULT(curpg->uobject); 1735 1736 uvm_fault_lower_neighbor(ufi, flt, 1737 currva, curpg, readonly); 1738 } 1739 } 1740 pmap_update(ufi->orig_map->pmap); 1741 } 1742 1743 /* 1744 * uvm_fault_lower_neighbor: enter h/w mapping of lower neighbor page. 1745 */ 1746 1747 static void 1748 uvm_fault_lower_neighbor( 1749 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1750 vaddr_t currva, struct vm_page *pg, bool readonly) 1751 { 1752 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1753 1754 /* locked: maps(read), amap(if there), uobj */ 1755 1756 /* 1757 * calling pgo_get with PGO_LOCKED returns us pages which 1758 * are neither busy nor released, so we don't need to check 1759 * for this. we can just directly enter the pages. 1760 */ 1761 1762 mutex_enter(&uvm_pageqlock); 1763 uvm_pageenqueue(pg); 1764 mutex_exit(&uvm_pageqlock); 1765 UVMHIST_LOG(maphist, 1766 " MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x", 1767 ufi->orig_map->pmap, currva, pg, 0); 1768 uvmexp.fltnomap++; 1769 1770 /* 1771 * Since this page isn't the page that's actually faulting, 1772 * ignore pmap_enter() failures; it's not critical that we 1773 * enter these right now. 1774 * NOTE: page can't be PG_WANTED or PG_RELEASED because we've 1775 * held the lock the whole time we've had the handle. 1776 */ 1777 KASSERT((pg->flags & PG_PAGEOUT) == 0); 1778 KASSERT((pg->flags & PG_RELEASED) == 0); 1779 KASSERT((pg->flags & PG_WANTED) == 0); 1780 KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) || (pg->flags & PG_CLEAN) != 0); 1781 pg->flags &= ~(PG_BUSY); 1782 UVM_PAGE_OWN(pg, NULL); 1783 1784 KASSERT(mutex_owned(pg->uobject->vmobjlock)); 1785 (void) pmap_enter(ufi->orig_map->pmap, currva, 1786 VM_PAGE_TO_PHYS(pg), 1787 readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : 1788 flt->enter_prot & MASK(ufi->entry), 1789 PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)); 1790 } 1791 1792 /* 1793 * uvm_fault_lower_io: get lower page from backing store. 1794 * 1795 * 1. 
 *	2. call pgo_get.
 *	3. if failed, recover.
 *	4. if succeeded, relock everything and verify things.
 */

static int
uvm_fault_lower_io(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct uvm_object **ruobj, struct vm_page **ruobjpage)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = *ruobj;
	struct vm_page *pg;
	bool locked;
	int gotpages;
	int error;
	voff_t uoff;
	UVMHIST_FUNC("uvm_fault_lower_io"); UVMHIST_CALLED(maphist);

	/* update rusage counters */
	curlwp->l_ru.ru_majflt++;

	/* Locked: maps(read), amap(if there), uobj */
	uvmfault_unlockall(ufi, amap, NULL);

	/* Locked: uobj */
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));

	uvmexp.fltget++;
	gotpages = 1;
	pg = NULL;
	uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
	error = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages,
	    0, flt->access_type & MASK(ufi->entry), ufi->entry->advice,
	    PGO_SYNCIO);
	/* locked: pg(if no error) */

	/*
	 * recover from I/O
	 */

	if (error) {
		if (error == EAGAIN) {
			UVMHIST_LOG(maphist,
			    "  pgo_get says TRY AGAIN!",0,0,0,0);
			kpause("fltagain2", false, hz/2, NULL);
			return ERESTART;
		}

#if 0
		KASSERT(error != ERESTART);
#else
		/* XXXUEBS don't re-fault? */
		if (error == ERESTART)
			error = EIO;
#endif

		UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)",
		    error, 0,0,0);
		return error;
	}

	/*
	 * re-verify the state of the world by first trying to relock
	 * the maps.  always relock the object.
	 */

	locked = uvmfault_relock(ufi);
	if (locked && amap)
		amap_lock(amap);

	/* might be changed */
	uobj = pg->uobject;

	mutex_enter(uobj->vmobjlock);
	KASSERT((pg->flags & PG_BUSY) != 0);

	mutex_enter(&uvm_pageqlock);
	uvm_pageactivate(pg);
	mutex_exit(&uvm_pageqlock);

	/* locked(locked): maps(read), amap(if !null), uobj, pg */
	/* locked(!locked): uobj, pg */

	/*
	 * verify that the page has not been released and re-verify
	 * that amap slot is still free.   if there is a problem,
	 * we unlock and clean up.
	 */

	if ((pg->flags & PG_RELEASED) != 0 ||
	    (locked && amap && amap_lookup(&ufi->entry->aref,
	      ufi->orig_rvaddr - ufi->entry->start))) {
		if (locked)
			uvmfault_unlockall(ufi, amap, NULL);
		locked = false;
	}

	/*
	 * didn't get the lock?   release the page and retry.
	 */

	if (locked == false) {
		UVMHIST_LOG(maphist,
		    "  wasn't able to relock after fault: retry",
		    0,0,0,0);
		if (pg->flags & PG_WANTED) {
			wakeup(pg);
		}
		if ((pg->flags & PG_RELEASED) == 0) {
			pg->flags &= ~(PG_BUSY | PG_WANTED);
			UVM_PAGE_OWN(pg, NULL);
		} else {
			uvmexp.fltpgrele++;
			uvm_pagefree(pg);
		}
		mutex_exit(uobj->vmobjlock);
		return ERESTART;
	}

	/*
	 * we have the data in pg which is busy and
	 * not released.  we are holding object lock (so the page
	 * can't be released on us).
	 */

	/* locked: maps(read), amap(if !null), uobj, pg */

	*ruobj = uobj;
	*ruobjpage = pg;
	return 0;
}

/*
 * uvm_fault_lower_direct: fault lower center page
 *
 *	1. adjust flt->enter_prot.
 *	2. if page is loaned, resolve.
 */

int
uvm_fault_lower_direct(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page *uobjpage)
{
	struct vm_page *pg;
	UVMHIST_FUNC("uvm_fault_lower_direct"); UVMHIST_CALLED(maphist);

	/*
	 * we are not promoting.   if the mapping is COW ensure that we
	 * don't give more access than we should (e.g. when doing a read
	 * fault on a COPYONWRITE mapping we want to map the COW page in
	 * R/O even though the entry protection could be R/W).
	 *
	 * set "pg" to the page we want to map in (uobjpage, usually)
	 */

	uvmexp.flt_obj++;
	if (UVM_ET_ISCOPYONWRITE(ufi->entry) ||
	    UVM_OBJ_NEEDS_WRITEFAULT(uobjpage->uobject))
		flt->enter_prot &= ~VM_PROT_WRITE;
	pg = uobjpage;		/* map in the actual object */

	KASSERT(uobjpage != PGO_DONTCARE);

	/*
	 * we are faulting directly on the page.   be careful
	 * about writing to loaned pages...
	 */

	if (uobjpage->loan_count) {
		uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage);
	}
	KASSERT(pg == uobjpage);

	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);
	return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
}

/*
 * uvm_fault_lower_direct_loan: resolve loaned page.
 *
 *	1. if not cow'ing, adjust flt->enter_prot.
 *	2. if cow'ing, break loan.
 */

static int
uvm_fault_lower_direct_loan(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page **rpg,
	struct vm_page **ruobjpage)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	struct vm_page *pg;
	struct vm_page *uobjpage = *ruobjpage;
	UVMHIST_FUNC("uvm_fault_lower_direct_loan"); UVMHIST_CALLED(maphist);

	if (!flt->cow_now) {
		/* read fault: cap the protection at readonly */
		/* cap! */
		flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
	} else {
		/* write fault: must break the loan here */

		pg = uvm_loanbreak(uobjpage);
		if (pg == NULL) {

			/*
			 * drop ownership of page, it can't be released
			 */

			if (uobjpage->flags & PG_WANTED)
				wakeup(uobjpage);
			uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(uobjpage, NULL);

			uvmfault_unlockall(ufi, amap, uobj);
			UVMHIST_LOG(maphist,
			    "  out of RAM breaking loan, waiting",
			    0,0,0,0);
			uvmexp.fltnoram++;
			uvm_wait("flt_noram4");
			return ERESTART;
		}
		*rpg = pg;
		*ruobjpage = pg;
	}
	return 0;
}

/*
 * uvm_fault_lower_promote: promote lower page.
 *
 *	1. call uvmfault_promote.
 *	2. fill in data.
 *	3. if not ZFOD, dispose old page.
 */

int
uvm_fault_lower_promote(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page *uobjpage)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	struct vm_anon *anon;
	struct vm_page *pg;
	int error;
	UVMHIST_FUNC("uvm_fault_lower_promote"); UVMHIST_CALLED(maphist);

	KASSERT(amap != NULL);

	/*
	 * If we are going to promote the data to an anon we
	 * allocate a blank anon here and plug it into our amap.
	 */
	error = uvmfault_promote(ufi, NULL, uobjpage,
	    &anon, &flt->anon_spare);
	switch (error) {
	case 0:
		break;
	case ERESTART:
		return ERESTART;
	default:
		return error;
	}

	pg = anon->an_page;

	/*
	 * Fill in the data.
	 */
	KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0);

	if (uobjpage != PGO_DONTCARE) {
		uvmexp.flt_prcopy++;

		/*
		 * promote to shared amap?  make sure all sharing
		 * procs see it
		 */

		if ((amap_flags(amap) & AMAP_SHARED) != 0) {
			pmap_page_protect(uobjpage, VM_PROT_NONE);
			/*
			 * XXX: PAGE MIGHT BE WIRED!
			 */
		}

		/*
		 * dispose of uobjpage.  it can't be PG_RELEASED
		 * since we still hold the object lock.
		 */

		if (uobjpage->flags & PG_WANTED) {
			/* still have the obj lock */
			wakeup(uobjpage);
		}
		uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
		UVM_PAGE_OWN(uobjpage, NULL);

		UVMHIST_LOG(maphist,
		    "  promote uobjpage 0x%x to anon/page 0x%x/0x%x",
		    uobjpage, anon, pg, 0);

	} else {
		uvmexp.flt_przero++;

		/*
		 * Page is zero'd and marked dirty by
		 * uvmfault_promote().
		 */

		UVMHIST_LOG(maphist, "  zero fill anon/page 0x%x/0x%x",
		    anon, pg, 0, 0);
	}

	return uvm_fault_lower_enter(ufi, flt, uobj, anon, pg);
}

/*
 * uvm_fault_lower_enter: enter h/w mapping of lower page or anon page promoted
 * from the lower page.
 */

int
uvm_fault_lower_enter(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct uvm_object *uobj,
	struct vm_anon *anon, struct vm_page *pg)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	int error;
	UVMHIST_FUNC("uvm_fault_lower_enter"); UVMHIST_CALLED(maphist);

	/*
	 * Locked:
	 *
	 *	maps(read), amap(if !null), uobj(if !null),
	 *	anon(if !null), pg(if anon), unlock_uobj(if !null)
	 *
	 * Note: pg is either the uobjpage or the new page in the new anon.
	 */
	KASSERT(amap == NULL || mutex_owned(amap->am_lock));
	KASSERT(uobj == NULL || mutex_owned(uobj->vmobjlock));
	KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
	KASSERT((pg->flags & PG_BUSY) != 0);

	/*
	 * all resources are present.  we can now map it in and free our
	 * resources.
	 */

	UVMHIST_LOG(maphist,
	    "  MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d",
	    ufi->orig_map->pmap, ufi->orig_rvaddr, pg, flt->promote);
	KASSERT((flt->access_type & VM_PROT_WRITE) == 0 ||
	    (pg->flags & PG_RDONLY) == 0);
	if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
	    VM_PAGE_TO_PHYS(pg),
	    (pg->flags & PG_RDONLY) != 0 ?
	    flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot,
	    flt->access_type | PMAP_CANFAIL |
	    (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) {

		/*
		 * No need to undo what we did; we can simply think of
		 * this as the pmap throwing away the mapping information.
		 *
		 * We do, however, have to go through the ReFault path,
		 * as the map may change while we're asleep.
		 */

		/*
		 * ensure that the page is queued in the case that
		 * we just promoted the page.
		 */

		mutex_enter(&uvm_pageqlock);
		uvm_pageenqueue(pg);
		mutex_exit(&uvm_pageqlock);

		if (pg->flags & PG_WANTED)
			wakeup(pg);

		/*
		 * note that pg can't be PG_RELEASED since we did not drop
		 * the object lock since the last time we checked.
		 */
		KASSERT((pg->flags & PG_RELEASED) == 0);

		pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
		UVM_PAGE_OWN(pg, NULL);

		uvmfault_unlockall(ufi, amap, uobj);
		if (!uvm_reclaimable()) {
			UVMHIST_LOG(maphist,
			    "<- failed.  out of VM",0,0,0,0);
			/* XXX instrumentation */
			error = ENOMEM;
			return error;
		}
		/* XXX instrumentation */
		uvm_wait("flt_pmfail2");
		return ERESTART;
	}

	uvm_fault_lower_done(ufi, flt, uobj, pg);

	/*
	 * note that pg can't be PG_RELEASED since we did not drop the object
	 * lock since the last time we checked.
	 */
	KASSERT((pg->flags & PG_RELEASED) == 0);
	if (pg->flags & PG_WANTED)
		wakeup(pg);
	pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
	UVM_PAGE_OWN(pg, NULL);

	pmap_update(ufi->orig_map->pmap);
	uvmfault_unlockall(ufi, amap, uobj);

	UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0);
	return 0;
}

/*
 * uvm_fault_lower_done: queue lower center page.
 */

void
uvm_fault_lower_done(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page *pg)
{
	bool dropswap = false;

	UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist);

	mutex_enter(&uvm_pageqlock);
	if (flt->wire_paging) {
		uvm_pagewire(pg);
		if (pg->pqflags & PQ_AOBJ) {

			/*
			 * since the now-wired page cannot be paged out,
			 * release its swap resources for others to use.
			 * since an aobj page with no swap cannot be PG_CLEAN,
			 * clear its clean flag now.
			 */

			KASSERT(uobj != NULL);
			pg->flags &= ~(PG_CLEAN);
			dropswap = true;
		}
	} else {
		uvm_pageactivate(pg);
	}
	mutex_exit(&uvm_pageqlock);

	if (dropswap) {
		uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
	}
}


/*
 * uvm_fault_wire: wire down a range of virtual addresses in a map.
 *
 * => map may be read-locked by caller, but MUST NOT be write-locked.
 * => if map is read-locked, any operations which may cause map to
 *	be write-locked in uvm_fault() must be taken care of by
 *	the caller.  See uvm_map_pageable().
 */

int
uvm_fault_wire(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_prot_t access_type, int maxprot)
{
	vaddr_t va;
	int error;

	/*
	 * now fault it in a page at a time.  if the fault fails then we
	 * have to undo what we have done.  note that in uvm_fault
	 * VM_PROT_NONE is replaced with the max protection if fault_type
	 * is VM_FAULT_WIRE.
	 */

	/*
	 * XXX work around overflowing a vaddr_t.  this prevents us from
	 * wiring the last page in the address space, though.
	 */
	if (start > end) {
		return EFAULT;
	}

	for (va = start; va < end; va += PAGE_SIZE) {
		error = uvm_fault_internal(map, va, access_type,
		    (maxprot ? UVM_FAULT_MAXPROT : 0) | UVM_FAULT_WIRE);
		if (error) {
			if (va != start) {
				uvm_fault_unwire(map, start, va);
			}
			return error;
		}
	}
	return 0;
}

/*
 * uvm_fault_unwire(): unwire range of virtual space.
 */

void
uvm_fault_unwire(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	vm_map_lock_read(map);
	uvm_fault_unwire_locked(map, start, end);
	vm_map_unlock_read(map);
}

/*
 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
 *
 * => map must be at least read-locked.
 */

void
uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct vm_map_entry *entry, *oentry;
	pmap_t pmap = vm_map_pmap(map);
	vaddr_t va;
	paddr_t pa;
	struct vm_page *pg;

	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);

	/*
	 * we assume that the area we are unwiring has actually been wired
	 * in the first place.  this means that we should be able to extract
	 * the PAs from the pmap.  we also lock out the page daemon so that
	 * we can call uvm_pageunwire.
	 */

	/*
	 * find the beginning map entry for the region.
	 */

	KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
	if (uvm_map_lookup_entry(map, start, &entry) == false)
		panic("uvm_fault_unwire_locked: address not in map");

	oentry = NULL;
	for (va = start; va < end; va += PAGE_SIZE) {
		if (pmap_extract(pmap, va, &pa) == false)
			continue;

		/*
		 * find the map entry for the current address.
		 */

		KASSERT(va >= entry->start);
		while (va >= entry->end) {
			KASSERT(entry->next != &map->header &&
			    entry->next->start <= entry->end);
			entry = entry->next;
		}

		/*
		 * lock it.
		 */

		if (entry != oentry) {
			if (oentry != NULL) {
				mutex_exit(&uvm_pageqlock);
				uvm_map_unlock_entry(oentry);
			}
			uvm_map_lock_entry(entry);
			mutex_enter(&uvm_pageqlock);
			oentry = entry;
		}

		/*
		 * if the entry is no longer wired, tell the pmap.
		 */

		if (VM_MAPENT_ISWIRED(entry) == 0)
			pmap_unwire(pmap, va);

		pg = PHYS_TO_VM_PAGE(pa);
		if (pg)
			uvm_pageunwire(pg);
	}

	if (oentry != NULL) {
		mutex_exit(&uvm_pageqlock);
		uvm_map_unlock_entry(entry);
	}
}
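
/*
 * Illustrative, non-compiled sketch (hence the "#if 0") of how a caller
 * might use uvm_fault_wire()/uvm_fault_unwire() to pin a user buffer for
 * the duration of an I/O operation, in the spirit of uvm_vslock().  The
 * function example_wire_buffer() and its arguments are hypothetical; a
 * real caller must also honour the map locking rules documented above
 * uvm_fault_wire().
 */
#if 0
static int
example_wire_buffer(struct vm_map *map, vaddr_t buf, vsize_t len,
    vm_prot_t access_type)
{
	vaddr_t start = trunc_page(buf);
	vaddr_t end = round_page(buf + len);
	int error;

	/*
	 * fault the pages in and wire them; uvm_fault_wire() unwinds
	 * any pages it already wired if a later page fails.
	 */
	error = uvm_fault_wire(map, start, end, access_type, 0);
	if (error)
		return error;

	/* ... do the I/O against the now-wired range ... */

	/* drop the wiring when the I/O completes. */
	uvm_fault_unwire(map, start, end);
	return 0;
}
#endif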