/*	$NetBSD: uvm_fault.c,v 1.232 2023/04/09 09:00:56 riastradh Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
 */

/*
 * uvm_fault.c: fault handler
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.232 2023/04/09 09:00:56 riastradh Exp $");

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/kernel.h>
#include <sys/mman.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/*
 *
 * a word on page faults:
 *
 * types of page faults we handle:
 *
 * CASE 1: upper layer faults                   CASE 2: lower layer faults
 *
 *    CASE 1A         CASE 1B                  CASE 2A        CASE 2B
 *    read/write1     write>1                  read/write   +-cow_write/zero
 *         |             |                         |        |
 *      +--|--+       +--|--+     +-----+       +  |  +     | +-----+
 * amap |  V  |       |  ---------> new |          |        | |  ^  |
 *      +-----+       +-----+     +-----+       +  |  +     | +--|--+
 *                                                 |        |    |
 *      +-----+       +-----+                   +--|--+     | +--|--+
 * uobj | d/c |       | d/c |                   |  V  |     +----+  |
 *      +-----+       +-----+                   +-----+       +-----+
 *
 * d/c = don't care
 *
 * case [0]: layerless fault
 *	no amap or uobj is present.   this is an error.
 *
 * case [1]: upper layer fault [anon active]
 *    1A: [read] or [write with anon->an_ref == 1]
 *	I/O takes place in upper level anon and uobj is not touched.
 *    1B: [write with anon->an_ref > 1]
 *	new anon is alloc'd and data is copied off ["COW"]
 *
 * case [2]: lower layer fault [uobj]
 *    2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
 *	I/O takes place directly in object.
 *    2B: [write to copy_on_write] or [read on NULL uobj]
 *	data is "promoted" from uobj to a new anon.
 *	if uobj is null, then we zero fill.
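 *
 * (illustrative examples, added for clarity and not part of the original
 *  comment: a fault on a private anonymous page that is already resident
 *  and referenced only by this map is case 1A; the first write after a
 *  fork(), while parent and child still share the anon, is case 1B; a
 *  read fault on a file mapping is case 2A; the first write to a
 *  MAP_PRIVATE file mapping, or the first touch of anonymous zero-fill
 *  memory, is case 2B.)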
 *
 * we follow the standard UVM locking protocol ordering:
 *
 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
 * we hold a PG_BUSY page if we unlock for I/O
 *
 *
 * the code is structured as follows:
 *
 *     - init the "IN" params in the ufi structure
 *   ReFault: (ERESTART returned to the loop in uvm_fault_internal)
 *     - do lookups [locks maps], check protection, handle needs_copy
 *     - check for case 0 fault (error)
 *     - establish "range" of fault
 *     - if we have an amap lock it and extract the anons
 *     - if sequential advice deactivate pages behind us
 *     - at the same time check pmap for unmapped areas and anon for pages
 *	 that we could map in (and do map it if found)
 *     - check object for resident pages that we could map in
 *     - if (case 2) goto Case2
 *     - >>> handle case 1
 *           - ensure source anon is resident in RAM
 *           - if case 1B alloc new anon and copy from source
 *           - map the correct page in
 *   Case2:
 *     - >>> handle case 2
 *           - ensure source page is resident (if uobj)
 *           - if case 2B alloc new anon and copy from source (could be zero
 *		fill if uobj == NULL)
 *           - map the correct page in
 *     - done!
 *
 * note on paging:
 *   if we have to do I/O we place a PG_BUSY page in the correct object,
 * unlock everything, and do the I/O.   when I/O is done we must reverify
 * the state of the world before assuming that our data structures are
 * valid.   [because mappings could change while the map is unlocked]
 *
 * alternative 1: unbusy the page in question and restart the page fault
 *    from the top (ReFault).   this is easy but does not take advantage
 *    of the information that we already have from our previous lookup,
 *    although it is possible that the "hints" in the vm_map will help here.
 *
 * alternative 2: the system already keeps track of a "version" number of
 *    a map.   [i.e. every time you write-lock a map (e.g. to change a
 *    mapping) you bump the version number up by one...]   so, we can save
 *    the version number of the map before we release the lock and start I/O.
 *    then when I/O is done we can relock and check the version numbers
 *    to see if anything changed.   this might save us some work over
 *    alternative 1 because we don't have to unbusy the page and it may
 *    mean fewer compares(?).
 *
 * alternative 3: put in backpointers or a way to "hold" part of a map
 *    in place while I/O is in progress.   this could be complex to
 *    implement (especially with structures like amap that can be referenced
 *    by multiple map entries, and figuring out what should wait could be
 *    complex as well...).
 *
 * we use alternative 2.  given that we are multi-threaded now we may want
 * to reconsider the choice.
 */

/*
 * local data structures
 */

struct uvm_advice {
	int advice;
	int nback;
	int nforw;
};

/*
 * page range array:
 * note: index in array must match "advice" value
 * XXX: borrowed numbers from freebsd.   do they work well for us?
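 *
 * (worked example, added for clarity: with UVM_ADV_NORMAL a fault covers up
 *  to 3 back + 1 center + 4 forward = 8 pages, and with UVM_ADV_SEQUENTIAL
 *  up to 8 + 1 + 7 = 16 pages, which is where UVM_MAXRANGE below comes
 *  from.)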
159 */ 160 161 static const struct uvm_advice uvmadvice[] = { 162 { UVM_ADV_NORMAL, 3, 4 }, 163 { UVM_ADV_RANDOM, 0, 0 }, 164 { UVM_ADV_SEQUENTIAL, 8, 7}, 165 }; 166 167 #define UVM_MAXRANGE 16 /* must be MAX() of nback+nforw+1 */ 168 169 /* 170 * private prototypes 171 */ 172 173 /* 174 * inline functions 175 */ 176 177 /* 178 * uvmfault_anonflush: try and deactivate pages in specified anons 179 * 180 * => does not have to deactivate page if it is busy 181 */ 182 183 static inline void 184 uvmfault_anonflush(struct vm_anon **anons, int n) 185 { 186 int lcv; 187 struct vm_page *pg; 188 189 for (lcv = 0; lcv < n; lcv++) { 190 if (anons[lcv] == NULL) 191 continue; 192 KASSERT(rw_lock_held(anons[lcv]->an_lock)); 193 pg = anons[lcv]->an_page; 194 if (pg && (pg->flags & PG_BUSY) == 0) { 195 uvm_pagelock(pg); 196 uvm_pagedeactivate(pg); 197 uvm_pageunlock(pg); 198 } 199 } 200 } 201 202 /* 203 * normal functions 204 */ 205 206 /* 207 * uvmfault_amapcopy: clear "needs_copy" in a map. 208 * 209 * => called with VM data structures unlocked (usually, see below) 210 * => we get a write lock on the maps and clear needs_copy for a VA 211 * => if we are out of RAM we sleep (waiting for more) 212 */ 213 214 static void 215 uvmfault_amapcopy(struct uvm_faultinfo *ufi) 216 { 217 for (;;) { 218 219 /* 220 * no mapping? give up. 221 */ 222 223 if (uvmfault_lookup(ufi, true) == false) 224 return; 225 226 /* 227 * copy if needed. 228 */ 229 230 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) 231 amap_copy(ufi->map, ufi->entry, AMAP_COPY_NOWAIT, 232 ufi->orig_rvaddr, ufi->orig_rvaddr + 1); 233 234 /* 235 * didn't work? must be out of RAM. unlock and sleep. 236 */ 237 238 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 239 uvmfault_unlockmaps(ufi, true); 240 uvm_wait("fltamapcopy"); 241 continue; 242 } 243 244 /* 245 * got it! unlock and return. 246 */ 247 248 uvmfault_unlockmaps(ufi, true); 249 return; 250 } 251 /*NOTREACHED*/ 252 } 253 254 /* 255 * uvmfault_anonget: get data in an anon into a non-busy, non-released 256 * page in that anon. 257 * 258 * => Map, amap and thus anon should be locked by caller. 259 * => If we fail, we unlock everything and error is returned. 260 * => If we are successful, return with everything still locked. 261 * => We do not move the page on the queues [gets moved later]. If we 262 * allocate a new page [we_own], it gets put on the queues. Either way, 263 * the result is that the page is on the queues at return time 264 * => For pages which are on loan from a uvm_object (and thus are not owned 265 * by the anon): if successful, return with the owning object locked. 266 * The caller must unlock this object when it unlocks everything else. 267 */ 268 269 int 270 uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, 271 struct vm_anon *anon) 272 { 273 struct vm_page *pg; 274 krw_t lock_type; 275 int error; 276 277 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 278 KASSERT(rw_lock_held(anon->an_lock)); 279 KASSERT(anon->an_lock == amap->am_lock); 280 281 /* Increment the counters.*/ 282 cpu_count(CPU_COUNT_FLTANGET, 1); 283 if (anon->an_page) { 284 curlwp->l_ru.ru_minflt++; 285 } else { 286 curlwp->l_ru.ru_majflt++; 287 } 288 error = 0; 289 290 /* 291 * Loop until we get the anon data, or fail. 292 */ 293 294 for (;;) { 295 bool we_own, locked; 296 /* 297 * Note: 'we_own' will become true if we set PG_BUSY on a page. 298 */ 299 we_own = false; 300 pg = anon->an_page; 301 302 /* 303 * If there is a resident page and it is loaned, then anon 304 * may not own it. 
Call out to uvm_anon_lockloanpg() to 305 * identify and lock the real owner of the page. 306 */ 307 308 if (pg && pg->loan_count) 309 pg = uvm_anon_lockloanpg(anon); 310 311 /* 312 * Is page resident? Make sure it is not busy/released. 313 */ 314 315 lock_type = rw_lock_op(anon->an_lock); 316 if (pg) { 317 318 /* 319 * at this point, if the page has a uobject [meaning 320 * we have it on loan], then that uobject is locked 321 * by us! if the page is busy, we drop all the 322 * locks (including uobject) and try again. 323 */ 324 325 if ((pg->flags & PG_BUSY) == 0) { 326 UVMHIST_LOG(maphist, "<- OK",0,0,0,0); 327 return 0; 328 } 329 cpu_count(CPU_COUNT_FLTPGWAIT, 1); 330 331 /* 332 * The last unlock must be an atomic unlock and wait 333 * on the owner of page. 334 */ 335 336 if (pg->uobject) { 337 /* Owner of page is UVM object. */ 338 uvmfault_unlockall(ufi, amap, NULL); 339 UVMHIST_LOG(maphist, " unlock+wait on uobj",0, 340 0,0,0); 341 uvm_pagewait(pg, pg->uobject->vmobjlock, "anonget1"); 342 } else { 343 /* Owner of page is anon. */ 344 uvmfault_unlockall(ufi, NULL, NULL); 345 UVMHIST_LOG(maphist, " unlock+wait on anon",0, 346 0,0,0); 347 uvm_pagewait(pg, anon->an_lock, "anonget2"); 348 } 349 } else { 350 #if defined(VMSWAP) 351 /* 352 * No page, therefore allocate one. A write lock is 353 * required for this. If the caller didn't supply 354 * one, fail now and have them retry. 355 */ 356 357 if (lock_type == RW_READER) { 358 return ENOLCK; 359 } 360 pg = uvm_pagealloc(NULL, 361 ufi != NULL ? ufi->orig_rvaddr : 0, 362 anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0); 363 if (pg == NULL) { 364 /* Out of memory. Wait a little. */ 365 uvmfault_unlockall(ufi, amap, NULL); 366 cpu_count(CPU_COUNT_FLTNORAM, 1); 367 UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0, 368 0,0,0); 369 if (!uvm_reclaimable()) { 370 return ENOMEM; 371 } 372 uvm_wait("flt_noram1"); 373 } else { 374 /* PG_BUSY bit is set. */ 375 we_own = true; 376 uvmfault_unlockall(ufi, amap, NULL); 377 378 /* 379 * Pass a PG_BUSY+PG_FAKE clean page into 380 * the uvm_swap_get() function with all data 381 * structures unlocked. Note that it is OK 382 * to read an_swslot here, because we hold 383 * PG_BUSY on the page. 384 */ 385 cpu_count(CPU_COUNT_PAGEINS, 1); 386 error = uvm_swap_get(pg, anon->an_swslot, 387 PGO_SYNCIO); 388 389 /* 390 * We clean up after the I/O below in the 391 * 'we_own' case. 392 */ 393 } 394 #else 395 panic("%s: no page", __func__); 396 #endif /* defined(VMSWAP) */ 397 } 398 399 /* 400 * Re-lock the map and anon. 401 */ 402 403 locked = uvmfault_relock(ufi); 404 if (locked || we_own) { 405 rw_enter(anon->an_lock, lock_type); 406 } 407 408 /* 409 * If we own the page (i.e. we set PG_BUSY), then we need 410 * to clean up after the I/O. There are three cases to 411 * consider: 412 * 413 * 1) Page was released during I/O: free anon and ReFault. 414 * 2) I/O not OK. Free the page and cause the fault to fail. 415 * 3) I/O OK! Activate the page and sync with the non-we_own 416 * case (i.e. drop anon lock if not locked). 417 */ 418 419 if (we_own) { 420 KASSERT(lock_type == RW_WRITER); 421 #if defined(VMSWAP) 422 if (error) { 423 424 /* 425 * Remove the swap slot from the anon and 426 * mark the anon as having no real slot. 427 * Do not free the swap slot, thus preventing 428 * it from being used again. 
429 */ 430 431 if (anon->an_swslot > 0) { 432 uvm_swap_markbad(anon->an_swslot, 1); 433 } 434 anon->an_swslot = SWSLOT_BAD; 435 436 if ((pg->flags & PG_RELEASED) != 0) { 437 goto released; 438 } 439 440 /* 441 * Note: page was never !PG_BUSY, so it 442 * cannot be mapped and thus no need to 443 * pmap_page_protect() it. 444 */ 445 446 uvm_pagefree(pg); 447 448 if (locked) { 449 uvmfault_unlockall(ufi, NULL, NULL); 450 } 451 rw_exit(anon->an_lock); 452 UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); 453 return error; 454 } 455 456 if ((pg->flags & PG_RELEASED) != 0) { 457 released: 458 KASSERT(anon->an_ref == 0); 459 460 /* 461 * Released while we had unlocked amap. 462 */ 463 464 if (locked) { 465 uvmfault_unlockall(ufi, NULL, NULL); 466 } 467 uvm_anon_release(anon); 468 469 if (error) { 470 UVMHIST_LOG(maphist, 471 "<- ERROR/RELEASED", 0,0,0,0); 472 return error; 473 } 474 475 UVMHIST_LOG(maphist, "<- RELEASED", 0,0,0,0); 476 return ERESTART; 477 } 478 479 /* 480 * We have successfully read the page, activate it. 481 */ 482 483 uvm_pagelock(pg); 484 uvm_pageactivate(pg); 485 uvm_pagewakeup(pg); 486 uvm_pageunlock(pg); 487 pg->flags &= ~(PG_BUSY|PG_FAKE); 488 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); 489 UVM_PAGE_OWN(pg, NULL); 490 #else 491 panic("%s: we_own", __func__); 492 #endif /* defined(VMSWAP) */ 493 } 494 495 /* 496 * We were not able to re-lock the map - restart the fault. 497 */ 498 499 if (!locked) { 500 if (we_own) { 501 rw_exit(anon->an_lock); 502 } 503 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 504 return ERESTART; 505 } 506 507 /* 508 * Verify that no one has touched the amap and moved 509 * the anon on us. 510 */ 511 512 if (ufi != NULL && amap_lookup(&ufi->entry->aref, 513 ufi->orig_rvaddr - ufi->entry->start) != anon) { 514 515 uvmfault_unlockall(ufi, amap, NULL); 516 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 517 return ERESTART; 518 } 519 520 /* 521 * Retry.. 522 */ 523 524 cpu_count(CPU_COUNT_FLTANRETRY, 1); 525 continue; 526 } 527 /*NOTREACHED*/ 528 } 529 530 /* 531 * uvmfault_promote: promote data to a new anon. used for 1B and 2B. 532 * 533 * 1. allocate an anon and a page. 534 * 2. fill its contents. 535 * 3. put it into amap. 536 * 537 * => if we fail (result != 0) we unlock everything. 538 * => on success, return a new locked anon via 'nanon'. 539 * (*nanon)->an_page will be a resident, locked, dirty page. 540 * => it's caller's responsibility to put the promoted nanon->an_page to the 541 * page queue. 
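 *
 * => for reference (added note; it mirrors the callers later in this file),
 *    the anon-COW path (case 1B) invokes this roughly as:
 *	error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon,
 *	    &flt->anon_spare);
 *    while the object-COW / zero-fill path (case 2B) passes oanon == NULL
 *    and either the object's page or PGO_DONTCARE as uobjpage.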
542 */ 543 544 static int 545 uvmfault_promote(struct uvm_faultinfo *ufi, 546 struct vm_anon *oanon, 547 struct vm_page *uobjpage, 548 struct vm_anon **nanon, /* OUT: allocated anon */ 549 struct vm_anon **spare) 550 { 551 struct vm_amap *amap = ufi->entry->aref.ar_amap; 552 struct uvm_object *uobj; 553 struct vm_anon *anon; 554 struct vm_page *pg; 555 struct vm_page *opg; 556 int error; 557 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 558 559 if (oanon) { 560 /* anon COW */ 561 opg = oanon->an_page; 562 KASSERT(opg != NULL); 563 KASSERT(opg->uobject == NULL || opg->loan_count > 0); 564 } else if (uobjpage != PGO_DONTCARE) { 565 /* object-backed COW */ 566 opg = uobjpage; 567 KASSERT(rw_lock_held(opg->uobject->vmobjlock)); 568 } else { 569 /* ZFOD */ 570 opg = NULL; 571 } 572 if (opg != NULL) { 573 uobj = opg->uobject; 574 } else { 575 uobj = NULL; 576 } 577 578 KASSERT(amap != NULL); 579 KASSERT(uobjpage != NULL); 580 KASSERT(rw_write_held(amap->am_lock)); 581 KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock); 582 KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock)); 583 584 if (*spare != NULL) { 585 anon = *spare; 586 *spare = NULL; 587 } else { 588 anon = uvm_analloc(); 589 } 590 if (anon) { 591 592 /* 593 * The new anon is locked. 594 * 595 * if opg == NULL, we want a zero'd, dirty page, 596 * so have uvm_pagealloc() do that for us. 597 */ 598 599 KASSERT(anon->an_lock == NULL); 600 anon->an_lock = amap->am_lock; 601 pg = uvm_pagealloc(NULL, ufi->orig_rvaddr, anon, 602 UVM_FLAG_COLORMATCH | (opg == NULL ? UVM_PGA_ZERO : 0)); 603 if (pg == NULL) { 604 anon->an_lock = NULL; 605 } 606 } else { 607 pg = NULL; 608 } 609 610 /* 611 * out of memory resources? 612 */ 613 614 if (pg == NULL) { 615 /* save anon for the next try. */ 616 if (anon != NULL) { 617 *spare = anon; 618 } 619 620 /* unlock and fail ... */ 621 uvmfault_unlockall(ufi, amap, uobj); 622 if (!uvm_reclaimable()) { 623 UVMHIST_LOG(maphist, "out of VM", 0,0,0,0); 624 cpu_count(CPU_COUNT_FLTNOANON, 1); 625 error = ENOMEM; 626 goto done; 627 } 628 629 UVMHIST_LOG(maphist, "out of RAM, waiting for more", 0,0,0,0); 630 cpu_count(CPU_COUNT_FLTNORAM, 1); 631 uvm_wait("flt_noram5"); 632 error = ERESTART; 633 goto done; 634 } 635 636 /* copy page [pg now dirty] */ 637 if (opg) { 638 uvm_pagecopy(opg, pg); 639 } 640 KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY); 641 642 amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon, 643 oanon != NULL); 644 645 /* 646 * from this point on am_lock won't be dropped until the page is 647 * entered, so it's safe to unbusy the page up front. 648 * 649 * uvm_fault_{upper,lower}_done will activate or enqueue the page. 650 */ 651 652 pg = anon->an_page; 653 pg->flags &= ~(PG_BUSY|PG_FAKE); 654 UVM_PAGE_OWN(pg, NULL); 655 656 *nanon = anon; 657 error = 0; 658 done: 659 return error; 660 } 661 662 /* 663 * Update statistics after fault resolution. 
 *  - maxrss
 */
void
uvmfault_update_stats(struct uvm_faultinfo *ufi)
{
	struct vm_map		*map;
	struct vmspace		*vm;
	struct proc		*p;
	vsize_t			 res;

	map = ufi->orig_map;

	p = curproc;
	KASSERT(p != NULL);
	vm = p->p_vmspace;

	if (&vm->vm_map != map)
		return;

	res = pmap_resident_count(map->pmap);
	if (vm->vm_rssmax < res)
		vm->vm_rssmax = res;
}

/*
 *   F A U L T   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_fault: page fault handler
 *
 * => called from MD code to resolve a page fault
 * => VM data structures usually should be unlocked.   however, it is
 *	possible to call here with the main map locked if the caller
 *	gets a write lock, sets it recursive, and then calls us (c.f.
 *	uvm_map_pageable).   this should be avoided because it keeps
 *	the map locked off during I/O.
 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT
 */

#define MASK(entry)     (UVM_ET_ISCOPYONWRITE(entry) ? \
			 ~VM_PROT_WRITE : VM_PROT_ALL)

/* fault_flag values passed from uvm_fault_wire to uvm_fault_internal */
#define UVM_FAULT_WIRE		(1 << 0)
#define UVM_FAULT_MAXPROT	(1 << 1)

struct uvm_faultctx {

	/*
	 * the following members are set up by uvm_fault_check() and
	 * read-only after that.
	 *
	 * note that narrow is used by uvm_fault_check() to change
	 * the behaviour after ERESTART.
	 *
	 * most of them might change after RESTART if the underlying
	 * map entry has been changed behind us.  an exception is
	 * wire_paging, which never changes.
	 */
	vm_prot_t access_type;
	vaddr_t startva;
	int npages;
	int centeridx;
	bool narrow;		/* work on a single requested page only */
	bool wire_mapping;	/* request a PMAP_WIRED mapping
				   (UVM_FAULT_WIRE or VM_MAPENT_ISWIRED) */
	bool wire_paging;	/* request uvm_pagewire
				   (true for UVM_FAULT_WIRE) */
	bool cow_now;		/* VM_PROT_WRITE is actually requested
				   (ie. should break COW and page loaning) */

	/*
	 * enter_prot is set up by uvm_fault_check() and clamped
	 * (ie. drop the VM_PROT_WRITE bit) in various places in case
	 * of !cow_now.
	 */
	vm_prot_t enter_prot;	/* prot at which we want to enter pages in */

	/*
	 * the following member is for uvmfault_promote() and ERESTART.
	 */
	struct vm_anon *anon_spare;

	/*
	 * the following is actually a uvm_fault_lower() internal.
	 * it's here merely for debugging.
	 * (or due to the mechanical separation of the function?)
	 */
	bool promote;

	/*
	 * type of lock to acquire on objects in both layers.
	 */
	krw_t lower_lock_type;
	krw_t upper_lock_type;
};

static inline int	uvm_fault_check(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon ***, bool);

static int		uvm_fault_upper(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon **);
static inline int	uvm_fault_upper_lookup(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_anon **, struct vm_page **);
static inline void	uvm_fault_upper_neighbor(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    vaddr_t, struct vm_page *, bool);
static inline int	uvm_fault_upper_loan(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon *, struct uvm_object **);
static inline int	uvm_fault_upper_promote(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *);
static inline int	uvm_fault_upper_direct(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *);
static int		uvm_fault_upper_enter(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *,
			    struct vm_page *, struct vm_anon *);
static inline void	uvm_fault_upper_done(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_anon *, struct vm_page *);

static int		uvm_fault_lower(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_page **);
static inline void	uvm_fault_lower_lookup(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_page **);
static inline void	uvm_fault_lower_neighbor(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    vaddr_t, struct vm_page *);
static inline int	uvm_fault_lower_io(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object **, struct vm_page **);
static inline int	uvm_fault_lower_direct(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);
static inline int	uvm_fault_lower_direct_loan(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page **,
			    struct vm_page **);
static inline int	uvm_fault_lower_promote(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);
static int		uvm_fault_lower_enter(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *,
			    struct vm_anon *, struct vm_page *);
static inline void	uvm_fault_lower_done(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);

int
uvm_fault_internal(struct vm_map *orig_map, vaddr_t vaddr,
    vm_prot_t access_type, int fault_flag)
{
	struct uvm_faultinfo ufi;
	struct uvm_faultctx flt = {
		.access_type = access_type,

		/* don't look for neighborhood pages on "wire" fault */
		.narrow = (fault_flag & UVM_FAULT_WIRE) != 0,

		/* "wire" fault causes wiring of both mapping and paging */
		.wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0,
		.wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0,

		/*
		 * default lock type to acquire on upper & lower layer
		 * objects: reader.  this can be upgraded at any point
		 * during the fault from read -> write and uvm_faultctx
		 * changed to match, but is never downgraded write -> read.
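		 *
		 * (added note: the upgrade is attempted in place with
		 *  rw_tryupgrade() by uvm_fault_{upper,lower}_upgrade
		 *  below; if that fails the fault is backed out with
		 *  ERESTART and retried, and since this uvm_faultctx
		 *  persists across the retry loop the next pass takes
		 *  the writer lock from the start.)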
842 */ 843 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */ 844 .upper_lock_type = RW_WRITER, 845 .lower_lock_type = RW_WRITER, 846 #else 847 .upper_lock_type = RW_READER, 848 .lower_lock_type = RW_READER, 849 #endif 850 }; 851 const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0; 852 struct vm_anon *anons_store[UVM_MAXRANGE], **anons; 853 struct vm_page *pages_store[UVM_MAXRANGE], **pages; 854 int error; 855 856 UVMHIST_FUNC(__func__); 857 UVMHIST_CALLARGS(maphist, "(map=%#jx, vaddr=%#jx, at=%jd, ff=%jd)", 858 (uintptr_t)orig_map, vaddr, access_type, fault_flag); 859 860 /* Don't count anything until user interaction is possible */ 861 kpreempt_disable(); 862 if (__predict_true(start_init_exec)) { 863 struct cpu_info *ci = curcpu(); 864 CPU_COUNT(CPU_COUNT_NFAULT, 1); 865 /* Don't flood RNG subsystem with samples. */ 866 if (++(ci->ci_faultrng) == 503) { 867 ci->ci_faultrng = 0; 868 rnd_add_uint32(&curcpu()->ci_data.cpu_uvm->rs, 869 sizeof(vaddr_t) == sizeof(uint32_t) ? 870 (uint32_t)vaddr : sizeof(vaddr_t) == 871 sizeof(uint64_t) ? 872 (uint32_t)vaddr : 873 (uint32_t)ci->ci_counts[CPU_COUNT_NFAULT]); 874 } 875 } 876 kpreempt_enable(); 877 878 /* 879 * init the IN parameters in the ufi 880 */ 881 882 ufi.orig_map = orig_map; 883 ufi.orig_rvaddr = trunc_page(vaddr); 884 ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */ 885 886 error = ERESTART; 887 while (error == ERESTART) { /* ReFault: */ 888 anons = anons_store; 889 pages = pages_store; 890 891 error = uvm_fault_check(&ufi, &flt, &anons, maxprot); 892 if (error != 0) 893 continue; 894 895 error = uvm_fault_upper_lookup(&ufi, &flt, anons, pages); 896 if (error != 0) 897 continue; 898 899 if (pages[flt.centeridx] == PGO_DONTCARE) 900 error = uvm_fault_upper(&ufi, &flt, anons); 901 else { 902 struct uvm_object * const uobj = 903 ufi.entry->object.uvm_obj; 904 905 if (uobj && uobj->pgops->pgo_fault != NULL) { 906 /* 907 * invoke "special" fault routine. 908 */ 909 rw_enter(uobj->vmobjlock, RW_WRITER); 910 /* locked: maps(read), amap(if there), uobj */ 911 error = uobj->pgops->pgo_fault(&ufi, 912 flt.startva, pages, flt.npages, 913 flt.centeridx, flt.access_type, 914 PGO_LOCKED|PGO_SYNCIO); 915 916 /* 917 * locked: nothing, pgo_fault has unlocked 918 * everything 919 */ 920 921 /* 922 * object fault routine responsible for 923 * pmap_update(). 924 */ 925 926 /* 927 * Wake up the pagedaemon if the fault method 928 * failed for lack of memory but some can be 929 * reclaimed. 930 */ 931 if (error == ENOMEM && uvm_reclaimable()) { 932 uvm_wait("pgo_fault"); 933 error = ERESTART; 934 } 935 } else { 936 error = uvm_fault_lower(&ufi, &flt, pages); 937 } 938 } 939 } 940 941 if (flt.anon_spare != NULL) { 942 flt.anon_spare->an_ref--; 943 KASSERT(flt.anon_spare->an_ref == 0); 944 KASSERT(flt.anon_spare->an_lock == NULL); 945 uvm_anfree(flt.anon_spare); 946 } 947 return error; 948 } 949 950 /* 951 * uvm_fault_check: check prot, handle needs-copy, etc. 952 * 953 * 1. lookup entry. 954 * 2. check protection. 955 * 3. adjust fault condition (mainly for simulated fault). 956 * 4. handle needs-copy (lazy amap copy). 957 * 5. establish range of interest for neighbor fault (aka pre-fault). 958 * 6. look up anons (if amap exists). 959 * 7. flush pages (if MADV_SEQUENTIAL) 960 * 961 * => called with nothing locked. 962 * => if we fail (result != 0) we unlock everything. 963 * => initialize/adjust many members of flt. 
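 *
 * (worked example of step 5, added for clarity: with the default
 *  MADV_NORMAL advice and a fault two pages past the start of the entry,
 *  nback = MIN(3, 2) = 2 and nforw = MIN(4, pages remaining in the
 *  entry - 1), so for a large entry npages = 2 + 4 + 1 = 7 and
 *  centeridx = 2; a wired or repeated ("narrow") fault collapses this to
 *  npages = 1, centeridx = 0.)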
964 */ 965 966 static int 967 uvm_fault_check( 968 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 969 struct vm_anon ***ranons, bool maxprot) 970 { 971 struct vm_amap *amap; 972 struct uvm_object *uobj; 973 vm_prot_t check_prot; 974 int nback, nforw; 975 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 976 977 /* 978 * lookup and lock the maps 979 */ 980 981 if (uvmfault_lookup(ufi, false) == false) { 982 UVMHIST_LOG(maphist, "<- no mapping @ %#jx", ufi->orig_rvaddr, 983 0,0,0); 984 return EFAULT; 985 } 986 /* locked: maps(read) */ 987 988 #ifdef DIAGNOSTIC 989 if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0) { 990 printf("Page fault on non-pageable map:\n"); 991 printf("ufi->map = %p\n", ufi->map); 992 printf("ufi->orig_map = %p\n", ufi->orig_map); 993 printf("ufi->orig_rvaddr = %#lx\n", (u_long) ufi->orig_rvaddr); 994 panic("uvm_fault: (ufi->map->flags & VM_MAP_PAGEABLE) == 0"); 995 } 996 #endif 997 998 /* 999 * check protection 1000 */ 1001 1002 check_prot = maxprot ? 1003 ufi->entry->max_protection : ufi->entry->protection; 1004 if ((check_prot & flt->access_type) != flt->access_type) { 1005 UVMHIST_LOG(maphist, 1006 "<- protection failure (prot=%#jx, access=%#jx)", 1007 ufi->entry->protection, flt->access_type, 0, 0); 1008 uvmfault_unlockmaps(ufi, false); 1009 return EFAULT; 1010 } 1011 1012 /* 1013 * "enter_prot" is the protection we want to enter the page in at. 1014 * for certain pages (e.g. copy-on-write pages) this protection can 1015 * be more strict than ufi->entry->protection. "wired" means either 1016 * the entry is wired or we are fault-wiring the pg. 1017 */ 1018 1019 flt->enter_prot = ufi->entry->protection; 1020 if (VM_MAPENT_ISWIRED(ufi->entry)) { 1021 flt->wire_mapping = true; 1022 flt->wire_paging = true; 1023 flt->narrow = true; 1024 } 1025 1026 if (flt->wire_mapping) { 1027 flt->access_type = flt->enter_prot; /* full access for wired */ 1028 flt->cow_now = (check_prot & VM_PROT_WRITE) != 0; 1029 } else { 1030 flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0; 1031 } 1032 1033 if (flt->wire_paging) { 1034 /* wiring pages requires a write lock. */ 1035 flt->upper_lock_type = RW_WRITER; 1036 flt->lower_lock_type = RW_WRITER; 1037 } 1038 1039 flt->promote = false; 1040 1041 /* 1042 * handle "needs_copy" case. if we need to copy the amap we will 1043 * have to drop our readlock and relock it with a write lock. (we 1044 * need a write lock to change anything in a map entry [e.g. 1045 * needs_copy]). 1046 */ 1047 1048 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 1049 if (flt->cow_now || (ufi->entry->object.uvm_obj == NULL)) { 1050 KASSERT(!maxprot); 1051 /* need to clear */ 1052 UVMHIST_LOG(maphist, 1053 " need to clear needs_copy and refault",0,0,0,0); 1054 uvmfault_unlockmaps(ufi, false); 1055 uvmfault_amapcopy(ufi); 1056 cpu_count(CPU_COUNT_FLTAMCOPY, 1); 1057 return ERESTART; 1058 1059 } else { 1060 1061 /* 1062 * ensure that we pmap_enter page R/O since 1063 * needs_copy is still true 1064 */ 1065 1066 flt->enter_prot &= ~VM_PROT_WRITE; 1067 } 1068 } 1069 1070 /* 1071 * identify the players 1072 */ 1073 1074 amap = ufi->entry->aref.ar_amap; /* upper layer */ 1075 uobj = ufi->entry->object.uvm_obj; /* lower layer */ 1076 1077 /* 1078 * check for a case 0 fault. if nothing backing the entry then 1079 * error now. 
1080 */ 1081 1082 if (amap == NULL && uobj == NULL) { 1083 uvmfault_unlockmaps(ufi, false); 1084 UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); 1085 return EFAULT; 1086 } 1087 1088 /* 1089 * for a case 2B fault waste no time on adjacent pages because 1090 * they are likely already entered. 1091 */ 1092 1093 if (uobj != NULL && amap != NULL && 1094 (flt->access_type & VM_PROT_WRITE) != 0) { 1095 /* wide fault (!narrow) */ 1096 flt->narrow = true; 1097 } 1098 1099 /* 1100 * establish range of interest based on advice from mapper 1101 * and then clip to fit map entry. note that we only want 1102 * to do this the first time through the fault. if we 1103 * ReFault we will disable this by setting "narrow" to true. 1104 */ 1105 1106 if (flt->narrow == false) { 1107 1108 /* wide fault (!narrow) */ 1109 KASSERT(uvmadvice[ufi->entry->advice].advice == 1110 ufi->entry->advice); 1111 nback = MIN(uvmadvice[ufi->entry->advice].nback, 1112 (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT); 1113 flt->startva = ufi->orig_rvaddr - (nback << PAGE_SHIFT); 1114 /* 1115 * note: "-1" because we don't want to count the 1116 * faulting page as forw 1117 */ 1118 nforw = MIN(uvmadvice[ufi->entry->advice].nforw, 1119 ((ufi->entry->end - ufi->orig_rvaddr) >> 1120 PAGE_SHIFT) - 1); 1121 flt->npages = nback + nforw + 1; 1122 flt->centeridx = nback; 1123 1124 flt->narrow = true; /* ensure only once per-fault */ 1125 1126 } else { 1127 1128 /* narrow fault! */ 1129 nback = nforw = 0; 1130 flt->startva = ufi->orig_rvaddr; 1131 flt->npages = 1; 1132 flt->centeridx = 0; 1133 1134 } 1135 /* offset from entry's start to pgs' start */ 1136 const voff_t eoff = flt->startva - ufi->entry->start; 1137 1138 /* locked: maps(read) */ 1139 UVMHIST_LOG(maphist, " narrow=%jd, back=%jd, forw=%jd, startva=%#jx", 1140 flt->narrow, nback, nforw, flt->startva); 1141 UVMHIST_LOG(maphist, " entry=%#jx, amap=%#jx, obj=%#jx", 1142 (uintptr_t)ufi->entry, (uintptr_t)amap, (uintptr_t)uobj, 0); 1143 1144 /* 1145 * guess at the most suitable lock types to acquire. 1146 * if we've got an amap then lock it and extract current anons. 1147 */ 1148 1149 if (amap) { 1150 if ((amap_flags(amap) & AMAP_SHARED) == 0) { 1151 /* 1152 * the amap isn't shared. get a writer lock to 1153 * avoid the cost of upgrading the lock later if 1154 * needed. 1155 * 1156 * XXX nice for PostgreSQL, but consider threads. 1157 */ 1158 flt->upper_lock_type = RW_WRITER; 1159 } else if ((flt->access_type & VM_PROT_WRITE) != 0) { 1160 /* 1161 * assume we're about to COW. 1162 */ 1163 flt->upper_lock_type = RW_WRITER; 1164 } 1165 amap_lock(amap, flt->upper_lock_type); 1166 amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages); 1167 } else { 1168 if ((flt->access_type & VM_PROT_WRITE) != 0) { 1169 /* 1170 * we are about to dirty the object and that 1171 * requires a write lock. 1172 */ 1173 flt->lower_lock_type = RW_WRITER; 1174 } 1175 *ranons = NULL; /* to be safe */ 1176 } 1177 1178 /* locked: maps(read), amap(if there) */ 1179 KASSERT(amap == NULL || 1180 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1181 1182 /* 1183 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages 1184 * now and then forget about them (for the rest of the fault). 1185 */ 1186 1187 if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) { 1188 1189 UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages", 1190 0,0,0,0); 1191 /* flush back-page anons? */ 1192 if (amap) 1193 uvmfault_anonflush(*ranons, nback); 1194 1195 /* 1196 * flush object? 
change lock type to RW_WRITER, to avoid 1197 * excessive competition between read/write locks if many 1198 * threads doing "sequential access". 1199 */ 1200 if (uobj) { 1201 voff_t uoff; 1202 1203 flt->lower_lock_type = RW_WRITER; 1204 uoff = ufi->entry->offset + eoff; 1205 rw_enter(uobj->vmobjlock, RW_WRITER); 1206 (void) (uobj->pgops->pgo_put)(uobj, uoff, uoff + 1207 (nback << PAGE_SHIFT), PGO_DEACTIVATE); 1208 } 1209 1210 /* now forget about the backpages */ 1211 if (amap) 1212 *ranons += nback; 1213 flt->startva += (nback << PAGE_SHIFT); 1214 flt->npages -= nback; 1215 flt->centeridx = 0; 1216 } 1217 /* 1218 * => startva is fixed 1219 * => npages is fixed 1220 */ 1221 KASSERT(flt->startva <= ufi->orig_rvaddr); 1222 KASSERT(ufi->orig_rvaddr + ufi->orig_size <= 1223 flt->startva + (flt->npages << PAGE_SHIFT)); 1224 return 0; 1225 } 1226 1227 /* 1228 * uvm_fault_upper_upgrade: upgrade upper lock, reader -> writer 1229 */ 1230 1231 static inline int 1232 uvm_fault_upper_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1233 struct vm_amap *amap, struct uvm_object *uobj) 1234 { 1235 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1236 1237 KASSERT(amap != NULL); 1238 KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock)); 1239 1240 /* 1241 * fast path. 1242 */ 1243 1244 if (__predict_true(flt->upper_lock_type == RW_WRITER)) { 1245 return 0; 1246 } 1247 1248 /* 1249 * otherwise try for the upgrade. if we don't get it, unlock 1250 * everything, restart the fault and next time around get a writer 1251 * lock. 1252 */ 1253 1254 flt->upper_lock_type = RW_WRITER; 1255 if (__predict_false(!rw_tryupgrade(amap->am_lock))) { 1256 uvmfault_unlockall(ufi, amap, uobj); 1257 cpu_count(CPU_COUNT_FLTNOUP, 1); 1258 UVMHIST_LOG(maphist, " !upgrade upper", 0, 0,0,0); 1259 return ERESTART; 1260 } 1261 cpu_count(CPU_COUNT_FLTUP, 1); 1262 KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock)); 1263 return 0; 1264 } 1265 1266 /* 1267 * uvm_fault_upper_lookup: look up existing h/w mapping and amap. 1268 * 1269 * iterate range of interest: 1270 * 1. check if h/w mapping exists. if yes, we don't care 1271 * 2. check if anon exists. if not, page is lower. 1272 * 3. if anon exists, enter h/w mapping for neighbors. 1273 * 1274 * => called with amap locked (if exists). 1275 */ 1276 1277 static int 1278 uvm_fault_upper_lookup( 1279 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1280 struct vm_anon **anons, struct vm_page **pages) 1281 { 1282 struct vm_amap *amap = ufi->entry->aref.ar_amap; 1283 int lcv; 1284 vaddr_t currva; 1285 bool shadowed __unused; 1286 bool entered; 1287 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1288 1289 /* locked: maps(read), amap(if there) */ 1290 KASSERT(amap == NULL || 1291 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1292 1293 /* 1294 * map in the backpages and frontpages we found in the amap in hopes 1295 * of preventing future faults. we also init the pages[] array as 1296 * we go. 1297 */ 1298 1299 currva = flt->startva; 1300 shadowed = false; 1301 entered = false; 1302 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { 1303 /* 1304 * unmapped or center page. check if any anon at this level. 1305 */ 1306 if (amap == NULL || anons[lcv] == NULL) { 1307 pages[lcv] = NULL; 1308 continue; 1309 } 1310 1311 /* 1312 * check for present page and map if possible. 1313 */ 1314 1315 pages[lcv] = PGO_DONTCARE; 1316 if (lcv == flt->centeridx) { /* save center for later! 
*/ 1317 shadowed = true; 1318 continue; 1319 } 1320 1321 struct vm_anon *anon = anons[lcv]; 1322 struct vm_page *pg = anon->an_page; 1323 1324 KASSERT(anon->an_lock == amap->am_lock); 1325 1326 /* 1327 * ignore loaned and busy pages. 1328 * don't play with VAs that are already mapped. 1329 */ 1330 1331 if (pg && pg->loan_count == 0 && (pg->flags & PG_BUSY) == 0 && 1332 !pmap_extract(ufi->orig_map->pmap, currva, NULL)) { 1333 uvm_fault_upper_neighbor(ufi, flt, currva, 1334 pg, anon->an_ref > 1); 1335 entered = true; 1336 } 1337 } 1338 if (entered) { 1339 pmap_update(ufi->orig_map->pmap); 1340 } 1341 1342 /* locked: maps(read), amap(if there) */ 1343 KASSERT(amap == NULL || 1344 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1345 /* (shadowed == true) if there is an anon at the faulting address */ 1346 UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed, 1347 (ufi->entry->object.uvm_obj && shadowed != false),0,0); 1348 1349 return 0; 1350 } 1351 1352 /* 1353 * uvm_fault_upper_neighbor: enter single upper neighbor page. 1354 * 1355 * => called with amap and anon locked. 1356 */ 1357 1358 static void 1359 uvm_fault_upper_neighbor( 1360 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1361 vaddr_t currva, struct vm_page *pg, bool readonly) 1362 { 1363 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1364 1365 /* locked: amap, anon */ 1366 1367 KASSERT(pg->uobject == NULL); 1368 KASSERT(pg->uanon != NULL); 1369 KASSERT(rw_lock_op(pg->uanon->an_lock) == flt->upper_lock_type); 1370 KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); 1371 1372 /* 1373 * there wasn't a direct fault on the page, so avoid the cost of 1374 * activating it. 1375 */ 1376 1377 if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) { 1378 uvm_pagelock(pg); 1379 uvm_pageenqueue(pg); 1380 uvm_pageunlock(pg); 1381 } 1382 1383 UVMHIST_LOG(maphist, 1384 " MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx", 1385 (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); 1386 cpu_count(CPU_COUNT_FLTNAMAP, 1); 1387 1388 /* 1389 * Since this page isn't the page that's actually faulting, 1390 * ignore pmap_enter() failures; it's not critical that we 1391 * enter these right now. 1392 */ 1393 1394 (void) pmap_enter(ufi->orig_map->pmap, currva, 1395 VM_PAGE_TO_PHYS(pg), 1396 readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : 1397 flt->enter_prot, 1398 PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)); 1399 } 1400 1401 /* 1402 * uvm_fault_upper: handle upper fault. 1403 * 1404 * 1. acquire anon lock. 1405 * 2. get anon. let uvmfault_anonget do the dirty work. 1406 * 3. handle loan. 1407 * 4. dispatch direct or promote handlers. 1408 */ 1409 1410 static int 1411 uvm_fault_upper( 1412 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1413 struct vm_anon **anons) 1414 { 1415 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1416 struct vm_anon * const anon = anons[flt->centeridx]; 1417 struct uvm_object *uobj; 1418 int error; 1419 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1420 1421 /* locked: maps(read), amap, anon */ 1422 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1423 KASSERT(anon->an_lock == amap->am_lock); 1424 1425 /* 1426 * handle case 1: fault on an anon in our amap 1427 */ 1428 1429 UVMHIST_LOG(maphist, " case 1 fault: anon=%#jx", 1430 (uintptr_t)anon, 0, 0, 0); 1431 1432 /* 1433 * no matter if we have case 1A or case 1B we are going to need to 1434 * have the anon's memory resident. ensure that now. 1435 */ 1436 1437 /* 1438 * let uvmfault_anonget do the dirty work. 
1439 * if it fails (!OK) it will unlock everything for us. 1440 * if it succeeds, locks are still valid and locked. 1441 * also, if it is OK, then the anon's page is on the queues. 1442 * if the page is on loan from a uvm_object, then anonget will 1443 * lock that object for us if it does not fail. 1444 */ 1445 retry: 1446 error = uvmfault_anonget(ufi, amap, anon); 1447 switch (error) { 1448 case 0: 1449 break; 1450 1451 case ERESTART: 1452 return ERESTART; 1453 1454 case EAGAIN: 1455 kpause("fltagain1", false, hz/2, NULL); 1456 return ERESTART; 1457 1458 case ENOLCK: 1459 /* it needs a write lock: retry */ 1460 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); 1461 if (error != 0) { 1462 return error; 1463 } 1464 KASSERT(rw_write_held(amap->am_lock)); 1465 goto retry; 1466 1467 default: 1468 return error; 1469 } 1470 1471 /* 1472 * uobj is non null if the page is on loan from an object (i.e. uobj) 1473 */ 1474 1475 uobj = anon->an_page->uobject; /* locked by anonget if !NULL */ 1476 1477 /* locked: maps(read), amap, anon, uobj(if one) */ 1478 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1479 KASSERT(anon->an_lock == amap->am_lock); 1480 KASSERT(uobj == NULL || 1481 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 1482 1483 /* 1484 * special handling for loaned pages 1485 */ 1486 1487 if (anon->an_page->loan_count) { 1488 error = uvm_fault_upper_loan(ufi, flt, anon, &uobj); 1489 if (error != 0) 1490 return error; 1491 } 1492 1493 /* 1494 * if we are case 1B then we will need to allocate a new blank 1495 * anon to transfer the data into. note that we have a lock 1496 * on anon, so no one can busy or release the page until we are done. 1497 * also note that the ref count can't drop to zero here because 1498 * it is > 1 and we are only dropping one ref. 1499 * 1500 * in the (hopefully very rare) case that we are out of RAM we 1501 * will unlock, wait for more RAM, and refault. 1502 * 1503 * if we are out of anon VM we kill the process (XXX: could wait?). 1504 */ 1505 1506 if (flt->cow_now && anon->an_ref > 1) { 1507 flt->promote = true; 1508 error = uvm_fault_upper_promote(ufi, flt, uobj, anon); 1509 } else { 1510 error = uvm_fault_upper_direct(ufi, flt, uobj, anon); 1511 } 1512 return error; 1513 } 1514 1515 /* 1516 * uvm_fault_upper_loan: handle loaned upper page. 1517 * 1518 * 1. if not cow'ing now, simply adjust flt->enter_prot. 1519 * 2. if cow'ing now, and if ref count is 1, break loan. 1520 */ 1521 1522 static int 1523 uvm_fault_upper_loan( 1524 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1525 struct vm_anon *anon, struct uvm_object **ruobj) 1526 { 1527 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1528 int error = 0; 1529 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1530 1531 if (!flt->cow_now) { 1532 1533 /* 1534 * for read faults on loaned pages we just cap the 1535 * protection at read-only. 1536 */ 1537 1538 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; 1539 1540 } else { 1541 /* 1542 * note that we can't allow writes into a loaned page! 1543 * 1544 * if we have a write fault on a loaned page in an 1545 * anon then we need to look at the anon's ref count. 1546 * if it is greater than one then we are going to do 1547 * a normal copy-on-write fault into a new anon (this 1548 * is not a problem). however, if the reference count 1549 * is one (a case where we would normally allow a 1550 * write directly to the page) then we need to kill 1551 * the loan before we continue. 
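 *
 * (background note, added for clarity: a loaned page is shared read-only
 *  with another owner -- for example a page loaned out for zero-copy I/O --
 *  so letting a write through would modify data the other party still
 *  expects to be stable; uvm_loanbreak_anon() gives the anon a private
 *  copy before the write is allowed.)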
1552 */ 1553 1554 /* >1 case is already ok */ 1555 if (anon->an_ref == 1) { 1556 /* breaking loan requires a write lock. */ 1557 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); 1558 if (error != 0) { 1559 return error; 1560 } 1561 KASSERT(rw_write_held(amap->am_lock)); 1562 1563 error = uvm_loanbreak_anon(anon, *ruobj); 1564 if (error != 0) { 1565 uvmfault_unlockall(ufi, amap, *ruobj); 1566 uvm_wait("flt_noram2"); 1567 return ERESTART; 1568 } 1569 /* if we were a loan receiver uobj is gone */ 1570 if (*ruobj) 1571 *ruobj = NULL; 1572 } 1573 } 1574 return error; 1575 } 1576 1577 /* 1578 * uvm_fault_upper_promote: promote upper page. 1579 * 1580 * 1. call uvmfault_promote. 1581 * 2. enqueue page. 1582 * 3. deref. 1583 * 4. pass page to uvm_fault_upper_enter. 1584 */ 1585 1586 static int 1587 uvm_fault_upper_promote( 1588 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1589 struct uvm_object *uobj, struct vm_anon *anon) 1590 { 1591 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1592 struct vm_anon * const oanon = anon; 1593 struct vm_page *pg; 1594 int error; 1595 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1596 1597 UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0); 1598 cpu_count(CPU_COUNT_FLT_ACOW, 1); 1599 1600 /* promoting requires a write lock. */ 1601 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); 1602 if (error != 0) { 1603 return error; 1604 } 1605 KASSERT(rw_write_held(amap->am_lock)); 1606 1607 error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon, 1608 &flt->anon_spare); 1609 switch (error) { 1610 case 0: 1611 break; 1612 case ERESTART: 1613 return ERESTART; 1614 default: 1615 return error; 1616 } 1617 pg = anon->an_page; 1618 1619 KASSERT(anon->an_lock == oanon->an_lock); 1620 KASSERT((pg->flags & (PG_BUSY | PG_FAKE)) == 0); 1621 1622 /* deref: can not drop to zero here by defn! */ 1623 KASSERT(oanon->an_ref > 1); 1624 oanon->an_ref--; 1625 1626 /* 1627 * note: oanon is still locked, as is the new anon. we 1628 * need to check for this later when we unlock oanon; if 1629 * oanon != anon, we'll have to unlock anon, too. 1630 */ 1631 1632 return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon); 1633 } 1634 1635 /* 1636 * uvm_fault_upper_direct: handle direct fault. 1637 */ 1638 1639 static int 1640 uvm_fault_upper_direct( 1641 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1642 struct uvm_object *uobj, struct vm_anon *anon) 1643 { 1644 struct vm_anon * const oanon = anon; 1645 struct vm_page *pg; 1646 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1647 1648 cpu_count(CPU_COUNT_FLT_ANON, 1); 1649 pg = anon->an_page; 1650 if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ 1651 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; 1652 1653 return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon); 1654 } 1655 1656 /* 1657 * uvm_fault_upper_enter: enter h/w mapping of upper page. 
1658 */ 1659 1660 static int 1661 uvm_fault_upper_enter( 1662 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1663 struct uvm_object *uobj, struct vm_anon *anon, struct vm_page *pg, 1664 struct vm_anon *oanon) 1665 { 1666 struct pmap *pmap = ufi->orig_map->pmap; 1667 vaddr_t va = ufi->orig_rvaddr; 1668 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1669 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1670 1671 /* locked: maps(read), amap, oanon, anon(if different from oanon) */ 1672 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1673 KASSERT(anon->an_lock == amap->am_lock); 1674 KASSERT(oanon->an_lock == amap->am_lock); 1675 KASSERT(uobj == NULL || 1676 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 1677 KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); 1678 1679 /* 1680 * now map the page in. 1681 */ 1682 1683 UVMHIST_LOG(maphist, 1684 " MAPPING: anon: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd", 1685 (uintptr_t)pmap, va, (uintptr_t)pg, flt->promote); 1686 if (pmap_enter(pmap, va, VM_PAGE_TO_PHYS(pg), 1687 flt->enter_prot, flt->access_type | PMAP_CANFAIL | 1688 (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) { 1689 1690 /* 1691 * If pmap_enter() fails, it must not leave behind an existing 1692 * pmap entry. In particular, a now-stale entry for a different 1693 * page would leave the pmap inconsistent with the vm_map. 1694 * This is not to imply that pmap_enter() should remove an 1695 * existing mapping in such a situation (since that could create 1696 * different problems, eg. if the existing mapping is wired), 1697 * but rather that the pmap should be designed such that it 1698 * never needs to fail when the new mapping is replacing an 1699 * existing mapping and the new page has no existing mappings. 1700 * 1701 * XXX This can't be asserted safely any more because many 1702 * LWPs and/or many processes could simultaneously fault on 1703 * the same VA and some might succeed. 1704 */ 1705 1706 /* KASSERT(!pmap_extract(pmap, va, NULL)); */ 1707 1708 /* 1709 * ensure that the page is queued in the case that 1710 * we just promoted. 1711 */ 1712 1713 uvm_pagelock(pg); 1714 uvm_pageenqueue(pg); 1715 uvm_pageunlock(pg); 1716 1717 /* 1718 * No need to undo what we did; we can simply think of 1719 * this as the pmap throwing away the mapping information. 1720 * 1721 * We do, however, have to go through the ReFault path, 1722 * as the map may change while we're asleep. 1723 */ 1724 1725 uvmfault_unlockall(ufi, amap, uobj); 1726 if (!uvm_reclaimable()) { 1727 UVMHIST_LOG(maphist, 1728 "<- failed. out of VM",0,0,0,0); 1729 /* XXX instrumentation */ 1730 return ENOMEM; 1731 } 1732 /* XXX instrumentation */ 1733 uvm_wait("flt_pmfail1"); 1734 return ERESTART; 1735 } 1736 1737 uvm_fault_upper_done(ufi, flt, anon, pg); 1738 1739 /* 1740 * done case 1! finish up by unlocking everything and returning success 1741 */ 1742 1743 pmap_update(pmap); 1744 uvmfault_unlockall(ufi, amap, uobj); 1745 return 0; 1746 } 1747 1748 /* 1749 * uvm_fault_upper_done: queue upper center page. 1750 */ 1751 1752 static void 1753 uvm_fault_upper_done( 1754 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1755 struct vm_anon *anon, struct vm_page *pg) 1756 { 1757 const bool wire_paging = flt->wire_paging; 1758 1759 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1760 1761 /* 1762 * ... update the page queues. 
1763 */ 1764 1765 if (wire_paging) { 1766 uvm_pagelock(pg); 1767 uvm_pagewire(pg); 1768 uvm_pageunlock(pg); 1769 1770 /* 1771 * since the now-wired page cannot be paged out, 1772 * release its swap resources for others to use. 1773 * and since an anon with no swap cannot be clean, 1774 * mark it dirty now. 1775 */ 1776 1777 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1778 uvm_anon_dropswap(anon); 1779 } else if (uvmpdpol_pageactivate_p(pg)) { 1780 /* 1781 * avoid re-activating the page unless needed, 1782 * to avoid false sharing on multiprocessor. 1783 */ 1784 1785 uvm_pagelock(pg); 1786 uvm_pageactivate(pg); 1787 uvm_pageunlock(pg); 1788 } 1789 } 1790 1791 /* 1792 * uvm_fault_lower_upgrade: upgrade lower lock, reader -> writer 1793 */ 1794 1795 static inline int 1796 uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1797 struct vm_amap *amap, struct uvm_object *uobj, struct vm_page *uobjpage) 1798 { 1799 1800 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1801 1802 KASSERT(uobj != NULL); 1803 KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock)); 1804 1805 /* 1806 * fast path. 1807 */ 1808 1809 if (__predict_true(flt->lower_lock_type == RW_WRITER)) { 1810 return 0; 1811 } 1812 1813 /* 1814 * otherwise try for the upgrade. if we don't get it, unlock 1815 * everything, restart the fault and next time around get a writer 1816 * lock. 1817 */ 1818 1819 flt->lower_lock_type = RW_WRITER; 1820 if (__predict_false(!rw_tryupgrade(uobj->vmobjlock))) { 1821 uvmfault_unlockall(ufi, amap, uobj); 1822 cpu_count(CPU_COUNT_FLTNOUP, 1); 1823 UVMHIST_LOG(maphist, " !upgrade lower", 0, 0,0,0); 1824 return ERESTART; 1825 } 1826 cpu_count(CPU_COUNT_FLTUP, 1); 1827 KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock)); 1828 return 0; 1829 } 1830 1831 /* 1832 * uvm_fault_lower: handle lower fault. 1833 * 1834 * 1. check uobj 1835 * 1.1. if null, ZFOD. 1836 * 1.2. if not null, look up unnmapped neighbor pages. 1837 * 2. for center page, check if promote. 1838 * 2.1. ZFOD always needs promotion. 1839 * 2.2. other uobjs, when entry is marked COW (usually MAP_PRIVATE vnode). 1840 * 3. if uobj is not ZFOD and page is not found, do i/o. 1841 * 4. dispatch either direct / promote fault. 1842 */ 1843 1844 static int 1845 uvm_fault_lower( 1846 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1847 struct vm_page **pages) 1848 { 1849 struct vm_amap *amap __diagused = ufi->entry->aref.ar_amap; 1850 struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1851 struct vm_page *uobjpage; 1852 int error; 1853 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1854 1855 /* 1856 * now, if the desired page is not shadowed by the amap and we have 1857 * a backing object that does not have a special fault routine, then 1858 * we ask (with pgo_get) the object for resident pages that we care 1859 * about and attempt to map them in. we do not let pgo_get block 1860 * (PGO_LOCKED). 1861 */ 1862 1863 if (uobj == NULL) { 1864 /* zero fill; don't care neighbor pages */ 1865 uobjpage = NULL; 1866 } else { 1867 uvm_fault_lower_lookup(ufi, flt, pages); 1868 uobjpage = pages[flt->centeridx]; 1869 } 1870 1871 /* 1872 * note that at this point we are done with any front or back pages. 1873 * we are now going to focus on the center page (i.e. the one we've 1874 * faulted on). if we have faulted on the upper (anon) layer 1875 * [i.e. case 1], then the anon we want is anons[centeridx] (we have 1876 * not touched it yet). if we have faulted on the bottom (uobj) 1877 * layer [i.e. 
case 2] and the page was both present and available, 1878 * then we've got a pointer to it as "uobjpage" and we've already 1879 * made it BUSY. 1880 */ 1881 1882 /* 1883 * locked: 1884 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) 1885 */ 1886 KASSERT(amap == NULL || 1887 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1888 KASSERT(uobj == NULL || 1889 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 1890 1891 /* 1892 * note that uobjpage can not be PGO_DONTCARE at this point. we now 1893 * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we 1894 * have a backing object, check and see if we are going to promote 1895 * the data up to an anon during the fault. 1896 */ 1897 1898 if (uobj == NULL) { 1899 uobjpage = PGO_DONTCARE; 1900 flt->promote = true; /* always need anon here */ 1901 } else { 1902 KASSERT(uobjpage != PGO_DONTCARE); 1903 flt->promote = flt->cow_now && UVM_ET_ISCOPYONWRITE(ufi->entry); 1904 } 1905 UVMHIST_LOG(maphist, " case 2 fault: promote=%jd, zfill=%jd", 1906 flt->promote, (uobj == NULL), 0,0); 1907 1908 /* 1909 * if uobjpage is not null then we do not need to do I/O to get the 1910 * uobjpage. 1911 * 1912 * if uobjpage is null, then we need to unlock and ask the pager to 1913 * get the data for us. once we have the data, we need to reverify 1914 * the state the world. we are currently not holding any resources. 1915 */ 1916 1917 if (uobjpage) { 1918 /* update rusage counters */ 1919 curlwp->l_ru.ru_minflt++; 1920 } else { 1921 error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage); 1922 if (error != 0) 1923 return error; 1924 } 1925 1926 /* 1927 * locked: 1928 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1929 */ 1930 KASSERT(amap == NULL || 1931 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1932 KASSERT(uobj == NULL || 1933 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 1934 1935 /* 1936 * notes: 1937 * - at this point uobjpage can not be NULL 1938 * - at this point uobjpage can not be PG_RELEASED (since we checked 1939 * for it above) 1940 * - at this point uobjpage could be waited on (handle later) 1941 * - uobjpage can be from a different object if tmpfs (vnode vs UAO) 1942 */ 1943 1944 KASSERT(uobjpage != NULL); 1945 KASSERT(uobj == NULL || 1946 uobjpage->uobject->vmobjlock == uobj->vmobjlock); 1947 KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) || 1948 uvm_pagegetdirty(uobjpage) == UVM_PAGE_STATUS_CLEAN); 1949 1950 if (!flt->promote) { 1951 error = uvm_fault_lower_direct(ufi, flt, uobj, uobjpage); 1952 } else { 1953 error = uvm_fault_lower_promote(ufi, flt, uobj, uobjpage); 1954 } 1955 return error; 1956 } 1957 1958 /* 1959 * uvm_fault_lower_lookup: look up on-memory uobj pages. 1960 * 1961 * 1. get on-memory pages. 1962 * 2. if failed, give up (get only center page later). 1963 * 3. if succeeded, enter h/w mapping of neighbor pages. 
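 *
 * (added note: the PGO_LOCKED get below may only return pages that are
 *  already resident and must not sleep, so anything not in memory is
 *  simply reported as missing here and, for the center page, fetched
 *  later by uvm_fault_lower_io() with a blocking PGO_SYNCIO get.)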
/*
 * uvm_fault_lower_lookup: look up on-memory uobj pages.
 *
 *	1. get on-memory pages.
 *	2. if failed, give up (get only center page later).
 *	3. if succeeded, enter h/w mapping of neighbor pages.
 */

static void
uvm_fault_lower_lookup(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct vm_page **pages)
{
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	int lcv, gotpages;
	vaddr_t currva;
	bool entered;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	rw_enter(uobj->vmobjlock, flt->lower_lock_type);

	/*
	 * Locked: maps(read), amap(if there), uobj
	 */

	cpu_count(CPU_COUNT_FLTLGET, 1);
	gotpages = flt->npages;
	(void) uobj->pgops->pgo_get(uobj,
	    ufi->entry->offset + flt->startva - ufi->entry->start,
	    pages, &gotpages, flt->centeridx,
	    flt->access_type & MASK(ufi->entry), ufi->entry->advice,
	    PGO_LOCKED);

	KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);

	/*
	 * check for pages to map, if we got any
	 */

	if (gotpages == 0) {
		pages[flt->centeridx] = NULL;
		return;
	}

	entered = false;
	currva = flt->startva;
	for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) {
		struct vm_page *curpg;

		curpg = pages[lcv];
		if (curpg == NULL || curpg == PGO_DONTCARE) {
			continue;
		}

		/*
		 * in the case of tmpfs, the pages might be from a different
		 * uvm_object.  just make sure that they have the same lock.
		 */

		KASSERT(curpg->uobject->vmobjlock == uobj->vmobjlock);
		KASSERT((curpg->flags & PG_BUSY) == 0);

		/*
		 * leave the centre page for later.  don't screw with
		 * existing mappings (needless & expensive).
		 */

		if (lcv == flt->centeridx) {
			UVMHIST_LOG(maphist, " got uobjpage (%#jx) "
			    "with locked get", (uintptr_t)curpg, 0, 0, 0);
		} else if (!pmap_extract(ufi->orig_map->pmap, currva, NULL)) {
			uvm_fault_lower_neighbor(ufi, flt, currva, curpg);
			entered = true;
		}
	}
	if (entered) {
		pmap_update(ufi->orig_map->pmap);
	}
}
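/*
 * Illustrative sketch (hypothetical caller and sizes): a PGO_LOCKED
 * pgo_get of the kind issued by uvm_fault_lower_lookup() above.  The
 * pager may not block, so it only fills in pages that are already
 * resident; the count is in/out, and because the object lock is never
 * dropped the returned pages are neither busy nor released.
 */
#if 0
	struct vm_page *pgs[4];
	int npages = 4;

	memset(pgs, 0, sizeof(pgs));
	(void) uobj->pgops->pgo_get(uobj, offset, pgs, &npages,
	    0 /* centeridx */, VM_PROT_READ, UVM_ADV_NORMAL, PGO_LOCKED);
	if (npages == 0) {
		/* nothing resident; fall back to a blocking PGO_SYNCIO get */
	}
#endif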
/*
 * uvm_fault_lower_neighbor: enter h/w mapping of lower neighbor page.
 */

static void
uvm_fault_lower_neighbor(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	vaddr_t currva, struct vm_page *pg)
{
	const bool readonly = uvm_pagereadonly_p(pg) || pg->loan_count > 0;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/* locked: maps(read), amap(if there), uobj */

	/*
	 * calling pgo_get with PGO_LOCKED returns us pages which
	 * are neither busy nor released, so we don't need to check
	 * for this.  we can just directly enter the pages.
	 *
	 * there wasn't a direct fault on the page, so avoid the cost of
	 * activating it.
	 */

	if (!uvmpdpol_pageisqueued_p(pg) && pg->wire_count == 0) {
		uvm_pagelock(pg);
		uvm_pageenqueue(pg);
		uvm_pageunlock(pg);
	}

	UVMHIST_LOG(maphist,
	    " MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx",
	    (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0);
	cpu_count(CPU_COUNT_FLTNOMAP, 1);

	/*
	 * Since this page isn't the page that's actually faulting,
	 * ignore pmap_enter() failures; it's not critical that we
	 * enter these right now.
	 * NOTE: page can't be waited on or PG_RELEASED because we've
	 * held the lock the whole time we've had the handle.
	 */
	KASSERT((pg->flags & PG_PAGEOUT) == 0);
	KASSERT((pg->flags & PG_RELEASED) == 0);
	KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) ||
	    uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN);
	KASSERT((pg->flags & PG_BUSY) == 0);
	KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type);

	const vm_prot_t mapprot =
	    readonly ? (flt->enter_prot & ~VM_PROT_WRITE) :
	    flt->enter_prot & MASK(ufi->entry);
	const u_int mapflags =
	    PMAP_CANFAIL | (flt->wire_mapping ? (mapprot | PMAP_WIRED) : 0);
	(void) pmap_enter(ufi->orig_map->pmap, currva,
	    VM_PAGE_TO_PHYS(pg), mapprot, mapflags);
}
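/*
 * Illustrative sketch (hypothetical variables): the two ways PMAP_CANFAIL
 * shows up in this file.  For a speculative neighbor mapping, as in
 * uvm_fault_lower_neighbor() above, a failure is simply ignored; for the
 * page actually faulted on, the return value must be checked and the
 * fault restarted once memory is available (see uvm_fault_lower_enter()).
 */
#if 0
	/* speculative neighbor mapping: best effort only */
	(void) pmap_enter(pmap, neighbor_va, VM_PAGE_TO_PHYS(neighbor_pg),
	    prot, PMAP_CANFAIL);

	/* the faulting page: failure unwinds and restarts the fault */
	if (pmap_enter(pmap, fault_va, VM_PAGE_TO_PHYS(fault_pg),
	    prot, access_type | PMAP_CANFAIL) != 0) {
		/* unlock everything, uvm_wait() for memory, return ERESTART */
	}
#endif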
/*
 * uvm_fault_lower_io: get lower page from backing store.
 *
 *	1. unlock everything, because i/o will block.
 *	2. call pgo_get.
 *	3. if failed, recover.
 *	4. if succeeded, relock everything and verify things.
 */

static int
uvm_fault_lower_io(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object **ruobj, struct vm_page **ruobjpage)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = *ruobj;
	struct vm_page *pg;
	bool locked;
	int gotpages;
	int error;
	voff_t uoff;
	vm_prot_t access_type;
	int advice;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/* update rusage counters */
	curlwp->l_ru.ru_majflt++;

	/* grab everything we need from the entry before we unlock */
	uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
	access_type = flt->access_type & MASK(ufi->entry);
	advice = ufi->entry->advice;

	/* Locked: maps(read), amap(if there), uobj */
	KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);

	/* Upgrade to a write lock if needed. */
	error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, NULL);
	if (error != 0) {
		return error;
	}
	uvmfault_unlockall(ufi, amap, NULL);

	/* Locked: uobj(write) */
	KASSERT(rw_write_held(uobj->vmobjlock));

	cpu_count(CPU_COUNT_FLTGET, 1);
	gotpages = 1;
	pg = NULL;
	error = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages,
	    0, access_type, advice, PGO_SYNCIO);
	/* locked: pg(if no error) */

	/*
	 * recover from I/O
	 */

	if (error) {
		if (error == EAGAIN) {
			UVMHIST_LOG(maphist,
			    " pgo_get says TRY AGAIN!",0,0,0,0);
			kpause("fltagain2", false, hz/2, NULL);
			return ERESTART;
		}

#if 0
		KASSERT(error != ERESTART);
#else
		/* XXXUEBS don't re-fault? */
		if (error == ERESTART)
			error = EIO;
#endif

		UVMHIST_LOG(maphist, "<- pgo_get failed (code %jd)",
		    error, 0,0,0);
		return error;
	}

	/*
	 * re-verify the state of the world by first trying to relock
	 * the maps.  always relock the object.
	 */

	locked = uvmfault_relock(ufi);
	if (locked && amap)
		amap_lock(amap, flt->upper_lock_type);

	/* might be changed */
	uobj = pg->uobject;

	rw_enter(uobj->vmobjlock, flt->lower_lock_type);
	KASSERT((pg->flags & PG_BUSY) != 0);
	KASSERT(flt->lower_lock_type == RW_WRITER);

	uvm_pagelock(pg);
	uvm_pageactivate(pg);
	uvm_pageunlock(pg);

	/* locked(locked): maps(read), amap(if !null), uobj, pg */
	/* locked(!locked): uobj, pg */

	/*
	 * verify that the page has not been released and re-verify
	 * that the amap slot is still free.  if there is a problem,
	 * we unlock and clean up.
	 */

	if ((pg->flags & PG_RELEASED) != 0 ||
	    (locked && amap && amap_lookup(&ufi->entry->aref,
	      ufi->orig_rvaddr - ufi->entry->start))) {
		if (locked)
			uvmfault_unlockall(ufi, amap, NULL);
		locked = false;
	}

	/*
	 * unbusy/release the page.
	 */

	if ((pg->flags & PG_RELEASED) == 0) {
		pg->flags &= ~PG_BUSY;
		uvm_pagelock(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		UVM_PAGE_OWN(pg, NULL);
	} else {
		cpu_count(CPU_COUNT_FLTPGRELE, 1);
		uvm_pagefree(pg);
	}

	/*
	 * didn't get the lock?  retry.
	 */

	if (locked == false) {
		UVMHIST_LOG(maphist,
		    " wasn't able to relock after fault: retry",
		    0,0,0,0);
		rw_exit(uobj->vmobjlock);
		return ERESTART;
	}

	/*
	 * we have the data in pg.  we are holding object lock (so the page
	 * can't be released on us).
	 */

	/* locked: maps(read), amap(if !null), uobj */

	*ruobj = uobj;
	*ruobjpage = pg;
	return 0;
}
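/*
 * Illustrative sketch (hypothetical pager helper): the overall shape of
 * uvm_fault_lower_io() above.  Locks are dropped before the pager can
 * sleep, and any change noticed after relocking (a released page, an
 * amap slot that is no longer empty, a map that cannot be relocked) is
 * answered with ERESTART rather than patched up in place.  The real code
 * also re-takes the object lock and unbusies the page before deciding.
 */
#if 0
	uvmfault_unlockall(ufi, amap, NULL);	/* keep only the object lock */
	error = example_pager_read(uobj, uoff, &pg);	/* may sleep for I/O */
	if (error != 0)
		return error;
	if (!uvmfault_relock(ufi))		/* did the map change on us? */
		return ERESTART;		/* cheapest answer: re-fault */
#endif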
/*
 * uvm_fault_lower_direct: fault lower center page
 *
 *	1. adjust flt->enter_prot.
 *	2. if page is loaned, resolve.
 */

int
uvm_fault_lower_direct(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page *uobjpage)
{
	struct vm_page *pg;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/*
	 * we are not promoting.  if the mapping is COW ensure that we
	 * don't give more access than we should (e.g. when doing a read
	 * fault on a COPYONWRITE mapping we want to map the COW page in
	 * R/O even though the entry protection could be R/W).
	 *
	 * set "pg" to the page we want to map in (uobjpage, usually)
	 */

	cpu_count(CPU_COUNT_FLT_OBJ, 1);
	if (UVM_ET_ISCOPYONWRITE(ufi->entry) ||
	    UVM_OBJ_NEEDS_WRITEFAULT(uobjpage->uobject))
		flt->enter_prot &= ~VM_PROT_WRITE;
	pg = uobjpage;		/* map in the actual object */

	KASSERT(uobjpage != PGO_DONTCARE);

	/*
	 * we are faulting directly on the page.  be careful
	 * about writing to loaned pages...
	 */

	if (uobjpage->loan_count) {
		uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage);
	}
	KASSERT(pg == uobjpage);
	KASSERT((pg->flags & PG_BUSY) == 0);
	return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg);
}

/*
 * uvm_fault_lower_direct_loan: resolve loaned page.
 *
 *	1. if not cow'ing, adjust flt->enter_prot.
 *	2. if cow'ing, break loan.
 */

static int
uvm_fault_lower_direct_loan(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page **rpg,
	struct vm_page **ruobjpage)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	struct vm_page *pg;
	struct vm_page *uobjpage = *ruobjpage;
	int error;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	if (!flt->cow_now) {
		/* read fault: cap the protection at readonly */
		/* cap! */
		flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE;
	} else {
		/*
		 * write fault: must break the loan here.  to do this
		 * we need a write lock on the object.
		 */

		error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, uobjpage);
		if (error != 0) {
			return error;
		}
		KASSERT(rw_write_held(uobj->vmobjlock));

		pg = uvm_loanbreak(uobjpage);
		if (pg == NULL) {

			uvmfault_unlockall(ufi, amap, uobj);
			UVMHIST_LOG(maphist,
			    " out of RAM breaking loan, waiting",
			    0,0,0,0);
			cpu_count(CPU_COUNT_FLTNORAM, 1);
			uvm_wait("flt_noram4");
			return ERESTART;
		}
		*rpg = pg;
		*ruobjpage = pg;

		/*
		 * drop ownership of page while still holding object lock,
		 * which won't be dropped until the page is entered.
		 */

		uvm_pagelock(pg);
		uvm_pagewakeup(pg);
		uvm_pageunlock(pg);
		pg->flags &= ~PG_BUSY;
		UVM_PAGE_OWN(pg, NULL);
	}
	return 0;
}
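/*
 * Illustrative sketch (condensed, hypothetical variables): the two
 * outcomes for a loaned lower page in uvm_fault_lower_direct_loan()
 * above.  A read fault just maps the loaned page read-only; a write
 * fault needs a private copy from uvm_loanbreak(), which can fail when
 * memory is tight, in which case the fault waits and restarts (the real
 * code drops its locks before waiting).
 */
#if 0
	if (!write_fault) {
		prot &= ~VM_PROT_WRITE;		/* keep sharing, read-only */
	} else if ((pg = uvm_loanbreak(loaned_pg)) == NULL) {
		uvm_wait("flt_noram4");		/* no memory for the copy */
		return ERESTART;
	}
#endif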
/*
 * uvm_fault_lower_promote: promote lower page.
 *
 *	1. call uvmfault_promote.
 *	2. fill in data.
 *	3. if not ZFOD, dispose old page.
 */

int
uvm_fault_lower_promote(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page *uobjpage)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	struct vm_anon *anon;
	struct vm_page *pg;
	int error;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(amap != NULL);

	/* promoting requires a write lock. */
	error = uvm_fault_upper_upgrade(ufi, flt, amap, uobj);
	if (error != 0) {
		return error;
	}
	KASSERT(rw_write_held(amap->am_lock));
	KASSERT(uobj == NULL ||
	    rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);

	/*
	 * If we are going to promote the data to an anon we
	 * allocate a blank anon here and plug it into our amap.
	 */
	error = uvmfault_promote(ufi, NULL, uobjpage, &anon, &flt->anon_spare);
	switch (error) {
	case 0:
		break;
	case ERESTART:
		return ERESTART;
	default:
		return error;
	}

	pg = anon->an_page;

	/*
	 * Fill in the data.
	 */

	if (uobjpage != PGO_DONTCARE) {
		cpu_count(CPU_COUNT_FLT_PRCOPY, 1);

		/*
		 * promote to shared amap?  make sure all sharing
		 * procs see it
		 */

		if ((amap_flags(amap) & AMAP_SHARED) != 0) {
			pmap_page_protect(uobjpage, VM_PROT_NONE);
			/*
			 * XXX: PAGE MIGHT BE WIRED!
			 */
		}

		UVMHIST_LOG(maphist,
		    " promote uobjpage %#jx to anon/page %#jx/%#jx",
		    (uintptr_t)uobjpage, (uintptr_t)anon, (uintptr_t)pg, 0);

	} else {
		cpu_count(CPU_COUNT_FLT_PRZERO, 1);

		/*
		 * Page is zero'd and marked dirty by
		 * uvmfault_promote().
		 */

		UVMHIST_LOG(maphist," zero fill anon/page %#jx/%#jx",
		    (uintptr_t)anon, (uintptr_t)pg, 0, 0);
	}

	return uvm_fault_lower_enter(ufi, flt, uobj, anon, pg);
}

/*
 * uvm_fault_lower_enter: enter h/w mapping of lower page or anon page promoted
 * from the lower page.
 */

int
uvm_fault_lower_enter(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct uvm_object *uobj,
	struct vm_anon *anon, struct vm_page *pg)
{
	struct vm_amap * const amap = ufi->entry->aref.ar_amap;
	const bool readonly = uvm_pagereadonly_p(pg);
	int error;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/*
	 * Locked:
	 *
	 *	maps(read), amap(if !null), uobj(if !null),
	 *	anon(if !null), pg(if anon), unlock_uobj(if !null)
	 *
	 * anon must be write locked (promotion).  uobj can be either.
	 *
	 * Note: pg is either the uobjpage or the new page in the new anon.
	 */

	KASSERT(amap == NULL ||
	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
	KASSERT(uobj == NULL ||
	    rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
	KASSERT(anon == NULL || anon->an_lock == amap->am_lock);

	/*
	 * note that pg can't be PG_RELEASED or PG_BUSY since we did
	 * not drop the object lock since the last time we checked.
	 */

	KASSERT((pg->flags & PG_RELEASED) == 0);
	KASSERT((pg->flags & PG_BUSY) == 0);

	/*
	 * all resources are present.  we can now map it in and free our
	 * resources.
	 */

	UVMHIST_LOG(maphist,
	    " MAPPING: case2: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd",
	    (uintptr_t)ufi->orig_map->pmap, ufi->orig_rvaddr,
	    (uintptr_t)pg, flt->promote);
	KASSERTMSG((flt->access_type & VM_PROT_WRITE) == 0 || !readonly,
	    "promote=%u cow_now=%u access_type=%x enter_prot=%x cow=%u "
	    "entry=%p map=%p orig_rvaddr=%p pg=%p",
	    flt->promote, flt->cow_now, flt->access_type, flt->enter_prot,
	    UVM_ET_ISCOPYONWRITE(ufi->entry), ufi->entry, ufi->orig_map,
	    (void *)ufi->orig_rvaddr, pg);
	KASSERT((flt->access_type & VM_PROT_WRITE) == 0 || !readonly);
	if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
	    VM_PAGE_TO_PHYS(pg),
	    readonly ? flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot,
	    flt->access_type | PMAP_CANFAIL |
	    (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) {

		/*
		 * No need to undo what we did; we can simply think of
		 * this as the pmap throwing away the mapping information.
		 *
		 * We do, however, have to go through the ReFault path,
		 * as the map may change while we're asleep.
		 */

		/*
		 * ensure that the page is queued in the case that
		 * we just promoted the page.
		 */

		if (anon != NULL) {
			uvm_pagelock(pg);
			uvm_pageenqueue(pg);
			uvm_pagewakeup(pg);
			uvm_pageunlock(pg);
		}

		uvmfault_unlockall(ufi, amap, uobj);
		if (!uvm_reclaimable()) {
			UVMHIST_LOG(maphist,
			    "<- failed.  out of VM",0,0,0,0);
			/* XXX instrumentation */
			error = ENOMEM;
			return error;
		}
		/* XXX instrumentation */
		uvm_wait("flt_pmfail2");
		return ERESTART;
	}

	uvm_fault_lower_done(ufi, flt, uobj, pg);
	pmap_update(ufi->orig_map->pmap);
	uvmfault_unlockall(ufi, amap, uobj);

	UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0);
	return 0;
}
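/*
 * Illustrative sketch (condensed from the failure branch above): what a
 * failed PMAP_CANFAIL mapping attempt turns into.  Nothing is rolled
 * back: either the fault fails outright because nothing can be
 * reclaimed, or the handler waits for the pagedaemon and re-faults.
 */
#if 0
	if (pmap_enter(pmap, va, pa, prot, access_type | PMAP_CANFAIL) != 0) {
		uvmfault_unlockall(ufi, amap, uobj);
		if (!uvm_reclaimable())
			return ENOMEM;		/* truly out of resources */
		uvm_wait("flt_pmfail2");	/* let the pagedaemon run */
		return ERESTART;		/* then redo the fault */
	}
#endif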
/*
 * uvm_fault_lower_done: queue lower center page.
 */

void
uvm_fault_lower_done(
	struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt,
	struct uvm_object *uobj, struct vm_page *pg)
{

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	if (flt->wire_paging) {
		uvm_pagelock(pg);
		uvm_pagewire(pg);
		uvm_pageunlock(pg);
		if (pg->flags & PG_AOBJ) {

			/*
			 * since the now-wired page cannot be paged out,
			 * release its swap resources for others to use.
			 * since an aobj page with no swap cannot be clean,
			 * mark it dirty now.
			 *
			 * use pg->uobject here.  if the page is from a
			 * tmpfs vnode, the pages are backed by its UAO and
			 * not the vnode.
			 */

			KASSERT(uobj != NULL);
			KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
			uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
			uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
		}
	} else if (uvmpdpol_pageactivate_p(pg)) {
		/*
		 * avoid re-activating the page unless needed,
		 * to avoid false sharing on multiprocessor.
		 */

		uvm_pagelock(pg);
		uvm_pageactivate(pg);
		uvm_pageunlock(pg);
	}
}


/*
 * uvm_fault_wire: wire down a range of virtual addresses in a map.
 *
 * => map may be read-locked by caller, but MUST NOT be write-locked.
 * => if map is read-locked, any operations which may cause map to
 *	be write-locked in uvm_fault() must be taken care of by
 *	the caller.  See uvm_map_pageable().
 */

int
uvm_fault_wire(struct vm_map *map, vaddr_t start, vaddr_t end,
    vm_prot_t access_type, int maxprot)
{
	vaddr_t va;
	int error;

	/*
	 * now fault it in a page at a time.  if the fault fails then we have
	 * to undo what we have done.  note that in uvm_fault VM_PROT_NONE
	 * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
	 */

	/*
	 * XXX work around overflowing a vaddr_t.  this prevents us from
	 * wiring the last page in the address space, though.
	 */
	if (start > end) {
		return EFAULT;
	}

	for (va = start; va < end; va += PAGE_SIZE) {
		error = uvm_fault_internal(map, va, access_type,
		    (maxprot ? UVM_FAULT_MAXPROT : 0) | UVM_FAULT_WIRE);
		if (error) {
			if (va != start) {
				uvm_fault_unwire(map, start, va);
			}
			return error;
		}
	}
	return 0;
}
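/*
 * Illustrative sketch (hypothetical caller): wiring a range with
 * uvm_fault_wire() above and releasing it with uvm_fault_unwire().  On
 * failure uvm_fault_wire() has already unwired the part of the range it
 * managed to wire, so the caller only cleans up after success.
 */
#if 0
	error = uvm_fault_wire(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE, 0);
	if (error == 0) {
		/* ... the range stays resident until ... */
		uvm_fault_unwire(map, start, end);
	}
#endif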
/*
 * uvm_fault_unwire(): unwire range of virtual space.
 */

void
uvm_fault_unwire(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	vm_map_lock_read(map);
	uvm_fault_unwire_locked(map, start, end);
	vm_map_unlock_read(map);
}

/*
 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
 *
 * => map must be at least read-locked.
 */

void
uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct vm_map_entry *entry, *oentry;
	pmap_t pmap = vm_map_pmap(map);
	vaddr_t va;
	paddr_t pa;
	struct vm_page *pg;

	/*
	 * we assume that the area we are unwiring has actually been wired
	 * in the first place.  this means that we should be able to extract
	 * the PAs from the pmap.  we also lock out the page daemon so that
	 * we can call uvm_pageunwire.
	 */

	/*
	 * find the beginning map entry for the region.
	 */

	KASSERT(start >= vm_map_min(map));
	KASSERT(end <= vm_map_max(map));
	if (uvm_map_lookup_entry(map, start, &entry) == false)
		panic("uvm_fault_unwire_locked: address not in map");

	oentry = NULL;
	for (va = start; va < end; va += PAGE_SIZE) {

		/*
		 * find the map entry for the current address.
		 */

		KASSERT(va >= entry->start);
		while (va >= entry->end) {
			KASSERT(entry->next != &map->header);
			KASSERT(entry->next->start <= entry->end);
			entry = entry->next;
		}

		/*
		 * lock it.
		 */

		if (entry != oentry) {
			if (oentry != NULL) {
				uvm_map_unlock_entry(oentry);
			}
			uvm_map_lock_entry(entry, RW_WRITER);
			oentry = entry;
		}

		/*
		 * if the entry is no longer wired, tell the pmap.
		 */

		if (!pmap_extract(pmap, va, &pa))
			continue;

		if (VM_MAPENT_ISWIRED(entry) == 0)
			pmap_unwire(pmap, va);

		pg = PHYS_TO_VM_PAGE(pa);
		if (pg) {
			uvm_pagelock(pg);
			uvm_pageunwire(pg);
			uvm_pageunlock(pg);
		}
	}

	if (oentry != NULL) {
		uvm_map_unlock_entry(entry);
	}
}
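/*
 * Illustrative sketch (hypothetical context): the VA -> PA -> vm_page
 * translation used by uvm_fault_unwire_locked() above.  pmap_extract()
 * only succeeds for addresses that are currently mapped, and
 * PHYS_TO_VM_PAGE() may return NULL for physical addresses that have no
 * vm_page, e.g. device memory.
 */
#if 0
	paddr_t pa;
	struct vm_page *pg;

	if (pmap_extract(pmap, va, &pa)) {
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg != NULL) {
			/* a managed page backs this mapping */
		}
	}
#endif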