1 /* $NetBSD: uvm_fault.c,v 1.224 2020/03/23 10:35:56 skrll Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Charles D. Cranor and Washington University. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 * 27 * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp 28 */ 29 30 /* 31 * uvm_fault.c: fault handler 32 */ 33 34 #include <sys/cdefs.h> 35 __KERNEL_RCSID(0, "$NetBSD: uvm_fault.c,v 1.224 2020/03/23 10:35:56 skrll Exp $"); 36 37 #include "opt_uvmhist.h" 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/atomic.h> 42 #include <sys/kernel.h> 43 #include <sys/mman.h> 44 45 #include <uvm/uvm.h> 46 47 /* 48 * 49 * a word on page faults: 50 * 51 * types of page faults we handle: 52 * 53 * CASE 1: upper layer faults CASE 2: lower layer faults 54 * 55 * CASE 1A CASE 1B CASE 2A CASE 2B 56 * read/write1 write>1 read/write +-cow_write/zero 57 * | | | | 58 * +--|--+ +--|--+ +-----+ + | + | +-----+ 59 * amap | V | | ---------> new | | | | ^ | 60 * +-----+ +-----+ +-----+ + | + | +--|--+ 61 * | | | 62 * +-----+ +-----+ +--|--+ | +--|--+ 63 * uobj | d/c | | d/c | | V | +----+ | 64 * +-----+ +-----+ +-----+ +-----+ 65 * 66 * d/c = don't care 67 * 68 * case [0]: layerless fault 69 * no amap or uobj is present. this is an error. 70 * 71 * case [1]: upper layer fault [anon active] 72 * 1A: [read] or [write with anon->an_ref == 1] 73 * I/O takes place in upper level anon and uobj is not touched. 74 * 1B: [write with anon->an_ref > 1] 75 * new anon is alloc'd and data is copied off ["COW"] 76 * 77 * case [2]: lower layer fault [uobj] 78 * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] 79 * I/O takes place directly in object. 80 * 2B: [write to copy_on_write] or [read on NULL uobj] 81 * data is "promoted" from uobj to a new anon. 82 * if uobj is null, then we zero fill. 
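 *
 * an illustrative (userland, not kernel) sketch of what drives the cases
 * above; the mmap()/fork() calls are standard POSIX, and the per-line
 * case labels are this comment's interpretation, not something the code
 * below spells out:
 *
 *	char *p = mmap(NULL, 4096, PROT_READ|PROT_WRITE,
 *	    MAP_PRIVATE|MAP_ANON, -1, 0);
 *	p[0] = 1;		<- first write: case 2B (zero fill, promote)
 *	if (fork() == 0)
 *		p[0] = 2;	<- write in the child: case 1B (an_ref > 1, COW)
 *
 * a read of a MAP_PRIVATE file mapping is satisfied directly from the
 * uobj (case 2A), and a write fault on a page whose anon is no longer
 * shared (an_ref == 1) is handled in place (case 1A).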
83 * 84 * we follow the standard UVM locking protocol ordering: 85 * 86 * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) 87 * we hold a PG_BUSY page if we unlock for I/O 88 * 89 * 90 * the code is structured as follows: 91 * 92 * - init the "IN" params in the ufi structure 93 * ReFault: (ERESTART returned to the loop in uvm_fault_internal) 94 * - do lookups [locks maps], check protection, handle needs_copy 95 * - check for case 0 fault (error) 96 * - establish "range" of fault 97 * - if we have an amap lock it and extract the anons 98 * - if sequential advice deactivate pages behind us 99 * - at the same time check pmap for unmapped areas and anon for pages 100 * that we could map in (and do map it if found) 101 * - check object for resident pages that we could map in 102 * - if (case 2) goto Case2 103 * - >>> handle case 1 104 * - ensure source anon is resident in RAM 105 * - if case 1B alloc new anon and copy from source 106 * - map the correct page in 107 * Case2: 108 * - >>> handle case 2 109 * - ensure source page is resident (if uobj) 110 * - if case 2B alloc new anon and copy from source (could be zero 111 * fill if uobj == NULL) 112 * - map the correct page in 113 * - done! 114 * 115 * note on paging: 116 * if we have to do I/O we place a PG_BUSY page in the correct object, 117 * unlock everything, and do the I/O. when I/O is done we must reverify 118 * the state of the world before assuming that our data structures are 119 * valid. [because mappings could change while the map is unlocked] 120 * 121 * alternative 1: unbusy the page in question and restart the page fault 122 * from the top (ReFault). this is easy but does not take advantage 123 * of the information that we already have from our previous lookup, 124 * although it is possible that the "hints" in the vm_map will help here. 125 * 126 * alternative 2: the system already keeps track of a "version" number of 127 * a map. [i.e. every time you write-lock a map (e.g. to change a 128 * mapping) you bump the version number up by one...] so, we can save 129 * the version number of the map before we release the lock and start I/O. 130 * then when I/O is done we can relock and check the version numbers 131 * to see if anything changed. this might save us some over 1 because 132 * we don't have to unbusy the page and may be less compares(?). 133 * 134 * alternative 3: put in backpointers or a way to "hold" part of a map 135 * in place while I/O is in progress. this could be complex to 136 * implement (especially with structures like amap that can be referenced 137 * by multiple map entries, and figuring out what should wait could be 138 * complex as well...). 139 * 140 * we use alternative 2. given that we are multi-threaded now we may want 141 * to reconsider the choice. 142 */ 143 144 /* 145 * local data structures 146 */ 147 148 struct uvm_advice { 149 int advice; 150 int nback; 151 int nforw; 152 }; 153 154 /* 155 * page range array: 156 * note: index in array must match "advice" value 157 * XXX: borrowed numbers from freebsd. do they work well for us? 158 */ 159 160 static const struct uvm_advice uvmadvice[] = { 161 { UVM_ADV_NORMAL, 3, 4 }, 162 { UVM_ADV_RANDOM, 0, 0 }, 163 { UVM_ADV_SEQUENTIAL, 8, 7}, 164 }; 165 166 #define UVM_MAXRANGE 16 /* must be MAX() of nback+nforw+1 */ 167 168 /* 169 * private prototypes 170 */ 171 172 /* 173 * externs from other modules 174 */ 175 176 extern int start_init_exec; /* Is init_main() done / init running? 
*/ 177 178 /* 179 * inline functions 180 */ 181 182 /* 183 * uvmfault_anonflush: try and deactivate pages in specified anons 184 * 185 * => does not have to deactivate page if it is busy 186 */ 187 188 static inline void 189 uvmfault_anonflush(struct vm_anon **anons, int n) 190 { 191 int lcv; 192 struct vm_page *pg; 193 194 for (lcv = 0; lcv < n; lcv++) { 195 if (anons[lcv] == NULL) 196 continue; 197 KASSERT(rw_lock_held(anons[lcv]->an_lock)); 198 pg = anons[lcv]->an_page; 199 if (pg && (pg->flags & PG_BUSY) == 0) { 200 uvm_pagelock(pg); 201 uvm_pagedeactivate(pg); 202 uvm_pageunlock(pg); 203 } 204 } 205 } 206 207 /* 208 * normal functions 209 */ 210 211 /* 212 * uvmfault_amapcopy: clear "needs_copy" in a map. 213 * 214 * => called with VM data structures unlocked (usually, see below) 215 * => we get a write lock on the maps and clear needs_copy for a VA 216 * => if we are out of RAM we sleep (waiting for more) 217 */ 218 219 static void 220 uvmfault_amapcopy(struct uvm_faultinfo *ufi) 221 { 222 for (;;) { 223 224 /* 225 * no mapping? give up. 226 */ 227 228 if (uvmfault_lookup(ufi, true) == false) 229 return; 230 231 /* 232 * copy if needed. 233 */ 234 235 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) 236 amap_copy(ufi->map, ufi->entry, AMAP_COPY_NOWAIT, 237 ufi->orig_rvaddr, ufi->orig_rvaddr + 1); 238 239 /* 240 * didn't work? must be out of RAM. unlock and sleep. 241 */ 242 243 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 244 uvmfault_unlockmaps(ufi, true); 245 uvm_wait("fltamapcopy"); 246 continue; 247 } 248 249 /* 250 * got it! unlock and return. 251 */ 252 253 uvmfault_unlockmaps(ufi, true); 254 return; 255 } 256 /*NOTREACHED*/ 257 } 258 259 /* 260 * uvmfault_anonget: get data in an anon into a non-busy, non-released 261 * page in that anon. 262 * 263 * => Map, amap and thus anon should be locked by caller. 264 * => If we fail, we unlock everything and error is returned. 265 * => If we are successful, return with everything still locked. 266 * => We do not move the page on the queues [gets moved later]. If we 267 * allocate a new page [we_own], it gets put on the queues. Either way, 268 * the result is that the page is on the queues at return time 269 * => For pages which are on loan from a uvm_object (and thus are not owned 270 * by the anon): if successful, return with the owning object locked. 271 * The caller must unlock this object when it unlocks everything else. 272 */ 273 274 int 275 uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, 276 struct vm_anon *anon) 277 { 278 struct vm_page *pg; 279 krw_t lock_type; 280 int error; 281 282 UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); 283 KASSERT(rw_lock_held(anon->an_lock)); 284 KASSERT(anon->an_lock == amap->am_lock); 285 286 /* Increment the counters.*/ 287 cpu_count(CPU_COUNT_FLTANGET, 1); 288 if (anon->an_page) { 289 curlwp->l_ru.ru_minflt++; 290 } else { 291 curlwp->l_ru.ru_majflt++; 292 } 293 error = 0; 294 295 /* 296 * Loop until we get the anon data, or fail. 297 */ 298 299 for (;;) { 300 bool we_own, locked; 301 /* 302 * Note: 'we_own' will become true if we set PG_BUSY on a page. 303 */ 304 we_own = false; 305 pg = anon->an_page; 306 307 /* 308 * If there is a resident page and it is loaned, then anon 309 * may not own it. Call out to uvm_anon_lockloanpg() to 310 * identify and lock the real owner of the page. 311 */ 312 313 if (pg && pg->loan_count) 314 pg = uvm_anon_lockloanpg(anon); 315 316 /* 317 * Is page resident? Make sure it is not busy/released. 
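		 *
		 * three outcomes are possible below: the page is resident
		 * and not busy, so we return 0 with everything still locked;
		 * the page is busy, so we unlock everything and sleep until
		 * its owner wakes us, then retry; or there is no resident
		 * page at all, so we allocate one and page it in from swap,
		 * which requires a write lock on the anon.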
318 */ 319 320 lock_type = rw_lock_op(anon->an_lock); 321 if (pg) { 322 323 /* 324 * at this point, if the page has a uobject [meaning 325 * we have it on loan], then that uobject is locked 326 * by us! if the page is busy, we drop all the 327 * locks (including uobject) and try again. 328 */ 329 330 if ((pg->flags & PG_BUSY) == 0) { 331 UVMHIST_LOG(maphist, "<- OK",0,0,0,0); 332 return 0; 333 } 334 cpu_count(CPU_COUNT_FLTPGWAIT, 1); 335 336 /* 337 * The last unlock must be an atomic unlock and wait 338 * on the owner of page. 339 */ 340 341 if (pg->uobject) { 342 /* Owner of page is UVM object. */ 343 uvmfault_unlockall(ufi, amap, NULL); 344 UVMHIST_LOG(maphist, " unlock+wait on uobj",0, 345 0,0,0); 346 uvm_pagewait(pg, pg->uobject->vmobjlock, "anonget1"); 347 } else { 348 /* Owner of page is anon. */ 349 uvmfault_unlockall(ufi, NULL, NULL); 350 UVMHIST_LOG(maphist, " unlock+wait on anon",0, 351 0,0,0); 352 uvm_pagewait(pg, anon->an_lock, "anonget2"); 353 } 354 } else { 355 #if defined(VMSWAP) 356 /* 357 * No page, therefore allocate one. A write lock is 358 * required for this. If the caller didn't supply 359 * one, fail now and have them retry. 360 */ 361 362 if (lock_type == RW_READER) { 363 return ENOLCK; 364 } 365 pg = uvm_pagealloc(NULL, 366 ufi != NULL ? ufi->orig_rvaddr : 0, 367 anon, ufi != NULL ? UVM_FLAG_COLORMATCH : 0); 368 if (pg == NULL) { 369 /* Out of memory. Wait a little. */ 370 uvmfault_unlockall(ufi, amap, NULL); 371 cpu_count(CPU_COUNT_FLTNORAM, 1); 372 UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0, 373 0,0,0); 374 if (!uvm_reclaimable()) { 375 return ENOMEM; 376 } 377 uvm_wait("flt_noram1"); 378 } else { 379 /* PG_BUSY bit is set. */ 380 we_own = true; 381 uvmfault_unlockall(ufi, amap, NULL); 382 383 /* 384 * Pass a PG_BUSY+PG_FAKE clean page into 385 * the uvm_swap_get() function with all data 386 * structures unlocked. Note that it is OK 387 * to read an_swslot here, because we hold 388 * PG_BUSY on the page. 389 */ 390 cpu_count(CPU_COUNT_PAGEINS, 1); 391 error = uvm_swap_get(pg, anon->an_swslot, 392 PGO_SYNCIO); 393 394 /* 395 * We clean up after the I/O below in the 396 * 'we_own' case. 397 */ 398 } 399 #else 400 panic("%s: no page", __func__); 401 #endif /* defined(VMSWAP) */ 402 } 403 404 /* 405 * Re-lock the map and anon. 406 */ 407 408 locked = uvmfault_relock(ufi); 409 if (locked || we_own) { 410 rw_enter(anon->an_lock, lock_type); 411 } 412 413 /* 414 * If we own the page (i.e. we set PG_BUSY), then we need 415 * to clean up after the I/O. There are three cases to 416 * consider: 417 * 418 * 1) Page was released during I/O: free anon and ReFault. 419 * 2) I/O not OK. Free the page and cause the fault to fail. 420 * 3) I/O OK! Activate the page and sync with the non-we_own 421 * case (i.e. drop anon lock if not locked). 422 */ 423 424 if (we_own) { 425 KASSERT(lock_type == RW_WRITER); 426 #if defined(VMSWAP) 427 if (error) { 428 429 /* 430 * Remove the swap slot from the anon and 431 * mark the anon as having no real slot. 432 * Do not free the swap slot, thus preventing 433 * it from being used again. 434 */ 435 436 if (anon->an_swslot > 0) { 437 uvm_swap_markbad(anon->an_swslot, 1); 438 } 439 anon->an_swslot = SWSLOT_BAD; 440 441 if ((pg->flags & PG_RELEASED) != 0) { 442 goto released; 443 } 444 445 /* 446 * Note: page was never !PG_BUSY, so it 447 * cannot be mapped and thus no need to 448 * pmap_page_protect() it. 
449 */ 450 451 uvm_pagefree(pg); 452 453 if (locked) { 454 uvmfault_unlockall(ufi, NULL, NULL); 455 } 456 rw_exit(anon->an_lock); 457 UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); 458 return error; 459 } 460 461 if ((pg->flags & PG_RELEASED) != 0) { 462 released: 463 KASSERT(anon->an_ref == 0); 464 465 /* 466 * Released while we had unlocked amap. 467 */ 468 469 if (locked) { 470 uvmfault_unlockall(ufi, NULL, NULL); 471 } 472 uvm_anon_release(anon); 473 474 if (error) { 475 UVMHIST_LOG(maphist, 476 "<- ERROR/RELEASED", 0,0,0,0); 477 return error; 478 } 479 480 UVMHIST_LOG(maphist, "<- RELEASED", 0,0,0,0); 481 return ERESTART; 482 } 483 484 /* 485 * We have successfully read the page, activate it. 486 */ 487 488 uvm_pagelock(pg); 489 uvm_pageactivate(pg); 490 uvm_pagewakeup(pg); 491 uvm_pageunlock(pg); 492 pg->flags &= ~(PG_BUSY|PG_FAKE); 493 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_UNKNOWN); 494 UVM_PAGE_OWN(pg, NULL); 495 #else 496 panic("%s: we_own", __func__); 497 #endif /* defined(VMSWAP) */ 498 } 499 500 /* 501 * We were not able to re-lock the map - restart the fault. 502 */ 503 504 if (!locked) { 505 if (we_own) { 506 rw_exit(anon->an_lock); 507 } 508 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 509 return ERESTART; 510 } 511 512 /* 513 * Verify that no one has touched the amap and moved 514 * the anon on us. 515 */ 516 517 if (ufi != NULL && amap_lookup(&ufi->entry->aref, 518 ufi->orig_rvaddr - ufi->entry->start) != anon) { 519 520 uvmfault_unlockall(ufi, amap, NULL); 521 UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); 522 return ERESTART; 523 } 524 525 /* 526 * Retry.. 527 */ 528 529 cpu_count(CPU_COUNT_FLTANRETRY, 1); 530 continue; 531 } 532 /*NOTREACHED*/ 533 } 534 535 /* 536 * uvmfault_promote: promote data to a new anon. used for 1B and 2B. 537 * 538 * 1. allocate an anon and a page. 539 * 2. fill its contents. 540 * 3. put it into amap. 541 * 542 * => if we fail (result != 0) we unlock everything. 543 * => on success, return a new locked anon via 'nanon'. 544 * (*nanon)->an_page will be a resident, locked, dirty page. 545 * => it's caller's responsibility to put the promoted nanon->an_page to the 546 * page queue. 547 */ 548 549 static int 550 uvmfault_promote(struct uvm_faultinfo *ufi, 551 struct vm_anon *oanon, 552 struct vm_page *uobjpage, 553 struct vm_anon **nanon, /* OUT: allocated anon */ 554 struct vm_anon **spare) 555 { 556 struct vm_amap *amap = ufi->entry->aref.ar_amap; 557 struct uvm_object *uobj; 558 struct vm_anon *anon; 559 struct vm_page *pg; 560 struct vm_page *opg; 561 int error; 562 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 563 564 if (oanon) { 565 /* anon COW */ 566 opg = oanon->an_page; 567 KASSERT(opg != NULL); 568 KASSERT(opg->uobject == NULL || opg->loan_count > 0); 569 } else if (uobjpage != PGO_DONTCARE) { 570 /* object-backed COW */ 571 opg = uobjpage; 572 if ((uobjpage->flags & PG_BUSY) != 0) { 573 KASSERT(rw_write_held(opg->uobject->vmobjlock)); 574 } else { 575 KASSERT(rw_read_held(opg->uobject->vmobjlock)); 576 } 577 } else { 578 /* ZFOD */ 579 opg = NULL; 580 } 581 if (opg != NULL) { 582 uobj = opg->uobject; 583 } else { 584 uobj = NULL; 585 } 586 587 KASSERT(amap != NULL); 588 KASSERT(uobjpage != NULL); 589 KASSERT(rw_write_held(amap->am_lock)); 590 KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock); 591 KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock)); 592 593 if (*spare != NULL) { 594 anon = *spare; 595 *spare = NULL; 596 } else { 597 anon = uvm_analloc(); 598 } 599 if (anon) { 600 601 /* 602 * The new anon is locked. 
	 *
	 * if opg == NULL, we want a zero'd, dirty page,
	 * so have uvm_pagealloc() do that for us.
	 */

		KASSERT(anon->an_lock == NULL);
		anon->an_lock = amap->am_lock;
		pg = uvm_pagealloc(NULL, ufi->orig_rvaddr, anon,
		    UVM_FLAG_COLORMATCH | (opg == NULL ? UVM_PGA_ZERO : 0));
		if (pg == NULL) {
			anon->an_lock = NULL;
		}
	} else {
		pg = NULL;
	}

	/*
	 * out of memory resources?
	 */

	if (pg == NULL) {
		/* save anon for the next try. */
		if (anon != NULL) {
			*spare = anon;
		}

		/* unlock and fail ... */
		if (uobjpage != PGO_DONTCARE &&
		    (uobjpage->flags & PG_BUSY) != 0) {
			uvm_page_unbusy(&uobjpage, 1);
		}
		uvmfault_unlockall(ufi, amap, uobj);
		if (!uvm_reclaimable()) {
			UVMHIST_LOG(maphist, "out of VM", 0,0,0,0);
			cpu_count(CPU_COUNT_FLTNOANON, 1);
			error = ENOMEM;
			goto done;
		}

		UVMHIST_LOG(maphist, "out of RAM, waiting for more", 0,0,0,0);
		cpu_count(CPU_COUNT_FLTNORAM, 1);
		uvm_wait("flt_noram5");
		error = ERESTART;
		goto done;
	}

	/* copy page [pg now dirty] */
	if (opg) {
		uvm_pagecopy(opg, pg);
	}
	KASSERT(uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY);

	amap_add(&ufi->entry->aref, ufi->orig_rvaddr - ufi->entry->start, anon,
	    oanon != NULL);

	*nanon = anon;
	error = 0;
done:
	return error;
}

/*
 * Update statistics after fault resolution.
 * - maxrss
 */
void
uvmfault_update_stats(struct uvm_faultinfo *ufi)
{
	struct vm_map *map;
	struct vmspace *vm;
	struct proc *p;
	vsize_t res;

	map = ufi->orig_map;

	p = curproc;
	KASSERT(p != NULL);
	vm = p->p_vmspace;

	if (&vm->vm_map != map)
		return;

	res = pmap_resident_count(map->pmap);
	if (vm->vm_rssmax < res)
		vm->vm_rssmax = res;
}

/*
 *   F A U L T   -   m a i n   e n t r y   p o i n t
 */

/*
 * uvm_fault: page fault handler
 *
 * => called from MD code to resolve a page fault
 * => VM data structures usually should be unlocked.   however, it is
 *	possible to call here with the main map locked if the caller
 *	gets a write lock, sets it recursive, and then calls us (c.f.
 *	uvm_map_pageable).   this should be avoided because it keeps
 *	the map locked off during I/O.
 * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT
 */

#define MASK(entry)	(UVM_ET_ISCOPYONWRITE(entry) ? \
			 ~VM_PROT_WRITE : VM_PROT_ALL)

/* fault_flag values passed from uvm_fault_wire to uvm_fault_internal */
#define UVM_FAULT_WIRE		(1 << 0)
#define UVM_FAULT_MAXPROT	(1 << 1)

struct uvm_faultctx {

	/*
	 * the following members are set up by uvm_fault_check() and
	 * read-only after that.
	 *
	 * note that narrow is used by uvm_fault_check() to change
	 * the behaviour after ERESTART.
	 *
	 * most of them might change after RESTART if the underlying
	 * map entry has been changed behind us.  an exception is
	 * wire_paging, which never changes.
	 */
	vm_prot_t access_type;
	vaddr_t startva;
	int npages;
	int centeridx;
	bool narrow;		/* work on a single requested page only */
	bool wire_mapping;	/* request a PMAP_WIRED mapping
				   (UVM_FAULT_WIRE or VM_MAPENT_ISWIRED) */
	bool wire_paging;	/* request uvm_pagewire
				   (true for UVM_FAULT_WIRE) */
	bool cow_now;		/* VM_PROT_WRITE is actually requested
				   (ie. should break COW and page loaning) */

	/*
	 * enter_prot is set up by uvm_fault_check() and clamped
	 * (ie. drop the VM_PROT_WRITE bit) in various places in case
	 * of !cow_now.
	 */
	vm_prot_t enter_prot;	/* prot at which we want to enter pages in */

	/*
	 * the following member is for uvmfault_promote() and ERESTART.
	 */
	struct vm_anon *anon_spare;

	/*
	 * the following is actually a uvm_fault_lower() internal.
	 * it's here merely for debugging.
	 * (or due to the mechanical separation of the function?)
	 */
	bool promote;

	/*
	 * type of lock to acquire on objects in both layers.
	 */
	krw_t lower_lock_type;
	krw_t upper_lock_type;
};

static inline int	uvm_fault_check(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon ***, bool);

static int		uvm_fault_upper(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon **);
static inline int	uvm_fault_upper_lookup(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_anon **, struct vm_page **);
static inline void	uvm_fault_upper_neighbor(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    vaddr_t, struct vm_page *, bool);
static inline int	uvm_fault_upper_loan(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_anon *, struct uvm_object **);
static inline int	uvm_fault_upper_promote(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *);
static inline int	uvm_fault_upper_direct(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *);
static int		uvm_fault_upper_enter(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *, struct vm_anon *,
			    struct vm_page *, struct vm_anon *);
static inline void	uvm_fault_upper_done(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_anon *, struct vm_page *);

static int		uvm_fault_lower(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct vm_page **);
static inline void	uvm_fault_lower_lookup(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct vm_page **);
static inline void	uvm_fault_lower_neighbor(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    vaddr_t, struct vm_page *);
static inline int	uvm_fault_lower_io(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object **, struct vm_page **);
static inline int	uvm_fault_lower_direct(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);
static inline int	uvm_fault_lower_direct_loan(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page **,
			    struct vm_page **);
static inline int	uvm_fault_lower_promote(
			    struct uvm_faultinfo *, struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);
static int		uvm_fault_lower_enter(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *,
			    struct vm_anon *, struct vm_page *);
static inline void	uvm_fault_lower_done(
			    struct uvm_faultinfo *, const struct uvm_faultctx *,
			    struct uvm_object *, struct vm_page *);

int
uvm_fault_internal(struct vm_map *orig_map, vaddr_t vaddr,
    vm_prot_t access_type, int fault_flag)
{
	struct uvm_faultinfo ufi;
	struct uvm_faultctx flt = {
		.access_type = access_type,

		/* don't look for neighborhood pages on "wire" fault */
833 .narrow = (fault_flag & UVM_FAULT_WIRE) != 0, 834 835 /* "wire" fault causes wiring of both mapping and paging */ 836 .wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0, 837 .wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0, 838 839 /* 840 * default lock type to acquire on upper & lower layer 841 * objects: reader. this can be upgraded at any point 842 * during the fault from read -> write and uvm_faultctx 843 * changed to match, but is never downgraded write -> read. 844 */ 845 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */ 846 .upper_lock_type = RW_WRITER, 847 .lower_lock_type = RW_WRITER, 848 #else 849 .upper_lock_type = RW_READER, 850 .lower_lock_type = RW_READER, 851 #endif 852 }; 853 const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0; 854 struct vm_anon *anons_store[UVM_MAXRANGE], **anons; 855 struct vm_page *pages_store[UVM_MAXRANGE], **pages; 856 int error; 857 858 UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist); 859 860 UVMHIST_LOG(maphist, "(map=%#jx, vaddr=%#jx, at=%jd, ff=%jd)", 861 (uintptr_t)orig_map, vaddr, access_type, fault_flag); 862 863 /* Don't count anything until user interaction is possible */ 864 kpreempt_disable(); 865 if (__predict_true(start_init_exec)) { 866 struct cpu_info *ci = curcpu(); 867 CPU_COUNT(CPU_COUNT_NFAULT, 1); 868 /* Don't flood RNG subsystem with samples. */ 869 if (++(ci->ci_faultrng) == 503) { 870 ci->ci_faultrng = 0; 871 rnd_add_uint32(&curcpu()->ci_data.cpu_uvm->rs, 872 sizeof(vaddr_t) == sizeof(uint32_t) ? 873 (uint32_t)vaddr : sizeof(vaddr_t) == 874 sizeof(uint64_t) ? 875 (uint32_t)vaddr : 876 (uint32_t)ci->ci_counts[CPU_COUNT_NFAULT]); 877 } 878 } 879 kpreempt_enable(); 880 881 /* 882 * init the IN parameters in the ufi 883 */ 884 885 ufi.orig_map = orig_map; 886 ufi.orig_rvaddr = trunc_page(vaddr); 887 ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */ 888 889 error = ERESTART; 890 while (error == ERESTART) { /* ReFault: */ 891 anons = anons_store; 892 pages = pages_store; 893 894 error = uvm_fault_check(&ufi, &flt, &anons, maxprot); 895 if (error != 0) 896 continue; 897 898 error = uvm_fault_upper_lookup(&ufi, &flt, anons, pages); 899 if (error != 0) 900 continue; 901 902 if (pages[flt.centeridx] == PGO_DONTCARE) 903 error = uvm_fault_upper(&ufi, &flt, anons); 904 else { 905 struct uvm_object * const uobj = 906 ufi.entry->object.uvm_obj; 907 908 if (uobj && uobj->pgops->pgo_fault != NULL) { 909 /* 910 * invoke "special" fault routine. 911 */ 912 rw_enter(uobj->vmobjlock, RW_WRITER); 913 /* locked: maps(read), amap(if there), uobj */ 914 error = uobj->pgops->pgo_fault(&ufi, 915 flt.startva, pages, flt.npages, 916 flt.centeridx, flt.access_type, 917 PGO_LOCKED|PGO_SYNCIO); 918 919 /* 920 * locked: nothing, pgo_fault has unlocked 921 * everything 922 */ 923 924 /* 925 * object fault routine responsible for 926 * pmap_update(). 927 */ 928 929 /* 930 * Wake up the pagedaemon if the fault method 931 * failed for lack of memory but some can be 932 * reclaimed. 933 */ 934 if (error == ENOMEM && uvm_reclaimable()) { 935 uvm_wait("pgo_fault"); 936 error = ERESTART; 937 } 938 } else { 939 error = uvm_fault_lower(&ufi, &flt, pages); 940 } 941 } 942 } 943 944 if (flt.anon_spare != NULL) { 945 flt.anon_spare->an_ref--; 946 KASSERT(flt.anon_spare->an_ref == 0); 947 KASSERT(flt.anon_spare->an_lock == NULL); 948 uvm_anfree(flt.anon_spare); 949 } 950 return error; 951 } 952 953 /* 954 * uvm_fault_check: check prot, handle needs-copy, etc. 955 * 956 * 1. lookup entry. 957 * 2. check protection. 958 * 3. 
adjust fault condition (mainly for simulated fault). 959 * 4. handle needs-copy (lazy amap copy). 960 * 5. establish range of interest for neighbor fault (aka pre-fault). 961 * 6. look up anons (if amap exists). 962 * 7. flush pages (if MADV_SEQUENTIAL) 963 * 964 * => called with nothing locked. 965 * => if we fail (result != 0) we unlock everything. 966 * => initialize/adjust many members of flt. 967 */ 968 969 static int 970 uvm_fault_check( 971 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 972 struct vm_anon ***ranons, bool maxprot) 973 { 974 struct vm_amap *amap; 975 struct uvm_object *uobj; 976 vm_prot_t check_prot; 977 int nback, nforw; 978 UVMHIST_FUNC("uvm_fault_check"); UVMHIST_CALLED(maphist); 979 980 /* 981 * lookup and lock the maps 982 */ 983 984 if (uvmfault_lookup(ufi, false) == false) { 985 UVMHIST_LOG(maphist, "<- no mapping @ %#jx", ufi->orig_rvaddr, 986 0,0,0); 987 return EFAULT; 988 } 989 /* locked: maps(read) */ 990 991 #ifdef DIAGNOSTIC 992 if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0) { 993 printf("Page fault on non-pageable map:\n"); 994 printf("ufi->map = %p\n", ufi->map); 995 printf("ufi->orig_map = %p\n", ufi->orig_map); 996 printf("ufi->orig_rvaddr = %#lx\n", (u_long) ufi->orig_rvaddr); 997 panic("uvm_fault: (ufi->map->flags & VM_MAP_PAGEABLE) == 0"); 998 } 999 #endif 1000 1001 /* 1002 * check protection 1003 */ 1004 1005 check_prot = maxprot ? 1006 ufi->entry->max_protection : ufi->entry->protection; 1007 if ((check_prot & flt->access_type) != flt->access_type) { 1008 UVMHIST_LOG(maphist, 1009 "<- protection failure (prot=%#jx, access=%#jx)", 1010 ufi->entry->protection, flt->access_type, 0, 0); 1011 uvmfault_unlockmaps(ufi, false); 1012 return EFAULT; 1013 } 1014 1015 /* 1016 * "enter_prot" is the protection we want to enter the page in at. 1017 * for certain pages (e.g. copy-on-write pages) this protection can 1018 * be more strict than ufi->entry->protection. "wired" means either 1019 * the entry is wired or we are fault-wiring the pg. 1020 */ 1021 1022 flt->enter_prot = ufi->entry->protection; 1023 if (VM_MAPENT_ISWIRED(ufi->entry)) { 1024 flt->wire_mapping = true; 1025 flt->wire_paging = true; 1026 flt->narrow = true; 1027 } 1028 1029 if (flt->wire_mapping) { 1030 flt->access_type = flt->enter_prot; /* full access for wired */ 1031 flt->cow_now = (check_prot & VM_PROT_WRITE) != 0; 1032 } else { 1033 flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0; 1034 } 1035 1036 if (flt->wire_paging) { 1037 /* wiring pages requires a write lock. */ 1038 flt->upper_lock_type = RW_WRITER; 1039 flt->lower_lock_type = RW_WRITER; 1040 } 1041 1042 flt->promote = false; 1043 1044 /* 1045 * handle "needs_copy" case. if we need to copy the amap we will 1046 * have to drop our readlock and relock it with a write lock. (we 1047 * need a write lock to change anything in a map entry [e.g. 1048 * needs_copy]). 
1049 */ 1050 1051 if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 1052 if (flt->cow_now || (ufi->entry->object.uvm_obj == NULL)) { 1053 KASSERT(!maxprot); 1054 /* need to clear */ 1055 UVMHIST_LOG(maphist, 1056 " need to clear needs_copy and refault",0,0,0,0); 1057 uvmfault_unlockmaps(ufi, false); 1058 uvmfault_amapcopy(ufi); 1059 cpu_count(CPU_COUNT_FLTAMCOPY, 1); 1060 return ERESTART; 1061 1062 } else { 1063 1064 /* 1065 * ensure that we pmap_enter page R/O since 1066 * needs_copy is still true 1067 */ 1068 1069 flt->enter_prot &= ~VM_PROT_WRITE; 1070 } 1071 } 1072 1073 /* 1074 * identify the players 1075 */ 1076 1077 amap = ufi->entry->aref.ar_amap; /* upper layer */ 1078 uobj = ufi->entry->object.uvm_obj; /* lower layer */ 1079 1080 /* 1081 * check for a case 0 fault. if nothing backing the entry then 1082 * error now. 1083 */ 1084 1085 if (amap == NULL && uobj == NULL) { 1086 uvmfault_unlockmaps(ufi, false); 1087 UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); 1088 return EFAULT; 1089 } 1090 1091 /* 1092 * establish range of interest based on advice from mapper 1093 * and then clip to fit map entry. note that we only want 1094 * to do this the first time through the fault. if we 1095 * ReFault we will disable this by setting "narrow" to true. 1096 */ 1097 1098 if (flt->narrow == false) { 1099 1100 /* wide fault (!narrow) */ 1101 KASSERT(uvmadvice[ufi->entry->advice].advice == 1102 ufi->entry->advice); 1103 nback = MIN(uvmadvice[ufi->entry->advice].nback, 1104 (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT); 1105 flt->startva = ufi->orig_rvaddr - (nback << PAGE_SHIFT); 1106 /* 1107 * note: "-1" because we don't want to count the 1108 * faulting page as forw 1109 */ 1110 nforw = MIN(uvmadvice[ufi->entry->advice].nforw, 1111 ((ufi->entry->end - ufi->orig_rvaddr) >> 1112 PAGE_SHIFT) - 1); 1113 flt->npages = nback + nforw + 1; 1114 flt->centeridx = nback; 1115 1116 flt->narrow = true; /* ensure only once per-fault */ 1117 1118 } else { 1119 1120 /* narrow fault! */ 1121 nback = nforw = 0; 1122 flt->startva = ufi->orig_rvaddr; 1123 flt->npages = 1; 1124 flt->centeridx = 0; 1125 1126 } 1127 /* offset from entry's start to pgs' start */ 1128 const voff_t eoff = flt->startva - ufi->entry->start; 1129 1130 /* locked: maps(read) */ 1131 UVMHIST_LOG(maphist, " narrow=%jd, back=%jd, forw=%jd, startva=%#jx", 1132 flt->narrow, nback, nforw, flt->startva); 1133 UVMHIST_LOG(maphist, " entry=%#jx, amap=%#jx, obj=%#jx", 1134 (uintptr_t)ufi->entry, (uintptr_t)amap, (uintptr_t)uobj, 0); 1135 1136 /* 1137 * guess at the most suitable lock types to acquire. 1138 * if we've got an amap then lock it and extract current anons. 1139 */ 1140 1141 if (amap) { 1142 if ((amap_flags(amap) & AMAP_SHARED) == 0) { 1143 /* 1144 * the amap isn't shared. get a writer lock to 1145 * avoid the cost of upgrading the lock later if 1146 * needed. 1147 * 1148 * XXX nice for PostgreSQL, but consider threads. 1149 */ 1150 flt->upper_lock_type = RW_WRITER; 1151 } else if ((flt->access_type & VM_PROT_WRITE) != 0) { 1152 /* 1153 * assume we're about to COW. 1154 */ 1155 flt->upper_lock_type = RW_WRITER; 1156 } 1157 amap_lock(amap, flt->upper_lock_type); 1158 amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages); 1159 } else { 1160 if ((flt->access_type & VM_PROT_WRITE) != 0) { 1161 /* 1162 * we are about to dirty the object and that 1163 * requires a write lock. 
1164 */ 1165 flt->lower_lock_type = RW_WRITER; 1166 } 1167 *ranons = NULL; /* to be safe */ 1168 } 1169 1170 /* locked: maps(read), amap(if there) */ 1171 KASSERT(amap == NULL || 1172 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1173 1174 /* 1175 * for MADV_SEQUENTIAL mappings we want to deactivate the back pages 1176 * now and then forget about them (for the rest of the fault). 1177 */ 1178 1179 if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) { 1180 1181 UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages", 1182 0,0,0,0); 1183 /* flush back-page anons? */ 1184 if (amap) 1185 uvmfault_anonflush(*ranons, nback); 1186 1187 /* flush object? */ 1188 if (uobj) { 1189 voff_t uoff; 1190 1191 uoff = ufi->entry->offset + eoff; 1192 rw_enter(uobj->vmobjlock, RW_WRITER); 1193 (void) (uobj->pgops->pgo_put)(uobj, uoff, uoff + 1194 (nback << PAGE_SHIFT), PGO_DEACTIVATE); 1195 } 1196 1197 /* now forget about the backpages */ 1198 if (amap) 1199 *ranons += nback; 1200 flt->startva += (nback << PAGE_SHIFT); 1201 flt->npages -= nback; 1202 flt->centeridx = 0; 1203 } 1204 /* 1205 * => startva is fixed 1206 * => npages is fixed 1207 */ 1208 KASSERT(flt->startva <= ufi->orig_rvaddr); 1209 KASSERT(ufi->orig_rvaddr + ufi->orig_size <= 1210 flt->startva + (flt->npages << PAGE_SHIFT)); 1211 return 0; 1212 } 1213 1214 /* 1215 * uvm_fault_upper_upgrade: upgrade upper lock, reader -> writer 1216 */ 1217 1218 static inline int 1219 uvm_fault_upper_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1220 struct vm_amap *amap, struct uvm_object *uobj) 1221 { 1222 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1223 1224 KASSERT(amap != NULL); 1225 KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock)); 1226 1227 /* 1228 * fast path. 1229 */ 1230 1231 if (__predict_true(flt->upper_lock_type == RW_WRITER)) { 1232 return 0; 1233 } 1234 1235 /* 1236 * otherwise try for the upgrade. if we don't get it, unlock 1237 * everything, restart the fault and next time around get a writer 1238 * lock. 1239 */ 1240 1241 flt->upper_lock_type = RW_WRITER; 1242 if (__predict_false(!rw_tryupgrade(amap->am_lock))) { 1243 uvmfault_unlockall(ufi, amap, uobj); 1244 cpu_count(CPU_COUNT_FLTNOUP, 1); 1245 UVMHIST_LOG(maphist, " !upgrade upper", 0, 0,0,0); 1246 return ERESTART; 1247 } 1248 cpu_count(CPU_COUNT_FLTUP, 1); 1249 KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock)); 1250 return 0; 1251 } 1252 1253 /* 1254 * uvm_fault_upper_lookup: look up existing h/w mapping and amap. 1255 * 1256 * iterate range of interest: 1257 * 1. check if h/w mapping exists. if yes, we don't care 1258 * 2. check if anon exists. if not, page is lower. 1259 * 3. if anon exists, enter h/w mapping for neighbors. 1260 * 1261 * => called with amap locked (if exists). 1262 */ 1263 1264 static int 1265 uvm_fault_upper_lookup( 1266 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1267 struct vm_anon **anons, struct vm_page **pages) 1268 { 1269 struct vm_amap *amap = ufi->entry->aref.ar_amap; 1270 int lcv; 1271 vaddr_t currva; 1272 bool shadowed __unused; 1273 bool entered; 1274 UVMHIST_FUNC("uvm_fault_upper_lookup"); UVMHIST_CALLED(maphist); 1275 1276 /* locked: maps(read), amap(if there) */ 1277 KASSERT(amap == NULL || 1278 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1279 1280 /* 1281 * map in the backpages and frontpages we found in the amap in hopes 1282 * of preventing future faults. we also init the pages[] array as 1283 * we go. 
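	 *
	 * as a sketch of the layout (using the UVM_ADV_NORMAL numbers from
	 * uvmadvice[]: nback 3, nforw 4, so npages 8 and centeridx 3; the
	 * real values come from uvm_fault_check()):
	 *
	 *	startva                        ufi->orig_rvaddr
	 *	   |                                  |
	 *	anons[0] .. anons[2]   anons[3]   anons[4] .. anons[7]
	 *	pages[0] .. pages[2]   pages[3]   pages[4] .. pages[7]
	 *	     (back pages)      (center)       (forward pages)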
1284 */ 1285 1286 currva = flt->startva; 1287 shadowed = false; 1288 entered = false; 1289 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { 1290 /* 1291 * unmapped or center page. check if any anon at this level. 1292 */ 1293 if (amap == NULL || anons[lcv] == NULL) { 1294 pages[lcv] = NULL; 1295 continue; 1296 } 1297 1298 /* 1299 * check for present page and map if possible. 1300 */ 1301 1302 pages[lcv] = PGO_DONTCARE; 1303 if (lcv == flt->centeridx) { /* save center for later! */ 1304 shadowed = true; 1305 continue; 1306 } 1307 1308 struct vm_anon *anon = anons[lcv]; 1309 struct vm_page *pg = anon->an_page; 1310 1311 KASSERT(anon->an_lock == amap->am_lock); 1312 1313 /* 1314 * ignore loaned and busy pages. 1315 * don't play with VAs that are already mapped. 1316 */ 1317 1318 if (pg && pg->loan_count == 0 && (pg->flags & PG_BUSY) == 0 && 1319 !pmap_extract(ufi->orig_map->pmap, currva, NULL)) { 1320 uvm_fault_upper_neighbor(ufi, flt, currva, 1321 pg, anon->an_ref > 1); 1322 entered = true; 1323 } 1324 } 1325 if (entered) { 1326 pmap_update(ufi->orig_map->pmap); 1327 } 1328 1329 /* locked: maps(read), amap(if there) */ 1330 KASSERT(amap == NULL || 1331 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1332 /* (shadowed == true) if there is an anon at the faulting address */ 1333 UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed, 1334 (ufi->entry->object.uvm_obj && shadowed != false),0,0); 1335 1336 /* 1337 * note that if we are really short of RAM we could sleep in the above 1338 * call to pmap_enter with everything locked. bad? 1339 * 1340 * XXX Actually, that is bad; pmap_enter() should just fail in that 1341 * XXX case. --thorpej 1342 */ 1343 1344 return 0; 1345 } 1346 1347 /* 1348 * uvm_fault_upper_neighbor: enter single upper neighbor page. 1349 * 1350 * => called with amap and anon locked. 1351 */ 1352 1353 static void 1354 uvm_fault_upper_neighbor( 1355 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1356 vaddr_t currva, struct vm_page *pg, bool readonly) 1357 { 1358 UVMHIST_FUNC("uvm_fault_upper_neighbor"); UVMHIST_CALLED(maphist); 1359 1360 /* locked: amap, anon */ 1361 1362 KASSERT(pg->uobject == NULL); 1363 KASSERT(pg->uanon != NULL); 1364 KASSERT(rw_lock_op(pg->uanon->an_lock) == flt->upper_lock_type); 1365 KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); 1366 1367 /* 1368 * in the read-locked case, it's not possible for this to be a new 1369 * page, therefore it's enqueued already. there wasn't a direct 1370 * fault on the page, so avoid the cost of re-enqueuing it unless 1371 * write-locked. 1372 */ 1373 1374 if (flt->upper_lock_type == RW_WRITER) { 1375 uvm_pagelock(pg); 1376 uvm_pageenqueue(pg); 1377 uvm_pageunlock(pg); 1378 } 1379 UVMHIST_LOG(maphist, 1380 " MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx", 1381 (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); 1382 cpu_count(CPU_COUNT_FLTNAMAP, 1); 1383 1384 /* 1385 * Since this page isn't the page that's actually faulting, 1386 * ignore pmap_enter() failures; it's not critical that we 1387 * enter these right now. 1388 */ 1389 1390 (void) pmap_enter(ufi->orig_map->pmap, currva, 1391 VM_PAGE_TO_PHYS(pg), 1392 readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : 1393 flt->enter_prot, 1394 PMAP_CANFAIL | (flt->wire_mapping ? PMAP_WIRED : 0)); 1395 } 1396 1397 /* 1398 * uvm_fault_upper: handle upper fault. 1399 * 1400 * 1. acquire anon lock. 1401 * 2. get anon. let uvmfault_anonget do the dirty work. 1402 * 3. handle loan. 1403 * 4. dispatch direct or promote handlers. 
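 *
 * a compact sketch of the final dispatch (illustrative; the real test,
 * which also sets flt->promote, is at the end of the function below):
 *
 *	if (flt->cow_now && anon->an_ref > 1)
 *		error = uvm_fault_upper_promote(ufi, flt, uobj, anon);
 *	else
 *		error = uvm_fault_upper_direct(ufi, flt, uobj, anon);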
1404 */ 1405 1406 static int 1407 uvm_fault_upper( 1408 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1409 struct vm_anon **anons) 1410 { 1411 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1412 struct vm_anon * const anon = anons[flt->centeridx]; 1413 struct uvm_object *uobj; 1414 int error; 1415 UVMHIST_FUNC("uvm_fault_upper"); UVMHIST_CALLED(maphist); 1416 1417 /* locked: maps(read), amap, anon */ 1418 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1419 KASSERT(anon->an_lock == amap->am_lock); 1420 1421 /* 1422 * handle case 1: fault on an anon in our amap 1423 */ 1424 1425 UVMHIST_LOG(maphist, " case 1 fault: anon=%#jx", 1426 (uintptr_t)anon, 0, 0, 0); 1427 1428 /* 1429 * no matter if we have case 1A or case 1B we are going to need to 1430 * have the anon's memory resident. ensure that now. 1431 */ 1432 1433 /* 1434 * let uvmfault_anonget do the dirty work. 1435 * if it fails (!OK) it will unlock everything for us. 1436 * if it succeeds, locks are still valid and locked. 1437 * also, if it is OK, then the anon's page is on the queues. 1438 * if the page is on loan from a uvm_object, then anonget will 1439 * lock that object for us if it does not fail. 1440 */ 1441 retry: 1442 error = uvmfault_anonget(ufi, amap, anon); 1443 switch (error) { 1444 case 0: 1445 break; 1446 1447 case ERESTART: 1448 return ERESTART; 1449 1450 case EAGAIN: 1451 kpause("fltagain1", false, hz/2, NULL); 1452 return ERESTART; 1453 1454 case ENOLCK: 1455 /* it needs a write lock: retry */ 1456 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); 1457 if (error != 0) { 1458 return error; 1459 } 1460 KASSERT(rw_write_held(amap->am_lock)); 1461 goto retry; 1462 1463 default: 1464 return error; 1465 } 1466 1467 /* 1468 * uobj is non null if the page is on loan from an object (i.e. uobj) 1469 */ 1470 1471 uobj = anon->an_page->uobject; /* locked by anonget if !NULL */ 1472 1473 /* locked: maps(read), amap, anon, uobj(if one) */ 1474 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1475 KASSERT(anon->an_lock == amap->am_lock); 1476 KASSERT(uobj == NULL || 1477 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 1478 1479 /* 1480 * special handling for loaned pages 1481 */ 1482 1483 if (anon->an_page->loan_count) { 1484 error = uvm_fault_upper_loan(ufi, flt, anon, &uobj); 1485 if (error != 0) 1486 return error; 1487 } 1488 1489 /* 1490 * if we are case 1B then we will need to allocate a new blank 1491 * anon to transfer the data into. note that we have a lock 1492 * on anon, so no one can busy or release the page until we are done. 1493 * also note that the ref count can't drop to zero here because 1494 * it is > 1 and we are only dropping one ref. 1495 * 1496 * in the (hopefully very rare) case that we are out of RAM we 1497 * will unlock, wait for more RAM, and refault. 1498 * 1499 * if we are out of anon VM we kill the process (XXX: could wait?). 1500 */ 1501 1502 if (flt->cow_now && anon->an_ref > 1) { 1503 flt->promote = true; 1504 error = uvm_fault_upper_promote(ufi, flt, uobj, anon); 1505 } else { 1506 error = uvm_fault_upper_direct(ufi, flt, uobj, anon); 1507 } 1508 return error; 1509 } 1510 1511 /* 1512 * uvm_fault_upper_loan: handle loaned upper page. 1513 * 1514 * 1. if not cow'ing now, simply adjust flt->enter_prot. 1515 * 2. if cow'ing now, and if ref count is 1, break loan. 
1516 */ 1517 1518 static int 1519 uvm_fault_upper_loan( 1520 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1521 struct vm_anon *anon, struct uvm_object **ruobj) 1522 { 1523 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1524 int error = 0; 1525 UVMHIST_FUNC("uvm_fault_upper_loan"); UVMHIST_CALLED(maphist); 1526 1527 if (!flt->cow_now) { 1528 1529 /* 1530 * for read faults on loaned pages we just cap the 1531 * protection at read-only. 1532 */ 1533 1534 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; 1535 1536 } else { 1537 /* 1538 * note that we can't allow writes into a loaned page! 1539 * 1540 * if we have a write fault on a loaned page in an 1541 * anon then we need to look at the anon's ref count. 1542 * if it is greater than one then we are going to do 1543 * a normal copy-on-write fault into a new anon (this 1544 * is not a problem). however, if the reference count 1545 * is one (a case where we would normally allow a 1546 * write directly to the page) then we need to kill 1547 * the loan before we continue. 1548 */ 1549 1550 /* >1 case is already ok */ 1551 if (anon->an_ref == 1) { 1552 /* breaking loan requires a write lock. */ 1553 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); 1554 if (error != 0) { 1555 return error; 1556 } 1557 KASSERT(rw_write_held(amap->am_lock)); 1558 1559 error = uvm_loanbreak_anon(anon, *ruobj); 1560 if (error != 0) { 1561 uvmfault_unlockall(ufi, amap, *ruobj); 1562 uvm_wait("flt_noram2"); 1563 return ERESTART; 1564 } 1565 /* if we were a loan receiver uobj is gone */ 1566 if (*ruobj) 1567 *ruobj = NULL; 1568 } 1569 } 1570 return error; 1571 } 1572 1573 /* 1574 * uvm_fault_upper_promote: promote upper page. 1575 * 1576 * 1. call uvmfault_promote. 1577 * 2. enqueue page. 1578 * 3. deref. 1579 * 4. pass page to uvm_fault_upper_enter. 1580 */ 1581 1582 static int 1583 uvm_fault_upper_promote( 1584 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1585 struct uvm_object *uobj, struct vm_anon *anon) 1586 { 1587 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1588 struct vm_anon * const oanon = anon; 1589 struct vm_page *pg; 1590 int error; 1591 UVMHIST_FUNC("uvm_fault_upper_promote"); UVMHIST_CALLED(maphist); 1592 1593 UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0); 1594 cpu_count(CPU_COUNT_FLT_ACOW, 1); 1595 1596 /* promoting requires a write lock. */ 1597 error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); 1598 if (error != 0) { 1599 return error; 1600 } 1601 KASSERT(rw_write_held(amap->am_lock)); 1602 1603 error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon, 1604 &flt->anon_spare); 1605 switch (error) { 1606 case 0: 1607 break; 1608 case ERESTART: 1609 return ERESTART; 1610 default: 1611 return error; 1612 } 1613 1614 KASSERT(anon->an_lock == oanon->an_lock); 1615 1616 /* uvm_fault_upper_done will activate or enqueue the page */ 1617 pg = anon->an_page; 1618 pg->flags &= ~(PG_BUSY|PG_FAKE); 1619 UVM_PAGE_OWN(pg, NULL); 1620 1621 /* deref: can not drop to zero here by defn! */ 1622 KASSERT(oanon->an_ref > 1); 1623 oanon->an_ref--; 1624 1625 /* 1626 * note: oanon is still locked, as is the new anon. we 1627 * need to check for this later when we unlock oanon; if 1628 * oanon != anon, we'll have to unlock anon, too. 1629 */ 1630 1631 return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon); 1632 } 1633 1634 /* 1635 * uvm_fault_upper_direct: handle direct fault. 
1636 */ 1637 1638 static int 1639 uvm_fault_upper_direct( 1640 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1641 struct uvm_object *uobj, struct vm_anon *anon) 1642 { 1643 struct vm_anon * const oanon = anon; 1644 struct vm_page *pg; 1645 UVMHIST_FUNC("uvm_fault_upper_direct"); UVMHIST_CALLED(maphist); 1646 1647 cpu_count(CPU_COUNT_FLT_ANON, 1); 1648 pg = anon->an_page; 1649 if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ 1650 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; 1651 1652 return uvm_fault_upper_enter(ufi, flt, uobj, anon, pg, oanon); 1653 } 1654 1655 /* 1656 * uvm_fault_upper_enter: enter h/w mapping of upper page. 1657 */ 1658 1659 static int 1660 uvm_fault_upper_enter( 1661 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1662 struct uvm_object *uobj, struct vm_anon *anon, struct vm_page *pg, 1663 struct vm_anon *oanon) 1664 { 1665 struct pmap *pmap = ufi->orig_map->pmap; 1666 vaddr_t va = ufi->orig_rvaddr; 1667 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 1668 UVMHIST_FUNC("uvm_fault_upper_enter"); UVMHIST_CALLED(maphist); 1669 1670 /* locked: maps(read), amap, oanon, anon(if different from oanon) */ 1671 KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1672 KASSERT(anon->an_lock == amap->am_lock); 1673 KASSERT(oanon->an_lock == amap->am_lock); 1674 KASSERT(uobj == NULL || 1675 rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 1676 KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); 1677 1678 /* 1679 * now map the page in. 1680 */ 1681 1682 UVMHIST_LOG(maphist, 1683 " MAPPING: anon: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd", 1684 (uintptr_t)pmap, va, (uintptr_t)pg, flt->promote); 1685 if (pmap_enter(pmap, va, VM_PAGE_TO_PHYS(pg), 1686 flt->enter_prot, flt->access_type | PMAP_CANFAIL | 1687 (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) { 1688 1689 /* 1690 * If pmap_enter() fails, it must not leave behind an existing 1691 * pmap entry. In particular, a now-stale entry for a different 1692 * page would leave the pmap inconsistent with the vm_map. 1693 * This is not to imply that pmap_enter() should remove an 1694 * existing mapping in such a situation (since that could create 1695 * different problems, eg. if the existing mapping is wired), 1696 * but rather that the pmap should be designed such that it 1697 * never needs to fail when the new mapping is replacing an 1698 * existing mapping and the new page has no existing mappings. 1699 */ 1700 1701 KASSERT(!pmap_extract(pmap, va, NULL)); 1702 1703 /* 1704 * ensure that the page is queued in the case that 1705 * we just promoted. 1706 */ 1707 1708 if (flt->upper_lock_type == RW_WRITER) { 1709 uvm_pagelock(pg); 1710 uvm_pageenqueue(pg); 1711 uvm_pageunlock(pg); 1712 } 1713 1714 /* 1715 * No need to undo what we did; we can simply think of 1716 * this as the pmap throwing away the mapping information. 1717 * 1718 * We do, however, have to go through the ReFault path, 1719 * as the map may change while we're asleep. 1720 */ 1721 1722 uvmfault_unlockall(ufi, amap, uobj); 1723 if (!uvm_reclaimable()) { 1724 UVMHIST_LOG(maphist, 1725 "<- failed. out of VM",0,0,0,0); 1726 /* XXX instrumentation */ 1727 return ENOMEM; 1728 } 1729 /* XXX instrumentation */ 1730 uvm_wait("flt_pmfail1"); 1731 return ERESTART; 1732 } 1733 1734 uvm_fault_upper_done(ufi, flt, anon, pg); 1735 1736 /* 1737 * done case 1! 
finish up by unlocking everything and returning success 1738 */ 1739 1740 pmap_update(pmap); 1741 uvmfault_unlockall(ufi, amap, uobj); 1742 return 0; 1743 } 1744 1745 /* 1746 * uvm_fault_upper_done: queue upper center page. 1747 */ 1748 1749 static void 1750 uvm_fault_upper_done( 1751 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1752 struct vm_anon *anon, struct vm_page *pg) 1753 { 1754 const bool wire_paging = flt->wire_paging; 1755 1756 UVMHIST_FUNC("uvm_fault_upper_done"); UVMHIST_CALLED(maphist); 1757 1758 /* 1759 * ... update the page queues. 1760 */ 1761 1762 uvm_pagelock(pg); 1763 if (wire_paging) { 1764 uvm_pagewire(pg); 1765 } else { 1766 uvm_pageactivate(pg); 1767 } 1768 uvm_pageunlock(pg); 1769 1770 if (wire_paging) { 1771 /* 1772 * since the now-wired page cannot be paged out, 1773 * release its swap resources for others to use. 1774 * and since an anon with no swap cannot be clean, 1775 * mark it dirty now. 1776 */ 1777 1778 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 1779 uvm_anon_dropswap(anon); 1780 } 1781 } 1782 1783 /* 1784 * uvm_fault_lower_upgrade: upgrade lower lock, reader -> writer 1785 */ 1786 1787 static inline int 1788 uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 1789 struct vm_amap *amap, struct uvm_object *uobj, struct vm_page *uobjpage) 1790 { 1791 1792 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 1793 1794 KASSERT(uobj != NULL); 1795 KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock)); 1796 1797 /* 1798 * fast path. 1799 */ 1800 1801 if (__predict_true(flt->lower_lock_type == RW_WRITER)) { 1802 KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); 1803 return 0; 1804 } 1805 1806 /* 1807 * otherwise try for the upgrade. if we don't get it, unlock 1808 * everything, restart the fault and next time around get a writer 1809 * lock. 1810 */ 1811 1812 flt->lower_lock_type = RW_WRITER; 1813 if (__predict_false(!rw_tryupgrade(uobj->vmobjlock))) { 1814 uvmfault_unlockall(ufi, amap, uobj); 1815 cpu_count(CPU_COUNT_FLTNOUP, 1); 1816 UVMHIST_LOG(maphist, " !upgrade lower", 0, 0,0,0); 1817 return ERESTART; 1818 } 1819 cpu_count(CPU_COUNT_FLTUP, 1); 1820 KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock)); 1821 1822 /* 1823 * finally, if a page was supplied, assert that it's not busy 1824 * (can't be with a reader lock) and then mark it busy now that 1825 * we have a writer lock. 1826 */ 1827 1828 if (uobjpage != NULL) { 1829 KASSERT((uobjpage->flags & PG_BUSY) == 0); 1830 uobjpage->flags |= PG_BUSY; 1831 UVM_PAGE_OWN(uobjpage, "upgrdlwr"); 1832 } 1833 return 0; 1834 } 1835 1836 /* 1837 * uvm_fault_lower: handle lower fault. 1838 * 1839 * 1. check uobj 1840 * 1.1. if null, ZFOD. 1841 * 1.2. if not null, look up unnmapped neighbor pages. 1842 * 2. for center page, check if promote. 1843 * 2.1. ZFOD always needs promotion. 1844 * 2.2. other uobjs, when entry is marked COW (usually MAP_PRIVATE vnode). 1845 * 3. if uobj is not ZFOD and page is not found, do i/o. 1846 * 4. dispatch either direct / promote fault. 
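 *
 * an illustrative userland trigger for the promote decision (a sketch,
 * not part of this file; it assumes "fd" is an open, readable file):
 *
 *	char *p = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
 *	char c = p[0];		<- read fault: case 2A, vnode page entered R/O
 *	p[0] = c + 1;		<- write fault: case 2B, promoted to a new anon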
 */

static int
uvm_fault_lower(
	struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
	struct vm_page **pages)
{
	struct vm_amap *amap __diagused = ufi->entry->aref.ar_amap;
	struct uvm_object *uobj = ufi->entry->object.uvm_obj;
	struct vm_page *uobjpage;
	int error;
	UVMHIST_FUNC("uvm_fault_lower"); UVMHIST_CALLED(maphist);

	/*
	 * now, if the desired page is not shadowed by the amap and we have
	 * a backing object that does not have a special fault routine, then
	 * we ask (with pgo_get) the object for resident pages that we care
	 * about and attempt to map them in.  we do not let pgo_get block
	 * (PGO_LOCKED).
	 */

	if (uobj == NULL) {
		/* zero fill; don't care about neighbor pages */
		uobjpage = NULL;
	} else {
		uvm_fault_lower_lookup(ufi, flt, pages);
		uobjpage = pages[flt->centeridx];
	}

	/*
	 * note that at this point we are done with any front or back pages.
	 * we are now going to focus on the center page (i.e. the one we've
	 * faulted on).  if we have faulted on the upper (anon) layer
	 * [i.e. case 1], then the anon we want is anons[centeridx] (we have
	 * not touched it yet).  if we have faulted on the bottom (uobj)
	 * layer [i.e. case 2] and the page was both present and available,
	 * then we've got a pointer to it as "uobjpage" and we've already
	 * made it BUSY.
	 */

	/*
	 * locked:
	 * maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
	 */
	KASSERT(amap == NULL ||
	    rw_lock_op(amap->am_lock) == flt->upper_lock_type);
	if (flt->lower_lock_type == RW_WRITER) {
		KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
		KASSERTMSG(uobjpage == NULL ||
		    (uobjpage->flags & PG_BUSY) != 0,
		    "page %p should be busy", uobjpage);
	} else {
		KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock));
		KASSERTMSG(uobjpage == NULL ||
		    (uobjpage->flags & PG_BUSY) == 0,
		    "page %p should not be busy", uobjpage);
	}

	/*
	 * note that uobjpage can not be PGO_DONTCARE at this point.  we now
	 * set uobjpage to PGO_DONTCARE if we are doing a zero fill.  if we
	 * have a backing object, check and see if we are going to promote
	 * the data up to an anon during the fault.
	 */

	if (uobj == NULL) {
		uobjpage = PGO_DONTCARE;
		flt->promote = true;		/* always need anon here */
	} else {
		KASSERT(uobjpage != PGO_DONTCARE);
		flt->promote = flt->cow_now && UVM_ET_ISCOPYONWRITE(ufi->entry);
	}
	UVMHIST_LOG(maphist, "  case 2 fault: promote=%jd, zfill=%jd",
	    flt->promote, (uobj == NULL), 0,0);

	/*
	 * if uobjpage is not null then we do not need to do I/O to get the
	 * uobjpage.
	 *
	 * if uobjpage is null, then we need to unlock and ask the pager to
	 * get the data for us.   once we have the data, we need to reverify
	 * the state of the world.  we are currently not holding any resources.
1929 */ 1930 1931 if (uobjpage) { 1932 /* update rusage counters */ 1933 curlwp->l_ru.ru_minflt++; 1934 } else { 1935 error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage); 1936 if (error != 0) 1937 return error; 1938 } 1939 1940 /* 1941 * locked: 1942 * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) 1943 */ 1944 KASSERT(amap == NULL || 1945 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 1946 if (flt->lower_lock_type == RW_WRITER) { 1947 KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); 1948 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); 1949 } else { 1950 KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock)); 1951 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0); 1952 } 1953 1954 /* 1955 * notes: 1956 * - at this point uobjpage can not be NULL 1957 * - at this point uobjpage can not be PG_RELEASED (since we checked 1958 * for it above) 1959 * - at this point uobjpage could be waited on (handle later) 1960 */ 1961 1962 KASSERT(uobjpage != NULL); 1963 KASSERT(uobj == NULL || uobj == uobjpage->uobject); 1964 KASSERT(uobj == NULL || !UVM_OBJ_IS_CLEAN(uobjpage->uobject) || 1965 uvm_pagegetdirty(uobjpage) == UVM_PAGE_STATUS_CLEAN); 1966 1967 if (!flt->promote) { 1968 error = uvm_fault_lower_direct(ufi, flt, uobj, uobjpage); 1969 } else { 1970 error = uvm_fault_lower_promote(ufi, flt, uobj, uobjpage); 1971 } 1972 return error; 1973 } 1974 1975 /* 1976 * uvm_fault_lower_lookup: look up on-memory uobj pages. 1977 * 1978 * 1. get on-memory pages. 1979 * 2. if failed, give up (get only center page later). 1980 * 3. if succeeded, enter h/w mapping of neighbor pages. 1981 */ 1982 1983 static void 1984 uvm_fault_lower_lookup( 1985 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1986 struct vm_page **pages) 1987 { 1988 struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1989 int lcv, gotpages; 1990 vaddr_t currva; 1991 UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist); 1992 1993 rw_enter(uobj->vmobjlock, flt->lower_lock_type); 1994 1995 /* 1996 * Locked: maps(read), amap(if there), uobj 1997 * 1998 * if we have a read lock on the object, do a PGO_NOBUSY get, which 1999 * will return us pages with PG_BUSY clear. if a write lock is held 2000 * pages will be returned with PG_BUSY set. 2001 */ 2002 2003 cpu_count(CPU_COUNT_FLTLGET, 1); 2004 gotpages = flt->npages; 2005 (void) uobj->pgops->pgo_get(uobj, 2006 ufi->entry->offset + flt->startva - ufi->entry->start, 2007 pages, &gotpages, flt->centeridx, 2008 flt->access_type & MASK(ufi->entry), ufi->entry->advice, 2009 PGO_LOCKED | (flt->lower_lock_type == RW_WRITER ? 
0 : PGO_NOBUSY)); 2010 2011 KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); 2012 2013 /* 2014 * check for pages to map, if we got any 2015 */ 2016 2017 if (gotpages == 0) { 2018 pages[flt->centeridx] = NULL; 2019 return; 2020 } 2021 2022 currva = flt->startva; 2023 for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { 2024 struct vm_page *curpg; 2025 2026 curpg = pages[lcv]; 2027 if (curpg == NULL || curpg == PGO_DONTCARE) { 2028 continue; 2029 } 2030 KASSERT(curpg->uobject == uobj); 2031 2032 if (flt->lower_lock_type == RW_WRITER) { 2033 KASSERT(rw_write_held(uobj->vmobjlock)); 2034 KASSERTMSG((curpg->flags & PG_BUSY) != 0, 2035 "page %p should be busy", curpg); 2036 } else { 2037 KASSERT(rw_read_held(uobj->vmobjlock)); 2038 KASSERTMSG((curpg->flags & PG_BUSY) == 0, 2039 "page %p should not be busy", curpg); 2040 } 2041 2042 /* 2043 * if center page is resident and not PG_BUSY|PG_RELEASED 2044 * and !PGO_NOBUSY, then pgo_get made it PG_BUSY for us and 2045 * gave us a handle to it. 2046 */ 2047 2048 if (lcv == flt->centeridx) { 2049 UVMHIST_LOG(maphist, " got uobjpage (%#jx) " 2050 "with locked get", (uintptr_t)curpg, 0, 0, 0); 2051 } else { 2052 uvm_fault_lower_neighbor(ufi, flt, currva, curpg); 2053 } 2054 } 2055 pmap_update(ufi->orig_map->pmap); 2056 } 2057 2058 /* 2059 * uvm_fault_lower_neighbor: enter h/w mapping of lower neighbor page. 2060 */ 2061 2062 static void 2063 uvm_fault_lower_neighbor( 2064 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 2065 vaddr_t currva, struct vm_page *pg) 2066 { 2067 const bool readonly = uvm_pagereadonly_p(pg) || pg->loan_count > 0; 2068 UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); 2069 2070 /* locked: maps(read), amap(if there), uobj */ 2071 2072 /* 2073 * calling pgo_get with PGO_LOCKED returns us pages which 2074 * are neither busy nor released, so we don't need to check 2075 * for this. we can just directly enter the pages. 2076 */ 2077 2078 /* 2079 * in the read-locked case, it's not possible for this to be a new 2080 * page. it must be cached with the object and enqueued already. 2081 * there wasn't a direct fault on the page, so avoid the cost of 2082 * re-enqueuing it. 2083 */ 2084 2085 if (flt->lower_lock_type == RW_WRITER) { 2086 uvm_pagelock(pg); 2087 uvm_pageenqueue(pg); 2088 uvm_pageunlock(pg); 2089 } 2090 UVMHIST_LOG(maphist, 2091 " MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx", 2092 (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); 2093 cpu_count(CPU_COUNT_FLTNOMAP, 1); 2094 2095 /* 2096 * Since this page isn't the page that's actually faulting, 2097 * ignore pmap_enter() failures; it's not critical that we 2098 * enter these right now. 2099 * NOTE: page can't be waited on or PG_RELEASED because we've 2100 * held the lock the whole time we've had the handle. 2101 */ 2102 KASSERT((pg->flags & PG_PAGEOUT) == 0); 2103 KASSERT((pg->flags & PG_RELEASED) == 0); 2104 KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) || 2105 uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN); 2106 2107 /* 2108 * if a write lock was held on the object, the pages have been 2109 * busied. unbusy them now, as we are about to enter and then 2110 * forget about them. 2111 */ 2112 2113 if (flt->lower_lock_type == RW_WRITER) { 2114 KASSERT((pg->flags & PG_BUSY) != 0); 2115 pg->flags &= ~(PG_BUSY); 2116 UVM_PAGE_OWN(pg, NULL); 2117 } else { 2118 KASSERT((pg->flags & PG_BUSY) == 0); 2119 } 2120 KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type); 2121 2122 const vm_prot_t mapprot = 2123 readonly ? 
(flt->enter_prot & ~VM_PROT_WRITE) :
2124 flt->enter_prot & MASK(ufi->entry);
2125 const u_int mapflags =
2126 PMAP_CANFAIL | (flt->wire_mapping ? (mapprot | PMAP_WIRED) : 0);
2127 (void) pmap_enter(ufi->orig_map->pmap, currva,
2128 VM_PAGE_TO_PHYS(pg), mapprot, mapflags);
2129 }
2130
2131 /*
2132 * uvm_fault_lower_io: get lower page from backing store.
2133 *
2134 * 1. unlock everything, because i/o will block.
2135 * 2. call pgo_get.
2136 * 3. if failed, recover.
2137 * 4. if succeeded, relock everything and verify things.
2138 */
2139
2140 static int
2141 uvm_fault_lower_io(
2142 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
2143 struct uvm_object **ruobj, struct vm_page **ruobjpage)
2144 {
2145 struct vm_amap * const amap = ufi->entry->aref.ar_amap;
2146 struct uvm_object *uobj = *ruobj;
2147 struct vm_page *pg;
2148 bool locked;
2149 int gotpages;
2150 int error;
2151 voff_t uoff;
2152 vm_prot_t access_type;
2153 int advice;
2154 UVMHIST_FUNC("uvm_fault_lower_io"); UVMHIST_CALLED(maphist);
2155
2156 /* update rusage counters */
2157 curlwp->l_ru.ru_majflt++;
2158
2159 /* grab everything we need from the entry before we unlock */
2160 uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
2161 access_type = flt->access_type & MASK(ufi->entry);
2162 advice = ufi->entry->advice;
2163
2164 /* Locked: maps(read), amap(if there), uobj */
2165 KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type);
2166
2167 /* Upgrade to a write lock if needed. */
2168 error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, NULL);
2169 if (error != 0) {
2170 return error;
2171 }
2172 uvmfault_unlockall(ufi, amap, NULL);
2173
2174 /* Locked: uobj(write) */
2175 KASSERT(rw_write_held(uobj->vmobjlock));
2176
2177 cpu_count(CPU_COUNT_FLTGET, 1);
2178 gotpages = 1;
2179 pg = NULL;
2180 error = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages,
2181 0, access_type, advice, PGO_SYNCIO);
2182 /* locked: pg(if no error) */
2183
2184 /*
2185 * recover from I/O
2186 */
2187
2188 if (error) {
2189 if (error == EAGAIN) {
2190 UVMHIST_LOG(maphist,
2191 " pgo_get says TRY AGAIN!",0,0,0,0);
2192 kpause("fltagain2", false, hz/2, NULL);
2193 return ERESTART;
2194 }
2195
2196 #if 0
2197 KASSERT(error != ERESTART);
2198 #else
2199 /* XXXUEBS don't re-fault? */
2200 if (error == ERESTART)
2201 error = EIO;
2202 #endif
2203
2204 UVMHIST_LOG(maphist, "<- pgo_get failed (code %jd)",
2205 error, 0,0,0);
2206 return error;
2207 }
2208
2209 /*
2210 * re-verify the state of the world by first trying to relock
2211 * the maps. always relock the object.
2212 */
2213
2214 locked = uvmfault_relock(ufi);
2215 if (locked && amap)
2216 amap_lock(amap, flt->upper_lock_type);
2217
2218 /* might be changed */
2219 uobj = pg->uobject;
2220
2221 rw_enter(uobj->vmobjlock, flt->lower_lock_type);
2222 KASSERT((pg->flags & PG_BUSY) != 0);
2223 KASSERT(flt->lower_lock_type == RW_WRITER);
2224
2225 uvm_pagelock(pg);
2226 uvm_pageactivate(pg);
2227 uvm_pageunlock(pg);
2228
2229 /* locked(locked): maps(read), amap(if !null), uobj, pg */
2230 /* locked(!locked): uobj, pg */
2231
2232 /*
2233 * verify that the page has not been released and re-verify
2234 * that amap slot is still free. if there is a problem,
2235 * we unlock and clean up.
2236 */
2237
2238 if ((pg->flags & PG_RELEASED) != 0 ||
2239 (locked && amap && amap_lookup(&ufi->entry->aref,
2240 ufi->orig_rvaddr - ufi->entry->start))) {
2241 if (locked)
2242 uvmfault_unlockall(ufi, amap, NULL);
2243 locked = false;
2244 }
2245
2246 /*
2247 * didn't get the lock?
release the page and retry. 2248 */ 2249 2250 if (locked == false) { 2251 UVMHIST_LOG(maphist, 2252 " wasn't able to relock after fault: retry", 2253 0,0,0,0); 2254 if ((pg->flags & PG_RELEASED) == 0) { 2255 pg->flags &= ~PG_BUSY; 2256 uvm_pagelock(pg); 2257 uvm_pagewakeup(pg); 2258 uvm_pageunlock(pg); 2259 UVM_PAGE_OWN(pg, NULL); 2260 } else { 2261 cpu_count(CPU_COUNT_FLTPGRELE, 1); 2262 uvm_pagefree(pg); 2263 } 2264 rw_exit(uobj->vmobjlock); 2265 return ERESTART; 2266 } 2267 2268 /* 2269 * we have the data in pg which is busy and 2270 * not released. we are holding object lock (so the page 2271 * can't be released on us). 2272 */ 2273 2274 /* locked: maps(read), amap(if !null), uobj, pg */ 2275 2276 *ruobj = uobj; 2277 *ruobjpage = pg; 2278 return 0; 2279 } 2280 2281 /* 2282 * uvm_fault_lower_direct: fault lower center page 2283 * 2284 * 1. adjust flt->enter_prot. 2285 * 2. if page is loaned, resolve. 2286 */ 2287 2288 int 2289 uvm_fault_lower_direct( 2290 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 2291 struct uvm_object *uobj, struct vm_page *uobjpage) 2292 { 2293 struct vm_page *pg; 2294 UVMHIST_FUNC("uvm_fault_lower_direct"); UVMHIST_CALLED(maphist); 2295 2296 /* 2297 * we are not promoting. if the mapping is COW ensure that we 2298 * don't give more access than we should (e.g. when doing a read 2299 * fault on a COPYONWRITE mapping we want to map the COW page in 2300 * R/O even though the entry protection could be R/W). 2301 * 2302 * set "pg" to the page we want to map in (uobjpage, usually) 2303 */ 2304 2305 cpu_count(CPU_COUNT_FLT_OBJ, 1); 2306 if (UVM_ET_ISCOPYONWRITE(ufi->entry) || 2307 UVM_OBJ_NEEDS_WRITEFAULT(uobjpage->uobject)) 2308 flt->enter_prot &= ~VM_PROT_WRITE; 2309 pg = uobjpage; /* map in the actual object */ 2310 2311 KASSERT(uobjpage != PGO_DONTCARE); 2312 2313 /* 2314 * we are faulting directly on the page. be careful 2315 * about writing to loaned pages... 2316 */ 2317 2318 if (uobjpage->loan_count) { 2319 uvm_fault_lower_direct_loan(ufi, flt, uobj, &pg, &uobjpage); 2320 } 2321 KASSERT(pg == uobjpage); 2322 2323 if (flt->lower_lock_type == RW_READER) { 2324 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0); 2325 } else { 2326 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); 2327 } 2328 return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg); 2329 } 2330 2331 /* 2332 * uvm_fault_lower_direct_loan: resolve loaned page. 2333 * 2334 * 1. if not cow'ing, adjust flt->enter_prot. 2335 * 2. if cow'ing, break loan. 2336 */ 2337 2338 static int 2339 uvm_fault_lower_direct_loan( 2340 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 2341 struct uvm_object *uobj, struct vm_page **rpg, 2342 struct vm_page **ruobjpage) 2343 { 2344 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 2345 struct vm_page *pg; 2346 struct vm_page *uobjpage = *ruobjpage; 2347 int error; 2348 UVMHIST_FUNC("uvm_fault_lower_direct_loan"); UVMHIST_CALLED(maphist); 2349 2350 if (!flt->cow_now) { 2351 /* read fault: cap the protection at readonly */ 2352 /* cap! */ 2353 flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; 2354 } else { 2355 /* 2356 * write fault: must break the loan here. to do this 2357 * we need a write lock on the object. 
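*
* (roughly, as done below: upgrade the lower lock to a writer via
* uvm_fault_lower_upgrade(), then pg = uvm_loanbreak(uobjpage);
* if that fails for lack of memory we unbusy the page, unlock
* everything, uvm_wait() and return ERESTART.)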
2358 */ 2359 2360 error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, uobjpage); 2361 if (error != 0) { 2362 return error; 2363 } 2364 KASSERT(rw_write_held(uobj->vmobjlock)); 2365 2366 pg = uvm_loanbreak(uobjpage); 2367 if (pg == NULL) { 2368 2369 /* 2370 * drop ownership of page, it can't be released 2371 */ 2372 2373 uvm_pagelock(uobjpage); 2374 uvm_pagewakeup(uobjpage); 2375 uvm_pageunlock(uobjpage); 2376 uobjpage->flags &= ~PG_BUSY; 2377 UVM_PAGE_OWN(uobjpage, NULL); 2378 2379 uvmfault_unlockall(ufi, amap, uobj); 2380 UVMHIST_LOG(maphist, 2381 " out of RAM breaking loan, waiting", 2382 0,0,0,0); 2383 cpu_count(CPU_COUNT_FLTNORAM, 1); 2384 uvm_wait("flt_noram4"); 2385 return ERESTART; 2386 } 2387 *rpg = pg; 2388 *ruobjpage = pg; 2389 } 2390 return 0; 2391 } 2392 2393 /* 2394 * uvm_fault_lower_promote: promote lower page. 2395 * 2396 * 1. call uvmfault_promote. 2397 * 2. fill in data. 2398 * 3. if not ZFOD, dispose old page. 2399 */ 2400 2401 int 2402 uvm_fault_lower_promote( 2403 struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 2404 struct uvm_object *uobj, struct vm_page *uobjpage) 2405 { 2406 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 2407 struct vm_anon *anon; 2408 struct vm_page *pg; 2409 int error; 2410 UVMHIST_FUNC("uvm_fault_lower_promote"); UVMHIST_CALLED(maphist); 2411 2412 KASSERT(amap != NULL); 2413 2414 /* promoting requires a write lock. */ 2415 error = uvm_fault_upper_upgrade(ufi, flt, amap, uobj); 2416 if (error != 0) { 2417 return error; 2418 } 2419 KASSERT(rw_write_held(amap->am_lock)); 2420 2421 /* 2422 * If we are going to promote the data to an anon we 2423 * allocate a blank anon here and plug it into our amap. 2424 */ 2425 error = uvmfault_promote(ufi, NULL, uobjpage, &anon, &flt->anon_spare); 2426 switch (error) { 2427 case 0: 2428 break; 2429 case ERESTART: 2430 return ERESTART; 2431 default: 2432 return error; 2433 } 2434 2435 pg = anon->an_page; 2436 2437 /* 2438 * Fill in the data. 2439 */ 2440 if (flt->lower_lock_type == RW_READER) { 2441 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0); 2442 } else { 2443 KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); 2444 } 2445 2446 if (uobjpage != PGO_DONTCARE) { 2447 cpu_count(CPU_COUNT_FLT_PRCOPY, 1); 2448 2449 /* 2450 * promote to shared amap? make sure all sharing 2451 * procs see it 2452 */ 2453 2454 if ((amap_flags(amap) & AMAP_SHARED) != 0) { 2455 pmap_page_protect(uobjpage, VM_PROT_NONE); 2456 /* 2457 * XXX: PAGE MIGHT BE WIRED! 2458 */ 2459 } 2460 2461 /* 2462 * dispose of uobjpage. it can't be PG_RELEASED 2463 * since we still hold the object lock. 2464 */ 2465 2466 if ((uobjpage->flags & PG_BUSY) != 0) { 2467 uobjpage->flags &= ~PG_BUSY; 2468 uvm_pagelock(uobjpage); 2469 uvm_pagewakeup(uobjpage); 2470 uvm_pageunlock(uobjpage); 2471 UVM_PAGE_OWN(uobjpage, NULL); 2472 } 2473 2474 UVMHIST_LOG(maphist, 2475 " promote uobjpage %#jx to anon/page %#jx/%#jx", 2476 (uintptr_t)uobjpage, (uintptr_t)anon, (uintptr_t)pg, 0); 2477 2478 } else { 2479 cpu_count(CPU_COUNT_FLT_PRZERO, 1); 2480 2481 /* 2482 * Page is zero'd and marked dirty by 2483 * uvmfault_promote(). 2484 */ 2485 2486 UVMHIST_LOG(maphist," zero fill anon/page %#jx/%#jx", 2487 (uintptr_t)anon, (uintptr_t)pg, 0, 0); 2488 } 2489 2490 return uvm_fault_lower_enter(ufi, flt, uobj, anon, pg); 2491 } 2492 2493 /* 2494 * uvm_fault_lower_enter: enter h/w mapping of lower page or anon page promoted 2495 * from the lower page. 
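*
* as a rough sketch of the failure handling below: pmap_enter() is
* called with PMAP_CANFAIL, so
*
*	if (pmap_enter(...) != 0) {
*		uvmfault_unlockall(ufi, amap, uobj);
*		if (!uvm_reclaimable())
*			return ENOMEM;
*		uvm_wait("flt_pmfail2");
*		return ERESTART;
*	}
*
* (the real code also unbusies and queues the page first.)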
2496 */ 2497 2498 int 2499 uvm_fault_lower_enter( 2500 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 2501 struct uvm_object *uobj, 2502 struct vm_anon *anon, struct vm_page *pg) 2503 { 2504 struct vm_amap * const amap = ufi->entry->aref.ar_amap; 2505 const bool readonly = uvm_pagereadonly_p(pg); 2506 int error; 2507 UVMHIST_FUNC("uvm_fault_lower_enter"); UVMHIST_CALLED(maphist); 2508 2509 /* 2510 * Locked: 2511 * 2512 * maps(read), amap(if !null), uobj(if !null), 2513 * anon(if !null), pg(if anon), unlock_uobj(if !null) 2514 * 2515 * anon must be write locked (promotion). uobj can be either. 2516 * 2517 * Note: pg is either the uobjpage or the new page in the new anon. 2518 */ 2519 KASSERT(amap == NULL || 2520 rw_lock_op(amap->am_lock) == flt->upper_lock_type); 2521 KASSERT(anon == NULL || anon->an_lock == amap->am_lock); 2522 if (flt->lower_lock_type == RW_WRITER) { 2523 KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); 2524 KASSERTMSG((pg->flags & PG_BUSY) != 0, 2525 "page %p should be busy", pg); 2526 } else { 2527 KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock)); 2528 KASSERTMSG(anon != NULL || (pg->flags & PG_BUSY) == 0, 2529 "page %p should not be busy", pg); 2530 } 2531 2532 /* 2533 * all resources are present. we can now map it in and free our 2534 * resources. 2535 */ 2536 2537 UVMHIST_LOG(maphist, 2538 " MAPPING: case2: pm=%#jx, va=%#jx, pg=%#jx, promote=%jd", 2539 (uintptr_t)ufi->orig_map->pmap, ufi->orig_rvaddr, 2540 (uintptr_t)pg, flt->promote); 2541 KASSERTMSG((flt->access_type & VM_PROT_WRITE) == 0 || !readonly, 2542 "promote=%u cow_now=%u access_type=%x enter_prot=%x cow=%u " 2543 "entry=%p map=%p orig_rvaddr=%p pg=%p", 2544 flt->promote, flt->cow_now, flt->access_type, flt->enter_prot, 2545 UVM_ET_ISCOPYONWRITE(ufi->entry), ufi->entry, ufi->orig_map, 2546 (void *)ufi->orig_rvaddr, pg); 2547 KASSERT((flt->access_type & VM_PROT_WRITE) == 0 || !readonly); 2548 if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, 2549 VM_PAGE_TO_PHYS(pg), 2550 readonly ? flt->enter_prot & ~VM_PROT_WRITE : flt->enter_prot, 2551 flt->access_type | PMAP_CANFAIL | 2552 (flt->wire_mapping ? PMAP_WIRED : 0)) != 0) { 2553 2554 /* 2555 * No need to undo what we did; we can simply think of 2556 * this as the pmap throwing away the mapping information. 2557 * 2558 * We do, however, have to go through the ReFault path, 2559 * as the map may change while we're asleep. 2560 */ 2561 2562 /* 2563 * ensure that the page is queued in the case that 2564 * we just promoted the page. 2565 */ 2566 2567 if (anon != NULL || flt->lower_lock_type == RW_WRITER) { 2568 uvm_pagelock(pg); 2569 uvm_pageenqueue(pg); 2570 uvm_pagewakeup(pg); 2571 uvm_pageunlock(pg); 2572 } else { 2573 KASSERT((pg->flags & PG_BUSY) == 0); 2574 } 2575 2576 /* 2577 * note that pg can't be PG_RELEASED since we did not drop 2578 * the object lock since the last time we checked. 2579 */ 2580 KASSERT((pg->flags & PG_RELEASED) == 0); 2581 if ((pg->flags & PG_BUSY) != 0) { 2582 pg->flags &= ~(PG_BUSY|PG_FAKE); 2583 UVM_PAGE_OWN(pg, NULL); 2584 } 2585 2586 uvmfault_unlockall(ufi, amap, uobj); 2587 if (!uvm_reclaimable()) { 2588 UVMHIST_LOG(maphist, 2589 "<- failed. 
out of VM",0,0,0,0); 2590 /* XXX instrumentation */ 2591 error = ENOMEM; 2592 return error; 2593 } 2594 /* XXX instrumentation */ 2595 uvm_wait("flt_pmfail2"); 2596 return ERESTART; 2597 } 2598 2599 uvm_fault_lower_done(ufi, flt, uobj, pg); 2600 2601 /* 2602 * note that pg can't be PG_RELEASED since we did not drop the object 2603 * lock since the last time we checked. 2604 */ 2605 KASSERT((pg->flags & PG_RELEASED) == 0); 2606 if ((pg->flags & PG_BUSY) != 0) { 2607 uvm_pagelock(pg); 2608 uvm_pagewakeup(pg); 2609 uvm_pageunlock(pg); 2610 pg->flags &= ~(PG_BUSY|PG_FAKE); 2611 UVM_PAGE_OWN(pg, NULL); 2612 } 2613 2614 pmap_update(ufi->orig_map->pmap); 2615 uvmfault_unlockall(ufi, amap, uobj); 2616 2617 UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); 2618 return 0; 2619 } 2620 2621 /* 2622 * uvm_fault_lower_done: queue lower center page. 2623 */ 2624 2625 void 2626 uvm_fault_lower_done( 2627 struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 2628 struct uvm_object *uobj, struct vm_page *pg) 2629 { 2630 bool dropswap = false; 2631 2632 UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist); 2633 2634 uvm_pagelock(pg); 2635 if (flt->wire_paging) { 2636 uvm_pagewire(pg); 2637 if (pg->flags & PG_AOBJ) { 2638 2639 /* 2640 * since the now-wired page cannot be paged out, 2641 * release its swap resources for others to use. 2642 * since an aobj page with no swap cannot be clean, 2643 * mark it dirty now. 2644 */ 2645 2646 KASSERT(uobj != NULL); 2647 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 2648 dropswap = true; 2649 } 2650 } else { 2651 uvm_pageactivate(pg); 2652 } 2653 uvm_pageunlock(pg); 2654 2655 if (dropswap) { 2656 uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); 2657 } 2658 } 2659 2660 2661 /* 2662 * uvm_fault_wire: wire down a range of virtual addresses in a map. 2663 * 2664 * => map may be read-locked by caller, but MUST NOT be write-locked. 2665 * => if map is read-locked, any operations which may cause map to 2666 * be write-locked in uvm_fault() must be taken care of by 2667 * the caller. See uvm_map_pageable(). 2668 */ 2669 2670 int 2671 uvm_fault_wire(struct vm_map *map, vaddr_t start, vaddr_t end, 2672 vm_prot_t access_type, int maxprot) 2673 { 2674 vaddr_t va; 2675 int error; 2676 2677 /* 2678 * now fault it in a page at a time. if the fault fails then we have 2679 * to undo what we have done. note that in uvm_fault VM_PROT_NONE 2680 * is replaced with the max protection if fault_type is VM_FAULT_WIRE. 2681 */ 2682 2683 /* 2684 * XXX work around overflowing a vaddr_t. this prevents us from 2685 * wiring the last page in the address space, though. 2686 */ 2687 if (start > end) { 2688 return EFAULT; 2689 } 2690 2691 for (va = start; va < end; va += PAGE_SIZE) { 2692 error = uvm_fault_internal(map, va, access_type, 2693 (maxprot ? UVM_FAULT_MAXPROT : 0) | UVM_FAULT_WIRE); 2694 if (error) { 2695 if (va != start) { 2696 uvm_fault_unwire(map, start, va); 2697 } 2698 return error; 2699 } 2700 } 2701 return 0; 2702 } 2703 2704 /* 2705 * uvm_fault_unwire(): unwire range of virtual space. 2706 */ 2707 2708 void 2709 uvm_fault_unwire(struct vm_map *map, vaddr_t start, vaddr_t end) 2710 { 2711 vm_map_lock_read(map); 2712 uvm_fault_unwire_locked(map, start, end); 2713 vm_map_unlock_read(map); 2714 } 2715 2716 /* 2717 * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire(). 2718 * 2719 * => map must be at least read-locked. 
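* => e.g. uvm_fault_unwire() above is simply this function bracketed
*    by vm_map_lock_read(map) and vm_map_unlock_read(map).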
2720 */ 2721 2722 void 2723 uvm_fault_unwire_locked(struct vm_map *map, vaddr_t start, vaddr_t end) 2724 { 2725 struct vm_map_entry *entry, *oentry; 2726 pmap_t pmap = vm_map_pmap(map); 2727 vaddr_t va; 2728 paddr_t pa; 2729 struct vm_page *pg; 2730 2731 /* 2732 * we assume that the area we are unwiring has actually been wired 2733 * in the first place. this means that we should be able to extract 2734 * the PAs from the pmap. we also lock out the page daemon so that 2735 * we can call uvm_pageunwire. 2736 */ 2737 2738 /* 2739 * find the beginning map entry for the region. 2740 */ 2741 2742 KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map)); 2743 if (uvm_map_lookup_entry(map, start, &entry) == false) 2744 panic("uvm_fault_unwire_locked: address not in map"); 2745 2746 oentry = NULL; 2747 for (va = start; va < end; va += PAGE_SIZE) { 2748 2749 /* 2750 * find the map entry for the current address. 2751 */ 2752 2753 KASSERT(va >= entry->start); 2754 while (va >= entry->end) { 2755 KASSERT(entry->next != &map->header && 2756 entry->next->start <= entry->end); 2757 entry = entry->next; 2758 } 2759 2760 /* 2761 * lock it. 2762 */ 2763 2764 if (entry != oentry) { 2765 if (oentry != NULL) { 2766 uvm_map_unlock_entry(oentry); 2767 } 2768 uvm_map_lock_entry(entry, RW_WRITER); 2769 oentry = entry; 2770 } 2771 2772 /* 2773 * if the entry is no longer wired, tell the pmap. 2774 */ 2775 2776 if (!pmap_extract(pmap, va, &pa)) 2777 continue; 2778 2779 if (VM_MAPENT_ISWIRED(entry) == 0) 2780 pmap_unwire(pmap, va); 2781 2782 pg = PHYS_TO_VM_PAGE(pa); 2783 if (pg) { 2784 uvm_pagelock(pg); 2785 uvm_pageunwire(pg); 2786 uvm_pageunlock(pg); 2787 } 2788 } 2789 2790 if (oentry != NULL) { 2791 uvm_map_unlock_entry(entry); 2792 } 2793 } 2794