1*4016c7deSmpi /* $OpenBSD: uvm_fault.c,v 1.162 2025/01/22 10:52:09 mpi Exp $ */ 22c932f6fSmiod /* $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $ */ 3cd7ee8acSart 4cd7ee8acSart /* 5cd7ee8acSart * Copyright (c) 1997 Charles D. Cranor and Washington University. 6cd7ee8acSart * All rights reserved. 7cd7ee8acSart * 8cd7ee8acSart * Redistribution and use in source and binary forms, with or without 9cd7ee8acSart * modification, are permitted provided that the following conditions 10cd7ee8acSart * are met: 11cd7ee8acSart * 1. Redistributions of source code must retain the above copyright 12cd7ee8acSart * notice, this list of conditions and the following disclaimer. 13cd7ee8acSart * 2. Redistributions in binary form must reproduce the above copyright 14cd7ee8acSart * notice, this list of conditions and the following disclaimer in the 15cd7ee8acSart * documentation and/or other materials provided with the distribution. 16cd7ee8acSart * 17cd7ee8acSart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18cd7ee8acSart * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19cd7ee8acSart * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20cd7ee8acSart * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21cd7ee8acSart * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22cd7ee8acSart * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23cd7ee8acSart * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24cd7ee8acSart * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25cd7ee8acSart * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26cd7ee8acSart * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27cd7ee8acSart * 28cd7ee8acSart * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp 29cd7ee8acSart */ 30cd7ee8acSart 31cd7ee8acSart /* 32cd7ee8acSart * uvm_fault.c: fault handler 33cd7ee8acSart */ 34cd7ee8acSart 35cd7ee8acSart #include <sys/param.h> 36cd7ee8acSart #include <sys/systm.h> 37cd7ee8acSart #include <sys/kernel.h> 38627a59d1Smpi #include <sys/percpu.h> 39cd7ee8acSart #include <sys/proc.h> 40cd7ee8acSart #include <sys/malloc.h> 41cd7ee8acSart #include <sys/mman.h> 42c28df561Smpi #include <sys/tracepoint.h> 43cd7ee8acSart 44cd7ee8acSart #include <uvm/uvm.h> 45cd7ee8acSart 46cd7ee8acSart /* 47cd7ee8acSart * 48cd7ee8acSart * a word on page faults: 49cd7ee8acSart * 50cd7ee8acSart * types of page faults we handle: 51cd7ee8acSart * 52cd7ee8acSart * CASE 1: upper layer faults CASE 2: lower layer faults 53cd7ee8acSart * 54cd7ee8acSart * CASE 1A CASE 1B CASE 2A CASE 2B 55cd7ee8acSart * read/write1 write>1 read/write +-cow_write/zero 56cd7ee8acSart * | | | | 57cd7ee8acSart * +--|--+ +--|--+ +-----+ + | + | +-----+ 582ed91a58Smpi * amap | V | | ---------> new | | | | ^ | 59cd7ee8acSart * +-----+ +-----+ +-----+ + | + | +--|--+ 60cd7ee8acSart * | | | 61cd7ee8acSart * +-----+ +-----+ +--|--+ | +--|--+ 622ed91a58Smpi * uobj | d/c | | d/c | | V | +----+ | 63cd7ee8acSart * +-----+ +-----+ +-----+ +-----+ 64cd7ee8acSart * 65cd7ee8acSart * d/c = don't care 66cd7ee8acSart * 67cd7ee8acSart * case [0]: layerless fault 68cd7ee8acSart * no amap or uobj is present. this is an error. 69cd7ee8acSart * 70cd7ee8acSart * case [1]: upper layer fault [anon active] 71cd7ee8acSart * 1A: [read] or [write with anon->an_ref == 1] 722ed91a58Smpi * I/O takes place in upper level anon and uobj is not touched. 
73cd7ee8acSart * 1B: [write with anon->an_ref > 1] 74cd7ee8acSart * new anon is alloc'd and data is copied off ["COW"] 75cd7ee8acSart * 76cd7ee8acSart * case [2]: lower layer fault [uobj] 77cd7ee8acSart * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] 78cd7ee8acSart * I/O takes place directly in object. 79cd7ee8acSart * 2B: [write to copy_on_write] or [read on NULL uobj] 80cd7ee8acSart * data is "promoted" from uobj to a new anon. 81cd7ee8acSart * if uobj is null, then we zero fill. 82cd7ee8acSart * 83cd7ee8acSart * we follow the standard UVM locking protocol ordering: 84cd7ee8acSart * 85cd7ee8acSart * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) 86cd7ee8acSart * we hold a PG_BUSY page if we unlock for I/O 87cd7ee8acSart * 88cd7ee8acSart * 89cd7ee8acSart * the code is structured as follows: 90cd7ee8acSart * 91cd7ee8acSart * - init the "IN" params in the ufi structure 922ed91a58Smpi * ReFault: (ERESTART returned to the loop in uvm_fault) 93cd7ee8acSart * - do lookups [locks maps], check protection, handle needs_copy 94cd7ee8acSart * - check for case 0 fault (error) 95cd7ee8acSart * - establish "range" of fault 96cd7ee8acSart * - if we have an amap lock it and extract the anons 97cd7ee8acSart * - if sequential advice deactivate pages behind us 98cd7ee8acSart * - at the same time check pmap for unmapped areas and anon for pages 99cd7ee8acSart * that we could map in (and do map it if found) 100cd7ee8acSart * - check object for resident pages that we could map in 101cd7ee8acSart * - if (case 2) goto Case2 102cd7ee8acSart * - >>> handle case 1 103cd7ee8acSart * - ensure source anon is resident in RAM 104cd7ee8acSart * - if case 1B alloc new anon and copy from source 105cd7ee8acSart * - map the correct page in 106cd7ee8acSart * Case2: 107cd7ee8acSart * - >>> handle case 2 108cd7ee8acSart * - ensure source page is resident (if uobj) 109cd7ee8acSart * - if case 2B alloc new anon and copy from source (could be zero 110cd7ee8acSart * fill if uobj == NULL) 111cd7ee8acSart * - map the correct page in 112cd7ee8acSart * - done! 113cd7ee8acSart * 114cd7ee8acSart * note on paging: 115cd7ee8acSart * if we have to do I/O we place a PG_BUSY page in the correct object, 116cd7ee8acSart * unlock everything, and do the I/O. when I/O is done we must reverify 117cd7ee8acSart * the state of the world before assuming that our data structures are 118cd7ee8acSart * valid. [because mappings could change while the map is unlocked] 119cd7ee8acSart * 120cd7ee8acSart * alternative 1: unbusy the page in question and restart the page fault 121cd7ee8acSart * from the top (ReFault). this is easy but does not take advantage 122cd7ee8acSart * of the information that we already have from our previous lookup, 123cd7ee8acSart * although it is possible that the "hints" in the vm_map will help here. 124cd7ee8acSart * 125cd7ee8acSart * alternative 2: the system already keeps track of a "version" number of 126cd7ee8acSart * a map. [i.e. every time you write-lock a map (e.g. to change a 127cd7ee8acSart * mapping) you bump the version number up by one...] so, we can save 128cd7ee8acSart * the version number of the map before we release the lock and start I/O. 129cd7ee8acSart * then when I/O is done we can relock and check the version numbers 130cd7ee8acSart * to see if anything changed. this might save us some over 1 because 131cd7ee8acSart * we don't have to unbusy the page and may be less compares(?). 
132cd7ee8acSart * 133cd7ee8acSart * alternative 3: put in backpointers or a way to "hold" part of a map 134cd7ee8acSart * in place while I/O is in progress. this could be complex to 135cd7ee8acSart * implement (especially with structures like amap that can be referenced 136cd7ee8acSart * by multiple map entries, and figuring out what should wait could be 137cd7ee8acSart * complex as well...). 138cd7ee8acSart * 1392ed91a58Smpi * we use alternative 2. given that we are multi-threaded now we may want 1402ed91a58Smpi * to reconsider the choice. 141cd7ee8acSart */ 142cd7ee8acSart 143cd7ee8acSart /* 144cd7ee8acSart * local data structures 145cd7ee8acSart */ 146cd7ee8acSart struct uvm_advice { 147cd7ee8acSart int nback; 148cd7ee8acSart int nforw; 149cd7ee8acSart }; 150cd7ee8acSart 151cd7ee8acSart /* 1521c7ad6bdSmiod * page range array: set up in uvmfault_init(). 153cd7ee8acSart */ 154e087cc70Sguenther static struct uvm_advice uvmadvice[MADV_MASK + 1]; 155cd7ee8acSart 156cd7ee8acSart #define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */ 157cd7ee8acSart 158cd7ee8acSart /* 159cd7ee8acSart * private prototypes 160cd7ee8acSart */ 1611e3e475dSoga static void uvmfault_amapcopy(struct uvm_faultinfo *); 16256b7e380Smpi static inline void uvmfault_anonflush(struct vm_anon **, int); 163ca5c6958Soga void uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t); 1640372dd1aSariane void uvmfault_update_stats(struct uvm_faultinfo *); 165cd7ee8acSart 166cd7ee8acSart /* 167cd7ee8acSart * inline functions 168cd7ee8acSart */ 169cd7ee8acSart /* 170cd7ee8acSart * uvmfault_anonflush: try and deactivate pages in specified anons 171cd7ee8acSart * 172cd7ee8acSart * => does not have to deactivate page if it is busy 173cd7ee8acSart */ 17456b7e380Smpi static inline void 1752023d591Soga uvmfault_anonflush(struct vm_anon **anons, int n) 176cd7ee8acSart { 177cd7ee8acSart int lcv; 178cd7ee8acSart struct vm_page *pg; 179cd7ee8acSart 180cd7ee8acSart for (lcv = 0; lcv < n; lcv++) { 181cd7ee8acSart if (anons[lcv] == NULL) 182cd7ee8acSart continue; 18319dcab73Smpi KASSERT(rw_lock_held(anons[lcv]->an_lock)); 1848d0b5bafSpedro pg = anons[lcv]->an_page; 1856f909936Svisa if (pg && (pg->pg_flags & PG_BUSY) == 0) { 186cd7ee8acSart uvm_lock_pageq(); 187cd7ee8acSart if (pg->wire_count == 0) { 188cd7ee8acSart uvm_pagedeactivate(pg); 189cd7ee8acSart } 190cd7ee8acSart uvm_unlock_pageq(); 191cd7ee8acSart } 192cd7ee8acSart } 193cd7ee8acSart } 194cd7ee8acSart 195cd7ee8acSart /* 196cd7ee8acSart * normal functions 197cd7ee8acSart */ 198cd7ee8acSart /* 1991c7ad6bdSmiod * uvmfault_init: compute proper values for the uvmadvice[] array. 2001c7ad6bdSmiod */ 2011c7ad6bdSmiod void 202c799dc6dSnaddy uvmfault_init(void) 2031c7ad6bdSmiod { 2041c7ad6bdSmiod int npages; 2051c7ad6bdSmiod 2061c7ad6bdSmiod npages = atop(16384); 2071c7ad6bdSmiod if (npages > 0) { 2081c7ad6bdSmiod KASSERT(npages <= UVM_MAXRANGE / 2); 20915cd8707Sguenther uvmadvice[MADV_NORMAL].nforw = npages; 21015cd8707Sguenther uvmadvice[MADV_NORMAL].nback = npages - 1; 2111c7ad6bdSmiod } 2121c7ad6bdSmiod 2131c7ad6bdSmiod npages = atop(32768); 2141c7ad6bdSmiod if (npages > 0) { 2151c7ad6bdSmiod KASSERT(npages <= UVM_MAXRANGE / 2); 21615cd8707Sguenther uvmadvice[MADV_SEQUENTIAL].nforw = npages - 1; 21715cd8707Sguenther uvmadvice[MADV_SEQUENTIAL].nback = npages; 2181c7ad6bdSmiod } 2191c7ad6bdSmiod } 2201c7ad6bdSmiod 2211c7ad6bdSmiod /* 222cd7ee8acSart * uvmfault_amapcopy: clear "needs_copy" in a map. 
223cd7ee8acSart * 2242ed91a58Smpi * => called with VM data structures unlocked (usually, see below) 2252ed91a58Smpi * => we get a write lock on the maps and clear needs_copy for a VA 226cd7ee8acSart * => if we are out of RAM we sleep (waiting for more) 227cd7ee8acSart */ 2281e3e475dSoga static void 2292023d591Soga uvmfault_amapcopy(struct uvm_faultinfo *ufi) 230cd7ee8acSart { 2312ed91a58Smpi for (;;) { 23252887a38Smpi /* 23352887a38Smpi * no mapping? give up. 23452887a38Smpi */ 235cd7ee8acSart if (uvmfault_lookup(ufi, TRUE) == FALSE) 236cd7ee8acSart return; 237cd7ee8acSart 23852887a38Smpi /* 23952887a38Smpi * copy if needed. 24052887a38Smpi */ 241cd7ee8acSart if (UVM_ET_ISNEEDSCOPY(ufi->entry)) 242003f5e42Sderaadt amap_copy(ufi->map, ufi->entry, M_NOWAIT, 243003f5e42Sderaadt UVM_ET_ISSTACK(ufi->entry) ? FALSE : TRUE, 244cd7ee8acSart ufi->orig_rvaddr, ufi->orig_rvaddr + 1); 245cd7ee8acSart 24652887a38Smpi /* 24752887a38Smpi * didn't work? must be out of RAM. unlock and sleep. 24852887a38Smpi */ 249cd7ee8acSart if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 250cd7ee8acSart uvmfault_unlockmaps(ufi, TRUE); 251cd7ee8acSart uvm_wait("fltamapcopy"); 252cd7ee8acSart continue; 253cd7ee8acSart } 254cd7ee8acSart 25552887a38Smpi /* 25652887a38Smpi * got it! unlock and return. 25752887a38Smpi */ 258cd7ee8acSart uvmfault_unlockmaps(ufi, TRUE); 259cd7ee8acSart return; 260cd7ee8acSart } 261cd7ee8acSart /*NOTREACHED*/ 262cd7ee8acSart } 263cd7ee8acSart 264cd7ee8acSart /* 265cd7ee8acSart * uvmfault_anonget: get data in an anon into a non-busy, non-released 266cd7ee8acSart * page in that anon. 267cd7ee8acSart * 2682ed91a58Smpi * => Map, amap and thus anon should be locked by caller. 2692ed91a58Smpi * => If we fail, we unlock everything and error is returned. 2702ed91a58Smpi * => If we are successful, return with everything still locked. 2712ed91a58Smpi * => We do not move the page on the queues [gets moved later]. If we 2722ed91a58Smpi * allocate a new page [we_own], it gets put on the queues. Either way, 2732ed91a58Smpi * the result is that the page is on the queues at return time 274cd7ee8acSart */ 27528fbabcfSart int 2762023d591Soga uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap, 2772023d591Soga struct vm_anon *anon) 278cd7ee8acSart { 279cd7ee8acSart struct vm_page *pg; 2802ed91a58Smpi int error; 281cd7ee8acSart 28219dcab73Smpi KASSERT(rw_lock_held(anon->an_lock)); 28319dcab73Smpi KASSERT(anon->an_lock == amap->am_lock); 28419dcab73Smpi 2852ed91a58Smpi /* Increment the counters.*/ 286627a59d1Smpi counters_inc(uvmexp_counters, flt_anget); 2872ed91a58Smpi if (anon->an_page) { 2888f15e6a4Sguenther curproc->p_ru.ru_minflt++; 2892ed91a58Smpi } else { 2908f15e6a4Sguenther curproc->p_ru.ru_majflt++; 2912ed91a58Smpi } 2922ed91a58Smpi error = 0; 293cd7ee8acSart 2942ed91a58Smpi /* 2952ed91a58Smpi * Loop until we get the anon data, or fail. 2962ed91a58Smpi */ 2972ed91a58Smpi for (;;) { 2982ed91a58Smpi boolean_t we_own, locked; 2992ed91a58Smpi /* 3002ed91a58Smpi * Note: 'we_own' will become true if we set PG_BUSY on a page. 3012ed91a58Smpi */ 3022ed91a58Smpi we_own = FALSE; 3038d0b5bafSpedro pg = anon->an_page; 304cd7ee8acSart 3052ed91a58Smpi /* 3062ed91a58Smpi * Is page resident? Make sure it is not busy/released. 3072ed91a58Smpi */ 308cd7ee8acSart if (pg) { 3096f909936Svisa KASSERT(pg->pg_flags & PQ_ANON); 3106f909936Svisa KASSERT(pg->uanon == anon); 3116f909936Svisa 312cd7ee8acSart /* 3136f909936Svisa * if the page is busy, we drop all the locks and 3146f909936Svisa * try again. 
315cd7ee8acSart */ 3166f909936Svisa if ((pg->pg_flags & (PG_BUSY|PG_RELEASED)) == 0) 31734e43087Smpi return 0; 318627a59d1Smpi counters_inc(uvmexp_counters, flt_pgwait); 319cd7ee8acSart 320cd7ee8acSart /* 3212ed91a58Smpi * The last unlock must be an atomic unlock and wait 3222ed91a58Smpi * on the owner of page. 323cd7ee8acSart */ 324b83f5574Smpi KASSERT(pg->uobject == NULL); 32519dcab73Smpi uvmfault_unlockall(ufi, NULL, NULL); 326b83f5574Smpi uvm_pagewait(pg, anon->an_lock, "anonget"); 327cd7ee8acSart } else { 3282ed91a58Smpi /* 3292ed91a58Smpi * No page, therefore allocate one. 3302ed91a58Smpi */ 3318a42ed70Sart pg = uvm_pagealloc(NULL, 0, anon, 0); 3322ed91a58Smpi if (pg == NULL) { 3332ed91a58Smpi /* Out of memory. Wait a little. */ 334ec3489eeSmpi uvmfault_unlockall(ufi, amap, NULL); 335627a59d1Smpi counters_inc(uvmexp_counters, flt_noram); 336cd7ee8acSart uvm_wait("flt_noram1"); 337cd7ee8acSart } else { 3382ed91a58Smpi /* PG_BUSY bit is set. */ 339cd7ee8acSart we_own = TRUE; 340ec3489eeSmpi uvmfault_unlockall(ufi, amap, NULL); 341cd7ee8acSart 342cd7ee8acSart /* 34352887a38Smpi * Pass a PG_BUSY+PG_FAKE+PG_CLEAN page into 34452887a38Smpi * the uvm_swap_get() function with all data 34552887a38Smpi * structures unlocked. Note that it is OK 34652887a38Smpi * to read an_swslot here, because we hold 34752887a38Smpi * PG_BUSY on the page. 348cd7ee8acSart */ 349627a59d1Smpi counters_inc(uvmexp_counters, pageins); 3502ed91a58Smpi error = uvm_swap_get(pg, anon->an_swslot, 351cd7ee8acSart PGO_SYNCIO); 352cd7ee8acSart 353cd7ee8acSart /* 3542ed91a58Smpi * We clean up after the I/O below in the 3552ed91a58Smpi * 'we_own' case. 356cd7ee8acSart */ 357cd7ee8acSart } 358cd7ee8acSart } 359cd7ee8acSart 3602ed91a58Smpi /* 3612ed91a58Smpi * Re-lock the map and anon. 3622ed91a58Smpi */ 363cd7ee8acSart locked = uvmfault_relock(ufi); 36419dcab73Smpi if (locked || we_own) { 36519dcab73Smpi rw_enter(anon->an_lock, RW_WRITE); 36619dcab73Smpi } 367cd7ee8acSart 368cd7ee8acSart /* 3692ed91a58Smpi * If we own the page (i.e. we set PG_BUSY), then we need 3702ed91a58Smpi * to clean up after the I/O. There are three cases to 371cd7ee8acSart * consider: 3722ed91a58Smpi * 3732ed91a58Smpi * 1) Page was released during I/O: free anon and ReFault. 3742ed91a58Smpi * 2) I/O not OK. Free the page and cause the fault to fail. 3752ed91a58Smpi * 3) I/O OK! Activate the page and sync with the non-we_own 3762ed91a58Smpi * case (i.e. drop anon lock if not locked). 377cd7ee8acSart */ 378cd7ee8acSart if (we_own) { 3799662fca4Sart if (pg->pg_flags & PG_WANTED) { 3801e3afca1Ssmart wakeup(pg); 381cd7ee8acSart } 382cd7ee8acSart 383cd7ee8acSart /* 384cd7ee8acSart * if we were RELEASED during I/O, then our anon is 385cd7ee8acSart * no longer part of an amap. we need to free the 386cd7ee8acSart * anon and try again. 387cd7ee8acSart */ 3889662fca4Sart if (pg->pg_flags & PG_RELEASED) { 38919dcab73Smpi KASSERT(anon->an_ref == 0); 39052887a38Smpi /* 39152887a38Smpi * Released while we had unlocked amap. 39252887a38Smpi */ 393cd7ee8acSart if (locked) 39438b0cdf0Smpi uvmfault_unlockall(ufi, NULL, NULL); 39519dcab73Smpi uvm_anon_release(anon); /* frees page for us */ 396627a59d1Smpi counters_inc(uvmexp_counters, flt_pgrele); 39734e43087Smpi return ERESTART; /* refault! 
*/ 398cd7ee8acSart } 399cd7ee8acSart 4002ed91a58Smpi if (error != VM_PAGER_OK) { 4012ed91a58Smpi KASSERT(error != VM_PAGER_PEND); 4021414b0faSart 403cd7ee8acSart /* remove page from anon */ 4048d0b5bafSpedro anon->an_page = NULL; 405cd7ee8acSart 406cd7ee8acSart /* 4072ed91a58Smpi * Remove the swap slot from the anon and 4082ed91a58Smpi * mark the anon as having no real slot. 4092ed91a58Smpi * Do not free the swap slot, thus preventing 41028fbabcfSart * it from being used again. 41128fbabcfSart */ 41228fbabcfSart uvm_swap_markbad(anon->an_swslot, 1); 41328fbabcfSart anon->an_swslot = SWSLOT_BAD; 41428fbabcfSart 41528fbabcfSart /* 4162ed91a58Smpi * Note: page was never !PG_BUSY, so it 4172ed91a58Smpi * cannot be mapped and thus no need to 41852887a38Smpi * pmap_page_protect() it. 419cd7ee8acSart */ 420cd7ee8acSart uvm_lock_pageq(); 421cd7ee8acSart uvm_pagefree(pg); 422cd7ee8acSart uvm_unlock_pageq(); 423cd7ee8acSart 4242ed91a58Smpi if (locked) { 42538b0cdf0Smpi uvmfault_unlockall(ufi, NULL, NULL); 4262ed91a58Smpi } 42719dcab73Smpi rw_exit(anon->an_lock); 42834e43087Smpi /* 42934e43087Smpi * An error occurred while trying to bring 43034e43087Smpi * in the page -- this is the only error we 43134e43087Smpi * return right now. 43234e43087Smpi */ 43334e43087Smpi return EACCES; /* XXX */ 434cd7ee8acSart } 435cd7ee8acSart 436cd7ee8acSart /* 43752887a38Smpi * We have successfully read the page, activate it. 438cd7ee8acSart */ 4392c7adcb7Sart pmap_clear_modify(pg); 440cd7ee8acSart uvm_lock_pageq(); 441cd7ee8acSart uvm_pageactivate(pg); 442cd7ee8acSart uvm_unlock_pageq(); 443f8cbc53aSmpi atomic_clearbits_int(&pg->pg_flags, 444f8cbc53aSmpi PG_WANTED|PG_BUSY|PG_FAKE); 445f8cbc53aSmpi UVM_PAGE_OWN(pg, NULL); 446cd7ee8acSart } 447cd7ee8acSart 4482ed91a58Smpi /* 4492ed91a58Smpi * We were not able to re-lock the map - restart the fault. 4502ed91a58Smpi */ 45119dcab73Smpi if (!locked) { 45219dcab73Smpi if (we_own) { 45319dcab73Smpi rw_exit(anon->an_lock); 45419dcab73Smpi } 45534e43087Smpi return ERESTART; 45619dcab73Smpi } 457cd7ee8acSart 4582ed91a58Smpi /* 4592ed91a58Smpi * Verify that no one has touched the amap and moved 4602ed91a58Smpi * the anon on us. 4612ed91a58Smpi */ 4622ed91a58Smpi if (ufi != NULL && amap_lookup(&ufi->entry->aref, 463cd7ee8acSart ufi->orig_rvaddr - ufi->entry->start) != anon) { 464ec3489eeSmpi uvmfault_unlockall(ufi, amap, NULL); 46534e43087Smpi return ERESTART; 466cd7ee8acSart } 467cd7ee8acSart 4682ed91a58Smpi /* 4692ed91a58Smpi * Retry.. 4702ed91a58Smpi */ 471627a59d1Smpi counters_inc(uvmexp_counters, flt_anretry); 472cd7ee8acSart continue; 473cd7ee8acSart 4742ed91a58Smpi } 475cd7ee8acSart /*NOTREACHED*/ 476cd7ee8acSart } 477cd7ee8acSart 478cd7ee8acSart /* 479dceff774Smpi * uvmfault_promote: promote data to a new anon. used for 1B and 2B. 480dceff774Smpi * 481dceff774Smpi * 1. allocate an anon and a page. 482dceff774Smpi * 2. fill its contents. 483dceff774Smpi * 484dceff774Smpi * => if we fail (result != 0) we unlock everything. 485dceff774Smpi * => on success, return a new locked anon via 'nanon'. 486dceff774Smpi * => it's caller's responsibility to put the promoted nanon->an_page to the 487dceff774Smpi * page queue. 
488dceff774Smpi */ 489dceff774Smpi int 490dceff774Smpi uvmfault_promote(struct uvm_faultinfo *ufi, 491dceff774Smpi struct vm_page *uobjpage, 492dceff774Smpi struct vm_anon **nanon, /* OUT: allocated anon */ 493dceff774Smpi struct vm_page **npg) 494dceff774Smpi { 495dceff774Smpi struct vm_amap *amap = ufi->entry->aref.ar_amap; 496e9d70b48Smpi struct uvm_object *uobj = NULL; 497dceff774Smpi struct vm_anon *anon; 498dceff774Smpi struct vm_page *pg = NULL; 499dceff774Smpi 500e9d70b48Smpi if (uobjpage != PGO_DONTCARE) 501e9d70b48Smpi uobj = uobjpage->uobject; 502e9d70b48Smpi 503e9d70b48Smpi KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock)); 504e9d70b48Smpi 505dceff774Smpi anon = uvm_analloc(); 506dceff774Smpi if (anon) { 507dceff774Smpi anon->an_lock = amap->am_lock; 508dceff774Smpi pg = uvm_pagealloc(NULL, 0, anon, 509dceff774Smpi (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0); 510dceff774Smpi } 511dceff774Smpi 512dceff774Smpi /* check for out of RAM */ 513dceff774Smpi if (anon == NULL || pg == NULL) { 514e9d70b48Smpi uvmfault_unlockall(ufi, amap, uobj); 515dceff774Smpi if (anon == NULL) 516dceff774Smpi counters_inc(uvmexp_counters, flt_noanon); 517dceff774Smpi else { 518dceff774Smpi anon->an_lock = NULL; 519dceff774Smpi anon->an_ref--; 520dceff774Smpi uvm_anfree(anon); 521dceff774Smpi counters_inc(uvmexp_counters, flt_noram); 522dceff774Smpi } 523dceff774Smpi 524dceff774Smpi if (uvm_swapisfull()) 525dceff774Smpi return ENOMEM; 526dceff774Smpi 527dceff774Smpi /* out of RAM, wait for more */ 528dceff774Smpi if (anon == NULL) 529dceff774Smpi uvm_anwait(); 530dceff774Smpi else 531dceff774Smpi uvm_wait("flt_noram3"); 532dceff774Smpi return ERESTART; 533dceff774Smpi } 534dceff774Smpi 535dceff774Smpi /* 536dceff774Smpi * copy the page [pg now dirty] 537dceff774Smpi */ 538dceff774Smpi if (uobjpage != PGO_DONTCARE) 539dceff774Smpi uvm_pagecopy(uobjpage, pg); 540dceff774Smpi 541dceff774Smpi *nanon = anon; 542dceff774Smpi *npg = pg; 543dceff774Smpi return 0; 544dceff774Smpi } 545dceff774Smpi 546dceff774Smpi /* 5470372dd1aSariane * Update statistics after fault resolution. 5480372dd1aSariane * - maxrss 5490372dd1aSariane */ 5500372dd1aSariane void 5510372dd1aSariane uvmfault_update_stats(struct uvm_faultinfo *ufi) 5520372dd1aSariane { 5530372dd1aSariane struct vm_map *map; 5540372dd1aSariane struct proc *p; 5550372dd1aSariane vsize_t res; 5560372dd1aSariane 5570372dd1aSariane map = ufi->orig_map; 5580372dd1aSariane 559b9032da0Smlarkin /* 560b9032da0Smlarkin * If this is a nested pmap (eg, a virtual machine pmap managed 561b9032da0Smlarkin * by vmm(4) on amd64/i386), don't do any updating, just return. 562b9032da0Smlarkin * 563b9032da0Smlarkin * pmap_nested() on other archs is #defined to 0, so this is a 564b9032da0Smlarkin * no-op. 565b9032da0Smlarkin */ 566b9032da0Smlarkin if (pmap_nested(map->pmap)) 567b9032da0Smlarkin return; 568b9032da0Smlarkin 56935164244Stedu /* Update the maxrss for the process. */ 5700372dd1aSariane if (map->flags & VM_MAP_ISVMSPACE) { 5710372dd1aSariane p = curproc; 5720372dd1aSariane KASSERT(p != NULL && &p->p_vmspace->vm_map == map); 5730372dd1aSariane 5740372dd1aSariane res = pmap_resident_count(map->pmap); 5750372dd1aSariane /* Convert res from pages to kilobytes. 
*/ 5760372dd1aSariane res <<= (PAGE_SHIFT - 10); 5770372dd1aSariane 5780372dd1aSariane if (p->p_ru.ru_maxrss < res) 5790372dd1aSariane p->p_ru.ru_maxrss = res; 5800372dd1aSariane } 5810372dd1aSariane } 5820372dd1aSariane 5832ed91a58Smpi /* 5842ed91a58Smpi * F A U L T - m a i n e n t r y p o i n t 5852ed91a58Smpi */ 5862ed91a58Smpi 5872ed91a58Smpi /* 5882ed91a58Smpi * uvm_fault: page fault handler 5892ed91a58Smpi * 5902ed91a58Smpi * => called from MD code to resolve a page fault 5912ed91a58Smpi * => VM data structures usually should be unlocked. however, it is 5922ed91a58Smpi * possible to call here with the main map locked if the caller 5932ed91a58Smpi * gets a write lock, sets it recursive, and then calls us (c.f. 5942ed91a58Smpi * uvm_map_pageable). this should be avoided because it keeps 5952ed91a58Smpi * the map locked off during I/O. 5962ed91a58Smpi * => MUST NEVER BE CALLED IN INTERRUPT CONTEXT 5972ed91a58Smpi */ 5982ed91a58Smpi #define MASK(entry) (UVM_ET_ISCOPYONWRITE(entry) ? \ 5992ed91a58Smpi ~PROT_WRITE : PROT_MASK) 6007f5d8661Smpi struct uvm_faultctx { 6017f5d8661Smpi /* 6027f5d8661Smpi * the following members are set up by uvm_fault_check() and 6037f5d8661Smpi * read-only after that. 6047f5d8661Smpi */ 6057f5d8661Smpi vm_prot_t enter_prot; 606b004aefeSmpi vm_prot_t access_type; 6077f5d8661Smpi vaddr_t startva; 6087f5d8661Smpi int npages; 6097f5d8661Smpi int centeridx; 6107f5d8661Smpi boolean_t narrow; 6117f5d8661Smpi boolean_t wired; 6127f5d8661Smpi paddr_t pa_flags; 613cce913b9Smpi boolean_t promote; 614552563d5Smpi int lower_lock_type; 6157f5d8661Smpi }; 6167f5d8661Smpi 6172ed91a58Smpi int uvm_fault_check( 6182ed91a58Smpi struct uvm_faultinfo *, struct uvm_faultctx *, 619d6897f14Smpi struct vm_anon ***, vm_fault_t); 6202ed91a58Smpi 6212ed91a58Smpi int uvm_fault_upper( 6222ed91a58Smpi struct uvm_faultinfo *, struct uvm_faultctx *, 62358243cbfSmpi struct vm_anon **); 6242ed91a58Smpi boolean_t uvm_fault_upper_lookup( 6252ed91a58Smpi struct uvm_faultinfo *, const struct uvm_faultctx *, 6262ed91a58Smpi struct vm_anon **, struct vm_page **); 6272ed91a58Smpi 6282ed91a58Smpi int uvm_fault_lower( 6292ed91a58Smpi struct uvm_faultinfo *, struct uvm_faultctx *, 63058243cbfSmpi struct vm_page **); 631cce913b9Smpi int uvm_fault_lower_io( 632cce913b9Smpi struct uvm_faultinfo *, struct uvm_faultctx *, 633cce913b9Smpi struct uvm_object **, struct vm_page **); 6343053940aSmpi 6352ed91a58Smpi int 6362ed91a58Smpi uvm_fault(vm_map_t orig_map, vaddr_t vaddr, vm_fault_t fault_type, 6372ed91a58Smpi vm_prot_t access_type) 6382ed91a58Smpi { 6392ed91a58Smpi struct uvm_faultinfo ufi; 6402ed91a58Smpi struct uvm_faultctx flt; 6412ed91a58Smpi boolean_t shadowed; 6422ed91a58Smpi struct vm_anon *anons_store[UVM_MAXRANGE], **anons; 6432ed91a58Smpi struct vm_page *pages[UVM_MAXRANGE]; 6442ed91a58Smpi int error; 6452ed91a58Smpi 6462ed91a58Smpi counters_inc(uvmexp_counters, faults); 6472ed91a58Smpi TRACEPOINT(uvm, fault, vaddr, fault_type, access_type, NULL); 6482ed91a58Smpi 6492ed91a58Smpi /* 6502ed91a58Smpi * init the IN parameters in the ufi 6512ed91a58Smpi */ 6522ed91a58Smpi ufi.orig_map = orig_map; 6532ed91a58Smpi ufi.orig_rvaddr = trunc_page(vaddr); 6542ed91a58Smpi ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */ 6552ed91a58Smpi flt.access_type = access_type; 656d6897f14Smpi flt.narrow = FALSE; /* assume normal fault for now */ 657d6897f14Smpi flt.wired = FALSE; /* assume non-wired fault for now */ 658552563d5Smpi flt.lower_lock_type = RW_WRITE; /* exclusive lock for now */ 6592ed91a58Smpi 
6602ed91a58Smpi error = ERESTART; 6612ed91a58Smpi while (error == ERESTART) { /* ReFault: */ 6622ed91a58Smpi anons = anons_store; 6632ed91a58Smpi 664d6897f14Smpi error = uvm_fault_check(&ufi, &flt, &anons, fault_type); 6652ed91a58Smpi if (error != 0) 6662ed91a58Smpi continue; 6672ed91a58Smpi 6682ed91a58Smpi /* True if there is an anon at the faulting address */ 6692ed91a58Smpi shadowed = uvm_fault_upper_lookup(&ufi, &flt, anons, pages); 6702ed91a58Smpi if (shadowed == TRUE) { 6712ed91a58Smpi /* case 1: fault on an anon in our amap */ 67258243cbfSmpi error = uvm_fault_upper(&ufi, &flt, anons); 6732ed91a58Smpi } else { 6744bb42341Smpi struct uvm_object *uobj = ufi.entry->object.uvm_obj; 6754bb42341Smpi 6764bb42341Smpi /* 6774bb42341Smpi * if the desired page is not shadowed by the amap and 6784bb42341Smpi * we have a backing object, then we check to see if 6794bb42341Smpi * the backing object would prefer to handle the fault 6804bb42341Smpi * itself (rather than letting us do it with the usual 6814bb42341Smpi * pgo_get hook). the backing object signals this by 6824bb42341Smpi * providing a pgo_fault routine. 6834bb42341Smpi */ 6844bb42341Smpi if (uobj != NULL && uobj->pgops->pgo_fault != NULL) { 68569c04514Smpi rw_enter(uobj->vmobjlock, RW_WRITE); 686f46a341eSmpi KERNEL_LOCK(); 6874bb42341Smpi error = uobj->pgops->pgo_fault(&ufi, 6884bb42341Smpi flt.startva, pages, flt.npages, 6894bb42341Smpi flt.centeridx, fault_type, flt.access_type, 6904bb42341Smpi PGO_LOCKED); 6912ed91a58Smpi KERNEL_UNLOCK(); 6924bb42341Smpi } else { 6934bb42341Smpi /* case 2: fault on backing obj or zero fill */ 69458243cbfSmpi error = uvm_fault_lower(&ufi, &flt, pages); 6954bb42341Smpi } 6962ed91a58Smpi } 6972ed91a58Smpi } 6982ed91a58Smpi 6992ed91a58Smpi return error; 7002ed91a58Smpi } 7012ed91a58Smpi 7027f5d8661Smpi /* 7037f5d8661Smpi * uvm_fault_check: check prot, handle needs-copy, etc. 7047f5d8661Smpi * 7057f5d8661Smpi * 1. lookup entry. 7067f5d8661Smpi * 2. check protection. 7077f5d8661Smpi * 3. adjust fault condition (mainly for simulated fault). 7087f5d8661Smpi * 4. handle needs-copy (lazy amap copy). 7097f5d8661Smpi * 5. establish range of interest for neighbor fault (aka pre-fault). 7107f5d8661Smpi * 6. look up anons (if amap exists). 7117f5d8661Smpi * 7. flush pages (if MADV_SEQUENTIAL) 7127f5d8661Smpi * 7137f5d8661Smpi * => called with nothing locked. 7147f5d8661Smpi * => if we fail (result != 0) we unlock everything. 7157f5d8661Smpi * => initialize/adjust many members of flt. 
7167f5d8661Smpi */ 7177f5d8661Smpi int 7187f5d8661Smpi uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 719d6897f14Smpi struct vm_anon ***ranons, vm_fault_t fault_type) 7207f5d8661Smpi { 7217f5d8661Smpi struct vm_amap *amap; 7227f5d8661Smpi struct uvm_object *uobj; 7237f5d8661Smpi int nback, nforw; 7247f5d8661Smpi 7252ed91a58Smpi /* 7262ed91a58Smpi * lookup and lock the maps 7272ed91a58Smpi */ 7287f5d8661Smpi if (uvmfault_lookup(ufi, FALSE) == FALSE) { 7292ed91a58Smpi return EFAULT; 7307f5d8661Smpi } 7312ed91a58Smpi /* locked: maps(read) */ 7327f5d8661Smpi 7337f5d8661Smpi #ifdef DIAGNOSTIC 7347f5d8661Smpi if ((ufi->map->flags & VM_MAP_PAGEABLE) == 0) 7357f5d8661Smpi panic("uvm_fault: fault on non-pageable map (%p, 0x%lx)", 7367f5d8661Smpi ufi->map, ufi->orig_rvaddr); 7377f5d8661Smpi #endif 7387f5d8661Smpi 7392ed91a58Smpi /* 7402ed91a58Smpi * check protection 7412ed91a58Smpi */ 742b004aefeSmpi if ((ufi->entry->protection & flt->access_type) != flt->access_type) { 7437f5d8661Smpi uvmfault_unlockmaps(ufi, FALSE); 7442ed91a58Smpi return EACCES; 7457f5d8661Smpi } 7467f5d8661Smpi 7477f5d8661Smpi /* 7487f5d8661Smpi * "enter_prot" is the protection we want to enter the page in at. 7497f5d8661Smpi * for certain pages (e.g. copy-on-write pages) this protection can 7507f5d8661Smpi * be more strict than ufi->entry->protection. "wired" means either 7517f5d8661Smpi * the entry is wired or we are fault-wiring the pg. 7527f5d8661Smpi */ 7537f5d8661Smpi flt->enter_prot = ufi->entry->protection; 7547f5d8661Smpi flt->pa_flags = UVM_ET_ISWC(ufi->entry) ? PMAP_WC : 0; 755d6897f14Smpi if (VM_MAPENT_ISWIRED(ufi->entry) || (fault_type == VM_FAULT_WIRE)) { 756d6897f14Smpi flt->wired = TRUE; 757b004aefeSmpi flt->access_type = flt->enter_prot; /* full access for wired */ 758d6897f14Smpi /* don't look for neighborhood * pages on "wire" fault */ 759d6897f14Smpi flt->narrow = TRUE; 760d6897f14Smpi } 7617f5d8661Smpi 7627f5d8661Smpi /* handle "needs_copy" case. */ 7637f5d8661Smpi if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { 764b004aefeSmpi if ((flt->access_type & PROT_WRITE) || 7657f5d8661Smpi (ufi->entry->object.uvm_obj == NULL)) { 7667f5d8661Smpi /* need to clear */ 7677f5d8661Smpi uvmfault_unlockmaps(ufi, FALSE); 7687f5d8661Smpi uvmfault_amapcopy(ufi); 769627a59d1Smpi counters_inc(uvmexp_counters, flt_amcopy); 7702ed91a58Smpi return ERESTART; 7717f5d8661Smpi } else { 7727f5d8661Smpi /* 7737f5d8661Smpi * ensure that we pmap_enter page R/O since 7747f5d8661Smpi * needs_copy is still true 7757f5d8661Smpi */ 7767f5d8661Smpi flt->enter_prot &= ~PROT_WRITE; 7777f5d8661Smpi } 7787f5d8661Smpi } 7797f5d8661Smpi 7802ed91a58Smpi /* 7812ed91a58Smpi * identify the players 7822ed91a58Smpi */ 7832ed91a58Smpi amap = ufi->entry->aref.ar_amap; /* upper layer */ 7842ed91a58Smpi uobj = ufi->entry->object.uvm_obj; /* lower layer */ 7857f5d8661Smpi 7867f5d8661Smpi /* 7877f5d8661Smpi * check for a case 0 fault. if nothing backing the entry then 7887f5d8661Smpi * error now. 7897f5d8661Smpi */ 7907f5d8661Smpi if (amap == NULL && uobj == NULL) { 7917f5d8661Smpi uvmfault_unlockmaps(ufi, FALSE); 7922ed91a58Smpi return EFAULT; 7937f5d8661Smpi } 7947f5d8661Smpi 7957f5d8661Smpi /* 7963b9e4e18Smpi * for a case 2B fault waste no time on adjacent pages because 7973b9e4e18Smpi * they are likely already entered. 
7983b9e4e18Smpi */ 7993b9e4e18Smpi if (uobj != NULL && amap != NULL && 8003b9e4e18Smpi (flt->access_type & PROT_WRITE) != 0) { 8013b9e4e18Smpi /* wide fault (!narrow) */ 8023b9e4e18Smpi flt->narrow = TRUE; 8033b9e4e18Smpi } 8043b9e4e18Smpi 8053b9e4e18Smpi /* 8067f5d8661Smpi * establish range of interest based on advice from mapper 8077f5d8661Smpi * and then clip to fit map entry. note that we only want 8087f5d8661Smpi * to do this the first time through the fault. if we 8097f5d8661Smpi * ReFault we will disable this by setting "narrow" to true. 8107f5d8661Smpi */ 8117f5d8661Smpi if (flt->narrow == FALSE) { 8127f5d8661Smpi 8137f5d8661Smpi /* wide fault (!narrow) */ 8147f5d8661Smpi nback = min(uvmadvice[ufi->entry->advice].nback, 8157f5d8661Smpi (ufi->orig_rvaddr - ufi->entry->start) >> PAGE_SHIFT); 8167f5d8661Smpi flt->startva = ufi->orig_rvaddr - ((vsize_t)nback << PAGE_SHIFT); 8177f5d8661Smpi nforw = min(uvmadvice[ufi->entry->advice].nforw, 8187f5d8661Smpi ((ufi->entry->end - ufi->orig_rvaddr) >> PAGE_SHIFT) - 1); 8197f5d8661Smpi /* 8207f5d8661Smpi * note: "-1" because we don't want to count the 8217f5d8661Smpi * faulting page as forw 8227f5d8661Smpi */ 8237f5d8661Smpi flt->npages = nback + nforw + 1; 8247f5d8661Smpi flt->centeridx = nback; 8257f5d8661Smpi 8267f5d8661Smpi flt->narrow = TRUE; /* ensure only once per-fault */ 8277f5d8661Smpi } else { 8287f5d8661Smpi /* narrow fault! */ 8297f5d8661Smpi nback = nforw = 0; 8307f5d8661Smpi flt->startva = ufi->orig_rvaddr; 8317f5d8661Smpi flt->npages = 1; 8327f5d8661Smpi flt->centeridx = 0; 8337f5d8661Smpi } 8347f5d8661Smpi 8352ed91a58Smpi /* 8362ed91a58Smpi * if we've got an amap then lock it and extract current anons. 8372ed91a58Smpi */ 8387f5d8661Smpi if (amap) { 839335383c9Smpi amap_lock(amap, RW_WRITE); 8407f5d8661Smpi amap_lookups(&ufi->entry->aref, 8417f5d8661Smpi flt->startva - ufi->entry->start, *ranons, flt->npages); 8427f5d8661Smpi } else { 8437f5d8661Smpi *ranons = NULL; /* to be safe */ 8447f5d8661Smpi } 8457f5d8661Smpi 8467f5d8661Smpi /* 8477f5d8661Smpi * for MADV_SEQUENTIAL mappings we want to deactivate the back pages 8487f5d8661Smpi * now and then forget about them (for the rest of the fault). 8497f5d8661Smpi */ 8507f5d8661Smpi if (ufi->entry->advice == MADV_SEQUENTIAL && nback != 0) { 8517f5d8661Smpi /* flush back-page anons? */ 8527f5d8661Smpi if (amap) 8537f5d8661Smpi uvmfault_anonflush(*ranons, nback); 8547f5d8661Smpi 85552887a38Smpi /* 85652887a38Smpi * flush object? 85752887a38Smpi */ 8587f5d8661Smpi if (uobj) { 8597f5d8661Smpi voff_t uoff; 8607f5d8661Smpi 8617f5d8661Smpi uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset; 86269c04514Smpi rw_enter(uobj->vmobjlock, RW_WRITE); 8637f5d8661Smpi (void) uobj->pgops->pgo_flush(uobj, uoff, uoff + 8647f5d8661Smpi ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE); 86569c04514Smpi rw_exit(uobj->vmobjlock); 8667f5d8661Smpi } 8677f5d8661Smpi 8687f5d8661Smpi /* now forget about the backpages */ 8697f5d8661Smpi if (amap) 8707f5d8661Smpi *ranons += nback; 8717f5d8661Smpi flt->startva += ((vsize_t)nback << PAGE_SHIFT); 8727f5d8661Smpi flt->npages -= nback; 8737f5d8661Smpi flt->centeridx = 0; 8747f5d8661Smpi } 8757f5d8661Smpi 8767f5d8661Smpi return 0; 8777f5d8661Smpi } 8787f5d8661Smpi 8790372dd1aSariane /* 8802ed91a58Smpi * uvm_fault_upper_lookup: look up existing h/w mapping and amap. 8816d51fca8Smpi * 8822ed91a58Smpi * iterate range of interest: 8832ed91a58Smpi * 1. check if h/w mapping exists. if yes, we don't care 8842ed91a58Smpi * 2. check if anon exists. 
if not, page is lower. 8852ed91a58Smpi * 3. if anon exists, enter h/w mapping for neighbors. 8862ed91a58Smpi * 8872ed91a58Smpi * => called with amap locked (if exists). 8882ed91a58Smpi */ 8892ed91a58Smpi boolean_t 8902ed91a58Smpi uvm_fault_upper_lookup(struct uvm_faultinfo *ufi, 8912ed91a58Smpi const struct uvm_faultctx *flt, struct vm_anon **anons, 8922ed91a58Smpi struct vm_page **pages) 8932ed91a58Smpi { 8942ed91a58Smpi struct vm_amap *amap = ufi->entry->aref.ar_amap; 8952ed91a58Smpi struct vm_anon *anon; 896a52f395cSmpi struct vm_page *pg; 8972ed91a58Smpi boolean_t shadowed; 8982ed91a58Smpi vaddr_t currva; 8992ed91a58Smpi paddr_t pa; 900a52f395cSmpi int lcv, entered = 0; 9012ed91a58Smpi 9022ed91a58Smpi /* locked: maps(read), amap(if there) */ 9032ed91a58Smpi KASSERT(amap == NULL || 9042ed91a58Smpi rw_write_held(amap->am_lock)); 9052ed91a58Smpi 9062ed91a58Smpi /* 9072ed91a58Smpi * map in the backpages and frontpages we found in the amap in hopes 9082ed91a58Smpi * of preventing future faults. we also init the pages[] array as 9092ed91a58Smpi * we go. 9102ed91a58Smpi */ 9112ed91a58Smpi currva = flt->startva; 9122ed91a58Smpi shadowed = FALSE; 9132ed91a58Smpi for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { 9142ed91a58Smpi /* 9152ed91a58Smpi * unmapped or center page. check if any anon at this level. 9162ed91a58Smpi */ 9172ed91a58Smpi if (amap == NULL || anons[lcv] == NULL) { 9182ed91a58Smpi pages[lcv] = NULL; 9192ed91a58Smpi continue; 9202ed91a58Smpi } 9212ed91a58Smpi 9222ed91a58Smpi /* 9232ed91a58Smpi * check for present page and map if possible. 9242ed91a58Smpi */ 9252ed91a58Smpi pages[lcv] = PGO_DONTCARE; 9262ed91a58Smpi if (lcv == flt->centeridx) { /* save center for later! */ 9272ed91a58Smpi shadowed = TRUE; 9282ed91a58Smpi continue; 9292ed91a58Smpi } 930a52f395cSmpi 9312ed91a58Smpi anon = anons[lcv]; 932a52f395cSmpi pg = anon->an_page; 933a52f395cSmpi 9342ed91a58Smpi KASSERT(anon->an_lock == amap->am_lock); 935a52f395cSmpi 936a52f395cSmpi /* 937a52f395cSmpi * ignore busy pages. 938a52f395cSmpi * don't play with VAs that are already mapped. 939a52f395cSmpi */ 940a52f395cSmpi if (pg && (pg->pg_flags & (PG_RELEASED|PG_BUSY)) == 0 && 941a52f395cSmpi !pmap_extract(ufi->orig_map->pmap, currva, &pa)) { 9422ed91a58Smpi uvm_lock_pageq(); 943a52f395cSmpi uvm_pageactivate(pg); /* reactivate */ 9442ed91a58Smpi uvm_unlock_pageq(); 9452ed91a58Smpi counters_inc(uvmexp_counters, flt_namap); 9462ed91a58Smpi 9478a233859Smpi /* No fault-ahead when wired. */ 9488a233859Smpi KASSERT(flt->wired == FALSE); 9498a233859Smpi 9502ed91a58Smpi /* 9512ed91a58Smpi * Since this isn't the page that's actually faulting, 9522ed91a58Smpi * ignore pmap_enter() failures; it's not critical 9532ed91a58Smpi * that we enter these right now. 9542ed91a58Smpi */ 9552ed91a58Smpi (void) pmap_enter(ufi->orig_map->pmap, currva, 956a52f395cSmpi VM_PAGE_TO_PHYS(pg) | flt->pa_flags, 9572ed91a58Smpi (anon->an_ref > 1) ? 9582ed91a58Smpi (flt->enter_prot & ~PROT_WRITE) : flt->enter_prot, 9598a233859Smpi PMAP_CANFAIL); 960a52f395cSmpi entered++; 9612ed91a58Smpi } 9622ed91a58Smpi } 963a52f395cSmpi if (entered > 0) 9642ed91a58Smpi pmap_update(ufi->orig_map->pmap); 9652ed91a58Smpi 9662ed91a58Smpi return shadowed; 9672ed91a58Smpi } 9682ed91a58Smpi 9692ed91a58Smpi /* 9702ed91a58Smpi * uvm_fault_upper: handle upper fault. 9712ed91a58Smpi * 9722ed91a58Smpi * 1. acquire anon lock. 9732ed91a58Smpi * 2. get anon. let uvmfault_anonget do the dirty work. 9742ed91a58Smpi * 3. if COW, promote data to new anon 9752ed91a58Smpi * 4. 
enter h/w mapping 9766d51fca8Smpi */ 9776d51fca8Smpi int 9786d51fca8Smpi uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 97958243cbfSmpi struct vm_anon **anons) 9806d51fca8Smpi { 9816d51fca8Smpi struct vm_amap *amap = ufi->entry->aref.ar_amap; 9826d51fca8Smpi struct vm_anon *oanon, *anon = anons[flt->centeridx]; 9836d51fca8Smpi struct vm_page *pg = NULL; 9846d51fca8Smpi int error, ret; 9856d51fca8Smpi 98652887a38Smpi /* locked: maps(read), amap, anon */ 98719dcab73Smpi KASSERT(rw_write_held(amap->am_lock)); 98819dcab73Smpi KASSERT(anon->an_lock == amap->am_lock); 98919dcab73Smpi 9906d51fca8Smpi /* 9916d51fca8Smpi * no matter if we have case 1A or case 1B we are going to need to 9926d51fca8Smpi * have the anon's memory resident. ensure that now. 9936d51fca8Smpi */ 9946d51fca8Smpi /* 9956d51fca8Smpi * let uvmfault_anonget do the dirty work. 9962ed91a58Smpi * if it fails (!OK) it will unlock everything for us. 9972ed91a58Smpi * if it succeeds, locks are still valid and locked. 9986d51fca8Smpi * also, if it is OK, then the anon's page is on the queues. 9996d51fca8Smpi */ 10006d51fca8Smpi error = uvmfault_anonget(ufi, amap, anon); 10016d51fca8Smpi switch (error) { 100234e43087Smpi case 0: 10036d51fca8Smpi break; 10046d51fca8Smpi 100534e43087Smpi case ERESTART: 10066d51fca8Smpi return ERESTART; 10076d51fca8Smpi 10086d51fca8Smpi default: 100934e43087Smpi return error; 10106d51fca8Smpi } 10116d51fca8Smpi 101219dcab73Smpi KASSERT(rw_write_held(amap->am_lock)); 101319dcab73Smpi KASSERT(anon->an_lock == amap->am_lock); 101419dcab73Smpi 10156d51fca8Smpi /* 10166d51fca8Smpi * if we are case 1B then we will need to allocate a new blank 10176d51fca8Smpi * anon to transfer the data into. note that we have a lock 10186d51fca8Smpi * on anon, so no one can busy or release the page until we are done. 10196d51fca8Smpi * also note that the ref count can't drop to zero here because 10206d51fca8Smpi * it is > 1 and we are only dropping one ref. 10216d51fca8Smpi * 10226d51fca8Smpi * in the (hopefully very rare) case that we are out of RAM we 10232ed91a58Smpi * will unlock, wait for more RAM, and refault. 10246d51fca8Smpi * 10256d51fca8Smpi * if we are out of anon VM we wait for RAM to become available. 10266d51fca8Smpi */ 10276d51fca8Smpi 1028b004aefeSmpi if ((flt->access_type & PROT_WRITE) != 0 && anon->an_ref > 1) { 1029335383c9Smpi /* promoting requires a write lock. */ 1030335383c9Smpi KASSERT(rw_write_held(amap->am_lock)); 1031335383c9Smpi 1032627a59d1Smpi counters_inc(uvmexp_counters, flt_acow); 10336d51fca8Smpi oanon = anon; /* oanon = old */ 10346d51fca8Smpi 1035dceff774Smpi error = uvmfault_promote(ufi, oanon->an_page, &anon, &pg); 1036dceff774Smpi if (error) 1037dceff774Smpi return error; 10386d51fca8Smpi 10396d51fca8Smpi /* un-busy! new page */ 10406d51fca8Smpi atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE); 10416d51fca8Smpi UVM_PAGE_OWN(pg, NULL); 10426d51fca8Smpi ret = amap_add(&ufi->entry->aref, 10436d51fca8Smpi ufi->orig_rvaddr - ufi->entry->start, anon, 1); 10446d51fca8Smpi KASSERT(ret == 0); 10456d51fca8Smpi 1046335383c9Smpi KASSERT(anon->an_lock == oanon->an_lock); 1047335383c9Smpi 10486d51fca8Smpi /* deref: can not drop to zero here by defn! 
*/ 1049335383c9Smpi KASSERT(oanon->an_ref > 1); 10506d51fca8Smpi oanon->an_ref--; 10516d51fca8Smpi 10522f953554Sguenther #if defined(MULTIPROCESSOR) && !defined(__HAVE_PMAP_MPSAFE_ENTER_COW) 105343687ba5Sguenther /* 105443687ba5Sguenther * If there are multiple threads, either uvm or the 105543687ba5Sguenther * pmap has to make sure no threads see the old RO 105643687ba5Sguenther * mapping once any have seen the new RW mapping. 105743687ba5Sguenther * uvm does it by inserting the new mapping RO and 105843687ba5Sguenther * letting it fault again. 10592f953554Sguenther * This is only a problem on MP systems. 106043687ba5Sguenther */ 10617376e01dSguenther if (P_HASSIBLING(curproc)) { 106243687ba5Sguenther flt->enter_prot &= ~PROT_WRITE; 10637376e01dSguenther flt->access_type &= ~PROT_WRITE; 10647376e01dSguenther } 106543687ba5Sguenther #endif 106643687ba5Sguenther 10676d51fca8Smpi /* 10686d51fca8Smpi * note: anon is _not_ locked, but we have the sole references 10696d51fca8Smpi * to in from amap. 10706d51fca8Smpi * thus, no one can get at it until we are done with it. 10716d51fca8Smpi */ 10726d51fca8Smpi } else { 1073627a59d1Smpi counters_inc(uvmexp_counters, flt_anon); 10746d51fca8Smpi oanon = anon; 10756d51fca8Smpi pg = anon->an_page; 10766d51fca8Smpi if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ 10776d51fca8Smpi flt->enter_prot = flt->enter_prot & ~PROT_WRITE; 10786d51fca8Smpi } 10796d51fca8Smpi 10806d51fca8Smpi /* 10812ed91a58Smpi * now map the page in . 10826d51fca8Smpi */ 10836d51fca8Smpi if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr, 10846d51fca8Smpi VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot, 1085b004aefeSmpi flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) { 10866d51fca8Smpi /* 10876d51fca8Smpi * No need to undo what we did; we can simply think of 10886d51fca8Smpi * this as the pmap throwing away the mapping information. 10896d51fca8Smpi * 10906d51fca8Smpi * We do, however, have to go through the ReFault path, 10916d51fca8Smpi * as the map may change while we're asleep. 10926d51fca8Smpi */ 10936d51fca8Smpi uvmfault_unlockall(ufi, amap, NULL); 10946d51fca8Smpi if (uvm_swapisfull()) { 10956d51fca8Smpi /* XXX instrumentation */ 10966d51fca8Smpi return ENOMEM; 10976d51fca8Smpi } 1098679d40f1Skettenis #ifdef __HAVE_PMAP_POPULATE 1099679d40f1Skettenis pmap_populate(ufi->orig_map->pmap, ufi->orig_rvaddr); 1100679d40f1Skettenis #else 11016d51fca8Smpi /* XXX instrumentation */ 11026d51fca8Smpi uvm_wait("flt_pmfail1"); 1103679d40f1Skettenis #endif 11046d51fca8Smpi return ERESTART; 11056d51fca8Smpi } 11066d51fca8Smpi 110752887a38Smpi /* 110852887a38Smpi * ... update the page queues. 110952887a38Smpi */ 11106d51fca8Smpi uvm_lock_pageq(); 111196ec8e93Smpi if (flt->wired) { 1112e5ad67b7Smpi uvm_pagewire(pg); 111396ec8e93Smpi } else { 111496ec8e93Smpi uvm_pageactivate(pg); 111596ec8e93Smpi } 111696ec8e93Smpi uvm_unlock_pageq(); 111796ec8e93Smpi 111896ec8e93Smpi if (flt->wired) { 11196d51fca8Smpi /* 11206d51fca8Smpi * since the now-wired page cannot be paged out, 11216d51fca8Smpi * release its swap resources for others to use. 11226d51fca8Smpi * since an anon with no swap cannot be PG_CLEAN, 11236d51fca8Smpi * clear its clean flag now. 11246d51fca8Smpi */ 11256d51fca8Smpi atomic_clearbits_int(&pg->pg_flags, PG_CLEAN); 11266d51fca8Smpi uvm_anon_dropswap(anon); 11276d51fca8Smpi } 11286d51fca8Smpi 11292ed91a58Smpi /* 11302ed91a58Smpi * done case 1! 
finish up by unlocking everything and returning success 11312ed91a58Smpi */ 11326d51fca8Smpi uvmfault_unlockall(ufi, amap, NULL); 11336d51fca8Smpi pmap_update(ufi->orig_map->pmap); 11346d51fca8Smpi return 0; 11356d51fca8Smpi } 11366d51fca8Smpi 1137cd713f80Smpi /* 1138a3afc610Smpi * uvm_fault_lower_lookup: look up on-memory uobj pages. 1139cd713f80Smpi * 1140a3afc610Smpi * 1. get on-memory pages. 1141a3afc610Smpi * 2. if failed, give up (get only center page later). 1142a3afc610Smpi * 3. if succeeded, enter h/w mapping of neighbor pages. 1143cd713f80Smpi */ 1144427225b6Smpi 1145a3afc610Smpi struct vm_page * 1146a3afc610Smpi uvm_fault_lower_lookup( 1147a3afc610Smpi struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, 1148a3afc610Smpi struct vm_page **pages) 1149a3afc610Smpi { 1150a3afc610Smpi struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1151a3afc610Smpi struct vm_page *uobjpage = NULL; 11525797ad06Smpi int lcv, gotpages, entered; 1153a3afc610Smpi vaddr_t currva; 11545797ad06Smpi paddr_t pa; 1155a3afc610Smpi 1156552563d5Smpi rw_enter(uobj->vmobjlock, flt->lower_lock_type); 115769c04514Smpi 1158627a59d1Smpi counters_inc(uvmexp_counters, flt_lget); 11593053940aSmpi gotpages = flt->npages; 1160a3afc610Smpi (void) uobj->pgops->pgo_get(uobj, 1161a3afc610Smpi ufi->entry->offset + (flt->startva - ufi->entry->start), 11623053940aSmpi pages, &gotpages, flt->centeridx, 1163a3afc610Smpi flt->access_type & MASK(ufi->entry), ufi->entry->advice, 1164a3afc610Smpi PGO_LOCKED); 1165cd7ee8acSart 1166a3afc610Smpi /* 1167a3afc610Smpi * check for pages to map, if we got any 1168a3afc610Smpi */ 1169a3afc610Smpi if (gotpages == 0) { 1170a3afc610Smpi return NULL; 1171a3afc610Smpi } 1172a3afc610Smpi 11735797ad06Smpi entered = 0; 11743053940aSmpi currva = flt->startva; 1175a3afc610Smpi for (lcv = 0; lcv < flt->npages; lcv++, currva += PAGE_SIZE) { 1176cd7ee8acSart if (pages[lcv] == NULL || 1177cd7ee8acSart pages[lcv] == PGO_DONTCARE) 1178cd7ee8acSart continue; 1179cd7ee8acSart 11800528dcd0Smpi KASSERT((pages[lcv]->pg_flags & PG_BUSY) == 0); 11819662fca4Sart KASSERT((pages[lcv]->pg_flags & PG_RELEASED) == 0); 1182cd7ee8acSart 1183cd7ee8acSart /* 11840528dcd0Smpi * if center page is resident and not PG_BUSY, then pgo_get 11850528dcd0Smpi * gave us a handle to it. 11860528dcd0Smpi * remember this page as "uobjpage." (for later use). 1187cd7ee8acSart */ 11883053940aSmpi if (lcv == flt->centeridx) { 1189cd7ee8acSart uobjpage = pages[lcv]; 1190cd7ee8acSart continue; 1191cd7ee8acSart } 1192cd7ee8acSart 11935797ad06Smpi if (pmap_extract(ufi->orig_map->pmap, currva, &pa)) 11940528dcd0Smpi continue; 1195cd7ee8acSart 11960528dcd0Smpi /* 11970528dcd0Smpi * calling pgo_get with PGO_LOCKED returns us pages which 11980528dcd0Smpi * are neither busy nor released, so we don't need to check 11990528dcd0Smpi * for this. we can just directly enter the pages. 12000528dcd0Smpi */ 12015797ad06Smpi if (pages[lcv]->wire_count == 0) { 1202cd7ee8acSart uvm_lock_pageq(); 12035797ad06Smpi uvm_pageactivate(pages[lcv]); 1204cd7ee8acSart uvm_unlock_pageq(); 12055797ad06Smpi } 1206627a59d1Smpi counters_inc(uvmexp_counters, flt_nomap); 1207cac1bff1Sart 12088a233859Smpi /* No fault-ahead when wired. */ 12098a233859Smpi KASSERT(flt->wired == FALSE); 12108a233859Smpi 121165f111fbSart /* 12120528dcd0Smpi * Since this page isn't the page that's actually faulting, 12130528dcd0Smpi * ignore pmap_enter() failures; it's not critical that we 121465f111fbSart * enter these right now. 
12150528dcd0Smpi * NOTE: page can't be PG_WANTED or PG_RELEASED because we've 12160528dcd0Smpi * held the lock the whole time we've had the handle. 121765f111fbSart */ 12183053940aSmpi (void) pmap_enter(ufi->orig_map->pmap, currva, 12193053940aSmpi VM_PAGE_TO_PHYS(pages[lcv]) | flt->pa_flags, 12208a233859Smpi flt->enter_prot & MASK(ufi->entry), PMAP_CANFAIL); 12215797ad06Smpi entered++; 1222cd7ee8acSart 1223a3afc610Smpi } 12245797ad06Smpi if (entered > 0) 12253053940aSmpi pmap_update(ufi->orig_map->pmap); 1226a3afc610Smpi 1227a3afc610Smpi return uobjpage; 1228a3afc610Smpi } 1229a3afc610Smpi 1230a3afc610Smpi /* 1231a3afc610Smpi * uvm_fault_lower: handle lower fault. 1232a3afc610Smpi * 1233cce913b9Smpi * 1. check uobj 1234cce913b9Smpi * 1.1. if null, ZFOD. 1235cce913b9Smpi * 1.2. if not null, look up unnmapped neighbor pages. 1236cce913b9Smpi * 2. for center page, check if promote. 1237cce913b9Smpi * 2.1. ZFOD always needs promotion. 1238cce913b9Smpi * 2.2. other uobjs, when entry is marked COW (usually MAP_PRIVATE vnode). 1239cce913b9Smpi * 3. if uobj is not ZFOD and page is not found, do i/o. 1240cce913b9Smpi * 4. dispatch either direct / promote fault. 1241a3afc610Smpi */ 1242a3afc610Smpi int 1243a3afc610Smpi uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, 124458243cbfSmpi struct vm_page **pages) 1245a3afc610Smpi { 1246a3afc610Smpi struct vm_amap *amap = ufi->entry->aref.ar_amap; 1247a3afc610Smpi struct uvm_object *uobj = ufi->entry->object.uvm_obj; 1248cce913b9Smpi int dropswap = 0; 1249a3afc610Smpi struct vm_page *uobjpage, *pg = NULL; 1250a3afc610Smpi struct vm_anon *anon = NULL; 1251cce913b9Smpi int error; 1252a3afc610Smpi 1253a3afc610Smpi /* 1254a3afc610Smpi * now, if the desired page is not shadowed by the amap and we have 1255a3afc610Smpi * a backing object that does not have a special fault routine, then 1256a3afc610Smpi * we ask (with pgo_get) the object for resident pages that we care 1257a3afc610Smpi * about and attempt to map them in. we do not let pgo_get block 1258a3afc610Smpi * (PGO_LOCKED). 1259a3afc610Smpi */ 1260a3afc610Smpi if (uobj == NULL) { 126152887a38Smpi /* zero fill; don't care neighbor pages */ 1262cd7ee8acSart uobjpage = NULL; 1263a3afc610Smpi } else { 1264a3afc610Smpi uobjpage = uvm_fault_lower_lookup(ufi, flt, pages); 1265cd7ee8acSart } 1266cd7ee8acSart 1267cd7ee8acSart /* 1268cd7ee8acSart * note that at this point we are done with any front or back pages. 1269cd7ee8acSart * we are now going to focus on the center page (i.e. the one we've 1270427225b6Smpi * faulted on). if we have faulted on the bottom (uobj) 1271cd7ee8acSart * layer [i.e. case 2] and the page was both present and available, 1272cd7ee8acSart * then we've got a pointer to it as "uobjpage" and we've already 1273cd7ee8acSart * made it BUSY. 1274cd7ee8acSart */ 1275cd7ee8acSart 1276cd7ee8acSart /* 127769c04514Smpi * locked: 127869c04514Smpi */ 127969c04514Smpi KASSERT(amap == NULL || 128069c04514Smpi rw_write_held(amap->am_lock)); 128169c04514Smpi KASSERT(uobj == NULL || 1282552563d5Smpi rw_status(uobj->vmobjlock) == flt->lower_lock_type); 128369c04514Smpi 128469c04514Smpi /* 1285cd7ee8acSart * note that uobjpage can not be PGO_DONTCARE at this point. we now 1286cd7ee8acSart * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we 1287cd7ee8acSart * have a backing object, check and see if we are going to promote 1288cd7ee8acSart * the data up to an anon during the fault. 
1289cd7ee8acSart */ 1290cd7ee8acSart if (uobj == NULL) { 1291cd7ee8acSart uobjpage = PGO_DONTCARE; 1292cce913b9Smpi flt->promote = TRUE; /* always need anon here */ 1293cd7ee8acSart } else { 12941496ff33Sart KASSERT(uobjpage != PGO_DONTCARE); 1295cce913b9Smpi flt->promote = (flt->access_type & PROT_WRITE) && 12963053940aSmpi UVM_ET_ISCOPYONWRITE(ufi->entry); 1297cd7ee8acSart } 1298cd7ee8acSart 1299cd7ee8acSart /* 1300cd7ee8acSart * if uobjpage is not null then we do not need to do I/O to get the 1301cd7ee8acSart * uobjpage. 1302cd7ee8acSart * 1303b8a635f6Stedu * if uobjpage is null, then we need to ask the pager to 1304cd7ee8acSart * get the data for us. once we have the data, we need to reverify 1305cd7ee8acSart * the state the world. we are currently not holding any resources. 1306cd7ee8acSart */ 1307cd7ee8acSart if (uobjpage) { 1308cd7ee8acSart /* update rusage counters */ 13098f15e6a4Sguenther curproc->p_ru.ru_minflt++; 13100528dcd0Smpi if (uobjpage != PGO_DONTCARE) { 13110528dcd0Smpi uvm_lock_pageq(); 13120528dcd0Smpi uvm_pageactivate(uobjpage); 13130528dcd0Smpi uvm_unlock_pageq(); 13140528dcd0Smpi } 1315cd7ee8acSart } else { 1316cce913b9Smpi error = uvm_fault_lower_io(ufi, flt, &uobj, &uobjpage); 1317cce913b9Smpi if (error != 0) 1318cce913b9Smpi return error; 1319cd7ee8acSart } 1320cd7ee8acSart 1321cd7ee8acSart /* 1322cd7ee8acSart * notes: 1323cd7ee8acSart * - at this point uobjpage can not be NULL 1324cd7ee8acSart * - at this point uobjpage could be PG_WANTED (handle later) 1325cd7ee8acSart */ 1326cce913b9Smpi if (flt->promote == FALSE) { 1327cd7ee8acSart /* 1328cd7ee8acSart * we are not promoting. if the mapping is COW ensure that we 1329cd7ee8acSart * don't give more access than we should (e.g. when doing a read 1330cd7ee8acSart * fault on a COPYONWRITE mapping we want to map the COW page in 1331cd7ee8acSart * R/O even though the entry protection could be R/W). 1332cd7ee8acSart * 1333cd7ee8acSart * set "pg" to the page we want to map in (uobjpage, usually) 1334cd7ee8acSart */ 1335627a59d1Smpi counters_inc(uvmexp_counters, flt_obj); 13363053940aSmpi if (UVM_ET_ISCOPYONWRITE(ufi->entry)) 13373053940aSmpi flt->enter_prot &= ~PROT_WRITE; 1338cd7ee8acSart pg = uobjpage; /* map in the actual object */ 1339cd7ee8acSart 1340cd7ee8acSart /* assert(uobjpage != PGO_DONTCARE) */ 1341cd7ee8acSart 1342cd7ee8acSart /* 13436f909936Svisa * we are faulting directly on the page. 1344cd7ee8acSart */ 1345cd7ee8acSart } else { 1346552563d5Smpi KASSERT(amap != NULL); 1347552563d5Smpi 1348552563d5Smpi /* promoting requires a write lock. */ 1349552563d5Smpi KASSERT(rw_write_held(amap->am_lock)); 1350552563d5Smpi KASSERT(uobj == NULL || 1351552563d5Smpi rw_status(uobj->vmobjlock) == flt->lower_lock_type); 1352552563d5Smpi 1353cd7ee8acSart /* 1354cd7ee8acSart * if we are going to promote the data to an anon we 1355cd7ee8acSart * allocate a blank anon here and plug it into our amap. 1356cd7ee8acSart */ 1357dceff774Smpi error = uvmfault_promote(ufi, uobjpage, &anon, &pg); 1358dceff774Smpi if (error) 1359dceff774Smpi return error; 1360cd7ee8acSart 136152887a38Smpi /* 136252887a38Smpi * fill in the data 136352887a38Smpi */ 1364cd7ee8acSart if (uobjpage != PGO_DONTCARE) { 1365627a59d1Smpi counters_inc(uvmexp_counters, flt_prcopy); 1366cd7ee8acSart 1367cd7ee8acSart /* 1368cd7ee8acSart * promote to shared amap? 
1369cd7ee8acSart  * procs see it
1370cd7ee8acSart  */
1371cd7ee8acSart if ((amap_flags(amap) & AMAP_SHARED) != 0) {
13721e8cdc2eSderaadt pmap_page_protect(uobjpage, PROT_NONE);
1373cd7ee8acSart }
1374b3774972Sguenther #if defined(MULTIPROCESSOR) && !defined(__HAVE_PMAP_MPSAFE_ENTER_COW)
1375b3774972Sguenther /*
1376b3774972Sguenther  * Otherwise:
1377b3774972Sguenther  * If there are multiple threads, either uvm or the
1378b3774972Sguenther  * pmap has to make sure no threads see the old RO
1379b3774972Sguenther  * mapping once any have seen the new RW mapping.
1380b3774972Sguenther  * uvm does it here by forcing it to PROT_NONE before
1381b3774972Sguenther  * inserting the new mapping.
1382b3774972Sguenther  */
1383b3774972Sguenther else if (P_HASSIBLING(curproc)) {
1384b3774972Sguenther pmap_page_protect(uobjpage, PROT_NONE);
1385b3774972Sguenther }
1386b3774972Sguenther #endif
138707c549d8Smpi /* done with copied uobjpage. */
138869c04514Smpi rw_exit(uobj->vmobjlock);
1389cd7ee8acSart uobj = NULL;
1390cd7ee8acSart } else {
1391627a59d1Smpi counters_inc(uvmexp_counters, flt_przero);
1392b1990b04Sart /*
1393dceff774Smpi  * Page is zero'd and marked dirty by uvm_pagealloc(),
1394dceff774Smpi  * called in uvmfault_promote() above.
1395b1990b04Sart  */
1396cd7ee8acSart }
1397cd7ee8acSart
13983053940aSmpi if (amap_add(&ufi->entry->aref,
13993053940aSmpi ufi->orig_rvaddr - ufi->entry->start, anon, 0)) {
1400b16b5f31Smpi if (pg->pg_flags & PG_WANTED)
1401b16b5f31Smpi wakeup(pg);
1402b16b5f31Smpi
1403b16b5f31Smpi atomic_clearbits_int(&pg->pg_flags,
1404b16b5f31Smpi PG_BUSY|PG_FAKE|PG_WANTED);
1405b16b5f31Smpi UVM_PAGE_OWN(pg, NULL);
140669c04514Smpi uvmfault_unlockall(ufi, amap, uobj);
14074bfd0d76Sstefan uvm_anfree(anon);
1408627a59d1Smpi counters_inc(uvmexp_counters, flt_noamap);
14094bfd0d76Sstefan
1410afd3b31eSmpi if (uvm_swapisfull())
14114bfd0d76Sstefan return (ENOMEM);
14124bfd0d76Sstefan
14133053940aSmpi amap_populate(&ufi->entry->aref,
14143053940aSmpi ufi->orig_rvaddr - ufi->entry->start);
14153053940aSmpi return ERESTART;
14164bfd0d76Sstefan }
1417cd7ee8acSart }
1418cd7ee8acSart
1419552563d5Smpi /*
1420552563d5Smpi  * anon must be write locked (promotion). uobj can be either.
1421552563d5Smpi  *
1422552563d5Smpi  * Note: pg is either the uobjpage or the new page in the new anon.
1423552563d5Smpi  */
1424552563d5Smpi KASSERT(amap == NULL ||
1425552563d5Smpi rw_write_held(amap->am_lock));
1426552563d5Smpi KASSERT(uobj == NULL ||
1427552563d5Smpi rw_status(uobj->vmobjlock) == flt->lower_lock_type);
1428552563d5Smpi KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
1429552563d5Smpi
1430cd7ee8acSart /*
1431cd7ee8acSart  * all resources are present. we can now map it in and free our
1432cd7ee8acSart  * resources.
1433cd7ee8acSart  */
14343053940aSmpi if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
14353053940aSmpi VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
1436b004aefeSmpi flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
143765f111fbSart /*
143865f111fbSart  * No need to undo what we did; we can simply think of
143965f111fbSart  * this as the pmap throwing away the mapping information.
144065f111fbSart  *
144165f111fbSart  * We do, however, have to go through the ReFault path,
144265f111fbSart  * as the map may change while we're asleep.
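 * (PMAP_CANFAIL above is what lets pmap_enter() return an error here
 * instead of panicking when it runs out of resources.)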
144365f111fbSart  */
14449662fca4Sart if (pg->pg_flags & PG_WANTED)
1445b8a635f6Stedu wakeup(pg);
144665f111fbSart
144765d6360cSart atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
144865f111fbSart UVM_PAGE_OWN(pg, NULL);
14493053940aSmpi uvmfault_unlockall(ufi, amap, uobj);
1450afd3b31eSmpi if (uvm_swapisfull()) {
145165f111fbSart /* XXX instrumentation */
1452159b0ca6Sart return (ENOMEM);
145365f111fbSart }
1454679d40f1Skettenis #ifdef __HAVE_PMAP_POPULATE
1455679d40f1Skettenis pmap_populate(ufi->orig_map->pmap, ufi->orig_rvaddr);
1456679d40f1Skettenis #else
145765f111fbSart /* XXX instrumentation */
145865f111fbSart uvm_wait("flt_pmfail2");
1459679d40f1Skettenis #endif
14603053940aSmpi return ERESTART;
146165f111fbSart }
1462cd7ee8acSart
146369c04514Smpi uvm_lock_pageq();
146496ec8e93Smpi if (flt->wired) {
1465cd7ee8acSart uvm_pagewire(pg);
146665d6360cSart if (pg->pg_flags & PQ_AOBJ) {
14678a42ed70Sart /*
14688a42ed70Sart  * since the now-wired page cannot be paged out,
14698a42ed70Sart  * release its swap resources for others to use.
147069c04514Smpi  * since an aobj page with no swap cannot be clean,
147169c04514Smpi  * mark it dirty now.
147269c04514Smpi  *
147369c04514Smpi  * use pg->uobject here. if the page is from a
147469c04514Smpi  * tmpfs vnode, the pages are backed by its UAO and
147569c04514Smpi  * not the vnode.
14768a42ed70Sart  */
147769c04514Smpi KASSERT(uobj != NULL);
147869c04514Smpi KASSERT(uobj->vmobjlock == pg->uobject->vmobjlock);
147965d6360cSart atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
148096ec8e93Smpi dropswap = 1;
14818a42ed70Sart }
14828a42ed70Sart } else {
1483cd7ee8acSart uvm_pageactivate(pg);
1484e5ad67b7Smpi }
148596ec8e93Smpi uvm_unlock_pageq();
148696ec8e93Smpi
148796ec8e93Smpi if (dropswap)
148896ec8e93Smpi uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
1489cd7ee8acSart
14909662fca4Sart if (pg->pg_flags & PG_WANTED)
1491b8a635f6Stedu wakeup(pg);
1492cd7ee8acSart
149365d6360cSart atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE|PG_WANTED);
1494cd7ee8acSart UVM_PAGE_OWN(pg, NULL);
14953053940aSmpi uvmfault_unlockall(ufi, amap, uobj);
14963053940aSmpi pmap_update(ufi->orig_map->pmap);
1497cd7ee8acSart
149873c19439Sart return (0);
1499cd7ee8acSart }
1500cd7ee8acSart
1501cce913b9Smpi /*
1502cce913b9Smpi  * uvm_fault_lower_io: get lower page from backing store.
1503cce913b9Smpi  *
1504cce913b9Smpi  * 1. unlock everything, because i/o will block.
1505cce913b9Smpi  * 2. call pgo_get.
1506cce913b9Smpi  * 3. if failed, recover.
1507cce913b9Smpi  * 4. if succeeded, relock everything and verify things.
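 *
 * => on success, *ruobjpage is returned busy with the object lock held.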
1508cce913b9Smpi  */
1509cce913b9Smpi int
1510cce913b9Smpi uvm_fault_lower_io(
1511cce913b9Smpi struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
1512cce913b9Smpi struct uvm_object **ruobj, struct vm_page **ruobjpage)
1513cce913b9Smpi {
1514cce913b9Smpi struct vm_amap * const amap = ufi->entry->aref.ar_amap;
1515cce913b9Smpi struct uvm_object *uobj = *ruobj;
1516cce913b9Smpi struct vm_page *pg;
1517cce913b9Smpi boolean_t locked;
15188774a958Smpi int gotpages, advice;
1519cce913b9Smpi int result;
1520cce913b9Smpi voff_t uoff;
15218774a958Smpi vm_prot_t access_type;
15228774a958Smpi
15238774a958Smpi /* grab everything we need from the entry before we unlock */
15248774a958Smpi uoff = (ufi->orig_rvaddr - ufi->entry->start) + ufi->entry->offset;
15258774a958Smpi access_type = flt->access_type & MASK(ufi->entry);
15268774a958Smpi advice = ufi->entry->advice;
15278774a958Smpi
15288774a958Smpi uvmfault_unlockall(ufi, amap, NULL);
1529cce913b9Smpi
1530cce913b9Smpi /* update rusage counters */
1531cce913b9Smpi curproc->p_ru.ru_majflt++;
1532cce913b9Smpi
15338774a958Smpi KASSERT(rw_write_held(uobj->vmobjlock));
1534cce913b9Smpi
1535cce913b9Smpi counters_inc(uvmexp_counters, flt_get);
1536cce913b9Smpi gotpages = 1;
1537cce913b9Smpi pg = NULL;
1538cce913b9Smpi result = uobj->pgops->pgo_get(uobj, uoff, &pg, &gotpages,
15398774a958Smpi 0, access_type, advice, PGO_SYNCIO);
1540cce913b9Smpi
1541cce913b9Smpi /*
1542cce913b9Smpi  * recover from I/O
1543cce913b9Smpi  */
1544cce913b9Smpi if (result != VM_PAGER_OK) {
1545cce913b9Smpi KASSERT(result != VM_PAGER_PEND);
1546cce913b9Smpi
1547cce913b9Smpi if (result == VM_PAGER_AGAIN) {
1548cce913b9Smpi tsleep_nsec(&nowake, PVM, "fltagain2", MSEC_TO_NSEC(5));
1549cce913b9Smpi return ERESTART;
1550cce913b9Smpi }
1551cce913b9Smpi
1552cce913b9Smpi if (!UVM_ET_ISNOFAULT(ufi->entry))
1553cce913b9Smpi return (EIO);
1554cce913b9Smpi
1555cce913b9Smpi pg = PGO_DONTCARE;
1556cce913b9Smpi uobj = NULL;
1557cce913b9Smpi flt->promote = TRUE;
1558cce913b9Smpi }
1559cce913b9Smpi
1560cce913b9Smpi /* re-verify the state of the world. */
1561cce913b9Smpi locked = uvmfault_relock(ufi);
1562cce913b9Smpi if (locked && amap != NULL)
1563335383c9Smpi amap_lock(amap, RW_WRITE);
1564cce913b9Smpi
1565cce913b9Smpi /* might be changed */
1566cce913b9Smpi if (pg != PGO_DONTCARE) {
1567cce913b9Smpi uobj = pg->uobject;
1568552563d5Smpi rw_enter(uobj->vmobjlock, flt->lower_lock_type);
1569552563d5Smpi KASSERT((pg->pg_flags & PG_BUSY) != 0);
1570552563d5Smpi KASSERT(flt->lower_lock_type == RW_WRITE);
1571cce913b9Smpi }
1572cce913b9Smpi
1573cce913b9Smpi /*
1574cce913b9Smpi  * Re-verify that amap slot is still free. if there is
1575cce913b9Smpi  * a problem, we clean up.
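 * (e.g. another thread may have faulted on the same address and
 * installed an anon there while we were unlocked doing I/O.)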
1576cce913b9Smpi  */
1577cce913b9Smpi if (locked && amap && amap_lookup(&ufi->entry->aref,
1578cce913b9Smpi ufi->orig_rvaddr - ufi->entry->start)) {
1579cce913b9Smpi if (locked)
1580cce913b9Smpi uvmfault_unlockall(ufi, amap, NULL);
1581cce913b9Smpi locked = FALSE;
1582cce913b9Smpi }
1583cce913b9Smpi
15840528dcd0Smpi /* release the page now, still holding object lock */
15850528dcd0Smpi if (pg != PGO_DONTCARE) {
1586cce913b9Smpi uvm_lock_pageq();
1587cce913b9Smpi uvm_pageactivate(pg);
1588cce913b9Smpi uvm_unlock_pageq();
1589cce913b9Smpi
1590cce913b9Smpi if (pg->pg_flags & PG_WANTED)
1591cce913b9Smpi wakeup(pg);
1592cce913b9Smpi atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_WANTED);
1593cce913b9Smpi UVM_PAGE_OWN(pg, NULL);
1594cce913b9Smpi }
1595cce913b9Smpi
1596cce913b9Smpi if (locked == FALSE) {
1597cce913b9Smpi if (pg != PGO_DONTCARE)
1598cce913b9Smpi rw_exit(uobj->vmobjlock);
1599cce913b9Smpi return ERESTART;
1600cce913b9Smpi }
1601cce913b9Smpi
1602cce913b9Smpi /*
16030528dcd0Smpi  * we have the data in pg. we are holding object lock (so the page
16040528dcd0Smpi  * can't be released on us).
1605cce913b9Smpi  */
1606cce913b9Smpi *ruobj = uobj;
1607cce913b9Smpi *ruobjpage = pg;
1608cce913b9Smpi return 0;
1609cce913b9Smpi }
1610cd7ee8acSart
1611cd7ee8acSart /*
1612cd7ee8acSart  * uvm_fault_wire: wire down a range of virtual addresses in a map.
1613cd7ee8acSart  *
161407b6088bSart  * => map may be read-locked by caller, but MUST NOT be write-locked.
161507b6088bSart  * => if map is read-locked, any operations which may cause map to
161607b6088bSart  * be write-locked in uvm_fault() must be taken care of by
161707b6088bSart  * the caller. See uvm_map_pageable().
1618cd7ee8acSart  */
1619cd7ee8acSart int
16202023d591Soga uvm_fault_wire(vm_map_t map, vaddr_t start, vaddr_t end, vm_prot_t access_type)
1621cd7ee8acSart {
1622cd7ee8acSart vaddr_t va;
16231414b0faSart int rv;
16241414b0faSart
1625cd7ee8acSart /*
162628fbabcfSart  * now fault it in a page at a time. if the fault fails then we have
16271e8cdc2eSderaadt  * to undo what we have done. note that in uvm_fault PROT_NONE
162828fbabcfSart  * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
1629cd7ee8acSart  */
1630cd7ee8acSart for (va = start ; va < end ; va += PAGE_SIZE) {
16311414b0faSart rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
16321414b0faSart if (rv) {
1633cd7ee8acSart if (va != start) {
16347cb53682Sart uvm_fault_unwire(map, start, va);
1635cd7ee8acSart }
16361414b0faSart return (rv);
1637cd7ee8acSart }
1638cd7ee8acSart }
16391414b0faSart
164073c19439Sart return (0);
1641cd7ee8acSart }
1642cd7ee8acSart
1643cd7ee8acSart /*
1644cd7ee8acSart  * uvm_fault_unwire(): unwire range of virtual space.
1645cd7ee8acSart  */
1646cd7ee8acSart void
16472023d591Soga uvm_fault_unwire(vm_map_t map, vaddr_t start, vaddr_t end)
1648cd7ee8acSart {
164907b6088bSart
165007b6088bSart vm_map_lock_read(map);
165107b6088bSart uvm_fault_unwire_locked(map, start, end);
165207b6088bSart vm_map_unlock_read(map);
165307b6088bSart }
165407b6088bSart
165507b6088bSart /*
165607b6088bSart  * uvm_fault_unwire_locked(): the guts of uvm_fault_unwire().
165707b6088bSart  *
165807b6088bSart  * => map must be at least read-locked.
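 * => map entries covering the range are locked one at a time while
 *    their pages are unwired.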
165907b6088bSart  */
166007b6088bSart void
16612023d591Soga uvm_fault_unwire_locked(vm_map_t map, vaddr_t start, vaddr_t end)
166207b6088bSart {
166369c04514Smpi vm_map_entry_t entry, oentry = NULL, next;
16647cb53682Sart pmap_t pmap = vm_map_pmap(map);
1665cd7ee8acSart vaddr_t va;
1666cd7ee8acSart paddr_t pa;
1667cd7ee8acSart struct vm_page *pg;
1668cd7ee8acSart
16691496ff33Sart KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
167055490b01Smpi vm_map_assert_anylock(map);
16717cb53682Sart
1672cd7ee8acSart /*
1673cd7ee8acSart  * we assume that the area we are unwiring has actually been wired
1674cd7ee8acSart  * in the first place. this means that we should be able to extract
167569c04514Smpi  * the PAs from the pmap.
1676cd7ee8acSart  */
167752887a38Smpi
167852887a38Smpi /*
167952887a38Smpi  * find the beginning map entry for the region.
168052887a38Smpi  */
1681cac1bff1Sart KASSERT(start >= vm_map_min(map) && end <= vm_map_max(map));
168207b6088bSart if (uvm_map_lookup_entry(map, start, &entry) == FALSE)
168307b6088bSart panic("uvm_fault_unwire_locked: address not in map");
168407b6088bSart
1685cd7ee8acSart for (va = start; va < end ; va += PAGE_SIZE) {
168652887a38Smpi /*
168752887a38Smpi  * find the map entry for the current address.
168852887a38Smpi  */
1689cac1bff1Sart KASSERT(va >= entry->start);
1690f77c8782Skettenis while (entry && va >= entry->end) {
1691415d6aa0Sdlg next = RBT_NEXT(uvm_map_addr, entry);
1692181c6205Sariane entry = next;
169307b6088bSart }
169407b6088bSart
1695f77c8782Skettenis if (entry == NULL)
1696f77c8782Skettenis return;
1697f77c8782Skettenis if (va < entry->start)
1698f77c8782Skettenis continue;
1699f77c8782Skettenis
170052887a38Smpi /*
170169c04514Smpi  * lock it.
170269c04514Smpi  */
170369c04514Smpi if (entry != oentry) {
170469c04514Smpi if (oentry != NULL) {
170569c04514Smpi uvm_map_unlock_entry(oentry);
170669c04514Smpi }
170769c04514Smpi uvm_map_lock_entry(entry);
170869c04514Smpi oentry = entry;
170969c04514Smpi }
171069c04514Smpi
17110535051cSmpi if (!pmap_extract(pmap, va, &pa))
17120535051cSmpi continue;
17130535051cSmpi
171469c04514Smpi /*
171552887a38Smpi  * if the entry is no longer wired, tell the pmap.
171652887a38Smpi  */
171707b6088bSart if (VM_MAPENT_ISWIRED(entry) == 0)
1718ebb3c897Sart pmap_unwire(pmap, va);
1719fb33f38cSniklas
1720cd7ee8acSart pg = PHYS_TO_VM_PAGE(pa);
172169c04514Smpi if (pg) {
172269c04514Smpi uvm_lock_pageq();
1723cd7ee8acSart uvm_pageunwire(pg);
172469c04514Smpi uvm_unlock_pageq();
172569c04514Smpi }
1726cd7ee8acSart }
1727cd7ee8acSart
172869c04514Smpi if (oentry != NULL) {
1729f77c8782Skettenis uvm_map_unlock_entry(oentry);
173069c04514Smpi }
1731cd7ee8acSart }
1732ca5c6958Soga
1733ca5c6958Soga /*
1734ca5c6958Soga  * uvmfault_unlockmaps: unlock the maps
1735ca5c6958Soga  */
1736ca5c6958Soga void
1737ca5c6958Soga uvmfault_unlockmaps(struct uvm_faultinfo *ufi, boolean_t write_locked)
1738ca5c6958Soga {
1739ca5c6958Soga /*
1740ca5c6958Soga  * ufi can be NULL when this isn't really a fault,
1741ca5c6958Soga  * but merely paging in anon data.
1742ca5c6958Soga  */
1743ca5c6958Soga if (ufi == NULL) {
1744ca5c6958Soga return;
1745ca5c6958Soga }
1746ca5c6958Soga
17470372dd1aSariane uvmfault_update_stats(ufi);
1748ca5c6958Soga if (write_locked) {
1749ca5c6958Soga vm_map_unlock(ufi->map);
1750ca5c6958Soga } else {
1751ca5c6958Soga vm_map_unlock_read(ufi->map);
1752ca5c6958Soga }
1753ca5c6958Soga }
1754ca5c6958Soga
1755ca5c6958Soga /*
1756ca5c6958Soga  * uvmfault_unlockall: unlock everything passed in.
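 * => amap and/or uobj may be NULL; NULL pointers are simply skipped.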
1757ca5c6958Soga  *
1758ca5c6958Soga  * => maps must be read-locked (not write-locked).
1759ca5c6958Soga  */
1760ca5c6958Soga void
1761ca5c6958Soga uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
1762ec3489eeSmpi struct uvm_object *uobj)
1763ca5c6958Soga {
176469c04514Smpi if (uobj)
176569c04514Smpi rw_exit(uobj->vmobjlock);
176619dcab73Smpi if (amap != NULL)
176719dcab73Smpi amap_unlock(amap);
1768ca5c6958Soga uvmfault_unlockmaps(ufi, FALSE);
1769ca5c6958Soga }
1770ca5c6958Soga
1771ca5c6958Soga /*
1772ca5c6958Soga  * uvmfault_lookup: lookup a virtual address in a map
1773ca5c6958Soga  *
1774ca5c6958Soga  * => caller must provide a uvm_faultinfo structure with the IN
1775ca5c6958Soga  * params properly filled in
1776ca5c6958Soga  * => we will lookup the map entry (handling submaps) as we go
1777ca5c6958Soga  * => if the lookup is a success we will return with the maps locked
1778ca5c6958Soga  * => if "write_lock" is TRUE, we write_lock the map, otherwise we only
1779ca5c6958Soga  * get a read lock.
1780ca5c6958Soga  * => note that submaps can only appear in the kernel and they are
1781ca5c6958Soga  * required to use the same virtual addresses as the map they
1782ca5c6958Soga  * are referenced by (thus address translation between the main
1783ca5c6958Soga  * map and the submap is unnecessary).
1784ca5c6958Soga  */
1785ca5c6958Soga
1786ca5c6958Soga boolean_t
1787ca5c6958Soga uvmfault_lookup(struct uvm_faultinfo *ufi, boolean_t write_lock)
1788ca5c6958Soga {
1789ca5c6958Soga vm_map_t tmpmap;
1790ca5c6958Soga
179152887a38Smpi /*
179252887a38Smpi  * init ufi values for lookup.
179352887a38Smpi  */
1794ca5c6958Soga ufi->map = ufi->orig_map;
1795ca5c6958Soga ufi->size = ufi->orig_size;
1796ca5c6958Soga
1797ca5c6958Soga /*
1798ca5c6958Soga  * keep going down levels until we are done. note that there can
1799ca5c6958Soga  * only be two levels so we won't loop very long.
1800ca5c6958Soga  */
1801ca5c6958Soga while (1) {
1802181c6205Sariane if (ufi->orig_rvaddr < ufi->map->min_offset ||
1803181c6205Sariane ufi->orig_rvaddr >= ufi->map->max_offset)
1804b9df1565Smpi return FALSE;
180597581e8aSariane
180635164244Stedu /* lock map */
1807ca5c6958Soga if (write_lock) {
1808ca5c6958Soga vm_map_lock(ufi->map);
1809ca5c6958Soga } else {
1810ca5c6958Soga vm_map_lock_read(ufi->map);
1811ca5c6958Soga }
1812ca5c6958Soga
181335164244Stedu /* lookup */
1814ca5c6958Soga if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr,
1815ca5c6958Soga &ufi->entry)) {
1816ca5c6958Soga uvmfault_unlockmaps(ufi, write_lock);
1817b9df1565Smpi return FALSE;
1818ca5c6958Soga }
1819ca5c6958Soga
182035164244Stedu /* reduce size if necessary */
1821ca5c6958Soga if (ufi->entry->end - ufi->orig_rvaddr < ufi->size)
1822ca5c6958Soga ufi->size = ufi->entry->end - ufi->orig_rvaddr;
1823ca5c6958Soga
1824ca5c6958Soga /*
1825ca5c6958Soga  * submap? replace map with the submap and lookup again.
1826ca5c6958Soga  * note: VAs in submaps must match VAs in main map.
1827ca5c6958Soga  */
1828ca5c6958Soga if (UVM_ET_ISSUBMAP(ufi->entry)) {
1829ca5c6958Soga tmpmap = ufi->entry->object.sub_map;
18306bc7ce64Soga uvmfault_unlockmaps(ufi, write_lock);
1831ca5c6958Soga ufi->map = tmpmap;
1832ca5c6958Soga continue;
1833ca5c6958Soga }
1834ca5c6958Soga
183552887a38Smpi /*
183652887a38Smpi  * got it!
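 * (record the map version below so that uvmfault_relock() can detect
 * whether the map changed while we were unlocked.)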
183752887a38Smpi  */
1838ca5c6958Soga ufi->mapv = ufi->map->timestamp;
1839b9df1565Smpi return TRUE;
1840ca5c6958Soga
184152887a38Smpi } /* while loop */
184252887a38Smpi
1843ca5c6958Soga /*NOTREACHED*/
1844ca5c6958Soga }
1845ca5c6958Soga
1846ca5c6958Soga /*
1847ca5c6958Soga  * uvmfault_relock: attempt to relock the same version of the map
1848ca5c6958Soga  *
1849ca5c6958Soga  * => fault data structures should be unlocked before calling.
1850ca5c6958Soga  * => on success (TRUE) the maps will be locked after the call.
1851ca5c6958Soga  */
1852ca5c6958Soga boolean_t
1853ca5c6958Soga uvmfault_relock(struct uvm_faultinfo *ufi)
1854ca5c6958Soga {
1855ca5c6958Soga /*
1856ca5c6958Soga  * ufi can be NULL when this isn't really a fault,
1857ca5c6958Soga  * but merely paging in anon data.
1858ca5c6958Soga  */
1859ca5c6958Soga if (ufi == NULL) {
1860ca5c6958Soga return TRUE;
1861ca5c6958Soga }
1862ca5c6958Soga
1863627a59d1Smpi counters_inc(uvmexp_counters, flt_relck);
1864ca5c6958Soga
1865ca5c6958Soga /*
1866ca5c6958Soga  * relock map. fail if version mismatch (in which case nothing
1867ca5c6958Soga  * gets locked).
1868ca5c6958Soga  */
1869ca5c6958Soga vm_map_lock_read(ufi->map);
1870ca5c6958Soga if (ufi->mapv != ufi->map->timestamp) {
1871ca5c6958Soga vm_map_unlock_read(ufi->map);
1872b9df1565Smpi return FALSE;
1873ca5c6958Soga }
1874ca5c6958Soga
1875627a59d1Smpi counters_inc(uvmexp_counters, flt_relckok);
1876b9df1565Smpi return TRUE; /* got it! */
1877ca5c6958Soga }
1878
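/*
 * Usage sketch (for illustration only): how the lookup/unlock/relock
 * helpers above fit together around a blocking operation.  Field and
 * function names are taken from this file; the surrounding error
 * handling is only assumed, not prescribed.
 *
 *	struct uvm_faultinfo ufi;
 *
 *	ufi.orig_map = map;
 *	ufi.orig_rvaddr = trunc_page(vaddr);
 *	ufi.orig_size = PAGE_SIZE;
 *	if (uvmfault_lookup(&ufi, FALSE) == FALSE)
 *		return EFAULT;			(address not mapped)
 *
 *	... fault processing; if we must sleep for I/O: ...
 *	uvmfault_unlockall(&ufi, amap, uobj);
 *	... blocking I/O ...
 *	if (uvmfault_relock(&ufi) == FALSE)
 *		return ERESTART;		(map changed; redo the fault)
 *	amap_lock(amap, RW_WRITE);
 *	... re-verify state before trusting the earlier lookups ...
 */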