/*	$OpenBSD: uvm_aobj.c,v 1.115 2024/12/27 12:04:40 mpi Exp $	*/
/*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
 * Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
 */
/*
 * uvm_aobj.c: anonymous memory uvm_object pager
 *
 * author: Chuck Silvers <chuq@chuq.com>
 * started: Jan-1998
 *
 * - design mostly from Chuck Cranor
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/stdint.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>

/*
 * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
 * keeping the list of resident pages, it may also keep a list of allocated
 * swap blocks.  Depending on the size of the object, this list is either
 * stored in an array (small objects) or in a hash table (large objects).
 */

/*
 * Note: for hash tables, we break the address space of the aobj into blocks
 * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
 */
#define UAO_SWHASH_CLUSTER_SHIFT	4
#define UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)

/* Get the "tag" for this page index. */
#define UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
#define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
    ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))

/* Given an ELT and a page index, find the swap slot. */
#define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
    ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])

/* Given an ELT, return its pageidx base. */
#define UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
    ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/* The hash function. */
#define UAO_SWHASH_HASH(aobj, idx) \
    (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
    & (aobj)->u_swhashmask)])
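
/*
 * Worked example of the macros above (illustrative only): with
 * UAO_SWHASH_CLUSTER_SHIFT == 4, a cluster covers 16 pages.  For page
 * index 0x123:
 *
 *	UAO_SWHASH_ELT_TAG(0x123)          == 0x12  (cluster number)
 *	UAO_SWHASH_ELT_PAGESLOT_IDX(0x123) == 0x3   (slot within cluster)
 *
 * so the slot lives in elt->slots[3] of the element whose tag is 0x12,
 * found in bucket (0x12 & u_swhashmask) of u_swhash.  An element with
 * tag 0x12 has UAO_SWHASH_ELT_PAGEIDX_BASE == 0x120.
 */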

/*
 * The threshold which determines whether we will use an array or a
 * hash table to store the list of allocated swap blocks.
 */
#define UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
#define UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define UAO_SWHASH_MAXBUCKETS		256
#define UAO_SWHASH_BUCKETS(pages) \
    (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
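
/*
 * In concrete terms (illustrative only): UAO_SWHASH_THRESHOLD is
 * 16 * 4 == 64 pages, i.e. 256KB with 4KB pages, so objects up to that
 * size keep a plain int array of swap slots.  A 1024-page object would
 * get min(1024 >> 4, 256) == 64 hash buckets.
 */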

/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.
 */
struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag' */
	int count;				/* our number of active slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
};

/*
 * uao_swhash: the swap hash table structure
 */
LIST_HEAD(uao_swhash, uao_swhash_elt);

/*
 * uao_swhash_elt_pool: pool of uao_swhash_elt structures
 */
struct pool uao_swhash_elt_pool;

/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *    (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 */
struct uvm_aobj {
	struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
	int u_pages;		 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	/*
	 * Either an array or hashtable (array of bucket heads) of
	 * offset -> swapslot mappings for the aobj.
	 */
#define u_swslots	u_swap.slot_array
#define u_swhash	u_swap.slot_hash
	union swslots {
		int			*slot_array;
		struct uao_swhash	*slot_hash;
	} u_swap;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
};

struct pool uvm_aobj_pool;

static struct uao_swhash_elt	*uao_find_swhash_elt(struct uvm_aobj *, int,
				     boolean_t);
static boolean_t		 uao_flush(struct uvm_object *, voff_t,
				     voff_t, int);
static void			 uao_free(struct uvm_aobj *);
static int			 uao_get(struct uvm_object *, voff_t,
				     vm_page_t *, int *, int, vm_prot_t,
				     int, int);
static boolean_t		 uao_pagein(struct uvm_aobj *, int, int);
static boolean_t		 uao_pagein_page(struct uvm_aobj *, int);

void	uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
void	uao_shrink_flush(struct uvm_object *, int, int);
int	uao_shrink_hash(struct uvm_object *, int);
int	uao_shrink_array(struct uvm_object *, int);
int	uao_shrink_convert(struct uvm_object *, int);

int	uao_grow_hash(struct uvm_object *, int);
int	uao_grow_array(struct uvm_object *, int);
int	uao_grow_convert(struct uvm_object *, int);

/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */
const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_flush = uao_flush,
	.pgo_get = uao_get,
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 *
 * Lock ordering: generally the locking order is object lock, then list lock.
 * In the case of swapoff we have to iterate over the list, and thus the
 * ordering is reversed.  In that case we must use trylocking to prevent
 * deadlock.
 */
static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);


/*
 * functions
 */
/*
 * hash table/array related functions
 */
/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 */
static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	 /* tag to search for */

	/*
	 * now search the bucket for the requested tag
	 */
	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag)
			return elt;
	}

	if (!create)
		return NULL;

	/*
	 * allocate a new entry for the bucket and init/insert it in
	 */
	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
	/*
	 * XXX We cannot sleep here as the hash table might disappear
	 * from under our feet.  And we run the risk of deadlocking
	 * the pagedaemon.  In fact this code will only be called by
	 * the pagedaemon and allocation will only fail if we
	 * exhausted the pagedaemon reserve.  In that case we're
	 * doomed anyway, so panic.
	 */
	if (elt == NULL)
		panic("%s: can't allocate entry", __func__);
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;

	return elt;
}

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 */
int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we never return a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP)
		return 0;

	/*
	 * if hashing, look in hash table.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, FALSE);

		if (elt)
			return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		else
			return 0;
	}

	/*
	 * otherwise, look in the array
	 */
	return aobj->u_swslots[pageidx];
}
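
/*
 * A return value of 0 from uao_find_swslot() means "no swap copy": the
 * page is either resident or has never been paged out.  A hypothetical
 * caller (this is roughly what uao_get() does below) would use it like:
 *
 *	slot = uao_find_swslot(uobj, pageidx);
 *	if (slot == 0)
 *		uvm_pagezero(pg);			(fresh anonymous page)
 *	else
 *		rv = uvm_swap_get(pg, slot, PGO_SYNCIO);
 */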

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 */
int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int oldslot;

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a slot
	 */
	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		if (slot == 0)
			return 0;		/* a clear is ok */

		/* but a set is not */
		printf("uao_set_swslot: uobj = %p\n", uobj);
		panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */
	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had no swap slot in the first place, and
		 * we are freeing.
		 */
		struct uao_swhash_elt *elt =
		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
		if (elt == NULL) {
			KASSERT(slot == 0);
			return 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.
		 */
		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;

			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}
/*
 * end of hash/array functions
 */
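
/*
 * The "old slot" return value is what makes the free-a-slot idiom work;
 * uao_dropswap() below is the canonical example:
 *
 *	slot = uao_set_swslot(uobj, pageidx, 0);	(detach the slot)
 *	if (slot)
 *		uvm_swap_free(slot, 1);			(then release it)
 */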

/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead
 */
static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));
	uao_dropswap_range(uobj, 0, 0);
	rw_exit(uobj->vmobjlock);

	if (UAO_USES_SWHASH(aobj)) {
		/*
		 * free the hash table itself.
		 */
		hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
	} else {
		free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	}

	/*
	 * finally free the aobj itself
	 */
	uvm_obj_destroy(uobj);
	pool_put(&uvm_aobj_pool, aobj);
}

/*
 * pager functions
 */

#ifdef TMPFS
/*
 * Shrink an aobj to a given number of pages. The procedure is always the same:
 * assess the necessity of data structure conversion (hash to array), secure
 * resources, flush pages and drop swap slots.
 */

void
uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
{
	KASSERT(startpg < endpg);
	KASSERT(uobj->uo_refs == 1);
	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
	uao_dropswap_range(uobj, startpg, endpg);
}

int
uao_shrink_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(UAO_USES_SWHASH(aobj));

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		uao_shrink_flush(uobj, pages, aobj->u_pages);
		aobj->u_pages = pages;
		return 0;
	}

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/*
	 * Even though the hash table size is changing, the hash of the buckets
	 * we are interested in copying should not change.  (The flush above
	 * removed every entry with a tag beyond the new size, so the remaining
	 * bucket indices are identical under both the old and the new mask.)
	 */
	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}

int
uao_shrink_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	/* Convert swap slots from hash to array. */
	for (i = 0; i < pages; i++) {
		elt = uao_find_swhash_elt(aobj, i, FALSE);
		if (elt != NULL) {
			new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
			if (new_swslots[i] != 0)
				elt->count--;
			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_shrink_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	uao_shrink_flush(uobj, pages, aobj->u_pages);

	for (i = 0; i < pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_shrink(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(pages < aobj->u_pages);

	/*
	 * Distinguish between three possible cases:
	 * 1. aobj uses hash and must be converted to array.
	 * 2. aobj uses array and array size needs to be adjusted.
	 * 3. aobj uses hash and hash size needs to be adjusted.
	 */
	if (pages > UAO_SWHASH_THRESHOLD)
		return uao_shrink_hash(uobj, pages);	/* case 3 */
	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
		return uao_shrink_convert(uobj, pages);	/* case 1 */
	else
		return uao_shrink_array(uobj, pages);	/* case 2 */
}

/*
 * Grow an aobj to a given number of pages. Right now we only adjust the swap
 * slots. We could additionally handle page allocation directly, so that they
 * don't happen through uvm_fault(). That would allow us to use another
 * mechanism for the swap slots other than malloc(). It is thus mandatory that
 * the caller of these functions does not allow faults to happen in case of
 * growth error.
 */
int
uao_grow_array(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int i, *new_swslots;

	KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);

	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL | M_ZERO);
	if (new_swslots == NULL)
		return ENOMEM;

	for (i = 0; i < aobj->u_pages; i++)
		new_swslots[i] = aobj->u_swslots[i];

	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));

	aobj->u_swslots = new_swslots;
	aobj->u_pages = pages;

	return 0;
}

int
uao_grow_hash(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i;

	KASSERT(pages > UAO_SWHASH_THRESHOLD);

	/*
	 * If the size of the hash table doesn't change, all we need to do is
	 * to adjust the page count.
	 */
	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
		aobj->u_pages = pages;
		return 0;
	}

	KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
			elt = LIST_FIRST(&aobj->u_swhash[i]);
			LIST_REMOVE(elt, list);
			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
		}
	}

	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);

	aobj->u_swhash = new_swhash;
	aobj->u_pages = pages;
	aobj->u_swhashmask = new_hashmask;

	return 0;
}

int
uao_grow_convert(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash *new_swhash;
	struct uao_swhash_elt *elt;
	unsigned long new_hashmask;
	int i, *old_swslots;

	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
	    M_WAITOK | M_CANFAIL, &new_hashmask);
	if (new_swhash == NULL)
		return ENOMEM;

	/* Set these now, so we can use uao_find_swhash_elt(). */
	old_swslots = aobj->u_swslots;
	aobj->u_swhash = new_swhash;
	aobj->u_swhashmask = new_hashmask;

	for (i = 0; i < aobj->u_pages; i++) {
		if (old_swslots[i] != 0) {
			elt = uao_find_swhash_elt(aobj, i, TRUE);
			elt->count++;
			UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
		}
	}

	free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
	aobj->u_pages = pages;

	return 0;
}

int
uao_grow(struct uvm_object *uobj, int pages)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERT(pages > aobj->u_pages);

	/*
	 * Distinguish between three possible cases:
	 * 1. aobj uses hash and hash size needs to be adjusted.
	 * 2. aobj uses array and array size needs to be adjusted.
	 * 3. aobj uses array and must be converted to hash.
	 */
	if (pages <= UAO_SWHASH_THRESHOLD)
		return uao_grow_array(uobj, pages);	/* case 2 */
	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
		return uao_grow_hash(uobj, pages);	/* case 1 */
	else
		return uao_grow_convert(uobj, pages);	/* case 3 */
}
#endif /* TMPFS */
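
/*
 * A few worked dispatch examples (illustrative only, with the 64-page
 * threshold computed above):
 *
 *	uao_shrink() from 100 to 80 pages: both above the threshold,
 *	    the hash stays and only shrinks        -> uao_shrink_hash()
 *	uao_shrink() from 100 to 32 pages: crosses the threshold
 *	    downwards, hash becomes an array       -> uao_shrink_convert()
 *	uao_grow() from 32 to 100 pages:   crosses it upwards,
 *	    array becomes a hash                   -> uao_grow_convert()
 */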

/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
 */
struct uvm_object *
uao_create(vsize_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static struct rwlock bootstrap_kernel_object_lock;
	static int kobj_alloced = 0;
	int pages = round_page(size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */
	if (flags & UAO_FLAG_KERNOBJ) {
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
	} else {
		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * allocate hash/array if necessary
	 */
	if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
		int mflags;

		if (flags)
			mflags = M_NOWAIT;
		else
			mflags = M_WAITOK;

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
			if (aobj->u_swhash == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: hashinit swhash failed");
			}
		} else {
			aobj->u_swslots = mallocarray(pages, sizeof(int),
			    M_UVMAOBJ, mflags|M_ZERO);
			if (aobj->u_swslots == NULL) {
				if (flags & UAO_FLAG_CANFAIL) {
					pool_put(&uvm_aobj_pool, aobj);
					return NULL;
				}
				panic("uao_create: malloc swslots failed");
			}
		}

		if (flags & UAO_FLAG_KERNSWAP) {
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
			/* done! */
		}
	}

	/*
	 * Initialise UVM object.
	 */
	uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
	if (flags & UAO_FLAG_KERNOBJ) {
		/* Use a temporary static lock for kernel_object. */
		rw_init(&bootstrap_kernel_object_lock, "kobjlk");
		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */
	mtx_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mtx_leave(&uao_list_lock);

	return &aobj->u_obj;
}



/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 */
void
uao_init(void)
{
	/*
	 * NOTE: Pages for this pool must not come from a pageable
	 * kernel map!
	 */
	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
	    IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
	    IPL_NONE, PR_WAITOK, "aobjpl", NULL);
}

/*
 * uao_reference: hold a reference to an anonymous UVM object.
 */
void
uao_reference(struct uvm_object *uobj)
{
	/* Kernel object is persistent. */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	atomic_inc_int(&uobj->uo_refs);
}
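
/*
 * Typical lifecycle of an aobj other than the kernel object (a
 * hypothetical sketch, not a quote of any specific caller):
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(ptoa(npages), 0);	(one reference, may sleep)
 *	uao_reference(uobj);			(e.g. share with a mapping)
 *	...
 *	uao_detach(uobj);			(drop the extra reference)
 *	uao_detach(uobj);			(last ref: pages/slots freed)
 */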

/*
 * uao_detach: drop a reference to an anonymous UVM object.
 */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct vm_page *pg;

	/*
	 * Detaching from kernel_object is a NOP.
	 */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */
	if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
		return;
	}

	/*
	 * Remove the aobj from the global list.
	 */
	mtx_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mtx_leave(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
		pmap_page_protect(pg, PROT_NONE);
		if (pg->pg_flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
			rw_enter(uobj->vmobjlock, RW_WRITE);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_lock_pageq();
		uvm_pagefree(pg);
		uvm_unlock_pageq();
	}

	/*
	 * Finally, free the anonymous UVM object itself.
	 */
	uao_free(aobj);
}

/*
 * uao_flush: flush pages out of a uvm object
 *
 * => if PGO_CLEANIT is not set, then we will not block.
 * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
 *	for flushing.
 * => NOTE: we are allowed to lock the page queues, so the caller
 *	must not be holding the lock on them [e.g. pagedaemon had
 *	better not call us with the queues locked]
 * => we return TRUE unless we encountered some sort of I/O error
 *	XXXJRT currently never happens, as we never directly initiate
 *	XXXJRT I/O
 */
boolean_t
uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
	struct vm_page *pg;
	voff_t curoff;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		stop = round_page(stop);
		if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_flush: strange, got an out of range "
			    "flush (fixed)\n");
			stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
		}
	}

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */
	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		return TRUE;
	}

	curoff = start;
	for (;;) {
		if (curoff < stop) {
			pg = uvm_pagelookup(uobj, curoff);
			curoff += PAGE_SIZE;
			if (pg == NULL)
				continue;
		} else {
			break;
		}

		/* Make sure page is unbusy, else wait for it. */
		if (pg->pg_flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
			rw_enter(uobj->vmobjlock, RW_WRITE);
			curoff -= PAGE_SIZE;
			continue;
		}

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.  We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */
		case PGO_CLEANIT|PGO_FREE:
			/* FALLTHROUGH */
		case PGO_CLEANIT|PGO_DEACTIVATE:
			/* FALLTHROUGH */
		case PGO_DEACTIVATE:
 deactivate_it:
			if (pg->wire_count != 0)
				continue;

			uvm_lock_pageq();
			uvm_pagedeactivate(pg);
			uvm_unlock_pageq();

			continue;
		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */
			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/* XXX skip the page if it's wired */
			if (pg->wire_count != 0)
				continue;

			/*
			 * free the swap slot and the page.
			 */
			pmap_page_protect(pg, PROT_NONE);

			/*
			 * freeing swapslot here is not strictly necessary.
			 * however, leaving it here doesn't save much
			 * because we need to update swap accounting anyway.
			 */
			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_lock_pageq();
			uvm_pagefree(pg);
			uvm_unlock_pageq();

			continue;
		default:
			panic("uao_flush: weird flags");
		}
	}

	return TRUE;
}
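
/*
 * For example, the TMPFS shrink path above relies on the PGO_FREE case:
 *
 *	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
 *	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
 *
 * which throws away every resident page in [startpg, endpg) before the
 * corresponding swap slots are dropped.
 */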

/*
 * uao_get: fetch me a page
 *
 * we have three cases:
 * 1: page is resident     -> just return the page.
 * 2: page is zero-fill    -> allocate a new page and zero it.
 * 3: page is swapped out  -> fetch the page from swap.
 *
 * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
 * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
 * then we will need to return VM_PAGER_UNLOCK.
 *
 * => flags: PGO_ALLPAGES: get all of the pages
 *           PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */
static int
uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
    int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	voff_t current_offset;
	vm_page_t ptmp;
	int lcv, gotpages, maxpages, swslot, rv, pageidx;
	boolean_t done;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_lock_held(uobj->vmobjlock));
	KASSERT(rw_write_held(uobj->vmobjlock) ||
	    ((flags & PGO_LOCKED) != 0 && (access_type & PROT_WRITE) == 0));

	/*
	 * get number of pages
	 */
	maxpages = *npagesp;

	if (flags & PGO_LOCKED) {
		/*
		 * step 1a: get pages that are already resident.  only do
		 * this if the data structures are locked (i.e. the first
		 * time through).
		 */
		done = TRUE;	/* be optimistic */
		gotpages = 0;	/* # of pages we got so far */

		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
		    lcv++, current_offset += PAGE_SIZE) {
			/* do we care about this page?  if not, skip it */
			if (pps[lcv] == PGO_DONTCARE)
				continue;

			/* lookup page */
			ptmp = uvm_pagelookup(uobj, current_offset);

			/*
			 * to be useful must get a non-busy page
			 */
			if (ptmp == NULL || (ptmp->pg_flags & PG_BUSY) != 0) {
				if (lcv == centeridx ||
				    (flags & PGO_ALLPAGES) != 0)
					/* need to do a wait or I/O! */
					done = FALSE;
				continue;
			}

			/*
			 * useful page: plug it in our result array
			 */
			pps[lcv] = ptmp;
			gotpages++;
		}

		/*
		 * step 1b: now we've either done everything needed or we
		 * need to unlock and do some waiting or I/O.
		 */
		*npagesp = gotpages;
		return done ? VM_PAGER_OK : VM_PAGER_UNLOCK;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * data structures are unlocked.
	 */
	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
	    lcv++, current_offset += PAGE_SIZE) {
		/*
		 * - skip over pages we've already gotten or don't want
		 * - skip over pages we don't _have_ to get
		 */
		if (pps[lcv] != NULL ||
		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
			continue;

		pageidx = current_offset >> PAGE_SHIFT;

		/*
		 * we have yet to locate the current page (pps[lcv]).  we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released and repeat the lookup.
		 * if the page we found is neither busy nor released, then we
		 * busy it (so we own it) and plug it into pps[lcv].  this
		 * 'break's the following while loop and indicates we are
		 * ready to move on to the next page in the "lcv" loop above.
		 *
		 * if we exit the while loop with pps[lcv] still set to NULL,
		 * then it means that we allocated a new busy/fake/clean page
		 * ptmp in the object and we need to do I/O to fill in the data.
		 */

		/* top of "pps" while loop */
		while (pps[lcv] == NULL) {
			/* look for a resident page */
			ptmp = uvm_pagelookup(uobj, current_offset);

			/* not resident?  allocate one now (if we can) */
			if (ptmp == NULL) {

				ptmp = uvm_pagealloc(uobj, current_offset,
				    NULL, 0);

				/* out of RAM? */
				if (ptmp == NULL) {
					rw_exit(uobj->vmobjlock);
					uvm_wait("uao_getpage");
					rw_enter(uobj->vmobjlock, RW_WRITE);
					/* goto top of pps while loop */
					continue;
				}

				/*
				 * safe with PQ's unlocked: because we just
				 * alloc'd the page
				 */
				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);

				/*
				 * got new page ready for I/O.  break pps while
				 * loop.  pps[lcv] is still NULL.
				 */
				break;
			}

			/* page is there, see if we need to wait on it */
			if ((ptmp->pg_flags & PG_BUSY) != 0) {
				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
				rw_enter(uobj->vmobjlock, RW_WRITE);
				continue;	/* goto top of pps while loop */
			}

			/*
			 * if we get here then the page is resident and
			 * unbusy.  we busy it now (so we own it).
			 */
			/* we own it, caller must un-busy */
			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(ptmp, "uao_get2");
			pps[lcv] = ptmp;
		}

		/*
		 * if we own the valid page at the correct offset, pps[lcv] will
		 * point to it.  nothing more to do except go to the next page.
		 */
		if (pps[lcv])
			continue;		/* next lcv */

		/*
		 * we have a "fake/busy/clean" page that we just allocated.
		 * do the needed "i/o", either reading from swap or zeroing.
		 */
		swslot = uao_find_swslot(uobj, pageidx);

		/* just zero the page if there's nothing in swap. */
		if (swslot == 0) {
			/* page hasn't existed before, just zero it. */
			uvm_pagezero(ptmp);
		} else {
			/*
			 * page in the swapped-out page.
			 * unlock object for i/o, relock when done.
			 */

			rw_exit(uobj->vmobjlock);
			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
			rw_enter(uobj->vmobjlock, RW_WRITE);

			/*
			 * I/O done.  check for errors.
			 */
			if (rv != VM_PAGER_OK) {
				/*
				 * remove the swap slot from the aobj
				 * and mark the aobj as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */
				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
				    SWSLOT_BAD);
				uvm_swap_markbad(swslot, 1);

				if (ptmp->pg_flags & PG_WANTED)
					wakeup(ptmp);
				atomic_clearbits_int(&ptmp->pg_flags,
				    PG_WANTED|PG_BUSY);
				UVM_PAGE_OWN(ptmp, NULL);
				uvm_lock_pageq();
				uvm_pagefree(ptmp);
				uvm_unlock_pageq();
				rw_exit(uobj->vmobjlock);

				return rv;
			}
		}

		/*
		 * we got the page!   clear the fake flag (indicates valid
		 * data now in page) and plug into our result array.   note
		 * that page is still busy.
		 *
		 * it is the callers job to:
		 * => check if the page is released
		 * => unbusy the page
		 * => activate the page
		 */
		atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
		pmap_clear_modify(ptmp);		/* ... and clean */
		pps[lcv] = ptmp;

	}	/* lcv loop */

	rw_exit(uobj->vmobjlock);
	return VM_PAGER_OK;
}

/*
 * uao_dropswap: release any swap resources from this aobj page.
 *
 * => aobj must be locked or have a reference count of 0.
 */
int
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	int slot;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	slot = uao_set_swslot(uobj, pageidx, 0);
	if (slot) {
		uvm_swap_free(slot, 1);
	}
	return slot;
}
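
/*
 * uao_dropswap() is the helper used when an aobj page dies; the
 * page-free idiom seen in uao_detach() and uao_flush() above is:
 *
 *	uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
 *	uvm_lock_pageq();
 *	uvm_pagefree(pg);
 *	uvm_unlock_pageq();
 */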

/*
 * page in every page in every aobj that is paged-out to a range of swslots.
 *
 * => nothing should be locked.
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
boolean_t
uao_swap_off(int startslot, int endslot)
{
	struct uvm_aobj *aobj;

	/*
	 * Walk the list of all anonymous UVM objects.  Grab the first.
	 */
	mtx_enter(&uao_list_lock);
	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
		mtx_leave(&uao_list_lock);
		return FALSE;
	}
	uao_reference(&aobj->u_obj);

	do {
		struct uvm_aobj *nextaobj;
		boolean_t rv;

		/*
		 * Prefetch the next object and immediately hold a reference
		 * on it, so neither the current nor the next entry could
		 * disappear while we are iterating.
		 */
		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
			uao_reference(&nextaobj->u_obj);
		}
		mtx_leave(&uao_list_lock);

		/*
		 * Page in all pages in the swap slot range.
		 */
		rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
		rv = uao_pagein(aobj, startslot, endslot);
		rw_exit(aobj->u_obj.vmobjlock);

		/* Drop the reference of the current object. */
		uao_detach(&aobj->u_obj);
		if (rv) {
			if (nextaobj) {
				uao_detach(&nextaobj->u_obj);
			}
			return rv;
		}

		aobj = nextaobj;
		mtx_enter(&uao_list_lock);
	} while (aobj);

	/*
	 * done with traversal, unlock the list
	 */
	mtx_leave(&uao_list_lock);
	return FALSE;
}

/*
 * page in any pages from aobj in the given range.
 *
 * => returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	boolean_t rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int bucket;

restart:
		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
			    elt != NULL;
			    elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */
					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page,
					 * then start over on this object
					 * since the swhash elt
					 * may have been freed.
					 */
					rv = uao_pagein_page(aobj,
					    UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */
			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */
			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return FALSE;
}

/*
 * uao_pagein_page: page in a single page from an anonymous UVM object.
 *
 * => Returns TRUE if pagein was aborted due to lack of memory.
 */
static boolean_t
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
	struct uvm_object *uobj = &aobj->u_obj;
	struct vm_page *pg;
	int rv, npages;

	pg = NULL;
	npages = 1;

	KASSERT(rw_write_held(uobj->vmobjlock));
	rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);

	/*
	 * relock and finish up.
	 */
	rw_enter(uobj->vmobjlock, RW_WRITE);
	switch (rv) {
	case VM_PAGER_OK:
		break;

	case VM_PAGER_ERROR:
	case VM_PAGER_REFAULT:
		/*
		 * nothing more to do on errors.
		 * VM_PAGER_REFAULT can only mean that the anon was freed,
		 * so again there's nothing to do.
		 */
		return FALSE;
	}

	/*
	 * ok, we've got the page now.
	 * mark it as dirty, clear its swslot and un-busy it.
	 */
	uao_dropswap(&aobj->u_obj, pageidx);
	atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
	UVM_PAGE_OWN(pg, NULL);

	/*
	 * deactivate the page (to put it on a page queue).
	 */
	uvm_lock_pageq();
	uvm_pagedeactivate(pg);
	uvm_unlock_pageq();

	return FALSE;
}
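
/*
 * Note on the (start, end) convention used below: the range is in page
 * indices, start inclusive and end exclusive, and end == 0 is shorthand
 * for "to the end of the object".  uao_free() above uses exactly that:
 *
 *	uao_dropswap_range(uobj, 0, 0);		(drop every swap slot)
 */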

/*
 * uao_dropswap_range: drop swapslots in the range.
 *
 * => aobj must be locked and is returned locked.
 * => start is inclusive.  end is exclusive.
 */
void
uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int swpgonlydelta = 0;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (end == 0) {
		end = INT64_MAX;
	}

	if (UAO_USES_SWHASH(aobj)) {
		int i, hashbuckets = aobj->u_swhashmask + 1;
		voff_t taghi;
		voff_t taglo;

		taglo = UAO_SWHASH_ELT_TAG(start);
		taghi = UAO_SWHASH_ELT_TAG(end);

		for (i = 0; i < hashbuckets; i++) {
			struct uao_swhash_elt *elt, *next;

			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
			    elt != NULL;
			    elt = next) {
				int startidx, endidx;
				int j;

				next = LIST_NEXT(elt, list);

				if (elt->tag < taglo || taghi < elt->tag) {
					continue;
				}

				if (elt->tag == taglo) {
					startidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
				} else {
					startidx = 0;
				}

				if (elt->tag == taghi) {
					endidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
				} else {
					endidx = UAO_SWHASH_CLUSTER_SIZE;
				}

				for (j = startidx; j < endidx; j++) {
					int slot = elt->slots[j];

					KASSERT(uvm_pagelookup(&aobj->u_obj,
					    (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + j) << PAGE_SHIFT) == NULL);

					if (slot > 0) {
						uvm_swap_free(slot, 1);
						swpgonlydelta++;
						KASSERT(elt->count > 0);
						elt->slots[j] = 0;
						elt->count--;
					}
				}

				if (elt->count == 0) {
					LIST_REMOVE(elt, list);
					pool_put(&uao_swhash_elt_pool, elt);
				}
			}
		}
	} else {
		int i;

		if (aobj->u_pages < end) {
			end = aobj->u_pages;
		}
		for (i = start; i < end; i++) {
			int slot = aobj->u_swslots[i];

			if (slot > 0) {
				uvm_swap_free(slot, 1);
				swpgonlydelta++;
			}
		}
	}

	/*
	 * adjust the counter of pages only in swap for all
	 * the swap slots we've freed.
	 */
	if (swpgonlydelta > 0) {
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
	}
}