xref: /openbsd-src/sys/uvm/uvm_aobj.c (revision 0b4f1452d8df729a522c5216b33d28622069a3aa)
1*0b4f1452Smpi /*	$OpenBSD: uvm_aobj.c,v 1.115 2024/12/27 12:04:40 mpi Exp $	*/
21414b0faSart /*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/
3cd7ee8acSart 
4cd7ee8acSart /*
5cd7ee8acSart  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
6cd7ee8acSart  *                    Washington University.
7cd7ee8acSart  * All rights reserved.
8cd7ee8acSart  *
9cd7ee8acSart  * Redistribution and use in source and binary forms, with or without
10cd7ee8acSart  * modification, are permitted provided that the following conditions
11cd7ee8acSart  * are met:
12cd7ee8acSart  * 1. Redistributions of source code must retain the above copyright
13cd7ee8acSart  *    notice, this list of conditions and the following disclaimer.
14cd7ee8acSart  * 2. Redistributions in binary form must reproduce the above copyright
15cd7ee8acSart  *    notice, this list of conditions and the following disclaimer in the
16cd7ee8acSart  *    documentation and/or other materials provided with the distribution.
17cd7ee8acSart  *
18cd7ee8acSart  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19cd7ee8acSart  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20cd7ee8acSart  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21cd7ee8acSart  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22cd7ee8acSart  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23cd7ee8acSart  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24cd7ee8acSart  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25cd7ee8acSart  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26cd7ee8acSart  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27cd7ee8acSart  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28cd7ee8acSart  *
29cd7ee8acSart  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
30cd7ee8acSart  */
31cd7ee8acSart /*
32cd7ee8acSart  * uvm_aobj.c: anonymous memory uvm_object pager
33cd7ee8acSart  *
34cd7ee8acSart  * author: Chuck Silvers <chuq@chuq.com>
35cd7ee8acSart  * started: Jan-1998
36cd7ee8acSart  *
37cd7ee8acSart  * - design mostly from Chuck Cranor
38cd7ee8acSart  */
39cd7ee8acSart 
40cd7ee8acSart #include <sys/param.h>
41cd7ee8acSart #include <sys/systm.h>
42cd7ee8acSart #include <sys/malloc.h>
43fd628a11Sart #include <sys/kernel.h>
44cd7ee8acSart #include <sys/pool.h>
451731322cSespie #include <sys/stdint.h>
4603d1830dStedu #include <sys/atomic.h>
47cd7ee8acSart 
48cd7ee8acSart #include <uvm/uvm.h>
49cd7ee8acSart 
50cd7ee8acSart /*
5152887a38Smpi  * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
5252887a38Smpi  * keeping the list of resident pages, it may also keep a list of allocated
5352887a38Smpi  * swap blocks.  Depending on the size of the object, this list is either
5452887a38Smpi  * stored in an array (small objects) or in a hash table (large objects).
55cd7ee8acSart  */
56cd7ee8acSart 
57cd7ee8acSart /*
5852887a38Smpi  * Note: for hash tables, we break the address space of the aobj into blocks
5952887a38Smpi  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
60cd7ee8acSart  */
61cd7ee8acSart #define	UAO_SWHASH_CLUSTER_SHIFT	4
62cd7ee8acSart #define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)
63cd7ee8acSart 
6452887a38Smpi /* Get the "tag" for this page index. */
6539c73ac7Smpi #define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
6639c73ac7Smpi #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
6739c73ac7Smpi     ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))
68cd7ee8acSart 
6952887a38Smpi /* Given an ELT and a page index, find the swap slot. */
7039c73ac7Smpi #define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
7139c73ac7Smpi     ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])
72cd7ee8acSart 
7352887a38Smpi /* Given an ELT, return its pageidx base. */
7439c73ac7Smpi #define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
7539c73ac7Smpi     ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)
76cd7ee8acSart 
7752887a38Smpi /* The hash function. */
7839c73ac7Smpi #define	UAO_SWHASH_HASH(aobj, idx) \
7939c73ac7Smpi     (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
8039c73ac7Smpi     & (aobj)->u_swhashmask)])
81cd7ee8acSart 
82cd7ee8acSart /*
8352887a38Smpi  * The threshold which determines whether we will use an array or a
84cd7ee8acSart  * hash table to store the list of allocated swap blocks.
85cd7ee8acSart  */
86cd7ee8acSart #define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
8739c73ac7Smpi #define	UAO_USES_SWHASH(aobj) \
8839c73ac7Smpi     ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)
89cd7ee8acSart 
9052887a38Smpi /* The number of buckets in a hash, with an upper bound. */
91cd7ee8acSart #define	UAO_SWHASH_MAXBUCKETS		256
921731322cSespie #define	UAO_SWHASH_BUCKETS(pages) \
931731322cSespie     (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
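
/*
 * Worked example (illustrative only, derived from the macros above):
 * with UAO_SWHASH_CLUSTER_SHIFT == 4, a cluster holds 16 slots and
 * UAO_SWHASH_THRESHOLD is 64 pages, so an aobj of 512 pages uses a
 * hash with UAO_SWHASH_BUCKETS(512) == min(512 >> 4, 256) == 32
 * buckets.  For page index 0x123 in such an object:
 *
 *	UAO_SWHASH_ELT_TAG(0x123)		== 0x12	(0x123 >> 4)
 *	UAO_SWHASH_ELT_PAGESLOT_IDX(0x123)	== 0x3	(0x123 & 0xf)
 *
 * i.e. the swap slot lives in slots[3] of the element tagged 0x12,
 * in the bucket picked by UAO_SWHASH_HASH(aobj, 0x123).
 */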
94cd7ee8acSart 
95cd7ee8acSart 
96cd7ee8acSart /*
97cd7ee8acSart  * uao_swhash_elt: when a hash table is being used, this structure defines
98cd7ee8acSart  * the format of an entry in the bucket list.
99cd7ee8acSart  */
100cd7ee8acSart struct uao_swhash_elt {
101cd7ee8acSart 	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
10240cf655dSart 	voff_t tag;				/* our 'tag' */
103cd7ee8acSart 	int count;				/* our number of active slots */
104cd7ee8acSart 	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
105cd7ee8acSart };
106cd7ee8acSart 
107cd7ee8acSart /*
108cd7ee8acSart  * uao_swhash: the swap hash table structure
109cd7ee8acSart  */
110cd7ee8acSart LIST_HEAD(uao_swhash, uao_swhash_elt);
111cd7ee8acSart 
112cd7ee8acSart /*
113cd7ee8acSart  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
114cd7ee8acSart  */
115cd7ee8acSart struct pool uao_swhash_elt_pool;
116cd7ee8acSart 
117cd7ee8acSart /*
118cd7ee8acSart  * uvm_aobj: the actual anon-backed uvm_object
119cd7ee8acSart  *
120cd7ee8acSart  * => the uvm_object is at the top of the structure, which allows
1210b0fe1a1Soga  *   (struct uvm_aobj *) == (struct uvm_object *)
122cd7ee8acSart  * => only one of u_swslots and u_swhash is used in any given aobj
123cd7ee8acSart  */
124cd7ee8acSart struct uvm_aobj {
125b8a635f6Stedu 	struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
126cd7ee8acSart 	int u_pages;		 /* number of pages in entire object */
127cd7ee8acSart 	int u_flags;		 /* the flags (see uvm_aobj.h) */
128cd7ee8acSart 	/*
129139fed43Soga 	 * Either an array or hashtable (array of bucket heads) of
130139fed43Soga 	 * offset -> swapslot mappings for the aobj.
131cd7ee8acSart 	 */
132139fed43Soga #define u_swslots	u_swap.slot_array
133139fed43Soga #define u_swhash	u_swap.slot_hash
134139fed43Soga 	union swslots {
135139fed43Soga 		int			*slot_array;
136139fed43Soga 		struct uao_swhash	*slot_hash;
137139fed43Soga 	} u_swap;
138cd7ee8acSart 	u_long u_swhashmask;		/* mask for hashtable */
139cd7ee8acSart 	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
140cd7ee8acSart };
141cd7ee8acSart 
142cd7ee8acSart struct pool uvm_aobj_pool;
143cd7ee8acSart 
1441e3e475dSoga static struct uao_swhash_elt	*uao_find_swhash_elt(struct uvm_aobj *, int,
1452023d591Soga 				     boolean_t);
1461e3e475dSoga static boolean_t		 uao_flush(struct uvm_object *, voff_t,
1471e3e475dSoga 				     voff_t, int);
1481e3e475dSoga static void			 uao_free(struct uvm_aobj *);
1491e3e475dSoga static int			 uao_get(struct uvm_object *, voff_t,
1501e3e475dSoga 				     vm_page_t *, int *, int, vm_prot_t,
1511e3e475dSoga 				     int, int);
1521e3e475dSoga static boolean_t		 uao_pagein(struct uvm_aobj *, int, int);
1531e3e475dSoga static boolean_t		 uao_pagein_page(struct uvm_aobj *, int);
154cd7ee8acSart 
1551731322cSespie void	uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
1561731322cSespie void	uao_shrink_flush(struct uvm_object *, int, int);
1571731322cSespie int	uao_shrink_hash(struct uvm_object *, int);
1581731322cSespie int	uao_shrink_array(struct uvm_object *, int);
1591731322cSespie int	uao_shrink_convert(struct uvm_object *, int);
1601731322cSespie 
1611731322cSespie int	uao_grow_hash(struct uvm_object *, int);
1621731322cSespie int	uao_grow_array(struct uvm_object *, int);
1631731322cSespie int	uao_grow_convert(struct uvm_object *, int);
1641731322cSespie 
165cd7ee8acSart /*
166cd7ee8acSart  * aobj_pager
167cd7ee8acSart  *
168cd7ee8acSart  * note that some functions (e.g. put) are handled elsewhere
169cd7ee8acSart  */
1709f7b7ef0Smpi const struct uvm_pagerops aobj_pager = {
1719f7b7ef0Smpi 	.pgo_reference = uao_reference,
1729f7b7ef0Smpi 	.pgo_detach = uao_detach,
1739f7b7ef0Smpi 	.pgo_flush = uao_flush,
1749f7b7ef0Smpi 	.pgo_get = uao_get,
175cd7ee8acSart };
176cd7ee8acSart 
177cd7ee8acSart /*
178cd7ee8acSart  * uao_list: global list of active aobjs, locked by uao_list_lock
17969ba976bSoga  *
18069ba976bSoga  * Lock ordering: generally the locking order is object lock, then list lock.
18169ba976bSoga  * In the case of swap off we have to iterate over the list, and thus the
18269ba976bSoga  * ordering is reversed. In that case we must use trylocking to prevent
18369ba976bSoga  * deadlock.
184cd7ee8acSart  */
185bd69ae14Soga static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
18669c04514Smpi static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
187cd7ee8acSart 
188cd7ee8acSart 
189cd7ee8acSart /*
190cd7ee8acSart  * functions
191cd7ee8acSart  */
192cd7ee8acSart /*
193cd7ee8acSart  * hash table/array related functions
194cd7ee8acSart  */
195cd7ee8acSart /*
196cd7ee8acSart  * uao_find_swhash_elt: find (or create) a hash table entry for a page
197cd7ee8acSart  * offset.
198cd7ee8acSart  */
1991e3e475dSoga static struct uao_swhash_elt *
2002023d591Soga uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
201cd7ee8acSart {
202cd7ee8acSart 	struct uao_swhash *swhash;
203cd7ee8acSart 	struct uao_swhash_elt *elt;
20440cf655dSart 	voff_t page_tag;
205cd7ee8acSart 
2061414b0faSart 	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
2071414b0faSart 	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	/* tag to search for */
208cd7ee8acSart 
20952887a38Smpi 	/*
21052887a38Smpi 	 * now search the bucket for the requested tag
21152887a38Smpi 	 */
212fd628a11Sart 	LIST_FOREACH(elt, swhash, list) {
2131414b0faSart 		if (elt->tag == page_tag)
214b9df1565Smpi 			return elt;
215cd7ee8acSart 	}
2161414b0faSart 
2171414b0faSart 	if (!create)
218cd7ee8acSart 		return NULL;
2191414b0faSart 
22052887a38Smpi 	/*
22152887a38Smpi 	 * allocate a new entry for the bucket and init/insert it in
22252887a38Smpi 	 */
223e7d50abdSkettenis 	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
224e7d50abdSkettenis 	/*
225e7d50abdSkettenis 	 * XXX We cannot sleep here as the hash table might disappear
226e7d50abdSkettenis 	 * from under our feet.  And we run the risk of deadlocking
227e7d50abdSkettenis 	 * the pagedaemon.  In fact this code will only be called by
228e7d50abdSkettenis 	 * the pagedaemon and allocation will only fail if we
229e7d50abdSkettenis 	 * exhausted the pagedaemon reserve.  In that case we're
230e7d50abdSkettenis 	 * doomed anyway, so panic.
231e7d50abdSkettenis 	 */
232e7d50abdSkettenis 	if (elt == NULL)
233e7d50abdSkettenis 		panic("%s: can't allocate entry", __func__);
234cd7ee8acSart 	LIST_INSERT_HEAD(swhash, elt, list);
235cd7ee8acSart 	elt->tag = page_tag;
2361414b0faSart 
237b9df1565Smpi 	return elt;
238cd7ee8acSart }
239cd7ee8acSart 
240cd7ee8acSart /*
241cd7ee8acSart  * uao_find_swslot: find the swap slot number for an aobj/pageidx
242cd7ee8acSart  */
243a375eb79Smpi int
24457296fa7Smpi uao_find_swslot(struct uvm_object *uobj, int pageidx)
245cd7ee8acSart {
24657296fa7Smpi 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
24757296fa7Smpi 
24857296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
249cd7ee8acSart 
25052887a38Smpi 	/*
25152887a38Smpi 	 * if noswap flag is set, then we never return a slot
25252887a38Smpi 	 */
253cd7ee8acSart 	if (aobj->u_flags & UAO_FLAG_NOSWAP)
254b9df1565Smpi 		return 0;
255cd7ee8acSart 
25652887a38Smpi 	/*
25752887a38Smpi 	 * if hashing, look in hash table.
25852887a38Smpi 	 */
25939c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
260cd7ee8acSart 		struct uao_swhash_elt *elt =
261cd7ee8acSart 		    uao_find_swhash_elt(aobj, pageidx, FALSE);
262cd7ee8acSart 
263cd7ee8acSart 		if (elt)
264b9df1565Smpi 			return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
265cd7ee8acSart 		else
266b9df1565Smpi 			return 0;
267cd7ee8acSart 	}
268cd7ee8acSart 
26952887a38Smpi 	/*
27052887a38Smpi 	 * otherwise, look in the array
27152887a38Smpi 	 */
272b9df1565Smpi 	return aobj->u_swslots[pageidx];
273cd7ee8acSart }
274cd7ee8acSart 
275cd7ee8acSart /*
276cd7ee8acSart  * uao_set_swslot: set the swap slot for a page in an aobj.
277cd7ee8acSart  *
278cd7ee8acSart  * => setting a slot to zero frees the slot
27969c04514Smpi  * => object must be locked by caller
28052887a38Smpi  * => we return the old slot number, or -1 if we failed to allocate
28152887a38Smpi  *    memory to record the new slot number
282cd7ee8acSart  */
283cd7ee8acSart int
2842023d591Soga uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
285cd7ee8acSart {
286cd7ee8acSart 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
287cd7ee8acSart 	int oldslot;
288cd7ee8acSart 
28969c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
29057296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
291ac25e10fSmpi 
29252887a38Smpi 	/*
29352887a38Smpi 	 * if noswap flag is set, then we can't set a slot
29452887a38Smpi 	 */
295cd7ee8acSart 	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
296cd7ee8acSart 		if (slot == 0)
297b9df1565Smpi 			return 0;		/* a clear is ok */
298cd7ee8acSart 
299cd7ee8acSart 		/* but a set is not */
300cd7ee8acSart 		printf("uao_set_swslot: uobj = %p\n", uobj);
30152887a38Smpi 	    	panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
302cd7ee8acSart 	}
303cd7ee8acSart 
30452887a38Smpi 	/*
30552887a38Smpi 	 * are we using a hash table?  if so, add it in the hash.
30652887a38Smpi 	 */
30739c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
308cd7ee8acSart 		/*
309cd7ee8acSart 		 * Avoid allocating an entry just to free it again if
310cd7ee8acSart 		 * the page had no swap slot in the first place, and
311cd7ee8acSart 		 * we are freeing.
312cd7ee8acSart 		 */
3131414b0faSart 		struct uao_swhash_elt *elt =
3141414b0faSart 		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
315cd7ee8acSart 		if (elt == NULL) {
3161414b0faSart 			KASSERT(slot == 0);
317b9df1565Smpi 			return 0;
318cd7ee8acSart 		}
319cd7ee8acSart 
320cd7ee8acSart 		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
321cd7ee8acSart 		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
322cd7ee8acSart 
323cd7ee8acSart 		/*
324cd7ee8acSart 		 * now adjust the elt's reference counter and free it if we've
325cd7ee8acSart 		 * dropped it to zero.
326cd7ee8acSart 		 */
327cd7ee8acSart 		if (slot) {
328cd7ee8acSart 			if (oldslot == 0)
329cd7ee8acSart 				elt->count++;
33052887a38Smpi 		} else {
33152887a38Smpi 			if (oldslot)
332cd7ee8acSart 				elt->count--;
333cd7ee8acSart 
334cd7ee8acSart 			if (elt->count == 0) {
335cd7ee8acSart 				LIST_REMOVE(elt, list);
336cd7ee8acSart 				pool_put(&uao_swhash_elt_pool, elt);
337cd7ee8acSart 			}
338cd7ee8acSart 		}
339cd7ee8acSart 	} else {
340cd7ee8acSart 		/* we are using an array */
341cd7ee8acSart 		oldslot = aobj->u_swslots[pageidx];
342cd7ee8acSart 		aobj->u_swslots[pageidx] = slot;
343cd7ee8acSart 	}
344b9df1565Smpi 	return oldslot;
345cd7ee8acSart }
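
/*
 * Usage sketch (illustrative, not compiled): releasing whatever swap
 * backs page `pageidx' of an aobj follows the pattern used by
 * uao_dropswap() later in this file:
 *
 *	slot = uao_set_swslot(uobj, pageidx, 0);
 *	if (slot)
 *		uvm_swap_free(slot, 1);
 */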
346cd7ee8acSart /*
347cd7ee8acSart  * end of hash/array functions
348cd7ee8acSart  */
349cd7ee8acSart 
350cd7ee8acSart /*
351cd7ee8acSart  * uao_free: free all resources held by an aobj, and then free the aobj
352cd7ee8acSart  *
353cd7ee8acSart  * => the aobj should be dead
354cd7ee8acSart  */
3551e3e475dSoga static void
3562023d591Soga uao_free(struct uvm_aobj *aobj)
357cd7ee8acSart {
3584b17baa2Smpi 	struct uvm_object *uobj = &aobj->u_obj;
3594b17baa2Smpi 
36057296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
36169c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
3624b17baa2Smpi 	uao_dropswap_range(uobj, 0, 0);
36369c04514Smpi 	rw_exit(uobj->vmobjlock);
364cd7ee8acSart 
36539c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
366cd7ee8acSart 		/*
3674b17baa2Smpi 		 * free the hash table itself.
368cd7ee8acSart 		 */
3696540f983Stedu 		hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
370cd7ee8acSart 	} else {
371cd500754Sdhill 		free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
372cd7ee8acSart 	}
373cd7ee8acSart 
37452887a38Smpi 	/*
37552887a38Smpi 	 * finally free the aobj itself
37652887a38Smpi 	 */
3772c850ee8Smpi 	uvm_obj_destroy(uobj);
378cd7ee8acSart 	pool_put(&uvm_aobj_pool, aobj);
379cd7ee8acSart }
380cd7ee8acSart 
381cd7ee8acSart /*
382cd7ee8acSart  * pager functions
383cd7ee8acSart  */
384cd7ee8acSart 
385a4b88e66Smpi #ifdef TMPFS
386cd7ee8acSart /*
3871731322cSespie  * Shrink an aobj to a given number of pages. The procedure is always the same:
3881731322cSespie  * assess the necessity of data structure conversion (hash to array), secure
3891731322cSespie  * resources, flush pages and drop swap slots.
3911731322cSespie  */
3921731322cSespie 
3931731322cSespie void
3941731322cSespie uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
3951731322cSespie {
3961731322cSespie 	KASSERT(startpg < endpg);
3971731322cSespie 	KASSERT(uobj->uo_refs == 1);
39836d5d901Skettenis 	uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
39936d5d901Skettenis 	    (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
4001731322cSespie 	uao_dropswap_range(uobj, startpg, endpg);
4011731322cSespie }
4021731322cSespie 
4031731322cSespie int
4041731322cSespie uao_shrink_hash(struct uvm_object *uobj, int pages)
4051731322cSespie {
4061731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
4071731322cSespie 	struct uao_swhash *new_swhash;
408810f6bbdSkettenis 	struct uao_swhash_elt *elt;
4091731322cSespie 	unsigned long new_hashmask;
4101731322cSespie 	int i;
4111731322cSespie 
41239c73ac7Smpi 	KASSERT(UAO_USES_SWHASH(aobj));
4131731322cSespie 
4141731322cSespie 	/*
4151731322cSespie 	 * If the size of the hash table doesn't change, all we need to do is
4161731322cSespie 	 * to adjust the page count.
4171731322cSespie 	 */
4181731322cSespie 	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
4194f7816f6Skettenis 		uao_shrink_flush(uobj, pages, aobj->u_pages);
4201731322cSespie 		aobj->u_pages = pages;
4211731322cSespie 		return 0;
4221731322cSespie 	}
4231731322cSespie 
4241731322cSespie 	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
4251731322cSespie 	    M_WAITOK | M_CANFAIL, &new_hashmask);
4261731322cSespie 	if (new_swhash == NULL)
4271731322cSespie 		return ENOMEM;
4281731322cSespie 
4291731322cSespie 	uao_shrink_flush(uobj, pages, aobj->u_pages);
4301731322cSespie 
4311731322cSespie 	/*
4321731322cSespie 	 * Even though the hash table size is changing, the hash of the buckets
4331731322cSespie 	 * we are interested in copying should not change.
4341731322cSespie 	 */
435810f6bbdSkettenis 	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
436810f6bbdSkettenis 		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
437810f6bbdSkettenis 			elt = LIST_FIRST(&aobj->u_swhash[i]);
438810f6bbdSkettenis 			LIST_REMOVE(elt, list);
439810f6bbdSkettenis 			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
440810f6bbdSkettenis 		}
441810f6bbdSkettenis 	}
4421731322cSespie 
4436540f983Stedu 	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
4441731322cSespie 
4451731322cSespie 	aobj->u_swhash = new_swhash;
4461731322cSespie 	aobj->u_pages = pages;
4471731322cSespie 	aobj->u_swhashmask = new_hashmask;
4481731322cSespie 
4491731322cSespie 	return 0;
4501731322cSespie }
4511731322cSespie 
4521731322cSespie int
4531731322cSespie uao_shrink_convert(struct uvm_object *uobj, int pages)
4541731322cSespie {
4551731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
4561731322cSespie 	struct uao_swhash_elt *elt;
4571731322cSespie 	int i, *new_swslots;
4581731322cSespie 
459540e394aSdoug 	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
4601731322cSespie 	    M_WAITOK | M_CANFAIL | M_ZERO);
4611731322cSespie 	if (new_swslots == NULL)
4621731322cSespie 		return ENOMEM;
4631731322cSespie 
4641731322cSespie 	uao_shrink_flush(uobj, pages, aobj->u_pages);
4651731322cSespie 
46635164244Stedu 	/* Convert swap slots from hash to array.  */
4671731322cSespie 	for (i = 0; i < pages; i++) {
4681731322cSespie 		elt = uao_find_swhash_elt(aobj, i, FALSE);
4691731322cSespie 		if (elt != NULL) {
4701731322cSespie 			new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
4711731322cSespie 			if (new_swslots[i] != 0)
4721731322cSespie 				elt->count--;
4731731322cSespie 			if (elt->count == 0) {
4741731322cSespie 				LIST_REMOVE(elt, list);
4751731322cSespie 				pool_put(&uao_swhash_elt_pool, elt);
4761731322cSespie 			}
4771731322cSespie 		}
4781731322cSespie 	}
4791731322cSespie 
4806540f983Stedu 	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
4811731322cSespie 
4821731322cSespie 	aobj->u_swslots = new_swslots;
4831731322cSespie 	aobj->u_pages = pages;
4841731322cSespie 
4851731322cSespie 	return 0;
4861731322cSespie }
4871731322cSespie 
4881731322cSespie int
4891731322cSespie uao_shrink_array(struct uvm_object *uobj, int pages)
4901731322cSespie {
4911731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
4921731322cSespie 	int i, *new_swslots;
4931731322cSespie 
494540e394aSdoug 	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
4951731322cSespie 	    M_WAITOK | M_CANFAIL | M_ZERO);
4961731322cSespie 	if (new_swslots == NULL)
4971731322cSespie 		return ENOMEM;
4981731322cSespie 
4991731322cSespie 	uao_shrink_flush(uobj, pages, aobj->u_pages);
5001731322cSespie 
5011731322cSespie 	for (i = 0; i < pages; i++)
5021731322cSespie 		new_swslots[i] = aobj->u_swslots[i];
5031731322cSespie 
504cd500754Sdhill 	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
5051731322cSespie 
5061731322cSespie 	aobj->u_swslots = new_swslots;
5071731322cSespie 	aobj->u_pages = pages;
5081731322cSespie 
5091731322cSespie 	return 0;
5101731322cSespie }
5111731322cSespie 
5121731322cSespie int
5131731322cSespie uao_shrink(struct uvm_object *uobj, int pages)
5141731322cSespie {
5151731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
5161731322cSespie 
5171731322cSespie 	KASSERT(pages < aobj->u_pages);
5181731322cSespie 
5191731322cSespie 	/*
5201731322cSespie 	 * Distinguish between three possible cases:
5211731322cSespie 	 * 1. aobj uses hash and must be converted to array.
5221731322cSespie 	 * 2. aobj uses array and array size needs to be adjusted.
5231731322cSespie 	 * 3. aobj uses hash and hash size needs to be adjusted.
5241731322cSespie 	 */
5251731322cSespie 	if (pages > UAO_SWHASH_THRESHOLD)
5261731322cSespie 		return uao_shrink_hash(uobj, pages);	/* case 3 */
5271731322cSespie 	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
5281731322cSespie 		return uao_shrink_convert(uobj, pages);	/* case 1 */
5291731322cSespie 	else
5301731322cSespie 		return uao_shrink_array(uobj, pages);	/* case 2 */
5311731322cSespie }
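
/*
 * Concrete numbers (illustrative): with UAO_SWHASH_THRESHOLD == 64,
 * shrinking 128 -> 96 pages resizes the hash (case 3), 128 -> 32
 * converts the hash to an array (case 1), and 48 -> 32 resizes the
 * array (case 2).
 */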
5321731322cSespie 
5331731322cSespie /*
5341731322cSespie  * Grow an aobj to a given number of pages. Right now we only adjust the swap
5351731322cSespie  * slots. We could additionally handle page allocation directly, so that they
5361731322cSespie  * don't happen through uvm_fault(). That would allow us to use another
5371731322cSespie  * mechanism for the swap slots other than malloc(). It is thus mandatory that
5381731322cSespie  * the caller of these functions does not allow faults to happen in case of
5391731322cSespie  * growth error.
5401731322cSespie  */
5411731322cSespie int
5421731322cSespie uao_grow_array(struct uvm_object *uobj, int pages)
5431731322cSespie {
5441731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
5451731322cSespie 	int i, *new_swslots;
5461731322cSespie 
5471731322cSespie 	KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);
5481731322cSespie 
549540e394aSdoug 	new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
5501731322cSespie 	    M_WAITOK | M_CANFAIL | M_ZERO);
5511731322cSespie 	if (new_swslots == NULL)
5521731322cSespie 		return ENOMEM;
5531731322cSespie 
5541731322cSespie 	for (i = 0; i < aobj->u_pages; i++)
5551731322cSespie 		new_swslots[i] = aobj->u_swslots[i];
5561731322cSespie 
557cd500754Sdhill 	free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
5581731322cSespie 
5591731322cSespie 	aobj->u_swslots = new_swslots;
5601731322cSespie 	aobj->u_pages = pages;
5611731322cSespie 
5621731322cSespie 	return 0;
5631731322cSespie }
5641731322cSespie 
5651731322cSespie int
5661731322cSespie uao_grow_hash(struct uvm_object *uobj, int pages)
5671731322cSespie {
5681731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
5691731322cSespie 	struct uao_swhash *new_swhash;
5701731322cSespie 	struct uao_swhash_elt *elt;
5711731322cSespie 	unsigned long new_hashmask;
5721731322cSespie 	int i;
5731731322cSespie 
5741731322cSespie 	KASSERT(pages > UAO_SWHASH_THRESHOLD);
5751731322cSespie 
5761731322cSespie 	/*
5771731322cSespie 	 * If the size of the hash table doesn't change, all we need to do is
5781731322cSespie 	 * to adjust the page count.
5791731322cSespie 	 */
5801731322cSespie 	if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
5811731322cSespie 		aobj->u_pages = pages;
5821731322cSespie 		return 0;
5831731322cSespie 	}
5841731322cSespie 
5851731322cSespie 	KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));
5861731322cSespie 
5871731322cSespie 	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
5881731322cSespie 	    M_WAITOK | M_CANFAIL, &new_hashmask);
5891731322cSespie 	if (new_swhash == NULL)
5901731322cSespie 		return ENOMEM;
5911731322cSespie 
5921731322cSespie 	for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
5931731322cSespie 		while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
5941731322cSespie 			elt = LIST_FIRST(&aobj->u_swhash[i]);
5951731322cSespie 			LIST_REMOVE(elt, list);
5961731322cSespie 			LIST_INSERT_HEAD(&new_swhash[i], elt, list);
5971731322cSespie 		}
5981731322cSespie 	}
5991731322cSespie 
6006540f983Stedu 	hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
6011731322cSespie 
6021731322cSespie 	aobj->u_swhash = new_swhash;
6031731322cSespie 	aobj->u_pages = pages;
6041731322cSespie 	aobj->u_swhashmask = new_hashmask;
6051731322cSespie 
6061731322cSespie 	return 0;
6071731322cSespie }
6081731322cSespie 
6091731322cSespie int
6101731322cSespie uao_grow_convert(struct uvm_object *uobj, int pages)
6111731322cSespie {
6121731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
6131731322cSespie 	struct uao_swhash *new_swhash;
6141731322cSespie 	struct uao_swhash_elt *elt;
6151731322cSespie 	unsigned long new_hashmask;
6161731322cSespie 	int i, *old_swslots;
6171731322cSespie 
6181731322cSespie 	new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
6191731322cSespie 	    M_WAITOK | M_CANFAIL, &new_hashmask);
6201731322cSespie 	if (new_swhash == NULL)
6211731322cSespie 		return ENOMEM;
6221731322cSespie 
62335164244Stedu 	/* Set these now, so we can use uao_find_swhash_elt(). */
6241731322cSespie 	old_swslots = aobj->u_swslots;
6251731322cSespie 	aobj->u_swhash = new_swhash;
6261731322cSespie 	aobj->u_swhashmask = new_hashmask;
6271731322cSespie 
6281731322cSespie 	for (i = 0; i < aobj->u_pages; i++) {
6291731322cSespie 		if (old_swslots[i] != 0) {
6301731322cSespie 			elt = uao_find_swhash_elt(aobj, i, TRUE);
6311731322cSespie 			elt->count++;
6321731322cSespie 			UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
6331731322cSespie 		}
6341731322cSespie 	}
6351731322cSespie 
636cd500754Sdhill 	free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
6371731322cSespie 	aobj->u_pages = pages;
6381731322cSespie 
6391731322cSespie 	return 0;
6401731322cSespie }
6411731322cSespie 
6421731322cSespie int
6431731322cSespie uao_grow(struct uvm_object *uobj, int pages)
6441731322cSespie {
6451731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
6461731322cSespie 
6471731322cSespie 	KASSERT(pages > aobj->u_pages);
6481731322cSespie 
6491731322cSespie 	/*
6501731322cSespie 	 * Distinguish between three possible cases:
6511731322cSespie 	 * 1. aobj uses hash and hash size needs to be adjusted.
6521731322cSespie 	 * 2. aobj uses array and array size needs to be adjusted.
6531731322cSespie 	 * 3. aobj uses array and must be converted to hash.
6541731322cSespie 	 */
6551731322cSespie 	if (pages <= UAO_SWHASH_THRESHOLD)
6561731322cSespie 		return uao_grow_array(uobj, pages);	/* case 2 */
6571731322cSespie 	else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
6581731322cSespie 		return uao_grow_hash(uobj, pages);	/* case 1 */
6591731322cSespie 	else
6601731322cSespie 		return uao_grow_convert(uobj, pages);	/* case 3 */
6611731322cSespie }
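
/*
 * Concrete numbers (illustrative, mirroring uao_shrink() above): with
 * UAO_SWHASH_THRESHOLD == 64, growing 32 -> 48 pages resizes the array
 * (case 2), 96 -> 192 resizes the hash (case 1), and 32 -> 128
 * converts the array to a hash (case 3).
 */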
662a4b88e66Smpi #endif /* TMPFS */
6631731322cSespie 
6641731322cSespie /*
665cd7ee8acSart  * uao_create: create an aobj of the given size and return its uvm_object.
666cd7ee8acSart  *
6671731322cSespie  * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
668cd7ee8acSart  * => for the kernel object, the flags are:
669cd7ee8acSart  *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
670cd7ee8acSart  *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
671cd7ee8acSart  */
672cd7ee8acSart struct uvm_object *
6732023d591Soga uao_create(vsize_t size, int flags)
674cd7ee8acSart {
67552887a38Smpi 	static struct uvm_aobj kernel_object_store;
67669c04514Smpi 	static struct rwlock bootstrap_kernel_object_lock;
67752887a38Smpi 	static int kobj_alloced = 0;
678cd7ee8acSart 	int pages = round_page(size) >> PAGE_SHIFT;
679cd7ee8acSart 	struct uvm_aobj *aobj;
6806894b7cfSmpi 	int refs;
681cd7ee8acSart 
68252887a38Smpi 	/*
68352887a38Smpi 	 * Allocate a new aobj, unless kernel object is requested.
68452887a38Smpi 	 */
68552887a38Smpi 	if (flags & UAO_FLAG_KERNOBJ) {
6866894b7cfSmpi 		KASSERT(!kobj_alloced);
687cd7ee8acSart 		aobj = &kernel_object_store;
688cd7ee8acSart 		aobj->u_pages = pages;
68952887a38Smpi 		aobj->u_flags = UAO_FLAG_NOSWAP;
6906894b7cfSmpi 		refs = UVM_OBJ_KERN;
691cd7ee8acSart 		kobj_alloced = UAO_FLAG_KERNOBJ;
692cd7ee8acSart 	} else if (flags & UAO_FLAG_KERNSWAP) {
6936894b7cfSmpi 		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
694cd7ee8acSart 		aobj = &kernel_object_store;
695cd7ee8acSart 		kobj_alloced = UAO_FLAG_KERNSWAP;
69652887a38Smpi 	} else {
697cd7ee8acSart 		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
698cd7ee8acSart 		aobj->u_pages = pages;
69952887a38Smpi 		aobj->u_flags = 0;
70052887a38Smpi 		refs = 1;
701cd7ee8acSart 	}
702cd7ee8acSart 
70352887a38Smpi 	/*
70452887a38Smpi 	 * allocate hash/array if necessary
70552887a38Smpi 	 */
7061731322cSespie  	if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
7076894b7cfSmpi 		int mflags;
7086894b7cfSmpi 
7091731322cSespie 		if (flags)
7101731322cSespie 			mflags = M_NOWAIT;
7111731322cSespie 		else
7121731322cSespie 			mflags = M_WAITOK;
713cd7ee8acSart 
714cd7ee8acSart 		/* allocate hash table or array depending on object size */
71539c73ac7Smpi 		if (UAO_USES_SWHASH(aobj)) {
7161731322cSespie 			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
717cd7ee8acSart 			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
7181731322cSespie 			if (aobj->u_swhash == NULL) {
7191731322cSespie 				if (flags & UAO_FLAG_CANFAIL) {
7201731322cSespie 					pool_put(&uvm_aobj_pool, aobj);
721b9df1565Smpi 					return NULL;
7221731322cSespie 				}
723cd7ee8acSart 				panic("uao_create: hashinit swhash failed");
7241731322cSespie 			}
725cd7ee8acSart 		} else {
726540e394aSdoug 			aobj->u_swslots = mallocarray(pages, sizeof(int),
72728a8f404Sart 			    M_UVMAOBJ, mflags|M_ZERO);
7281731322cSespie 			if (aobj->u_swslots == NULL) {
7291731322cSespie 				if (flags & UAO_FLAG_CANFAIL) {
7301731322cSespie 					pool_put(&uvm_aobj_pool, aobj);
731b9df1565Smpi 					return NULL;
7321731322cSespie 				}
733cd7ee8acSart 				panic("uao_create: malloc swslots failed");
734cd7ee8acSart 			}
7351731322cSespie 		}
736cd7ee8acSart 
7371731322cSespie 		if (flags & UAO_FLAG_KERNSWAP) {
738cd7ee8acSart 			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
739b9df1565Smpi 			return &aobj->u_obj;
740cd7ee8acSart 			/* done! */
741cd7ee8acSart 		}
742cd7ee8acSart 	}
743cd7ee8acSart 
74452887a38Smpi 	/*
74552887a38Smpi 	 * Initialise UVM object.
74652887a38Smpi 	 */
747da3d0110Smpi 	uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
74869c04514Smpi 	if (flags & UAO_FLAG_KERNOBJ) {
74969c04514Smpi 		/* Use a temporary static lock for kernel_object. */
75069c04514Smpi 		rw_init(&bootstrap_kernel_object_lock, "kobjlk");
75169c04514Smpi 		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
75269c04514Smpi 	}
753cd7ee8acSart 
75452887a38Smpi 	/*
75552887a38Smpi  	 * now that aobj is ready, add it to the global list
75652887a38Smpi  	 */
757bd69ae14Soga 	mtx_enter(&uao_list_lock);
758cd7ee8acSart 	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
759bd69ae14Soga 	mtx_leave(&uao_list_lock);
760cd7ee8acSart 
761b9df1565Smpi 	return &aobj->u_obj;
762cd7ee8acSart }
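
/*
 * Usage sketch (illustrative, not part of this file): a normal
 * anonymous object is created with zero flags and released with
 * uao_detach() when its owner is done with it:
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(16 * PAGE_SIZE, 0);
 *	...
 *	uao_detach(uobj);
 *
 * With zero flags uao_create() sleeps until memory is available;
 * UAO_FLAG_CANFAIL makes it return NULL on allocation failure instead.
 */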
763cd7ee8acSart 
764cd7ee8acSart 
765cd7ee8acSart 
766cd7ee8acSart /*
767cd7ee8acSart  * uao_init: set up aobj pager subsystem
768cd7ee8acSart  *
769cd7ee8acSart  * => called at boot time from uvm_pager_init()
770cd7ee8acSart  */
77128fbabcfSart void
7722023d591Soga uao_init(void)
773cd7ee8acSart {
774cd7ee8acSart 	/*
7750b0fe1a1Soga 	 * NOTE: Pages for this pool must not come from a pageable
776cd7ee8acSart 	 * kernel map!
777cd7ee8acSart 	 */
7781378bae2Sdlg 	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
7791378bae2Sdlg 	    IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
7801378bae2Sdlg 	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
7811378bae2Sdlg 	    IPL_NONE, PR_WAITOK, "aobjpl", NULL);
782cd7ee8acSart }
783cd7ee8acSart 
784cd7ee8acSart /*
78552887a38Smpi  * uao_reference: hold a reference to an anonymous UVM object.
786cd7ee8acSart  */
787cd7ee8acSart void
7882023d591Soga uao_reference(struct uvm_object *uobj)
789cd7ee8acSart {
79052887a38Smpi 	/* Kernel object is persistent. */
7917cb53682Sart 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
792cd7ee8acSart 		return;
793cd7ee8acSart 
7941af74310Smpi 	atomic_inc_int(&uobj->uo_refs);
795cd7ee8acSart }
796cd7ee8acSart 
79728fbabcfSart 
798cd7ee8acSart /*
79952887a38Smpi  * uao_detach: drop a reference to an anonymous UVM object.
800cd7ee8acSart  */
801cd7ee8acSart void
8022023d591Soga uao_detach(struct uvm_object *uobj)
803cd7ee8acSart {
804cd7ee8acSart 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
8050b0fe1a1Soga 	struct vm_page *pg;
806cd7ee8acSart 
80752887a38Smpi 	/*
80852887a38Smpi 	 * Detaching from kernel_object is a NOP.
80952887a38Smpi 	 */
8101af74310Smpi 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
811cd7ee8acSart 		return;
812cd7ee8acSart 
81352887a38Smpi 	/*
81452887a38Smpi 	 * Drop the reference.  If it was the last one, destroy the object.
81552887a38Smpi 	 */
8161af74310Smpi 	if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
817cd7ee8acSart 		return;
818cd7ee8acSart 	}
819cd7ee8acSart 
82052887a38Smpi 	/*
82152887a38Smpi 	 * Remove the aobj from the global list.
82252887a38Smpi 	 */
823bd69ae14Soga 	mtx_enter(&uao_list_lock);
824cd7ee8acSart 	LIST_REMOVE(aobj, u_list);
825bd69ae14Soga 	mtx_leave(&uao_list_lock);
826cd7ee8acSart 
827cd7ee8acSart 	/*
82852887a38Smpi 	 * Free all the pages left in the aobj.  For each page, when the
82952887a38Smpi 	 * page is no longer busy (and thus after any disk I/O that it is
83052887a38Smpi 	 * involved in is complete), release any swap resources and free
83152887a38Smpi 	 * the page itself.
832cd7ee8acSart 	 */
83369c04514Smpi 	rw_enter(uobj->vmobjlock, RW_WRITE);
834262a556aSdlg 	while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
83569c04514Smpi 		pmap_page_protect(pg, PROT_NONE);
8369662fca4Sart 		if (pg->pg_flags & PG_BUSY) {
8375b4619eaSmpi 			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
8385b4619eaSmpi 			rw_enter(uobj->vmobjlock, RW_WRITE);
839cd7ee8acSart 			continue;
840cd7ee8acSart 		}
8418a42ed70Sart 		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
84269c04514Smpi 		uvm_lock_pageq();
843cd7ee8acSart 		uvm_pagefree(pg);
844cd7ee8acSart 		uvm_unlock_pageq();
84569c04514Smpi 	}
846cd7ee8acSart 
84752887a38Smpi 	/*
84852887a38Smpi 	 * Finally, free the anonymous UVM object itself.
84952887a38Smpi 	 */
850cd7ee8acSart 	uao_free(aobj);
851cd7ee8acSart }
852cd7ee8acSart 
853cd7ee8acSart /*
85452887a38Smpi  * uao_flush: flush pages out of a uvm object
855e920f2c9Ssmart  *
856b8a635f6Stedu  * => if PGO_CLEANIT is not set, then we will not block.
857e920f2c9Ssmart  * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
858e920f2c9Ssmart  *	for flushing.
859e920f2c9Ssmart  * => NOTE: we are allowed to lock the page queues, so the caller
860e920f2c9Ssmart  *	must not be holding the lock on them [e.g. pagedaemon had
861e920f2c9Ssmart  *	better not call us with the queues locked]
862e920f2c9Ssmart  * => we return TRUE unless we encountered some sort of I/O error
863e920f2c9Ssmart  *	XXXJRT currently never happens, as we never directly initiate
864e920f2c9Ssmart  *	XXXJRT I/O
865cd7ee8acSart  */
866cd7ee8acSart boolean_t
8672023d591Soga uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
868cd7ee8acSart {
869e920f2c9Ssmart 	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
87018725a33Smpi 	struct vm_page *pg;
87140cf655dSart 	voff_t curoff;
872e920f2c9Ssmart 
87357296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
87469c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
8759f7b7ef0Smpi 
876e920f2c9Ssmart 	if (flags & PGO_ALLPAGES) {
877e920f2c9Ssmart 		start = 0;
87836d5d901Skettenis 		stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
879e920f2c9Ssmart 	} else {
880e920f2c9Ssmart 		start = trunc_page(start);
881e920f2c9Ssmart 		stop = round_page(stop);
88236d5d901Skettenis 		if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
883e920f2c9Ssmart 			printf("uao_flush: strange, got an out of range "
884e920f2c9Ssmart 			    "flush (fixed)\n");
88536d5d901Skettenis 			stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
886e920f2c9Ssmart 		}
887e920f2c9Ssmart 	}
888e920f2c9Ssmart 
889cd7ee8acSart 	/*
890e920f2c9Ssmart 	 * Don't need to do any work here if we're not freeing
891e920f2c9Ssmart 	 * or deactivating pages.
892cd7ee8acSart 	 */
8936894b7cfSmpi 	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
894b9df1565Smpi 		return TRUE;
8956894b7cfSmpi 	}
896e920f2c9Ssmart 
897e920f2c9Ssmart 	curoff = start;
8980b0fe1a1Soga 	for (;;) {
8990b0fe1a1Soga 		if (curoff < stop) {
90018725a33Smpi 			pg = uvm_pagelookup(uobj, curoff);
9010b0fe1a1Soga 			curoff += PAGE_SIZE;
90218725a33Smpi 			if (pg == NULL)
903e920f2c9Ssmart 				continue;
904e920f2c9Ssmart 		} else {
9050b0fe1a1Soga 			break;
9060b0fe1a1Soga 		}
907e920f2c9Ssmart 
9080b0fe1a1Soga 		/* Make sure page is unbusy, else wait for it. */
90918725a33Smpi 		if (pg->pg_flags & PG_BUSY) {
9105b4619eaSmpi 			uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
9115b4619eaSmpi 			rw_enter(uobj->vmobjlock, RW_WRITE);
9120b0fe1a1Soga 			curoff -= PAGE_SIZE;
913e920f2c9Ssmart 			continue;
914e920f2c9Ssmart 		}
915e920f2c9Ssmart 
916e920f2c9Ssmart 		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
917e920f2c9Ssmart 		/*
918e920f2c9Ssmart 		 * XXX In these first 3 cases, we always just
919e920f2c9Ssmart 		 * XXX deactivate the page.  We may want to
920e920f2c9Ssmart 		 * XXX handle the different cases more specifically
921e920f2c9Ssmart 		 * XXX in the future.
922e920f2c9Ssmart 		 */
923e920f2c9Ssmart 		case PGO_CLEANIT|PGO_FREE:
9240b0fe1a1Soga 			/* FALLTHROUGH */
925e920f2c9Ssmart 		case PGO_CLEANIT|PGO_DEACTIVATE:
9260b0fe1a1Soga 			/* FALLTHROUGH */
927e920f2c9Ssmart 		case PGO_DEACTIVATE:
928e920f2c9Ssmart  deactivate_it:
92918725a33Smpi 			if (pg->wire_count != 0)
930e920f2c9Ssmart 				continue;
931e920f2c9Ssmart 
9320b0fe1a1Soga 			uvm_lock_pageq();
93318725a33Smpi 			uvm_pagedeactivate(pg);
9340b0fe1a1Soga 			uvm_unlock_pageq();
935e920f2c9Ssmart 
936e920f2c9Ssmart 			continue;
937e920f2c9Ssmart 		case PGO_FREE:
938e920f2c9Ssmart 			/*
939e920f2c9Ssmart 			 * If there are multiple references to
940e920f2c9Ssmart 			 * the object, just deactivate the page.
941e920f2c9Ssmart 			 */
942e920f2c9Ssmart 			if (uobj->uo_refs > 1)
943e920f2c9Ssmart 				goto deactivate_it;
944e920f2c9Ssmart 
9456f909936Svisa 			/* XXX skip the page if it's wired */
94618725a33Smpi 			if (pg->wire_count != 0)
947e920f2c9Ssmart 				continue;
948e920f2c9Ssmart 
94952887a38Smpi 			/*
95052887a38Smpi 			 * free the swap slot and the page.
95152887a38Smpi 			 */
95218725a33Smpi 			pmap_page_protect(pg, PROT_NONE);
953e920f2c9Ssmart 
95452887a38Smpi 			/*
95552887a38Smpi 			 * freeing swapslot here is not strictly necessary.
95652887a38Smpi 			 * however, leaving it here doesn't save much
95752887a38Smpi 			 * because we need to update swap accounting anyway.
95852887a38Smpi 			 */
95918725a33Smpi 			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
9600b0fe1a1Soga 			uvm_lock_pageq();
96118725a33Smpi 			uvm_pagefree(pg);
9620b0fe1a1Soga 			uvm_unlock_pageq();
963e920f2c9Ssmart 
964e920f2c9Ssmart 			continue;
965e920f2c9Ssmart 		default:
966e920f2c9Ssmart 			panic("uao_flush: weird flags");
967e920f2c9Ssmart 		}
968e920f2c9Ssmart 	}
969e920f2c9Ssmart 
970b9df1565Smpi 	return TRUE;
971cd7ee8acSart }
972cd7ee8acSart 
973cd7ee8acSart /*
974cd7ee8acSart  * uao_get: fetch me a page
975cd7ee8acSart  *
976cd7ee8acSart  * we have three cases:
977cd7ee8acSart  * 1: page is resident     -> just return the page.
978cd7ee8acSart  * 2: page is zero-fill    -> allocate a new page and zero it.
979cd7ee8acSart  * 3: page is swapped out  -> fetch the page from swap.
980cd7ee8acSart  *
98169c04514Smpi  * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
982cd7ee8acSart  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
9831414b0faSart  * then we will need to return VM_PAGER_UNLOCK.
984cd7ee8acSart  *
985cd7ee8acSart  * => flags: PGO_ALLPAGES: get all of the pages
986cd7ee8acSart  *           PGO_LOCKED: fault data structures are locked
987cd7ee8acSart  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
988cd7ee8acSart  * => NOTE: caller must check for released pages!!
989cd7ee8acSart  */
9901e3e475dSoga static int
9912023d591Soga uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
9922023d591Soga     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
993cd7ee8acSart {
994cd7ee8acSart 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
99540cf655dSart 	voff_t current_offset;
9961414b0faSart 	vm_page_t ptmp;
99728fbabcfSart 	int lcv, gotpages, maxpages, swslot, rv, pageidx;
998cd7ee8acSart 	boolean_t done;
999cd7ee8acSart 
100057296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1001f3e62b59Smpi 	KASSERT(rw_lock_held(uobj->vmobjlock));
1002f3e62b59Smpi 	KASSERT(rw_write_held(uobj->vmobjlock) ||
1003f3e62b59Smpi 	    ((flags & PGO_LOCKED) != 0 && (access_type & PROT_WRITE) == 0));
10049f7b7ef0Smpi 
100552887a38Smpi 	/*
100652887a38Smpi  	 * get number of pages
100752887a38Smpi  	 */
1008cd7ee8acSart 	maxpages = *npagesp;
1009cd7ee8acSart 
1010cd7ee8acSart 	if (flags & PGO_LOCKED) {
101152887a38Smpi 		/*
101252887a38Smpi  		 * step 1a: get pages that are already resident.   only do
101352887a38Smpi 		 * this if the data structures are locked (i.e. the first
101452887a38Smpi 		 * time through).
101552887a38Smpi  		 */
1016cd7ee8acSart 		done = TRUE;	/* be optimistic */
1017cd7ee8acSart 		gotpages = 0;	/* # of pages we got so far */
1018cd7ee8acSart 
1019cd7ee8acSart 		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1020cd7ee8acSart 		    lcv++, current_offset += PAGE_SIZE) {
1021cd7ee8acSart 			/* do we care about this page?  if not, skip it */
1022cd7ee8acSart 			if (pps[lcv] == PGO_DONTCARE)
1023cd7ee8acSart 				continue;
1024cd7ee8acSart 
1025a8f4448aSmpi 			/* lookup page */
1026cd7ee8acSart 			ptmp = uvm_pagelookup(uobj, current_offset);
1027cd7ee8acSart 
1028cd7ee8acSart 			/*
102952887a38Smpi 			 * to be useful we must get a non-busy page
103052887a38Smpi 			 */
1031a8f4448aSmpi 			if (ptmp == NULL || (ptmp->pg_flags & PG_BUSY) != 0) {
1032cd7ee8acSart 				if (lcv == centeridx ||
1033cd7ee8acSart 				    (flags & PGO_ALLPAGES) != 0)
1034cd7ee8acSart 					/* need to do a wait or I/O! */
1035cd7ee8acSart 					done = FALSE;
1036cd7ee8acSart 				continue;
1037cd7ee8acSart 			}
1038cd7ee8acSart 
1039cd7ee8acSart 			/*
104052887a38Smpi 			 * useful page: plug it in our result array
1041cd7ee8acSart 			 */
1042cd7ee8acSart 			pps[lcv] = ptmp;
1043cd7ee8acSart 			gotpages++;
104435164244Stedu 		}
1045cd7ee8acSart 
1046cd7ee8acSart 		/*
1047cd7ee8acSart  		 * step 1b: now we've either done everything needed or we need
1048cd7ee8acSart 		 * to unlock and do some waiting or I/O.
1049cd7ee8acSart  		 */
1050cd7ee8acSart 		*npagesp = gotpages;
1051a8f4448aSmpi 		return done ? VM_PAGER_OK : VM_PAGER_UNLOCK;
1052cd7ee8acSart 	}
1053cd7ee8acSart 
1054cd7ee8acSart 	/*
1055cd7ee8acSart  	 * step 2: get non-resident or busy pages.
1056b8a635f6Stedu  	 * data structures are unlocked.
1057cd7ee8acSart  	 */
1058cd7ee8acSart 	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1059cd7ee8acSart 	    lcv++, current_offset += PAGE_SIZE) {
1060cd7ee8acSart 		/*
1061cd7ee8acSart 		 * - skip over pages we've already gotten or don't want
1062cd7ee8acSart 		 * - skip over pages we don't _have_ to get
1063cd7ee8acSart 		 */
1064cd7ee8acSart 		if (pps[lcv] != NULL ||
1065cd7ee8acSart 		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1066cd7ee8acSart 			continue;
1067cd7ee8acSart 
106828fbabcfSart 		pageidx = current_offset >> PAGE_SHIFT;
106928fbabcfSart 
1070cd7ee8acSart 		/*
1071cd7ee8acSart  		 * we have yet to locate the current page (pps[lcv]).   we
1072cd7ee8acSart 		 * first look for a page that is already at the current offset.
1073cd7ee8acSart 		 * if we find a page, we check to see if it is busy or
1074cd7ee8acSart 		 * released.  if that is the case, then we sleep on the page
1075cd7ee8acSart 		 * until it is no longer busy or released and repeat the lookup.
1076cd7ee8acSart 		 * if the page we found is neither busy nor released, then we
1077cd7ee8acSart 		 * busy it (so we own it) and plug it into pps[lcv].   this
1078cd7ee8acSart 		 * 'break's the following while loop and indicates we are
1079cd7ee8acSart 		 * ready to move on to the next page in the "lcv" loop above.
1080cd7ee8acSart  		 *
1081cd7ee8acSart  		 * if we exit the while loop with pps[lcv] still set to NULL,
1082cd7ee8acSart 		 * then it means that we allocated a new busy/fake/clean page
1083cd7ee8acSart 		 * ptmp in the object and we need to do I/O to fill in the data.
1084cd7ee8acSart  		 */
1085cd7ee8acSart 
1086cd7ee8acSart 		/* top of "pps" while loop */
1087cd7ee8acSart 		while (pps[lcv] == NULL) {
1088cd7ee8acSart 			/* look for a resident page */
1089cd7ee8acSart 			ptmp = uvm_pagelookup(uobj, current_offset);
1090cd7ee8acSart 
1091cd7ee8acSart 			/* not resident?   allocate one now (if we can) */
1092cd7ee8acSart 			if (ptmp == NULL) {
1093cd7ee8acSart 
1094cd7ee8acSart 				ptmp = uvm_pagealloc(uobj, current_offset,
10958a42ed70Sart 				    NULL, 0);
1096cd7ee8acSart 
1097cd7ee8acSart 				/* out of RAM? */
1098cd7ee8acSart 				if (ptmp == NULL) {
109969c04514Smpi 					rw_exit(uobj->vmobjlock);
1100cd7ee8acSart 					uvm_wait("uao_getpage");
110169c04514Smpi 					rw_enter(uobj->vmobjlock, RW_WRITE);
110269c04514Smpi 					/* goto top of pps while loop */
1103cd7ee8acSart 					continue;
1104cd7ee8acSart 				}
1105cd7ee8acSart 
1106cd7ee8acSart 				/*
1107cd7ee8acSart 				 * safe with PQ's unlocked: because we just
1108cd7ee8acSart 				 * alloc'd the page
1109cd7ee8acSart 				 */
111065d6360cSart 				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
1111cd7ee8acSart 
1112cd7ee8acSart 				/*
1113cd7ee8acSart 				 * got new page ready for I/O.  break pps while
1114cd7ee8acSart 				 * loop.  pps[lcv] is still NULL.
1115cd7ee8acSart 				 */
1116cd7ee8acSart 				break;
1117cd7ee8acSart 			}
1118cd7ee8acSart 
1119cd7ee8acSart 			/* page is there, see if we need to wait on it */
11200b0fe1a1Soga 			if ((ptmp->pg_flags & PG_BUSY) != 0) {
11215b4619eaSmpi 				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
11225b4619eaSmpi 				rw_enter(uobj->vmobjlock, RW_WRITE);
1123cd7ee8acSart 				continue;	/* goto top of pps while loop */
1124cd7ee8acSart 			}
1125cd7ee8acSart 
1126cd7ee8acSart 			/*
112752887a38Smpi  			 * if we get here then the page is resident and
112852887a38Smpi 			 * unbusy.  we busy it now (so we own it).
1129cd7ee8acSart  			 */
1130cd7ee8acSart 			/* we own it, caller must un-busy */
113165d6360cSart 			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1132cd7ee8acSart 			UVM_PAGE_OWN(ptmp, "uao_get2");
1133cd7ee8acSart 			pps[lcv] = ptmp;
1134cd7ee8acSart 		}
1135cd7ee8acSart 
1136cd7ee8acSart 		/*
1137cd7ee8acSart  		 * if we own the valid page at the correct offset, pps[lcv] will
1138cd7ee8acSart  		 * point to it.   nothing more to do except go to the next page.
1139cd7ee8acSart  		 */
1140cd7ee8acSart 		if (pps[lcv])
1141cd7ee8acSart 			continue;			/* next lcv */
1142cd7ee8acSart 
1143cd7ee8acSart 		/*
1144cd7ee8acSart  		 * we have a "fake/busy/clean" page that we just allocated.
1145cd7ee8acSart  		 * do the needed "i/o", either reading from swap or zeroing.
1146cd7ee8acSart  		 */
114757296fa7Smpi 		swslot = uao_find_swslot(uobj, pageidx);
1148cd7ee8acSart 
114935164244Stedu 		/* just zero the page if there's nothing in swap.  */
11500b0fe1a1Soga 		if (swslot == 0) {
115135164244Stedu 			/* page hasn't existed before, just zero it. */
1152cd7ee8acSart 			uvm_pagezero(ptmp);
115328fbabcfSart 		} else {
115452887a38Smpi 			/*
115552887a38Smpi 			 * page in the swapped-out page.
115669c04514Smpi 			 * unlock object for i/o, relock when done.
115752887a38Smpi 			 */
115869c04514Smpi 
115969c04514Smpi 			rw_exit(uobj->vmobjlock);
1160cd7ee8acSart 			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
116169c04514Smpi 			rw_enter(uobj->vmobjlock, RW_WRITE);
1162cd7ee8acSart 
116352887a38Smpi 			/*
116452887a38Smpi 			 * I/O done.  check for errors.
116552887a38Smpi 			 */
116635164244Stedu 			if (rv != VM_PAGER_OK) {
116728fbabcfSart 				/*
116828fbabcfSart 				 * remove the swap slot from the aobj
116928fbabcfSart 				 * and mark the aobj as having no real slot.
117028fbabcfSart 				 * don't free the swap slot, thus preventing
117128fbabcfSart 				 * it from being used again.
117228fbabcfSart 				 */
117328fbabcfSart 				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
117428fbabcfSart 							SWSLOT_BAD);
117528fbabcfSart 				uvm_swap_markbad(swslot, 1);
117628fbabcfSart 
1177934ce9c8Skettenis 				if (ptmp->pg_flags & PG_WANTED)
1178934ce9c8Skettenis 					wakeup(ptmp);
117965d6360cSart 				atomic_clearbits_int(&ptmp->pg_flags,
118065d6360cSart 				    PG_WANTED|PG_BUSY);
1181cd7ee8acSart 				UVM_PAGE_OWN(ptmp, NULL);
1182cd7ee8acSart 				uvm_lock_pageq();
1183cd7ee8acSart 				uvm_pagefree(ptmp);
1184cd7ee8acSart 				uvm_unlock_pageq();
118569c04514Smpi 				rw_exit(uobj->vmobjlock);
118628fbabcfSart 
1187b9df1565Smpi 				return rv;
1188cd7ee8acSart 			}
1189cd7ee8acSart 		}
1190cd7ee8acSart 
1191cd7ee8acSart 		/*
1192cd7ee8acSart  		 * we got the page!   clear the fake flag (indicates valid
1193cd7ee8acSart 		 * data now in page) and plug into our result array.   note
1194cd7ee8acSart 		 * that page is still busy.
1195cd7ee8acSart  		 *
1196cd7ee8acSart  		 * it is the callers job to:
1197cd7ee8acSart  		 * => check if the page is released
1198cd7ee8acSart  		 * => unbusy the page
1199cd7ee8acSart  		 * => activate the page
1200cd7ee8acSart  		 */
120165d6360cSart 		atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
12022c7adcb7Sart 		pmap_clear_modify(ptmp);		/* ... and clean */
1203cd7ee8acSart 		pps[lcv] = ptmp;
1204cd7ee8acSart 
1205cd7ee8acSart 	}	/* lcv loop */
1206cd7ee8acSart 
120769c04514Smpi 	rw_exit(uobj->vmobjlock);
1208b9df1565Smpi 	return VM_PAGER_OK;
1209cd7ee8acSart }
1210cd7ee8acSart 
1211cd7ee8acSart /*
12128a42ed70Sart  * uao_dropswap:  release any swap resources from this aobj page.
121369c04514Smpi  *
121469c04514Smpi  * => aobj must be locked or have a reference count of 0.
12158a42ed70Sart  */
121606be59c1Soga int
12172023d591Soga uao_dropswap(struct uvm_object *uobj, int pageidx)
12188a42ed70Sart {
12198a42ed70Sart 	int slot;
12208a42ed70Sart 
122157296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
122257296fa7Smpi 
12238a42ed70Sart 	slot = uao_set_swslot(uobj, pageidx, 0);
12248a42ed70Sart 	if (slot) {
12258a42ed70Sart 		uvm_swap_free(slot, 1);
12268a42ed70Sart 	}
1227b9df1565Smpi 	return slot;
12288a42ed70Sart }
122928fbabcfSart 
123028fbabcfSart /*
123128fbabcfSart  * page in every page in every aobj that is paged-out to a range of swslots.
123228fbabcfSart  *
123369c04514Smpi  * => aobj must be locked and is returned locked.
123428fbabcfSart  * => returns TRUE if pagein was aborted due to lack of memory.
123528fbabcfSart  */
123628fbabcfSart boolean_t
12372023d591Soga uao_swap_off(int startslot, int endslot)
123828fbabcfSart {
12391af74310Smpi 	struct uvm_aobj *aobj;
124028fbabcfSart 
124152887a38Smpi 	/*
12421af74310Smpi 	 * Walk the list of all anonymous UVM objects.  Grab the first.
124352887a38Smpi 	 */
1244bd69ae14Soga 	mtx_enter(&uao_list_lock);
12451af74310Smpi 	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
12461af74310Smpi 		mtx_leave(&uao_list_lock);
12471af74310Smpi 		return FALSE;
12481af74310Smpi 	}
12491af74310Smpi 	uao_reference(&aobj->u_obj);
125028fbabcfSart 
12511af74310Smpi 	do {
12521af74310Smpi 		struct uvm_aobj *nextaobj;
125328fbabcfSart 		boolean_t rv;
125428fbabcfSart 
125528fbabcfSart 		/*
12561af74310Smpi 		 * Prefetch the next object and immediately hold a reference
12571af74310Smpi 		 * on it, so neither the current nor the next entry could
12581af74310Smpi 		 * disappear while we are iterating.
125928fbabcfSart 		 */
12601af74310Smpi 		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
12611af74310Smpi 			uao_reference(&nextaobj->u_obj);
12621af74310Smpi 		}
1263bd69ae14Soga 		mtx_leave(&uao_list_lock);
126428fbabcfSart 
126528fbabcfSart 		/*
12661af74310Smpi 		 * Page in all pages in the swap slot range.
126728fbabcfSart 		 */
126869c04514Smpi 		rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
126928fbabcfSart 		rv = uao_pagein(aobj, startslot, endslot);
127069c04514Smpi 		rw_exit(aobj->u_obj.vmobjlock);
12711af74310Smpi 
12721af74310Smpi 		/* Drop the reference of the current object. */
12731af74310Smpi 		uao_detach(&aobj->u_obj);
127428fbabcfSart 		if (rv) {
12751af74310Smpi 			if (nextaobj) {
12761af74310Smpi 				uao_detach(&nextaobj->u_obj);
12771af74310Smpi 			}
127828fbabcfSart 			return rv;
127928fbabcfSart 		}
128028fbabcfSart 
12811af74310Smpi 		aobj = nextaobj;
1282bd69ae14Soga 		mtx_enter(&uao_list_lock);
12831af74310Smpi 	} while (aobj);
128428fbabcfSart 
128552887a38Smpi 	/*
128652887a38Smpi 	 * done with traversal, unlock the list
128752887a38Smpi 	 */
1288bd69ae14Soga 	mtx_leave(&uao_list_lock);
128928fbabcfSart 	return FALSE;
129028fbabcfSart }
129128fbabcfSart 
129228fbabcfSart /*
129328fbabcfSart  * page in any pages from aobj in the given range.
129428fbabcfSart  *
129528fbabcfSart  * => returns TRUE if pagein was aborted due to lack of memory.
129628fbabcfSart  */
12971e3e475dSoga static boolean_t
12982023d591Soga uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
129928fbabcfSart {
130028fbabcfSart 	boolean_t rv;
130128fbabcfSart 
130239c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
130328fbabcfSart 		struct uao_swhash_elt *elt;
130428fbabcfSart 		int bucket;
130528fbabcfSart 
130628fbabcfSart restart:
130728fbabcfSart 		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
130828fbabcfSart 			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
130928fbabcfSart 			     elt != NULL;
131028fbabcfSart 			     elt = LIST_NEXT(elt, list)) {
131128fbabcfSart 				int i;
131228fbabcfSart 
131328fbabcfSart 				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
131428fbabcfSart 					int slot = elt->slots[i];
131528fbabcfSart 
131652887a38Smpi 					/*
131752887a38Smpi 					 * if the slot isn't in range, skip it.
131852887a38Smpi 					 */
131928fbabcfSart 					if (slot < startslot ||
132028fbabcfSart 					    slot >= endslot) {
132128fbabcfSart 						continue;
132228fbabcfSart 					}
132328fbabcfSart 
132428fbabcfSart 					/*
132528fbabcfSart 					 * process the page,
132628fbabcfSart 					 * then start over on this object
132728fbabcfSart 					 * since the swhash elt
132828fbabcfSart 					 * may have been freed.
132928fbabcfSart 					 */
133028fbabcfSart 					rv = uao_pagein_page(aobj,
133128fbabcfSart 					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
133228fbabcfSart 					if (rv) {
133328fbabcfSart 						return rv;
133428fbabcfSart 					}
133528fbabcfSart 					goto restart;
133628fbabcfSart 				}
133728fbabcfSart 			}
133828fbabcfSart 		}
133928fbabcfSart 	} else {
134028fbabcfSart 		int i;
134128fbabcfSart 
134228fbabcfSart 		for (i = 0; i < aobj->u_pages; i++) {
134328fbabcfSart 			int slot = aobj->u_swslots[i];
134428fbabcfSart 
134552887a38Smpi 			/*
134652887a38Smpi 			 * if the slot isn't in range, skip it
134752887a38Smpi 			 */
134828fbabcfSart 			if (slot < startslot || slot >= endslot) {
134928fbabcfSart 				continue;
135028fbabcfSart 			}
135128fbabcfSart 
135252887a38Smpi 			/*
135352887a38Smpi 			 * process the page.
135452887a38Smpi 			 */
135528fbabcfSart 			rv = uao_pagein_page(aobj, i);
135628fbabcfSart 			if (rv) {
135728fbabcfSart 				return rv;
135828fbabcfSart 			}
135928fbabcfSart 		}
136028fbabcfSart 	}
136128fbabcfSart 
136228fbabcfSart 	return FALSE;
136328fbabcfSart }
136428fbabcfSart 
136528fbabcfSart /*
136652887a38Smpi  * uao_pagein_page: page in a single page from an anonymous UVM object.
136752887a38Smpi  *
136852887a38Smpi  * => Returns TRUE if pagein was aborted due to lack of memory.
136928fbabcfSart  */
13701e3e475dSoga static boolean_t
13712023d591Soga uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
137228fbabcfSart {
137369c04514Smpi 	struct uvm_object *uobj = &aobj->u_obj;
137428fbabcfSart 	struct vm_page *pg;
13756ec37434Smpi 	int rv, npages;
137628fbabcfSart 
137728fbabcfSart 	pg = NULL;
137828fbabcfSart 	npages = 1;
137969c04514Smpi 
138069c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
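	/*
	 * Fetch the page.  uao_get() releases the object lock for the
	 * duration of the synchronous I/O, hence the relock below.
	 */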
138136d5d901Skettenis 	rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
13821e8cdc2eSderaadt 	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
138328fbabcfSart 
138469c04514Smpi 	/*
138569c04514Smpi 	 * relock and finish up.
138669c04514Smpi 	 */
138769c04514Smpi 	rw_enter(uobj->vmobjlock, RW_WRITE);
138828fbabcfSart 	switch (rv) {
13891414b0faSart 	case VM_PAGER_OK:
139028fbabcfSart 		break;
139128fbabcfSart 
13921414b0faSart 	case VM_PAGER_ERROR:
13931414b0faSart 	case VM_PAGER_REFAULT:
139428fbabcfSart 		/*
139528fbabcfSart 		 * nothing more to do on errors.
13961414b0faSart 		 * VM_PAGER_REFAULT can only mean that the anon was freed,
139728fbabcfSart 		 * so again there's nothing to do.
139828fbabcfSart 		 */
139928fbabcfSart 		return FALSE;
140028fbabcfSart 	}
140128fbabcfSart 
140228fbabcfSart 	/*
140328fbabcfSart 	 * ok, we've got the page now.
140428fbabcfSart 	 * mark it as dirty, clear its swslot and un-busy it.
140528fbabcfSart 	 */
14066ec37434Smpi 	uao_dropswap(&aobj->u_obj, pageidx);
140765d6360cSart 	atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
140828fbabcfSart 	UVM_PAGE_OWN(pg, NULL);
140928fbabcfSart 
141052887a38Smpi 	/*
141152887a38Smpi 	 * deactivate the page (to put it on a page queue).
141252887a38Smpi 	 */
141328fbabcfSart 	uvm_lock_pageq();
141428fbabcfSart 	uvm_pagedeactivate(pg);
141528fbabcfSart 	uvm_unlock_pageq();
141628fbabcfSart 
141728fbabcfSart 	return FALSE;
141828fbabcfSart }
14191731322cSespie 
14201731322cSespie /*
14211731322cSespie  * uao_dropswap_range: drop swap slots in the given range.
14221731322cSespie  *
14231731322cSespie  * => aobj must be locked and is returned locked.
14241731322cSespie  * => start is inclusive.  end is exclusive.
14251731322cSespie  */
14261731322cSespie void
14271731322cSespie uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
14281731322cSespie {
14291731322cSespie 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
14301731322cSespie 	int swpgonlydelta = 0;
14311731322cSespie 
143257296fa7Smpi 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
143369c04514Smpi 	KASSERT(rw_write_held(uobj->vmobjlock));
14341731322cSespie 
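	/* An end of 0 means "to the end of the object". */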
14351731322cSespie 	if (end == 0) {
14361731322cSespie 		end = INT64_MAX;
14371731322cSespie 	}
14381731322cSespie 
143939c73ac7Smpi 	if (UAO_USES_SWHASH(aobj)) {
14401731322cSespie 		int i, hashbuckets = aobj->u_swhashmask + 1;
14411731322cSespie 		voff_t taghi;
14421731322cSespie 		voff_t taglo;
14431731322cSespie 
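		/*
		 * Compute the hash tags of the clusters that contain the
		 * first and the last page of the range.
		 */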
14441731322cSespie 		taglo = UAO_SWHASH_ELT_TAG(start);
14451731322cSespie 		taghi = UAO_SWHASH_ELT_TAG(end);
14461731322cSespie 
14471731322cSespie 		for (i = 0; i < hashbuckets; i++) {
14481731322cSespie 			struct uao_swhash_elt *elt, *next;
14491731322cSespie 
14501731322cSespie 			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
14511731322cSespie 			     elt != NULL;
14521731322cSespie 			     elt = next) {
14531731322cSespie 				int startidx, endidx;
14541731322cSespie 				int j;
14551731322cSespie 
14561731322cSespie 				next = LIST_NEXT(elt, list);
14571731322cSespie 
14581731322cSespie 				if (elt->tag < taglo || taghi < elt->tag) {
14591731322cSespie 					continue;
14601731322cSespie 				}
14611731322cSespie 
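				/*
				 * Clip the slot range to the part of it
				 * that overlaps this cluster.
				 */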
14621731322cSespie 				if (elt->tag == taglo) {
14631731322cSespie 					startidx =
14641731322cSespie 					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
14651731322cSespie 				} else {
14661731322cSespie 					startidx = 0;
14671731322cSespie 				}
14681731322cSespie 
14691731322cSespie 				if (elt->tag == taghi) {
14701731322cSespie 					endidx =
14711731322cSespie 					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
14721731322cSespie 				} else {
14731731322cSespie 					endidx = UAO_SWHASH_CLUSTER_SIZE;
14741731322cSespie 				}
14751731322cSespie 
14761731322cSespie 				for (j = startidx; j < endidx; j++) {
14771731322cSespie 					int slot = elt->slots[j];
14781731322cSespie 
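					/*
					 * The page must already be gone;
					 * only the swap slot is released
					 * here.
					 */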
14791731322cSespie 					KASSERT(uvm_pagelookup(&aobj->u_obj,
148036d5d901Skettenis 					    (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
14811731322cSespie 					    + j) << PAGE_SHIFT) == NULL);
14821731322cSespie 
14831731322cSespie 					if (slot > 0) {
14841731322cSespie 						uvm_swap_free(slot, 1);
14851731322cSespie 						swpgonlydelta++;
14861731322cSespie 						KASSERT(elt->count > 0);
14871731322cSespie 						elt->slots[j] = 0;
14881731322cSespie 						elt->count--;
14891731322cSespie 					}
14901731322cSespie 				}
14911731322cSespie 
14921731322cSespie 				if (elt->count == 0) {
14931731322cSespie 					LIST_REMOVE(elt, list);
14941731322cSespie 					pool_put(&uao_swhash_elt_pool, elt);
14951731322cSespie 				}
14961731322cSespie 			}
14971731322cSespie 		}
14981731322cSespie 	} else {
14991731322cSespie 		int i;
15001731322cSespie 
15011731322cSespie 		if (aobj->u_pages < end) {
15021731322cSespie 			end = aobj->u_pages;
15031731322cSespie 		}
15041731322cSespie 		for (i = start; i < end; i++) {
15051731322cSespie 			int slot = aobj->u_swslots[i];
15061731322cSespie 
15071731322cSespie 			if (slot > 0) {
15081731322cSespie 				uvm_swap_free(slot, 1);
15091731322cSespie 				swpgonlydelta++;
15101731322cSespie 			}
15111731322cSespie 		}
15121731322cSespie 	}
15131731322cSespie 
15141731322cSespie 	/*
15151731322cSespie 	 * Decrease the counter of pages that live only in swap by the
15161731322cSespie 	 * number of swap slots we've freed.
15171731322cSespie 	 */
15181731322cSespie 	if (swpgonlydelta > 0) {
15191731322cSespie 		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1520c4a864baSmpi 		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
15211731322cSespie 	}
15221731322cSespie }
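
/*
 * Usage sketch (illustrative, not part of the original file): since
 * an end of 0 means "to the end of the object", all swap slots
 * backing an aobj could be released with:
 *
 *	rw_enter(uobj->vmobjlock, RW_WRITE);
 *	uao_dropswap_range(uobj, 0, 0);
 *	rw_exit(uobj->vmobjlock);
 */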
1523