/*	$NetBSD: uvm_aobj.c,v 1.157 2023/02/24 11:03:13 riastradh Exp $	*/

/*
 * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
 *                    Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
 */

/*
 * uvm_aobj.c: anonymous memory uvm_object pager
 *
 * author: Chuck Silvers <chuq@chuq.com>
 * started: Jan-1998
 *
 * - design mostly from Chuck Cranor
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.157 2023/02/24 11:03:13 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_uvmhist.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/pool.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_page_array.h>

57f2caacc7Smrg /*
588c260680Srmind  * An anonymous UVM object (aobj) manages anonymous-memory.  In addition to
598c260680Srmind  * keeping the list of resident pages, it may also keep a list of allocated
608c260680Srmind  * swap blocks.  Depending on the size of the object, this list is either
618c260680Srmind  * stored in an array (small objects) or in a hash table (large objects).
628c260680Srmind  *
638c260680Srmind  * Lock order
648c260680Srmind  *
651e840676Srmind  *	uao_list_lock ->
661e840676Srmind  *		uvm_object::vmobjlock
67f2caacc7Smrg  */
68f2caacc7Smrg 
69f2caacc7Smrg /*
708c260680Srmind  * Note: for hash tables, we break the address space of the aobj into blocks
718c260680Srmind  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
72f2caacc7Smrg  */
73f2caacc7Smrg 
#define	UAO_SWHASH_CLUSTER_SHIFT	4
#define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)

/* Get the "tag" for this page index. */
#define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
#define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
    ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))

/* Given an ELT and a page index, find the swap slot. */
#define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
    ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])

/* Given an ELT, return its pageidx base. */
#define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
    ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)

/* The hash function. */
#define	UAO_SWHASH_HASH(aobj, idx) \
    (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
    & (aobj)->u_swhashmask)])
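
/*
 * Worked example: with UAO_SWHASH_CLUSTER_SHIFT == 4, page index 0x123
 * has tag 0x12 and in-cluster slot index 0x3, and hashes to bucket
 * (0x12 & u_swhashmask).  The sixteen page indexes 0x120-0x12f share
 * one tag and therefore one uao_swhash_elt.
 */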

/*
 * The threshold which determines whether we will use an array or a
 * hash table to store the list of allocated swap blocks.
 */
#define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
#define	UAO_USES_SWHASH(aobj) \
    ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)

/* The number of buckets in a hash, with an upper bound. */
#define	UAO_SWHASH_MAXBUCKETS		256
#define	UAO_SWHASH_BUCKETS(aobj) \
    (MIN((aobj)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
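
/*
 * In concrete numbers: the threshold is 16 * 4 = 64 pages (256 KiB
 * with 4 KiB pages), so only objects larger than that use a hash
 * table; the bucket count is u_pages / 16, capped at 256 buckets.
 */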

/*
 * uao_swhash_elt: when a hash table is being used, this structure defines
 * the format of an entry in the bucket list.
 */

struct uao_swhash_elt {
	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
	voff_t tag;				/* our 'tag' */
	int count;				/* our number of active slots */
	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
};
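
/*
 * Each element covers one cluster of UAO_SWHASH_CLUSTER_SIZE (16)
 * consecutive page indexes sharing a tag; "count" tracks how many
 * slots are non-zero, so an element can be freed as soon as it
 * records no swap slots at all.
 */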

/*
 * uao_swhash: the swap hash table structure
 */

LIST_HEAD(uao_swhash, uao_swhash_elt);

/*
 * uao_swhash_elt_pool: pool of uao_swhash_elt structures.
 * Note: pages for this pool must not come from a pageable kernel map.
 */
static struct pool	uao_swhash_elt_pool	__cacheline_aligned;

/*
 * uvm_aobj: the actual anon-backed uvm_object
 *
 * => the uvm_object is at the top of the structure, this allows
 *   (struct uvm_aobj *) == (struct uvm_object *)
 * => only one of u_swslots and u_swhash is used in any given aobj
 */

struct uvm_aobj {
	struct uvm_object u_obj; /* has: lock, pgops, #pages, #refs */
	pgoff_t u_pages;	 /* number of pages in entire object */
	int u_flags;		 /* the flags (see uvm_aobj.h) */
	int *u_swslots;		 /* array of offset->swapslot mappings */
				 /*
				  * hashtable of offset->swapslot mappings
				  * (u_swhash is an array of bucket heads)
				  */
	struct uao_swhash *u_swhash;
	u_long u_swhashmask;		/* mask for hashtable */
	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
	int u_freelist;		  /* freelist to allocate pages from */
};

static void	uao_free(struct uvm_aobj *);
static int	uao_get(struct uvm_object *, voff_t, struct vm_page **,
		    int *, int, vm_prot_t, int, int);
static int	uao_put(struct uvm_object *, voff_t, voff_t, int);

#if defined(VMSWAP)
static struct uao_swhash_elt *uao_find_swhash_elt
    (struct uvm_aobj *, int, bool);

static bool uao_pagein(struct uvm_aobj *, int, int);
static bool uao_pagein_page(struct uvm_aobj *, int);
#endif /* defined(VMSWAP) */

static struct vm_page	*uao_pagealloc(struct uvm_object *, voff_t, int);

/*
 * aobj_pager
 *
 * note that some functions (e.g. put) are handled elsewhere
 */

const struct uvm_pagerops aobj_pager = {
	.pgo_reference = uao_reference,
	.pgo_detach = uao_detach,
	.pgo_get = uao_get,
	.pgo_put = uao_put,
};

/*
 * uao_list: global list of active aobjs, locked by uao_list_lock
 */

static LIST_HEAD(aobjlist, uvm_aobj) uao_list	__cacheline_aligned;
static kmutex_t		uao_list_lock		__cacheline_aligned;

/*
 * hash table/array related functions
 */

#if defined(VMSWAP)

/*
 * uao_find_swhash_elt: find (or create) a hash table entry for a page
 * offset.
 *
 * => the object should be locked by the caller
 */

static struct uao_swhash_elt *
uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
{
	struct uao_swhash *swhash;
	struct uao_swhash_elt *elt;
	voff_t page_tag;

	swhash = UAO_SWHASH_HASH(aobj, pageidx);
	page_tag = UAO_SWHASH_ELT_TAG(pageidx);

	/*
	 * now search the bucket for the requested tag
	 */

	LIST_FOREACH(elt, swhash, list) {
		if (elt->tag == page_tag) {
			return elt;
		}
	}
	if (!create) {
		return NULL;
	}

	/*
	 * allocate a new entry for the bucket and initialize/insert it
	 */

	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
	if (elt == NULL) {
		return NULL;
	}
	LIST_INSERT_HEAD(swhash, elt, list);
	elt->tag = page_tag;
	elt->count = 0;
	memset(elt->slots, 0, sizeof(elt->slots));
	return elt;
}
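
/*
 * Note: the element pool is allocated with PR_NOWAIT, so this can
 * return NULL even when create is true; uao_set_swslot() reports
 * that failure to its caller as -1.
 */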

/*
 * uao_find_swslot: find the swap slot number for an aobj/pageidx
 *
 * => object must be locked by caller
 */

int
uao_find_swslot(struct uvm_object *uobj, int pageidx)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we never return a slot
	 */

	if (aobj->u_flags & UAO_FLAG_NOSWAP)
		return 0;

	/*
	 * if hashing, look in hash table.
	 */

	if (UAO_USES_SWHASH(aobj)) {
		elt = uao_find_swhash_elt(aobj, pageidx, false);
		return elt ? UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) : 0;
	}

	/*
	 * otherwise, look in the array
	 */

	return aobj->u_swslots[pageidx];
}

/*
 * uao_set_swslot: set the swap slot for a page in an aobj.
 *
 * => setting a slot to zero frees the slot
 * => object must be locked by caller
 * => we return the old slot number, or -1 if we failed to allocate
 *    memory to record the new slot number
 */

int
uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uao_swhash_elt *elt;
	int oldslot;
	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pdhist, "aobj %#jx pageidx %jd slot %jd",
	    (uintptr_t)aobj, pageidx, slot, 0);

	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	/*
	 * if noswap flag is set, then we can't set a non-zero slot.
	 */

	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
		KASSERTMSG(slot == 0, "uao_set_swslot: no swap object");
		return 0;
	}

	/*
	 * are we using a hash table?  if so, add it in the hash.
	 */

	if (UAO_USES_SWHASH(aobj)) {

		/*
		 * Avoid allocating an entry just to free it again if
		 * the page had no swap slot in the first place, and
		 * we are freeing.
		 */

		elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
		if (elt == NULL) {
			return slot ? -1 : 0;
		}

		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;

		/*
		 * now adjust the elt's reference counter and free it if we've
		 * dropped it to zero.
		 */

		if (slot) {
			if (oldslot == 0)
				elt->count++;
		} else {
			if (oldslot)
				elt->count--;

			if (elt->count == 0) {
				LIST_REMOVE(elt, list);
				pool_put(&uao_swhash_elt_pool, elt);
			}
		}
	} else {
		/* we are using an array */
		oldslot = aobj->u_swslots[pageidx];
		aobj->u_swslots[pageidx] = slot;
	}
	return oldslot;
}
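
/*
 * Typical use: uao_dropswap() below calls uao_set_swslot(uobj, pageidx, 0)
 * to erase the record, then hands the returned old slot, if any, to
 * uvm_swap_free().
 */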

#endif /* defined(VMSWAP) */

/*
 * end of hash/array functions
 */

/*
 * uao_free: free all resources held by an aobj, and then free the aobj
 *
 * => the aobj should be dead
 */

static void
uao_free(struct uvm_aobj *aobj)
{
	struct uvm_object *uobj = &aobj->u_obj;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));
	uao_dropswap_range(uobj, 0, 0);
	rw_exit(uobj->vmobjlock);

#if defined(VMSWAP)
	if (UAO_USES_SWHASH(aobj)) {

		/*
		 * free the hash table itself.
		 */

		hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
	} else {

		/*
		 * free the array itself.
		 */

		kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
	}
#endif /* defined(VMSWAP) */

	/*
	 * finally free the aobj itself
	 */

	uvm_obj_destroy(uobj, true);
	kmem_free(aobj, sizeof(struct uvm_aobj));
}

/*
 * pager functions
 */

/*
 * uao_create: create an aobj of the given size and return its uvm_object.
 *
 * => for normal use, flags are always zero
 * => for the kernel object, the flags are:
 *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
 *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
 */
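
/*
 * Illustrative (hypothetical) example: a caller wanting 1 MiB of
 * pageable anonymous backing would call uao_create(1024 * 1024, 0)
 * and get back a uvm_object carrying a single reference.
 */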

struct uvm_object *
uao_create(voff_t size, int flags)
{
	static struct uvm_aobj kernel_object_store;
	static krwlock_t bootstrap_kernel_object_lock;
	static int kobj_alloced __diagused = 0;
	pgoff_t pages = round_page((uint64_t)size) >> PAGE_SHIFT;
	struct uvm_aobj *aobj;
	int refs;

	/*
	 * Allocate a new aobj, unless kernel object is requested.
	 */

	if (flags & UAO_FLAG_KERNOBJ) {
		KASSERT(!kobj_alloced);
		aobj = &kernel_object_store;
		aobj->u_pages = pages;
		aobj->u_flags = UAO_FLAG_NOSWAP;
		refs = UVM_OBJ_KERN;
		kobj_alloced = UAO_FLAG_KERNOBJ;
	} else if (flags & UAO_FLAG_KERNSWAP) {
		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
		aobj = &kernel_object_store;
		kobj_alloced = UAO_FLAG_KERNSWAP;
		refs = 0xdeadbeaf; /* XXX: gcc */
	} else {
		aobj = kmem_alloc(sizeof(struct uvm_aobj), KM_SLEEP);
		aobj->u_pages = pages;
		aobj->u_flags = 0;
		refs = 1;
	}

	/*
	 * no freelist by default
	 */

	aobj->u_freelist = VM_NFREELIST;

	/*
	 * allocate hash/array if necessary
	 *
	 * note: in the KERNSWAP case there is no need to worry about
	 * locking, since we are still booting and should be the only
	 * thread around.
	 */

	const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
	if (flags == 0 || kernswap) {
#if defined(VMSWAP)

		/* allocate hash table or array depending on object size */
		if (UAO_USES_SWHASH(aobj)) {
			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
			    HASH_LIST, true, &aobj->u_swhashmask);
		} else {
			aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
			    KM_SLEEP);
		}
#endif /* defined(VMSWAP) */

		/*
		 * Replace kernel_object's temporary static lock with
		 * a regular rw_obj.  We cannot use uvm_obj_setlock()
		 * because that would try to free the old lock.
		 */

		if (kernswap) {
			aobj->u_obj.vmobjlock = rw_obj_alloc();
			rw_destroy(&bootstrap_kernel_object_lock);
		}
		if (flags) {
			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
			return &aobj->u_obj;
		}
	}

	/*
	 * Initialise UVM object.
	 */

	const bool kernobj = (flags & UAO_FLAG_KERNOBJ) != 0;
	uvm_obj_init(&aobj->u_obj, &aobj_pager, !kernobj, refs);
	if (__predict_false(kernobj)) {
		/* Use a temporary static lock for kernel_object. */
		rw_init(&bootstrap_kernel_object_lock);
		uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
	}

	/*
	 * now that aobj is ready, add it to the global list
	 */

	mutex_enter(&uao_list_lock);
	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
	mutex_exit(&uao_list_lock);
	return &aobj->u_obj;
}

/*
 * uao_set_pgfl: allocate pages only from the specified freelist.
 *
 * => must be called before any pages are allocated for the object.
 * => reset by setting it to VM_NFREELIST, meaning any freelist.
 */

void
uao_set_pgfl(struct uvm_object *uobj, int freelist)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	KASSERTMSG((0 <= freelist), "invalid freelist %d", freelist);
	KASSERTMSG((freelist <= VM_NFREELIST), "invalid freelist %d",
	    freelist);

	aobj->u_freelist = freelist;
}
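
/*
 * Hypothetical example: a port whose devices can only DMA to low
 * memory might dedicate a freelist to that region and pass it to
 * uao_set_pgfl() right after uao_create(); the freelist constant
 * itself is port-specific.
 */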

/*
 * uao_pagealloc: allocate a page for aobj.
 */

static inline struct vm_page *
uao_pagealloc(struct uvm_object *uobj, voff_t offset, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;

	if (__predict_true(aobj->u_freelist == VM_NFREELIST))
		return uvm_pagealloc(uobj, offset, NULL, flags);
	else
		return uvm_pagealloc_strat(uobj, offset, NULL, flags,
		    UVM_PGA_STRAT_ONLY, aobj->u_freelist);
}

/*
 * uao_init: set up aobj pager subsystem
 *
 * => called at boot time from uvm_pager_init()
 */

void
uao_init(void)
{
	static int uao_initialized;

	if (uao_initialized)
		return;
	uao_initialized = true;
	LIST_INIT(&uao_list);
	mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
	    0, 0, 0, "uaoeltpl", NULL, IPL_VM);
}

/*
 * uao_reference: hold a reference to an anonymous UVM object.
 */
void
uao_reference(struct uvm_object *uobj)
{
	/* Kernel object is persistent. */
	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
		return;
	}
	atomic_inc_uint(&uobj->uo_refs);
}

/*
 * uao_detach: drop a reference to an anonymous UVM object.
 */
void
uao_detach(struct uvm_object *uobj)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uvm_page_array a;
	struct vm_page *pg;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	/*
	 * Detaching from the kernel object is a NOP.
	 */

	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
		return;

	/*
	 * Drop the reference.  If it was the last one, destroy the object.
	 */

	KASSERT(uobj->uo_refs > 0);
	UVMHIST_LOG(maphist,"  (uobj=%#jx)  ref=%jd",
	    (uintptr_t)uobj, uobj->uo_refs, 0, 0);
	membar_release();
	if (atomic_dec_uint_nv(&uobj->uo_refs) > 0) {
		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
		return;
	}
	membar_acquire();

	/*
	 * Remove the aobj from the global list.
	 */

	mutex_enter(&uao_list_lock);
	LIST_REMOVE(aobj, u_list);
	mutex_exit(&uao_list_lock);

	/*
	 * Free all the pages left in the aobj.  For each page, when the
	 * page is no longer busy (and thus after any disk I/O that it is
	 * involved in is complete), release any swap resources and free
	 * the page itself.
	 */
	uvm_page_array_init(&a, uobj, 0);
	rw_enter(uobj->vmobjlock, RW_WRITER);
	while ((pg = uvm_page_array_fill_and_peek(&a, 0, 0)) != NULL) {
		uvm_page_array_advance(&a);
		pmap_page_protect(pg, VM_PROT_NONE);
		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
			uvm_page_array_clear(&a);
			rw_enter(uobj->vmobjlock, RW_WRITER);
			continue;
		}
		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
		uvm_pagefree(pg);
	}
	uvm_page_array_fini(&a);

	/*
	 * Finally, free the anonymous UVM object itself.
	 */

	uao_free(aobj);
}

/*
 * uao_put: flush pages out of a uvm object
 *
 * => object should be locked by caller.  we may _unlock_ the object
 *	if (and only if) we need to clean a page (PGO_CLEANIT).
 *	XXXJRT Currently, however, we don't.  In the case of cleaning
 *	XXXJRT a page, we simply just deactivate it.  Should probably
 *	XXXJRT handle this better, in the future (although "flushing"
 *	XXXJRT anonymous memory isn't terribly important).
 * => if PGO_CLEANIT is not set, then we will neither unlock the object
 *	nor block.
 * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
 *	for flushing.
 * => we return 0 unless we encountered some sort of I/O error
 *	XXXJRT currently never happens, as we never directly initiate
 *	XXXJRT I/O
 */

static int
uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	struct uvm_page_array a;
	struct vm_page *pg;
	voff_t curoff;
	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (flags & PGO_ALLPAGES) {
		start = 0;
		stop = aobj->u_pages << PAGE_SHIFT;
	} else {
		start = trunc_page(start);
		if (stop == 0) {
			stop = aobj->u_pages << PAGE_SHIFT;
		} else {
			stop = round_page(stop);
		}
		if (stop > (uint64_t)(aobj->u_pages << PAGE_SHIFT)) {
			printf("uao_put: strange, got an out of range "
			    "flush %#jx > %#jx (fixed)\n",
			    (uintmax_t)stop,
			    (uintmax_t)(aobj->u_pages << PAGE_SHIFT));
			stop = aobj->u_pages << PAGE_SHIFT;
		}
	}
	UVMHIST_LOG(maphist,
	    " flush start=%#jx, stop=%#jx, flags=%#jx",
	    start, stop, flags, 0);

	/*
	 * Don't need to do any work here if we're not freeing
	 * or deactivating pages.
	 */

	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
		rw_exit(uobj->vmobjlock);
		return 0;
	}

	/* locked: uobj */
	uvm_page_array_init(&a, uobj, 0);
	curoff = start;
	while ((pg = uvm_page_array_fill_and_peek(&a, curoff, 0)) != NULL) {
		if (pg->offset >= stop) {
			break;
		}

		/*
		 * wait and try again if the page is busy.
		 */

		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, uobj->vmobjlock, "uao_put");
			uvm_page_array_clear(&a);
			rw_enter(uobj->vmobjlock, RW_WRITER);
			continue;
		}
		uvm_page_array_advance(&a);
		curoff = pg->offset + PAGE_SIZE;

		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {

		/*
		 * XXX In these first 3 cases, we always just
		 * XXX deactivate the page.  We may want to
		 * XXX handle the different cases more specifically
		 * XXX in the future.
		 */

		case PGO_CLEANIT|PGO_FREE:
		case PGO_CLEANIT|PGO_DEACTIVATE:
		case PGO_DEACTIVATE:
 deactivate_it:
			uvm_pagelock(pg);
			uvm_pagedeactivate(pg);
			uvm_pageunlock(pg);
			break;

		case PGO_FREE:
			/*
			 * If there are multiple references to
			 * the object, just deactivate the page.
			 */

			if (uobj->uo_refs > 1)
				goto deactivate_it;

			/*
			 * free the swap slot and the page.
			 */

			pmap_page_protect(pg, VM_PROT_NONE);

			/*
			 * freeing the swap slot here is not strictly
			 * necessary.  however, leaving it here doesn't save
			 * much because we need to update swap accounting
			 * anyway.
			 */

			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
			uvm_pagefree(pg);
			break;

		default:
			panic("%s: impossible", __func__);
		}
	}
	rw_exit(uobj->vmobjlock);
	uvm_page_array_fini(&a);
	return 0;
}

/*
 * uao_get: fetch me a page
 *
 * we have three cases:
 * 1: page is resident     -> just return the page.
 * 2: page is zero-fill    -> allocate a new page and zero it.
 * 3: page is swapped out  -> fetch the page from swap.
 *
 * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
 * so, if the "center" page hits case 2/3 then we will need to return EBUSY.
 *
 * => prefer map unlocked (not required)
 * => object must be locked!  we will _unlock_ it before starting any I/O.
 * => flags: PGO_LOCKED: fault data structures are locked
 * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
 * => NOTE: caller must check for released pages!!
 */
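
/*
 * Put differently: a PGO_LOCKED call never sleeps; it only harvests
 * pages that are already resident and unbusy, and returns EBUSY if
 * the center page was not among them, after which the caller is
 * expected to retry without PGO_LOCKED so that cases 2 and 3 can be
 * handled with the object write-locked.
 */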

static int
uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
    int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
{
	voff_t current_offset;
	struct vm_page *ptmp;
	int lcv, gotpages, maxpages, swslot, pageidx;
	bool overwrite = ((flags & PGO_OVERWRITE) != 0);
	struct uvm_page_array a;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pdhist, "aobj=%#jx offset=%jd, flags=%#jx",
		    (uintptr_t)uobj, offset, flags,0);

	/*
	 * the object must be locked.  it can only be a read lock when
	 * processing a read fault with PGO_LOCKED.
	 */

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_lock_held(uobj->vmobjlock));
	KASSERT(rw_write_held(uobj->vmobjlock) ||
	   ((flags & PGO_LOCKED) != 0 && (access_type & VM_PROT_WRITE) == 0));

	/*
	 * get number of pages
	 */

	maxpages = *npagesp;

	/*
	 * step 1: handle the case where fault data structures are locked.
	 */

	if (flags & PGO_LOCKED) {

		/*
		 * step 1a: get pages that are already resident.   only do
		 * this if the data structures are locked (i.e. the first
		 * time through).
		 */

		uvm_page_array_init(&a, uobj, 0);
		gotpages = 0;	/* # of pages we got so far */
		for (lcv = 0; lcv < maxpages; lcv++) {
			ptmp = uvm_page_array_fill_and_peek(&a,
			    offset + (lcv << PAGE_SHIFT), maxpages);
			if (ptmp == NULL) {
				break;
			}
			KASSERT(ptmp->offset >= offset);
			lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
			if (lcv >= maxpages) {
				break;
			}
			uvm_page_array_advance(&a);

			/*
			 * to be useful must get a non-busy page
			 */

			if ((ptmp->flags & PG_BUSY) != 0) {
				continue;
			}

			/*
			 * useful page: plug it in our result array
			 */

			KASSERT(uvm_pagegetdirty(ptmp) !=
			    UVM_PAGE_STATUS_CLEAN);
			pps[lcv] = ptmp;
			gotpages++;
		}
		uvm_page_array_fini(&a);

		/*
		 * step 1b: now we've either done everything needed or we
		 * need to unlock and do some waiting or I/O.
		 */

		UVMHIST_LOG(pdhist, "<- done (done=%jd)",
		    (pps[centeridx] != NULL), 0,0,0);
		*npagesp = gotpages;
		return pps[centeridx] != NULL ? 0 : EBUSY;
	}

	/*
	 * step 2: get non-resident or busy pages.
	 * object is locked.   data structures are unlocked.
	 */

	if ((flags & PGO_SYNCIO) == 0) {
		goto done;
	}

	uvm_page_array_init(&a, uobj, 0);
	for (lcv = 0, current_offset = offset ; lcv < maxpages ;) {

		/*
		 * we have yet to locate the current page (pps[lcv]).   we
		 * first look for a page that is already at the current offset.
		 * if we find a page, we check to see if it is busy or
		 * released.  if that is the case, then we sleep on the page
		 * until it is no longer busy or released, and repeat the
		 * lookup.  if the page we found is neither busy nor released,
		 * then we busy it (so we own it) and plug it into pps[lcv].
		 * we are then ready to move on to the next page.
		 */

		ptmp = uvm_page_array_fill_and_peek(&a, current_offset,
		    maxpages - lcv);

		if (ptmp != NULL && ptmp->offset == current_offset) {
			/* page is there, see if we need to wait on it */
			if ((ptmp->flags & PG_BUSY) != 0) {
				UVMHIST_LOG(pdhist,
				    "sleeping, ptmp->flags %#jx\n",
				    ptmp->flags,0,0,0);
				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
				rw_enter(uobj->vmobjlock, RW_WRITER);
				uvm_page_array_clear(&a);
				continue;
			}

			/*
			 * if we get here then the page is resident and
			 * unbusy.  we busy it now (so we own it).  if
			 * overwriting, mark the page dirty up front as
			 * it will be zapped via an unmanaged mapping.
			 */

			KASSERT(uvm_pagegetdirty(ptmp) !=
			    UVM_PAGE_STATUS_CLEAN);
			if (overwrite) {
				uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_DIRTY);
			}
			/* we own it, caller must un-busy */
			ptmp->flags |= PG_BUSY;
			UVM_PAGE_OWN(ptmp, "uao_get2");
			pps[lcv++] = ptmp;
			current_offset += PAGE_SIZE;
			uvm_page_array_advance(&a);
			continue;
		} else {
			KASSERT(ptmp == NULL || ptmp->offset > current_offset);
		}

		/*
		 * not resident.  allocate a new busy/fake/clean page in the
		 * object.  if it's in swap we need to do I/O to fill in the
		 * data, otherwise the page needs to be cleared: if it's not
		 * destined to be overwritten, then zero it here and now.
		 */

		pageidx = current_offset >> PAGE_SHIFT;
		swslot = uao_find_swslot(uobj, pageidx);
		ptmp = uao_pagealloc(uobj, current_offset,
		    swslot != 0 || overwrite ? 0 : UVM_PGA_ZERO);

		/* out of RAM? */
		if (ptmp == NULL) {
			rw_exit(uobj->vmobjlock);
			UVMHIST_LOG(pdhist, "sleeping, ptmp == NULL",0,0,0,0);
			uvm_wait("uao_getpage");
			rw_enter(uobj->vmobjlock, RW_WRITER);
			uvm_page_array_clear(&a);
			continue;
		}

		/*
		 * if swslot == 0, page hasn't existed before and is zeroed.
		 * otherwise we have a "fake/busy/clean" page that we just
		 * allocated.  do the needed "i/o", reading from swap.
		 */

		if (swslot != 0) {
#if defined(VMSWAP)
			int error;

			UVMHIST_LOG(pdhist, "pagein from swslot %jd",
			     swslot, 0,0,0);

			/*
			 * page in the swapped-out page.
			 * unlock object for i/o, relock when done.
			 */

			uvm_page_array_clear(&a);
			rw_exit(uobj->vmobjlock);
			error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
			rw_enter(uobj->vmobjlock, RW_WRITER);

			/*
			 * I/O done.  check for errors.
			 */

			if (error != 0) {
				UVMHIST_LOG(pdhist, "<- done (error=%jd)",
				    error,0,0,0);

				/*
				 * remove the swap slot from the aobj
				 * and mark the aobj as having no real slot.
				 * don't free the swap slot, thus preventing
				 * it from being used again.
				 */

				swslot = uao_set_swslot(uobj, pageidx,
				    SWSLOT_BAD);
				if (swslot > 0) {
					uvm_swap_markbad(swslot, 1);
				}

				uvm_pagefree(ptmp);
				rw_exit(uobj->vmobjlock);
				UVMHIST_LOG(pdhist, "<- done (error)",
				    error,lcv,0,0);
				if (lcv != 0) {
					uvm_page_unbusy(pps, lcv);
				}
				memset(pps, 0, maxpages * sizeof(pps[0]));
				uvm_page_array_fini(&a);
				return error;
			}
#else /* defined(VMSWAP) */
			panic("%s: pagein", __func__);
#endif /* defined(VMSWAP) */
		}

		/*
		 * note that we will allow the page to be writably mapped
		 * (!PG_RDONLY) regardless of access_type.  if overwrite,
		 * the page can be modified through an unmanaged mapping
		 * so mark it dirty up front.
		 */
		if (overwrite) {
			uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_DIRTY);
		} else {
			uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_UNKNOWN);
		}

		/*
		 * we got the page!   clear the fake flag (indicates valid
		 * data now in page) and plug into our result array.   note
		 * that page is still busy.
		 *
		 * it is the caller's job to:
		 * => check if the page is released
		 * => unbusy the page
		 * => activate the page
		 */
		KASSERT(uvm_pagegetdirty(ptmp) != UVM_PAGE_STATUS_CLEAN);
		KASSERT((ptmp->flags & PG_FAKE) != 0);
		KASSERT(ptmp->offset == current_offset);
		ptmp->flags &= ~PG_FAKE;
		pps[lcv++] = ptmp;
		current_offset += PAGE_SIZE;
	}
	uvm_page_array_fini(&a);

	/*
	 * finally, unlock object and return.
	 */

done:
	rw_exit(uobj->vmobjlock);
	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
	return 0;
}

#if defined(VMSWAP)

/*
 * uao_dropswap:  release any swap resources from this aobj page.
 *
 * => aobj must be locked or have a reference count of 0.
 */

void
uao_dropswap(struct uvm_object *uobj, int pageidx)
{
	int slot;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));

	slot = uao_set_swslot(uobj, pageidx, 0);
	if (slot) {
		uvm_swap_free(slot, 1);
	}
}

/*
 * page in every page in every aobj that is paged-out to a range of swslots.
 *
 * => nothing should be locked.
 * => returns true if pagein was aborted due to lack of memory.
 */

bool
uao_swap_off(int startslot, int endslot)
{
	struct uvm_aobj *aobj;

	/*
	 * Walk the list of all anonymous UVM objects.  Grab the first.
	 */
	mutex_enter(&uao_list_lock);
	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
		mutex_exit(&uao_list_lock);
		return false;
	}
	uao_reference(&aobj->u_obj);

	do {
		struct uvm_aobj *nextaobj;
		bool rv;

		/*
		 * Prefetch the next object and immediately hold a reference
		 * on it, so neither the current nor the next entry could
		 * disappear while we are iterating.
		 */
		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
			uao_reference(&nextaobj->u_obj);
		}
		mutex_exit(&uao_list_lock);

		/*
		 * Page in all pages in the swap slot range.
		 */
		rw_enter(aobj->u_obj.vmobjlock, RW_WRITER);
		rv = uao_pagein(aobj, startslot, endslot);
		rw_exit(aobj->u_obj.vmobjlock);

		/* Drop the reference of the current object. */
		uao_detach(&aobj->u_obj);
		if (rv) {
			if (nextaobj) {
				uao_detach(&nextaobj->u_obj);
			}
			return rv;
		}

		aobj = nextaobj;
		mutex_enter(&uao_list_lock);
	} while (aobj);

	mutex_exit(&uao_list_lock);
	return false;
}

/*
 * page in any pages from aobj in the given range.
 *
 * => aobj must be locked and is returned locked.
 * => returns true if pagein was aborted due to lack of memory.
 */
static bool
uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
{
	bool rv;

	if (UAO_USES_SWHASH(aobj)) {
		struct uao_swhash_elt *elt;
		int buck;

restart:
		for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
			for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
			     elt != NULL;
			     elt = LIST_NEXT(elt, list)) {
				int i;

				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
					int slot = elt->slots[i];

					/*
					 * if the slot isn't in range, skip it.
					 */

					if (slot < startslot ||
					    slot >= endslot) {
						continue;
					}

					/*
					 * process the page,
					 * then start over on this object
					 * since the swhash elt
					 * may have been freed.
					 */

					rv = uao_pagein_page(aobj,
					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
					if (rv) {
						return rv;
					}
					goto restart;
				}
			}
		}
	} else {
		int i;

		for (i = 0; i < aobj->u_pages; i++) {
			int slot = aobj->u_swslots[i];

			/*
			 * if the slot isn't in range, skip it
			 */

			if (slot < startslot || slot >= endslot) {
				continue;
			}

			/*
			 * process the page.
			 */

			rv = uao_pagein_page(aobj, i);
			if (rv) {
				return rv;
			}
		}
	}

	return false;
}

/*
 * uao_pagein_page: page in a single page from an anonymous UVM object.
 *
 * => Returns true if pagein was aborted due to lack of memory.
 * => Object must be locked and is returned locked.
 */

static bool
uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
{
	struct uvm_object *uobj = &aobj->u_obj;
	struct vm_page *pg;
	int rv, npages;

	pg = NULL;
	npages = 1;

	KASSERT(rw_write_held(uobj->vmobjlock));
	rv = uao_get(uobj, (voff_t)pageidx << PAGE_SHIFT, &pg, &npages,
	    0, VM_PROT_READ | VM_PROT_WRITE, 0, PGO_SYNCIO);

	/*
	 * relock and finish up.
	 */

	rw_enter(uobj->vmobjlock, RW_WRITER);
	switch (rv) {
	case 0:
		break;

	case EIO:
	case ERESTART:

		/*
		 * nothing more to do on errors.
		 * ERESTART can only mean that the anon was freed,
		 * so again there's nothing to do.
		 */

		return false;

	default:
		return true;
	}

	/*
	 * ok, we've got the page now.
	 * mark it as dirty, clear its swslot and un-busy it.
	 */
	uao_dropswap(&aobj->u_obj, pageidx);

	/*
	 * make sure it's on a page queue.
	 */
	uvm_pagelock(pg);
	uvm_pageenqueue(pg);
	uvm_pagewakeup(pg);
	uvm_pageunlock(pg);

	pg->flags &= ~(PG_BUSY|PG_FAKE);
	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
	UVM_PAGE_OWN(pg, NULL);

	return false;
}

/*
 * uao_dropswap_range: drop swapslots in the range.
 *
 * => aobj must be locked and is returned locked.
 * => start is inclusive.  end is exclusive.
 */

void
uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
{
	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
	int swpgonlydelta = 0;

	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
	KASSERT(rw_write_held(uobj->vmobjlock));

	if (end == 0) {
		end = INT64_MAX;
	}

	if (UAO_USES_SWHASH(aobj)) {
		int i, hashbuckets = aobj->u_swhashmask + 1;
		voff_t taghi;
		voff_t taglo;

		taglo = UAO_SWHASH_ELT_TAG(start);
		taghi = UAO_SWHASH_ELT_TAG(end);

		for (i = 0; i < hashbuckets; i++) {
			struct uao_swhash_elt *elt, *next;

			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
			     elt != NULL;
			     elt = next) {
				int startidx, endidx;
				int j;

				next = LIST_NEXT(elt, list);

				if (elt->tag < taglo || taghi < elt->tag) {
					continue;
				}

				if (elt->tag == taglo) {
					startidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
				} else {
					startidx = 0;
				}

				if (elt->tag == taghi) {
					endidx =
					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
				} else {
					endidx = UAO_SWHASH_CLUSTER_SIZE;
				}

				for (j = startidx; j < endidx; j++) {
					int slot = elt->slots[j];

					KASSERT(uvm_pagelookup(&aobj->u_obj,
					    (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
					    + j) << PAGE_SHIFT) == NULL);
					if (slot > 0) {
						uvm_swap_free(slot, 1);
						swpgonlydelta++;
						KASSERT(elt->count > 0);
						elt->slots[j] = 0;
						elt->count--;
					}
				}

				if (elt->count == 0) {
					LIST_REMOVE(elt, list);
					pool_put(&uao_swhash_elt_pool, elt);
				}
			}
		}
	} else {
		int i;

		if (aobj->u_pages < end) {
			end = aobj->u_pages;
		}
		for (i = start; i < end; i++) {
			int slot = aobj->u_swslots[i];

			if (slot > 0) {
				uvm_swap_free(slot, 1);
				swpgonlydelta++;
			}
		}
	}

	/*
	 * adjust the counter of pages only in swap for all
	 * the swap slots we've freed.
	 */

	if (swpgonlydelta > 0) {
		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
	}
}

#endif /* defined(VMSWAP) */