1 /*	$NetBSD: uvm_aobj.c,v 1.122 2014/05/25 18:55:11 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
5  *                    Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
29  */
30 
31 /*
32  * uvm_aobj.c: anonymous memory uvm_object pager
33  *
34  * author: Chuck Silvers <chuq@chuq.com>
35  * started: Jan-1998
36  *
37  * - design mostly from Chuck Cranor
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.122 2014/05/25 18:55:11 riastradh Exp $");
42 
43 #include "opt_uvmhist.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/kmem.h>
49 #include <sys/pool.h>
50 #include <sys/atomic.h>
51 
52 #include <uvm/uvm.h>
53 
54 /*
55  * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
56  * keeping the list of resident pages, it may also keep a list of allocated
57  * swap blocks.  Depending on the size of the object, this list is either
58  * stored in an array (small objects) or in a hash table (large objects).
59  *
60  * Lock order
61  *
62  *	uao_list_lock ->
63  *		uvm_object::vmobjlock
64  */
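
/*
 * Illustrative life cycle (editor's sketch, using only functions defined in
 * this file; the size is arbitrary and the step of mapping the object into
 * an address space is elided):
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(16 * PAGE_SIZE, 0);	created with one reference
 *	uao_reference(uobj);			extra reference, e.g. 2nd map
 *	...
 *	uao_detach(uobj);			drop one reference
 *	uao_detach(uobj);			last reference: pages and swap
 *						slots are freed via uao_free()
 */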
65 
66 /*
67  * Note: for hash tables, we break the address space of the aobj into blocks
68  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
69  */
70 
71 #define	UAO_SWHASH_CLUSTER_SHIFT	4
72 #define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)
73 
74 /* Get the "tag" for this page index. */
75 #define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
76 #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
77     ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))
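
/*
 * Worked example (with the default UAO_SWHASH_CLUSTER_SHIFT of 4): page
 * index 0x123 has tag 0x12 and in-cluster slot index 0x3, i.e. its swap
 * slot lives in slots[3] of the element whose tag is 0x12.
 */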
78 
79 /* Given an ELT and a page index, find the swap slot. */
80 #define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
81     ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])
82 
83 /* Given an ELT, return its pageidx base. */
84 #define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
85     ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)
86 
87 /* The hash function. */
88 #define	UAO_SWHASH_HASH(aobj, idx) \
89     (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
90     & (aobj)->u_swhashmask)])
91 
92 /*
93  * The threshold which determines whether we will use an array or a
94  * hash table to store the list of allocated swap blocks.
95  */
96 #define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
97 #define	UAO_USES_SWHASH(aobj) \
98     ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)
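
/*
 * With the defaults above (16-page clusters), an aobj of more than 64 pages
 * keeps its swap slots in the hash table; smaller aobjs use the flat
 * per-page u_swslots array.
 */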
99 
100 /* The number of buckets in a hash, with an upper bound. */
101 #define	UAO_SWHASH_MAXBUCKETS		256
102 #define	UAO_SWHASH_BUCKETS(aobj) \
103     (MIN((aobj)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
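
/*
 * That is, roughly one bucket per cluster of pages, capped at 256 buckets;
 * this value is only a hint that hashinit() uses to size the table.
 */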
104 
105 /*
106  * uao_swhash_elt: when a hash table is being used, this structure defines
107  * the format of an entry in the bucket list.
108  */
109 
110 struct uao_swhash_elt {
111 	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
112 	voff_t tag;				/* our 'tag' */
113 	int count;				/* our number of active slots */
114 	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
115 };
116 
117 /*
118  * uao_swhash: the swap hash table structure
119  */
120 
121 LIST_HEAD(uao_swhash, uao_swhash_elt);
122 
123 /*
124  * uao_swhash_elt_pool: pool of uao_swhash_elt structures.
125  * Note: pages for this pool must not come from a pageable kernel map.
126  */
127 static struct pool	uao_swhash_elt_pool	__cacheline_aligned;
128 
129 /*
130  * uvm_aobj: the actual anon-backed uvm_object
131  *
132  * => the uvm_object is at the top of the structure, which allows
133  *   (struct uvm_aobj *) == (struct uvm_object *)
134  * => only one of u_swslots and u_swhash is used in any given aobj
135  */
136 
137 struct uvm_aobj {
138 	struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
139 	pgoff_t u_pages;	 /* number of pages in entire object */
140 	int u_flags;		 /* the flags (see uvm_aobj.h) */
141 	int *u_swslots;		 /* array of offset->swapslot mappings */
142 				 /*
143 				  * hashtable of offset->swapslot mappings
144 				  * (u_swhash is an array of bucket heads)
145 				  */
146 	struct uao_swhash *u_swhash;
147 	u_long u_swhashmask;		/* mask for hashtable */
148 	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
149 	int u_freelist;		  /* freelist to allocate pages from */
150 };
151 
152 static void	uao_free(struct uvm_aobj *);
153 static int	uao_get(struct uvm_object *, voff_t, struct vm_page **,
154 		    int *, int, vm_prot_t, int, int);
155 static int	uao_put(struct uvm_object *, voff_t, voff_t, int);
156 
157 #if defined(VMSWAP)
158 static struct uao_swhash_elt *uao_find_swhash_elt
159     (struct uvm_aobj *, int, bool);
160 
161 static bool uao_pagein(struct uvm_aobj *, int, int);
162 static bool uao_pagein_page(struct uvm_aobj *, int);
163 #endif /* defined(VMSWAP) */
164 
165 static struct vm_page	*uao_pagealloc(struct uvm_object *, voff_t, int);
166 
167 /*
168  * aobj_pager
169  *
170  * note that some functions (e.g. put) are handled elsewhere
171  */
172 
173 const struct uvm_pagerops aobj_pager = {
174 	.pgo_reference = uao_reference,
175 	.pgo_detach = uao_detach,
176 	.pgo_get = uao_get,
177 	.pgo_put = uao_put,
178 };
179 
180 /*
181  * uao_list: global list of active aobjs, locked by uao_list_lock
182  */
183 
184 static LIST_HEAD(aobjlist, uvm_aobj) uao_list	__cacheline_aligned;
185 static kmutex_t		uao_list_lock		__cacheline_aligned;
186 
187 /*
188  * hash table/array related functions
189  */
190 
191 #if defined(VMSWAP)
192 
193 /*
194  * uao_find_swhash_elt: find (or create) a hash table entry for a page
195  * offset.
196  *
197  * => the object should be locked by the caller
198  */
199 
200 static struct uao_swhash_elt *
201 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
202 {
203 	struct uao_swhash *swhash;
204 	struct uao_swhash_elt *elt;
205 	voff_t page_tag;
206 
207 	swhash = UAO_SWHASH_HASH(aobj, pageidx);
208 	page_tag = UAO_SWHASH_ELT_TAG(pageidx);
209 
210 	/*
211 	 * now search the bucket for the requested tag
212 	 */
213 
214 	LIST_FOREACH(elt, swhash, list) {
215 		if (elt->tag == page_tag) {
216 			return elt;
217 		}
218 	}
219 	if (!create) {
220 		return NULL;
221 	}
222 
223 	/*
224 	 * allocate a new entry for the bucket and init/insert it in
225 	 */
226 
227 	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
228 	if (elt == NULL) {
229 		return NULL;
230 	}
231 	LIST_INSERT_HEAD(swhash, elt, list);
232 	elt->tag = page_tag;
233 	elt->count = 0;
234 	memset(elt->slots, 0, sizeof(elt->slots));
235 	return elt;
236 }
237 
238 /*
239  * uao_find_swslot: find the swap slot number for an aobj/pageidx
240  *
241  * => object must be locked by caller
242  */
243 
244 int
245 uao_find_swslot(struct uvm_object *uobj, int pageidx)
246 {
247 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
248 	struct uao_swhash_elt *elt;
249 
250 	/*
251 	 * if noswap flag is set, then we never return a slot
252 	 */
253 
254 	if (aobj->u_flags & UAO_FLAG_NOSWAP)
255 		return 0;
256 
257 	/*
258 	 * if hashing, look in hash table.
259 	 */
260 
261 	if (UAO_USES_SWHASH(aobj)) {
262 		elt = uao_find_swhash_elt(aobj, pageidx, false);
263 		return elt ? UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) : 0;
264 	}
265 
266 	/*
267 	 * otherwise, look in the array
268 	 */
269 
270 	return aobj->u_swslots[pageidx];
271 }
272 
273 /*
274  * uao_set_swslot: set the swap slot for a page in an aobj.
275  *
276  * => setting a slot to zero frees the slot
277  * => object must be locked by caller
278  * => we return the old slot number, or -1 if we failed to allocate
279  *    memory to record the new slot number
280  */
281 
282 int
283 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
284 {
285 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
286 	struct uao_swhash_elt *elt;
287 	int oldslot;
288 	UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
289 	UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
290 	    aobj, pageidx, slot, 0);
291 
292 	KASSERT(mutex_owned(uobj->vmobjlock) || uobj->uo_refs == 0);
293 
294 	/*
295 	 * if noswap flag is set, then we can't set a non-zero slot.
296 	 */
297 
298 	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
299 		KASSERTMSG(slot == 0, "uao_set_swslot: no swap object");
300 		return 0;
301 	}
302 
303 	/*
304 	 * are we using a hash table?  if so, add it in the hash.
305 	 */
306 
307 	if (UAO_USES_SWHASH(aobj)) {
308 
309 		/*
310 		 * Avoid allocating an entry just to free it again if
311 		 * the page had no swap slot in the first place, and
312 		 * we are freeing.
313 		 */
314 
315 		elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
316 		if (elt == NULL) {
317 			return slot ? -1 : 0;
318 		}
319 
320 		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
321 		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
322 
323 		/*
324 		 * now adjust the elt's reference counter and free it if we've
325 		 * dropped it to zero.
326 		 */
327 
328 		if (slot) {
329 			if (oldslot == 0)
330 				elt->count++;
331 		} else {
332 			if (oldslot)
333 				elt->count--;
334 
335 			if (elt->count == 0) {
336 				LIST_REMOVE(elt, list);
337 				pool_put(&uao_swhash_elt_pool, elt);
338 			}
339 		}
340 	} else {
341 		/* we are using an array */
342 		oldslot = aobj->u_swslots[pageidx];
343 		aobj->u_swslots[pageidx] = slot;
344 	}
345 	return oldslot;
346 }
347 
348 #endif /* defined(VMSWAP) */
349 
350 /*
351  * end of hash/array functions
352  */
353 
354 /*
355  * uao_free: free all resources held by an aobj, and then free the aobj
356  *
357  * => the aobj should be dead
358  */
359 
360 static void
361 uao_free(struct uvm_aobj *aobj)
362 {
363 	struct uvm_object *uobj = &aobj->u_obj;
364 
365 	KASSERT(mutex_owned(uobj->vmobjlock));
366 	uao_dropswap_range(uobj, 0, 0);
367 	mutex_exit(uobj->vmobjlock);
368 
369 #if defined(VMSWAP)
370 	if (UAO_USES_SWHASH(aobj)) {
371 
372 		/*
373 		 * free the hash table itself.
374 		 */
375 
376 		hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
377 	} else {
378 
379 		/*
380 		 * free the array itself.
381 		 */
382 
383 		kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
384 	}
385 #endif /* defined(VMSWAP) */
386 
387 	/*
388 	 * finally free the aobj itself
389 	 */
390 
391 	uvm_obj_destroy(uobj, true);
392 	kmem_free(aobj, sizeof(struct uvm_aobj));
393 }
394 
395 /*
396  * pager functions
397  */
398 
399 /*
400  * uao_create: create an aobj of the given size and return its uvm_object.
401  *
402  * => for normal use, flags are always zero
403  * => for the kernel object, the flags are:
404  *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
405  *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
406  */
407 
408 struct uvm_object *
409 uao_create(vsize_t size, int flags)
410 {
411 	static struct uvm_aobj kernel_object_store;
412 	static kmutex_t kernel_object_lock;
413 	static int kobj_alloced __diagused = 0;
414 	pgoff_t pages = round_page(size) >> PAGE_SHIFT;
415 	struct uvm_aobj *aobj;
416 	int refs;
417 
418 	/*
419 	 * Allocate a new aobj, unless kernel object is requested.
420 	 */
421 
422 	if (flags & UAO_FLAG_KERNOBJ) {
423 		KASSERT(!kobj_alloced);
424 		aobj = &kernel_object_store;
425 		aobj->u_pages = pages;
426 		aobj->u_flags = UAO_FLAG_NOSWAP;
427 		refs = UVM_OBJ_KERN;
428 		kobj_alloced = UAO_FLAG_KERNOBJ;
429 	} else if (flags & UAO_FLAG_KERNSWAP) {
430 		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
431 		aobj = &kernel_object_store;
432 		kobj_alloced = UAO_FLAG_KERNSWAP;
433 		refs = 0xdeadbeaf; /* XXX: gcc */
434 	} else {
435 		aobj = kmem_alloc(sizeof(struct uvm_aobj), KM_SLEEP);
436 		aobj->u_pages = pages;
437 		aobj->u_flags = 0;
438 		refs = 1;
439 	}
440 
441 	/*
442 	 * no freelist by default
443 	 */
444 
445 	aobj->u_freelist = VM_NFREELIST;
446 
447 	/*
448  	 * allocate hash/array if necessary
449  	 *
450  	 * note: in the KERNSWAP case there is no need to worry about locking
451  	 * since we are still booting and should be the only thread around.
452  	 */
453 
454 	if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
455 #if defined(VMSWAP)
456 		const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
457 
458 		/* allocate hash table or array depending on object size */
459 		if (UAO_USES_SWHASH(aobj)) {
460 			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
461 			    HASH_LIST, kernswap ? false : true,
462 			    &aobj->u_swhashmask);
463 			if (aobj->u_swhash == NULL)
464 				panic("uao_create: hashinit swhash failed");
465 		} else {
466 			aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
467 			    kernswap ? KM_NOSLEEP : KM_SLEEP);
468 			if (aobj->u_swslots == NULL)
469 				panic("uao_create: swslots allocation failed");
470 		}
471 #endif /* defined(VMSWAP) */
472 
473 		if (flags) {
474 			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
475 			return &aobj->u_obj;
476 		}
477 	}
478 
479 	/*
480 	 * Initialise UVM object.
481 	 */
482 
483 	const bool kernobj = (flags & UAO_FLAG_KERNOBJ) != 0;
484 	uvm_obj_init(&aobj->u_obj, &aobj_pager, !kernobj, refs);
485 	if (__predict_false(kernobj)) {
486 		/* Initialisation only once, for UAO_FLAG_KERNOBJ. */
487 		mutex_init(&kernel_object_lock, MUTEX_DEFAULT, IPL_NONE);
488 		uvm_obj_setlock(&aobj->u_obj, &kernel_object_lock);
489 	}
490 
491 	/*
492  	 * now that aobj is ready, add it to the global list
493  	 */
494 
495 	mutex_enter(&uao_list_lock);
496 	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
497 	mutex_exit(&uao_list_lock);
498 	return(&aobj->u_obj);
499 }
500 
501 /*
502  * uao_set_pgfl: allocate pages only from the specified freelist.
503  *
504  * => must be called before any pages are allocated for the object.
505  * => reset by setting it to VM_NFREELIST, meaning any freelist.
506  */
507 
508 void
509 uao_set_pgfl(struct uvm_object *uobj, int freelist)
510 {
511 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
512 
513 	KASSERTMSG((0 <= freelist), "invalid freelist %d", freelist);
514 	KASSERTMSG((freelist <= VM_NFREELIST), "invalid freelist %d",
515 	    freelist);
516 
517 	aobj->u_freelist = freelist;
518 }
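
/*
 * Example (editor's sketch): a caller that wants every page of a fresh aobj
 * to come from a particular physical freelist would, before any page is
 * faulted in, do something like:
 *
 *	uobj = uao_create(size, 0);
 *	uao_set_pgfl(uobj, VM_FREELIST_DEFAULT);
 *
 * where VM_FREELIST_DEFAULT stands for any valid machine-dependent freelist
 * index below VM_NFREELIST.
 */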
519 
520 /*
521  * uao_pagealloc: allocate a page for aobj.
522  */
523 
524 static inline struct vm_page *
525 uao_pagealloc(struct uvm_object *uobj, voff_t offset, int flags)
526 {
527 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
528 
529 	if (__predict_true(aobj->u_freelist == VM_NFREELIST))
530 		return uvm_pagealloc(uobj, offset, NULL, flags);
531 	else
532 		return uvm_pagealloc_strat(uobj, offset, NULL, flags,
533 		    UVM_PGA_STRAT_ONLY, aobj->u_freelist);
534 }
535 
536 /*
537  * uao_init: set up aobj pager subsystem
538  *
539  * => called at boot time from uvm_pager_init()
540  */
541 
542 void
543 uao_init(void)
544 {
545 	static int uao_initialized;
546 
547 	if (uao_initialized)
548 		return;
549 	uao_initialized = true;
550 	LIST_INIT(&uao_list);
551 	mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
552 	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
553 	    0, 0, 0, "uaoeltpl", NULL, IPL_VM);
554 }
555 
556 /*
557  * uao_reference: hold a reference to an anonymous UVM object.
558  */
559 void
560 uao_reference(struct uvm_object *uobj)
561 {
562 	/* Kernel object is persistent. */
563 	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
564 		return;
565 	}
566 	atomic_inc_uint(&uobj->uo_refs);
567 }
568 
569 /*
570  * uao_detach: drop a reference to an anonymous UVM object.
571  */
572 void
573 uao_detach(struct uvm_object *uobj)
574 {
575 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
576 	struct vm_page *pg;
577 
578 	UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
579 
580 	/*
581 	 * Detaching from kernel object is a NOP.
582 	 */
583 
584 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
585 		return;
586 
587 	/*
588 	 * Drop the reference.  If it was the last one, destroy the object.
589 	 */
590 
591 	UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
592 	if (atomic_dec_uint_nv(&uobj->uo_refs) > 0) {
593 		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
594 		return;
595 	}
596 
597 	/*
598 	 * Remove the aobj from the global list.
599 	 */
600 
601 	mutex_enter(&uao_list_lock);
602 	LIST_REMOVE(aobj, u_list);
603 	mutex_exit(&uao_list_lock);
604 
605 	/*
606 	 * Free all the pages left in the aobj.  For each page, when the
607 	 * page is no longer busy (and thus after any disk I/O that it is
608 	 * involved in is complete), release any swap resources and free
609 	 * the page itself.
610 	 */
611 
612 	mutex_enter(uobj->vmobjlock);
613 	mutex_enter(&uvm_pageqlock);
614 	while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
615 		pmap_page_protect(pg, VM_PROT_NONE);
616 		if (pg->flags & PG_BUSY) {
617 			pg->flags |= PG_WANTED;
618 			mutex_exit(&uvm_pageqlock);
619 			UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, false,
620 			    "uao_det", 0);
621 			mutex_enter(uobj->vmobjlock);
622 			mutex_enter(&uvm_pageqlock);
623 			continue;
624 		}
625 		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
626 		uvm_pagefree(pg);
627 	}
628 	mutex_exit(&uvm_pageqlock);
629 
630 	/*
631 	 * Finally, free the anonymous UVM object itself.
632 	 */
633 
634 	uao_free(aobj);
635 }
636 
637 /*
638  * uao_put: flush pages out of a uvm object
639  *
640  * => object should be locked by caller.  we may _unlock_ the object
641  *	if (and only if) we need to clean a page (PGO_CLEANIT).
642  *	XXXJRT Currently, however, we don't.  In the case of cleaning
643  *	XXXJRT a page, we simply just deactivate it.  Should probably
644  *	XXXJRT handle this better, in the future (although "flushing"
645  *	XXXJRT anonymous memory isn't terribly important).
646  * => if PGO_CLEANIT is not set, then we will neither unlock the object
647  *	nor block.
648  * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
649  *	for flushing.
650  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
651  *	that new pages are inserted on the tail end of the list.  thus,
652  *	we can make a complete pass through the object in one go by starting
653  *	at the head and working towards the tail (new pages are put in
654  *	front of us).
655  * => NOTE: we are allowed to lock the page queues, so the caller
656  *	must not be holding the lock on them [e.g. pagedaemon had
657  *	better not call us with the queues locked]
658  * => we return 0 unless we encountered some sort of I/O error
659  *	XXXJRT currently never happens, as we never directly initiate
660  *	XXXJRT I/O
661  *
662  * note on page traversal:
663  *	we can traverse the pages in an object either by going down the
664  *	linked list in "uobj->memq", or we can go over the address range
665  *	by page doing hash table lookups for each address.  depending
666  *	on how many pages are in the object it may be cheaper to do one
667  *	or the other.  we set "by_list" to true if we are using memq.
668  *	if the cost of a hash lookup was equal to the cost of the list
669  *	traversal we could compare the number of pages in the start->stop
670  *	range to the total number of pages in the object.  however, it
671  *	seems that a hash table lookup is more expensive than the linked
672  *	list traversal, so we multiply the number of pages in the
673  *	start->stop range by a penalty which we define below.
674  */
675 
676 static int
677 uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
678 {
679 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
680 	struct vm_page *pg, *nextpg, curmp, endmp;
681 	bool by_list;
682 	voff_t curoff;
683 	UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
684 
685 	KASSERT(mutex_owned(uobj->vmobjlock));
686 
687 	curoff = 0;
688 	if (flags & PGO_ALLPAGES) {
689 		start = 0;
690 		stop = aobj->u_pages << PAGE_SHIFT;
691 		by_list = true;		/* always go by the list */
692 	} else {
693 		start = trunc_page(start);
694 		if (stop == 0) {
695 			stop = aobj->u_pages << PAGE_SHIFT;
696 		} else {
697 			stop = round_page(stop);
698 		}
699 		if (stop > (aobj->u_pages << PAGE_SHIFT)) {
700 			printf("uao_flush: strange, got an out of range "
701 			    "flush (fixed)\n");
702 			stop = aobj->u_pages << PAGE_SHIFT;
703 		}
704 		by_list = (uobj->uo_npages <=
705 		    ((stop - start) >> PAGE_SHIFT) * UVM_PAGE_TREE_PENALTY);
706 	}
707 	UVMHIST_LOG(maphist,
708 	    " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
709 	    start, stop, by_list, flags);
710 
711 	/*
712 	 * Don't need to do any work here if we're not freeing
713 	 * or deactivating pages.
714 	 */
715 
716 	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
717 		mutex_exit(uobj->vmobjlock);
718 		return 0;
719 	}
720 
721 	/*
722 	 * Initialize the marker pages.  See the comment in
723 	 * genfs_putpages() also.
724 	 */
725 
726 	curmp.flags = PG_MARKER;
727 	endmp.flags = PG_MARKER;
728 
729 	/*
730 	 * now do it.  note: we must update nextpg in the body of loop or we
731 	 * will get stuck.  we need to use nextpg if we'll traverse the list
732 	 * because we may free "pg" before doing the next loop.
733 	 */
734 
735 	if (by_list) {
736 		TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
737 		nextpg = TAILQ_FIRST(&uobj->memq);
738 	} else {
739 		curoff = start;
740 		nextpg = NULL;	/* Quell compiler warning */
741 	}
742 
743 	/* locked: uobj */
744 	for (;;) {
745 		if (by_list) {
746 			pg = nextpg;
747 			if (pg == &endmp)
748 				break;
749 			nextpg = TAILQ_NEXT(pg, listq.queue);
750 			if (pg->flags & PG_MARKER)
751 				continue;
752 			if (pg->offset < start || pg->offset >= stop)
753 				continue;
754 		} else {
755 			if (curoff < stop) {
756 				pg = uvm_pagelookup(uobj, curoff);
757 				curoff += PAGE_SIZE;
758 			} else
759 				break;
760 			if (pg == NULL)
761 				continue;
762 		}
763 
764 		/*
765 		 * wait and try again if the page is busy.
766 		 */
767 
768 		if (pg->flags & PG_BUSY) {
769 			if (by_list) {
770 				TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
771 			}
772 			pg->flags |= PG_WANTED;
773 			UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, 0,
774 			    "uao_put", 0);
775 			mutex_enter(uobj->vmobjlock);
776 			if (by_list) {
777 				nextpg = TAILQ_NEXT(&curmp, listq.queue);
778 				TAILQ_REMOVE(&uobj->memq, &curmp,
779 				    listq.queue);
780 			} else
781 				curoff -= PAGE_SIZE;
782 			continue;
783 		}
784 
785 		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
786 
787 		/*
788 		 * XXX In these first 3 cases, we always just
789 		 * XXX deactivate the page.  We may want to
790 		 * XXX handle the different cases more specifically
791 		 * XXX in the future.
792 		 */
793 
794 		case PGO_CLEANIT|PGO_FREE:
795 		case PGO_CLEANIT|PGO_DEACTIVATE:
796 		case PGO_DEACTIVATE:
797  deactivate_it:
798 			mutex_enter(&uvm_pageqlock);
799 			/* skip the page if it's wired */
800 			if (pg->wire_count == 0) {
801 				uvm_pagedeactivate(pg);
802 			}
803 			mutex_exit(&uvm_pageqlock);
804 			break;
805 
806 		case PGO_FREE:
807 			/*
808 			 * If there are multiple references to
809 			 * the object, just deactivate the page.
810 			 */
811 
812 			if (uobj->uo_refs > 1)
813 				goto deactivate_it;
814 
815 			/*
816 			 * free the swap slot and the page.
817 			 */
818 
819 			pmap_page_protect(pg, VM_PROT_NONE);
820 
821 			/*
822 			 * freeing swapslot here is not strictly necessary.
823 			 * however, leaving it here doesn't save much
824 			 * because we need to update swap accounting anyway.
825 			 */
826 
827 			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
828 			mutex_enter(&uvm_pageqlock);
829 			uvm_pagefree(pg);
830 			mutex_exit(&uvm_pageqlock);
831 			break;
832 
833 		default:
834 			panic("%s: impossible", __func__);
835 		}
836 	}
837 	if (by_list) {
838 		TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
839 	}
840 	mutex_exit(uobj->vmobjlock);
841 	return 0;
842 }
843 
844 /*
845  * uao_get: fetch me a page
846  *
847  * we have three cases:
848  * 1: page is resident     -> just return the page.
849  * 2: page is zero-fill    -> allocate a new page and zero it.
850  * 3: page is swapped out  -> fetch the page from swap.
851  *
852  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
853  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
854  * then we will need to return EBUSY.
855  *
856  * => prefer map unlocked (not required)
857  * => object must be locked!  we will _unlock_ it before starting any I/O.
858  * => flags: PGO_ALLPAGES: get all of the pages
859  *           PGO_LOCKED: fault data structures are locked
860  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
861  * => NOTE: caller must check for released pages!!
862  */
863 
864 static int
865 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
866     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
867 {
868 	voff_t current_offset;
869 	struct vm_page *ptmp = NULL;	/* Quell compiler warning */
870 	int lcv, gotpages, maxpages, swslot, pageidx;
871 	bool done;
872 	UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
873 
874 	UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
875 		    (struct uvm_aobj *)uobj, offset, flags,0);
876 
877 	/*
878  	 * get number of pages
879  	 */
880 
881 	maxpages = *npagesp;
882 
883 	/*
884  	 * step 1: handle the case where fault data structures are locked.
885  	 */
886 
887 	if (flags & PGO_LOCKED) {
888 
889 		/*
890  		 * step 1a: get pages that are already resident.   only do
891 		 * this if the data structures are locked (i.e. the first
892 		 * time through).
893  		 */
894 
895 		done = true;	/* be optimistic */
896 		gotpages = 0;	/* # of pages we got so far */
897 		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
898 		    lcv++, current_offset += PAGE_SIZE) {
899 			/* do we care about this page?  if not, skip it */
900 			if (pps[lcv] == PGO_DONTCARE)
901 				continue;
902 			ptmp = uvm_pagelookup(uobj, current_offset);
903 
904 			/*
905  			 * if page is new, attempt to allocate the page,
906 			 * zero-fill'd.
907  			 */
908 
909 			if (ptmp == NULL && uao_find_swslot(uobj,
910 			    current_offset >> PAGE_SHIFT) == 0) {
911 				ptmp = uao_pagealloc(uobj, current_offset,
912 				    UVM_FLAG_COLORMATCH|UVM_PGA_ZERO);
913 				if (ptmp) {
914 					/* new page */
915 					ptmp->flags &= ~(PG_FAKE);
916 					ptmp->pqflags |= PQ_AOBJ;
917 					goto gotpage;
918 				}
919 			}
920 
921 			/*
922 			 * to be useful must get a non-busy page
923 			 */
924 
925 			if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
926 				if (lcv == centeridx ||
927 				    (flags & PGO_ALLPAGES) != 0)
928 					/* need to do a wait or I/O! */
929 					done = false;
930 				continue;
931 			}
932 
933 			/*
934 			 * useful page: busy/lock it and plug it in our
935 			 * result array
936 			 */
937 
938 			/* caller must un-busy this page */
939 			ptmp->flags |= PG_BUSY;
940 			UVM_PAGE_OWN(ptmp, "uao_get1");
941 gotpage:
942 			pps[lcv] = ptmp;
943 			gotpages++;
944 		}
945 
946 		/*
947  		 * step 1b: now we've either done everything needed or we need
948 		 * to unlock and do some waiting or I/O.
949  		 */
950 
951 		UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
952 		*npagesp = gotpages;
953 		if (done)
954 			return 0;
955 		else
956 			return EBUSY;
957 	}
958 
959 	/*
960  	 * step 2: get non-resident or busy pages.
961  	 * object is locked.   data structures are unlocked.
962  	 */
963 
964 	if ((flags & PGO_SYNCIO) == 0) {
965 		goto done;
966 	}
967 
968 	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
969 	    lcv++, current_offset += PAGE_SIZE) {
970 
971 		/*
972 		 * - skip over pages we've already gotten or don't want
973 		 * - skip over pages we don't _have_ to get
974 		 */
975 
976 		if (pps[lcv] != NULL ||
977 		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
978 			continue;
979 
980 		pageidx = current_offset >> PAGE_SHIFT;
981 
982 		/*
983  		 * we have yet to locate the current page (pps[lcv]).   we
984 		 * first look for a page that is already at the current offset.
985 		 * if we find a page, we check to see if it is busy or
986 		 * released.  if that is the case, then we sleep on the page
987 		 * until it is no longer busy or released and repeat the lookup.
988 		 * if the page we found is neither busy nor released, then we
989 		 * busy it (so we own it) and plug it into pps[lcv].   this
990 		 * 'break's the following while loop and indicates we are
991 		 * ready to move on to the next page in the "lcv" loop above.
992  		 *
993  		 * if we exit the while loop with pps[lcv] still set to NULL,
994 		 * then it means that we allocated a new busy/fake/clean page
995 		 * ptmp in the object and we need to do I/O to fill in the data.
996  		 */
997 
998 		/* top of "pps" while loop */
999 		while (pps[lcv] == NULL) {
1000 			/* look for a resident page */
1001 			ptmp = uvm_pagelookup(uobj, current_offset);
1002 
1003 			/* not resident?   allocate one now (if we can) */
1004 			if (ptmp == NULL) {
1005 
1006 				ptmp = uao_pagealloc(uobj, current_offset, 0);
1007 
1008 				/* out of RAM? */
1009 				if (ptmp == NULL) {
1010 					mutex_exit(uobj->vmobjlock);
1011 					UVMHIST_LOG(pdhist,
1012 					    "sleeping, ptmp == NULL\n",0,0,0,0);
1013 					uvm_wait("uao_getpage");
1014 					mutex_enter(uobj->vmobjlock);
1015 					continue;
1016 				}
1017 
1018 				/*
1019 				 * safe with PQ's unlocked: because we just
1020 				 * alloc'd the page
1021 				 */
1022 
1023 				ptmp->pqflags |= PQ_AOBJ;
1024 
1025 				/*
1026 				 * got new page ready for I/O.  break pps while
1027 				 * loop.  pps[lcv] is still NULL.
1028 				 */
1029 
1030 				break;
1031 			}
1032 
1033 			/* page is there, see if we need to wait on it */
1034 			if ((ptmp->flags & PG_BUSY) != 0) {
1035 				ptmp->flags |= PG_WANTED;
1036 				UVMHIST_LOG(pdhist,
1037 				    "sleeping, ptmp->flags 0x%x\n",
1038 				    ptmp->flags,0,0,0);
1039 				UVM_UNLOCK_AND_WAIT(ptmp, uobj->vmobjlock,
1040 				    false, "uao_get", 0);
1041 				mutex_enter(uobj->vmobjlock);
1042 				continue;
1043 			}
1044 
1045 			/*
1046  			 * if we get here then the page has become resident and
1047 			 * unbusy between steps 1 and 2.  we busy it now (so we
1048 			 * own it) and set pps[lcv] (so that we exit the while
1049 			 * loop).
1050  			 */
1051 
1052 			/* we own it, caller must un-busy */
1053 			ptmp->flags |= PG_BUSY;
1054 			UVM_PAGE_OWN(ptmp, "uao_get2");
1055 			pps[lcv] = ptmp;
1056 		}
1057 
1058 		/*
1059  		 * if we own the valid page at the correct offset, pps[lcv] will
1060  		 * point to it.   nothing more to do except go to the next page.
1061  		 */
1062 
1063 		if (pps[lcv])
1064 			continue;			/* next lcv */
1065 
1066 		/*
1067  		 * we have a "fake/busy/clean" page that we just allocated.
1068  		 * do the needed "i/o", either reading from swap or zeroing.
1069  		 */
1070 
1071 		swslot = uao_find_swslot(uobj, pageidx);
1072 
1073 		/*
1074  		 * just zero the page if there's nothing in swap.
1075  		 */
1076 
1077 		if (swslot == 0) {
1078 
1079 			/*
1080 			 * page hasn't existed before, just zero it.
1081 			 */
1082 
1083 			uvm_pagezero(ptmp);
1084 		} else {
1085 #if defined(VMSWAP)
1086 			int error;
1087 
1088 			UVMHIST_LOG(pdhist, "pagein from swslot %d",
1089 			     swslot, 0,0,0);
1090 
1091 			/*
1092 			 * page in the swapped-out page.
1093 			 * unlock object for i/o, relock when done.
1094 			 */
1095 
1096 			mutex_exit(uobj->vmobjlock);
1097 			error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1098 			mutex_enter(uobj->vmobjlock);
1099 
1100 			/*
1101 			 * I/O done.  check for errors.
1102 			 */
1103 
1104 			if (error != 0) {
1105 				UVMHIST_LOG(pdhist, "<- done (error=%d)",
1106 				    error,0,0,0);
1107 				if (ptmp->flags & PG_WANTED)
1108 					wakeup(ptmp);
1109 
1110 				/*
1111 				 * remove the swap slot from the aobj
1112 				 * and mark the aobj as having no real slot.
1113 				 * don't free the swap slot, thus preventing
1114 				 * it from being used again.
1115 				 */
1116 
1117 				swslot = uao_set_swslot(uobj, pageidx,
1118 				    SWSLOT_BAD);
1119 				if (swslot > 0) {
1120 					uvm_swap_markbad(swslot, 1);
1121 				}
1122 
1123 				mutex_enter(&uvm_pageqlock);
1124 				uvm_pagefree(ptmp);
1125 				mutex_exit(&uvm_pageqlock);
1126 				mutex_exit(uobj->vmobjlock);
1127 				return error;
1128 			}
1129 #else /* defined(VMSWAP) */
1130 			panic("%s: pagein", __func__);
1131 #endif /* defined(VMSWAP) */
1132 		}
1133 
1134 		if ((access_type & VM_PROT_WRITE) == 0) {
1135 			ptmp->flags |= PG_CLEAN;
1136 			pmap_clear_modify(ptmp);
1137 		}
1138 
1139 		/*
1140  		 * we got the page!   clear the fake flag (indicates valid
1141 		 * data now in page) and plug into our result array.   note
1142 		 * that page is still busy.
1143  		 *
1144  		 * it is the caller's job to:
1145  		 * => check if the page is released
1146  		 * => unbusy the page
1147  		 * => activate the page
1148  		 */
1149 
1150 		ptmp->flags &= ~PG_FAKE;
1151 		pps[lcv] = ptmp;
1152 	}
1153 
1154 	/*
1155  	 * finally, unlock object and return.
1156  	 */
1157 
1158 done:
1159 	mutex_exit(uobj->vmobjlock);
1160 	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
1161 	return 0;
1162 }
1163 
1164 #if defined(VMSWAP)
1165 
1166 /*
1167  * uao_dropswap:  release any swap resources from this aobj page.
1168  *
1169  * => aobj must be locked or have a reference count of 0.
1170  */
1171 
1172 void
1173 uao_dropswap(struct uvm_object *uobj, int pageidx)
1174 {
1175 	int slot;
1176 
1177 	slot = uao_set_swslot(uobj, pageidx, 0);
1178 	if (slot) {
1179 		uvm_swap_free(slot, 1);
1180 	}
1181 }
1182 
1183 /*
1184  * page in every page in every aobj that is paged-out to a range of swslots.
1185  *
1186  * => nothing should be locked.
1187  * => returns true if pagein was aborted due to lack of memory.
1188  */
1189 
1190 bool
1191 uao_swap_off(int startslot, int endslot)
1192 {
1193 	struct uvm_aobj *aobj;
1194 
1195 	/*
1196 	 * Walk the list of all anonymous UVM objects.  Grab the first.
1197 	 */
1198 	mutex_enter(&uao_list_lock);
1199 	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
1200 		mutex_exit(&uao_list_lock);
1201 		return false;
1202 	}
1203 	uao_reference(&aobj->u_obj);
1204 
1205 	do {
1206 		struct uvm_aobj *nextaobj;
1207 		bool rv;
1208 
1209 		/*
1210 		 * Prefetch the next object and immediately hold a reference
1211 		 * on it, so neither the current nor the next entry could
1212 		 * disappear while we are iterating.
1213 		 */
1214 		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
1215 			uao_reference(&nextaobj->u_obj);
1216 		}
1217 		mutex_exit(&uao_list_lock);
1218 
1219 		/*
1220 		 * Page in all pages in the swap slot range.
1221 		 */
1222 		mutex_enter(aobj->u_obj.vmobjlock);
1223 		rv = uao_pagein(aobj, startslot, endslot);
1224 		mutex_exit(aobj->u_obj.vmobjlock);
1225 
1226 		/* Drop the reference of the current object. */
1227 		uao_detach(&aobj->u_obj);
1228 		if (rv) {
1229 			if (nextaobj) {
1230 				uao_detach(&nextaobj->u_obj);
1231 			}
1232 			return rv;
1233 		}
1234 
1235 		aobj = nextaobj;
1236 		mutex_enter(&uao_list_lock);
1237 	} while (aobj);
1238 
1239 	mutex_exit(&uao_list_lock);
1240 	return false;
1241 }
1242 
1243 /*
1244  * page in any pages from aobj in the given range.
1245  *
1246  * => aobj must be locked and is returned locked.
1247  * => returns true if pagein was aborted due to lack of memory.
1248  */
1249 static bool
1250 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1251 {
1252 	bool rv;
1253 
1254 	if (UAO_USES_SWHASH(aobj)) {
1255 		struct uao_swhash_elt *elt;
1256 		int buck;
1257 
1258 restart:
1259 		for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
1260 			for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
1261 			     elt != NULL;
1262 			     elt = LIST_NEXT(elt, list)) {
1263 				int i;
1264 
1265 				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1266 					int slot = elt->slots[i];
1267 
1268 					/*
1269 					 * if the slot isn't in range, skip it.
1270 					 */
1271 
1272 					if (slot < startslot ||
1273 					    slot >= endslot) {
1274 						continue;
1275 					}
1276 
1277 					/*
1278 					 * process the page,
1279 					 * then start over on this object
1280 					 * since the swhash elt
1281 					 * may have been freed.
1282 					 */
1283 
1284 					rv = uao_pagein_page(aobj,
1285 					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1286 					if (rv) {
1287 						return rv;
1288 					}
1289 					goto restart;
1290 				}
1291 			}
1292 		}
1293 	} else {
1294 		int i;
1295 
1296 		for (i = 0; i < aobj->u_pages; i++) {
1297 			int slot = aobj->u_swslots[i];
1298 
1299 			/*
1300 			 * if the slot isn't in range, skip it
1301 			 */
1302 
1303 			if (slot < startslot || slot >= endslot) {
1304 				continue;
1305 			}
1306 
1307 			/*
1308 			 * process the page.
1309 			 */
1310 
1311 			rv = uao_pagein_page(aobj, i);
1312 			if (rv) {
1313 				return rv;
1314 			}
1315 		}
1316 	}
1317 
1318 	return false;
1319 }
1320 
1321 /*
1322  * uao_pagein_page: page in a single page from an anonymous UVM object.
1323  *
1324  * => Returns true if pagein was aborted due to lack of memory.
1325  * => Object must be locked and is returned locked.
1326  */
1327 
1328 static bool
1329 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1330 {
1331 	struct uvm_object *uobj = &aobj->u_obj;
1332 	struct vm_page *pg;
1333 	int rv, npages;
1334 
1335 	pg = NULL;
1336 	npages = 1;
1337 
1338 	KASSERT(mutex_owned(uobj->vmobjlock));
1339 	rv = uao_get(uobj, pageidx << PAGE_SHIFT, &pg, &npages,
1340 	    0, VM_PROT_READ | VM_PROT_WRITE, 0, PGO_SYNCIO);
1341 
1342 	/*
1343 	 * relock and finish up.
1344 	 */
1345 
1346 	mutex_enter(uobj->vmobjlock);
1347 	switch (rv) {
1348 	case 0:
1349 		break;
1350 
1351 	case EIO:
1352 	case ERESTART:
1353 
1354 		/*
1355 		 * nothing more to do on errors.
1356 		 * ERESTART can only mean that the anon was freed,
1357 		 * so again there's nothing to do.
1358 		 */
1359 
1360 		return false;
1361 
1362 	default:
1363 		return true;
1364 	}
1365 
1366 	/*
1367 	 * ok, we've got the page now.
1368 	 * mark it as dirty, clear its swslot and un-busy it.
1369 	 */
1370 	uao_dropswap(&aobj->u_obj, pageidx);
1371 
1372 	/*
1373 	 * make sure it's on a page queue.
1374 	 */
1375 	mutex_enter(&uvm_pageqlock);
1376 	if (pg->wire_count == 0)
1377 		uvm_pageenqueue(pg);
1378 	mutex_exit(&uvm_pageqlock);
1379 
1380 	if (pg->flags & PG_WANTED) {
1381 		wakeup(pg);
1382 	}
1383 	pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE);
1384 	UVM_PAGE_OWN(pg, NULL);
1385 
1386 	return false;
1387 }
1388 
1389 /*
1390  * uao_dropswap_range: drop swapslots in the range.
1391  *
1392  * => aobj must be locked and is returned locked.
1393  * => start is inclusive.  end is exclusive.
1394  */
1395 
1396 void
1397 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
1398 {
1399 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1400 	int swpgonlydelta = 0;
1401 
1402 	KASSERT(mutex_owned(uobj->vmobjlock));
1403 
1404 	if (end == 0) {
1405 		end = INT64_MAX;
1406 	}
1407 
1408 	if (UAO_USES_SWHASH(aobj)) {
1409 		int i, hashbuckets = aobj->u_swhashmask + 1;
1410 		voff_t taghi;
1411 		voff_t taglo;
1412 
1413 		taglo = UAO_SWHASH_ELT_TAG(start);
1414 		taghi = UAO_SWHASH_ELT_TAG(end);
1415 
1416 		for (i = 0; i < hashbuckets; i++) {
1417 			struct uao_swhash_elt *elt, *next;
1418 
1419 			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
1420 			     elt != NULL;
1421 			     elt = next) {
1422 				int startidx, endidx;
1423 				int j;
1424 
1425 				next = LIST_NEXT(elt, list);
1426 
1427 				if (elt->tag < taglo || taghi < elt->tag) {
1428 					continue;
1429 				}
1430 
1431 				if (elt->tag == taglo) {
1432 					startidx =
1433 					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
1434 				} else {
1435 					startidx = 0;
1436 				}
1437 
1438 				if (elt->tag == taghi) {
1439 					endidx =
1440 					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
1441 				} else {
1442 					endidx = UAO_SWHASH_CLUSTER_SIZE;
1443 				}
1444 
1445 				for (j = startidx; j < endidx; j++) {
1446 					int slot = elt->slots[j];
1447 
1448 					KASSERT(uvm_pagelookup(&aobj->u_obj,
1449 					    (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
1450 					    + j) << PAGE_SHIFT) == NULL);
1451 					if (slot > 0) {
1452 						uvm_swap_free(slot, 1);
1453 						swpgonlydelta++;
1454 						KASSERT(elt->count > 0);
1455 						elt->slots[j] = 0;
1456 						elt->count--;
1457 					}
1458 				}
1459 
1460 				if (elt->count == 0) {
1461 					LIST_REMOVE(elt, list);
1462 					pool_put(&uao_swhash_elt_pool, elt);
1463 				}
1464 			}
1465 		}
1466 	} else {
1467 		int i;
1468 
1469 		if (aobj->u_pages < end) {
1470 			end = aobj->u_pages;
1471 		}
1472 		for (i = start; i < end; i++) {
1473 			int slot = aobj->u_swslots[i];
1474 
1475 			if (slot > 0) {
1476 				uvm_swap_free(slot, 1);
1477 				swpgonlydelta++;
1478 			}
1479 		}
1480 	}
1481 
1482 	/*
1483 	 * adjust the counter of pages only in swap for all
1484 	 * the swap slots we've freed.
1485 	 */
1486 
1487 	if (swpgonlydelta > 0) {
1488 		mutex_enter(&uvm_swap_data_lock);
1489 		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1490 		uvmexp.swpgonly -= swpgonlydelta;
1491 		mutex_exit(&uvm_swap_data_lock);
1492 	}
1493 }
1494 
1495 #endif /* defined(VMSWAP) */
1496