xref: /netbsd-src/sys/uvm/uvm_aobj.c (revision 154bfe8e089c1a0a4e9ed8414f08d3da90949162)
1 /*	$NetBSD: uvm_aobj.c,v 1.151 2020/08/19 15:36:41 chs Exp $	*/
2 
3 /*
4  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
5  *                    Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
29  */
30 
31 /*
32  * uvm_aobj.c: anonymous memory uvm_object pager
33  *
34  * author: Chuck Silvers <chuq@chuq.com>
35  * started: Jan-1998
36  *
37  * - design mostly from Chuck Cranor
38  */
39 
40 #include <sys/cdefs.h>
41 __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.151 2020/08/19 15:36:41 chs Exp $");
42 
43 #ifdef _KERNEL_OPT
44 #include "opt_uvmhist.h"
45 #endif
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/kmem.h>
51 #include <sys/pool.h>
52 #include <sys/atomic.h>
53 
54 #include <uvm/uvm.h>
55 #include <uvm/uvm_page_array.h>
56 
57 /*
58  * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
59  * keeping the list of resident pages, it may also keep a list of allocated
60  * swap blocks.  Depending on the size of the object, this list is either
61  * stored in an array (small objects) or in a hash table (large objects).
62  *
63  * Lock order
64  *
65  *	uao_list_lock ->
66  *		uvm_object::vmobjlock
67  */
68 
69 /*
70  * Note: for hash tables, we break the address space of the aobj into blocks
71  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
72  */
73 
74 #define	UAO_SWHASH_CLUSTER_SHIFT	4
75 #define	UAO_SWHASH_CLUSTER_SIZE		(1 << UAO_SWHASH_CLUSTER_SHIFT)
76 
77 /* Get the "tag" for this page index. */
78 #define	UAO_SWHASH_ELT_TAG(idx)		((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
79 #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
80     ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))
81 
82 /* Given an ELT and a page index, find the swap slot. */
83 #define	UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
84     ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])
85 
86 /* Given an ELT, return its pageidx base. */
87 #define	UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
88     ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)
89 
90 /* The hash function. */
91 #define	UAO_SWHASH_HASH(aobj, idx) \
92     (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
93     & (aobj)->u_swhashmask)])
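
/*
 * As a worked illustration of the macros above (with the current
 * UAO_SWHASH_CLUSTER_SHIFT of 4), page index 0x35 maps to:
 *
 *	tag        = 0x35 >> 4        = 0x3
 *	slot index = 0x35 & (16 - 1)  = 0x5
 *	bucket     = &u_swhash[0x3 & u_swhashmask]
 *
 * so pages 0x30-0x3f share a single uao_swhash_elt, and the swap slot
 * for page 0x35 lives in that elt's slots[5].
 */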
94 
95 /*
96  * The threshold which determines whether we will use an array or a
97  * hash table to store the list of allocated swap blocks.
98  */
99 #define	UAO_SWHASH_THRESHOLD		(UAO_SWHASH_CLUSTER_SIZE * 4)
100 #define	UAO_USES_SWHASH(aobj) \
101     ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)
102 
103 /* The number of buckets in a hash, with an upper bound. */
104 #define	UAO_SWHASH_MAXBUCKETS		256
105 #define	UAO_SWHASH_BUCKETS(aobj) \
106     (MIN((aobj)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
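
/*
 * Concretely, with UAO_SWHASH_CLUSTER_SIZE == 16 the threshold is 64
 * pages: an aobj of more than 64 pages (256 KiB with 4 KiB pages)
 * keeps its swap slots in the hash table, while anything smaller uses
 * the plain array.  The requested bucket count is u_pages / 16,
 * capped at 256.
 */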
107 
108 /*
109  * uao_swhash_elt: when a hash table is being used, this structure defines
110  * the format of an entry in the bucket list.
111  */
112 
113 struct uao_swhash_elt {
114 	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
115 	voff_t tag;				/* our 'tag' */
116 	int count;				/* our number of active slots */
117 	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
118 };
119 
120 /*
121  * uao_swhash: the swap hash table structure
122  */
123 
124 LIST_HEAD(uao_swhash, uao_swhash_elt);
125 
126 /*
127  * uao_swhash_elt_pool: pool of uao_swhash_elt structures.
128  * Note: pages for this pool must not come from a pageable kernel map.
129  */
130 static struct pool	uao_swhash_elt_pool	__cacheline_aligned;
131 
132 /*
133  * uvm_aobj: the actual anon-backed uvm_object
134  *
135  * => the uvm_object is at the top of the structure; this allows
136  *   (struct uvm_aobj *) == (struct uvm_object *)
137  * => only one of u_swslots and u_swhash is used in any given aobj
138  */
139 
140 struct uvm_aobj {
141 	struct uvm_object u_obj; /* has: lock, pgops, #pages, #refs */
142 	pgoff_t u_pages;	 /* number of pages in entire object */
143 	int u_flags;		 /* the flags (see uvm_aobj.h) */
144 	int *u_swslots;		 /* array of offset->swapslot mappings */
145 				 /*
146 				  * hashtable of offset->swapslot mappings
147 				  * (u_swhash is an array of bucket heads)
148 				  */
149 	struct uao_swhash *u_swhash;
150 	u_long u_swhashmask;		/* mask for hashtable */
151 	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
152 	int u_freelist;		  /* freelist to allocate pages from */
153 };
154 
155 static void	uao_free(struct uvm_aobj *);
156 static int	uao_get(struct uvm_object *, voff_t, struct vm_page **,
157 		    int *, int, vm_prot_t, int, int);
158 static int	uao_put(struct uvm_object *, voff_t, voff_t, int);
159 
160 #if defined(VMSWAP)
161 static struct uao_swhash_elt *uao_find_swhash_elt
162     (struct uvm_aobj *, int, bool);
163 
164 static bool uao_pagein(struct uvm_aobj *, int, int);
165 static bool uao_pagein_page(struct uvm_aobj *, int);
166 #endif /* defined(VMSWAP) */
167 
168 static struct vm_page	*uao_pagealloc(struct uvm_object *, voff_t, int);
169 
170 /*
171  * aobj_pager
172  *
173  * note that some functions (e.g. put) are handled elsewhere
174  */
175 
176 const struct uvm_pagerops aobj_pager = {
177 	.pgo_reference = uao_reference,
178 	.pgo_detach = uao_detach,
179 	.pgo_get = uao_get,
180 	.pgo_put = uao_put,
181 };
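
/*
 * Generic UVM code reaches these routines through the ops vector
 * rather than by name; the fault path, for example, ends up doing
 * roughly
 *
 *	error = uobj->pgops->pgo_get(uobj, offset, pps, &npages,
 *	    centeridx, access_type, advice, flags);
 *
 * which for an aobj lands in uao_get() below.
 */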
182 
183 /*
184  * uao_list: global list of active aobjs, locked by uao_list_lock
185  */
186 
187 static LIST_HEAD(aobjlist, uvm_aobj) uao_list	__cacheline_aligned;
188 static kmutex_t		uao_list_lock		__cacheline_aligned;
189 
190 /*
191  * hash table/array related functions
192  */
193 
194 #if defined(VMSWAP)
195 
196 /*
197  * uao_find_swhash_elt: find (or create) a hash table entry for a page
198  * index.
199  *
200  * => the object should be locked by the caller
201  */
202 
203 static struct uao_swhash_elt *
204 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
205 {
206 	struct uao_swhash *swhash;
207 	struct uao_swhash_elt *elt;
208 	voff_t page_tag;
209 
210 	swhash = UAO_SWHASH_HASH(aobj, pageidx);
211 	page_tag = UAO_SWHASH_ELT_TAG(pageidx);
212 
213 	/*
214 	 * now search the bucket for the requested tag
215 	 */
216 
217 	LIST_FOREACH(elt, swhash, list) {
218 		if (elt->tag == page_tag) {
219 			return elt;
220 		}
221 	}
222 	if (!create) {
223 		return NULL;
224 	}
225 
226 	/*
227 	 * allocate a new entry for the bucket, then initialize and insert it
228 	 */
229 
230 	elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
231 	if (elt == NULL) {
232 		return NULL;
233 	}
234 	LIST_INSERT_HEAD(swhash, elt, list);
235 	elt->tag = page_tag;
236 	elt->count = 0;
237 	memset(elt->slots, 0, sizeof(elt->slots));
238 	return elt;
239 }
240 
241 /*
242  * uao_find_swslot: find the swap slot number for an aobj/pageidx
243  *
244  * => object must be locked by caller
245  */
246 
247 int
248 uao_find_swslot(struct uvm_object *uobj, int pageidx)
249 {
250 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
251 	struct uao_swhash_elt *elt;
252 
253 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
254 
255 	/*
256 	 * if noswap flag is set, then we never return a slot
257 	 */
258 
259 	if (aobj->u_flags & UAO_FLAG_NOSWAP)
260 		return 0;
261 
262 	/*
263 	 * if hashing, look in hash table.
264 	 */
265 
266 	if (UAO_USES_SWHASH(aobj)) {
267 		elt = uao_find_swhash_elt(aobj, pageidx, false);
268 		return elt ? UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) : 0;
269 	}
270 
271 	/*
272 	 * otherwise, look in the array
273 	 */
274 
275 	return aobj->u_swslots[pageidx];
276 }
277 
278 /*
279  * uao_set_swslot: set the swap slot for a page in an aobj.
280  *
281  * => setting a slot to zero frees the slot
282  * => object must be locked by caller
283  * => we return the old slot number, or -1 if we failed to allocate
284  *    memory to record the new slot number
285  */
286 
287 int
288 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
289 {
290 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
291 	struct uao_swhash_elt *elt;
292 	int oldslot;
293 	UVMHIST_FUNC(__func__);
294 	UVMHIST_CALLARGS(pdhist, "aobj %#jx pageidx %jd slot %jd",
295 	    (uintptr_t)aobj, pageidx, slot, 0);
296 
297 	KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
298 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
299 
300 	/*
301 	 * if noswap flag is set, then we can't set a non-zero slot.
302 	 */
303 
304 	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
305 		KASSERTMSG(slot == 0, "uao_set_swslot: no swap object");
306 		return 0;
307 	}
308 
309 	/*
310 	 * are we using a hash table?  if so, add it in the hash.
311 	 */
312 
313 	if (UAO_USES_SWHASH(aobj)) {
314 
315 		/*
316 		 * Avoid allocating an entry just to free it again if
317 		 * the page had no swap slot in the first place, and
318 		 * we are freeing.
319 		 */
320 
321 		elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
322 		if (elt == NULL) {
323 			return slot ? -1 : 0;
324 		}
325 
326 		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
327 		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
328 
329 		/*
330 		 * now adjust the elt's count of active slots and free the
331 		 * elt if the count drops to zero.
332 		 */
333 
334 		if (slot) {
335 			if (oldslot == 0)
336 				elt->count++;
337 		} else {
338 			if (oldslot)
339 				elt->count--;
340 
341 			if (elt->count == 0) {
342 				LIST_REMOVE(elt, list);
343 				pool_put(&uao_swhash_elt_pool, elt);
344 			}
345 		}
346 	} else {
347 		/* we are using an array */
348 		oldslot = aobj->u_swslots[pageidx];
349 		aobj->u_swslots[pageidx] = slot;
350 	}
351 	return oldslot;
352 }
353 
354 #endif /* defined(VMSWAP) */
355 
356 /*
357  * end of hash/array functions
358  */
359 
360 /*
361  * uao_free: free all resources held by an aobj, and then free the aobj
362  *
363  * => the aobj should be dead
364  */
365 
366 static void
367 uao_free(struct uvm_aobj *aobj)
368 {
369 	struct uvm_object *uobj = &aobj->u_obj;
370 
371 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
372 	KASSERT(rw_write_held(uobj->vmobjlock));
373 	uao_dropswap_range(uobj, 0, 0);
374 	rw_exit(uobj->vmobjlock);
375 
376 #if defined(VMSWAP)
377 	if (UAO_USES_SWHASH(aobj)) {
378 
379 		/*
380 		 * free the hash table itself.
381 		 */
382 
383 		hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
384 	} else {
385 
386 		/*
387 		 * free the array itself.
388 		 */
389 
390 		kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
391 	}
392 #endif /* defined(VMSWAP) */
393 
394 	/*
395 	 * finally free the aobj itself
396 	 */
397 
398 	uvm_obj_destroy(uobj, true);
399 	kmem_free(aobj, sizeof(struct uvm_aobj));
400 }
401 
402 /*
403  * pager functions
404  */
405 
406 /*
407  * uao_create: create an aobj of the given size and return its uvm_object.
408  *
409  * => for normal use, flags are always zero
410  * => for the kernel object, the flags are:
411  *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
412  *	UAO_FLAG_KERNSWAP - enable swapping of kernel object (can only happen once)
413  */
414 
415 struct uvm_object *
416 uao_create(voff_t size, int flags)
417 {
418 	static struct uvm_aobj kernel_object_store;
419 	static krwlock_t kernel_object_lock __cacheline_aligned;
420 	static int kobj_alloced __diagused = 0;
421 	pgoff_t pages = round_page((uint64_t)size) >> PAGE_SHIFT;
422 	struct uvm_aobj *aobj;
423 	int refs;
424 
425 	/*
426 	 * Allocate a new aobj, unless kernel object is requested.
427 	 */
428 
429 	if (flags & UAO_FLAG_KERNOBJ) {
430 		KASSERT(!kobj_alloced);
431 		aobj = &kernel_object_store;
432 		aobj->u_pages = pages;
433 		aobj->u_flags = UAO_FLAG_NOSWAP;
434 		refs = UVM_OBJ_KERN;
435 		kobj_alloced = UAO_FLAG_KERNOBJ;
436 	} else if (flags & UAO_FLAG_KERNSWAP) {
437 		KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
438 		aobj = &kernel_object_store;
439 		kobj_alloced = UAO_FLAG_KERNSWAP;
440 		refs = 0xdeadbeaf; /* XXX: gcc */
441 	} else {
442 		aobj = kmem_alloc(sizeof(struct uvm_aobj), KM_SLEEP);
443 		aobj->u_pages = pages;
444 		aobj->u_flags = 0;
445 		refs = 1;
446 	}
447 
448 	/*
449 	 * no freelist by default
450 	 */
451 
452 	aobj->u_freelist = VM_NFREELIST;
453 
454 	/*
455  	 * allocate hash/array if necessary
456  	 *
457  	 * note: in the KERNSWAP case there is no need to worry about locking
458  	 * since we are still booting and should be the only thread around.
459  	 */
460 
461 	if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
462 #if defined(VMSWAP)
463 		const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
464 
465 		/* allocate hash table or array depending on object size */
466 		if (UAO_USES_SWHASH(aobj)) {
467 			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
468 			    HASH_LIST, kernswap ? false : true,
469 			    &aobj->u_swhashmask);
470 			if (aobj->u_swhash == NULL)
471 				panic("uao_create: hashinit swhash failed");
472 		} else {
473 			aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
474 			    kernswap ? KM_NOSLEEP : KM_SLEEP);
475 			if (aobj->u_swslots == NULL)
476 				panic("uao_create: swslots allocation failed");
477 		}
478 #endif /* defined(VMSWAP) */
479 
480 		if (flags) {
481 			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
482 			return &aobj->u_obj;
483 		}
484 	}
485 
486 	/*
487 	 * Initialise UVM object.
488 	 */
489 
490 	const bool kernobj = (flags & UAO_FLAG_KERNOBJ) != 0;
491 	uvm_obj_init(&aobj->u_obj, &aobj_pager, !kernobj, refs);
492 	if (__predict_false(kernobj)) {
493 		/* Initialisation only once, for UAO_FLAG_KERNOBJ. */
494 		rw_init(&kernel_object_lock);
495 		uvm_obj_setlock(&aobj->u_obj, &kernel_object_lock);
496 	}
497 
498 	/*
499  	 * now that aobj is ready, add it to the global list
500  	 */
501 
502 	mutex_enter(&uao_list_lock);
503 	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
504 	mutex_exit(&uao_list_lock);
505 	return(&aobj->u_obj);
506 }
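
/*
 * A minimal usage sketch (illustrative only; real callers such as the
 * System V shared memory code wrap this in their own bookkeeping):
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(size, 0);
 *	...map or otherwise use uobj, taking extra references with
 *	   uao_reference() as needed...
 *	uao_detach(uobj);
 *
 * uao_create() returns the object with one reference held, so the
 * final uao_detach() frees the object and its swap resources.
 */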
507 
508 /*
509  * uao_set_pgfl: allocate pages only from the specified freelist.
510  *
511  * => must be called before any pages are allocated for the object.
512  * => reset by setting it to VM_NFREELIST, meaning any freelist.
513  */
514 
515 void
516 uao_set_pgfl(struct uvm_object *uobj, int freelist)
517 {
518 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
519 
520 	KASSERTMSG((0 <= freelist), "invalid freelist %d", freelist);
521 	KASSERTMSG((freelist <= VM_NFREELIST), "invalid freelist %d",
522 	    freelist);
523 
524 	aobj->u_freelist = freelist;
525 }
526 
527 /*
528  * uao_pagealloc: allocate a page for aobj.
529  */
530 
531 static inline struct vm_page *
532 uao_pagealloc(struct uvm_object *uobj, voff_t offset, int flags)
533 {
534 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
535 
536 	if (__predict_true(aobj->u_freelist == VM_NFREELIST))
537 		return uvm_pagealloc(uobj, offset, NULL, flags);
538 	else
539 		return uvm_pagealloc_strat(uobj, offset, NULL, flags,
540 		    UVM_PGA_STRAT_ONLY, aobj->u_freelist);
541 }
542 
543 /*
544  * uao_init: set up aobj pager subsystem
545  *
546  * => called at boot time from uvm_pager_init()
547  */
548 
549 void
550 uao_init(void)
551 {
552 	static int uao_initialized;
553 
554 	if (uao_initialized)
555 		return;
556 	uao_initialized = true;
557 	LIST_INIT(&uao_list);
558 	mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
559 	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
560 	    0, 0, 0, "uaoeltpl", NULL, IPL_VM);
561 }
562 
563 /*
564  * uao_reference: hold a reference to an anonymous UVM object.
565  */
566 void
567 uao_reference(struct uvm_object *uobj)
568 {
569 	/* Kernel object is persistent. */
570 	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
571 		return;
572 	}
573 	atomic_inc_uint(&uobj->uo_refs);
574 }
575 
576 /*
577  * uao_detach: drop a reference to an anonymous UVM object.
578  */
579 void
580 uao_detach(struct uvm_object *uobj)
581 {
582 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
583 	struct uvm_page_array a;
584 	struct vm_page *pg;
585 
586 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
587 
588 	/*
589 	 * Detaching from kernel object is a NOP.
590 	 */
591 
592 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
593 		return;
594 
595 	/*
596 	 * Drop the reference.  If it was the last one, destroy the object.
597 	 */
598 
599 	KASSERT(uobj->uo_refs > 0);
600 	UVMHIST_LOG(maphist,"  (uobj=%#jx)  ref=%jd",
601 	    (uintptr_t)uobj, uobj->uo_refs, 0, 0);
602 	if (atomic_dec_uint_nv(&uobj->uo_refs) > 0) {
603 		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
604 		return;
605 	}
606 
607 	/*
608 	 * Remove the aobj from the global list.
609 	 */
610 
611 	mutex_enter(&uao_list_lock);
612 	LIST_REMOVE(aobj, u_list);
613 	mutex_exit(&uao_list_lock);
614 
615 	/*
616 	 * Free all the pages left in the aobj.  For each page, when the
617 	 * page is no longer busy (and thus after any disk I/O that it is
618 	 * involved in is complete), release any swap resources and free
619 	 * the page itself.
620 	 */
621 	uvm_page_array_init(&a, uobj, 0);
622 	rw_enter(uobj->vmobjlock, RW_WRITER);
623 	while ((pg = uvm_page_array_fill_and_peek(&a, 0, 0)) != NULL) {
624 		uvm_page_array_advance(&a);
625 		pmap_page_protect(pg, VM_PROT_NONE);
626 		if (pg->flags & PG_BUSY) {
627 			uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
628 			uvm_page_array_clear(&a);
629 			rw_enter(uobj->vmobjlock, RW_WRITER);
630 			continue;
631 		}
632 		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
633 		uvm_pagefree(pg);
634 	}
635 	uvm_page_array_fini(&a);
636 
637 	/*
638 	 * Finally, free the anonymous UVM object itself.
639 	 */
640 
641 	uao_free(aobj);
642 }
643 
644 /*
645  * uao_put: flush pages out of a uvm object
646  *
647  * => object should be locked by caller.  we may _unlock_ the object
648  *	if (and only if) we need to clean a page (PGO_CLEANIT).
649  *	XXXJRT Currently, however, we don't.  In the case of cleaning
650  *	XXXJRT a page, we simply just deactivate it.  Should probably
651  *	XXXJRT handle this better, in the future (although "flushing"
652  *	XXXJRT anonymous memory isn't terribly important).
653  * => if PGO_CLEANIT is not set, then we will neither unlock the object
654  *	nor block.
655  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
656  *	for flushing.
657  * => we return 0 unless we encountered some sort of I/O error
658  *	XXXJRT currently never happens, as we never directly initiate
659  *	XXXJRT I/O
660  */
661 
662 static int
663 uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
664 {
665 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
666 	struct uvm_page_array a;
667 	struct vm_page *pg;
668 	voff_t curoff;
669 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
670 
671 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
672 	KASSERT(rw_write_held(uobj->vmobjlock));
673 
674 	if (flags & PGO_ALLPAGES) {
675 		start = 0;
676 		stop = aobj->u_pages << PAGE_SHIFT;
677 	} else {
678 		start = trunc_page(start);
679 		if (stop == 0) {
680 			stop = aobj->u_pages << PAGE_SHIFT;
681 		} else {
682 			stop = round_page(stop);
683 		}
684 		if (stop > (uint64_t)(aobj->u_pages << PAGE_SHIFT)) {
685 			printf("uao_put: strange, got an out of range "
686 			    "flush %#jx > %#jx (fixed)\n",
687 			    (uintmax_t)stop,
688 			    (uintmax_t)(aobj->u_pages << PAGE_SHIFT));
689 			stop = aobj->u_pages << PAGE_SHIFT;
690 		}
691 	}
692 	UVMHIST_LOG(maphist,
693 	    " flush start=%#jx, stop=%#jx, flags=%#jx",
694 	    start, stop, flags, 0);
695 
696 	/*
697 	 * Don't need to do any work here if we're not freeing
698 	 * or deactivating pages.
699 	 */
700 
701 	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
702 		rw_exit(uobj->vmobjlock);
703 		return 0;
704 	}
705 
706 	/* locked: uobj */
707 	uvm_page_array_init(&a, uobj, 0);
708 	curoff = start;
709 	while ((pg = uvm_page_array_fill_and_peek(&a, curoff, 0)) != NULL) {
710 		if (pg->offset >= stop) {
711 			break;
712 		}
713 
714 		/*
715 		 * wait and try again if the page is busy.
716 		 */
717 
718 		if (pg->flags & PG_BUSY) {
719 			uvm_pagewait(pg, uobj->vmobjlock, "uao_put");
720 			uvm_page_array_clear(&a);
721 			rw_enter(uobj->vmobjlock, RW_WRITER);
722 			continue;
723 		}
724 		uvm_page_array_advance(&a);
725 		curoff = pg->offset + PAGE_SIZE;
726 
727 		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
728 
729 		/*
730 		 * XXX In these first 3 cases, we always just
731 		 * XXX deactivate the page.  We may want to
732 		 * XXX handle the different cases more specifically
733 		 * XXX in the future.
734 		 */
735 
736 		case PGO_CLEANIT|PGO_FREE:
737 		case PGO_CLEANIT|PGO_DEACTIVATE:
738 		case PGO_DEACTIVATE:
739  deactivate_it:
740  			uvm_pagelock(pg);
741 			uvm_pagedeactivate(pg);
742  			uvm_pageunlock(pg);
743 			break;
744 
745 		case PGO_FREE:
746 			/*
747 			 * If there are multiple references to
748 			 * the object, just deactivate the page.
749 			 */
750 
751 			if (uobj->uo_refs > 1)
752 				goto deactivate_it;
753 
754 			/*
755 			 * free the swap slot and the page.
756 			 */
757 
758 			pmap_page_protect(pg, VM_PROT_NONE);
759 
760 			/*
761 			 * freeing the swap slot here is not strictly
762 			 * necessary, but deferring it wouldn't save much
763 			 * because we need to update swap accounting anyway.
764 			 */
765 
766 			uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
767 			uvm_pagefree(pg);
768 			break;
769 
770 		default:
771 			panic("%s: impossible", __func__);
772 		}
773 	}
774 	rw_exit(uobj->vmobjlock);
775 	uvm_page_array_fini(&a);
776 	return 0;
777 }
778 
779 /*
780  * uao_get: fetch me a page
781  *
782  * we have three cases:
783  * 1: page is resident     -> just return the page.
784  * 2: page is zero-fill    -> allocate a new page and zero it.
785  * 3: page is swapped out  -> fetch the page from swap.
786  *
787  * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
788  * so, if the "center" page hits case 2/3 then we will need to return EBUSY.
789  *
790  * => prefer map unlocked (not required)
791  * => object must be locked!  we will _unlock_ it before starting any I/O.
792  * => flags: PGO_LOCKED: fault data structures are locked
793  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
794  * => NOTE: caller must check for released pages!!
795  */
796 
797 static int
798 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
799     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
800 {
801 	voff_t current_offset;
802 	struct vm_page *ptmp;
803 	int lcv, gotpages, maxpages, swslot, pageidx;
804 	bool overwrite = ((flags & PGO_OVERWRITE) != 0);
805 	struct uvm_page_array a;
806 
807 	UVMHIST_FUNC(__func__);
808 	UVMHIST_CALLARGS(pdhist, "aobj=%#jx offset=%jd, flags=%jd",
809 		    (uintptr_t)uobj, offset, flags,0);
810 
811 	/*
812 	 * the object must be locked.  it can only be a read lock when
813 	 * processing a read fault with PGO_LOCKED.
814 	 */
815 
816 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
817 	KASSERT(rw_lock_held(uobj->vmobjlock));
818 	KASSERT(rw_write_held(uobj->vmobjlock) ||
819 	   ((flags & PGO_LOCKED) != 0 && (access_type & VM_PROT_WRITE) == 0));
820 
821 	/*
822  	 * get number of pages
823  	 */
824 
825 	maxpages = *npagesp;
826 
827 	/*
828  	 * step 1: handle the case where fault data structures are locked.
829  	 */
830 
831 	if (flags & PGO_LOCKED) {
832 
833 		/*
834  		 * step 1a: get pages that are already resident.   only do
835 		 * this if the data structures are locked (i.e. the first
836 		 * time through).
837  		 */
838 
839 		uvm_page_array_init(&a, uobj, 0);
840 		gotpages = 0;	/* # of pages we got so far */
841 		for (lcv = 0; lcv < maxpages; lcv++) {
842 			ptmp = uvm_page_array_fill_and_peek(&a,
843 			    offset + (lcv << PAGE_SHIFT), maxpages);
844 			if (ptmp == NULL) {
845 				break;
846 			}
847 			KASSERT(ptmp->offset >= offset);
848 			lcv = (ptmp->offset - offset) >> PAGE_SHIFT;
849 			if (lcv >= maxpages) {
850 				break;
851 			}
852 			uvm_page_array_advance(&a);
853 
854 			/*
855 			 * to be useful we must get a non-busy page
856 			 */
857 
858 			if ((ptmp->flags & PG_BUSY) != 0) {
859 				continue;
860 			}
861 
862 			/*
863 			 * useful page: plug it in our result array
864 			 */
865 
866 			KASSERT(uvm_pagegetdirty(ptmp) !=
867 			    UVM_PAGE_STATUS_CLEAN);
868 			pps[lcv] = ptmp;
869 			gotpages++;
870 		}
871 		uvm_page_array_fini(&a);
872 
873 		/*
874  		 * step 1b: now we've either done everything needed or we need
875 		 * to unlock and do some waiting or I/O.
876  		 */
877 
878 		UVMHIST_LOG(pdhist, "<- done (done=%jd)",
879 		    (pps[centeridx] != NULL), 0,0,0);
880 		*npagesp = gotpages;
881 		return pps[centeridx] != NULL ? 0 : EBUSY;
882 	}
883 
884 	/*
885  	 * step 2: get non-resident or busy pages.
886  	 * object is locked.   data structures are unlocked.
887  	 */
888 
889 	if ((flags & PGO_SYNCIO) == 0) {
890 		goto done;
891 	}
892 
893 	uvm_page_array_init(&a, uobj, 0);
894 	for (lcv = 0, current_offset = offset ; lcv < maxpages ;) {
895 
896 		/*
897  		 * we have yet to locate the current page (pps[lcv]).   we
898 		 * first look for a page that is already at the current offset.
899 		 * if we find a page, we check to see if it is busy or
900 		 * released.  if that is the case, then we sleep on the page
901 		 * until it is no longer busy or released and repeat the lookup.
902 		 * if the page we found is neither busy nor released, then we
903 		 * busy it (so we own it) and plug it into pps[lcv].   we are
904 		 * ready to move on to the next page.
905  		 */
906 
907 		ptmp = uvm_page_array_fill_and_peek(&a, current_offset,
908 		    maxpages - lcv);
909 
910 		if (ptmp != NULL && ptmp->offset == current_offset) {
911 			/* page is there, see if we need to wait on it */
912 			if ((ptmp->flags & PG_BUSY) != 0) {
913 				UVMHIST_LOG(pdhist,
914 				    "sleeping, ptmp->flags %#jx\n",
915 				    ptmp->flags,0,0,0);
916 				uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
917 				rw_enter(uobj->vmobjlock, RW_WRITER);
918 				uvm_page_array_clear(&a);
919 				continue;
920 			}
921 
922 			/*
923  			 * if we get here then the page is resident and
924 			 * unbusy.  we busy it now (so we own it).  if
925 			 * overwriting, mark the page dirty up front as
926 			 * it will be zapped via an unmanaged mapping.
927  			 */
928 
929 			KASSERT(uvm_pagegetdirty(ptmp) !=
930 			    UVM_PAGE_STATUS_CLEAN);
931 			if (overwrite) {
932 				uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_DIRTY);
933 			}
934 			/* we own it, caller must un-busy */
935 			ptmp->flags |= PG_BUSY;
936 			UVM_PAGE_OWN(ptmp, "uao_get2");
937 			pps[lcv++] = ptmp;
938 			current_offset += PAGE_SIZE;
939 			uvm_page_array_advance(&a);
940 			continue;
941 		} else {
942 			KASSERT(ptmp == NULL || ptmp->offset > current_offset);
943 		}
944 
945 		/*
946 		 * not resident.  allocate a new busy/fake/clean page in the
947 		 * object.  if it's in swap we need to do I/O to fill in the
948 		 * data, otherwise the page needs to be cleared: if it's not
949 		 * destined to be overwritten, then zero it here and now.
950 		 */
951 
952 		pageidx = current_offset >> PAGE_SHIFT;
953 		swslot = uao_find_swslot(uobj, pageidx);
954 		ptmp = uao_pagealloc(uobj, current_offset,
955 		    swslot != 0 || overwrite ? 0 : UVM_PGA_ZERO);
956 
957 		/* out of RAM? */
958 		if (ptmp == NULL) {
959 			rw_exit(uobj->vmobjlock);
960 			UVMHIST_LOG(pdhist, "sleeping, ptmp == NULL",0,0,0,0);
961 			uvm_wait("uao_getpage");
962 			rw_enter(uobj->vmobjlock, RW_WRITER);
963 			uvm_page_array_clear(&a);
964 			continue;
965 		}
966 
967 		/*
968  		 * if swslot == 0, page hasn't existed before and is zeroed.
969  		 * otherwise we have a "fake/busy/clean" page that we just
970  		 * allocated.  do the needed "i/o", reading from swap.
971  		 */
972 
973 		if (swslot != 0) {
974 #if defined(VMSWAP)
975 			int error;
976 
977 			UVMHIST_LOG(pdhist, "pagein from swslot %jd",
978 			     swslot, 0,0,0);
979 
980 			/*
981 			 * page in the swapped-out page.
982 			 * unlock object for i/o, relock when done.
983 			 */
984 
985 			uvm_page_array_clear(&a);
986 			rw_exit(uobj->vmobjlock);
987 			error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
988 			rw_enter(uobj->vmobjlock, RW_WRITER);
989 
990 			/*
991 			 * I/O done.  check for errors.
992 			 */
993 
994 			if (error != 0) {
995 				UVMHIST_LOG(pdhist, "<- done (error=%jd)",
996 				    error,0,0,0);
997 
998 				/*
999 				 * remove the swap slot from the aobj
1000 				 * and mark the aobj as having no real slot.
1001 				 * don't free the swap slot, thus preventing
1002 				 * it from being used again.
1003 				 */
1004 
1005 				swslot = uao_set_swslot(uobj, pageidx,
1006 				    SWSLOT_BAD);
1007 				if (swslot > 0) {
1008 					uvm_swap_markbad(swslot, 1);
1009 				}
1010 
1011 				uvm_pagefree(ptmp);
1012 				rw_exit(uobj->vmobjlock);
1013 				UVMHIST_LOG(pdhist, "<- done (error)",
1014 				    error,lcv,0,0);
1015 				if (lcv != 0) {
1016 					uvm_page_unbusy(pps, lcv);
1017 				}
1018 				memset(pps, 0, maxpages * sizeof(pps[0]));
1019 				uvm_page_array_fini(&a);
1020 				return error;
1021 			}
1022 #else /* defined(VMSWAP) */
1023 			panic("%s: pagein", __func__);
1024 #endif /* defined(VMSWAP) */
1025 		}
1026 
1027 		/*
1028 		 * note that we will allow the page to be writably mapped
1029 		 * (!PG_RDONLY) regardless of access_type.  if overwrite,
1030 		 * the page can be modified through an unmanaged mapping
1031 		 * so mark it dirty up front.
1032 		 */
1033 		if (overwrite) {
1034 			uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_DIRTY);
1035 		} else {
1036 			uvm_pagemarkdirty(ptmp, UVM_PAGE_STATUS_UNKNOWN);
1037 		}
1038 
1039 		/*
1040  		 * we got the page!   clear the fake flag (indicates valid
1041 		 * data now in page) and plug into our result array.   note
1042 		 * that page is still busy.
1043  		 *
1044  		 * it is the caller's job to:
1045  		 * => check if the page is released
1046  		 * => unbusy the page
1047  		 * => activate the page
1048  		 */
1049 		KASSERT(uvm_pagegetdirty(ptmp) != UVM_PAGE_STATUS_CLEAN);
1050 		KASSERT((ptmp->flags & PG_FAKE) != 0);
1051 		KASSERT(ptmp->offset == current_offset);
1052 		ptmp->flags &= ~PG_FAKE;
1053 		pps[lcv++] = ptmp;
1054 		current_offset += PAGE_SIZE;
1055 	}
1056 	uvm_page_array_fini(&a);
1057 
1058 	/*
1059  	 * finally, unlock object and return.
1060  	 */
1061 
1062 done:
1063 	rw_exit(uobj->vmobjlock);
1064 	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
1065 	return 0;
1066 }
1067 
1068 #if defined(VMSWAP)
1069 
1070 /*
1071  * uao_dropswap:  release any swap resources from this aobj page.
1072  *
1073  * => aobj must be locked or have a reference count of 0.
1074  */
1075 
1076 void
1077 uao_dropswap(struct uvm_object *uobj, int pageidx)
1078 {
1079 	int slot;
1080 
1081 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1082 
1083 	slot = uao_set_swslot(uobj, pageidx, 0);
1084 	if (slot) {
1085 		uvm_swap_free(slot, 1);
1086 	}
1087 }
1088 
1089 /*
1090  * page in every page in every aobj that is paged-out to a range of swslots.
1091  *
1092  * => nothing should be locked.
1093  * => returns true if pagein was aborted due to lack of memory.
1094  */
1095 
1096 bool
1097 uao_swap_off(int startslot, int endslot)
1098 {
1099 	struct uvm_aobj *aobj;
1100 
1101 	/*
1102 	 * Walk the list of all anonymous UVM objects.  Grab the first.
1103 	 */
1104 	mutex_enter(&uao_list_lock);
1105 	if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
1106 		mutex_exit(&uao_list_lock);
1107 		return false;
1108 	}
1109 	uao_reference(&aobj->u_obj);
1110 
1111 	do {
1112 		struct uvm_aobj *nextaobj;
1113 		bool rv;
1114 
1115 		/*
1116 		 * Prefetch the next object and immediately hold a reference
1117 		 * on it, so neither the current nor the next entry could
1118 		 * disappear while we are iterating.
1119 		 */
1120 		if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
1121 			uao_reference(&nextaobj->u_obj);
1122 		}
1123 		mutex_exit(&uao_list_lock);
1124 
1125 		/*
1126 		 * Page in all pages in the swap slot range.
1127 		 */
1128 		rw_enter(aobj->u_obj.vmobjlock, RW_WRITER);
1129 		rv = uao_pagein(aobj, startslot, endslot);
1130 		rw_exit(aobj->u_obj.vmobjlock);
1131 
1132 		/* Drop the reference of the current object. */
1133 		uao_detach(&aobj->u_obj);
1134 		if (rv) {
1135 			if (nextaobj) {
1136 				uao_detach(&nextaobj->u_obj);
1137 			}
1138 			return rv;
1139 		}
1140 
1141 		aobj = nextaobj;
1142 		mutex_enter(&uao_list_lock);
1143 	} while (aobj);
1144 
1145 	mutex_exit(&uao_list_lock);
1146 	return false;
1147 }
1148 
1149 /*
1150  * page in any pages from aobj in the given range.
1151  *
1152  * => aobj must be locked and is returned locked.
1153  * => returns true if pagein was aborted due to lack of memory.
1154  */
1155 static bool
1156 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1157 {
1158 	bool rv;
1159 
1160 	if (UAO_USES_SWHASH(aobj)) {
1161 		struct uao_swhash_elt *elt;
1162 		int buck;
1163 
1164 restart:
1165 		for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
1166 			for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
1167 			     elt != NULL;
1168 			     elt = LIST_NEXT(elt, list)) {
1169 				int i;
1170 
1171 				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1172 					int slot = elt->slots[i];
1173 
1174 					/*
1175 					 * if the slot isn't in range, skip it.
1176 					 */
1177 
1178 					if (slot < startslot ||
1179 					    slot >= endslot) {
1180 						continue;
1181 					}
1182 
1183 					/*
1184 					 * process the page,
1185 					 * then start over on this object
1186 					 * since the swhash elt
1187 					 * may have been freed.
1188 					 */
1189 
1190 					rv = uao_pagein_page(aobj,
1191 					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1192 					if (rv) {
1193 						return rv;
1194 					}
1195 					goto restart;
1196 				}
1197 			}
1198 		}
1199 	} else {
1200 		int i;
1201 
1202 		for (i = 0; i < aobj->u_pages; i++) {
1203 			int slot = aobj->u_swslots[i];
1204 
1205 			/*
1206 			 * if the slot isn't in range, skip it
1207 			 */
1208 
1209 			if (slot < startslot || slot >= endslot) {
1210 				continue;
1211 			}
1212 
1213 			/*
1214 			 * process the page.
1215 			 */
1216 
1217 			rv = uao_pagein_page(aobj, i);
1218 			if (rv) {
1219 				return rv;
1220 			}
1221 		}
1222 	}
1223 
1224 	return false;
1225 }
1226 
1227 /*
1228  * uao_pagein_page: page in a single page from an anonymous UVM object.
1229  *
1230  * => Returns true if pagein was aborted due to lack of memory.
1231  * => Object must be locked and is returned locked.
1232  */
1233 
1234 static bool
1235 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1236 {
1237 	struct uvm_object *uobj = &aobj->u_obj;
1238 	struct vm_page *pg;
1239 	int rv, npages;
1240 
1241 	pg = NULL;
1242 	npages = 1;
1243 
1244 	KASSERT(rw_write_held(uobj->vmobjlock));
1245 	rv = uao_get(uobj, (voff_t)pageidx << PAGE_SHIFT, &pg, &npages,
1246 	    0, VM_PROT_READ | VM_PROT_WRITE, 0, PGO_SYNCIO);
1247 
1248 	/*
1249 	 * relock and finish up.
1250 	 */
1251 
1252 	rw_enter(uobj->vmobjlock, RW_WRITER);
1253 	switch (rv) {
1254 	case 0:
1255 		break;
1256 
1257 	case EIO:
1258 	case ERESTART:
1259 
1260 		/*
1261 		 * nothing more to do on errors.
1262 		 * ERESTART can only mean that the anon was freed,
1263 		 * so again there's nothing to do.
1264 		 */
1265 
1266 		return false;
1267 
1268 	default:
1269 		return true;
1270 	}
1271 
1272 	/*
1273 	 * ok, we've got the page now.
1274 	 * mark it as dirty, clear its swslot and un-busy it.
1275 	 */
1276 	uao_dropswap(&aobj->u_obj, pageidx);
1277 
1278 	/*
1279 	 * make sure it's on a page queue.
1280 	 */
1281 	uvm_pagelock(pg);
1282 	uvm_pageenqueue(pg);
1283 	uvm_pagewakeup(pg);
1284 	uvm_pageunlock(pg);
1285 
1286 	pg->flags &= ~(PG_BUSY|PG_FAKE);
1287 	uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY);
1288 	UVM_PAGE_OWN(pg, NULL);
1289 
1290 	return false;
1291 }
1292 
1293 /*
1294  * uao_dropswap_range: drop swapslots in the range.
1295  *
1296  * => aobj must be locked and is returned locked.
1297  * => start is inclusive.  end is exclusive.
1298  */
1299 
1300 void
1301 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
1302 {
1303 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
1304 	int swpgonlydelta = 0;
1305 
1306 	KASSERT(UVM_OBJ_IS_AOBJ(uobj));
1307 	KASSERT(rw_write_held(uobj->vmobjlock));
1308 
1309 	if (end == 0) {
1310 		end = INT64_MAX;
1311 	}
1312 
1313 	if (UAO_USES_SWHASH(aobj)) {
1314 		int i, hashbuckets = aobj->u_swhashmask + 1;
1315 		voff_t taghi;
1316 		voff_t taglo;
1317 
1318 		taglo = UAO_SWHASH_ELT_TAG(start);
1319 		taghi = UAO_SWHASH_ELT_TAG(end);
1320 
1321 		for (i = 0; i < hashbuckets; i++) {
1322 			struct uao_swhash_elt *elt, *next;
1323 
1324 			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
1325 			     elt != NULL;
1326 			     elt = next) {
1327 				int startidx, endidx;
1328 				int j;
1329 
1330 				next = LIST_NEXT(elt, list);
1331 
1332 				if (elt->tag < taglo || taghi < elt->tag) {
1333 					continue;
1334 				}
1335 
1336 				if (elt->tag == taglo) {
1337 					startidx =
1338 					    UAO_SWHASH_ELT_PAGESLOT_IDX(start);
1339 				} else {
1340 					startidx = 0;
1341 				}
1342 
1343 				if (elt->tag == taghi) {
1344 					endidx =
1345 					    UAO_SWHASH_ELT_PAGESLOT_IDX(end);
1346 				} else {
1347 					endidx = UAO_SWHASH_CLUSTER_SIZE;
1348 				}
1349 
1350 				for (j = startidx; j < endidx; j++) {
1351 					int slot = elt->slots[j];
1352 
1353 					KASSERT(uvm_pagelookup(&aobj->u_obj,
1354 					    (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
1355 					    + j) << PAGE_SHIFT) == NULL);
1356 					if (slot > 0) {
1357 						uvm_swap_free(slot, 1);
1358 						swpgonlydelta++;
1359 						KASSERT(elt->count > 0);
1360 						elt->slots[j] = 0;
1361 						elt->count--;
1362 					}
1363 				}
1364 
1365 				if (elt->count == 0) {
1366 					LIST_REMOVE(elt, list);
1367 					pool_put(&uao_swhash_elt_pool, elt);
1368 				}
1369 			}
1370 		}
1371 	} else {
1372 		int i;
1373 
1374 		if (aobj->u_pages < end) {
1375 			end = aobj->u_pages;
1376 		}
1377 		for (i = start; i < end; i++) {
1378 			int slot = aobj->u_swslots[i];
1379 
1380 			if (slot > 0) {
1381 				uvm_swap_free(slot, 1);
1382 				swpgonlydelta++;
1383 			}
1384 		}
1385 	}
1386 
1387 	/*
1388 	 * adjust the counter of pages only in swap for all
1389 	 * the swap slots we've freed.
1390 	 */
1391 
1392 	if (swpgonlydelta > 0) {
1393 		KASSERT(uvmexp.swpgonly >= swpgonlydelta);
1394 		atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
1395 	}
1396 }
1397 
1398 #endif /* defined(VMSWAP) */
1399