1 /*	$OpenBSD: uvm_aobj.c,v 1.36 2009/03/20 15:19:04 oga Exp $	*/
2 /*	$NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
6  *                    Washington University.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Charles D. Cranor and
20  *      Washington University.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
36  */
37 /*
38  * uvm_aobj.c: anonymous memory uvm_object pager
39  *
40  * author: Chuck Silvers <chuq@chuq.com>
41  * started: Jan-1998
42  *
43  * - design mostly from Chuck Cranor
44  */
45 
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/malloc.h>
50 #include <sys/kernel.h>
51 #include <sys/pool.h>
53 
54 #include <uvm/uvm.h>
55 
56 /*
57  * an aobj manages anonymous-memory backed uvm_objects.   in addition
58  * to keeping the list of resident pages, it also keeps a list of
59  * allocated swap blocks.  depending on the size of the aobj this list
60  * of allocated swap blocks is either stored in an array (small objects)
61  * or in a hash table (large objects).
62  */
63 
64 /*
65  * local structures
66  */
67 
68 /*
69  * for hash tables, we break the address space of the aobj into blocks
70  * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
71  * be a power of two.
72  */
73 
74 #define UAO_SWHASH_CLUSTER_SHIFT 4
75 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
76 
77 /* get the "tag" for this page index */
78 #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
79 	((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
80 
81 /* given an ELT and a page index, find the swap slot */
82 #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
83 	((ELT)->slots[(PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1)])
84 
85 /* given an ELT, return its pageidx base */
86 #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
87 	((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
88 
89 /*
90  * the swhash hash function
91  */
92 #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
93 	(&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
94 			    & (AOBJ)->u_swhashmask)])
95 
96 /*
97  * the swhash threshold determines if we will use an array or a
98  * hash table to store the list of allocated swap blocks.
99  */
100 
101 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
102 #define UAO_USES_SWHASH(AOBJ) \
103 	((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)	/* use hash? */
104 
105 /*
106  * the number of buckets in a swhash, with an upper bound
107  */
108 #define UAO_SWHASH_MAXBUCKETS 256
109 #define UAO_SWHASH_BUCKETS(AOBJ) \
110 	(min((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
111 	     UAO_SWHASH_MAXBUCKETS))
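
/*
 * example (for illustration only): with the default cluster shift of 4,
 * UAO_SWHASH_CLUSTER_SIZE is 16, so for pageidx 0x123:
 *
 *	UAO_SWHASH_ELT_TAG(0x123)	== 0x123 >> 4	== 0x12
 *	slot index within the elt	== 0x123 & 15	== 0x3
 *	bucket picked by UAO_SWHASH_HASH == (0x123 >> 4) & u_swhashmask
 *
 * UAO_SWHASH_THRESHOLD works out to 16 * 4 = 64 pages: an aobj of 64
 * pages or less keeps a flat u_swslots array, anything bigger hashes
 * 16-slot clusters into at most 256 buckets.
 */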
112 
113 
114 /*
115  * uao_swhash_elt: when a hash table is being used, this structure defines
116  * the format of an entry in the bucket list.
117  */
118 
119 struct uao_swhash_elt {
120 	LIST_ENTRY(uao_swhash_elt) list;	/* the hash list */
121 	voff_t tag;				/* our 'tag' */
122 	int count;				/* our number of active slots */
123 	int slots[UAO_SWHASH_CLUSTER_SIZE];	/* the slots */
124 };
125 
126 /*
127  * uao_swhash: the swap hash table structure
128  */
129 
130 LIST_HEAD(uao_swhash, uao_swhash_elt);
131 
132 /*
133  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
134  */
135 
136 struct pool uao_swhash_elt_pool;
137 
138 /*
139  * uvm_aobj: the actual anon-backed uvm_object
140  *
141  * => the uvm_object is at the top of the structure, this allows
142  *   (struct uvm_aobj *) == (struct uvm_object *)
143  * => only one of u_swslots and u_swhash is used in any given aobj
144  */
145 
146 struct uvm_aobj {
147 	struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
148 	int u_pages;		 /* number of pages in entire object */
149 	int u_flags;		 /* the flags (see uvm_aobj.h) */
150 	int *u_swslots;		 /* array of offset->swapslot mappings */
151 				 /*
152 				  * hashtable of offset->swapslot mappings
153 				  * (u_swhash is an array of bucket heads)
154 				  */
155 	struct uao_swhash *u_swhash;
156 	u_long u_swhashmask;		/* mask for hashtable */
157 	LIST_ENTRY(uvm_aobj) u_list;	/* global list of aobjs */
158 };
159 
160 /*
161  * uvm_aobj_pool: pool of uvm_aobj structures
162  */
163 
164 struct pool uvm_aobj_pool;
165 
166 /*
167  * local functions
168  */
169 
170 static struct uao_swhash_elt	*uao_find_swhash_elt(struct uvm_aobj *, int,
171 				     boolean_t);
172 static int			 uao_find_swslot(struct uvm_aobj *, int);
173 static boolean_t		 uao_flush(struct uvm_object *, voff_t,
174 				     voff_t, int);
175 static void			 uao_free(struct uvm_aobj *);
176 static int			 uao_get(struct uvm_object *, voff_t,
177 				     vm_page_t *, int *, int, vm_prot_t,
178 				     int, int);
179 static boolean_t		 uao_releasepg(struct vm_page *,
180 				     struct vm_page **);
181 static boolean_t		 uao_pagein(struct uvm_aobj *, int, int);
182 static boolean_t		 uao_pagein_page(struct uvm_aobj *, int);
183 
184 /*
185  * aobj_pager
186  *
187  * note that some functions (e.g. put) are handled elsewhere
188  */
189 
190 struct uvm_pagerops aobj_pager = {
191 	NULL,			/* init */
192 	uao_reference,		/* reference */
193 	uao_detach,		/* detach */
194 	NULL,			/* fault */
195 	uao_flush,		/* flush */
196 	uao_get,		/* get */
197 	NULL,			/* put (done by pagedaemon) */
198 	NULL,			/* cluster */
199 	NULL,			/* mk_pcluster */
200 	uao_releasepg		/* releasepg */
201 };
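
/*
 * note (illustrative, not a call site in this file): generic UVM code
 * reaches these handlers through the object's pgops pointer, roughly
 *
 *	error = uobj->pgops->pgo_get(uobj, offset, pps, &npages,
 *	    centeridx, access_type, advice, flags);
 *
 * so for an aobj the get hook above resolves to uao_get() below.
 */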
202 
203 /*
204  * uao_list: global list of active aobjs, locked by uao_list_lock
205  */
206 
207 static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
208 static simple_lock_data_t uao_list_lock;
209 
210 
211 /*
212  * functions
213  */
214 
215 /*
216  * hash table/array related functions
217  */
218 
219 /*
220  * uao_find_swhash_elt: find (or create) a hash table entry for a page
221  * offset.
222  *
223  * => the object should be locked by the caller
224  */
225 
226 static struct uao_swhash_elt *
227 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
228 {
229 	struct uao_swhash *swhash;
230 	struct uao_swhash_elt *elt;
231 	voff_t page_tag;
232 
233 	swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
234 	page_tag = UAO_SWHASH_ELT_TAG(pageidx);	/* tag to search for */
235 
236 	/*
237 	 * now search the bucket for the requested tag
238 	 */
239 	LIST_FOREACH(elt, swhash, list) {
240 		if (elt->tag == page_tag)
241 			return(elt);
242 	}
243 
244 	/* fail now if we are not allowed to create a new entry in the bucket */
245 	if (!create)
246 		return NULL;
247 
248 
249 	/*
250 	 * allocate a new entry for the bucket and init/insert it in
251 	 */
252 	elt = pool_get(&uao_swhash_elt_pool, PR_WAITOK);
253 	LIST_INSERT_HEAD(swhash, elt, list);
254 	elt->tag = page_tag;
255 	elt->count = 0;
256 	memset(elt->slots, 0, sizeof(elt->slots));
257 
258 	return(elt);
259 }
260 
261 /*
262  * uao_find_swslot: find the swap slot number for an aobj/pageidx
263  *
264  * => object must be locked by caller
265  */
266 __inline static int
267 uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
268 {
269 
270 	/*
271 	 * if noswap flag is set, then we never return a slot
272 	 */
273 
274 	if (aobj->u_flags & UAO_FLAG_NOSWAP)
275 		return(0);
276 
277 	/*
278 	 * if hashing, look in hash table.
279 	 */
280 
281 	if (UAO_USES_SWHASH(aobj)) {
282 		struct uao_swhash_elt *elt =
283 		    uao_find_swhash_elt(aobj, pageidx, FALSE);
284 
285 		if (elt)
286 			return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
287 		else
288 			return(0);
289 	}
290 
291 	/*
292 	 * otherwise, look in the array
293 	 */
294 	return(aobj->u_swslots[pageidx]);
295 }
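
/*
 * note on slot values (a summary, not new behavior): 0 means "no swap
 * slot allocated for this page"; valid slot numbers are always non-zero.
 * a slot recorded as SWSLOT_BAD (see the I/O error path in uao_get()
 * below) marks swap space that failed and must never be reused.
 */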
296 
297 /*
298  * uao_set_swslot: set the swap slot for a page in an aobj.
299  *
300  * => setting a slot to zero frees the slot
301  * => object must be locked by caller
302  */
303 int
304 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
305 {
306 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
307 	int oldslot;
308 	UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
309 	UVMHIST_LOG(pdhist, "aobj %p pageidx %ld slot %ld",
310 	    aobj, pageidx, slot, 0);
311 
312 	/*
313 	 * if noswap flag is set, then we can't set a slot
314 	 */
315 
316 	if (aobj->u_flags & UAO_FLAG_NOSWAP) {
317 
318 		if (slot == 0)
319 			return(0);		/* a clear is ok */
320 
321 		/* but a set is not */
322 		printf("uao_set_swslot: uobj = %p\n", uobj);
323 	    panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
324 	}
325 
326 	/*
327 	 * are we using a hash table?  if so, add it in the hash.
328 	 */
329 
330 	if (UAO_USES_SWHASH(aobj)) {
331 
332 		/*
333 		 * Avoid allocating an entry just to free it again if
334 		 * the page had no swap slot in the first place, and
335 		 * we are freeing.
336 		 */
337 
338 		struct uao_swhash_elt *elt =
339 		    uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
340 		if (elt == NULL) {
341 			KASSERT(slot == 0);
342 			return (0);
343 		}
344 
345 		oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
346 		UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
347 
348 		/*
349 		 * now adjust the elt's reference counter and free it if we've
350 		 * dropped it to zero.
351 		 */
352 
353 		/* an allocation? */
354 		if (slot) {
355 			if (oldslot == 0)
356 				elt->count++;
357 		} else {		/* freeing slot ... */
358 			if (oldslot)	/* to be safe */
359 				elt->count--;
360 
361 			if (elt->count == 0) {
362 				LIST_REMOVE(elt, list);
363 				pool_put(&uao_swhash_elt_pool, elt);
364 			}
365 		}
366 	} else {
367 		/* we are using an array */
368 		oldslot = aobj->u_swslots[pageidx];
369 		aobj->u_swslots[pageidx] = slot;
370 	}
371 	return (oldslot);
372 }
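
/*
 * usage sketch: because uao_set_swslot() returns the previous slot, a
 * caller that wants to release the swap space behind a page can simply
 * do (this is exactly what uao_dropswap() below does):
 *
 *	slot = uao_set_swslot(uobj, pageidx, 0);
 *	if (slot)
 *		uvm_swap_free(slot, 1);
 */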
373 
374 /*
375  * end of hash/array functions
376  */
377 
378 /*
379  * uao_free: free all resources held by an aobj, and then free the aobj
380  *
381  * => the aobj should be dead
382  */
383 static void
384 uao_free(struct uvm_aobj *aobj)
385 {
386 
387 	simple_unlock(&aobj->u_obj.vmobjlock);
388 
389 	if (UAO_USES_SWHASH(aobj)) {
390 		int i, hashbuckets = aobj->u_swhashmask + 1;
391 
392 		/*
393 		 * free the swslots from each hash bucket,
394 		 * then the hash bucket, and finally the hash table itself.
395 		 */
396 		for (i = 0; i < hashbuckets; i++) {
397 			struct uao_swhash_elt *elt, *next;
398 
399 			for (elt = LIST_FIRST(&aobj->u_swhash[i]);
400 			     elt != NULL;
401 			     elt = next) {
402 				int j;
403 
404 				for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
405 					int slot = elt->slots[j];
406 
407 					if (slot == 0) {
408 						continue;
409 					}
410 					uvm_swap_free(slot, 1);
411 
412 					/*
413 					 * this page is no longer
414 					 * only in swap.
415 					 */
416 					simple_lock(&uvm.swap_data_lock);
417 					uvmexp.swpgonly--;
418 					simple_unlock(&uvm.swap_data_lock);
419 				}
420 
421 				next = LIST_NEXT(elt, list);
422 				pool_put(&uao_swhash_elt_pool, elt);
423 			}
424 		}
425 		free(aobj->u_swhash, M_UVMAOBJ);
426 	} else {
427 		int i;
428 
429 		/*
430 		 * free the array
431 		 */
432 
433 		for (i = 0; i < aobj->u_pages; i++) {
434 			int slot = aobj->u_swslots[i];
435 
436 			if (slot) {
437 				uvm_swap_free(slot, 1);
438 
439 				/* this page is no longer only in swap. */
440 				simple_lock(&uvm.swap_data_lock);
441 				uvmexp.swpgonly--;
442 				simple_unlock(&uvm.swap_data_lock);
443 			}
444 		}
445 		free(aobj->u_swslots, M_UVMAOBJ);
446 	}
447 
448 	/*
449 	 * finally free the aobj itself
450 	 */
451 	pool_put(&uvm_aobj_pool, aobj);
452 }
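
/*
 * note on the uvmexp.swpgonly updates above: swpgonly counts pages whose
 * only copy lives in swap.  uao_free() discards such slots without paging
 * them in, so each freed slot must also drop the counter by one, under
 * uvm.swap_data_lock.
 */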
453 
454 /*
455  * pager functions
456  */
457 
458 /*
459  * uao_create: create an aobj of the given size and return its uvm_object.
460  *
461  * => for normal use, flags are always zero
462  * => for the kernel object, the flags are:
463  *	UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
464  *	UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
465  */
466 struct uvm_object *
467 uao_create(vsize_t size, int flags)
468 {
469 	static struct uvm_aobj kernel_object_store; /* home of kernel_object */
470 	static int kobj_alloced = 0;			/* not allocated yet */
471 	int pages = round_page(size) >> PAGE_SHIFT;
472 	struct uvm_aobj *aobj;
473 
474 	/*
475 	 * malloc a new aobj unless we are asked for the kernel object
476 	 */
477 	if (flags & UAO_FLAG_KERNOBJ) {		/* want kernel object? */
478 		if (kobj_alloced)
479 			panic("uao_create: kernel object already allocated");
480 
481 		aobj = &kernel_object_store;
482 		aobj->u_pages = pages;
483 		aobj->u_flags = UAO_FLAG_NOSWAP;	/* no swap to start */
484 		/* we are special, we never die */
485 		aobj->u_obj.uo_refs = UVM_OBJ_KERN;
486 		kobj_alloced = UAO_FLAG_KERNOBJ;
487 	} else if (flags & UAO_FLAG_KERNSWAP) {
488 		aobj = &kernel_object_store;
489 		if (kobj_alloced != UAO_FLAG_KERNOBJ)
490 		    panic("uao_create: asked to enable swap on kernel object");
491 		kobj_alloced = UAO_FLAG_KERNSWAP;
492 	} else {	/* normal object */
493 		aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
494 		aobj->u_pages = pages;
495 		aobj->u_flags = 0;		/* normal object */
496 		aobj->u_obj.uo_refs = 1;	/* start with 1 reference */
497 	}
498 
499 	/*
500  	 * allocate hash/array if necessary
501  	 *
502  	 * note: in the KERNSWAP case no need to worry about locking since
503  	 * we are still booting: we should be the only thread around.
504  	 */
505 	if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
506 		int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ?
507 		    M_NOWAIT : M_WAITOK;
508 
509 		/* allocate hash table or array depending on object size */
510 		if (UAO_USES_SWHASH(aobj)) {
511 			aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
512 			    M_UVMAOBJ, mflags, &aobj->u_swhashmask);
513 			if (aobj->u_swhash == NULL)
514 				panic("uao_create: hashinit swhash failed");
515 		} else {
516 			aobj->u_swslots = malloc(pages * sizeof(int),
517 			    M_UVMAOBJ, mflags|M_ZERO);
518 			if (aobj->u_swslots == NULL)
519 				panic("uao_create: malloc swslots failed");
520 		}
521 
522 		if (flags) {
523 			aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
524 			return(&aobj->u_obj);
525 			/* done! */
526 		}
527 	}
528 
529 	/*
530  	 * init aobj fields
531  	 */
532 	simple_lock_init(&aobj->u_obj.vmobjlock);
533 	aobj->u_obj.pgops = &aobj_pager;
534 	TAILQ_INIT(&aobj->u_obj.memq);
535 	aobj->u_obj.uo_npages = 0;
536 
537 	/*
538  	 * now that aobj is ready, add it to the global list
539  	 */
540 	simple_lock(&uao_list_lock);
541 	LIST_INSERT_HEAD(&uao_list, aobj, u_list);
542 	simple_unlock(&uao_list_lock);
543 
544 	/*
545  	 * done!
546  	 */
547 	return(&aobj->u_obj);
548 }
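
/*
 * life-cycle sketch for a normal (non-kernel) aobj, using only the entry
 * points in this file: uao_create(size, 0) hands back an object holding
 * one reference; every additional user gains one with uao_reference() and
 * gives it up with uao_detach(); dropping the last reference frees the
 * pages and swap slots via uao_free().
 *
 *	struct uvm_object *uobj = uao_create(size, 0);
 *	uao_reference(uobj);		(a second user appears)
 *	...
 *	uao_detach(uobj);		(the second user goes away)
 *	uao_detach(uobj);		(last ref: everything is freed)
 */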
549 
550 
551 
552 /*
553  * uao_init: set up aobj pager subsystem
554  *
555  * => called at boot time from uvm_pager_init()
556  */
557 void
558 uao_init(void)
559 {
560 	static int uao_initialized;
561 
562 	if (uao_initialized)
563 		return;
564 	uao_initialized = TRUE;
565 
566 	LIST_INIT(&uao_list);
567 	simple_lock_init(&uao_list_lock);
568 
569 	/*
570 	 * NOTE: Pages for this pool must not come from a pageable
571 	 * kernel map!
572 	 */
573 	pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
574 	    0, 0, 0, "uaoeltpl", &pool_allocator_nointr);
575 
576 	pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, 0,
577 	    "aobjpl", &pool_allocator_nointr);
578 }
579 
580 /*
581  * uao_reference: add a ref to an aobj
582  *
583  * => aobj must be unlocked
584  * => just lock it and call the locked version
585  */
586 void
587 uao_reference(struct uvm_object *uobj)
588 {
589 	simple_lock(&uobj->vmobjlock);
590 	uao_reference_locked(uobj);
591 	simple_unlock(&uobj->vmobjlock);
592 }
593 
594 /*
595  * uao_reference_locked: add a ref to an aobj that is already locked
596  *
597  * => aobj must be locked
598  * this needs to be separate from the normal routine
599  * since sometimes we need to add a reference to an aobj when
600  * it's already locked.
601  */
602 void
603 uao_reference_locked(struct uvm_object *uobj)
604 {
605 	UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
606 
607 	/*
608  	 * kernel_object already has plenty of references, leave it alone.
609  	 */
610 
611 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
612 		return;
613 
614 	uobj->uo_refs++;		/* bump! */
615 	UVMHIST_LOG(maphist, "<- done (uobj=%p, ref = %ld)",
616 		    uobj, uobj->uo_refs,0,0);
617 }
618 
619 
620 /*
621  * uao_detach: drop a reference to an aobj
622  *
623  * => aobj must be unlocked
624  * => just lock it and call the locked version
625  */
626 void
627 uao_detach(struct uvm_object *uobj)
628 {
629 	simple_lock(&uobj->vmobjlock);
630 	uao_detach_locked(uobj);
631 }
632 
633 
634 /*
635  * uao_detach_locked: drop a reference to an aobj
636  *
637  * => aobj must be locked, and is unlocked (or freed) upon return.
638  * this needs to be separate from the normal routine
639  * since sometimes we need to detach from an aobj when
640  * it's already locked.
641  */
642 void
643 uao_detach_locked(struct uvm_object *uobj)
644 {
645 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
646 	struct vm_page *pg, *next;
647 	boolean_t busybody;
648 	UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
649 
650 	/*
651  	 * detaching from kernel_object is a noop.
652  	 */
653 	if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
654 		simple_unlock(&uobj->vmobjlock);
655 		return;
656 	}
657 
658 	UVMHIST_LOG(maphist,"  (uobj=%p)  ref=%ld", uobj,uobj->uo_refs,0,0);
659 	uobj->uo_refs--;				/* drop ref! */
660 	if (uobj->uo_refs) {				/* still more refs? */
661 		simple_unlock(&uobj->vmobjlock);
662 		UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
663 		return;
664 	}
665 
666 	/*
667  	 * remove the aobj from the global list.
668  	 */
669 	simple_lock(&uao_list_lock);
670 	LIST_REMOVE(aobj, u_list);
671 	simple_unlock(&uao_list_lock);
672 
673 	/*
674  	 * free all the pages that aren't PG_BUSY,
675 	 * mark for release any that are.
676  	 */
677 	busybody = FALSE;
678 	for (pg = TAILQ_FIRST(&uobj->memq); pg != NULL; pg = next) {
679 		next = TAILQ_NEXT(pg, listq);
680 		if (pg->pg_flags & PG_BUSY) {
681 			atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
682 			busybody = TRUE;
683 			continue;
684 		}
685 
686 		/* zap the mappings, free the swap slot, free the page */
687 		pmap_page_protect(pg, VM_PROT_NONE);
688 		uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
689 		uvm_lock_pageq();
690 		uvm_pagefree(pg);
691 		uvm_unlock_pageq();
692 	}
693 
694 	/*
695  	 * if we found any busy pages, we're done for now.
696  	 * mark the aobj for death, releasepg will finish up for us.
697  	 */
698 	if (busybody) {
699 		aobj->u_flags |= UAO_FLAG_KILLME;
700 		simple_unlock(&aobj->u_obj.vmobjlock);
701 		return;
702 	}
703 
704 	/*
705  	 * finally, free the rest.
706  	 */
707 	uao_free(aobj);
708 }
709 
710 /*
711  * uao_flush: "flush" pages out of a uvm object
712  *
713  * => object should be locked by caller.  we may _unlock_ the object
714  *	if (and only if) we need to clean a page (PGO_CLEANIT).
715  *	XXXJRT Currently, however, we don't.  In the case of cleaning
716  *	XXXJRT a page, we simply just deactivate it.  Should probably
717  *	XXXJRT handle this better, in the future (although "flushing"
718  *	XXXJRT anonymous memory isn't terribly important).
719  * => if PGO_CLEANIT is not set, then we will neither unlock the object
720  *	or block.
721  * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
722  *	for flushing.
723  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
724  *	that new pages are inserted on the tail end of the list.  thus,
725  *	we can make a complete pass through the object in one go by starting
726  *	at the head and working towards the tail (new pages are put in
727  *	front of us).
728  * => NOTE: we are allowed to lock the page queues, so the caller
729  *	must not be holding the lock on them [e.g. pagedaemon had
730  *	better not call us with the queues locked]
731  * => we return TRUE unless we encountered some sort of I/O error
732  *	XXXJRT currently never happens, as we never directly initiate
733  *	XXXJRT I/O
734  *
735  * comment on "cleaning" object and PG_BUSY pages:
736  *	this routine is holding the lock on the object.  the only time
737  *	that it can run into a PG_BUSY page that it does not own is if
738  *	some other process has started I/O on the page (e.g. either
739  *	a pagein or a pageout).  if the PG_BUSY page is being paged
740  *	in, then it can not be dirty (!PG_CLEAN) because no one has
741  *	had a chance to modify it yet.  if the PG_BUSY page is being
742  *	paged out then it means that someone else has already started
743  *	cleaning the page for us (how nice!).  in this case, if we
744  *	have syncio specified, then after we make our pass through the
745  *	object we need to wait for the other PG_BUSY pages to clear
746  *	off (i.e. we need to do an iosync).  also note that once a
747  *	page is PG_BUSY it must stay in its object until it is un-busied.
748  *	XXXJRT We never actually do this, as we are "flushing" anonymous
749  *	XXXJRT memory, which doesn't have persistent backing store.
750  *
751  * note on page traversal:
752  *	we can traverse the pages in an object either by going down the
753  *	linked list in "uobj->memq", or we can go over the address range
754  *	by page doing hash table lookups for each address.  depending
755  *	on how many pages are in the object it may be cheaper to do one
756  *	or the other.  we set "by_list" to true if we are using memq.
757  *	if the cost of a hash lookup was equal to the cost of the list
758  *	traversal we could compare the number of pages in the start->stop
759  *	range to the total number of pages in the object.  however, it
760  *	seems that a hash table lookup is more expensive than the linked
761  *	list traversal, so we multiply the number of pages in the
762  *	start->stop range by a penalty which we define below.
763  */
764 
765 #define	UAO_HASH_PENALTY 4	/* XXX: a guess */
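
/*
 * example of the penalty at work (numbers made up): flushing a 16 page
 * range of an object with 100 resident pages gives 16 * 4 = 64 < 100, so
 * we do per-page hash lookups over the range; with only 50 resident pages
 * we walk the whole memq list instead.
 */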
766 
767 boolean_t
768 uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
769 {
770 	struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
771 	struct vm_page *pp, *ppnext;
772 	boolean_t retval, by_list;
773 	voff_t curoff;
774 	UVMHIST_FUNC("uao_flush"); UVMHIST_CALLED(maphist);
775 
776 	curoff = 0;	/* XXX: shut up gcc */
777 
778 	retval = TRUE;	/* default to success */
779 
780 	if (flags & PGO_ALLPAGES) {
781 		start = 0;
782 		stop = aobj->u_pages << PAGE_SHIFT;
783 		by_list = TRUE;		/* always go by the list */
784 	} else {
785 		start = trunc_page(start);
786 		stop = round_page(stop);
787 		if (stop > (aobj->u_pages << PAGE_SHIFT)) {
788 			printf("uao_flush: strange, got an out of range "
789 			    "flush (fixed)\n");
790 			stop = aobj->u_pages << PAGE_SHIFT;
791 		}
792 		by_list = (uobj->uo_npages <=
793 		    ((stop - start) >> PAGE_SHIFT) * UAO_HASH_PENALTY);
794 	}
795 
796 	UVMHIST_LOG(maphist,
797 	    " flush start=0x%lx, stop=0x%lx, by_list=%ld, flags=0x%lx",
798 	    (u_long)start, (u_long)stop, by_list, flags);
799 
800 	/*
801 	 * Don't need to do any work here if we're not freeing
802 	 * or deactivating pages.
803 	 */
804 	if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
805 		UVMHIST_LOG(maphist,
806 		    "<- done (no work to do)",0,0,0,0);
807 		return (retval);
808 	}
809 
810 	/*
811 	 * now do it.  note: we must update ppnext in the body of loop or we
812 	 * will get stuck.  we need to use ppnext because we may free "pp"
813 	 * before doing the next loop.
814 	 */
815 
816 	if (by_list) {
817 		pp = TAILQ_FIRST(&uobj->memq);
818 	} else {
819 		curoff = start;
820 		pp = uvm_pagelookup(uobj, curoff);
821 	}
822 
823 	ppnext = NULL;	/* XXX: shut up gcc */
824 	uvm_lock_pageq();	/* page queues locked */
825 
826 	/* locked: both page queues and uobj */
827 	for ( ; (by_list && pp != NULL) ||
828 	    (!by_list && curoff < stop) ; pp = ppnext) {
829 		if (by_list) {
830 			ppnext = TAILQ_NEXT(pp, listq);
831 
832 			/* range check */
833 			if (pp->offset < start || pp->offset >= stop)
834 				continue;
835 		} else {
836 			curoff += PAGE_SIZE;
837 			if (curoff < stop)
838 				ppnext = uvm_pagelookup(uobj, curoff);
839 
840 			/* null check */
841 			if (pp == NULL)
842 				continue;
843 		}
844 
845 		switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
846 		/*
847 		 * XXX In these first 3 cases, we always just
848 		 * XXX deactivate the page.  We may want to
849 		 * XXX handle the different cases more specifically
850 		 * XXX in the future.
851 		 */
852 		case PGO_CLEANIT|PGO_FREE:
853 		case PGO_CLEANIT|PGO_DEACTIVATE:
854 		case PGO_DEACTIVATE:
855  deactivate_it:
856 			/* skip the page if it's loaned or wired */
857 			if (pp->loan_count != 0 ||
858 			    pp->wire_count != 0)
859 				continue;
860 
861 #ifdef UBC
862 			/* ...and deactivate the page. */
863 			pmap_clear_reference(pp);
864 #else
865 			/* zap all mappings for the page. */
866 			pmap_page_protect(pp, VM_PROT_NONE);
867 
868 			/* ...and deactivate the page. */
869 #endif
870 			uvm_pagedeactivate(pp);
871 
872 			continue;
873 
874 		case PGO_FREE:
875 			/*
876 			 * If there are multiple references to
877 			 * the object, just deactivate the page.
878 			 */
879 			if (uobj->uo_refs > 1)
880 				goto deactivate_it;
881 
882 			/* XXX skip the page if it's loaned or wired */
883 			if (pp->loan_count != 0 ||
884 			    pp->wire_count != 0)
885 				continue;
886 
887 			/*
888 			 * mark the page as released if its busy.
889 			 */
890 			if (pp->pg_flags & PG_BUSY) {
891 				atomic_setbits_int(&pp->pg_flags, PG_RELEASED);
892 				continue;
893 			}
894 
895 			/* zap all mappings for the page. */
896 			pmap_page_protect(pp, VM_PROT_NONE);
897 
898 			uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
899 			uvm_pagefree(pp);
900 
901 			continue;
902 
903 		default:
904 			panic("uao_flush: weird flags");
905 		}
906 	}
907 
908 	uvm_unlock_pageq();
909 
910 	UVMHIST_LOG(maphist,
911 	    "<- done, rv=%ld",retval,0,0,0);
912 	return (retval);
913 }
914 
915 /*
916  * uao_get: fetch me a page
917  *
918  * we have three cases:
919  * 1: page is resident     -> just return the page.
920  * 2: page is zero-fill    -> allocate a new page and zero it.
921  * 3: page is swapped out  -> fetch the page from swap.
922  *
923  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
924  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
925  * then we will need to return VM_PAGER_UNLOCK.
926  *
927  * => prefer map unlocked (not required)
928  * => object must be locked!  we will _unlock_ it before starting any I/O.
929  * => flags: PGO_ALLPAGES: get all of the pages
930  *           PGO_LOCKED: fault data structures are locked
931  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
932  * => NOTE: caller must check for released pages!!
933  */
934 static int
935 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
936     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
937 {
938 	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
939 	voff_t current_offset;
940 	vm_page_t ptmp;
941 	int lcv, gotpages, maxpages, swslot, rv, pageidx;
942 	boolean_t done;
943 	UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
944 
945 	UVMHIST_LOG(pdhist, "aobj=%p offset=%ld, flags=%ld",
946 		    aobj, (u_long)offset, flags,0);
947 
948 	/*
949  	 * get number of pages
950  	 */
951 	maxpages = *npagesp;
952 
953 	/*
954  	 * step 1: handle the case where fault data structures are locked.
955  	 */
956 
957 	if (flags & PGO_LOCKED) {
958 		/*
959  		 * step 1a: get pages that are already resident.   only do
960 		 * this if the data structures are locked (i.e. the first
961 		 * time through).
962  		 */
963 
964 		done = TRUE;	/* be optimistic */
965 		gotpages = 0;	/* # of pages we got so far */
966 
967 		for (lcv = 0, current_offset = offset ; lcv < maxpages ;
968 		    lcv++, current_offset += PAGE_SIZE) {
969 			/* do we care about this page?  if not, skip it */
970 			if (pps[lcv] == PGO_DONTCARE)
971 				continue;
972 
973 			ptmp = uvm_pagelookup(uobj, current_offset);
974 
975 			/*
976  			 * if page is new, attempt to allocate the page,
977 			 * zero-fill'd.
978  			 */
979 			if (ptmp == NULL && uao_find_swslot(aobj,
980 			    current_offset >> PAGE_SHIFT) == 0) {
981 				ptmp = uvm_pagealloc(uobj, current_offset,
982 				    NULL, UVM_PGA_ZERO);
983 				if (ptmp) {
984 					/* new page */
985 					atomic_clearbits_int(&ptmp->pg_flags,
986 					    PG_BUSY|PG_FAKE);
987 					atomic_setbits_int(&ptmp->pg_flags,
988 					    PQ_AOBJ);
989 					UVM_PAGE_OWN(ptmp, NULL);
990 				}
991 			}
992 
993 			/*
994 			 * to be useful must get a non-busy, non-released page
995 			 */
996 			if (ptmp == NULL ||
997 			    (ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) {
998 				if (lcv == centeridx ||
999 				    (flags & PGO_ALLPAGES) != 0)
1000 					/* need to do a wait or I/O! */
1001 					done = FALSE;
1002 				continue;
1003 			}
1004 
1005 			/*
1006 			 * useful page: busy/lock it and plug it in our
1007 			 * result array
1008 			 */
1009 			/* caller must un-busy this page */
1010 			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1011 			UVM_PAGE_OWN(ptmp, "uao_get1");
1012 			pps[lcv] = ptmp;
1013 			gotpages++;
1014 
1015 		}	/* "for" lcv loop */
1016 
1017 		/*
1018  		 * step 1b: now we've either done everything needed or we
1019 		 * need to unlock and do some waiting or I/O.
1020  		 */
1021 
1022 		UVMHIST_LOG(pdhist, "<- done (done=%ld)", done, 0,0,0);
1023 
1024 		*npagesp = gotpages;
1025 		if (done)
1026 			/* bingo! */
1027 			return(VM_PAGER_OK);
1028 		else
1029 			/* EEK!   Need to unlock and I/O */
1030 			return(VM_PAGER_UNLOCK);
1031 	}
1032 
1033 	/*
1034  	 * step 2: get non-resident or busy pages.
1035  	 * object is locked.   data structures are unlocked.
1036  	 */
1037 
1038 	for (lcv = 0, current_offset = offset ; lcv < maxpages ;
1039 	    lcv++, current_offset += PAGE_SIZE) {
1040 
1041 		/*
1042 		 * - skip over pages we've already gotten or don't want
1043 		 * - skip over pages we don't _have_ to get
1044 		 */
1045 
1046 		if (pps[lcv] != NULL ||
1047 		    (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
1048 			continue;
1049 
1050 		pageidx = current_offset >> PAGE_SHIFT;
1051 
1052 		/*
1053  		 * we have yet to locate the current page (pps[lcv]).   we
1054 		 * first look for a page that is already at the current offset.
1055 		 * if we find a page, we check to see if it is busy or
1056 		 * released.  if that is the case, then we sleep on the page
1057 		 * until it is no longer busy or released and repeat the lookup.
1058 		 * if the page we found is neither busy nor released, then we
1059 		 * busy it (so we own it) and plug it into pps[lcv].   this
1060 		 * 'break's the following while loop and indicates we are
1061 		 * ready to move on to the next page in the "lcv" loop above.
1062  		 *
1063  		 * if we exit the while loop with pps[lcv] still set to NULL,
1064 		 * then it means that we allocated a new busy/fake/clean page
1065 		 * ptmp in the object and we need to do I/O to fill in the data.
1066  		 */
1067 
1068 		/* top of "pps" while loop */
1069 		while (pps[lcv] == NULL) {
1070 			/* look for a resident page */
1071 			ptmp = uvm_pagelookup(uobj, current_offset);
1072 
1073 			/* not resident?   allocate one now (if we can) */
1074 			if (ptmp == NULL) {
1075 
1076 				ptmp = uvm_pagealloc(uobj, current_offset,
1077 				    NULL, 0);
1078 
1079 				/* out of RAM? */
1080 				if (ptmp == NULL) {
1081 					simple_unlock(&uobj->vmobjlock);
1082 					UVMHIST_LOG(pdhist,
1083 					    "sleeping, ptmp == NULL\n",0,0,0,0);
1084 					uvm_wait("uao_getpage");
1085 					simple_lock(&uobj->vmobjlock);
1086 					/* goto top of pps while loop */
1087 					continue;
1088 				}
1089 
1090 				/*
1091 				 * safe with PQ's unlocked: because we just
1092 				 * alloc'd the page
1093 				 */
1094 				atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
1095 
1096 				/*
1097 				 * got new page ready for I/O.  break pps while
1098 				 * loop.  pps[lcv] is still NULL.
1099 				 */
1100 				break;
1101 			}
1102 
1103 			/* page is there, see if we need to wait on it */
1104 			if ((ptmp->pg_flags & (PG_BUSY|PG_RELEASED)) != 0) {
1105 				atomic_setbits_int(&ptmp->pg_flags, PG_WANTED);
1106 				UVMHIST_LOG(pdhist,
1107 				    "sleeping, ptmp->flags 0x%lx\n",
1108 				    ptmp->pg_flags,0,0,0);
1109 				UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
1110 				    FALSE, "uao_get", 0);
1111 				simple_lock(&uobj->vmobjlock);
1112 				continue;	/* goto top of pps while loop */
1113 			}
1114 
1115 			/*
1116  			 * if we get here then the page has become resident and
1117 			 * unbusy between steps 1 and 2.  we busy it now (so we
1118 			 * own it) and set pps[lcv] (so that we exit the while
1119 			 * loop).
1120  			 */
1121 			/* we own it, caller must un-busy */
1122 			atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
1123 			UVM_PAGE_OWN(ptmp, "uao_get2");
1124 			pps[lcv] = ptmp;
1125 		}
1126 
1127 		/*
1128  		 * if we own the valid page at the correct offset, pps[lcv] will
1129  		 * point to it.   nothing more to do except go to the next page.
1130  		 */
1131 		if (pps[lcv])
1132 			continue;			/* next lcv */
1133 
1134 		/*
1135  		 * we have a "fake/busy/clean" page that we just allocated.
1136  		 * do the needed "i/o", either reading from swap or zeroing.
1137  		 */
1138 		swslot = uao_find_swslot(aobj, pageidx);
1139 
1140 		/*
1141  		 * just zero the page if there's nothing in swap.
1142  		 */
1143 		if (swslot == 0) {
1145 			/*
1146 			 * page hasn't existed before, just zero it.
1147 			 */
1148 			uvm_pagezero(ptmp);
1149 		} else {
1150 			UVMHIST_LOG(pdhist, "pagein from swslot %ld",
1151 			     swslot, 0,0,0);
1152 
1153 			/*
1154 			 * page in the swapped-out page.
1155 			 * unlock object for i/o, relock when done.
1156 			 */
1157 			simple_unlock(&uobj->vmobjlock);
1158 			rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
1159 			simple_lock(&uobj->vmobjlock);
1160 
1161 			/*
1162 			 * I/O done.  check for errors.
1163 			 */
1164 			if (rv != VM_PAGER_OK) {
1166 				UVMHIST_LOG(pdhist, "<- done (error=%ld)",
1167 				    rv,0,0,0);
1168 				if (ptmp->pg_flags & PG_WANTED)
1169 					wakeup(ptmp);
1170 
1171 				/*
1172 				 * remove the swap slot from the aobj
1173 				 * and mark the aobj as having no real slot.
1174 				 * don't free the swap slot, thus preventing
1175 				 * it from being used again.
1176 				 */
1177 				swslot = uao_set_swslot(&aobj->u_obj, pageidx,
1178 							SWSLOT_BAD);
1179 				uvm_swap_markbad(swslot, 1);
1180 
1181 				atomic_clearbits_int(&ptmp->pg_flags,
1182 				    PG_WANTED|PG_BUSY);
1183 				UVM_PAGE_OWN(ptmp, NULL);
1184 				uvm_lock_pageq();
1185 				uvm_pagefree(ptmp);
1186 				uvm_unlock_pageq();
1187 
1188 				simple_unlock(&uobj->vmobjlock);
1189 				return (rv);
1190 			}
1191 		}
1192 
1193 		/*
1194  		 * we got the page!   clear the fake flag (indicates valid
1195 		 * data now in page) and plug into our result array.   note
1196 		 * that page is still busy.
1197  		 *
1198  		 * it is the caller's job to:
1199  		 * => check if the page is released
1200  		 * => unbusy the page
1201  		 * => activate the page
1202  		 */
1203 
1204 		/* data is valid ... */
1205 		atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
1206 		pmap_clear_modify(ptmp);		/* ... and clean */
1207 		pps[lcv] = ptmp;
1208 
1209 	}	/* lcv loop */
1210 
1211 	/*
1212  	 * finally, unlock object and return.
1213  	 */
1214 
1215 	simple_unlock(&uobj->vmobjlock);
1216 	UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
1217 	return(VM_PAGER_OK);
1218 }
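
/*
 * note on how the two passes above are used (a sketch; the fault handler
 * itself lives elsewhere): the fault code first calls the pager with
 * PGO_LOCKED and may get VM_PAGER_UNLOCK back for the center page, in
 * which case it drops its locks and calls again without PGO_LOCKED so
 * that step 2 can sleep and do swap I/O.  uao_pagein_page() below is a
 * caller of the second kind: it passes flags of 0 and a one-page pps
 * array.
 */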
1219 
1220 /*
1221  * uao_releasepg: handle released page in an aobj
1222  *
1223  * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
1224  *      to dispose of.
1225  * => caller must handle PG_WANTED case
1226  * => called with page's object locked, pageq's unlocked
1227  * => returns TRUE if page's object is still alive, FALSE if we
1228  *      killed the page's object.    if we return TRUE, then we
1229  *      return with the object locked.
1230  * => if (nextpgp != NULL) => we return the next page on the queue, and return
1231  *                              with the page queues locked [for pagedaemon]
1232  * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
1233  * => we kill the aobj if it is not referenced and we are supposed to
1234  *      kill it ("KILLME").
1235  */
1236 static boolean_t
1237 uao_releasepg(struct vm_page *pg, struct vm_page **nextpgp /* OUT */)
1238 {
1239 	struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject;
1240 
1241 	KASSERT(pg->pg_flags & PG_RELEASED);
1242 
1243 	/*
1244  	 * dispose of the page [caller handles PG_WANTED] and swap slot.
1245  	 */
1246 	pmap_page_protect(pg, VM_PROT_NONE);
1247 	uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
1248 	uvm_lock_pageq();
1249 	if (nextpgp)
1250 		*nextpgp = TAILQ_NEXT(pg, pageq); /* next page for daemon */
1251 	uvm_pagefree(pg);
1252 	if (!nextpgp)
1253 		uvm_unlock_pageq();		/* keep locked for daemon */
1254 
1255 	/*
1256  	 * if we're not killing the object, we're done.
1257  	 */
1258 	if ((aobj->u_flags & UAO_FLAG_KILLME) == 0)
1259 		return TRUE;
1260 	KASSERT(aobj->u_obj.uo_refs == 0);
1261 
1262 	/*
1263  	 * if there are still pages in the object, we're done for now.
1264  	 */
1265 	if (aobj->u_obj.uo_npages != 0)
1266 		return TRUE;
1267 
1268 	KASSERT(TAILQ_EMPTY(&aobj->u_obj.memq));
1269 
1270 	/*
1271  	 * finally, free the rest.
1272  	 */
1273 	uao_free(aobj);
1274 
1275 	return FALSE;
1276 }
1277 
1278 
1279 /*
1280  * uao_dropswap:  release any swap resources from this aobj page.
1281  *
1282  * => aobj must be locked or have a reference count of 0.
1283  */
1284 
1285 void
1286 uao_dropswap(struct uvm_object *uobj, int pageidx)
1287 {
1288 	int slot;
1289 
1290 	slot = uao_set_swslot(uobj, pageidx, 0);
1291 	if (slot) {
1292 		uvm_swap_free(slot, 1);
1293 	}
1294 }
1295 
1296 
1297 /*
1298  * page in every page in every aobj that is paged-out to a range of swslots.
1299  *
1300  * => nothing should be locked.
1301  * => returns TRUE if pagein was aborted due to lack of memory.
1302  */
1303 boolean_t
1304 uao_swap_off(int startslot, int endslot)
1305 {
1306 	struct uvm_aobj *aobj, *nextaobj;
1307 
1308 	/*
1309 	 * walk the list of all aobjs.
1310 	 */
1311 
1312 restart:
1313 	simple_lock(&uao_list_lock);
1314 
1315 	for (aobj = LIST_FIRST(&uao_list);
1316 	     aobj != NULL;
1317 	     aobj = nextaobj) {
1318 		boolean_t rv;
1319 
1320 		/*
1321 		 * try to get the object lock,
1322 		 * start all over if we fail.
1323 		 * most of the time we'll get the aobj lock,
1324 		 * so this should be a rare case.
1325 		 */
1326 		if (!simple_lock_try(&aobj->u_obj.vmobjlock)) {
1327 			simple_unlock(&uao_list_lock);
1328 			goto restart;
1329 		}
1330 
1331 		/*
1332 		 * add a ref to the aobj so it doesn't disappear
1333 		 * while we're working.
1334 		 */
1335 		uao_reference_locked(&aobj->u_obj);
1336 
1337 		/*
1338 		 * now it's safe to unlock the uao list.
1339 		 */
1340 		simple_unlock(&uao_list_lock);
1341 
1342 		/*
1343 		 * page in any pages in the swslot range.
1344 		 * if there's an error, abort and return the error.
1345 		 */
1346 		rv = uao_pagein(aobj, startslot, endslot);
1347 		if (rv) {
1348 			uao_detach_locked(&aobj->u_obj);
1349 			return rv;
1350 		}
1351 
1352 		/*
1353 		 * we're done with this aobj.
1354 		 * relock the list and drop our ref on the aobj.
1355 		 */
1356 		simple_lock(&uao_list_lock);
1357 		nextaobj = LIST_NEXT(aobj, u_list);
1358 		uao_detach_locked(&aobj->u_obj);
1359 	}
1360 
1361 	/*
1362 	 * done with traversal, unlock the list
1363 	 */
1364 	simple_unlock(&uao_list_lock);
1365 	return FALSE;
1366 }
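
/*
 * intended caller (a sketch): when a swap device covering slots
 * [startslot, endslot) is being taken out of service, the swap code calls
 *
 *	if (uao_swap_off(startslot, endslot))
 *		(fail the swapoff: we ran out of memory)
 *
 * with no locks held, so that every aobj page living only on that device
 * is paged back into RAM first.
 */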
1367 
1368 
1369 /*
1370  * page in any pages from aobj in the given range.
1371  *
1372  * => aobj must be locked and is returned locked.
1373  * => returns TRUE if pagein was aborted due to lack of memory.
1374  */
1375 static boolean_t
1376 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
1377 {
1378 	boolean_t rv;
1379 
1380 	if (UAO_USES_SWHASH(aobj)) {
1381 		struct uao_swhash_elt *elt;
1382 		int bucket;
1383 
1384 restart:
1385 		for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
1386 			for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
1387 			     elt != NULL;
1388 			     elt = LIST_NEXT(elt, list)) {
1389 				int i;
1390 
1391 				for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
1392 					int slot = elt->slots[i];
1393 
1394 					/*
1395 					 * if the slot isn't in range, skip it.
1396 					 */
1397 					if (slot < startslot ||
1398 					    slot >= endslot) {
1399 						continue;
1400 					}
1401 
1402 					/*
1403 					 * process the page,
1404 					 * then start over on this object
1405 					 * since the swhash elt
1406 					 * may have been freed.
1407 					 */
1408 					rv = uao_pagein_page(aobj,
1409 					  UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
1410 					if (rv) {
1411 						return rv;
1412 					}
1413 					goto restart;
1414 				}
1415 			}
1416 		}
1417 	} else {
1418 		int i;
1419 
1420 		for (i = 0; i < aobj->u_pages; i++) {
1421 			int slot = aobj->u_swslots[i];
1422 
1423 			/*
1424 			 * if the slot isn't in range, skip it
1425 			 */
1426 			if (slot < startslot || slot >= endslot) {
1427 				continue;
1428 			}
1429 
1430 			/*
1431 			 * process the page.
1432 			 */
1433 			rv = uao_pagein_page(aobj, i);
1434 			if (rv) {
1435 				return rv;
1436 			}
1437 		}
1438 	}
1439 
1440 	return FALSE;
1441 }
1442 
1443 /*
1444  * page in a page from an aobj.  used for swap_off.
1445  * returns TRUE if pagein was aborted due to lack of memory.
1446  *
1447  * => aobj must be locked and is returned locked.
1448  */
1449 static boolean_t
1450 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
1451 {
1452 	struct vm_page *pg;
1453 	int rv, slot, npages;
1454 
1455 	pg = NULL;
1456 	npages = 1;
1457 	/* locked: aobj */
1458 	rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
1459 		     &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, 0);
1460 	/* unlocked: aobj */
1461 
1462 	/*
1463 	 * relock and finish up.
1464 	 */
1465 	simple_lock(&aobj->u_obj.vmobjlock);
1466 
1467 	switch (rv) {
1468 	case VM_PAGER_OK:
1469 		break;
1470 
1471 	case VM_PAGER_ERROR:
1472 	case VM_PAGER_REFAULT:
1473 		/*
1474 		 * nothing more to do on errors.
1475 		 * VM_PAGER_REFAULT can only mean that the anon was freed,
1476 		 * so again there's nothing to do.
1477 		 */
1478 		return FALSE;
1479 
1480 	}
1481 	KASSERT((pg->pg_flags & PG_RELEASED) == 0);
1482 
1483 	/*
1484 	 * ok, we've got the page now.
1485 	 * mark it as dirty, clear its swslot and un-busy it.
1486 	 */
1487 	slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
1488 	uvm_swap_free(slot, 1);
1489 	atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
1490 	UVM_PAGE_OWN(pg, NULL);
1491 
1492 	/*
1493 	 * deactivate the page (to put it on a page queue).
1494 	 */
1495 	pmap_clear_reference(pg);
1496 #ifndef UBC
1497 	pmap_page_protect(pg, VM_PROT_NONE);
1498 #endif
1499 	uvm_lock_pageq();
1500 	uvm_pagedeactivate(pg);
1501 	uvm_unlock_pageq();
1502 
1503 	return FALSE;
1504 }
1505