/*	$NetBSD: uvm_amap.c,v 1.68 2005/12/24 20:45:10 perry Exp $	*/

/*
 *
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor and
 *      Washington University.
 * 4. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * uvm_amap.c: amap operations
 */

/*
 * this file contains functions that perform operations on amaps.  see
 * uvm_amap.h for a brief explanation of the role of amaps in uvm.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.68 2005/12/24 20:45:10 perry Exp $");

#undef UVM_AMAP_INLINE		/* enable/disable amap inlines */

#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/pool.h>

#define UVM_AMAP_C		/* ensure disabled inlines are in */
#include <uvm/uvm.h>
#include <uvm/uvm_swap.h>
/*
 * pool for allocation of vm_amap structures.  note that the pool has
 * its own simplelock for its protection.  also note that in order to
 * avoid an endless loop, the amap pool's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 */
POOL_INIT(uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, "amappl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_UVMAMAP, "UVM amap", "UVM amap and related structures");

static struct simplelock amap_list_lock = SIMPLELOCK_INITIALIZER;
static LIST_HEAD(, vm_amap) amap_list;

/*
 * local functions
 */

static inline void
amap_list_insert(struct vm_amap *amap)
{

	simple_lock(&amap_list_lock);
	LIST_INSERT_HEAD(&amap_list, amap, am_list);
	simple_unlock(&amap_list_lock);
}

static inline void
amap_list_remove(struct vm_amap *amap)
{

	simple_lock(&amap_list_lock);
	LIST_REMOVE(amap, am_list);
	simple_unlock(&amap_list_lock);
}
#ifdef UVM_AMAP_PPREF
/*
 * what is ppref?   ppref is an _optional_ amap feature which is used
 * to keep track of reference counts on a per-page basis.  it is enabled
 * when UVM_AMAP_PPREF is defined.
 *
 * when enabled, an array of ints is allocated for the pprefs.  this
 * array is allocated only when a partial reference is added to the
 * amap (either by unmapping part of the amap, or gaining a reference
 * to only a part of an amap).  if the malloc of the array fails
 * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
 * that we tried to do ppref's but couldn't alloc the array so just
 * give up (after all, this is an optional feature!).
 *
 * the array is divided into page sized "chunks."   for chunks of length 1,
 * the chunk reference count plus one is stored in that chunk's slot.
 * for chunks of length > 1 the first slot contains (the reference count
 * plus one) * -1.    [the negative value indicates that the length is
 * greater than one.]   the second slot of the chunk contains the length
 * of the chunk.   here is an example:
 *
 * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
 *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
 *              <----------><-><----><-------><----><-><------->
 * (x = don't care)
 *
 * this allows one int to contain the ref count for the whole chunk.
 * note that the "plus one" part is needed because a reference count
 * of zero is neither positive nor negative (we need a way to tell
 * whether we've got one zero or a bunch of them).
 *
 * here are some in-line functions to help us.
 */

/*
 * pp_getreflen: get the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{

	if (ppref[offset] > 0) {		/* chunk size must be 1 */
		*refp = ppref[offset] - 1;	/* don't forget to adjust */
		*lenp = 1;
	} else {
		*refp = (ppref[offset] * -1) - 1;
		*lenp = ppref[offset+1];
	}
}

/*
 * pp_setreflen: set the reference and length for a specific offset
 *
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 0)
		return;
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = (ref + 1) * -1;
		ppref[offset+1] = len;
	}
}
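
/*
 * Illustrative sketch (disabled, not part of the original file): how the
 * encoding described above round-trips through pp_setreflen() and
 * pp_getreflen().  The array size of 16 is arbitrary.
 */
#if 0
static void
pp_encoding_example(void)
{
	int ppref[16];
	int ref, len;

	pp_setreflen(ppref, 0, 2, 4);	/* slots 0-3 at ref count 2 */
	pp_setreflen(ppref, 4, 3, 1);	/* slot 4 at ref count 3 */

	pp_getreflen(ppref, 0, &ref, &len);
	KASSERT(ref == 2 && len == 4);	/* ppref[0] == -3, ppref[1] == 4 */

	pp_getreflen(ppref, 4, &ref, &len);
	KASSERT(ref == 3 && len == 1);	/* ppref[4] == 4: chunk of length 1 */
}
#endif
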
#endif /* UVM_AMAP_PPREF */

/*
 * amap_alloc1: internal function that allocates an amap, but does not
 *	init the overlay.
 *
 * => lock on returned amap is init'd
 */
static inline struct vm_amap *
amap_alloc1(int slots, int padslots, int waitf)
{
	struct vm_amap *amap;
	int totalslots;

	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
	if (amap == NULL)
		return(NULL);

	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
	    sizeof(int);
	simple_lock_init(&amap->am_l);
	amap->am_ref = 1;
	amap->am_flags = 0;
#ifdef UVM_AMAP_PPREF
	amap->am_ppref = NULL;
#endif
	amap->am_maxslot = totalslots;
	amap->am_nslot = slots;
	amap->am_nused = 0;

	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
	    waitf);
	if (amap->am_slots == NULL)
		goto fail1;

	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
	if (amap->am_bckptr == NULL)
		goto fail2;

	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
	    M_UVMAMAP, waitf);
	if (amap->am_anon == NULL)
		goto fail3;

	return(amap);

fail3:
	free(amap->am_bckptr, M_UVMAMAP);
fail2:
	free(amap->am_slots, M_UVMAMAP);
fail1:
	pool_put(&uvm_amap_pool, amap);

	/*
	 * XXX hack to tell the pagedaemon how many pages we need,
	 * since we can need more than it would normally free.
	 */
	if (waitf == M_NOWAIT) {
		extern int uvm_extrapages;
		uvm_extrapages += ((sizeof(int) * 2 +
				    sizeof(struct vm_anon *)) *
				   totalslots) >> PAGE_SHIFT;
	}
	return (NULL);
}

/*
 * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
 *
 * => caller should ensure sz is a multiple of PAGE_SIZE
 * => reference count to new amap is set to one
 * => new amap is returned unlocked
 */

struct vm_amap *
amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
{
	struct vm_amap *amap;
	int slots, padslots;
	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);

	AMAP_B2SLOT(slots, sz);
	AMAP_B2SLOT(padslots, padsz);

	amap = amap_alloc1(slots, padslots, waitf);
	if (amap) {
		memset(amap->am_anon, 0,
		    amap->am_maxslot * sizeof(struct vm_anon *));

		/*
		 * amap_alloc1() can fail with M_NOWAIT, so only insert
		 * the amap on the global list if we actually got one.
		 */
		amap_list_insert(amap);
	}

	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
	return(amap);
}
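
/*
 * Illustrative sketch (disabled): a minimal caller, assuming we want an
 * amap covering eight pages of anonymous memory with no pad slots.  The
 * page count is arbitrary.
 */
#if 0
	struct vm_amap *amap;

	amap = amap_alloc(8 * PAGE_SIZE, 0, M_WAITOK);
	KASSERT(amap->am_nslot == 8);	/* am_maxslot may be rounded up */
#endif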


/*
 * amap_free: free an amap
 *
 * => the amap must be unlocked
 * => the amap should have a zero reference count and be empty
 */
void
amap_free(struct vm_amap *amap)
{
	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);

	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
	free(amap->am_slots, M_UVMAMAP);
	free(amap->am_bckptr, M_UVMAMAP);
	free(amap->am_anon, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		free(amap->am_ppref, M_UVMAMAP);
#endif
	pool_put(&uvm_amap_pool, amap);
	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
}

/*
 * amap_extend: extend the size of an amap (if needed)
 *
 * => called from uvm_map when we want to extend an amap to cover
 *    a new mapping (rather than allocate a new one)
 * => amap should be unlocked (we will lock it)
 * => to safely extend an amap it should have a reference count of
 *    one (thus it can't be shared)
 */
int
amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slotoff = entry->aref.ar_pageoff;
	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	int slotadj, slotspace;
#ifdef UVM_AMAP_PPREF
	int *newppref, *oldppref;
#endif
	int i, *newsl, *newbck, *oldsl, *oldbck;
	struct vm_anon **newover, **oldover;
	int mflag = (flags & AMAP_EXTEND_NOWAIT) ? M_NOWAIT :
		        (M_WAITOK | M_CANFAIL);

	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);

	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
	    entry, addsize, flags, 0);

	/*
	 * first, determine how many slots we need in the amap.  don't
	 * forget that ar_pageoff could be non-zero: this means that
	 * there are some unused slots before us in the amap.
	 */

	amap_lock(amap);
	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
	if (flags & AMAP_EXTEND_FORWARDS) {
		slotneed = slotoff + slotmapped + slotadd;
		slotadj = 0;
		slotspace = 0;
	}
	else {
		slotneed = slotadd + slotmapped;
		slotadj = slotadd - slotoff;
		slotspace = amap->am_maxslot - slotmapped;
	}

	/*
	 * case 1: we already have enough slots in the map and thus
	 * only need to bump the reference counts on the slots we are
	 * adding.
	 */

	if (flags & AMAP_EXTEND_FORWARDS) {
		if (amap->am_nslot >= slotneed) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff + slotmapped,
				    slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
			    amap, slotneed, 0, 0);
			return 0;
		}
	} else {
		if (slotadj <= 0) {
			slotoff -= slotadd;
			entry->aref.ar_pageoff = slotoff;
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				amap_pp_adjref(amap, slotoff, slotadd, 1);
			}
#endif
			amap_unlock(amap);
			UVMHIST_LOG(maphist,
			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
			    amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 2: we pre-allocated slots for use and we just need to
	 * bump nslot up to account for these slots.
	 */

	if (amap->am_maxslot >= slotneed) {
		if (flags & AMAP_EXTEND_FORWARDS) {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				if ((slotoff + slotmapped) < amap->am_nslot)
					amap_pp_adjref(amap,
					    slotoff + slotmapped,
					    (amap->am_nslot -
					    (slotoff + slotmapped)), 1);
				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
				    slotneed - amap->am_nslot);
			}
#endif
			amap->am_nslot = slotneed;
			amap_unlock(amap);

			/*
			 * no need to zero am_anon since that was done at
			 * alloc time and we never shrink an allocation.
			 */

			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
			    "slotneed=%d", amap, slotneed, 0, 0);
			return 0;
		} else {
#ifdef UVM_AMAP_PPREF
			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * Slide up the ref counts on the pages that
				 * are actually in use.
				 */
				memmove(amap->am_ppref + slotspace,
				    amap->am_ppref + slotoff,
				    slotmapped * sizeof(int));
				/*
				 * Mark the (adjusted) gap at the front as
				 * referenced/not referenced.
				 */
				pp_setreflen(amap->am_ppref,
				    0, 0, slotspace - slotadd);
				pp_setreflen(amap->am_ppref,
				    slotspace - slotadd, 1, slotadd);
			}
#endif

			/*
			 * Slide the anon pointers up and clear out
			 * the space we just made.
			 */
			memmove(amap->am_anon + slotspace,
			    amap->am_anon + slotoff,
			    slotmapped * sizeof(struct vm_anon*));
			memset(amap->am_anon + slotoff, 0,
			    (slotspace - slotoff) * sizeof(struct vm_anon *));

			/*
			 * Slide the backpointers up, but don't bother
			 * wiping out the old slots.
			 */
			memmove(amap->am_bckptr + slotspace,
			    amap->am_bckptr + slotoff,
			    slotmapped * sizeof(int));

			/*
			 * Adjust all the useful active slot numbers.
			 */
			for (i = 0; i < amap->am_nused; i++)
				amap->am_slots[i] += (slotspace - slotoff);

			/*
			 * We just filled all the empty space in the
			 * front of the amap by activating a few new
			 * slots.
			 */
			amap->am_nslot = amap->am_maxslot;
			entry->aref.ar_pageoff = slotspace - slotadd;
			amap_unlock(amap);

			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
			    "slotneed=%d", amap, slotneed, 0, 0);
			return 0;
		}
	}

	/*
	 * case 3: we need to malloc a new amap and copy all the amap
	 * data over from old amap to the new one.
	 *
	 * note that the use of a kernel realloc() probably would not
	 * help here, since we wish to abort cleanly if one of the
	 * three (or four) mallocs fails.
	 */

	amap_unlock(amap);	/* unlock in case we sleep in malloc */

	if (slotneed >= UVM_AMAP_LARGE) {
		return E2BIG;
	}

	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
#ifdef UVM_AMAP_PPREF
	newppref = NULL;
	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
#endif
	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
		    mflag);
	if (newsl == NULL || newbck == NULL || newover == NULL) {
#ifdef UVM_AMAP_PPREF
		if (newppref != NULL) {
			free(newppref, M_UVMAMAP);
		}
#endif
		if (newsl != NULL) {
			free(newsl, M_UVMAMAP);
		}
		if (newbck != NULL) {
			free(newbck, M_UVMAMAP);
		}
		if (newover != NULL) {
			free(newover, M_UVMAMAP);
		}
		return ENOMEM;
	}
	amap_lock(amap);
	KASSERT(amap->am_maxslot < slotneed);

	/*
	 * now copy everything over to new malloc'd areas...
	 */

	slotadded = slotalloc - amap->am_nslot;
	if (!(flags & AMAP_EXTEND_FORWARDS))
		slotspace = slotalloc - slotmapped;

	/* do am_slots */
	oldsl = amap->am_slots;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
	else
		for (i = 0; i < amap->am_nused; i++)
			newsl[i] = oldsl[i] + slotspace - slotoff;
	amap->am_slots = newsl;

	/* do am_anon */
	oldover = amap->am_anon;
	if (flags & AMAP_EXTEND_FORWARDS) {
		memcpy(newover, oldover,
		    sizeof(struct vm_anon *) * amap->am_nslot);
		memset(newover + amap->am_nslot, 0,
		    sizeof(struct vm_anon *) * slotadded);
	} else {
		memcpy(newover + slotspace, oldover + slotoff,
		    sizeof(struct vm_anon *) * slotmapped);
		memset(newover, 0,
		    sizeof(struct vm_anon *) * slotspace);
	}
	amap->am_anon = newover;

	/* do am_bckptr */
	oldbck = amap->am_bckptr;
	if (flags & AMAP_EXTEND_FORWARDS)
		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
	else
		memcpy(newbck + slotspace, oldbck + slotoff,
		    sizeof(int) * slotmapped);
	amap->am_bckptr = newbck;

#ifdef UVM_AMAP_PPREF
	/* do ppref */
	oldppref = amap->am_ppref;
	if (newppref) {
		if (flags & AMAP_EXTEND_FORWARDS) {
			memcpy(newppref, oldppref,
			    sizeof(int) * amap->am_nslot);
			memset(newppref + amap->am_nslot, 0,
			    sizeof(int) * slotadded);
		} else {
			memcpy(newppref + slotspace, oldppref + slotoff,
			    sizeof(int) * slotmapped);
		}
		amap->am_ppref = newppref;
		if ((flags & AMAP_EXTEND_FORWARDS) &&
		    (slotoff + slotmapped) < amap->am_nslot)
			amap_pp_adjref(amap, slotoff + slotmapped,
			    (amap->am_nslot - (slotoff + slotmapped)), 1);
		if (flags & AMAP_EXTEND_FORWARDS)
			pp_setreflen(newppref, amap->am_nslot, 1,
			    slotneed - amap->am_nslot);
		else {
			pp_setreflen(newppref, 0, 0,
			    slotalloc - slotneed);
			pp_setreflen(newppref, slotalloc - slotneed, 1,
			    slotneed - slotmapped);
		}
	} else {
		if (amap->am_ppref)
			amap->am_ppref = PPREF_NONE;
	}
#endif

	/* update master values */
	if (flags & AMAP_EXTEND_FORWARDS)
		amap->am_nslot = slotneed;
	else {
		entry->aref.ar_pageoff = slotspace - slotadd;
		amap->am_nslot = slotalloc;
	}
	amap->am_maxslot = slotalloc;

	amap_unlock(amap);
	free(oldsl, M_UVMAMAP);
	free(oldbck, M_UVMAMAP);
	free(oldover, M_UVMAMAP);
#ifdef UVM_AMAP_PPREF
	if (oldppref && oldppref != PPREF_NONE)
		free(oldppref, M_UVMAMAP);
#endif
	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
	    amap, slotneed, 0, 0);
	return 0;
}

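/*
 * Illustrative sketch (disabled): roughly how a uvm_map caller might grow
 * an entry's amap forward by one page, sleeping for memory but tolerating
 * failure.  "entry" is assumed to be a map entry with an attached amap.
 */
#if 0
	int error;

	error = amap_extend(entry, PAGE_SIZE, AMAP_EXTEND_FORWARDS);
	if (error != 0)
		return error;		/* E2BIG or ENOMEM */
#endif
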
/*
 * amap_share_protect: change protection of anons in a shared amap
 *
 * for shared amaps, given the current data structure layout, it is
 * not possible for us to directly locate all maps referencing the
 * shared anon (to change the protection).  in order to protect data
 * in shared maps we use pmap_page_protect().  [this is useful for IPC
 * mechanisms like map entry passing that may want to write-protect
 * all mappings of a shared amap.]  we traverse am_anon or am_slots
 * depending on the current state of the amap.
 *
 * => entry's map and amap must be locked by the caller
 */
void
amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int slots, lcv, slot, stop;

	LOCK_ASSERT(simple_lock_held(&amap->am_l));

	AMAP_B2SLOT(slots, (entry->end - entry->start));
	stop = entry->aref.ar_pageoff + slots;

	if (slots < amap->am_nused) {
		/* cheaper to traverse am_anon */
		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
			if (amap->am_anon[lcv] == NULL)
				continue;
			if (amap->am_anon[lcv]->an_page != NULL)
				pmap_page_protect(amap->am_anon[lcv]->an_page,
						  prot);
		}
		return;
	}

	/* cheaper to traverse am_slots */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		if (slot < entry->aref.ar_pageoff || slot >= stop)
			continue;
		if (amap->am_anon[slot]->an_page != NULL)
			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
	}
}

/*
 * amap_wipeout: wipeout all anon's in an amap; then free the amap!
 *
 * => called from amap_unref when the final reference to an amap is
 *	discarded (i.e. when reference count drops to 0)
 * => the amap should be locked (by the caller)
 */

void
amap_wipeout(struct vm_amap *amap)
{
	int lcv, slot;
	struct vm_anon *anon;
	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);

	KASSERT(amap->am_ref == 0);

	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
		/*
		 * amap_swap_off will call us again.
		 */
		amap_unlock(amap);
		return;
	}
	amap_list_remove(amap);
	amap_unlock(amap);

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		int refs;

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];

		if (anon == NULL || anon->an_ref == 0)
			panic("amap_wipeout: corrupt amap");

		simple_lock(&anon->an_lock);
		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
		    anon->an_ref, 0, 0);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {

			/*
			 * we had the last reference to a vm_anon. free it.
			 */

			uvm_anfree(anon);
		}

		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
			preempt(1);
	}

	/*
	 * now we free the map
	 */

	amap->am_nused = 0;
	amap_free(amap);	/* will unlock and free amap */
	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
}

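/*
 * Illustrative sketch (disabled) of the caller's contract, roughly what
 * amap_unref() does when the final reference goes away.
 */
#if 0
	amap_lock(amap);
	if (--amap->am_ref == 0)
		amap_wipeout(amap);	/* unlocks and frees the amap */
	else
		amap_unlock(amap);
#endif
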
/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => if canchunk is true, then we may clip the entry into a chunk
 * => "startva" and "endva" are used only if canchunk is true.  they are
 *     used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
    boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
	struct vm_amap *amap, *srcamap;
	int slots, lcv;
	vaddr_t chunksize;
	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
		    map, entry, waitf, 0);

	/*
	 * is there a map to copy?   if not, create one from scratch.
	 */

	if (entry->aref.ar_amap == NULL) {

		/*
		 * check to see if we have a large amap that we can
		 * chunk.  we align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(entry->end - entry->start) >=
		    UVM_AMAP_LARGE) {
			/* convert slots to bytes */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
			    "to 0x%x->0x%x", entry->start, entry->end, startva,
			    endva);
			UVM_MAP_CLIP_START(map, entry, startva, NULL);
			/* watch out for endva wrap-around! */
			if (endva >= startva)
				UVM_MAP_CLIP_END(map, entry, endva, NULL);
		}

		if (uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
			return;
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
		    entry->start, entry->end, 0, 0);
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
		    waitf);
		if (entry->aref.ar_amap != NULL)
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		return;
	}

	/*
	 * first check and see if we are the only map entry
	 * referencing the amap we currently have.  if so, then we can
	 * just take it over rather than copying it.  note that we are
	 * reading am_ref with the amap unlocked... the value can only
	 * be one if we have the only reference to the amap (via our
	 * locked map).  if we are greater than one we fall through to
	 * the next case (where we double check the value).
	 */

	if (entry->aref.ar_amap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	/*
	 * looks like we need to copy the map.
	 */

	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
	AMAP_B2SLOT(slots, entry->end - entry->start);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}
	srcamap = entry->aref.ar_amap;
	amap_lock(srcamap);

	/*
	 * need to double check reference count now that we've got the
	 * src amap locked down.  the reference count could have
	 * changed while we were in malloc.  if the reference count
	 * dropped down to one we take over the old map rather than
	 * copying the amap.
	 */

	if (srcamap->am_ref == 1) {		/* take it over? */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap->am_ref--;		/* drop final reference to map */
		amap_free(amap);	/* dispose of new (unused) amap */
		amap_unlock(srcamap);
		return;
	}

	/*
	 * we must copy it now.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		simple_lock(&amap->am_anon[lcv]->an_lock);
		amap->am_anon[lcv]->an_ref++;
		simple_unlock(&amap->am_anon[lcv]->an_lock);
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * drop our reference to the old amap (srcamap) and unlock.
	 * we know that the reference count on srcamap is greater than
	 * one (we checked above), so there is no way we could drop
	 * the count to zero.  [and no need to worry about freeing it]
	 */

	srcamap->am_ref--;
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
#ifdef UVM_AMAP_PPREF
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	amap_list_insert(amap);

	/*
	 * install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}

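/*
 * Illustrative sketch (disabled) of a fault-time caller, loosely modeled
 * on uvm_fault(): clear needs-copy before permitting a write.  The names
 * "map", "entry" and "startva" are schematic, not taken from uvm_fault.c.
 */
#if 0
	if (UVM_ET_ISNEEDSCOPY(entry)) {
		amap_copy(map, entry, M_NOWAIT, TRUE, startva, startva + 1);
		if (UVM_ET_ISNEEDSCOPY(entry)) {
			/* no memory: let the pagedaemon run, then retry */
			uvm_wait("amapcopy");
		}
	}
#endif
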
/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).    we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 * => page queues must be unlocked (we may lock them)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	int lcv, slot;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;

	/*
	 * note that if we unlock the amap then we must restart the "lcv"
	 * for-loop (at the ReStart label) because some other process could
	 * reorder the anons in the am_anon[] array while the lock is dropped.
	 */

ReStart:
	amap_lock(amap);
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {

		/*
		 * get the page
		 */

		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		simple_lock(&anon->an_lock);

		/*
		 * If the anon has only one ref, we must have already copied it.
		 * This can happen if we needed to sleep waiting for memory
		 * in a previous run through this loop.  The new page might
		 * even have been paged out, since the new page is not wired.
		 */

		if (anon->an_ref == 1) {
			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
			simple_unlock(&anon->an_lock);
			continue;
		}

		/*
		 * The old page must be resident since the parent is wired.
		 */

		pg = anon->an_page;
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count > 0);

		/*
		 * If the page is loaned then it must already be mapped
		 * read-only and we don't need to copy it.
		 */

		if (pg->loan_count != 0) {
			simple_unlock(&anon->an_lock);
			continue;
		}
		KASSERT(pg->uanon == anon && pg->uobject == NULL);

		/*
		 * if the page is busy then we have to unlock, wait for
		 * it and then restart.
		 */

		if (pg->flags & PG_BUSY) {
			pg->flags |= PG_WANTED;
			amap_unlock(amap);
			UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
			    "cownow", 0);
			goto ReStart;
		}

		/*
		 * ok, time to do a copy-on-write to a new anon
		 */

		nanon = uvm_analloc();
		if (nanon) {
			npg = uvm_pagealloc(NULL, 0, nanon, 0);
		} else
			npg = NULL;	/* XXX: quiet gcc warning */
		if (nanon == NULL || npg == NULL) {

			/*
			 * XXXCDC: we should cause fork to fail, but we can't.
			 */

			if (nanon) {
				nanon->an_ref--;
				simple_unlock(&nanon->an_lock);
				uvm_anfree(nanon);
			}
			simple_unlock(&anon->an_lock);
			amap_unlock(amap);
			uvm_wait("cownowpage");
			goto ReStart;
		}

		/*
		 * got it... now we can copy the data and replace anon
		 * with our new one...
		 */

		uvm_pagecopy(pg, npg);		/* old -> new */
		anon->an_ref--;			/* can't drop to zero */
		amap->am_anon[slot] = nanon;	/* replace */

		/*
		 * drop PG_BUSY on new page ... since we have had its owner
		 * locked the whole time it can't be PG_RELEASED or PG_WANTED.
		 */

		uvm_lock_pageq();
		uvm_pageactivate(npg);
		uvm_unlock_pageq();
		npg->flags &= ~(PG_BUSY|PG_FAKE);
		UVM_PAGE_OWN(npg, NULL);
		simple_unlock(&nanon->an_lock);
		simple_unlock(&anon->an_lock);
	}
	amap_unlock(amap);
}

/*
 * amap_splitref: split a single reference into two separate references
 *
 * => called from uvm_map's clip routines
 * => origref's map should be locked
 * => origref->ar_amap should be unlocked (we will lock)
 */
void
amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
{
	int leftslots;

	AMAP_B2SLOT(leftslots, offset);
	if (leftslots == 0)
		panic("amap_splitref: split at zero offset");

	amap_lock(origref->ar_amap);

	/*
	 * now: the amap is locked; make sure the split point actually
	 * falls inside it.
	 */

	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
		panic("amap_splitref: map size check failed");

#ifdef UVM_AMAP_PPREF
	/*
	 * establish ppref before we add a duplicate reference to the amap
	 */
	if (origref->ar_amap->am_ppref == NULL)
		amap_pp_establish(origref->ar_amap, origref->ar_pageoff);
#endif

	splitref->ar_amap = origref->ar_amap;
	splitref->ar_amap->am_ref++;		/* not a share reference */
	splitref->ar_pageoff = origref->ar_pageoff + leftslots;

	amap_unlock(origref->ar_amap);
}

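/*
 * Illustrative sketch (disabled): split the aref of an 8-page entry at
 * its 4-page point, as uvm_map's clip routines would.  Sizes are
 * arbitrary.
 */
#if 0
	struct vm_aref left = entry->aref, right;

	amap_splitref(&left, &right, 4 * PAGE_SIZE);
	KASSERT(right.ar_amap == left.ar_amap);		/* same amap */
	KASSERT(right.ar_pageoff == left.ar_pageoff + 4);
#endif
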
#ifdef UVM_AMAP_PPREF

/*
 * amap_pp_establish: add a ppref array to an amap, if possible
 *
 * => amap locked by caller
 */
void
amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
{
	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
	    M_UVMAMAP, M_NOWAIT);

	/*
	 * if we fail then we just won't use ppref for this amap
	 */

	if (amap->am_ppref == NULL) {
		amap->am_ppref = PPREF_NONE;	/* not using it */
		return;
	}
	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
	pp_setreflen(amap->am_ppref, 0, 0, offset);
	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
	    amap->am_nslot - offset);
	return;
}

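/*
 * Worked example (illustrative): after amap_pp_establish(amap, 2) on an
 * amap with am_nslot == 6 and am_ref == 3, the array encodes "2 slots at
 * ref 0, then 4 slots at ref 3":
 *
 *	am_ppref[0] == -1	(ref 0, chunk length > 1)
 *	am_ppref[1] ==  2	(chunk length)
 *	am_ppref[2] == -4	(ref 3, chunk length > 1)
 *	am_ppref[3] ==  4	(chunk length)
 */
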
/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => map and amap locked by caller
 * => caller must check that ppref != PPREF_NONE before calling
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * first advance to the correct place in the ppref array,
	 * fragment if needed.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref,
			    len - (curslot - lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv != 0)
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	else {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we're starting from the beginning of
		 * the ppref array; that is, there is no previous
		 * chunk.
		 */
		prevref = -1;
		prevlen = 0;
	}

	/*
	 * now adjust reference counts in range.  merge the first
	 * changed entry with the last unchanged entry if possible.
	 */

	if (lcv != curslot)
		panic("amap_pp_adjref: overshot target");

	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		if (ref < 0)
			panic("amap_pp_adjref: negative reference count");
		if (lcv == prevlcv + prevlen && ref == prevref) {
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0)
			amap_wiperange(amap, lcv, len);
	}
}

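/*
 * Worked example (illustrative): suppose am_ppref encodes 8 slots at
 * ref 1, i.e. ppref[0] == -2, ppref[1] == 8.  After
 *
 *	amap_pp_adjref(amap, 2, 4, 1);
 *
 * the range has been fragmented into three chunks:
 *
 *	slots 0-1: ref 1	(ppref[0] == -2, ppref[1] == 2)
 *	slots 2-5: ref 2	(ppref[2] == -3, ppref[3] == 4)
 *	slots 6-7: ref 1	(ppref[6] == -2, ppref[7] == 2)
 */
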
/*
 * amap_wiperange: wipe out a range of an amap
 * [different from amap_wipeout because the amap is kept intact]
 *
 * => both map and amap must be locked by caller.
 */
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
	int byanon, lcv, stop, curslot, ptr, slotend;
	struct vm_anon *anon;

	/*
	 * we can either traverse the amap by am_anon or by am_slots depending
	 * on which is cheaper.    decide now.
	 */

	if (slots < amap->am_nused) {
		byanon = TRUE;
		lcv = slotoff;
		stop = slotoff + slots;
		slotend = 0;
	} else {
		byanon = FALSE;
		lcv = 0;
		stop = amap->am_nused;
		slotend = slotoff + slots;
	}

	while (lcv < stop) {
		int refs;

		if (byanon) {
			curslot = lcv++;	/* lcv advances here */
			if (amap->am_anon[curslot] == NULL)
				continue;
		} else {
			curslot = amap->am_slots[lcv];
			if (curslot < slotoff || curslot >= slotend) {
				lcv++;		/* lcv advances here */
				continue;
			}
			stop--;	/* drop stop, since anon will be removed */
		}
		anon = amap->am_anon[curslot];

		/*
		 * remove it from the amap
		 */

		amap->am_anon[curslot] = NULL;
		ptr = amap->am_bckptr[curslot];
		if (ptr != (amap->am_nused - 1)) {
			amap->am_slots[ptr] =
			    amap->am_slots[amap->am_nused - 1];
			amap->am_bckptr[amap->am_slots[ptr]] =
			    ptr;    /* back ptr. */
		}
		amap->am_nused--;

		/*
		 * drop anon reference count
		 */

		simple_lock(&anon->an_lock);
		refs = --anon->an_ref;
		simple_unlock(&anon->an_lock);
		if (refs == 0) {

			/*
			 * we just eliminated the last reference to an anon.
			 * free it.
			 */

			uvm_anfree(anon);
		}
	}
}

#endif /* UVM_AMAP_PPREF */

#if defined(VMSWAP)

/*
 * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
 *
 * => called with swap_syscall_lock held.
 * => note that we don't always traverse all anons.
 *    eg. amaps being wiped out, released anons.
 * => return TRUE if failed.
 */

boolean_t
amap_swap_off(int startslot, int endslot)
{
	struct vm_amap *am;
	struct vm_amap *am_next;
	struct vm_amap marker_prev;
	struct vm_amap marker_next;
	struct lwp *l = curlwp;
	boolean_t rv = FALSE;

#if defined(DIAGNOSTIC)
	memset(&marker_prev, 0, sizeof(marker_prev));
	memset(&marker_next, 0, sizeof(marker_next));
#endif /* defined(DIAGNOSTIC) */

	PHOLD(l);
	simple_lock(&amap_list_lock);
	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
		int i;

		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
		LIST_INSERT_AFTER(am, &marker_next, am_list);

		if (!amap_lock_try(am)) {
			simple_unlock(&amap_list_lock);
			preempt(1);
			simple_lock(&amap_list_lock);
			am_next = LIST_NEXT(&marker_prev, am_list);
			if (am_next == &marker_next) {
				am_next = LIST_NEXT(am_next, am_list);
			} else {
				KASSERT(LIST_NEXT(am_next, am_list) ==
				    &marker_next);
			}
			LIST_REMOVE(&marker_prev, am_list);
			LIST_REMOVE(&marker_next, am_list);
			continue;
		}

		simple_unlock(&amap_list_lock);

		if (am->am_nused <= 0) {
			amap_unlock(am);
			goto next;
		}

		for (i = 0; i < am->am_nused; i++) {
			int slot;
			int swslot;
			struct vm_anon *anon;

			slot = am->am_slots[i];
			anon = am->am_anon[slot];
			simple_lock(&anon->an_lock);

			swslot = anon->an_swslot;
			if (swslot < startslot || endslot <= swslot) {
				simple_unlock(&anon->an_lock);
				continue;
			}

			am->am_flags |= AMAP_SWAPOFF;
			amap_unlock(am);

			rv = uvm_anon_pagein(anon);

			amap_lock(am);
			am->am_flags &= ~AMAP_SWAPOFF;
			if (amap_refs(am) == 0) {
				amap_wipeout(am);
				am = NULL;
				break;
			}
			if (rv) {
				break;
			}

			/*
			 * start over: the amap was unlocked while paging
			 * the anon in, so the am_slots[] array may have
			 * been reordered under us.
			 */
			i = 0;
		}

		if (am) {
			amap_unlock(am);
		}

next:
		simple_lock(&amap_list_lock);
		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
		    &marker_next);
		am_next = LIST_NEXT(&marker_next, am_list);
		LIST_REMOVE(&marker_prev, am_list);
		LIST_REMOVE(&marker_next, am_list);
	}
	simple_unlock(&amap_list_lock);
	PRELE(l);

	return rv;
}

#endif /* defined(VMSWAP) */
1357