xref: /netbsd-src/sys/uvm/uvm_amap.c (revision daf6c4152fcddc27c445489775ed1f66ab4ea9a9)
1 /*	$NetBSD: uvm_amap.c,v 1.89 2011/02/02 15:13:33 chuck Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * uvm_amap.c: amap operations
30  */
31 
32 /*
33  * this file contains functions that perform operations on amaps.  see
34  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.89 2011/02/02 15:13:33 chuck Exp $");
39 
40 #include "opt_uvmhist.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/proc.h>
45 #include <sys/kernel.h>
46 #include <sys/kmem.h>
47 #include <sys/pool.h>
48 #include <sys/atomic.h>
49 
50 #include <uvm/uvm.h>
51 #include <uvm/uvm_swap.h>
52 
53 /*
54  * cache for allocation of vm_amap structures.  note that in order to
55  * avoid an endless loop, the amap cache's allocator cannot allocate
56  * memory from an amap (it currently goes through the kernel uobj, so
57  * we are ok).
58  */
59 static struct pool_cache uvm_amap_cache;
60 static kmutex_t amap_list_lock;
61 static LIST_HEAD(, vm_amap) amap_list;
62 
63 /*
64  * local functions
65  */
66 
67 static inline void
68 amap_list_insert(struct vm_amap *amap)
69 {
70 
71 	mutex_enter(&amap_list_lock);
72 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
73 	mutex_exit(&amap_list_lock);
74 }
75 
76 static inline void
77 amap_list_remove(struct vm_amap *amap)
78 {
79 
80 	mutex_enter(&amap_list_lock);
81 	LIST_REMOVE(amap, am_list);
82 	mutex_exit(&amap_list_lock);
83 }
84 
85 static int
86 amap_roundup_slots(int slots)
87 {
88 
89 	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
90 }
91 
92 #ifdef UVM_AMAP_PPREF
93 /*
94  * what is ppref?   ppref is an _optional_ amap feature which is used
95  * to keep track of reference counts on a per-page basis.  it is enabled
96  * when UVM_AMAP_PPREF is defined.
97  *
98  * when enabled, an array of ints is allocated for the pprefs.  this
99  * array is allocated only when a partial reference is added to the
100  * map (either by unmapping part of the amap, or gaining a reference
101  * to only a part of an amap).  if the allocation of the array fails
102  * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
103  * that we tried to do pprefs but couldn't allocate the array so we just
104  * give up (after all, this is an optional feature!).
105  *
106  * the array is divided into page sized "chunks."   for chunks of length 1,
107  * the chunk reference count plus one is stored in that chunk's slot.
108  * for chunks of length > 1 the first slot contains (the reference count
109  * plus one) * -1.    [the negative value indicates that the length is
110  * greater than one.]   the second slot of the chunk contains the length
111  * of the chunk.   here is an example:
112  *
113  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
114  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
115  *              <----------><-><----><-------><----><-><------->
116  * (x = don't care)
117  *
118  * this allows one int to hold the ref count for a whole chunk.   note
119  * that the "plus one" part is needed because a reference count of zero
120  * is neither positive nor negative (we need a way to tell whether we
121  * have one zero or a run of them).
122  *
123  * here are some in-line functions to help us.
124  */
125 
126 /*
127  * pp_getreflen: get the reference and length for a specific offset
128  *
129  * => ppref's amap must be locked
130  */
131 static inline void
132 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
133 {
134 
135 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
136 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
137 		*lenp = 1;
138 	} else {
139 		*refp = (ppref[offset] * -1) - 1;
140 		*lenp = ppref[offset+1];
141 	}
142 }
143 
144 /*
145  * pp_setreflen: set the reference and length for a specific offset
146  *
147  * => ppref's amap must be locked
148  */
149 static inline void
150 pp_setreflen(int *ppref, int offset, int ref, int len)
151 {
152 	if (len == 0)
153 		return;
154 	if (len == 1) {
155 		ppref[offset] = ref + 1;
156 	} else {
157 		ppref[offset] = (ref + 1) * -1;
158 		ppref[offset+1] = len;
159 	}
160 }
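/*
 * a small illustrative sketch (not compiled): walking the example ppref
 * array from the comment above with pp_getreflen() recovers the per-page
 * reference counts shown there.  the array values come from that example,
 * with the "don't care" slots filled with zero.
 */
#if 0
static void
pp_example_walk(void)
{
	/* encodes refs: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1 */
	int ppref[16] = { -3, 4, 0, 0, 4, -2, 2, -1, 3, 0, -5, 2, 1, -2, 3, 0 };
	int lcv, ref, len;

	for (lcv = 0; lcv < 16; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		printf("slots %d..%d: ref %d\n", lcv, lcv + len - 1, ref);
	}
}
#endif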
161 #endif /* UVM_AMAP_PPREF */
162 
163 /*
164  * amap_alloc1: internal function that allocates an amap, but does not
165  *	init the overlay.
166  *
167  * => lock on returned amap is init'd
168  */
169 static inline struct vm_amap *
170 amap_alloc1(int slots, int padslots, int waitf)
171 {
172 	struct vm_amap *amap;
173 	int totalslots;
174 	km_flag_t kmflags;
175 
176 	amap = pool_cache_get(&uvm_amap_cache,
177 	    ((waitf & UVM_FLAG_NOWAIT) != 0) ? PR_NOWAIT : PR_WAITOK);
178 	if (amap == NULL)
179 		return(NULL);
180 
181 	kmflags = ((waitf & UVM_FLAG_NOWAIT) != 0) ? KM_NOSLEEP : KM_SLEEP;
182 	totalslots = amap_roundup_slots(slots + padslots);
183 	mutex_init(&amap->am_l, MUTEX_DEFAULT, IPL_NONE);
184 	amap->am_ref = 1;
185 	amap->am_flags = 0;
186 #ifdef UVM_AMAP_PPREF
187 	amap->am_ppref = NULL;
188 #endif
189 	amap->am_maxslot = totalslots;
190 	amap->am_nslot = slots;
191 	amap->am_nused = 0;
192 
193 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
194 	if (amap->am_slots == NULL)
195 		goto fail1;
196 
197 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
198 	if (amap->am_bckptr == NULL)
199 		goto fail2;
200 
201 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
202 	    kmflags);
203 	if (amap->am_anon == NULL)
204 		goto fail3;
205 
206 	return(amap);
207 
208 fail3:
209 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
210 fail2:
211 	kmem_free(amap->am_slots, totalslots * sizeof(int));
212 fail1:
213 	mutex_destroy(&amap->am_l);
214 	pool_cache_put(&uvm_amap_cache, amap);
215 
216 	/*
217 	 * XXX hack to tell the pagedaemon how many pages we need,
218 	 * since we can need more than it would normally free.
219 	 */
220 	if ((waitf & UVM_FLAG_NOWAIT) != 0) {
221 		extern u_int uvm_extrapages;
222 		atomic_add_int(&uvm_extrapages,
223 		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
224 		    totalslots) >> PAGE_SHIFT);
225 	}
226 	return (NULL);
227 }
228 
229 /*
230  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
231  *
232  * => caller should ensure sz is a multiple of PAGE_SIZE
233  * => reference count to new amap is set to one
234  * => new amap is returned unlocked
235  */
236 
237 struct vm_amap *
238 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
239 {
240 	struct vm_amap *amap;
241 	int slots, padslots;
242 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
243 
244 	AMAP_B2SLOT(slots, sz);
245 	AMAP_B2SLOT(padslots, padsz);
246 
247 	amap = amap_alloc1(slots, padslots, waitf);
248 	if (amap) {
249 		memset(amap->am_anon, 0,
250 		    amap->am_maxslot * sizeof(struct vm_anon *));
251 		amap_list_insert(amap);
252 	}
253 
254 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
255 	return(amap);
256 }
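/*
 * illustrative sketch (not compiled): how a caller might allocate an amap
 * to cover "sz" bytes of anonymous memory without sleeping.  the function
 * name here is hypothetical; only amap_alloc() is real.
 */
#if 0
static struct vm_amap *
example_alloc_amap(vsize_t sz)
{
	struct vm_amap *amap;

	amap = amap_alloc(round_page(sz), 0, UVM_FLAG_NOWAIT);
	if (amap == NULL)
		return NULL;		/* no memory available right now */
	KASSERT(amap->am_ref == 1);	/* comes back with one reference */
	return amap;
}
#endif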
257 
258 /*
259  * uvm_amap_init: initialize the amap system.
260  */
261 void
262 uvm_amap_init(void)
263 {
264 
265 	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
266 
267 	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0, 0,
268 	    "amappl", NULL, IPL_NONE, NULL, NULL, NULL);
269 }
270 
271 /*
272  * amap_free: free an amap
273  *
274  * => the amap must be unlocked
275  * => the amap should have a zero reference count and be empty
276  */
277 void
278 amap_free(struct vm_amap *amap)
279 {
280 	int slots;
281 
282 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
283 
284 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
285 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
286 	KASSERT(!mutex_owned(&amap->am_l));
287 	slots = amap->am_maxslot;
288 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
289 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
290 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
291 #ifdef UVM_AMAP_PPREF
292 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
293 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
294 #endif
295 	mutex_destroy(&amap->am_l);
296 	pool_cache_put(&uvm_amap_cache, amap);
297 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
298 }
299 
300 /*
301  * amap_extend: extend the size of an amap (if needed)
302  *
303  * => called from uvm_map when we want to extend an amap to cover
304  *    a new mapping (rather than allocate a new one)
305  * => amap should be unlocked (we will lock it)
306  * => to safely extend an amap it should have a reference count of
307  *    one (thus it can't be shared)
308  */
309 int
310 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
311 {
312 	struct vm_amap *amap = entry->aref.ar_amap;
313 	int slotoff = entry->aref.ar_pageoff;
314 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
315 	int slotadj, slotspace;
316 	int oldnslots;
317 #ifdef UVM_AMAP_PPREF
318 	int *newppref, *oldppref;
319 #endif
320 	int i, *newsl, *newbck, *oldsl, *oldbck;
321 	struct vm_anon **newover, **oldover;
322 	const km_flag_t kmflags =
323 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
324 
325 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
326 
327 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
328 	    entry, addsize, flags, 0);
329 
330 	/*
331 	 * first, determine how many slots we need in the amap.  don't
332 	 * forget that ar_pageoff could be non-zero: this means that
333 	 * there are some unused slots before us in the amap.
334 	 */
335 
336 	amap_lock(amap);
337 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
338 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
339 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
340 	if (flags & AMAP_EXTEND_FORWARDS) {
341 		slotneed = slotoff + slotmapped + slotadd;
342 		slotadj = 0;
343 		slotspace = 0;
344 	}
345 	else {
346 		slotneed = slotadd + slotmapped;
347 		slotadj = slotadd - slotoff;
348 		slotspace = amap->am_maxslot - slotmapped;
349 	}
350 
351 	/*
352 	 * case 1: we already have enough slots in the map and thus
353 	 * only need to bump the reference counts on the slots we are
354 	 * adding.
355 	 */
356 
357 	if (flags & AMAP_EXTEND_FORWARDS) {
358 		if (amap->am_nslot >= slotneed) {
359 #ifdef UVM_AMAP_PPREF
360 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
361 				amap_pp_adjref(amap, slotoff + slotmapped,
362 				    slotadd, 1);
363 			}
364 #endif
365 			amap_unlock(amap);
366 			UVMHIST_LOG(maphist,
367 			    "<- done (case 1f), amap = 0x%x, sltneed=%d",
368 			    amap, slotneed, 0, 0);
369 			return 0;
370 		}
371 	} else {
372 		if (slotadj <= 0) {
373 			slotoff -= slotadd;
374 			entry->aref.ar_pageoff = slotoff;
375 #ifdef UVM_AMAP_PPREF
376 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
377 				amap_pp_adjref(amap, slotoff, slotadd, 1);
378 			}
379 #endif
380 			amap_unlock(amap);
381 			UVMHIST_LOG(maphist,
382 			    "<- done (case 1b), amap = 0x%x, sltneed=%d",
383 			    amap, slotneed, 0, 0);
384 			return 0;
385 		}
386 	}
387 
388 	/*
389 	 * case 2: we pre-allocated slots for use and we just need to
390 	 * bump nslot up to account for these slots.
391 	 */
392 
393 	if (amap->am_maxslot >= slotneed) {
394 		if (flags & AMAP_EXTEND_FORWARDS) {
395 #ifdef UVM_AMAP_PPREF
396 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
397 				if ((slotoff + slotmapped) < amap->am_nslot)
398 					amap_pp_adjref(amap,
399 					    slotoff + slotmapped,
400 					    (amap->am_nslot -
401 					    (slotoff + slotmapped)), 1);
402 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
403 				    slotneed - amap->am_nslot);
404 			}
405 #endif
406 			amap->am_nslot = slotneed;
407 			amap_unlock(amap);
408 
409 			/*
410 			 * no need to zero am_anon since that was done at
411 			 * alloc time and we never shrink an allocation.
412 			 */
413 
414 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
415 			    "slotneed=%d", amap, slotneed, 0, 0);
416 			return 0;
417 		} else {
418 #ifdef UVM_AMAP_PPREF
419 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
420 				/*
421 				 * Slide up the ref counts on the pages that
422 				 * are actually in use.
423 				 */
424 				memmove(amap->am_ppref + slotspace,
425 				    amap->am_ppref + slotoff,
426 				    slotmapped * sizeof(int));
427 				/*
428 				 * Mark the (adjusted) gap at the front as
429 				 * referenced/not referenced.
430 				 */
431 				pp_setreflen(amap->am_ppref,
432 				    0, 0, slotspace - slotadd);
433 				pp_setreflen(amap->am_ppref,
434 				    slotspace - slotadd, 1, slotadd);
435 			}
436 #endif
437 
438 			/*
439 			 * Slide the anon pointers up and clear out
440 			 * the space we just made.
441 			 */
442 			memmove(amap->am_anon + slotspace,
443 			    amap->am_anon + slotoff,
444 			    slotmapped * sizeof(struct vm_anon*));
445 			memset(amap->am_anon + slotoff, 0,
446 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
447 
448 			/*
449 			 * Slide the backpointers up, but don't bother
450 			 * wiping out the old slots.
451 			 */
452 			memmove(amap->am_bckptr + slotspace,
453 			    amap->am_bckptr + slotoff,
454 			    slotmapped * sizeof(int));
455 
456 			/*
457 			 * Adjust all the useful active slot numbers.
458 			 */
459 			for (i = 0; i < amap->am_nused; i++)
460 				amap->am_slots[i] += (slotspace - slotoff);
461 
462 			/*
463 			 * We just filled all the empty space in the
464 			 * front of the amap by activating a few new
465 			 * slots.
466 			 */
467 			amap->am_nslot = amap->am_maxslot;
468 			entry->aref.ar_pageoff = slotspace - slotadd;
469 			amap_unlock(amap);
470 
471 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
472 			    "slotneed=%d", amap, slotneed, 0, 0);
473 			return 0;
474 		}
475 	}
476 
477 	/*
478 	 * case 3: we need to malloc a new amap and copy all the amap
479 	 * data over from old amap to the new one.
480 	 *
481 	 * note that the use of a kernel realloc() probably would not
482 	 * help here, since we wish to abort cleanly if one of the
483 	 * three (or four) mallocs fails.
484 	 */
485 
486 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
487 
488 	if (slotneed >= UVM_AMAP_LARGE) {
489 		return E2BIG;
490 	}
491 
492 	slotalloc = amap_roundup_slots(slotneed);
493 #ifdef UVM_AMAP_PPREF
494 	newppref = NULL;
495 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
496 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
497 #endif
498 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
499 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
500 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
501 	if (newsl == NULL || newbck == NULL || newover == NULL) {
502 #ifdef UVM_AMAP_PPREF
503 		if (newppref != NULL) {
504 			kmem_free(newppref, slotalloc * sizeof(*newppref));
505 		}
506 #endif
507 		if (newsl != NULL) {
508 			kmem_free(newsl, slotalloc * sizeof(*newsl));
509 		}
510 		if (newbck != NULL) {
511 			kmem_free(newbck, slotalloc * sizeof(*newbck));
512 		}
513 		if (newover != NULL) {
514 			kmem_free(newover, slotalloc * sizeof(*newover));
515 		}
516 		return ENOMEM;
517 	}
518 	amap_lock(amap);
519 	KASSERT(amap->am_maxslot < slotneed);
520 
521 	/*
522 	 * now copy everything over to new malloc'd areas...
523 	 */
524 
525 	slotadded = slotalloc - amap->am_nslot;
526 	if (!(flags & AMAP_EXTEND_FORWARDS))
527 		slotspace = slotalloc - slotmapped;
528 
529 	/* do am_slots */
530 	oldsl = amap->am_slots;
531 	if (flags & AMAP_EXTEND_FORWARDS)
532 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
533 	else
534 		for (i = 0; i < amap->am_nused; i++)
535 			newsl[i] = oldsl[i] + slotspace - slotoff;
536 	amap->am_slots = newsl;
537 
538 	/* do am_anon */
539 	oldover = amap->am_anon;
540 	if (flags & AMAP_EXTEND_FORWARDS) {
541 		memcpy(newover, oldover,
542 		    sizeof(struct vm_anon *) * amap->am_nslot);
543 		memset(newover + amap->am_nslot, 0,
544 		    sizeof(struct vm_anon *) * slotadded);
545 	} else {
546 		memcpy(newover + slotspace, oldover + slotoff,
547 		    sizeof(struct vm_anon *) * slotmapped);
548 		memset(newover, 0,
549 		    sizeof(struct vm_anon *) * slotspace);
550 	}
551 	amap->am_anon = newover;
552 
553 	/* do am_bckptr */
554 	oldbck = amap->am_bckptr;
555 	if (flags & AMAP_EXTEND_FORWARDS)
556 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
557 	else
558 		memcpy(newbck + slotspace, oldbck + slotoff,
559 		    sizeof(int) * slotmapped);
560 	amap->am_bckptr = newbck;
561 
562 #ifdef UVM_AMAP_PPREF
563 	/* do ppref */
564 	oldppref = amap->am_ppref;
565 	if (newppref) {
566 		if (flags & AMAP_EXTEND_FORWARDS) {
567 			memcpy(newppref, oldppref,
568 			    sizeof(int) * amap->am_nslot);
569 			memset(newppref + amap->am_nslot, 0,
570 			    sizeof(int) * slotadded);
571 		} else {
572 			memcpy(newppref + slotspace, oldppref + slotoff,
573 			    sizeof(int) * slotmapped);
574 		}
575 		amap->am_ppref = newppref;
576 		if ((flags & AMAP_EXTEND_FORWARDS) &&
577 		    (slotoff + slotmapped) < amap->am_nslot)
578 			amap_pp_adjref(amap, slotoff + slotmapped,
579 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
580 		if (flags & AMAP_EXTEND_FORWARDS)
581 			pp_setreflen(newppref, amap->am_nslot, 1,
582 			    slotneed - amap->am_nslot);
583 		else {
584 			pp_setreflen(newppref, 0, 0,
585 			    slotalloc - slotneed);
586 			pp_setreflen(newppref, slotalloc - slotneed, 1,
587 			    slotneed - slotmapped);
588 		}
589 	} else {
590 		if (amap->am_ppref)
591 			amap->am_ppref = PPREF_NONE;
592 	}
593 #endif
594 
595 	/* update master values */
596 	if (flags & AMAP_EXTEND_FORWARDS)
597 		amap->am_nslot = slotneed;
598 	else {
599 		entry->aref.ar_pageoff = slotspace - slotadd;
600 		amap->am_nslot = slotalloc;
601 	}
602 	oldnslots = amap->am_maxslot;
603 	amap->am_maxslot = slotalloc;
604 
605 	amap_unlock(amap);
606 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
607 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
608 	kmem_free(oldover, oldnslots * sizeof(*oldover));
609 #ifdef UVM_AMAP_PPREF
610 	if (oldppref && oldppref != PPREF_NONE)
611 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
612 #endif
613 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
614 	    amap, slotneed, 0, 0);
615 	return 0;
616 }
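/*
 * illustrative sketch (not compiled): extending the amap behind a map
 * entry forward by "grow" bytes.  "entry" and "grow" stand in for
 * hypothetical caller state in uvm_map.
 */
#if 0
static int
example_extend_forward(struct vm_map_entry *entry, vsize_t grow)
{
	int error;

	error = amap_extend(entry, grow, AMAP_EXTEND_FORWARDS);
	if (error)		/* E2BIG or ENOMEM; old amap is untouched */
		return error;
	return 0;
}
#endif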
617 
618 /*
619  * amap_share_protect: change protection of anons in a shared amap
620  *
621  * for shared amaps, given the current data structure layout, it is
622  * not possible for us to directly locate all maps referencing the
623  * shared anon (to change the protection).  in order to protect data
624  * in shared maps we use pmap_page_protect().  [this is useful for IPC
625  * mechanisms like map entry passing that may want to write-protect
626  * all mappings of a shared amap.]  we traverse am_anon or am_slots
627  * depending on the current state of the amap.
628  *
629  * => entry's map and amap must be locked by the caller
630  */
631 void
632 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
633 {
634 	struct vm_amap *amap = entry->aref.ar_amap;
635 	int slots, lcv, slot, stop;
636 
637 	KASSERT(mutex_owned(&amap->am_l));
638 
639 	AMAP_B2SLOT(slots, (entry->end - entry->start));
640 	stop = entry->aref.ar_pageoff + slots;
641 
642 	if (slots < amap->am_nused) {
643 		/* cheaper to traverse am_anon */
644 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
645 			if (amap->am_anon[lcv] == NULL)
646 				continue;
647 			if (amap->am_anon[lcv]->an_page != NULL)
648 				pmap_page_protect(amap->am_anon[lcv]->an_page,
649 						  prot);
650 		}
651 		return;
652 	}
653 
654 	/* cheaper to traverse am_slots */
655 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
656 		slot = amap->am_slots[lcv];
657 		if (slot < entry->aref.ar_pageoff || slot >= stop)
658 			continue;
659 		if (amap->am_anon[slot]->an_page != NULL)
660 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
661 	}
662 }
663 
664 /*
665  * amap_wipeout: wipe out all anons in an amap; then free the amap!
666  *
667  * => called from amap_unref when the final reference to an amap is
668  *	discarded (i.e. when reference count drops to 0)
669  * => the amap should be locked (by the caller)
670  */
671 
672 void
673 amap_wipeout(struct vm_amap *amap)
674 {
675 	int lcv, slot;
676 	struct vm_anon *anon;
677 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
678 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
679 
680 	KASSERT(amap->am_ref == 0);
681 
682 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
683 		/*
684 		 * amap_swap_off will call us again.
685 		 */
686 		amap_unlock(amap);
687 		return;
688 	}
689 	amap_list_remove(amap);
690 	amap_unlock(amap);
691 
692 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
693 		int refs;
694 
695 		slot = amap->am_slots[lcv];
696 		anon = amap->am_anon[slot];
697 		KASSERT(anon != NULL && anon->an_ref != 0);
698 
699 		mutex_enter(&anon->an_lock);
700 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
701 		    anon->an_ref, 0, 0);
702 		refs = --anon->an_ref;
703 		mutex_exit(&anon->an_lock);
704 		if (refs == 0) {
705 
706 			/*
707 			 * we had the last reference to a vm_anon. free it.
708 			 */
709 
710 			uvm_anfree(anon);
711 		}
712 
713 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
714 			preempt();
715 	}
716 
717 	/*
718 	 * now we free the map
719 	 */
720 
721 	amap->am_nused = 0;
722 	amap_free(amap);	/* will unlock and free amap */
723 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
724 }
725 
726 /*
727  * amap_copy: ensure that a map entry's "needs_copy" flag is false
728  *	by copying the amap if necessary.
729  *
730  * => an entry with a null amap pointer will get a new (blank) one.
731  * => the map that the map entry belongs to must be locked by caller.
732  * => the amap currently attached to "entry" (if any) must be unlocked.
733  * => if canchunk is true, then we may clip the entry into a chunk
734  * => "startva" and "endva" are used only if canchunk is true.  they are
735  *     used to limit chunking (e.g. if you have a large space that you
736  *     know you are going to need to allocate amaps for, there is no point
737  *     in allowing that to be chunked)
738  */
739 
740 void
741 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
742     vaddr_t startva, vaddr_t endva)
743 {
744 	struct vm_amap *amap, *srcamap;
745 	int slots, lcv;
746 	vaddr_t chunksize;
747 	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
748 	const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
749 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
750 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
751 		    map, entry, flags, 0);
752 
753 	KASSERT(map != kernel_map);	/* we use nointr pool */
754 
755 	/*
756 	 * is there a map to copy?   if not, create one from scratch.
757 	 */
758 
759 	if (entry->aref.ar_amap == NULL) {
760 
761 		/*
762 		 * check to see if we have a large amap that we can
763 		 * chunk.  we align startva/endva to chunk-sized
764 		 * boundaries and then clip to them.
765 		 */
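		/*
		 * for example, with the usual UVM_AMAP_CHUNK of 16 slots
		 * and 4 KiB pages, chunksize below works out to 64 KiB, so
		 * startva rounds down and endva rounds up to 64 KiB
		 * boundaries.  (the numbers here are illustrative only.)
		 */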
766 
767 		if (canchunk && atop(entry->end - entry->start) >=
768 		    UVM_AMAP_LARGE) {
769 			/* convert slots to bytes */
770 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
771 			startva = (startva / chunksize) * chunksize;
772 			endva = roundup(endva, chunksize);
773 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
774 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
775 			    endva);
776 			UVM_MAP_CLIP_START(map, entry, startva, NULL);
777 			/* watch out for endva wrap-around! */
778 			if (endva >= startva)
779 				UVM_MAP_CLIP_END(map, entry, endva, NULL);
780 		}
781 
782 		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
783 		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
784 			return;
785 		}
786 
787 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
788 		entry->start, entry->end, 0, 0);
789 		entry->aref.ar_pageoff = 0;
790 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
791 		    waitf);
792 		if (entry->aref.ar_amap != NULL)
793 			entry->etype &= ~UVM_ET_NEEDSCOPY;
794 		return;
795 	}
796 
797 	/*
798 	 * first check and see if we are the only map entry
799 	 * referencing the amap we currently have.  if so, then we can
800 	 * just take it over rather than copying it.  note that we are
801 	 * reading am_ref with the amap unlocked... the value can only
802 	 * be one if we have the only reference to the amap (via our
803 	 * locked map).  if it is greater than one we fall through to
804 	 * the next case (where we double check the value).
805 	 */
806 
807 	if (entry->aref.ar_amap->am_ref == 1) {
808 		entry->etype &= ~UVM_ET_NEEDSCOPY;
809 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
810 		    0, 0, 0, 0);
811 		return;
812 	}
813 
814 	/*
815 	 * looks like we need to copy the map.
816 	 */
817 
818 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
819 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
820 	AMAP_B2SLOT(slots, entry->end - entry->start);
821 	amap = amap_alloc1(slots, 0, waitf);
822 	if (amap == NULL) {
823 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
824 		return;
825 	}
826 	srcamap = entry->aref.ar_amap;
827 	amap_lock(srcamap);
828 
829 	/*
830 	 * need to double check reference count now that we've got the
831 	 * src amap locked down.  the reference count could have
832 	 * changed while we were in malloc.  if the reference count
833 	 * dropped down to one we take over the old map rather than
834 	 * copying the amap.
835 	 */
836 
837 	if (srcamap->am_ref == 1) {		/* take it over? */
838 		entry->etype &= ~UVM_ET_NEEDSCOPY;
839 		amap->am_ref--;		/* drop final reference to map */
840 		amap_free(amap);	/* dispose of new (unused) amap */
841 		amap_unlock(srcamap);
842 		return;
843 	}
844 
845 	/*
846 	 * we must copy it now.
847 	 */
848 
849 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
850 	for (lcv = 0 ; lcv < slots; lcv++) {
851 		amap->am_anon[lcv] =
852 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
853 		if (amap->am_anon[lcv] == NULL)
854 			continue;
855 		mutex_enter(&amap->am_anon[lcv]->an_lock);
856 		amap->am_anon[lcv]->an_ref++;
857 		mutex_exit(&amap->am_anon[lcv]->an_lock);
858 		amap->am_bckptr[lcv] = amap->am_nused;
859 		amap->am_slots[amap->am_nused] = lcv;
860 		amap->am_nused++;
861 	}
862 	memset(&amap->am_anon[lcv], 0,
863 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
864 
865 	/*
866 	 * drop our reference to the old amap (srcamap) and unlock.
867 	 * we know that the reference count on srcamap is greater than
868 	 * one (we checked above), so there is no way we could drop
869 	 * the count to zero.  [and no need to worry about freeing it]
870 	 */
871 
872 	srcamap->am_ref--;
873 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
874 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
875 #ifdef UVM_AMAP_PPREF
876 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
877 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
878 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
879 	}
880 #endif
881 
882 	amap_unlock(srcamap);
883 
884 	amap_list_insert(amap);
885 
886 	/*
887 	 * install new amap.
888 	 */
889 
890 	entry->aref.ar_pageoff = 0;
891 	entry->aref.ar_amap = amap;
892 	entry->etype &= ~UVM_ET_NEEDSCOPY;
893 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
894 }
895 
896 /*
897  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
898  *
899  *	called during fork(2) when the parent process has a wired map
900  *	entry.   in that case we want to avoid write-protecting pages
901  *	in the parent's map (e.g. like what you'd do for a COW page)
902  *	so we resolve the COW here.
903  *
904  * => assume parent's entry was wired, thus all pages are resident.
905  * => assume pages that are loaned out (loan_count) are already mapped
906  *	read-only in all maps, and thus no need for us to worry about them
907  * => assume both parent and child vm_map's are locked
908  * => caller passes child's map/entry in to us
909  * => if we run out of memory we will unlock the amap and sleep _with_ the
910  *	parent and child vm_map's locked(!).    we have to do this since
911  *	we are in the middle of a fork(2) and we can't let the parent
912 	 *	map change until we are done copying all the map entries.
913  * => XXXCDC: out of memory should cause fork to fail, but there is
914  *	currently no easy way to do this (needs fix)
915  * => page queues must be unlocked (we may lock them)
916  */
917 
918 void
919 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
920 {
921 	struct vm_amap *amap = entry->aref.ar_amap;
922 	int lcv, slot;
923 	struct vm_anon *anon, *nanon;
924 	struct vm_page *pg, *npg;
925 
926 	/*
927 	 * note that if we unlock the amap then we must ReStart the "lcv" for
928 	 * loop because some other process could reorder the anons in the
929 	 * am_anon[] array on us while the lock is dropped.
930 	 */
931 
932 ReStart:
933 	amap_lock(amap);
934 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
935 
936 		/*
937 		 * get the page
938 		 */
939 
940 		slot = amap->am_slots[lcv];
941 		anon = amap->am_anon[slot];
942 		mutex_enter(&anon->an_lock);
943 
944 		/*
945 		 * If the anon has only one ref, we must have already copied it.
946 		 * This can happen if we needed to sleep waiting for memory
947 		 * in a previous run through this loop.  The new page might
948 		 * even have been paged out, since the new page is not wired.
949 		 */
950 
951 		if (anon->an_ref == 1) {
952 			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
953 			mutex_exit(&anon->an_lock);
954 			continue;
955 		}
956 
957 		/*
958 		 * The old page must be resident since the parent is wired.
959 		 */
960 
961 		pg = anon->an_page;
962 		KASSERT(pg != NULL);
963 		KASSERT(pg->wire_count > 0);
964 
965 		/*
966 		 * If the page is loaned then it must already be mapped
967 		 * read-only and we don't need to copy it.
968 		 */
969 
970 		if (pg->loan_count != 0) {
971 			mutex_exit(&anon->an_lock);
972 			continue;
973 		}
974 		KASSERT(pg->uanon == anon && pg->uobject == NULL);
975 
976 		/*
977 		 * if the page is busy then we have to unlock, wait for
978 		 * it and then restart.
979 		 */
980 
981 		if (pg->flags & PG_BUSY) {
982 			pg->flags |= PG_WANTED;
983 			amap_unlock(amap);
984 			UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, false,
985 			    "cownow", 0);
986 			goto ReStart;
987 		}
988 
989 		/*
990 		 * ok, time to do a copy-on-write to a new anon
991 		 */
992 
993 		nanon = uvm_analloc();
994 		if (nanon) {
995 			npg = uvm_pagealloc(NULL, 0, nanon, 0);
996 		} else
997 			npg = NULL;	/* XXX: quiet gcc warning */
998 		if (nanon == NULL || npg == NULL) {
999 
1000 			/*
1001 			 * XXXCDC: we should cause fork to fail, but we can't.
1002 			 */
1003 
1004 			if (nanon) {
1005 				nanon->an_ref--;
1006 				mutex_exit(&nanon->an_lock);
1007 				uvm_anfree(nanon);
1008 			}
1009 			mutex_exit(&anon->an_lock);
1010 			amap_unlock(amap);
1011 			uvm_wait("cownowpage");
1012 			goto ReStart;
1013 		}
1014 
1015 		/*
1016 		 * got it... now we can copy the data and replace anon
1017 		 * with our new one...
1018 		 */
1019 
1020 		uvm_pagecopy(pg, npg);		/* old -> new */
1021 		anon->an_ref--;			/* can't drop to zero */
1022 		amap->am_anon[slot] = nanon;	/* replace */
1023 
1024 		/*
1025 		 * drop PG_BUSY on new page ... since we have had its owner
1026 		 * locked the whole time it can't be PG_RELEASED or PG_WANTED.
1027 		 */
1028 
1029 		mutex_enter(&uvm_pageqlock);
1030 		uvm_pageactivate(npg);
1031 		mutex_exit(&uvm_pageqlock);
1032 		npg->flags &= ~(PG_BUSY|PG_FAKE);
1033 		UVM_PAGE_OWN(npg, NULL);
1034 		mutex_exit(&nanon->an_lock);
1035 		mutex_exit(&anon->an_lock);
1036 	}
1037 	amap_unlock(amap);
1038 }
1039 
1040 /*
1041  * amap_splitref: split a single reference into two separate references
1042  *
1043  * => called from uvm_map's clip routines
1044  * => origref's map should be locked
1045  * => origref->ar_amap should be unlocked (we will lock)
1046  */
1047 void
1048 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1049 {
1050 	int leftslots;
1051 	struct vm_amap *amap;
1052 
1053 	KASSERT(splitref->ar_amap == origref->ar_amap);
1054 	AMAP_B2SLOT(leftslots, offset);
1055 	KASSERT(leftslots != 0);
1056 
1057 	amap = origref->ar_amap;
1058 	amap_lock(amap);
1059 
1060 	/*
1061 	 * now: the amap is locked and we can safely split the reference.
1062 	 */
1063 	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1064 
1065 #ifdef UVM_AMAP_PPREF
1066 	/*
1067 	 * establish ppref before we add a duplicate reference to the amap
1068 	 */
1069 	if (amap->am_ppref == NULL)
1070 		amap_pp_establish(amap, origref->ar_pageoff);
1071 #endif
1072 
1073 	amap->am_ref++;		/* not a share reference */
1074 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1075 
1076 	amap_unlock(amap);
1077 }
1078 
1079 #ifdef UVM_AMAP_PPREF
1080 
1081 /*
1082  * amap_pp_establish: add a ppref array to an amap, if possible
1083  *
1084  * => amap locked by caller
1085  */
1086 void
1087 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1088 {
1089 
1090 	amap->am_ppref = kmem_alloc(amap->am_maxslot * sizeof(*amap->am_ppref),
1091 	    KM_NOSLEEP);
1092 
1093 	/*
1094 	 * if we fail then we just won't use ppref for this amap
1095 	 */
1096 
1097 	if (amap->am_ppref == NULL) {
1098 		amap->am_ppref = PPREF_NONE;	/* not using it */
1099 		return;
1100 	}
1101 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1102 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1103 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1104 	    amap->am_nslot - offset);
1105 	return;
1106 }
1107 
1108 /*
1109  * amap_pp_adjref: adjust reference count to a part of an amap using the
1110  * per-page reference count array.
1111  *
1112  * => map and amap locked by caller
1113  * => caller must check that ppref != PPREF_NONE before calling
1114  */
1115 void
1116 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
1117 {
1118 	int stopslot, *ppref, lcv, prevlcv;
1119 	int ref, len, prevref, prevlen;
1120 
1121 	stopslot = curslot + slotlen;
1122 	ppref = amap->am_ppref;
1123 	prevlcv = 0;
1124 
1125 	/*
1126 	 * first advance to the correct place in the ppref array,
1127 	 * fragment if needed.
1128 	 */
1129 
1130 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1131 		pp_getreflen(ppref, lcv, &ref, &len);
1132 		if (lcv + len > curslot) {     /* goes past start? */
1133 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1134 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1135 			len = curslot - lcv;   /* new length of entry @ lcv */
1136 		}
1137 		prevlcv = lcv;
1138 	}
1139 	if (lcv != 0)
1140 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1141 	else {
1142 		/* Ensure that the "prevref == ref" test below always
1143 		 * fails, since we're starting from the beginning of
1144 		 * the ppref array; that is, there is no previous
1145 		 * chunk.
1146 		 */
1147 		prevref = -1;
1148 		prevlen = 0;
1149 	}
1150 
1151 	/*
1152 	 * now adjust reference counts in range.  merge the first
1153 	 * changed entry with the last unchanged entry if possible.
1154 	 */
1155 	KASSERT(lcv == curslot);
1156 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1157 		pp_getreflen(ppref, lcv, &ref, &len);
1158 		if (lcv + len > stopslot) {     /* goes past end? */
1159 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1160 			pp_setreflen(ppref, stopslot, ref,
1161 			    len - (stopslot - lcv));
1162 			len = stopslot - lcv;
1163 		}
1164 		ref += adjval;
1165 		KASSERT(ref >= 0);
1166 		if (lcv == prevlcv + prevlen && ref == prevref) {
1167 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1168 		} else {
1169 			pp_setreflen(ppref, lcv, ref, len);
1170 		}
1171 		if (ref == 0)
1172 			amap_wiperange(amap, lcv, len);
1173 	}
1174 
1175 }
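/*
 * illustrative example: amap_pp_adjref(amap, 4, 8, -1) lowers by one the
 * per-page reference count of slots 4..11, splitting the ppref chunks at
 * both boundaries as needed and wiping any slots whose count reaches zero
 * via amap_wiperange().
 */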
1176 
1177 /*
1178  * amap_wiperange: wipe out a range of an amap
1179  * [different from amap_wipeout because the amap is kept intact]
1180  *
1181  * => both map and amap must be locked by caller.
1182  */
1183 void
1184 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1185 {
1186 	int byanon, lcv, stop, curslot, ptr, slotend;
1187 	struct vm_anon *anon;
1188 
1189 	/*
1190 	 * we can either traverse the amap by am_anon or by am_slots depending
1191 	 * on which is cheaper.    decide now.
1192 	 */
1193 
1194 	if (slots < amap->am_nused) {
1195 		byanon = true;
1196 		lcv = slotoff;
1197 		stop = slotoff + slots;
1198 		slotend = 0;
1199 	} else {
1200 		byanon = false;
1201 		lcv = 0;
1202 		stop = amap->am_nused;
1203 		slotend = slotoff + slots;
1204 	}
1205 
1206 	while (lcv < stop) {
1207 		int refs;
1208 
1209 		if (byanon) {
1210 			curslot = lcv++;	/* lcv advances here */
1211 			if (amap->am_anon[curslot] == NULL)
1212 				continue;
1213 		} else {
1214 			curslot = amap->am_slots[lcv];
1215 			if (curslot < slotoff || curslot >= slotend) {
1216 				lcv++;		/* lcv advances here */
1217 				continue;
1218 			}
1219 			stop--;	/* drop stop, since anon will be removed */
1220 		}
1221 		anon = amap->am_anon[curslot];
1222 
1223 		/*
1224 		 * remove it from the amap
1225 		 */
1226 
1227 		amap->am_anon[curslot] = NULL;
1228 		ptr = amap->am_bckptr[curslot];
1229 		if (ptr != (amap->am_nused - 1)) {
1230 			amap->am_slots[ptr] =
1231 			    amap->am_slots[amap->am_nused - 1];
1232 			amap->am_bckptr[amap->am_slots[ptr]] =
1233 			    ptr;    /* back ptr. */
1234 		}
1235 		amap->am_nused--;
1236 
1237 		/*
1238 		 * drop anon reference count
1239 		 */
1240 
1241 		mutex_enter(&anon->an_lock);
1242 		refs = --anon->an_ref;
1243 		mutex_exit(&anon->an_lock);
1244 		if (refs == 0) {
1245 
1246 			/*
1247 			 * we just eliminated the last reference to an anon.
1248 			 * free it.
1249 			 */
1250 
1251 			uvm_anfree(anon);
1252 		}
1253 	}
1254 }
1255 
1256 #endif
1257 
1258 #if defined(VMSWAP)
1259 
1260 /*
1261  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1262  *
1263  * => called with swap_syscall_lock held.
1264  * => note that we don't always traverse all anons.
1265  *    eg. amaps being wiped out, released anons.
1266  * => return true if failed.
1267  */
1268 
1269 bool
1270 amap_swap_off(int startslot, int endslot)
1271 {
1272 	struct vm_amap *am;
1273 	struct vm_amap *am_next;
1274 	struct vm_amap marker_prev;
1275 	struct vm_amap marker_next;
1276 	bool rv = false;
1277 
1278 #if defined(DIAGNOSTIC)
1279 	memset(&marker_prev, 0, sizeof(marker_prev));
1280 	memset(&marker_next, 0, sizeof(marker_next));
1281 #endif /* defined(DIAGNOSTIC) */
1282 
1283 	mutex_enter(&amap_list_lock);
1284 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1285 		int i;
1286 
1287 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1288 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1289 
1290 		if (!amap_lock_try(am)) {
1291 			mutex_exit(&amap_list_lock);
1292 			preempt();
1293 			mutex_enter(&amap_list_lock);
1294 			am_next = LIST_NEXT(&marker_prev, am_list);
1295 			if (am_next == &marker_next) {
1296 				am_next = LIST_NEXT(am_next, am_list);
1297 			} else {
1298 				KASSERT(LIST_NEXT(am_next, am_list) ==
1299 				    &marker_next);
1300 			}
1301 			LIST_REMOVE(&marker_prev, am_list);
1302 			LIST_REMOVE(&marker_next, am_list);
1303 			continue;
1304 		}
1305 
1306 		mutex_exit(&amap_list_lock);
1307 
1308 		if (am->am_nused <= 0) {
1309 			amap_unlock(am);
1310 			goto next;
1311 		}
1312 
1313 		for (i = 0; i < am->am_nused; i++) {
1314 			int slot;
1315 			int swslot;
1316 			struct vm_anon *anon;
1317 
1318 			slot = am->am_slots[i];
1319 			anon = am->am_anon[slot];
1320 			mutex_enter(&anon->an_lock);
1321 
1322 			swslot = anon->an_swslot;
1323 			if (swslot < startslot || endslot <= swslot) {
1324 				mutex_exit(&anon->an_lock);
1325 				continue;
1326 			}
1327 
1328 			am->am_flags |= AMAP_SWAPOFF;
1329 			amap_unlock(am);
1330 
1331 			rv = uvm_anon_pagein(anon);
1332 
1333 			amap_lock(am);
1334 			am->am_flags &= ~AMAP_SWAPOFF;
1335 			if (amap_refs(am) == 0) {
1336 				amap_wipeout(am);
1337 				am = NULL;
1338 				break;
1339 			}
1340 			if (rv) {
1341 				break;
1342 			}
1343 			i = 0;
1344 		}
1345 
1346 		if (am) {
1347 			amap_unlock(am);
1348 		}
1349 
1350 next:
1351 		mutex_enter(&amap_list_lock);
1352 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1353 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1354 		    &marker_next);
1355 		am_next = LIST_NEXT(&marker_next, am_list);
1356 		LIST_REMOVE(&marker_prev, am_list);
1357 		LIST_REMOVE(&marker_next, am_list);
1358 	}
1359 	mutex_exit(&amap_list_lock);
1360 
1361 	return rv;
1362 }
1363 
1364 #endif /* defined(VMSWAP) */
1365 
1366 /*
1367  * amap_lookup: look up a page in an amap
1368  *
1369  * => amap should be locked by caller.
1370  */
1371 struct vm_anon *
1372 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1373 {
1374 	struct vm_anon *an;
1375 	int slot;
1376 	struct vm_amap *amap = aref->ar_amap;
1377 	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
1378 	KASSERT(mutex_owned(&amap->am_l));
1379 
1380 	AMAP_B2SLOT(slot, offset);
1381 	slot += aref->ar_pageoff;
1382 	KASSERT(slot < amap->am_nslot);
1383 
1384 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
1385 	    amap, offset, amap->am_anon[slot], 0);
1386 	an = amap->am_anon[slot];
1387 	KASSERT(an == NULL || an->an_ref != 0);
1388 	return an;
1389 }
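/*
 * illustrative sketch (not compiled): a fault-handler style lookup of the
 * anon backing virtual address "va" within a map entry.  "entry" and "va"
 * are hypothetical caller state.
 */
#if 0
static struct vm_anon *
example_lookup(struct vm_map_entry *entry, vaddr_t va)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon;

	amap_lock(amap);
	anon = amap_lookup(&entry->aref, va - entry->start);
	/* a real caller keeps the amap locked while it works on the anon */
	amap_unlock(amap);
	return anon;		/* NULL means no anon at that offset yet */
}
#endif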
1390 
1391 /*
1392  * amap_lookups: look up a range of pages in an amap
1393  *
1394  * => amap should be locked by caller.
1395  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
1396  */
1397 void
1398 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1399     int npages)
1400 {
1401 	int slot;
1402 	struct vm_amap *amap = aref->ar_amap;
1403 #if defined(DIAGNOSTIC)
1404 	int i;
1405 #endif /* defined(DIAGNOSTIC) */
1406 	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
1407 	KASSERT(mutex_owned(&amap->am_l));
1408 
1409 	AMAP_B2SLOT(slot, offset);
1410 	slot += aref->ar_pageoff;
1411 
1412 	UVMHIST_LOG(maphist, "  slot=%d, npages=%d, nslot=%d", slot, npages,
1413 		amap->am_nslot, 0);
1414 
1415 	KASSERT((slot + (npages - 1)) < amap->am_nslot);
1416 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1417 
1418 #if defined(DIAGNOSTIC)
1419 	for (i = 0; i < npages; i++) {
1420 		struct vm_anon * const an = anons[i];
1421 
1422 		if (an != NULL && an->an_ref == 0) {
1423 			panic("%s: ref=0 anon", __func__);
1424 		}
1425 	}
1426 #endif /* defined(DIAGNOSTIC) */
1427 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1428 	return;
1429 }
1430 
1431 /*
1432  * amap_add: add (or replace) a page to an amap
1433  *
1434  * => caller must lock amap.
1435  * => if (replace) caller must lock anon because we might have to call
1436  *	pmap_page_protect on the anon's page.
1437  */
1438 void
1439 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1440     bool replace)
1441 {
1442 	int slot;
1443 	struct vm_amap *amap = aref->ar_amap;
1444 	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
1445 	KASSERT(mutex_owned(&amap->am_l));
1446 
1447 	AMAP_B2SLOT(slot, offset);
1448 	slot += aref->ar_pageoff;
1449 	KASSERT(slot < amap->am_nslot);
1450 
1451 	if (replace) {
1452 		KASSERT(amap->am_anon[slot] != NULL);
1453 		if (amap->am_anon[slot]->an_page != NULL &&
1454 		    (amap->am_flags & AMAP_SHARED) != 0) {
1455 			pmap_page_protect(amap->am_anon[slot]->an_page,
1456 			    VM_PROT_NONE);
1457 			/*
1458 			 * XXX: suppose page is supposed to be wired somewhere?
1459 			 */
1460 		}
1461 	} else {   /* !replace */
1462 		KASSERT(amap->am_anon[slot] == NULL);
1463 		amap->am_bckptr[slot] = amap->am_nused;
1464 		amap->am_slots[amap->am_nused] = slot;
1465 		amap->am_nused++;
1466 	}
1467 	amap->am_anon[slot] = anon;
1468 	UVMHIST_LOG(maphist,
1469 	    "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
1470 	    amap, offset, anon, replace);
1471 }
1472 
1473 /*
1474  * amap_unadd: remove a page from an amap
1475  *
1476  * => caller must lock amap
1477  */
1478 void
1479 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1480 {
1481 	int ptr, slot;
1482 	struct vm_amap *amap = aref->ar_amap;
1483 	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
1484 	KASSERT(mutex_owned(&amap->am_l));
1485 
1486 	AMAP_B2SLOT(slot, offset);
1487 	slot += aref->ar_pageoff;
1488 	KASSERT(slot < amap->am_nslot);
1489 	KASSERT(amap->am_anon[slot] != NULL);
1490 
1491 	amap->am_anon[slot] = NULL;
1492 	ptr = amap->am_bckptr[slot];
1493 
1494 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1495 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1496 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1497 	}
1498 	amap->am_nused--;
1499 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0);
1500 }
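/*
 * illustrative sketch (not compiled): installing a freshly allocated anon
 * at a faulting address and then backing it out again.  "entry", "va" and
 * "anon" are hypothetical caller state.
 */
#if 0
static void
example_add_then_remove(struct vm_map_entry *entry, vaddr_t va,
    struct vm_anon *anon)
{

	amap_lock(entry->aref.ar_amap);
	amap_add(&entry->aref, va - entry->start, anon, false);
	/* if the caller later gives up on this page, it can undo the add */
	amap_unadd(&entry->aref, va - entry->start);
	amap_unlock(entry->aref.ar_amap);
}
#endif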
1501 
1502 /*
1503  * amap_ref: gain a reference to an amap
1504  *
1505  * => amap must not be locked (we will lock)
1506  * => "offset" and "len" are in units of pages
1507  * => called at fork time to gain the child's reference
1508  */
1509 void
1510 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1511 {
1512 	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
1513 
1514 	amap_lock(amap);
1515 	if (flags & AMAP_SHARED)
1516 		amap->am_flags |= AMAP_SHARED;
1517 #ifdef UVM_AMAP_PPREF
1518 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1519 	    len != amap->am_nslot)
1520 		amap_pp_establish(amap, offset);
1521 #endif
1522 	amap->am_ref++;
1523 #ifdef UVM_AMAP_PPREF
1524 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1525 		if (flags & AMAP_REFALL)
1526 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1527 		else
1528 			amap_pp_adjref(amap, offset, len, 1);
1529 	}
1530 #endif
1531 	amap_unlock(amap);
1532 	UVMHIST_LOG(maphist,"<- done!  amap=0x%x", amap, 0, 0, 0);
1533 }
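/*
 * illustrative sketch (not compiled): gaining the child's reference to a
 * shared amap at fork time, covering exactly the slots mapped by "entry"
 * (hypothetical), much as uvm_map's fork path does.
 */
#if 0
static void
example_fork_ref(struct vm_map_entry *entry)
{

	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
	    atop(entry->end - entry->start), AMAP_SHARED);
}
#endif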
1534 
1535 /*
1536  * amap_unref: remove a reference to an amap
1537  *
1538  * => caller must remove all pmap-level references to this amap before
1539  *	dropping the reference
1540  * => called from uvm_unmap_detach [only]  ... note that entry is no
1541  *	longer part of a map and thus has no need for locking
1542  * => amap must be unlocked (we will lock it).
1543  */
1544 void
1545 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1546 {
1547 	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
1548 
1549 	/*
1550 	 * lock it
1551 	 */
1552 	amap_lock(amap);
1553 	UVMHIST_LOG(maphist,"  amap=0x%x  refs=%d, nused=%d",
1554 	    amap, amap->am_ref, amap->am_nused, 0);
1555 
1556 	KASSERT(amap_refs(amap) > 0);
1557 
1558 	/*
1559 	 * if we are the last reference, free the amap and return.
1560 	 */
1561 
1562 	amap->am_ref--;
1563 
1564 	if (amap_refs(amap) == 0) {
1565 		amap_wipeout(amap);	/* drops final ref and frees */
1566 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1567 		return;			/* no need to unlock */
1568 	}
1569 
1570 	/*
1571 	 * otherwise just drop the reference count(s)
1572 	 */
1573 
1574 	if (amap_refs(amap) == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1575 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1576 #ifdef UVM_AMAP_PPREF
1577 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1578 		amap_pp_establish(amap, offset);
1579 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1580 		if (all)
1581 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1582 		else
1583 			amap_pp_adjref(amap, offset, len, -1);
1584 	}
1585 #endif
1586 	amap_unlock(amap);
1587 
1588 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1589 }
1590 
1591