xref: /netbsd-src/sys/uvm/uvm_amap.c (revision 274254cdae52594c1aa480a736aef78313d15c9c)
1 /*	$NetBSD: uvm_amap.c,v 1.86 2009/03/28 21:45:55 rmind Exp $	*/
2 
3 /*
4  *
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Charles D. Cranor and
19  *      Washington University.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * uvm_amap.c: amap operations
37  */
38 
39 /*
40  * this file contains functions that perform operations on amaps.  see
41  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.86 2009/03/28 21:45:55 rmind Exp $");
46 
47 #include "opt_uvmhist.h"
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/proc.h>
52 #include <sys/kernel.h>
53 #include <sys/kmem.h>
54 #include <sys/pool.h>
55 #include <sys/atomic.h>
56 
57 #include <uvm/uvm.h>
58 #include <uvm/uvm_swap.h>
59 
60 /*
61  * cache for allocation of vm_amap structures.  note that in order to
62  * avoid an endless loop, the amap cache's allocator cannot allocate
63  * memory from an amap (it currently goes through the kernel uobj, so
64  * we are ok).
65  */
66 static struct pool_cache uvm_amap_cache;
67 static kmutex_t amap_list_lock;
68 static LIST_HEAD(, vm_amap) amap_list;
69 
70 /*
71  * local functions
72  */
73 
74 static inline void
75 amap_list_insert(struct vm_amap *amap)
76 {
77 
78 	mutex_enter(&amap_list_lock);
79 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
80 	mutex_exit(&amap_list_lock);
81 }
82 
83 static inline void
84 amap_list_remove(struct vm_amap *amap)
85 {
86 
87 	mutex_enter(&amap_list_lock);
88 	LIST_REMOVE(amap, am_list);
89 	mutex_exit(&amap_list_lock);
90 }
91 
92 static int
93 amap_roundup_slots(int slots)
94 {
95 
96 	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
97 }
98 
99 #ifdef UVM_AMAP_PPREF
100 /*
101  * what is ppref?   ppref is an _optional_ amap feature which is used
102  * to keep track of reference counts on a per-page basis.  it is enabled
103  * when UVM_AMAP_PPREF is defined.
104  *
105  * when enabled, an array of ints is allocated for the pprefs.  this
106  * array is allocated only when a partial reference is added to the
107  * map (either by unmapping part of the amap, or gaining a reference
108  * to only a part of an amap).  if allocation of the array fails (it
109  * is attempted with KM_NOSLEEP), then we set the array pointer to
110  * PPREF_NONE to indicate that we tried to use pprefs but could not
111  * allocate the array, so we just give up (after all, it's optional!).
112  *
113  * the array is divided into variable-length "chunks" of slots.   for chunks of length 1,
114  * the chunk reference count plus one is stored in that chunk's slot.
115  * for chunks of length > 1 the first slot contains (the reference count
116  * plus one) * -1.    [the negative value indicates that the length is
117  * greater than one.]   the second slot of the chunk contains the length
118  * of the chunk.   here is an example:
119  *
120  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
121  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
122  *              <----------><-><----><-------><----><-><------->
123  * (x = don't care)
124  *
125  * this allows one int to contain the ref count for the whole
126  * chunk.    note that the "plus one" part is needed because a reference
127  * count of zero is neither positive nor negative (we need a way to tell
128  * if we've got one zero or a bunch of them).
129  *
130  * here are some in-line functions to help us.
131  */
132 
133 /*
134  * pp_getreflen: get the reference and length for a specific offset
135  *
136  * => ppref's amap must be locked
137  */
138 static inline void
139 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
140 {
141 
142 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
143 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
144 		*lenp = 1;
145 	} else {
146 		*refp = (ppref[offset] * -1) - 1;
147 		*lenp = ppref[offset+1];
148 	}
149 }
150 
151 /*
152  * pp_setreflen: set the reference and length for a specific offset
153  *
154  * => ppref's amap must be locked
155  */
156 static inline void
157 pp_setreflen(int *ppref, int offset, int ref, int len)
158 {
159 	if (len == 0)
160 		return;
161 	if (len == 1) {
162 		ppref[offset] = ref + 1;
163 	} else {
164 		ppref[offset] = (ref + 1) * -1;
165 		ppref[offset+1] = len;
166 	}
167 }
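
/*
 * illustrative sketch (not compiled): how the encoding described above
 * round-trips through pp_setreflen()/pp_getreflen().  the array size and
 * values here are made up purely for illustration.
 */
#if 0
static void
pp_example(void)
{
	int ppref[8];
	int ref, len;

	/* slots 0-3 have a ref count of 2, slots 4-7 have a ref count of 0 */
	pp_setreflen(ppref, 0, 2, 4);	/* stores -3 in [0] and 4 in [1] */
	pp_setreflen(ppref, 4, 0, 4);	/* stores -1 in [4] and 4 in [5] */

	pp_getreflen(ppref, 0, &ref, &len);	/* yields ref == 2, len == 4 */
	pp_getreflen(ppref, 4, &ref, &len);	/* yields ref == 0, len == 4 */
}
#endif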
168 #endif /* UVM_AMAP_PPREF */
169 
170 /*
171  * amap_alloc1: internal function that allocates an amap, but does not
172  *	init the overlay.
173  *
174  * => lock on returned amap is init'd
175  */
176 static inline struct vm_amap *
177 amap_alloc1(int slots, int padslots, int waitf)
178 {
179 	struct vm_amap *amap;
180 	int totalslots;
181 	km_flag_t kmflags;
182 
183 	amap = pool_cache_get(&uvm_amap_cache,
184 	    ((waitf & UVM_FLAG_NOWAIT) != 0) ? PR_NOWAIT : PR_WAITOK);
185 	if (amap == NULL)
186 		return(NULL);
187 
188 	kmflags = ((waitf & UVM_FLAG_NOWAIT) != 0) ? KM_NOSLEEP : KM_SLEEP;
189 	totalslots = amap_roundup_slots(slots + padslots);
190 	mutex_init(&amap->am_l, MUTEX_DEFAULT, IPL_NONE);
191 	amap->am_ref = 1;
192 	amap->am_flags = 0;
193 #ifdef UVM_AMAP_PPREF
194 	amap->am_ppref = NULL;
195 #endif
196 	amap->am_maxslot = totalslots;
197 	amap->am_nslot = slots;
198 	amap->am_nused = 0;
199 
200 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
201 	if (amap->am_slots == NULL)
202 		goto fail1;
203 
204 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
205 	if (amap->am_bckptr == NULL)
206 		goto fail2;
207 
208 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
209 	    kmflags);
210 	if (amap->am_anon == NULL)
211 		goto fail3;
212 
213 	return(amap);
214 
215 fail3:
216 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
217 fail2:
218 	kmem_free(amap->am_slots, totalslots * sizeof(int));
219 fail1:
220 	mutex_destroy(&amap->am_l);
221 	pool_cache_put(&uvm_amap_cache, amap);
222 
223 	/*
224 	 * XXX hack to tell the pagedaemon how many pages we need,
225 	 * since we can need more than it would normally free.
226 	 */
227 	if ((waitf & UVM_FLAG_NOWAIT) != 0) {
228 		extern u_int uvm_extrapages;
229 		atomic_add_int(&uvm_extrapages,
230 		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
231 		    totalslots) >> PAGE_SHIFT);
232 	}
233 	return (NULL);
234 }
235 
236 /*
237  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
238  *
239  * => caller should ensure sz is a multiple of PAGE_SIZE
240  * => reference count to new amap is set to one
241  * => new amap is returned unlocked
242  */
243 
244 struct vm_amap *
245 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
246 {
247 	struct vm_amap *amap;
248 	int slots, padslots;
249 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
250 
251 	AMAP_B2SLOT(slots, sz);
252 	AMAP_B2SLOT(padslots, padsz);
253 
254 	amap = amap_alloc1(slots, padslots, waitf);
255 	if (amap) {
256 		memset(amap->am_anon, 0,
257 		    amap->am_maxslot * sizeof(struct vm_anon *));
258 		amap_list_insert(amap);
259 	}
260 
261 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
262 	return(amap);
263 }
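
/*
 * illustrative sketch (not compiled): attaching a fresh amap to a map
 * entry, mirroring what amap_copy() does further below when the entry
 * has no amap yet.  the wrapper itself is made up for illustration;
 * "entry" is assumed to belong to a map locked by the caller.
 */
#if 0
static void
amap_alloc_example(struct vm_map_entry *entry)
{
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
	    UVM_FLAG_NOWAIT);
	if (entry->aref.ar_amap != NULL)
		entry->etype &= ~UVM_ET_NEEDSCOPY;
}
#endif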
264 
265 /*
266  * uvm_amap_init: initialize the amap system.
267  */
268 void
269 uvm_amap_init(void)
270 {
271 
272 	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
273 
274 	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0, 0,
275 	    "amappl", NULL, IPL_NONE, NULL, NULL, NULL);
276 }
277 
278 /*
279  * amap_free: free an amap
280  *
281  * => the amap must be unlocked
282  * => the amap should have a zero reference count and be empty
283  */
284 void
285 amap_free(struct vm_amap *amap)
286 {
287 	int slots;
288 
289 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
290 
291 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
292 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
293 	KASSERT(!mutex_owned(&amap->am_l));
294 	slots = amap->am_maxslot;
295 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
296 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
297 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
298 #ifdef UVM_AMAP_PPREF
299 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
300 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
301 #endif
302 	mutex_destroy(&amap->am_l);
303 	pool_cache_put(&uvm_amap_cache, amap);
304 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
305 }
306 
307 /*
308  * amap_extend: extend the size of an amap (if needed)
309  *
310  * => called from uvm_map when we want to extend an amap to cover
311  *    a new mapping (rather than allocate a new one)
312  * => amap should be unlocked (we will lock it)
313  * => to safely extend an amap it should have a reference count of
314  *    one (thus it can't be shared)
315  */
316 int
317 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
318 {
319 	struct vm_amap *amap = entry->aref.ar_amap;
320 	int slotoff = entry->aref.ar_pageoff;
321 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
322 	int slotadj, slotspace;
323 	int oldnslots;
324 #ifdef UVM_AMAP_PPREF
325 	int *newppref, *oldppref;
326 #endif
327 	int i, *newsl, *newbck, *oldsl, *oldbck;
328 	struct vm_anon **newover, **oldover;
329 	const km_flag_t kmflags =
330 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
331 
332 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
333 
334 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
335 	    entry, addsize, flags, 0);
336 
337 	/*
338 	 * first, determine how many slots we need in the amap.  don't
339 	 * forget that ar_pageoff could be non-zero: this means that
340 	 * there are some unused slots before us in the amap.
341 	 */
342 
343 	amap_lock(amap);
344 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
345 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
346 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
347 	if (flags & AMAP_EXTEND_FORWARDS) {
348 		slotneed = slotoff + slotmapped + slotadd;
349 		slotadj = 0;
350 		slotspace = 0;
351 	}
352 	else {
353 		slotneed = slotadd + slotmapped;
354 		slotadj = slotadd - slotoff;
355 		slotspace = amap->am_maxslot - slotmapped;
356 	}
357 
358 	/*
359 	 * case 1: we already have enough slots in the map and thus
360 	 * only need to bump the reference counts on the slots we are
361 	 * adding.
362 	 */
363 
364 	if (flags & AMAP_EXTEND_FORWARDS) {
365 		if (amap->am_nslot >= slotneed) {
366 #ifdef UVM_AMAP_PPREF
367 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
368 				amap_pp_adjref(amap, slotoff + slotmapped,
369 				    slotadd, 1);
370 			}
371 #endif
372 			amap_unlock(amap);
373 			UVMHIST_LOG(maphist,
374 			    "<- done (case 1f), amap = 0x%x, sltneed=%d",
375 			    amap, slotneed, 0, 0);
376 			return 0;
377 		}
378 	} else {
379 		if (slotadj <= 0) {
380 			slotoff -= slotadd;
381 			entry->aref.ar_pageoff = slotoff;
382 #ifdef UVM_AMAP_PPREF
383 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
384 				amap_pp_adjref(amap, slotoff, slotadd, 1);
385 			}
386 #endif
387 			amap_unlock(amap);
388 			UVMHIST_LOG(maphist,
389 			    "<- done (case 1b), amap = 0x%x, sltneed=%d",
390 			    amap, slotneed, 0, 0);
391 			return 0;
392 		}
393 	}
394 
395 	/*
396 	 * case 2: we pre-allocated extra slots and we just need to
397 	 * bump am_nslot up to account for them.
398 	 */
399 
400 	if (amap->am_maxslot >= slotneed) {
401 		if (flags & AMAP_EXTEND_FORWARDS) {
402 #ifdef UVM_AMAP_PPREF
403 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
404 				if ((slotoff + slotmapped) < amap->am_nslot)
405 					amap_pp_adjref(amap,
406 					    slotoff + slotmapped,
407 					    (amap->am_nslot -
408 					    (slotoff + slotmapped)), 1);
409 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
410 				    slotneed - amap->am_nslot);
411 			}
412 #endif
413 			amap->am_nslot = slotneed;
414 			amap_unlock(amap);
415 
416 			/*
417 			 * no need to zero am_anon since that was done at
418 			 * alloc time and we never shrink an allocation.
419 			 */
420 
421 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
422 			    "slotneed=%d", amap, slotneed, 0, 0);
423 			return 0;
424 		} else {
425 #ifdef UVM_AMAP_PPREF
426 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
427 				/*
428 				 * Slide up the ref counts on the pages that
429 				 * are actually in use.
430 				 */
431 				memmove(amap->am_ppref + slotspace,
432 				    amap->am_ppref + slotoff,
433 				    slotmapped * sizeof(int));
434 				/*
435 				 * Mark the (adjusted) gap at the front as
436 				 * referenced/not referenced.
437 				 */
438 				pp_setreflen(amap->am_ppref,
439 				    0, 0, slotspace - slotadd);
440 				pp_setreflen(amap->am_ppref,
441 				    slotspace - slotadd, 1, slotadd);
442 			}
443 #endif
444 
445 			/*
446 			 * Slide the anon pointers up and clear out
447 			 * the space we just made.
448 			 */
449 			memmove(amap->am_anon + slotspace,
450 			    amap->am_anon + slotoff,
451 			    slotmapped * sizeof(struct vm_anon*));
452 			memset(amap->am_anon + slotoff, 0,
453 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
454 
455 			/*
456 			 * Slide the backpointers up, but don't bother
457 			 * wiping out the old slots.
458 			 */
459 			memmove(amap->am_bckptr + slotspace,
460 			    amap->am_bckptr + slotoff,
461 			    slotmapped * sizeof(int));
462 
463 			/*
464 			 * Adjust all the useful active slot numbers.
465 			 */
466 			for (i = 0; i < amap->am_nused; i++)
467 				amap->am_slots[i] += (slotspace - slotoff);
468 
469 			/*
470 			 * We just filled all the empty space in the
471 			 * front of the amap by activating a few new
472 			 * slots.
473 			 */
474 			amap->am_nslot = amap->am_maxslot;
475 			entry->aref.ar_pageoff = slotspace - slotadd;
476 			amap_unlock(amap);
477 
478 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
479 			    "slotneed=%d", amap, slotneed, 0, 0);
480 			return 0;
481 		}
482 	}
483 
484 	/*
485 	 * case 3: we need to allocate new (larger) slot arrays and copy
486 	 * all the amap data over from the old arrays to the new ones.
487 	 *
488 	 * note that the use of a kernel realloc() probably would not
489 	 * help here, since we wish to abort cleanly if one of the
490 	 * three (or four) mallocs fails.
491 	 */
492 
493 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
494 
495 	if (slotneed >= UVM_AMAP_LARGE) {
496 		return E2BIG;
497 	}
498 
499 	slotalloc = amap_roundup_slots(slotneed);
500 #ifdef UVM_AMAP_PPREF
501 	newppref = NULL;
502 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
503 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
504 #endif
505 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
506 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
507 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
508 	if (newsl == NULL || newbck == NULL || newover == NULL) {
509 #ifdef UVM_AMAP_PPREF
510 		if (newppref != NULL) {
511 			kmem_free(newppref, slotalloc * sizeof(*newppref));
512 		}
513 #endif
514 		if (newsl != NULL) {
515 			kmem_free(newsl, slotalloc * sizeof(*newsl));
516 		}
517 		if (newbck != NULL) {
518 			kmem_free(newbck, slotalloc * sizeof(*newbck));
519 		}
520 		if (newover != NULL) {
521 			kmem_free(newover, slotalloc * sizeof(*newover));
522 		}
523 		return ENOMEM;
524 	}
525 	amap_lock(amap);
526 	KASSERT(amap->am_maxslot < slotneed);
527 
528 	/*
529 	 * now copy everything over to new malloc'd areas...
530 	 */
531 
532 	slotadded = slotalloc - amap->am_nslot;
533 	if (!(flags & AMAP_EXTEND_FORWARDS))
534 		slotspace = slotalloc - slotmapped;
535 
536 	/* do am_slots */
537 	oldsl = amap->am_slots;
538 	if (flags & AMAP_EXTEND_FORWARDS)
539 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
540 	else
541 		for (i = 0; i < amap->am_nused; i++)
542 			newsl[i] = oldsl[i] + slotspace - slotoff;
543 	amap->am_slots = newsl;
544 
545 	/* do am_anon */
546 	oldover = amap->am_anon;
547 	if (flags & AMAP_EXTEND_FORWARDS) {
548 		memcpy(newover, oldover,
549 		    sizeof(struct vm_anon *) * amap->am_nslot);
550 		memset(newover + amap->am_nslot, 0,
551 		    sizeof(struct vm_anon *) * slotadded);
552 	} else {
553 		memcpy(newover + slotspace, oldover + slotoff,
554 		    sizeof(struct vm_anon *) * slotmapped);
555 		memset(newover, 0,
556 		    sizeof(struct vm_anon *) * slotspace);
557 	}
558 	amap->am_anon = newover;
559 
560 	/* do am_bckptr */
561 	oldbck = amap->am_bckptr;
562 	if (flags & AMAP_EXTEND_FORWARDS)
563 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
564 	else
565 		memcpy(newbck + slotspace, oldbck + slotoff,
566 		    sizeof(int) * slotmapped);
567 	amap->am_bckptr = newbck;
568 
569 #ifdef UVM_AMAP_PPREF
570 	/* do ppref */
571 	oldppref = amap->am_ppref;
572 	if (newppref) {
573 		if (flags & AMAP_EXTEND_FORWARDS) {
574 			memcpy(newppref, oldppref,
575 			    sizeof(int) * amap->am_nslot);
576 			memset(newppref + amap->am_nslot, 0,
577 			    sizeof(int) * slotadded);
578 		} else {
579 			memcpy(newppref + slotspace, oldppref + slotoff,
580 			    sizeof(int) * slotmapped);
581 		}
582 		amap->am_ppref = newppref;
583 		if ((flags & AMAP_EXTEND_FORWARDS) &&
584 		    (slotoff + slotmapped) < amap->am_nslot)
585 			amap_pp_adjref(amap, slotoff + slotmapped,
586 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
587 		if (flags & AMAP_EXTEND_FORWARDS)
588 			pp_setreflen(newppref, amap->am_nslot, 1,
589 			    slotneed - amap->am_nslot);
590 		else {
591 			pp_setreflen(newppref, 0, 0,
592 			    slotalloc - slotneed);
593 			pp_setreflen(newppref, slotalloc - slotneed, 1,
594 			    slotneed - slotmapped);
595 		}
596 	} else {
597 		if (amap->am_ppref)
598 			amap->am_ppref = PPREF_NONE;
599 	}
600 #endif
601 
602 	/* update master values */
603 	if (flags & AMAP_EXTEND_FORWARDS)
604 		amap->am_nslot = slotneed;
605 	else {
606 		entry->aref.ar_pageoff = slotspace - slotadd;
607 		amap->am_nslot = slotalloc;
608 	}
609 	oldnslots = amap->am_maxslot;
610 	amap->am_maxslot = slotalloc;
611 
612 	amap_unlock(amap);
613 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
614 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
615 	kmem_free(oldover, oldnslots * sizeof(*oldover));
616 #ifdef UVM_AMAP_PPREF
617 	if (oldppref && oldppref != PPREF_NONE)
618 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
619 #endif
620 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
621 	    amap, slotneed, 0, 0);
622 	return 0;
623 }
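
/*
 * illustrative sketch (not compiled): growing an existing amap when its
 * mapping is extended forwards by "grow" bytes, roughly as uvm_map does.
 * the wrapper is made up for illustration; the amap must be unshared
 * (am_ref == 1) and unlocked, as described above.
 */
#if 0
static int
amap_extend_example(struct vm_map_entry *entry, vsize_t grow, bool canwait)
{
	int flags = AMAP_EXTEND_FORWARDS;

	if (!canwait)
		flags |= AMAP_EXTEND_NOWAIT;
	return amap_extend(entry, grow, flags);
}
#endif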
624 
625 /*
626  * amap_share_protect: change protection of anons in a shared amap
627  *
628  * for shared amaps, given the current data structure layout, it is
629  * not possible for us to directly locate all maps referencing the
630  * shared anon (to change the protection).  in order to protect data
631  * in shared maps we use pmap_page_protect().  [this is useful for IPC
632  * mechanisms like map entry passing that may want to write-protect
633  * all mappings of a shared amap.]  we traverse am_anon or am_slots
634  * depending on the current state of the amap.
635  *
636  * => entry's map and amap must be locked by the caller
637  */
638 void
639 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
640 {
641 	struct vm_amap *amap = entry->aref.ar_amap;
642 	int slots, lcv, slot, stop;
643 
644 	KASSERT(mutex_owned(&amap->am_l));
645 
646 	AMAP_B2SLOT(slots, (entry->end - entry->start));
647 	stop = entry->aref.ar_pageoff + slots;
648 
649 	if (slots < amap->am_nused) {
650 		/* cheaper to traverse am_anon */
651 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
652 			if (amap->am_anon[lcv] == NULL)
653 				continue;
654 			if (amap->am_anon[lcv]->an_page != NULL)
655 				pmap_page_protect(amap->am_anon[lcv]->an_page,
656 						  prot);
657 		}
658 		return;
659 	}
660 
661 	/* cheaper to traverse am_slots */
662 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
663 		slot = amap->am_slots[lcv];
664 		if (slot < entry->aref.ar_pageoff || slot >= stop)
665 			continue;
666 		if (amap->am_anon[slot]->an_page != NULL)
667 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
668 	}
669 }
670 
671 /*
672  * amap_wipeout: wipe out all anons in an amap; then free the amap!
673  *
674  * => called from amap_unref when the final reference to an amap is
675  *	discarded (i.e. when reference count drops to 0)
676  * => the amap should be locked (by the caller)
677  */
678 
679 void
680 amap_wipeout(struct vm_amap *amap)
681 {
682 	int lcv, slot;
683 	struct vm_anon *anon;
684 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
685 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
686 
687 	KASSERT(amap->am_ref == 0);
688 
689 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
690 		/*
691 		 * amap_swap_off will call us again.
692 		 */
693 		amap_unlock(amap);
694 		return;
695 	}
696 	amap_list_remove(amap);
697 	amap_unlock(amap);
698 
699 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
700 		int refs;
701 
702 		slot = amap->am_slots[lcv];
703 		anon = amap->am_anon[slot];
704 		KASSERT(anon != NULL && anon->an_ref != 0);
705 
706 		mutex_enter(&anon->an_lock);
707 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
708 		    anon->an_ref, 0, 0);
709 		refs = --anon->an_ref;
710 		mutex_exit(&anon->an_lock);
711 		if (refs == 0) {
712 
713 			/*
714 			 * we had the last reference to a vm_anon. free it.
715 			 */
716 
717 			uvm_anfree(anon);
718 		}
719 
720 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
721 			preempt();
722 	}
723 
724 	/*
725 	 * now we free the map
726 	 */
727 
728 	amap->am_nused = 0;
729 	amap_free(amap);	/* will unlock and free amap */
730 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
731 }
732 
733 /*
734  * amap_copy: ensure that a map entry's "needs_copy" flag is false
735  *	by copying the amap if necessary.
736  *
737  * => an entry with a null amap pointer will get a new (blank) one.
738  * => the map that the map entry belongs to must be locked by caller.
739  * => the amap currently attached to "entry" (if any) must be unlocked.
740  * => if canchunk is true, then we may clip the entry into a chunk
741  * => "startva" and "endva" are used only if canchunk is true.  they are
742  *     used to limit chunking (e.g. if you have a large space that you
743  *     know you are going to need to allocate amaps for, there is no point
744  *     in allowing that to be chunked)
745  */
746 
747 void
748 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
749     vaddr_t startva, vaddr_t endva)
750 {
751 	struct vm_amap *amap, *srcamap;
752 	int slots, lcv;
753 	vaddr_t chunksize;
754 	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
755 	const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
756 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
757 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
758 		    map, entry, flags, 0);
759 
760 	KASSERT(map != kernel_map);	/* we use nointr pool */
761 
762 	/*
763 	 * is there a map to copy?   if not, create one from scratch.
764 	 */
765 
766 	if (entry->aref.ar_amap == NULL) {
767 
768 		/*
769 		 * check to see if we have a large amap that we can
770 		 * chunk.  we align startva/endva to chunk-sized
771 		 * boundaries and then clip to them.
772 		 */
773 
774 		if (canchunk && atop(entry->end - entry->start) >=
775 		    UVM_AMAP_LARGE) {
776 			/* convert slots to bytes */
777 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
778 			startva = (startva / chunksize) * chunksize;
779 			endva = roundup(endva, chunksize);
780 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
781 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
782 			    endva);
783 			UVM_MAP_CLIP_START(map, entry, startva, NULL);
784 			/* watch out for endva wrap-around! */
785 			if (endva >= startva)
786 				UVM_MAP_CLIP_END(map, entry, endva, NULL);
787 		}
788 
789 		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
790 		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
791 			return;
792 		}
793 
794 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
795 		entry->start, entry->end, 0, 0);
796 		entry->aref.ar_pageoff = 0;
797 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
798 		    waitf);
799 		if (entry->aref.ar_amap != NULL)
800 			entry->etype &= ~UVM_ET_NEEDSCOPY;
801 		return;
802 	}
803 
804 	/*
805 	 * first check and see if we are the only map entry
806 	 * referencing the amap we currently have.  if so, then we can
807 	 * just take it over rather than copying it.  note that we are
808 	 * reading am_ref with the amap unlocked... the value can only
809 	 * be one if we have the only reference to the amap (via our
810  * locked map).  if it is greater than one we fall through to
811 	 * the next case (where we double check the value).
812 	 */
813 
814 	if (entry->aref.ar_amap->am_ref == 1) {
815 		entry->etype &= ~UVM_ET_NEEDSCOPY;
816 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
817 		    0, 0, 0, 0);
818 		return;
819 	}
820 
821 	/*
822 	 * looks like we need to copy the map.
823 	 */
824 
825 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
826 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
827 	AMAP_B2SLOT(slots, entry->end - entry->start);
828 	amap = amap_alloc1(slots, 0, waitf);
829 	if (amap == NULL) {
830 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
831 		return;
832 	}
833 	srcamap = entry->aref.ar_amap;
834 	amap_lock(srcamap);
835 
836 	/*
837 	 * need to double check reference count now that we've got the
838 	 * src amap locked down.  the reference count could have
839 	 * changed while we were in malloc.  if the reference count
840 	 * dropped down to one we take over the old map rather than
841 	 * copying the amap.
842 	 */
843 
844 	if (srcamap->am_ref == 1) {		/* take it over? */
845 		entry->etype &= ~UVM_ET_NEEDSCOPY;
846 		amap->am_ref--;		/* drop final reference to map */
847 		amap_free(amap);	/* dispose of new (unused) amap */
848 		amap_unlock(srcamap);
849 		return;
850 	}
851 
852 	/*
853 	 * we must copy it now.
854 	 */
855 
856 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
857 	for (lcv = 0 ; lcv < slots; lcv++) {
858 		amap->am_anon[lcv] =
859 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
860 		if (amap->am_anon[lcv] == NULL)
861 			continue;
862 		mutex_enter(&amap->am_anon[lcv]->an_lock);
863 		amap->am_anon[lcv]->an_ref++;
864 		mutex_exit(&amap->am_anon[lcv]->an_lock);
865 		amap->am_bckptr[lcv] = amap->am_nused;
866 		amap->am_slots[amap->am_nused] = lcv;
867 		amap->am_nused++;
868 	}
869 	memset(&amap->am_anon[lcv], 0,
870 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
871 
872 	/*
873 	 * drop our reference to the old amap (srcamap) and unlock.
874 	 * we know that the reference count on srcamap is greater than
875 	 * one (we checked above), so there is no way we could drop
876 	 * the count to zero.  [and no need to worry about freeing it]
877 	 */
878 
879 	srcamap->am_ref--;
880 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
881 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
882 #ifdef UVM_AMAP_PPREF
883 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
884 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
885 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
886 	}
887 #endif
888 
889 	amap_unlock(srcamap);
890 
891 	amap_list_insert(amap);
892 
893 	/*
894 	 * install new amap.
895 	 */
896 
897 	entry->aref.ar_pageoff = 0;
898 	entry->aref.ar_amap = amap;
899 	entry->etype &= ~UVM_ET_NEEDSCOPY;
900 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
901 }
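
/*
 * illustrative sketch (not compiled): how a fault handler might clear
 * the needs-copy state for the single page it is touching by requesting
 * a chunked copy around the faulting address.  the wrapper is made up
 * for illustration; "va" is the faulting address and the entry's map
 * must be locked by the caller.
 */
#if 0
static void
amap_copy_example(struct vm_map *map, struct vm_map_entry *entry, vaddr_t va)
{
	if (UVM_ET_ISNEEDSCOPY(entry))
		amap_copy(map, entry, AMAP_COPY_NOWAIT, va, va + 1);
}
#endif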
902 
903 /*
904  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
905  *
906  *	called during fork(2) when the parent process has a wired map
907  *	entry.   in that case we want to avoid write-protecting pages
908  *	in the parent's map (e.g. like what you'd do for a COW page)
909  *	so we resolve the COW here.
910  *
911  * => assume parent's entry was wired, thus all pages are resident.
912  * => assume pages that are loaned out (loan_count) are already mapped
913  *	read-only in all maps, and thus no need for us to worry about them
914  * => assume both parent and child vm_map's are locked
915  * => caller passes child's map/entry in to us
916  * => if we run out of memory we will unlock the amap and sleep _with_ the
917  *	parent and child vm_map's locked(!).    we have to do this since
918  *	we are in the middle of a fork(2) and we can't let the parent
919 	 *	map change until we are done copying all the map entries.
920  * => XXXCDC: out of memory should cause fork to fail, but there is
921  *	currently no easy way to do this (needs fix)
922  * => page queues must be unlocked (we may lock them)
923  */
924 
925 void
926 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
927 {
928 	struct vm_amap *amap = entry->aref.ar_amap;
929 	int lcv, slot;
930 	struct vm_anon *anon, *nanon;
931 	struct vm_page *pg, *npg;
932 
933 	/*
934 	 * note that if we unlock the amap then we must ReStart the "lcv" for
935 	 * loop because some other process could reorder the anons in the
936 	 * am_anon[] array on us while the lock is dropped.
937 	 */
938 
939 ReStart:
940 	amap_lock(amap);
941 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
942 
943 		/*
944 		 * get the page
945 		 */
946 
947 		slot = amap->am_slots[lcv];
948 		anon = amap->am_anon[slot];
949 		mutex_enter(&anon->an_lock);
950 
951 		/*
952 		 * If the anon has only one ref, we must have already copied it.
953 		 * This can happen if we needed to sleep waiting for memory
954 		 * in a previous run through this loop.  The new page might
955 		 * even have been paged out, since the new page is not wired.
956 		 */
957 
958 		if (anon->an_ref == 1) {
959 			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
960 			mutex_exit(&anon->an_lock);
961 			continue;
962 		}
963 
964 		/*
965 		 * The old page must be resident since the parent is wired.
966 		 */
967 
968 		pg = anon->an_page;
969 		KASSERT(pg != NULL);
970 		KASSERT(pg->wire_count > 0);
971 
972 		/*
973 		 * If the page is loaned then it must already be mapped
974 		 * read-only and we don't need to copy it.
975 		 */
976 
977 		if (pg->loan_count != 0) {
978 			mutex_exit(&anon->an_lock);
979 			continue;
980 		}
981 		KASSERT(pg->uanon == anon && pg->uobject == NULL);
982 
983 		/*
984 		 * if the page is busy then we have to unlock, wait for
985 		 * it and then restart.
986 		 */
987 
988 		if (pg->flags & PG_BUSY) {
989 			pg->flags |= PG_WANTED;
990 			amap_unlock(amap);
991 			UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, false,
992 			    "cownow", 0);
993 			goto ReStart;
994 		}
995 
996 		/*
997 		 * ok, time to do a copy-on-write to a new anon
998 		 */
999 
1000 		nanon = uvm_analloc();
1001 		if (nanon) {
1002 			npg = uvm_pagealloc(NULL, 0, nanon, 0);
1003 		} else
1004 			npg = NULL;	/* XXX: quiet gcc warning */
1005 		if (nanon == NULL || npg == NULL) {
1006 
1007 			/*
1008 			 * XXXCDC: we should cause fork to fail, but we can't.
1009 			 */
1010 
1011 			if (nanon) {
1012 				nanon->an_ref--;
1013 				mutex_exit(&nanon->an_lock);
1014 				uvm_anfree(nanon);
1015 			}
1016 			mutex_exit(&anon->an_lock);
1017 			amap_unlock(amap);
1018 			uvm_wait("cownowpage");
1019 			goto ReStart;
1020 		}
1021 
1022 		/*
1023 		 * got it... now we can copy the data and replace anon
1024 		 * with our new one...
1025 		 */
1026 
1027 		uvm_pagecopy(pg, npg);		/* old -> new */
1028 		anon->an_ref--;			/* can't drop to zero */
1029 		amap->am_anon[slot] = nanon;	/* replace */
1030 
1031 		/*
1032 		 * drop PG_BUSY on new page ... since we have had its owner
1033 		 * locked the whole time it can't be PG_RELEASED or PG_WANTED.
1034 		 */
1035 
1036 		mutex_enter(&uvm_pageqlock);
1037 		uvm_pageactivate(npg);
1038 		mutex_exit(&uvm_pageqlock);
1039 		npg->flags &= ~(PG_BUSY|PG_FAKE);
1040 		UVM_PAGE_OWN(npg, NULL);
1041 		mutex_exit(&nanon->an_lock);
1042 		mutex_exit(&anon->an_lock);
1043 	}
1044 	amap_unlock(amap);
1045 }
1046 
1047 /*
1048  * amap_splitref: split a single reference into two separate references
1049  *
1050  * => called from uvm_map's clip routines
1051  * => origref's map should be locked
1052  * => origref->ar_amap should be unlocked (we will lock)
1053  */
1054 void
1055 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1056 {
1057 	int leftslots;
1058 	struct vm_amap *amap;
1059 
1060 	KASSERT(splitref->ar_amap == origref->ar_amap);
1061 	AMAP_B2SLOT(leftslots, offset);
1062 	KASSERT(leftslots != 0);
1063 
1064 	amap = origref->ar_amap;
1065 	amap_lock(amap);
1066 
1067 	/*
1068 	 * now: amap is locked.  make sure the split leaves slots on both sides.
1069 	 */
1070 	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1071 
1072 #ifdef UVM_AMAP_PPREF
1073 	/*
1074 	 * establish ppref before we add a duplicate reference to the amap
1075 	 */
1076 	if (amap->am_ppref == NULL)
1077 		amap_pp_establish(amap, origref->ar_pageoff);
1078 #endif
1079 
1080 	amap->am_ref++;		/* not a share reference */
1081 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1082 
1083 	amap_unlock(amap);
1084 }
1085 
1086 #ifdef UVM_AMAP_PPREF
1087 
1088 /*
1089  * amap_pp_establish: add a ppref array to an amap, if possible
1090  *
1091  * => amap locked by caller
1092  */
1093 void
1094 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1095 {
1096 
1097 	amap->am_ppref = kmem_alloc(amap->am_maxslot * sizeof(*amap->am_ppref),
1098 	    KM_NOSLEEP);
1099 
1100 	/*
1101 	 * if we fail then we just won't use ppref for this amap
1102 	 */
1103 
1104 	if (amap->am_ppref == NULL) {
1105 		amap->am_ppref = PPREF_NONE;	/* not using it */
1106 		return;
1107 	}
1108 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1109 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1110 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1111 	    amap->am_nslot - offset);
1112 	return;
1113 }
1114 
1115 /*
1116  * amap_pp_adjref: adjust reference count to a part of an amap using the
1117  * per-page reference count array.
1118  *
1119  * => map and amap locked by caller
1120  * => caller must check that ppref != PPREF_NONE before calling
1121  */
1122 void
1123 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
1124 {
1125 	int stopslot, *ppref, lcv, prevlcv;
1126 	int ref, len, prevref, prevlen;
1127 
1128 	stopslot = curslot + slotlen;
1129 	ppref = amap->am_ppref;
1130 	prevlcv = 0;
1131 
1132 	/*
1133 	 * first advance to the correct place in the ppref array,
1134 	 * fragment if needed.
1135 	 */
1136 
1137 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1138 		pp_getreflen(ppref, lcv, &ref, &len);
1139 		if (lcv + len > curslot) {     /* goes past start? */
1140 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1141 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1142 			len = curslot - lcv;   /* new length of entry @ lcv */
1143 		}
1144 		prevlcv = lcv;
1145 	}
1146 	if (lcv != 0)
1147 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1148 	else {
1149 		/* Ensure that the "prevref == ref" test below always
1150 		 * fails, since we're starting from the beginning of
1151 		 * the ppref array; that is, there is no previous
1152 		 * chunk.
1153 		 */
1154 		prevref = -1;
1155 		prevlen = 0;
1156 	}
1157 
1158 	/*
1159 	 * now adjust reference counts in range.  merge the first
1160 	 * changed entry with the last unchanged entry if possible.
1161 	 */
1162 	KASSERT(lcv == curslot);
1163 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1164 		pp_getreflen(ppref, lcv, &ref, &len);
1165 		if (lcv + len > stopslot) {     /* goes past end? */
1166 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1167 			pp_setreflen(ppref, stopslot, ref,
1168 			    len - (stopslot - lcv));
1169 			len = stopslot - lcv;
1170 		}
1171 		ref += adjval;
1172 		KASSERT(ref >= 0);
1173 		if (lcv == prevlcv + prevlen && ref == prevref) {
1174 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1175 		} else {
1176 			pp_setreflen(ppref, lcv, ref, len);
1177 		}
1178 		if (ref == 0)
1179 			amap_wiperange(amap, lcv, len);
1180 	}
1181 
1182 }
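
/*
 * illustrative sketch (not compiled): dropping the per-page reference
 * count on part of an amap, as amap_unref() does further below when only
 * a portion of the amap loses its reference.  the slot numbers are made
 * up for illustration; the amap must be locked.
 */
#if 0
static void
pp_adjref_example(struct vm_amap *amap)
{
	if (amap->am_ppref != NULL && amap->am_ppref != PPREF_NONE)
		amap_pp_adjref(amap, 4, 8, -1);	/* slots 4..11 lose one ref */
}
#endif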
1183 
1184 /*
1185  * amap_wiperange: wipe out a range of an amap
1186  * [different from amap_wipeout because the amap is kept intact]
1187  *
1188  * => both map and amap must be locked by caller.
1189  */
1190 void
1191 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1192 {
1193 	int byanon, lcv, stop, curslot, ptr, slotend;
1194 	struct vm_anon *anon;
1195 
1196 	/*
1197 	 * we can either traverse the amap by am_anon or by am_slots depending
1198 	 * on which is cheaper.    decide now.
1199 	 */
1200 
1201 	if (slots < amap->am_nused) {
1202 		byanon = true;
1203 		lcv = slotoff;
1204 		stop = slotoff + slots;
1205 		slotend = 0;
1206 	} else {
1207 		byanon = false;
1208 		lcv = 0;
1209 		stop = amap->am_nused;
1210 		slotend = slotoff + slots;
1211 	}
1212 
1213 	while (lcv < stop) {
1214 		int refs;
1215 
1216 		if (byanon) {
1217 			curslot = lcv++;	/* lcv advances here */
1218 			if (amap->am_anon[curslot] == NULL)
1219 				continue;
1220 		} else {
1221 			curslot = amap->am_slots[lcv];
1222 			if (curslot < slotoff || curslot >= slotend) {
1223 				lcv++;		/* lcv advances here */
1224 				continue;
1225 			}
1226 			stop--;	/* drop stop, since anon will be removed */
1227 		}
1228 		anon = amap->am_anon[curslot];
1229 
1230 		/*
1231 		 * remove it from the amap
1232 		 */
1233 
1234 		amap->am_anon[curslot] = NULL;
1235 		ptr = amap->am_bckptr[curslot];
1236 		if (ptr != (amap->am_nused - 1)) {
1237 			amap->am_slots[ptr] =
1238 			    amap->am_slots[amap->am_nused - 1];
1239 			amap->am_bckptr[amap->am_slots[ptr]] =
1240 			    ptr;    /* back ptr. */
1241 		}
1242 		amap->am_nused--;
1243 
1244 		/*
1245 		 * drop anon reference count
1246 		 */
1247 
1248 		mutex_enter(&anon->an_lock);
1249 		refs = --anon->an_ref;
1250 		mutex_exit(&anon->an_lock);
1251 		if (refs == 0) {
1252 
1253 			/*
1254 			 * we just eliminated the last reference to an anon.
1255 			 * free it.
1256 			 */
1257 
1258 			uvm_anfree(anon);
1259 		}
1260 	}
1261 }
1262 
1263 #endif
1264 
1265 #if defined(VMSWAP)
1266 
1267 /*
1268  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1269  *
1270  * => called with swap_syscall_lock held.
1271  * => note that we don't always traverse all anons.
1272  *    eg. amaps being wiped out, released anons.
1273  * => return true if failed.
1274  */
1275 
1276 bool
1277 amap_swap_off(int startslot, int endslot)
1278 {
1279 	struct vm_amap *am;
1280 	struct vm_amap *am_next;
1281 	struct vm_amap marker_prev;
1282 	struct vm_amap marker_next;
1283 	struct lwp *l = curlwp;
1284 	bool rv = false;
1285 
1286 #if defined(DIAGNOSTIC)
1287 	memset(&marker_prev, 0, sizeof(marker_prev));
1288 	memset(&marker_next, 0, sizeof(marker_next));
1289 #endif /* defined(DIAGNOSTIC) */
1290 
1291 	uvm_lwp_hold(l);
1292 	mutex_enter(&amap_list_lock);
1293 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1294 		int i;
1295 
1296 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1297 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1298 
1299 		if (!amap_lock_try(am)) {
1300 			mutex_exit(&amap_list_lock);
1301 			preempt();
1302 			mutex_enter(&amap_list_lock);
1303 			am_next = LIST_NEXT(&marker_prev, am_list);
1304 			if (am_next == &marker_next) {
1305 				am_next = LIST_NEXT(am_next, am_list);
1306 			} else {
1307 				KASSERT(LIST_NEXT(am_next, am_list) ==
1308 				    &marker_next);
1309 			}
1310 			LIST_REMOVE(&marker_prev, am_list);
1311 			LIST_REMOVE(&marker_next, am_list);
1312 			continue;
1313 		}
1314 
1315 		mutex_exit(&amap_list_lock);
1316 
1317 		if (am->am_nused <= 0) {
1318 			amap_unlock(am);
1319 			goto next;
1320 		}
1321 
1322 		for (i = 0; i < am->am_nused; i++) {
1323 			int slot;
1324 			int swslot;
1325 			struct vm_anon *anon;
1326 
1327 			slot = am->am_slots[i];
1328 			anon = am->am_anon[slot];
1329 			mutex_enter(&anon->an_lock);
1330 
1331 			swslot = anon->an_swslot;
1332 			if (swslot < startslot || endslot <= swslot) {
1333 				mutex_exit(&anon->an_lock);
1334 				continue;
1335 			}
1336 
1337 			am->am_flags |= AMAP_SWAPOFF;
1338 			amap_unlock(am);
1339 
1340 			rv = uvm_anon_pagein(anon);
1341 
1342 			amap_lock(am);
1343 			am->am_flags &= ~AMAP_SWAPOFF;
1344 			if (amap_refs(am) == 0) {
1345 				amap_wipeout(am);
1346 				am = NULL;
1347 				break;
1348 			}
1349 			if (rv) {
1350 				break;
1351 			}
1352 			i = 0;
1353 		}
1354 
1355 		if (am) {
1356 			amap_unlock(am);
1357 		}
1358 
1359 next:
1360 		mutex_enter(&amap_list_lock);
1361 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1362 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1363 		    &marker_next);
1364 		am_next = LIST_NEXT(&marker_next, am_list);
1365 		LIST_REMOVE(&marker_prev, am_list);
1366 		LIST_REMOVE(&marker_next, am_list);
1367 	}
1368 	mutex_exit(&amap_list_lock);
1369 	uvm_lwp_rele(l);
1370 
1371 	return rv;
1372 }
1373 
1374 #endif /* defined(VMSWAP) */
1375 
1376 /*
1377  * amap_lookup: look up a page in an amap
1378  *
1379  * => amap should be locked by caller.
1380  */
1381 struct vm_anon *
1382 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1383 {
1384 	int slot;
1385 	struct vm_amap *amap = aref->ar_amap;
1386 	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
1387 	KASSERT(mutex_owned(&amap->am_l));
1388 
1389 	AMAP_B2SLOT(slot, offset);
1390 	slot += aref->ar_pageoff;
1391 	KASSERT(slot < amap->am_nslot);
1392 
1393 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
1394 	    amap, offset, amap->am_anon[slot], 0);
1395 	return(amap->am_anon[slot]);
1396 }
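
/*
 * illustrative sketch (not compiled): resolving the anon backing a
 * virtual address, in the style of the fault code.  the wrapper is made
 * up for illustration; "va" must lie within the entry and the amap must
 * be locked by the caller.
 */
#if 0
static struct vm_anon *
amap_lookup_example(struct vm_map_entry *entry, vaddr_t va)
{
	return amap_lookup(&entry->aref, va - entry->start);
}
#endif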
1397 
1398 /*
1399  * amap_lookups: look up a range of pages in an amap
1400  *
1401  * => amap should be locked by caller.
1402  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
1403  */
1404 void
1405 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1406     int npages)
1407 {
1408 	int slot;
1409 	struct vm_amap *amap = aref->ar_amap;
1410 	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
1411 	KASSERT(mutex_owned(&amap->am_l));
1412 
1413 	AMAP_B2SLOT(slot, offset);
1414 	slot += aref->ar_pageoff;
1415 
1416 	UVMHIST_LOG(maphist, "  slot=%d, npages=%d, nslot=%d", slot, npages,
1417 		amap->am_nslot, 0);
1418 
1419 	KASSERT((slot + (npages - 1)) < amap->am_nslot);
1420 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1421 
1422 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1423 	return;
1424 }
1425 
1426 /*
1427  * amap_add: add (or replace) a page to an amap
1428  *
1429  * => caller must lock amap.
1430  * => if (replace) caller must lock anon because we might have to call
1431  *	pmap_page_protect on the anon's page.
1432  */
1433 void
1434 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1435     bool replace)
1436 {
1437 	int slot;
1438 	struct vm_amap *amap = aref->ar_amap;
1439 	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
1440 	KASSERT(mutex_owned(&amap->am_l));
1441 
1442 	AMAP_B2SLOT(slot, offset);
1443 	slot += aref->ar_pageoff;
1444 	KASSERT(slot < amap->am_nslot);
1445 
1446 	if (replace) {
1447 		KASSERT(amap->am_anon[slot] != NULL);
1448 		if (amap->am_anon[slot]->an_page != NULL &&
1449 		    (amap->am_flags & AMAP_SHARED) != 0) {
1450 			pmap_page_protect(amap->am_anon[slot]->an_page,
1451 			    VM_PROT_NONE);
1452 			/*
1453 			 * XXX: suppose page is supposed to be wired somewhere?
1454 			 */
1455 		}
1456 	} else {   /* !replace */
1457 		KASSERT(amap->am_anon[slot] == NULL);
1458 		amap->am_bckptr[slot] = amap->am_nused;
1459 		amap->am_slots[amap->am_nused] = slot;
1460 		amap->am_nused++;
1461 	}
1462 	amap->am_anon[slot] = anon;
1463 	UVMHIST_LOG(maphist,
1464 	    "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
1465 	    amap, offset, anon, replace);
1466 }
1467 
1468 /*
1469  * amap_unadd: remove a page from an amap
1470  *
1471  * => caller must lock amap
1472  */
1473 void
1474 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1475 {
1476 	int ptr, slot;
1477 	struct vm_amap *amap = aref->ar_amap;
1478 	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
1479 	KASSERT(mutex_owned(&amap->am_l));
1480 
1481 	AMAP_B2SLOT(slot, offset);
1482 	slot += aref->ar_pageoff;
1483 	KASSERT(slot < amap->am_nslot);
1484 	KASSERT(amap->am_anon[slot] != NULL);
1485 
1486 	amap->am_anon[slot] = NULL;
1487 	ptr = amap->am_bckptr[slot];
1488 
1489 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1490 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1491 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1492 	}
1493 	amap->am_nused--;
1494 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0);
1495 }
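
/*
 * illustrative sketch (not compiled): the invariant maintained by
 * amap_add()/amap_unadd() above.  am_slots[] is a dense list of the
 * slots currently in use and am_bckptr[] records where each used slot
 * sits in am_slots[], which is what lets amap_unadd() remove a slot in
 * O(1) by swapping the last am_slots[] entry into the hole.
 */
#if 0
static void
amap_slots_check(struct vm_amap *amap)
{
	int i;

	for (i = 0; i < amap->am_nused; i++) {
		KASSERT(amap->am_anon[amap->am_slots[i]] != NULL);
		KASSERT(amap->am_bckptr[amap->am_slots[i]] == i);
	}
}
#endif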
1496 
1497 /*
1498  * amap_ref: gain a reference to an amap
1499  *
1500  * => amap must not be locked (we will lock)
1501  * => "offset" and "len" are in units of pages
1502  * => called at fork time to gain the child's reference
1503  */
1504 void
1505 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1506 {
1507 	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
1508 
1509 	amap_lock(amap);
1510 	if (flags & AMAP_SHARED)
1511 		amap->am_flags |= AMAP_SHARED;
1512 #ifdef UVM_AMAP_PPREF
1513 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1514 	    len != amap->am_nslot)
1515 		amap_pp_establish(amap, offset);
1516 #endif
1517 	amap->am_ref++;
1518 #ifdef UVM_AMAP_PPREF
1519 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1520 		if (flags & AMAP_REFALL)
1521 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1522 		else
1523 			amap_pp_adjref(amap, offset, len, 1);
1524 	}
1525 #endif
1526 	amap_unlock(amap);
1527 	UVMHIST_LOG(maphist,"<- done!  amap=0x%x", amap, 0, 0, 0);
1528 }
1529 
1530 /*
1531  * amap_unref: remove a reference to an amap
1532  *
1533  * => caller must remove all pmap-level references to this amap before
1534  *	dropping the reference
1535  * => called from uvm_unmap_detach [only]  ... note that entry is no
1536  *	longer part of a map and thus has no need for locking
1537  * => amap must be unlocked (we will lock it).
1538  */
1539 void
1540 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1541 {
1542 	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
1543 
1544 	/*
1545 	 * lock it
1546 	 */
1547 	amap_lock(amap);
1548 	UVMHIST_LOG(maphist,"  amap=0x%x  refs=%d, nused=%d",
1549 	    amap, amap->am_ref, amap->am_nused, 0);
1550 
1551 	KASSERT(amap_refs(amap) > 0);
1552 
1553 	/*
1554 	 * if we are the last reference, free the amap and return.
1555 	 */
1556 
1557 	amap->am_ref--;
1558 
1559 	if (amap_refs(amap) == 0) {
1560 		amap_wipeout(amap);	/* drops final ref and frees */
1561 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1562 		return;			/* no need to unlock */
1563 	}
1564 
1565 	/*
1566 	 * otherwise just drop the reference count(s)
1567 	 */
1568 
1569 	if (amap_refs(amap) == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1570 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1571 #ifdef UVM_AMAP_PPREF
1572 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1573 		amap_pp_establish(amap, offset);
1574 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1575 		if (all)
1576 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1577 		else
1578 			amap_pp_adjref(amap, offset, len, -1);
1579 	}
1580 #endif
1581 	amap_unlock(amap);
1582 
1583 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1584 }
1585 
1586