xref: /netbsd-src/sys/uvm/uvm_amap.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*	$NetBSD: uvm_amap.c,v 1.108 2017/10/28 00:37:13 pgoyette Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * uvm_amap.c: amap operations
30  */
31 
32 /*
33  * this file contains functions that perform operations on amaps.  see
34  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.108 2017/10/28 00:37:13 pgoyette Exp $");
39 
40 #include "opt_uvmhist.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/pool.h>
47 #include <sys/atomic.h>
48 
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_swap.h>
51 
52 /*
53  * cache for allocation of vm_amap structures.  note that in order to
54  * avoid an endless loop, the amap cache's allocator cannot allocate
55  * memory from an amap (it currently goes through the kernel uobj, so
56  * we are ok).
57  */
58 static struct pool_cache uvm_amap_cache;
/* amap_list_lock is held around every insertion into/removal from amap_list. */
59 static kmutex_t amap_list_lock;
/* list of amaps, linked through their am_list field. */
60 static LIST_HEAD(, vm_amap) amap_list;
61 
62 /*
63  * local functions
64  */
65 
66 static inline void
67 amap_list_insert(struct vm_amap *amap)
68 {
69 
70 	mutex_enter(&amap_list_lock);
71 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
72 	mutex_exit(&amap_list_lock);
73 }
74 
75 static inline void
76 amap_list_remove(struct vm_amap *amap)
77 {
78 
79 	mutex_enter(&amap_list_lock);
80 	LIST_REMOVE(amap, am_list);
81 	mutex_exit(&amap_list_lock);
82 }
83 
/*
 * amap_roundup_slots: convert a slot count into the slot count that an
 * int array of kmem_roundup_size() bytes can hold.
 */
static int
amap_roundup_slots(int slots)
{
	/* size in bytes of an int-per-slot array, as rounded by kmem */
	const size_t rounded = kmem_roundup_size(slots * sizeof(int));

	return rounded / sizeof(int);
}
90 
91 #ifdef UVM_AMAP_PPREF
92 /*
93  * what is ppref?   ppref is an _optional_ amap feature which is used
94  * to keep track of reference counts on a per-page basis.  it is enabled
95  * when UVM_AMAP_PPREF is defined.
96  *
97  * when enabled, an array of ints is allocated for the pprefs.  this
98  * array is allocated only when a partial reference is added to the
99  * map (either by unmapping part of the amap, or gaining a reference
100  * to only a part of an amap).  if the allocation of the array fails
101  * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
102  * that we tried to do ppref's but couldn't alloc the array so just
103  * give up (after all, this is an optional feature!).
104  *
105  * the array is divided into page sized "chunks."   for chunks of length 1,
106  * the chunk reference count plus one is stored in that chunk's slot.
107  * for chunks of length > 1 the first slot contains (the reference count
108  * plus one) * -1.    [the negative value indicates that the length is
109  * greater than one.]   the second slot of the chunk contains the length
110  * of the chunk.   here is an example:
111  *
112  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
113  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
114  *              <----------><-><----><-------><----><-><------->
115  * (x = don't care)
116  *
117  * this lets a single int hold the ref count for the whole
118  * chunk.    note that the "plus one" part is needed because a reference
119  * count of zero is neither positive nor negative (need a way to tell
120  * if we've got one zero or a bunch of them).
121  *
122  * here are some in-line functions to help us.
123  */
124 
/*
 * pp_getreflen: decode the chunk that starts at "offset" in a ppref array
 *
 * => *refp receives the chunk's reference count, *lenp its length in slots
 * => a positive first slot encodes a one-slot chunk; otherwise the first
 *    slot holds -(ref + 1) and the following slot holds the length
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{
	const int head = ppref[offset];

	if (head <= 0) {
		/* multi-slot chunk: undo the negation and the "plus one" */
		*refp = -head - 1;
		*lenp = ppref[offset + 1];
		return;
	}

	/* single-slot chunk: only the "plus one" needs undoing */
	*refp = head - 1;
	*lenp = 1;
}
142 
/*
 * pp_setreflen: encode a chunk of "len" slots with reference count "ref"
 * starting at "offset" in a ppref array
 *
 * => a zero-length chunk writes nothing
 * => a one-slot chunk stores ref + 1; longer chunks store -(ref + 1)
 *    followed by the length in the next slot
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	switch (len) {
	case 0:
		/* nothing to describe */
		return;
	case 1:
		ppref[offset] = ref + 1;
		return;
	default:
		ppref[offset] = -(ref + 1);
		ppref[offset + 1] = len;
		return;
	}
}
160 #endif /* UVM_AMAP_PPREF */
161 
162 /*
163  * amap_alloc1: allocate an amap, but do not initialise the overlay.
164  *
165  * => Note: lock is not set.
166  */
167 static struct vm_amap *
168 amap_alloc1(int slots, int padslots, int flags)
169 {
170 	const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
171 	const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
172 	struct vm_amap *amap;
173 	int totalslots;
174 
175 	amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
176 	if (amap == NULL) {
177 		return NULL;
178 	}
179 	totalslots = amap_roundup_slots(slots + padslots);
180 	amap->am_lock = NULL;
181 	amap->am_ref = 1;
182 	amap->am_flags = 0;
183 #ifdef UVM_AMAP_PPREF
184 	amap->am_ppref = NULL;
185 #endif
186 	amap->am_maxslot = totalslots;
187 	amap->am_nslot = slots;
188 	amap->am_nused = 0;
189 
190 	/*
191 	 * Note: since allocations are likely big, we expect to reduce the
192 	 * memory fragmentation by allocating them in separate blocks.
193 	 */
194 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
195 	if (amap->am_slots == NULL)
196 		goto fail1;
197 
198 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
199 	if (amap->am_bckptr == NULL)
200 		goto fail2;
201 
202 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
203 	    kmflags);
204 	if (amap->am_anon == NULL)
205 		goto fail3;
206 
207 	return amap;
208 
209 fail3:
210 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
211 fail2:
212 	kmem_free(amap->am_slots, totalslots * sizeof(int));
213 fail1:
214 	pool_cache_put(&uvm_amap_cache, amap);
215 
216 	/*
217 	 * XXX hack to tell the pagedaemon how many pages we need,
218 	 * since we can need more than it would normally free.
219 	 */
220 	if (nowait) {
221 		extern u_int uvm_extrapages;
222 		atomic_add_int(&uvm_extrapages,
223 		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
224 		    totalslots) >> PAGE_SHIFT);
225 	}
226 	return NULL;
227 }
228 
229 /*
230  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
231  *
232  * => caller should ensure sz is a multiple of PAGE_SIZE
233  * => reference count to new amap is set to one
234  * => new amap is returned unlocked
235  */
236 
237 struct vm_amap *
238 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
239 {
240 	struct vm_amap *amap;
241 	int slots, padslots;
242 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
243 
244 	AMAP_B2SLOT(slots, sz);
245 	AMAP_B2SLOT(padslots, padsz);
246 
247 	amap = amap_alloc1(slots, padslots, waitf);
248 	if (amap) {
249 		memset(amap->am_anon, 0,
250 		    amap->am_maxslot * sizeof(struct vm_anon *));
251 		amap->am_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
252 		amap_list_insert(amap);
253 	}
254 
255 	UVMHIST_LOG(maphist,"<- done, amap = 0x%#jx, sz=%jd", (uintptr_t)amap,
256 	    sz, 0, 0);
257 	return(amap);
258 }
259 
260 /*
261  * uvm_amap_init: initialize the amap system.
262  */
263 void
264 uvm_amap_init(void)
265 {
266 
267 	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
268 
269 	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0, 0,
270 	    "amappl", NULL, IPL_NONE, NULL, NULL, NULL);
271 }
272 
273 /*
274  * amap_free: free an amap
275  *
276  * => the amap must be unlocked
277  * => the amap should have a zero reference count and be empty
278  */
279 void
280 amap_free(struct vm_amap *amap)
281 {
282 	int slots;
283 
284 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
285 
286 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
287 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
288 	if (amap->am_lock != NULL) {
289 		KASSERT(!mutex_owned(amap->am_lock));
290 		mutex_obj_free(amap->am_lock);
291 	}
292 	slots = amap->am_maxslot;
293 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
294 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
295 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
296 #ifdef UVM_AMAP_PPREF
297 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
298 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
299 #endif
300 	pool_cache_put(&uvm_amap_cache, amap);
301 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%#jx", (uintptr_t)amap,
302 	    0, 0, 0);
303 }
304 
305 /*
306  * amap_extend: extend the size of an amap (if needed)
307  *
308  * => called from uvm_map when we want to extend an amap to cover
309  *    a new mapping (rather than allocate a new one)
310  * => amap should be unlocked (we will lock it)
311  * => to safely extend an amap it should have a reference count of
312  *    one (thus it can't be shared)
 * => entry: map entry whose aref names the amap; for a backwards
 *    extension its aref.ar_pageoff is rewritten to the new first slot
 * => addsize: number of bytes to add (converted to slots by AMAP_B2SLOT)
 * => flags: with AMAP_EXTEND_FORWARDS the new slots follow the entry's
 *    slots, otherwise they precede them; AMAP_EXTEND_NOWAIT makes the
 *    case 3 allocations use KM_NOSLEEP instead of KM_SLEEP
 * => returns 0 on success, E2BIG if new arrays would be needed for
 *    UVM_AMAP_LARGE or more slots, ENOMEM if allocating them fails
313  */
314 int
315 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
316 {
317 	struct vm_amap *amap = entry->aref.ar_amap;
318 	int slotoff = entry->aref.ar_pageoff;
	/*
	 * slotmapped: slots covered by the entry now; slotadd: slots being
	 * added; slotneed: slots the amap must hold afterwards; slotalloc:
	 * rounded-up array size used in case 3; slotadded: slotalloc minus
	 * the old am_nslot (tail of the new arrays that gets zeroed).
	 */
319 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
	/*
	 * backwards extension only.  slotadj: slotadd - slotoff, i.e. how
	 * many slots are missing in front of the mapping (<= 0 means the
	 * existing gap is large enough).  slotspace: index at which the
	 * mapped slots start after being slid to the end of the arrays.
	 */
320 	int slotadj, slotspace;
321 	int oldnslots;
322 #ifdef UVM_AMAP_PPREF
323 	int *newppref, *oldppref;
324 #endif
325 	int i, *newsl, *newbck, *oldsl, *oldbck;
326 	struct vm_anon **newover, **oldover, *tofree;
327 	const km_flag_t kmflags =
328 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
329 
330 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
331 
332 	UVMHIST_LOG(maphist, "  (entry=0x%#jx, addsize=0x%jx, flags=0x%jx)",
333 	    (uintptr_t)entry, addsize, flags, 0);
334 
335 	/*
336 	 * first, determine how many slots we need in the amap.  don't
337 	 * forget that ar_pageoff could be non-zero: this means that
338 	 * there are some unused slots before us in the amap.
339 	 */
340 
341 	amap_lock(amap);
342 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
343 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
344 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
345 	if (flags & AMAP_EXTEND_FORWARDS) {
346 		slotneed = slotoff + slotmapped + slotadd;
347 		slotadj = 0;
348 		slotspace = 0;
349 	}
350 	else {
351 		slotneed = slotadd + slotmapped;
352 		slotadj = slotadd - slotoff;
353 		slotspace = amap->am_maxslot - slotmapped;
354 	}
	/* anons collected by amap_pp_adjref() for uvm_anon_freelst() */
355 	tofree = NULL;
356 
357 	/*
358 	 * case 1: we already have enough slots in the map and thus
359 	 * only need to bump the reference counts on the slots we are
360 	 * adding.
361 	 */
362 
363 	if (flags & AMAP_EXTEND_FORWARDS) {
364 		if (amap->am_nslot >= slotneed) {
365 #ifdef UVM_AMAP_PPREF
366 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
367 				amap_pp_adjref(amap, slotoff + slotmapped,
368 				    slotadd, 1, &tofree);
369 			}
370 #endif
			/*
			 * NOTE(review): there is no amap_unlock() on this
			 * path; uvm_anon_freelst() presumably drops the amap
			 * lock -- confirm against its definition.
			 */
371 			uvm_anon_freelst(amap, tofree);
372 			UVMHIST_LOG(maphist,
373 			    "<- done (case 1f), amap = 0x%#jx, sltneed=%jd",
374 			    (uintptr_t)amap, slotneed, 0, 0);
375 			return 0;
376 		}
377 	} else {
378 		if (slotadj <= 0) {
			/* enough unused slots in front: just start earlier */
379 			slotoff -= slotadd;
380 			entry->aref.ar_pageoff = slotoff;
381 #ifdef UVM_AMAP_PPREF
382 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
383 				amap_pp_adjref(amap, slotoff, slotadd, 1,
384 				    &tofree);
385 			}
386 #endif
387 			uvm_anon_freelst(amap, tofree);
388 			UVMHIST_LOG(maphist,
389 			    "<- done (case 1b), amap = 0x%#jx, sltneed=%jd",
390 			    (uintptr_t)amap, slotneed, 0, 0);
391 			return 0;
392 		}
393 	}
394 
395 	/*
396 	 * case 2: we pre-allocated slots for use and we just need to
397 	 * bump nslot up to take account for these slots.
398 	 */
399 
400 	if (amap->am_maxslot >= slotneed) {
401 		if (flags & AMAP_EXTEND_FORWARDS) {
402 #ifdef UVM_AMAP_PPREF
403 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
				/*
				 * slots between the end of the mapping and
				 * the old am_nslot gain a reference; the
				 * slots from am_nslot up to slotneed become
				 * one new chunk with reference count 1.
				 */
404 				if ((slotoff + slotmapped) < amap->am_nslot)
405 					amap_pp_adjref(amap,
406 					    slotoff + slotmapped,
407 					    (amap->am_nslot -
408 					    (slotoff + slotmapped)), 1,
409 					    &tofree);
410 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
411 				    slotneed - amap->am_nslot);
412 			}
413 #endif
414 			amap->am_nslot = slotneed;
415 			uvm_anon_freelst(amap, tofree);
416 
417 			/*
418 			 * no need to zero am_anon since that was done at
419 			 * alloc time and we never shrink an allocation.
420 			 */
421 
422 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%#jx, "
423 			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
424 			return 0;
425 		} else {
426 #ifdef UVM_AMAP_PPREF
427 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
428 				/*
429 				 * Slide up the ref counts on the pages that
430 				 * are actually in use.
431 				 */
432 				memmove(amap->am_ppref + slotspace,
433 				    amap->am_ppref + slotoff,
434 				    slotmapped * sizeof(int));
435 				/*
436 				 * Mark the (adjusted) gap at the front as
437 				 * referenced/not referenced.
438 				 */
439 				pp_setreflen(amap->am_ppref,
440 				    0, 0, slotspace - slotadd);
441 				pp_setreflen(amap->am_ppref,
442 				    slotspace - slotadd, 1, slotadd);
443 			}
444 #endif
445 
446 			/*
447 			 * Slide the anon pointers up and clear out
448 			 * the space we just made.
449 			 */
450 			memmove(amap->am_anon + slotspace,
451 			    amap->am_anon + slotoff,
452 			    slotmapped * sizeof(struct vm_anon*));
453 			memset(amap->am_anon + slotoff, 0,
454 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
455 
456 			/*
457 			 * Slide the backpointers up, but don't bother
458 			 * wiping out the old slots.
459 			 */
460 			memmove(amap->am_bckptr + slotspace,
461 			    amap->am_bckptr + slotoff,
462 			    slotmapped * sizeof(int));
463 
464 			/*
465 			 * Adjust all the useful active slot numbers.
466 			 */
467 			for (i = 0; i < amap->am_nused; i++)
468 				amap->am_slots[i] += (slotspace - slotoff);
469 
470 			/*
471 			 * We just filled all the empty space in the
472 			 * front of the amap by activating a few new
473 			 * slots.
474 			 */
475 			amap->am_nslot = amap->am_maxslot;
476 			entry->aref.ar_pageoff = slotspace - slotadd;
			/* tofree is still NULL here, so unlock directly */
477 			amap_unlock(amap);
478 
479 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%#jx, "
480 			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
481 			return 0;
482 		}
483 	}
484 
485 	/*
486 	 * Case 3: we need to allocate a new amap and copy all the amap
487 	 * data over from old amap to the new one.  Drop the lock before
488 	 * performing allocation.
489 	 *
490 	 * Note: since allocations are likely big, we expect to reduce the
491 	 * memory fragmentation by allocating them in separate blocks.
492 	 */
493 
494 	amap_unlock(amap);
495 
	/* both error returns below happen with the amap already unlocked */
496 	if (slotneed >= UVM_AMAP_LARGE) {
497 		return E2BIG;
498 	}
499 
500 	slotalloc = amap_roundup_slots(slotneed);
501 #ifdef UVM_AMAP_PPREF
502 	newppref = NULL;
503 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
504 		/* Will be handled later if fails. */
505 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
506 	}
507 #endif
508 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
509 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
510 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
511 	if (newsl == NULL || newbck == NULL || newover == NULL) {
		/* a mandatory array is missing: release whatever we did get */
512 #ifdef UVM_AMAP_PPREF
513 		if (newppref != NULL) {
514 			kmem_free(newppref, slotalloc * sizeof(*newppref));
515 		}
516 #endif
517 		if (newsl != NULL) {
518 			kmem_free(newsl, slotalloc * sizeof(*newsl));
519 		}
520 		if (newbck != NULL) {
521 			kmem_free(newbck, slotalloc * sizeof(*newbck));
522 		}
523 		if (newover != NULL) {
524 			kmem_free(newover, slotalloc * sizeof(*newover));
525 		}
526 		return ENOMEM;
527 	}
528 	amap_lock(amap);
529 	KASSERT(amap->am_maxslot < slotneed);
530 
531 	/*
532 	 * Copy everything over to new allocated areas.
533 	 */
534 
535 	slotadded = slotalloc - amap->am_nslot;
	/* backwards: the mapped slots now land at the end of the new arrays */
536 	if (!(flags & AMAP_EXTEND_FORWARDS))
537 		slotspace = slotalloc - slotmapped;
538 
539 	/* do am_slots */
540 	oldsl = amap->am_slots;
541 	if (flags & AMAP_EXTEND_FORWARDS)
542 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
543 	else
544 		for (i = 0; i < amap->am_nused; i++)
545 			newsl[i] = oldsl[i] + slotspace - slotoff;
546 	amap->am_slots = newsl;
547 
548 	/* do am_anon */
549 	oldover = amap->am_anon;
550 	if (flags & AMAP_EXTEND_FORWARDS) {
551 		memcpy(newover, oldover,
552 		    sizeof(struct vm_anon *) * amap->am_nslot);
553 		memset(newover + amap->am_nslot, 0,
554 		    sizeof(struct vm_anon *) * slotadded);
555 	} else {
556 		memcpy(newover + slotspace, oldover + slotoff,
557 		    sizeof(struct vm_anon *) * slotmapped);
558 		memset(newover, 0,
559 		    sizeof(struct vm_anon *) * slotspace);
560 	}
561 	amap->am_anon = newover;
562 
563 	/* do am_bckptr */
564 	oldbck = amap->am_bckptr;
565 	if (flags & AMAP_EXTEND_FORWARDS)
566 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
567 	else
568 		memcpy(newbck + slotspace, oldbck + slotoff,
569 		    sizeof(int) * slotmapped);
570 	amap->am_bckptr = newbck;
571 
572 #ifdef UVM_AMAP_PPREF
573 	/* do ppref */
574 	oldppref = amap->am_ppref;
575 	if (newppref) {
576 		if (flags & AMAP_EXTEND_FORWARDS) {
577 			memcpy(newppref, oldppref,
578 			    sizeof(int) * amap->am_nslot);
579 			memset(newppref + amap->am_nslot, 0,
580 			    sizeof(int) * slotadded);
581 		} else {
582 			memcpy(newppref + slotspace, oldppref + slotoff,
583 			    sizeof(int) * slotmapped);
584 		}
585 		amap->am_ppref = newppref;
586 		if ((flags & AMAP_EXTEND_FORWARDS) &&
587 		    (slotoff + slotmapped) < amap->am_nslot)
588 			amap_pp_adjref(amap, slotoff + slotmapped,
589 			    (amap->am_nslot - (slotoff + slotmapped)), 1,
590 			    &tofree);
591 		if (flags & AMAP_EXTEND_FORWARDS)
592 			pp_setreflen(newppref, amap->am_nslot, 1,
593 			    slotneed - amap->am_nslot);
594 		else {
595 			pp_setreflen(newppref, 0, 0,
596 			    slotalloc - slotneed);
597 			pp_setreflen(newppref, slotalloc - slotneed, 1,
598 			    slotneed - slotmapped);
599 		}
600 	} else {
		/*
		 * no new ppref array (none existed, or its allocation
		 * failed): any existing ppref state is replaced by the
		 * PPREF_NONE marker; the old array is freed below.
		 */
601 		if (amap->am_ppref)
602 			amap->am_ppref = PPREF_NONE;
603 	}
604 #endif
605 
606 	/* update master values */
607 	if (flags & AMAP_EXTEND_FORWARDS)
608 		amap->am_nslot = slotneed;
609 	else {
610 		entry->aref.ar_pageoff = slotspace - slotadd;
611 		amap->am_nslot = slotalloc;
612 	}
	/* remember the old array length: kmem_free() needs the size */
613 	oldnslots = amap->am_maxslot;
614 	amap->am_maxslot = slotalloc;
615 
616 	uvm_anon_freelst(amap, tofree);
617 
618 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
619 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
620 	kmem_free(oldover, oldnslots * sizeof(*oldover));
621 #ifdef UVM_AMAP_PPREF
622 	if (oldppref && oldppref != PPREF_NONE)
623 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
624 #endif
625 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%#jx, slotneed=%jd",
626 	    (uintptr_t)amap, slotneed, 0, 0);
627 	return 0;
628 }
629 
630 /*
631  * amap_share_protect: change protection of anons in a shared amap
632  *
633  * for shared amaps, given the current data structure layout, it is
634  * not possible for us to directly locate all maps referencing the
635  * shared anon (to change the protection).  in order to protect data
636  * in shared maps we use pmap_page_protect().  [this is useful for IPC
637  * mechanisms like map entry passing that may want to write-protect
638  * all mappings of a shared amap.]  we traverse am_anon or am_slots
639  * depending on the current state of the amap.
640  *
641  * => entry's map and amap must be locked by the caller
642  */
643 void
644 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
645 {
646 	struct vm_amap *amap = entry->aref.ar_amap;
647 	u_int slots, lcv, slot, stop;
648 	struct vm_anon *anon;
649 
650 	KASSERT(mutex_owned(amap->am_lock));
651 
652 	AMAP_B2SLOT(slots, (entry->end - entry->start));
653 	stop = entry->aref.ar_pageoff + slots;
654 
655 	if (slots < amap->am_nused) {
656 		/*
657 		 * Cheaper to traverse am_anon.
658 		 */
659 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
660 			anon = amap->am_anon[lcv];
661 			if (anon == NULL) {
662 				continue;
663 			}
664 			if (anon->an_page) {
665 				pmap_page_protect(anon->an_page, prot);
666 			}
667 		}
668 		return;
669 	}
670 
671 	/*
672 	 * Cheaper to traverse am_slots.
673 	 */
674 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
675 		slot = amap->am_slots[lcv];
676 		if (slot < entry->aref.ar_pageoff || slot >= stop) {
677 			continue;
678 		}
679 		anon = amap->am_anon[slot];
680 		if (anon->an_page) {
681 			pmap_page_protect(anon->an_page, prot);
682 		}
683 	}
684 }
685 
686 /*
687  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
688  *
689  * => Called from amap_unref(), when reference count drops to zero.
690  * => amap must be locked.
691  */
692 
693 void
694 amap_wipeout(struct vm_amap *amap)
695 {
696 	struct vm_anon *tofree = NULL;
697 	u_int lcv;
698 
699 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
700 	UVMHIST_LOG(maphist,"(amap=0x%#jx)", (uintptr_t)amap, 0,0,0);
701 
702 	KASSERT(mutex_owned(amap->am_lock));
703 	KASSERT(amap->am_ref == 0);
704 
705 	if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
706 		/*
707 		 * Note: amap_swap_off() will call us again.
708 		 */
709 		amap_unlock(amap);
710 		return;
711 	}
712 	amap_list_remove(amap);
713 
714 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
715 		struct vm_anon *anon;
716 		u_int slot;
717 
718 		slot = amap->am_slots[lcv];
719 		anon = amap->am_anon[slot];
720 		KASSERT(anon != NULL && anon->an_ref != 0);
721 
722 		KASSERT(anon->an_lock == amap->am_lock);
723 		UVMHIST_LOG(maphist,"  processing anon 0x%#jx, ref=%jd",
724 		    (uintptr_t)anon, anon->an_ref, 0, 0);
725 
726 		/*
727 		 * Drop the reference.  Defer freeing.
728 		 */
729 
730 		if (--anon->an_ref == 0) {
731 			anon->an_link = tofree;
732 			tofree = anon;
733 		}
734 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
735 			preempt();
736 		}
737 	}
738 
739 	/*
740 	 * Finally, destroy the amap.
741 	 */
742 
743 	amap->am_nused = 0;
744 	uvm_anon_freelst(amap, tofree);
745 	amap_free(amap);
746 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
747 }
748 
749 /*
750  * amap_copy: ensure that a map entry's "needs_copy" flag is false
751  *	by copying the amap if necessary.
752  *
753  * => an entry with a null amap pointer will get a new (blank) one.
754  * => the map that the map entry belongs to must be locked by caller.
755  * => the amap currently attached to "entry" (if any) must be unlocked.
756  * => if canchunk is true, then we may clip the entry into a chunk
757  * => "startva" and "endva" are used only if canchunk is true.  they are
758  *     used to limit chunking (e.g. if you have a large space that you
759  *     know you are going to need to allocate amaps for, there is no point
760  *     in allowing that to be chunked)
761  */
762 
763 void
764 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
765     vaddr_t startva, vaddr_t endva)
766 {
767 	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
768 	struct vm_amap *amap, *srcamap;
769 	struct vm_anon *tofree;
770 	u_int slots, lcv;
771 	vsize_t len;
772 
773 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
774 	UVMHIST_LOG(maphist, "  (map=%#j, entry=%#j, flags=%jd)",
775 		    (uintptr_t)map, (uintptr_t)entry, flags, 0);
776 
777 	KASSERT(map != kernel_map);	/* we use nointr pool */
778 
779 	srcamap = entry->aref.ar_amap;
780 	len = entry->end - entry->start;
781 
782 	/*
783 	 * Is there an amap to copy?  If not, create one.
784 	 */
785 
786 	if (srcamap == NULL) {
787 		const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
788 
789 		/*
790 		 * Check to see if we have a large amap that we can
791 		 * chunk.  We align startva/endva to chunk-sized
792 		 * boundaries and then clip to them.
793 		 */
794 
795 		if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
796 			vsize_t chunksize;
797 
798 			/* Convert slots to bytes. */
799 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
800 			startva = (startva / chunksize) * chunksize;
801 			endva = roundup(endva, chunksize);
802 			UVMHIST_LOG(maphist,
803 			    "  chunk amap ==> clip 0x%jx->0x%jx to "
804 			    "0x%jx->0x%jx",
805 			    entry->start, entry->end, startva, endva);
806 			UVM_MAP_CLIP_START(map, entry, startva);
807 
808 			/* Watch out for endva wrap-around! */
809 			if (endva >= startva) {
810 				UVM_MAP_CLIP_END(map, entry, endva);
811 			}
812 		}
813 
814 		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
815 		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
816 			return;
817 		}
818 
819 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%jx->0x%jx]",
820 		    entry->start, entry->end, 0, 0);
821 
822 		/*
823 		 * Allocate an initialised amap and install it.
824 		 * Note: we must update the length after clipping.
825 		 */
826 		len = entry->end - entry->start;
827 		entry->aref.ar_pageoff = 0;
828 		entry->aref.ar_amap = amap_alloc(len, 0, waitf);
829 		if (entry->aref.ar_amap != NULL) {
830 			entry->etype &= ~UVM_ET_NEEDSCOPY;
831 		}
832 		return;
833 	}
834 
835 	/*
836 	 * First check and see if we are the only map entry referencing
837 	 * he amap we currently have.  If so, then just take it over instead
838 	 * of copying it.  Note that we are reading am_ref without lock held
839 	 * as the value value can only be one if we have the only reference
840 	 * to the amap (via our locked map).  If the value is greater than
841 	 * one, then allocate amap and re-check the value.
842 	 */
843 
844 	if (srcamap->am_ref == 1) {
845 		entry->etype &= ~UVM_ET_NEEDSCOPY;
846 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
847 		    0, 0, 0, 0);
848 		return;
849 	}
850 
851 	UVMHIST_LOG(maphist,"  amap=%#j, ref=%jd, must copy it",
852 	    (uintptr_t)srcamap, srcamap->am_ref, 0, 0);
853 
854 	/*
855 	 * Allocate a new amap (note: not initialised, no lock set, etc).
856 	 */
857 
858 	AMAP_B2SLOT(slots, len);
859 	amap = amap_alloc1(slots, 0, waitf);
860 	if (amap == NULL) {
861 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
862 		return;
863 	}
864 
865 	amap_lock(srcamap);
866 
867 	/*
868 	 * Re-check the reference count with the lock held.  If it has
869 	 * dropped to one - we can take over the existing map.
870 	 */
871 
872 	if (srcamap->am_ref == 1) {
873 		/* Just take over the existing amap. */
874 		entry->etype &= ~UVM_ET_NEEDSCOPY;
875 		amap_unlock(srcamap);
876 		/* Destroy the new (unused) amap. */
877 		amap->am_ref--;
878 		amap_free(amap);
879 		return;
880 	}
881 
882 	/*
883 	 * Copy the slots.  Zero the padded part.
884 	 */
885 
886 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
887 	for (lcv = 0 ; lcv < slots; lcv++) {
888 		amap->am_anon[lcv] =
889 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
890 		if (amap->am_anon[lcv] == NULL)
891 			continue;
892 		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
893 		KASSERT(amap->am_anon[lcv]->an_ref > 0);
894 		KASSERT(amap->am_nused < amap->am_maxslot);
895 		amap->am_anon[lcv]->an_ref++;
896 		amap->am_bckptr[lcv] = amap->am_nused;
897 		amap->am_slots[amap->am_nused] = lcv;
898 		amap->am_nused++;
899 	}
900 	memset(&amap->am_anon[lcv], 0,
901 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
902 
903 	/*
904 	 * Drop our reference to the old amap (srcamap) and unlock.
905 	 * Since the reference count on srcamap is greater than one,
906 	 * (we checked above), it cannot drop to zero while it is locked.
907 	 */
908 
909 	srcamap->am_ref--;
910 	KASSERT(srcamap->am_ref > 0);
911 
912 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
913 		srcamap->am_flags &= ~AMAP_SHARED;
914 	}
915 	tofree = NULL;
916 #ifdef UVM_AMAP_PPREF
917 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
918 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
919 		    len >> PAGE_SHIFT, -1, &tofree);
920 	}
921 #endif
922 
923 	/*
924 	 * If we referenced any anons, then share the source amap's lock.
925 	 * Otherwise, we have nothing in common, so allocate a new one.
926 	 */
927 
928 	KASSERT(amap->am_lock == NULL);
929 	if (amap->am_nused != 0) {
930 		amap->am_lock = srcamap->am_lock;
931 		mutex_obj_hold(amap->am_lock);
932 	}
933 	uvm_anon_freelst(srcamap, tofree);
934 
935 	if (amap->am_lock == NULL) {
936 		amap->am_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
937 	}
938 	amap_list_insert(amap);
939 
940 	/*
941 	 * Install new amap.
942 	 */
943 
944 	entry->aref.ar_pageoff = 0;
945 	entry->aref.ar_amap = amap;
946 	entry->etype &= ~UVM_ET_NEEDSCOPY;
947 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
948 }
949 
950 /*
951  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
952  *
953  *	called during fork(2) when the parent process has a wired map
954  *	entry.   in that case we want to avoid write-protecting pages
955  *	in the parent's map (e.g. like what you'd do for a COW page)
956  *	so we resolve the COW here.
957  *
958  * => assume parent's entry was wired, thus all pages are resident.
959  * => assume pages that are loaned out (loan_count) are already mapped
960  *	read-only in all maps, and thus no need for us to worry about them
961  * => assume both parent and child vm_map's are locked
962  * => caller passes child's map/entry in to us
963  * => if we run out of memory we will unlock the amap and sleep _with_ the
964  *	parent and child vm_map's locked(!).    we have to do this since
965  *	we are in the middle of a fork(2) and we can't let the parent
966  *	map change until we are done copying all the map entrys.
967  * => XXXCDC: out of memory should cause fork to fail, but there is
968  *	currently no easy way to do this (needs fix)
969  * => page queues must be unlocked (we may lock them)
970  */
971 
972 void
973 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
974 {
975 	struct vm_amap *amap = entry->aref.ar_amap;
976 	struct vm_anon *anon, *nanon;
977 	struct vm_page *pg, *npg;
978 	u_int lcv, slot;
979 
980 	/*
981 	 * note that if we unlock the amap then we must ReStart the "lcv" for
982 	 * loop because some other process could reorder the anon's in the
983 	 * am_anon[] array on us while the lock is dropped.
984 	 */
985 
986 ReStart:
987 	amap_lock(amap);
988 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
989 		slot = amap->am_slots[lcv];
990 		anon = amap->am_anon[slot];
991 		KASSERT(anon->an_lock == amap->am_lock);
992 
993 		/*
994 		 * If anon has only one reference - we must have already
995 		 * copied it.  This can happen if we needed to sleep waiting
996 		 * for memory in a previous run through this loop.  The new
997 		 * page might even have been paged out, since is not wired.
998 		 */
999 
1000 		if (anon->an_ref == 1) {
1001 			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
1002 			continue;
1003 		}
1004 
1005 		/*
1006 		 * The old page must be resident since the parent is wired.
1007 		 */
1008 
1009 		pg = anon->an_page;
1010 		KASSERT(pg != NULL);
1011 		KASSERT(pg->wire_count > 0);
1012 
1013 		/*
1014 		 * If the page is loaned then it must already be mapped
1015 		 * read-only and we don't need to copy it.
1016 		 */
1017 
1018 		if (pg->loan_count != 0) {
1019 			continue;
1020 		}
1021 		KASSERT(pg->uanon == anon && pg->uobject == NULL);
1022 
1023 		/*
1024 		 * If the page is busy, then we have to unlock, wait for
1025 		 * it and then restart.
1026 		 */
1027 
1028 		if (pg->flags & PG_BUSY) {
1029 			pg->flags |= PG_WANTED;
1030 			UVM_UNLOCK_AND_WAIT(pg, amap->am_lock, false,
1031 			    "cownow", 0);
1032 			goto ReStart;
1033 		}
1034 
1035 		/*
1036 		 * Perform a copy-on-write.
1037 		 * First - get a new anon and a page.
1038 		 */
1039 
1040 		nanon = uvm_analloc();
1041 		if (nanon) {
1042 			nanon->an_lock = amap->am_lock;
1043 			npg = uvm_pagealloc(NULL, 0, nanon, 0);
1044 		} else {
1045 			npg = NULL;
1046 		}
1047 		if (nanon == NULL || npg == NULL) {
1048 			amap_unlock(amap);
1049 			if (nanon) {
1050 				nanon->an_lock = NULL;
1051 				nanon->an_ref--;
1052 				KASSERT(nanon->an_ref == 0);
1053 				uvm_anon_free(nanon);
1054 			}
1055 			uvm_wait("cownowpage");
1056 			goto ReStart;
1057 		}
1058 
1059 		/*
1060 		 * Copy the data and replace anon with the new one.
1061 		 * Also, setup its lock (share the with amap's lock).
1062 		 */
1063 
1064 		uvm_pagecopy(pg, npg);
1065 		anon->an_ref--;
1066 		KASSERT(anon->an_ref > 0);
1067 		amap->am_anon[slot] = nanon;
1068 
1069 		/*
1070 		 * Drop PG_BUSY on new page.  Since its owner was locked all
1071 		 * this time - it cannot be PG_RELEASED or PG_WANTED.
1072 		 */
1073 
1074 		mutex_enter(&uvm_pageqlock);
1075 		uvm_pageactivate(npg);
1076 		mutex_exit(&uvm_pageqlock);
1077 		npg->flags &= ~(PG_BUSY|PG_FAKE);
1078 		UVM_PAGE_OWN(npg, NULL);
1079 	}
1080 	amap_unlock(amap);
1081 }
1082 
1083 /*
1084  * amap_splitref: split a single reference into two separate references
1085  *
1086  * => called from uvm_map's clip routines
1087  * => origref's map should be locked
1088  * => origref->ar_amap should be unlocked (we will lock)
1089  */
1090 void
1091 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1092 {
1093 	struct vm_amap *amap = origref->ar_amap;
1094 	u_int leftslots;
1095 
1096 	KASSERT(splitref->ar_amap == origref->ar_amap);
1097 	AMAP_B2SLOT(leftslots, offset);
1098 	KASSERT(leftslots != 0);
1099 
1100 	amap_lock(amap);
1101 	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1102 
1103 #ifdef UVM_AMAP_PPREF
1104 	/* Establish ppref before we add a duplicate reference to the amap. */
1105 	if (amap->am_ppref == NULL) {
1106 		amap_pp_establish(amap, origref->ar_pageoff);
1107 	}
1108 #endif
1109 	/* Note: not a share reference. */
1110 	amap->am_ref++;
1111 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1112 	amap_unlock(amap);
1113 }
1114 
1115 #ifdef UVM_AMAP_PPREF
1116 
1117 /*
1118  * amap_pp_establish: add a ppref array to an amap, if possible.
1119  *
1120  * => amap should be locked by caller.
1121  */
1122 void
1123 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1124 {
1125 	const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);
1126 
1127 	KASSERT(mutex_owned(amap->am_lock));
1128 
1129 	amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
1130 	if (amap->am_ppref == NULL) {
1131 		/* Failure - just do not use ppref. */
1132 		amap->am_ppref = PPREF_NONE;
1133 		return;
1134 	}
1135 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1136 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1137 	    amap->am_nslot - offset);
1138 }
1139 
1140 /*
1141  * amap_pp_adjref: adjust reference count to a part of an amap using the
1142  * per-page reference count array.
1143  *
1144  * => caller must check that ppref != PPREF_NONE before calling.
1145  * => map and amap must be locked.
1146  */
1147 void
1148 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval,
1149     struct vm_anon **tofree)
1150 {
1151 	int stopslot, *ppref, lcv, prevlcv;
1152 	int ref, len, prevref, prevlen;
1153 
1154 	KASSERT(mutex_owned(amap->am_lock));
1155 
1156 	stopslot = curslot + slotlen;
1157 	ppref = amap->am_ppref;
1158 	prevlcv = 0;
1159 
1160 	/*
1161 	 * Advance to the correct place in the array, fragment if needed.
1162 	 */
1163 
1164 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1165 		pp_getreflen(ppref, lcv, &ref, &len);
1166 		if (lcv + len > curslot) {     /* goes past start? */
1167 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1168 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1169 			len = curslot - lcv;   /* new length of entry @ lcv */
1170 		}
1171 		prevlcv = lcv;
1172 	}
1173 	if (lcv == 0) {
1174 		/*
1175 		 * Ensure that the "prevref == ref" test below always
1176 		 * fails, since we are starting from the beginning of
1177 		 * the ppref array; that is, there is no previous chunk.
1178 		 */
1179 		prevref = -1;
1180 		prevlen = 0;
1181 	} else {
1182 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1183 	}
1184 
1185 	/*
1186 	 * Now adjust reference counts in range.  Merge the first
1187 	 * changed entry with the last unchanged entry if possible.
1188 	 */
1189 	KASSERT(lcv == curslot);
1190 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1191 		pp_getreflen(ppref, lcv, &ref, &len);
1192 		if (lcv + len > stopslot) {     /* goes past end? */
1193 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1194 			pp_setreflen(ppref, stopslot, ref,
1195 			    len - (stopslot - lcv));
1196 			len = stopslot - lcv;
1197 		}
1198 		ref += adjval;
1199 		KASSERT(ref >= 0);
1200 		KASSERT(ref <= amap->am_ref);
1201 		if (lcv == prevlcv + prevlen && ref == prevref) {
1202 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1203 		} else {
1204 			pp_setreflen(ppref, lcv, ref, len);
1205 		}
1206 		if (ref == 0) {
1207 			amap_wiperange(amap, lcv, len, tofree);
1208 		}
1209 	}
1210 }
1211 
1212 /*
1213  * amap_wiperange: wipe out a range of an amap.
1214  * Note: different from amap_wipeout because the amap is kept intact.
1215  *
1216  * => Both map and amap must be locked by caller.
1217  */
1218 void
1219 amap_wiperange(struct vm_amap *amap, int slotoff, int slots,
1220     struct vm_anon **tofree)
1221 {
1222 	u_int lcv, stop, slotend;
1223 	bool byanon;
1224 
1225 	KASSERT(mutex_owned(amap->am_lock));
1226 
1227 	/*
1228 	 * We can either traverse the amap by am_anon or by am_slots.
1229 	 * Determine which way is less expensive.
1230 	 */
1231 
1232 	if (slots < amap->am_nused) {
1233 		byanon = true;
1234 		lcv = slotoff;
1235 		stop = slotoff + slots;
1236 		slotend = 0;
1237 	} else {
1238 		byanon = false;
1239 		lcv = 0;
1240 		stop = amap->am_nused;
1241 		slotend = slotoff + slots;
1242 	}
1243 
1244 	while (lcv < stop) {
1245 		struct vm_anon *anon;
1246 		u_int curslot, ptr, last;
1247 
1248 		if (byanon) {
1249 			curslot = lcv++;	/* lcv advances here */
1250 			if (amap->am_anon[curslot] == NULL)
1251 				continue;
1252 		} else {
1253 			curslot = amap->am_slots[lcv];
1254 			if (curslot < slotoff || curslot >= slotend) {
1255 				lcv++;		/* lcv advances here */
1256 				continue;
1257 			}
1258 			stop--;	/* drop stop, since anon will be removed */
1259 		}
1260 		anon = amap->am_anon[curslot];
1261 		KASSERT(anon->an_lock == amap->am_lock);
1262 
1263 		/*
1264 		 * Remove anon from the amap.
1265 		 */
1266 
1267 		amap->am_anon[curslot] = NULL;
1268 		ptr = amap->am_bckptr[curslot];
1269 		last = amap->am_nused - 1;
1270 		if (ptr != last) {
1271 			amap->am_slots[ptr] = amap->am_slots[last];
1272 			amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1273 		}
1274 		amap->am_nused--;
1275 
1276 		/*
1277 		 * Drop its reference count.
1278 		 */
1279 
1280 		KASSERT(anon->an_lock == amap->am_lock);
1281 		if (--anon->an_ref == 0) {
1282 			/*
1283 			 * Eliminated the last reference to an anon - defer
1284 			 * freeing as uvm_anon_freelst() will unlock the amap.
1285 			 */
1286 			anon->an_link = *tofree;
1287 			*tofree = anon;
1288 		}
1289 	}
1290 }
1291 
1292 #endif
1293 
1294 #if defined(VMSWAP)
1295 
1296 /*
1297  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1298  *
1299  * => called with swap_syscall_lock held.
1300  * => note that we don't always traverse all anons.
1301  *    eg. amaps being wiped out, released anons.
1302  * => return true if failed.
1303  */
1304 
1305 bool
1306 amap_swap_off(int startslot, int endslot)
1307 {
1308 	struct vm_amap *am;
1309 	struct vm_amap *am_next;
1310 	struct vm_amap marker_prev;
1311 	struct vm_amap marker_next;
1312 	bool rv = false;
1313 
1314 #if defined(DIAGNOSTIC)
1315 	memset(&marker_prev, 0, sizeof(marker_prev));
1316 	memset(&marker_next, 0, sizeof(marker_next));
1317 #endif /* defined(DIAGNOSTIC) */
1318 
1319 	mutex_enter(&amap_list_lock);
1320 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1321 		int i;
1322 
1323 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1324 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1325 
1326 		if (!amap_lock_try(am)) {
1327 			mutex_exit(&amap_list_lock);
1328 			preempt();
1329 			mutex_enter(&amap_list_lock);
1330 			am_next = LIST_NEXT(&marker_prev, am_list);
1331 			if (am_next == &marker_next) {
1332 				am_next = LIST_NEXT(am_next, am_list);
1333 			} else {
1334 				KASSERT(LIST_NEXT(am_next, am_list) ==
1335 				    &marker_next);
1336 			}
1337 			LIST_REMOVE(&marker_prev, am_list);
1338 			LIST_REMOVE(&marker_next, am_list);
1339 			continue;
1340 		}
1341 
1342 		mutex_exit(&amap_list_lock);
1343 
1344 		if (am->am_nused <= 0) {
1345 			amap_unlock(am);
1346 			goto next;
1347 		}
1348 
1349 		for (i = 0; i < am->am_nused; i++) {
1350 			int slot;
1351 			int swslot;
1352 			struct vm_anon *anon;
1353 
1354 			slot = am->am_slots[i];
1355 			anon = am->am_anon[slot];
1356 			KASSERT(anon->an_lock == am->am_lock);
1357 
1358 			swslot = anon->an_swslot;
1359 			if (swslot < startslot || endslot <= swslot) {
1360 				continue;
1361 			}
1362 
1363 			am->am_flags |= AMAP_SWAPOFF;
1364 
1365 			rv = uvm_anon_pagein(am, anon);
1366 			amap_lock(am);
1367 
1368 			am->am_flags &= ~AMAP_SWAPOFF;
1369 			if (amap_refs(am) == 0) {
1370 				amap_wipeout(am);
1371 				am = NULL;
1372 				break;
1373 			}
1374 			if (rv) {
1375 				break;
1376 			}
1377 			i = 0;
1378 		}
1379 
1380 		if (am) {
1381 			amap_unlock(am);
1382 		}
1383 
1384 next:
1385 		mutex_enter(&amap_list_lock);
1386 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1387 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1388 		    &marker_next);
1389 		am_next = LIST_NEXT(&marker_next, am_list);
1390 		LIST_REMOVE(&marker_prev, am_list);
1391 		LIST_REMOVE(&marker_next, am_list);
1392 	}
1393 	mutex_exit(&amap_list_lock);
1394 
1395 	return rv;
1396 }
1397 
1398 #endif /* defined(VMSWAP) */
1399 
1400 /*
1401  * amap_lookup: look up a page in an amap.
1402  *
1403  * => amap should be locked by caller.
1404  */
1405 struct vm_anon *
1406 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1407 {
1408 	struct vm_amap *amap = aref->ar_amap;
1409 	struct vm_anon *an;
1410 	u_int slot;
1411 
1412 	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
1413 	KASSERT(mutex_owned(amap->am_lock));
1414 
1415 	AMAP_B2SLOT(slot, offset);
1416 	slot += aref->ar_pageoff;
1417 	an = amap->am_anon[slot];
1418 
1419 	UVMHIST_LOG(maphist,
1420 	    "<- done (amap=0x%#jx, offset=0x%jx, result=0x%#jx)",
1421 	    (uintptr_t)amap, offset, (uintptr_t)an, 0);
1422 
1423 	KASSERT(slot < amap->am_nslot);
1424 	KASSERT(an == NULL || an->an_ref != 0);
1425 	KASSERT(an == NULL || an->an_lock == amap->am_lock);
1426 	return an;
1427 }
1428 
1429 /*
1430  * amap_lookups: look up a range of pages in an amap.
1431  *
1432  * => amap should be locked by caller.
1433  */
1434 void
1435 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1436     int npages)
1437 {
1438 	struct vm_amap *amap = aref->ar_amap;
1439 	u_int slot;
1440 
1441 	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
1442 	KASSERT(mutex_owned(amap->am_lock));
1443 
1444 	AMAP_B2SLOT(slot, offset);
1445 	slot += aref->ar_pageoff;
1446 
1447 	UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
1448 	    slot, npages, amap->am_nslot, 0);
1449 
1450 	KASSERT((slot + (npages - 1)) < amap->am_nslot);
1451 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1452 
1453 #if defined(DIAGNOSTIC)
1454 	for (int i = 0; i < npages; i++) {
1455 		struct vm_anon * const an = anons[i];
1456 		if (an == NULL) {
1457 			continue;
1458 		}
1459 		KASSERT(an->an_ref != 0);
1460 		KASSERT(an->an_lock == amap->am_lock);
1461 	}
1462 #endif
1463 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1464 }
1465 
1466 /*
1467  * amap_add: add (or replace) a page to an amap.
1468  *
1469  * => amap should be locked by caller.
1470  * => anon must have the lock associated with this amap.
1471  */
1472 void
1473 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1474     bool replace)
1475 {
1476 	struct vm_amap *amap = aref->ar_amap;
1477 	u_int slot;
1478 
1479 	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
1480 	KASSERT(mutex_owned(amap->am_lock));
1481 	KASSERT(anon->an_lock == amap->am_lock);
1482 
1483 	AMAP_B2SLOT(slot, offset);
1484 	slot += aref->ar_pageoff;
1485 	KASSERT(slot < amap->am_nslot);
1486 
1487 	if (replace) {
1488 		struct vm_anon *oanon = amap->am_anon[slot];
1489 
1490 		KASSERT(oanon != NULL);
1491 		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
1492 			pmap_page_protect(oanon->an_page, VM_PROT_NONE);
1493 			/*
1494 			 * XXX: suppose page is supposed to be wired somewhere?
1495 			 */
1496 		}
1497 	} else {
1498 		KASSERT(amap->am_anon[slot] == NULL);
1499 		KASSERT(amap->am_nused < amap->am_maxslot);
1500 		amap->am_bckptr[slot] = amap->am_nused;
1501 		amap->am_slots[amap->am_nused] = slot;
1502 		amap->am_nused++;
1503 	}
1504 	amap->am_anon[slot] = anon;
1505 	UVMHIST_LOG(maphist,
1506 	    "<- done (amap=0x%#jx, offset=0x%x, anon=0x%#jx, rep=%d)",
1507 	    (uintptr_t)amap, offset, (uintptr_t)anon, replace);
1508 }
1509 
1510 /*
1511  * amap_unadd: remove a page from an amap.
1512  *
1513  * => amap should be locked by caller.
1514  */
1515 void
1516 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1517 {
1518 	struct vm_amap *amap = aref->ar_amap;
1519 	u_int slot, ptr, last;
1520 
1521 	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
1522 	KASSERT(mutex_owned(amap->am_lock));
1523 
1524 	AMAP_B2SLOT(slot, offset);
1525 	slot += aref->ar_pageoff;
1526 	KASSERT(slot < amap->am_nslot);
1527 	KASSERT(amap->am_anon[slot] != NULL);
1528 	KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);
1529 
1530 	amap->am_anon[slot] = NULL;
1531 	ptr = amap->am_bckptr[slot];
1532 
1533 	last = amap->am_nused - 1;
1534 	if (ptr != last) {
1535 		/* Move the last entry to keep the slots contiguous. */
1536 		amap->am_slots[ptr] = amap->am_slots[last];
1537 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1538 	}
1539 	amap->am_nused--;
1540 	UVMHIST_LOG(maphist, "<- done (amap=0x%#jx, slot=0x%jx)",
1541 	    (uintptr_t)amap, slot,0, 0);
1542 }
1543 
1544 /*
1545  * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
1546  */
1547 static void
1548 amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
1549     int refv, bool all)
1550 {
1551 	struct vm_anon *tofree = NULL;
1552 
1553 #ifdef UVM_AMAP_PPREF
1554 	KASSERT(mutex_owned(amap->am_lock));
1555 
1556 	/*
1557 	 * We must establish the ppref array before changing am_ref
1558 	 * so that the ppref values match the current amap refcount.
1559 	 */
1560 
1561 	if (amap->am_ppref == NULL && !all && len != amap->am_nslot) {
1562 		amap_pp_establish(amap, offset);
1563 	}
1564 #endif
1565 
1566 	amap->am_ref += refv;
1567 
1568 #ifdef UVM_AMAP_PPREF
1569 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1570 		if (all) {
1571 			amap_pp_adjref(amap, 0, amap->am_nslot, refv, &tofree);
1572 		} else {
1573 			amap_pp_adjref(amap, offset, len, refv, &tofree);
1574 		}
1575 	}
1576 #endif
1577 	uvm_anon_freelst(amap, tofree);
1578 }
1579 
1580 /*
1581  * amap_ref: gain a reference to an amap.
1582  *
1583  * => amap must not be locked (we will lock).
1584  * => "offset" and "len" are in units of pages.
1585  * => Called at fork time to gain the child's reference.
1586  */
1587 void
1588 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1589 {
1590 	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
1591 
1592 	amap_lock(amap);
1593 	if (flags & AMAP_SHARED) {
1594 		amap->am_flags |= AMAP_SHARED;
1595 	}
1596 	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
1597 
1598 	UVMHIST_LOG(maphist,"<- done!  amap=0x%#jx", (uintptr_t)amap, 0, 0, 0);
1599 }
1600 
1601 /*
1602  * amap_unref: remove a reference to an amap.
1603  *
1604  * => All pmap-level references to this amap must be already removed.
1605  * => Called from uvm_unmap_detach(); entry is already removed from the map.
1606  * => We will lock amap, so it must be unlocked.
1607  */
1608 void
1609 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1610 {
1611 	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
1612 
1613 	amap_lock(amap);
1614 
1615 	UVMHIST_LOG(maphist,"  amap=0x%#jx  refs=%d, nused=%d",
1616 	    (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
1617 	KASSERT(amap->am_ref > 0);
1618 
1619 	if (amap->am_ref == 1) {
1620 
1621 		/*
1622 		 * If the last reference - wipeout and destroy the amap.
1623 		 */
1624 		amap->am_ref--;
1625 		amap_wipeout(amap);
1626 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1627 		return;
1628 	}
1629 
1630 	/*
1631 	 * Otherwise, drop the reference count(s) on anons.
1632 	 */
1633 
1634 	if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
1635 		amap->am_flags &= ~AMAP_SHARED;
1636 	}
1637 	amap_adjref_anons(amap, offset, len, -1, all);
1638 
1639 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1640 }
1641