xref: /netbsd-src/sys/uvm/uvm_amap.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: uvm_amap.c,v 1.126 2021/03/13 15:29:55 skrll Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * uvm_amap.c: amap operations
30  */
31 
32 /*
33  * this file contains functions that perform operations on amaps.  see
34  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.126 2021/03/13 15:29:55 skrll Exp $");
39 
40 #include "opt_uvmhist.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/pool.h>
47 #include <sys/atomic.h>
48 
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_swap.h>
51 
/*
 * cache for allocation of vm_amap structures.  note that in order to
 * avoid an endless loop, the amap cache's allocator cannot allocate
 * memory from an amap (it currently goes through the kernel uobj, so
 * we are ok).
 *
 * amap_list links every amap that has been constructed by the cache
 * (inserted by amap_ctor(), removed by amap_dtor()).  amap_list_lock
 * is held around those list updates and around every replacement of
 * an amap's am_lock pointer in this file.
 */
static struct pool_cache uvm_amap_cache;
static kmutex_t amap_list_lock __cacheline_aligned;
static LIST_HEAD(, vm_amap) amap_list;
61 
62 /*
63  * local functions
64  */
65 
/*
 * amap_roundup_slots: convert a slot count into the byte size of an
 * int array, round that size with kmem_roundup_size(), and convert
 * the result back into a slot count.
 */
static int
amap_roundup_slots(int slots)
{
	const size_t wanted = slots * sizeof(int);
	const size_t rounded = kmem_roundup_size(wanted);

	return rounded / sizeof(int);
}
72 
73 #ifdef UVM_AMAP_PPREF
74 /*
75  * what is ppref?   ppref is an _optional_ amap feature which is used
76  * to keep track of reference counts on a per-page basis.  it is enabled
77  * when UVM_AMAP_PPREF is defined.
78  *
79  * when enabled, an array of ints is allocated for the pprefs.  this
80  * array is allocated only when a partial reference is added to the
81  * map (either by unmapping part of the amap, or gaining a reference
82  * to only a part of an amap).  if the allocation of the array fails
83  * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
84  * that we tried to do ppref's but couldn't alloc the array so just
85  * give up (after all, this is an optional feature!).
86  *
87  * the array is divided into page sized "chunks."   for chunks of length 1,
88  * the chunk reference count plus one is stored in that chunk's slot.
89  * for chunks of length > 1 the first slot contains (the reference count
90  * plus one) * -1.    [the negative value indicates that the length is
91  * greater than one.]   the second slot of the chunk contains the length
92  * of the chunk.   here is an example:
93  *
94  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
95  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
96  *              <----------><-><----><-------><----><-><------->
97  * (x = don't care)
98  *
99  * this allows us to allow one int to contain the ref count for the whole
100  * chunk.    note that the "plus one" part is needed because a reference
 * count of zero is neither positive nor negative (need a way to tell
102  * if we've got one zero or a bunch of them).
103  *
104  * here are some in-line functions to help us.
105  */
106 
/*
 * pp_getreflen: decode the chunk that starts at "offset" in a ppref array
 *
 * => the chunk's reference count is stored in *refp, its length in *lenp
 * => a positive first slot encodes a one-slot chunk as (ref + 1); any
 *    other value encodes -(ref + 1) with the length in the next slot
 * => ppref's amap must be locked
 */
static inline void
pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
{
	const int head = ppref[offset];

	if (head <= 0) {
		/* multi-slot chunk: undo the negation, then the "plus one" */
		*refp = -head - 1;
		*lenp = ppref[offset + 1];
		return;
	}

	/* single-slot chunk: only the "plus one" needs undoing */
	*refp = head - 1;
	*lenp = 1;
}
124 
/*
 * pp_setreflen: encode a chunk of "len" slots with reference count "ref"
 * starting at "offset" in a ppref array
 *
 * => a zero length writes nothing
 * => a length of one stores (ref + 1) in the single slot
 * => any other length stores -(ref + 1) followed by the length
 * => ppref's amap must be locked
 */
static inline void
pp_setreflen(int *ppref, int offset, int ref, int len)
{
	switch (len) {
	case 0:
		/* empty chunk: nothing to record */
		break;
	case 1:
		ppref[offset] = ref + 1;
		break;
	default:
		ppref[offset] = -(ref + 1);
		ppref[offset + 1] = len;
		break;
	}
}
142 #endif /* UVM_AMAP_PPREF */
143 
144 /*
145  * amap_alloc1: allocate an amap, but do not initialise the overlay.
146  *
147  * => Note: lock is not set.
148  */
149 static struct vm_amap *
150 amap_alloc1(int slots, int padslots, int flags)
151 {
152 	const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
153 	const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
154 	struct vm_amap *amap;
155 	krwlock_t *newlock, *oldlock;
156 	int totalslots;
157 
158 	amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
159 	if (amap == NULL) {
160 		return NULL;
161 	}
162 	KASSERT(amap->am_lock != NULL);
163 	KASSERT(amap->am_nused == 0);
164 
165 	/* Try to privatize the lock if currently shared. */
166 	if (rw_obj_refcnt(amap->am_lock) > 1) {
167 		newlock = rw_obj_tryalloc();
168 		if (newlock != NULL) {
169 		    	oldlock = amap->am_lock;
170 		    	mutex_enter(&amap_list_lock);
171 		    	amap->am_lock = newlock;
172 		    	mutex_exit(&amap_list_lock);
173 		    	rw_obj_free(oldlock);
174 		}
175 	}
176 
177 	totalslots = amap_roundup_slots(slots + padslots);
178 	amap->am_ref = 1;
179 	amap->am_flags = 0;
180 #ifdef UVM_AMAP_PPREF
181 	amap->am_ppref = NULL;
182 #endif
183 	amap->am_maxslot = totalslots;
184 	amap->am_nslot = slots;
185 
186 	/*
187 	 * Note: since allocations are likely big, we expect to reduce the
188 	 * memory fragmentation by allocating them in separate blocks.
189 	 */
190 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
191 	if (amap->am_slots == NULL)
192 		goto fail1;
193 
194 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
195 	if (amap->am_bckptr == NULL)
196 		goto fail2;
197 
198 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
199 	    kmflags);
200 	if (amap->am_anon == NULL)
201 		goto fail3;
202 
203 	return amap;
204 
205 fail3:
206 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
207 fail2:
208 	kmem_free(amap->am_slots, totalslots * sizeof(int));
209 fail1:
210 	pool_cache_put(&uvm_amap_cache, amap);
211 
212 	/*
213 	 * XXX hack to tell the pagedaemon how many pages we need,
214 	 * since we can need more than it would normally free.
215 	 */
216 	if (nowait) {
217 		extern u_int uvm_extrapages;
218 		atomic_add_int(&uvm_extrapages,
219 		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
220 		    totalslots) >> PAGE_SHIFT);
221 	}
222 	return NULL;
223 }
224 
225 /*
226  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
227  *
228  * => caller should ensure sz is a multiple of PAGE_SIZE
229  * => reference count to new amap is set to one
230  * => new amap is returned unlocked
231  */
232 
233 struct vm_amap *
234 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
235 {
236 	struct vm_amap *amap;
237 	int slots, padslots;
238 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
239 
240 	AMAP_B2SLOT(slots, sz);
241 	AMAP_B2SLOT(padslots, padsz);
242 
243 	amap = amap_alloc1(slots, padslots, waitf);
244 	if (amap) {
245 		memset(amap->am_anon, 0,
246 		    amap->am_maxslot * sizeof(struct vm_anon *));
247 	}
248 
249 	UVMHIST_LOG(maphist,"<- done, amap = %#jx, sz=%jd", (uintptr_t)amap,
250 	    sz, 0, 0);
251 	return(amap);
252 }
253 
254 /*
255  * amap_ctor: pool_cache constructor for new amaps
256  *
257  * => carefully synchronize with amap_swap_off()
258  */
259 static int
260 amap_ctor(void *arg, void *obj, int flags)
261 {
262 	struct vm_amap *amap = obj;
263 
264 	if ((flags & PR_NOWAIT) != 0) {
265 		amap->am_lock = rw_obj_tryalloc();
266 		if (amap->am_lock == NULL) {
267 			return ENOMEM;
268 		}
269 	} else {
270 		amap->am_lock = rw_obj_alloc();
271 	}
272 	amap->am_nused = 0;
273 	amap->am_flags = 0;
274 
275 	mutex_enter(&amap_list_lock);
276 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
277 	mutex_exit(&amap_list_lock);
278 	return 0;
279 }
280 
281 /*
282  * amap_ctor: pool_cache destructor for amaps
283  *
284  * => carefully synchronize with amap_swap_off()
285  */
286 static void
287 amap_dtor(void *arg, void *obj)
288 {
289 	struct vm_amap *amap = obj;
290 
291 	KASSERT(amap->am_nused == 0);
292 
293 	mutex_enter(&amap_list_lock);
294 	LIST_REMOVE(amap, am_list);
295 	mutex_exit(&amap_list_lock);
296 	rw_obj_free(amap->am_lock);
297 }
298 
299 /*
300  * uvm_amap_init: initialize the amap system.
301  */
302 void
303 uvm_amap_init(void)
304 {
305 
306 	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
307 
308 	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0,
309 	    PR_LARGECACHE, "amappl", NULL, IPL_NONE, amap_ctor, amap_dtor,
310 	    NULL);
311 }
312 
313 /*
314  * amap_free: free an amap
315  *
316  * => the amap must be unlocked
317  * => the amap should have a zero reference count and be empty
318  */
319 void
320 amap_free(struct vm_amap *amap)
321 {
322 	int slots;
323 
324 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
325 
326 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
327 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
328 	slots = amap->am_maxslot;
329 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
330 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
331 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
332 #ifdef UVM_AMAP_PPREF
333 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
334 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
335 #endif
336 	pool_cache_put(&uvm_amap_cache, amap);
337 	UVMHIST_LOG(maphist,"<- done, freed amap = %#jx", (uintptr_t)amap,
338 	    0, 0, 0);
339 }
340 
341 /*
342  * amap_extend: extend the size of an amap (if needed)
343  *
344  * => called from uvm_map when we want to extend an amap to cover
345  *    a new mapping (rather than allocate a new one)
346  * => amap should be unlocked (we will lock it)
347  * => to safely extend an amap it should have a reference count of
348  *    one (thus it can't be shared)
349  */
350 int
351 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
352 {
353 	struct vm_amap *amap = entry->aref.ar_amap;
354 	int slotoff = entry->aref.ar_pageoff;
355 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
356 	int slotadj, slotarea, slotendoff;
357 	int oldnslots;
358 #ifdef UVM_AMAP_PPREF
359 	int *newppref, *oldppref;
360 #endif
361 	int i, *newsl, *newbck, *oldsl, *oldbck;
362 	struct vm_anon **newover, **oldover;
363 	const km_flag_t kmflags =
364 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
365 
366 	UVMHIST_FUNC(__func__);
367 	UVMHIST_CALLARGS(maphist, "  (entry=%#jx, addsize=%#jx, flags=%#jx)",
368 	    (uintptr_t)entry, addsize, flags, 0);
369 
370 	/*
371 	 * first, determine how many slots we need in the amap.  don't
372 	 * forget that ar_pageoff could be non-zero: this means that
373 	 * there are some unused slots before us in the amap.
374 	 */
375 
376 	amap_lock(amap, RW_WRITER);
377 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
378 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
379 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
380 	if (flags & AMAP_EXTEND_FORWARDS) {
381 		slotneed = slotoff + slotmapped + slotadd;
382 		slotadj = 0;
383 		slotarea = 0;
384 	} else {
385 		slotneed = slotadd + slotmapped;
386 		slotadj = slotadd - slotoff;
387 		slotarea = amap->am_maxslot - slotmapped;
388 	}
389 
390 	/*
391 	 * Because this amap only has 1 ref, we know that there is
392 	 * only one vm_map_entry pointing to it, and the one entry is
393 	 * using slots between slotoff and slotoff + slotmapped.  If
394 	 * we have been using ppref then we know that only slots in
395 	 * the one map entry's range can have anons, since ppref
396 	 * allowed us to free any anons outside that range as other map
397 	 * entries which used this amap were removed. But without ppref,
398 	 * we couldn't know which slots were still needed by other map
399 	 * entries, so we couldn't free any anons as we removed map
400 	 * entries, and so any slot from 0 to am_nslot can have an
401 	 * anon.  But now that we know there is only one map entry
402 	 * left and we know its range, we can free up any anons
403 	 * outside that range.  This is necessary because the rest of
404 	 * this function assumes that there are no anons in the amap
405 	 * outside of the one map entry's range.
406 	 */
407 
408 	slotendoff = slotoff + slotmapped;
409 	if (amap->am_ppref == PPREF_NONE) {
410 		amap_wiperange(amap, 0, slotoff);
411 		amap_wiperange(amap, slotendoff, amap->am_nslot - slotendoff);
412 	}
413 	for (i = 0; i < slotoff; i++) {
414 		KASSERT(amap->am_anon[i] == NULL);
415 	}
416 	for (i = slotendoff; i < amap->am_nslot - slotendoff; i++) {
417 		KASSERT(amap->am_anon[i] == NULL);
418 	}
419 
420 	/*
421 	 * case 1: we already have enough slots in the map and thus
422 	 * only need to bump the reference counts on the slots we are
423 	 * adding.
424 	 */
425 
426 	if (flags & AMAP_EXTEND_FORWARDS) {
427 		if (amap->am_nslot >= slotneed) {
428 #ifdef UVM_AMAP_PPREF
429 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
430 				amap_pp_adjref(amap, slotoff + slotmapped,
431 				    slotadd, 1);
432 			}
433 #endif
434 			amap_unlock(amap);
435 			UVMHIST_LOG(maphist,
436 			    "<- done (case 1f), amap = %#jx, sltneed=%jd",
437 			    (uintptr_t)amap, slotneed, 0, 0);
438 			return 0;
439 		}
440 	} else {
441 		if (slotadj <= 0) {
442 			slotoff -= slotadd;
443 			entry->aref.ar_pageoff = slotoff;
444 #ifdef UVM_AMAP_PPREF
445 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
446 				amap_pp_adjref(amap, slotoff, slotadd, 1);
447 			}
448 #endif
449 			amap_unlock(amap);
450 			UVMHIST_LOG(maphist,
451 			    "<- done (case 1b), amap = %#jx, sltneed=%jd",
452 			    (uintptr_t)amap, slotneed, 0, 0);
453 			return 0;
454 		}
455 	}
456 
457 	/*
458 	 * case 2: we pre-allocated slots for use and we just need to
459 	 * bump nslot up to take account for these slots.
460 	 */
461 
462 	if (amap->am_maxslot >= slotneed) {
463 		if (flags & AMAP_EXTEND_FORWARDS) {
464 #ifdef UVM_AMAP_PPREF
465 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
466 				if ((slotoff + slotmapped) < amap->am_nslot)
467 					amap_pp_adjref(amap,
468 					    slotoff + slotmapped,
469 					    (amap->am_nslot -
470 					    (slotoff + slotmapped)), 1);
471 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
472 				    slotneed - amap->am_nslot);
473 			}
474 #endif
475 			amap->am_nslot = slotneed;
476 			amap_unlock(amap);
477 
478 			/*
479 			 * no need to zero am_anon since that was done at
480 			 * alloc time and we never shrink an allocation.
481 			 */
482 
483 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = %#jx, "
484 			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
485 			return 0;
486 		} else {
487 #ifdef UVM_AMAP_PPREF
488 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
489 				/*
490 				 * Slide up the ref counts on the pages that
491 				 * are actually in use.
492 				 */
493 				memmove(amap->am_ppref + slotarea,
494 				    amap->am_ppref + slotoff,
495 				    slotmapped * sizeof(int));
496 				/*
497 				 * Mark the (adjusted) gap at the front as
498 				 * referenced/not referenced.
499 				 */
500 				pp_setreflen(amap->am_ppref,
501 				    0, 0, slotarea - slotadd);
502 				pp_setreflen(amap->am_ppref,
503 				    slotarea - slotadd, 1, slotadd);
504 			}
505 #endif
506 
507 			/*
508 			 * Slide the anon pointers up and clear out
509 			 * the space we just made.
510 			 */
511 			memmove(amap->am_anon + slotarea,
512 			    amap->am_anon + slotoff,
513 			    slotmapped * sizeof(struct vm_anon*));
514 			memset(amap->am_anon + slotoff, 0,
515 			    (slotarea - slotoff) * sizeof(struct vm_anon *));
516 
517 			/*
518 			 * Slide the backpointers up, but don't bother
519 			 * wiping out the old slots.
520 			 */
521 			memmove(amap->am_bckptr + slotarea,
522 			    amap->am_bckptr + slotoff,
523 			    slotmapped * sizeof(int));
524 
525 			/*
526 			 * Adjust all the useful active slot numbers.
527 			 */
528 			for (i = 0; i < amap->am_nused; i++)
529 				amap->am_slots[i] += (slotarea - slotoff);
530 
531 			/*
532 			 * We just filled all the empty space in the
533 			 * front of the amap by activating a few new
534 			 * slots.
535 			 */
536 			amap->am_nslot = amap->am_maxslot;
537 			entry->aref.ar_pageoff = slotarea - slotadd;
538 			amap_unlock(amap);
539 
540 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = %#jx, "
541 			    "slotneed=%jd", (uintptr_t)amap, slotneed, 0, 0);
542 			return 0;
543 		}
544 	}
545 
546 	/*
547 	 * Case 3: we need to allocate a new amap and copy all the amap
548 	 * data over from old amap to the new one.  Drop the lock before
549 	 * performing allocation.
550 	 *
551 	 * Note: since allocations are likely big, we expect to reduce the
552 	 * memory fragmentation by allocating them in separate blocks.
553 	 */
554 
555 	amap_unlock(amap);
556 
557 	if (slotneed >= UVM_AMAP_LARGE) {
558 		return E2BIG;
559 	}
560 
561 	slotalloc = amap_roundup_slots(slotneed);
562 #ifdef UVM_AMAP_PPREF
563 	newppref = NULL;
564 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
565 		/* Will be handled later if fails. */
566 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
567 	}
568 #endif
569 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
570 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
571 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
572 	if (newsl == NULL || newbck == NULL || newover == NULL) {
573 #ifdef UVM_AMAP_PPREF
574 		if (newppref != NULL) {
575 			kmem_free(newppref, slotalloc * sizeof(*newppref));
576 		}
577 #endif
578 		if (newsl != NULL) {
579 			kmem_free(newsl, slotalloc * sizeof(*newsl));
580 		}
581 		if (newbck != NULL) {
582 			kmem_free(newbck, slotalloc * sizeof(*newbck));
583 		}
584 		if (newover != NULL) {
585 			kmem_free(newover, slotalloc * sizeof(*newover));
586 		}
587 		return ENOMEM;
588 	}
589 	amap_lock(amap, RW_WRITER);
590 	KASSERT(amap->am_maxslot < slotneed);
591 
592 	/*
593 	 * Copy everything over to new allocated areas.
594 	 */
595 
596 	slotadded = slotalloc - amap->am_nslot;
597 	if (!(flags & AMAP_EXTEND_FORWARDS))
598 		slotarea = slotalloc - slotmapped;
599 
600 	/* do am_slots */
601 	oldsl = amap->am_slots;
602 	if (flags & AMAP_EXTEND_FORWARDS)
603 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
604 	else
605 		for (i = 0; i < amap->am_nused; i++)
606 			newsl[i] = oldsl[i] + slotarea - slotoff;
607 	amap->am_slots = newsl;
608 
609 	/* do am_anon */
610 	oldover = amap->am_anon;
611 	if (flags & AMAP_EXTEND_FORWARDS) {
612 		memcpy(newover, oldover,
613 		    sizeof(struct vm_anon *) * amap->am_nslot);
614 		memset(newover + amap->am_nslot, 0,
615 		    sizeof(struct vm_anon *) * slotadded);
616 	} else {
617 		memcpy(newover + slotarea, oldover + slotoff,
618 		    sizeof(struct vm_anon *) * slotmapped);
619 		memset(newover, 0,
620 		    sizeof(struct vm_anon *) * slotarea);
621 	}
622 	amap->am_anon = newover;
623 
624 	/* do am_bckptr */
625 	oldbck = amap->am_bckptr;
626 	if (flags & AMAP_EXTEND_FORWARDS)
627 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
628 	else
629 		memcpy(newbck + slotarea, oldbck + slotoff,
630 		    sizeof(int) * slotmapped);
631 	amap->am_bckptr = newbck;
632 
633 #ifdef UVM_AMAP_PPREF
634 	/* do ppref */
635 	oldppref = amap->am_ppref;
636 	if (newppref) {
637 		if (flags & AMAP_EXTEND_FORWARDS) {
638 			memcpy(newppref, oldppref,
639 			    sizeof(int) * amap->am_nslot);
640 			memset(newppref + amap->am_nslot, 0,
641 			    sizeof(int) * slotadded);
642 		} else {
643 			memcpy(newppref + slotarea, oldppref + slotoff,
644 			    sizeof(int) * slotmapped);
645 		}
646 		amap->am_ppref = newppref;
647 		if ((flags & AMAP_EXTEND_FORWARDS) &&
648 		    (slotoff + slotmapped) < amap->am_nslot)
649 			amap_pp_adjref(amap, slotoff + slotmapped,
650 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
651 		if (flags & AMAP_EXTEND_FORWARDS)
652 			pp_setreflen(newppref, amap->am_nslot, 1,
653 			    slotneed - amap->am_nslot);
654 		else {
655 			pp_setreflen(newppref, 0, 0,
656 			    slotalloc - slotneed);
657 			pp_setreflen(newppref, slotalloc - slotneed, 1,
658 			    slotneed - slotmapped);
659 		}
660 	} else {
661 		if (amap->am_ppref)
662 			amap->am_ppref = PPREF_NONE;
663 	}
664 #endif
665 
666 	/* update master values */
667 	if (flags & AMAP_EXTEND_FORWARDS)
668 		amap->am_nslot = slotneed;
669 	else {
670 		entry->aref.ar_pageoff = slotarea - slotadd;
671 		amap->am_nslot = slotalloc;
672 	}
673 	oldnslots = amap->am_maxslot;
674 	amap->am_maxslot = slotalloc;
675 	amap_unlock(amap);
676 
677 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
678 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
679 	kmem_free(oldover, oldnslots * sizeof(*oldover));
680 #ifdef UVM_AMAP_PPREF
681 	if (oldppref && oldppref != PPREF_NONE)
682 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
683 #endif
684 	UVMHIST_LOG(maphist,"<- done (case 3), amap = %#jx, slotneed=%jd",
685 	    (uintptr_t)amap, slotneed, 0, 0);
686 	return 0;
687 }
688 
689 /*
690  * amap_share_protect: change protection of anons in a shared amap
691  *
692  * for shared amaps, given the current data structure layout, it is
693  * not possible for us to directly locate all maps referencing the
694  * shared anon (to change the protection).  in order to protect data
695  * in shared maps we use pmap_page_protect().  [this is useful for IPC
696  * mechanisms like map entry passing that may want to write-protect
697  * all mappings of a shared amap.]  we traverse am_anon or am_slots
698  * depending on the current state of the amap.
699  *
700  * => entry's map and amap must be locked by the caller
701  */
702 void
703 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
704 {
705 	struct vm_amap *amap = entry->aref.ar_amap;
706 	u_int slots, lcv, slot, stop;
707 	struct vm_anon *anon;
708 
709 	KASSERT(rw_write_held(amap->am_lock));
710 
711 	AMAP_B2SLOT(slots, (entry->end - entry->start));
712 	stop = entry->aref.ar_pageoff + slots;
713 
714 	if (slots < amap->am_nused) {
715 		/*
716 		 * Cheaper to traverse am_anon.
717 		 */
718 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
719 			anon = amap->am_anon[lcv];
720 			if (anon == NULL) {
721 				continue;
722 			}
723 			if (anon->an_page) {
724 				pmap_page_protect(anon->an_page, prot);
725 			}
726 		}
727 		return;
728 	}
729 
730 	/*
731 	 * Cheaper to traverse am_slots.
732 	 */
733 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
734 		slot = amap->am_slots[lcv];
735 		if (slot < entry->aref.ar_pageoff || slot >= stop) {
736 			continue;
737 		}
738 		anon = amap->am_anon[slot];
739 		if (anon->an_page) {
740 			pmap_page_protect(anon->an_page, prot);
741 		}
742 	}
743 }
744 
745 /*
746  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
747  *
748  * => Called from amap_unref(), when reference count drops to zero.
749  * => amap must be locked.
750  */
751 
752 void
753 amap_wipeout(struct vm_amap *amap)
754 {
755 	u_int lcv;
756 
757 	UVMHIST_FUNC(__func__);
758 	UVMHIST_CALLARGS(maphist,"(amap=%#jx)", (uintptr_t)amap, 0,0,0);
759 
760 	KASSERT(rw_write_held(amap->am_lock));
761 	KASSERT(amap->am_ref == 0);
762 
763 	if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
764 		/*
765 		 * Note: amap_swap_off() will call us again.
766 		 */
767 		amap_unlock(amap);
768 		return;
769 	}
770 
771 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
772 		struct vm_anon *anon;
773 		u_int slot;
774 
775 		slot = amap->am_slots[lcv];
776 		anon = amap->am_anon[slot];
777 		KASSERT(anon != NULL && anon->an_ref != 0);
778 
779 		KASSERT(anon->an_lock == amap->am_lock);
780 		UVMHIST_LOG(maphist,"  processing anon %#jx, ref=%jd",
781 		    (uintptr_t)anon, anon->an_ref, 0, 0);
782 
783 		/*
784 		 * Drop the reference.
785 		 */
786 
787 		if (__predict_true(--anon->an_ref == 0)) {
788 			uvm_anfree(anon);
789 		}
790 		if (__predict_false((lcv & 31) == 31)) {
791 			preempt_point();
792 		}
793 	}
794 
795 	/*
796 	 * Finally, destroy the amap.
797 	 */
798 
799 	amap->am_nused = 0;
800 	amap_unlock(amap);
801 	amap_free(amap);
802 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
803 }
804 
/*
 * amap_copy: ensure that a map entry's "needs_copy" flag is false
 *	by copying the amap if necessary.
 *
 * => an entry with a null amap pointer will get a new (blank) one.
 * => the map that the map entry belongs to must be locked by caller.
 * => the amap currently attached to "entry" (if any) must be unlocked.
 * => "flags" bits examined here:
 *	AMAP_COPY_NOWAIT:  allocate with UVM_FLAG_NOWAIT
 *	AMAP_COPY_NOCHUNK: do not clip a large entry into a chunk
 *	AMAP_COPY_NOMERGE: do not try uvm_mapent_trymerge() first
 * => "startva" and "endva" are used only if chunking is allowed.  they
 *     are used to limit chunking (e.g. if you have a large space that you
 *     know you are going to need to allocate amaps for, there is no point
 *     in allowing that to be chunked)
 * => nothing is returned: if an allocation fails, UVM_ET_NEEDSCOPY is
 *     simply left set on the entry.
 */

void
amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
    vaddr_t startva, vaddr_t endva)
{
	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
	struct vm_amap *amap, *srcamap;
	u_int slots, lcv;
	krwlock_t *oldlock;
	vsize_t len;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(maphist, "  (map=%#jx, entry=%#jx, flags=%#jx)",
	    (uintptr_t)map, (uintptr_t)entry, flags, -2);

	KASSERT(map != kernel_map);	/* we use nointr pool */

	srcamap = entry->aref.ar_amap;
	len = entry->end - entry->start;

	/*
	 * Is there an amap to copy?  If not, create one.
	 */

	if (srcamap == NULL) {
		const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;

		/*
		 * Check to see if we have a large amap that we can
		 * chunk.  We align startva/endva to chunk-sized
		 * boundaries and then clip to them.
		 */

		if (canchunk && atop(len) >= UVM_AMAP_LARGE) {
			vsize_t chunksize;

			/* Convert slots to bytes. */
			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
			startva = (startva / chunksize) * chunksize;
			endva = roundup(endva, chunksize);
			UVMHIST_LOG(maphist,
			    "  chunk amap ==> clip %#jx->%#jx to %#jx->%#jx",
			    entry->start, entry->end, startva, endva);
			UVM_MAP_CLIP_START(map, entry, startva);

			/* Watch out for endva wrap-around! */
			if (endva >= startva) {
				UVM_MAP_CLIP_END(map, entry, endva);
			}
		}

		/*
		 * If merging is allowed and succeeds there is nothing
		 * left for us to do here.
		 */
		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
			return;
		}

		UVMHIST_LOG(maphist, "<- done [creating new amap %#jx->%#jx]",
		    entry->start, entry->end, 0, 0);

		/*
		 * Allocate an initialised amap and install it.
		 * Note: we must update the length after clipping.
		 * On failure ar_amap stays NULL and UVM_ET_NEEDSCOPY
		 * stays set.
		 */
		len = entry->end - entry->start;
		entry->aref.ar_pageoff = 0;
		entry->aref.ar_amap = amap_alloc(len, 0, waitf);
		if (entry->aref.ar_amap != NULL) {
			entry->etype &= ~UVM_ET_NEEDSCOPY;
		}
		return;
	}

	/*
	 * First check and see if we are the only map entry referencing
	 * the amap we currently have.  If so, then just take it over instead
	 * of copying it.  Note that we are reading am_ref without lock held
	 * as the value can only be one if we have the only reference
	 * to the amap (via our locked map).  If the value is greater than
	 * one, then allocate amap and re-check the value.
	 */

	if (srcamap->am_ref == 1) {
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
		    0, 0, 0, 0);
		return;
	}

	UVMHIST_LOG(maphist,"  amap=%#jx, ref=%jd, must copy it",
	    (uintptr_t)srcamap, srcamap->am_ref, 0, 0);

	/*
	 * Allocate a new amap (note: not initialised, etc).
	 * If this fails we return with UVM_ET_NEEDSCOPY still set.
	 */

	AMAP_B2SLOT(slots, len);
	amap = amap_alloc1(slots, 0, waitf);
	if (amap == NULL) {
		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
		return;
	}

	/*
	 * Make the new amap share the source amap's lock, and then lock
	 * both.  We must do this before we set am_nused != 0, otherwise
	 * amap_swap_off() can become interested in the amap.
	 *
	 * The am_lock pointer is switched under amap_list_lock; the
	 * shared lock gains a reference and the new amap's previous
	 * lock is released.
	 */

	oldlock = amap->am_lock;
	mutex_enter(&amap_list_lock);
	amap->am_lock = srcamap->am_lock;
	mutex_exit(&amap_list_lock);
	rw_obj_hold(amap->am_lock);
	rw_obj_free(oldlock);

	amap_lock(srcamap, RW_WRITER);

	/*
	 * Re-check the reference count with the lock held.  If it has
	 * dropped to one - we can take over the existing map.
	 */

	if (srcamap->am_ref == 1) {
		/* Just take over the existing amap. */
		entry->etype &= ~UVM_ET_NEEDSCOPY;
		amap_unlock(srcamap);
		/* Destroy the new (unused) amap. */
		amap->am_ref--;
		amap_free(amap);
		return;
	}

	/*
	 * Copy the slots.  Zero the padded part.
	 *
	 * Each anon found in the entry's range of the source gains a
	 * reference and is entered into the new amap's am_slots and
	 * am_bckptr arrays.
	 */

	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
	for (lcv = 0 ; lcv < slots; lcv++) {
		amap->am_anon[lcv] =
		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
		if (amap->am_anon[lcv] == NULL)
			continue;
		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
		KASSERT(amap->am_anon[lcv]->an_ref > 0);
		KASSERT(amap->am_nused < amap->am_maxslot);
		amap->am_anon[lcv]->an_ref++;
		amap->am_bckptr[lcv] = amap->am_nused;
		amap->am_slots[amap->am_nused] = lcv;
		amap->am_nused++;
	}
	/* lcv == slots here: clear everything from there up to am_maxslot */
	memset(&amap->am_anon[lcv], 0,
	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));

	/*
	 * Drop our reference to the old amap (srcamap) and unlock.
	 * Since the reference count on srcamap is greater than one,
	 * (we checked above), it cannot drop to zero while it is locked.
	 */

	srcamap->am_ref--;
	KASSERT(srcamap->am_ref > 0);

	/* With a single reference left the source is no longer shared. */
	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) {
		srcamap->am_flags &= ~AMAP_SHARED;
	}
#ifdef UVM_AMAP_PPREF
	/* Take our entry's range out of the source's per-page counts. */
	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
		    len >> PAGE_SHIFT, -1);
	}
#endif

	amap_unlock(srcamap);

	/*
	 * Install new amap.
	 */

	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap;
	entry->etype &= ~UVM_ET_NEEDSCOPY;
	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
}
1001 
/*
 * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
 *
 *	called during fork(2) when the parent process has a wired map
 *	entry.   in that case we want to avoid write-protecting pages
 *	in the parent's map (e.g. like what you'd do for a COW page)
 *	so we resolve the COW here.
 *
 * => assume parent's entry was wired, thus all pages are resident.
 * => assume pages that are loaned out (loan_count) are already mapped
 *	read-only in all maps, and thus no need for us to worry about them
 * => assume both parent and child vm_map's are locked
 * => caller passes child's map/entry in to us ("map" itself is not
 *	referenced by the body; only entry->aref.ar_amap is used)
 * => the amap is write-locked by us and is unlocked again on return
 * => if we run out of memory we will unlock the amap and sleep _with_ the
 *	parent and child vm_map's locked(!).    we have to do this since
 *	we are in the middle of a fork(2) and we can't let the parent
 *	map change until we are done copying all the map entries.
 * => XXXCDC: out of memory should cause fork to fail, but there is
 *	currently no easy way to do this (needs fix)
 */

void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon, *nanon;
	struct vm_page *pg, *npg;
	u_int lcv, slot;

	/*
	 * note that if we unlock the amap then we must ReStart the "lcv" for
	 * loop because some other process could reorder the anons in the
	 * am_anon[] array on us while the lock is dropped.
	 */

ReStart:
	amap_lock(amap, RW_WRITER);
	/* Walk only the slots in use, via the dense am_slots[] index. */
	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		slot = amap->am_slots[lcv];
		anon = amap->am_anon[slot];
		KASSERT(anon->an_lock == amap->am_lock);

		/*
		 * If anon has only one reference - we must have already
		 * copied it.  This can happen if we needed to sleep waiting
		 * for memory in a previous run through this loop.  The new
		 * page might even have been paged out, since it is not wired.
		 */

		if (anon->an_ref == 1) {
			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
			continue;
		}

		/*
		 * The old page must be resident since the parent is wired.
		 */

		pg = anon->an_page;
		KASSERT(pg != NULL);
		KASSERT(pg->wire_count > 0);

		/*
		 * If the page is loaned then it must already be mapped
		 * read-only and we don't need to copy it.
		 */

		if (pg->loan_count != 0) {
			continue;
		}
		KASSERT(pg->uanon == anon && pg->uobject == NULL);

		/*
		 * If the page is busy, then we have to unlock, wait for
		 * it and then restart.  There is no explicit amap_unlock()
		 * on this path: uvm_pagewait() is handed the amap lock and
		 * presumably releases it while sleeping, since ReStart
		 * takes the lock again - NOTE(review): confirm.
		 */

		if (pg->flags & PG_BUSY) {
			uvm_pagewait(pg, amap->am_lock, "cownow");
			goto ReStart;
		}

		/*
		 * Perform a copy-on-write.
		 * First - get a new anon and a page.  The new anon is
		 * given the amap's lock before the page is allocated
		 * for it.
		 */

		nanon = uvm_analloc();
		if (nanon) {
			nanon->an_lock = amap->am_lock;
			npg = uvm_pagealloc(NULL, 0, nanon, 0);
		} else {
			npg = NULL;
		}
		if (nanon == NULL || npg == NULL) {
			/*
			 * Out of memory.  Drop the amap lock, undo the
			 * partially set up anon (if we got one), wait for
			 * memory and start over from the first slot.
			 */
			amap_unlock(amap);
			if (nanon) {
				nanon->an_lock = NULL;
				nanon->an_ref--;
				KASSERT(nanon->an_ref == 0);
				uvm_anfree(nanon);
			}
			uvm_wait("cownowpage");
			goto ReStart;
		}

		/*
		 * Copy the data and replace anon with the new one.
		 * Its lock was already set up above (shared with the
		 * amap's lock).  The old anon only loses our reference;
		 * it must still be referenced by someone else.
		 */

		uvm_pagecopy(pg, npg);
		anon->an_ref--;
		KASSERT(anon->an_ref > 0);
		amap->am_anon[slot] = nanon;

		/*
		 * Drop PG_BUSY on new page.  Since its owner was write
		 * locked all this time - it cannot be PG_RELEASED or
		 * waited on.
		 */
		uvm_pagelock(npg);
		uvm_pageactivate(npg);
		uvm_pageunlock(npg);
		npg->flags &= ~(PG_BUSY|PG_FAKE);
		UVM_PAGE_OWN(npg, NULL);
	}
	amap_unlock(amap);
}
1131 
1132 /*
1133  * amap_splitref: split a single reference into two separate references
1134  *
1135  * => called from uvm_map's clip routines
1136  * => origref's map should be locked
1137  * => origref->ar_amap should be unlocked (we will lock)
1138  */
1139 void
1140 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1141 {
1142 	struct vm_amap *amap = origref->ar_amap;
1143 	u_int leftslots;
1144 
1145 	KASSERT(splitref->ar_amap == origref->ar_amap);
1146 	AMAP_B2SLOT(leftslots, offset);
1147 	KASSERT(leftslots != 0);
1148 
1149 	amap_lock(amap, RW_WRITER);
1150 	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1151 
1152 #ifdef UVM_AMAP_PPREF
1153 	/* Establish ppref before we add a duplicate reference to the amap. */
1154 	if (amap->am_ppref == NULL) {
1155 		amap_pp_establish(amap, origref->ar_pageoff);
1156 	}
1157 #endif
1158 	/* Note: not a share reference. */
1159 	amap->am_ref++;
1160 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1161 	amap_unlock(amap);
1162 }
1163 
1164 #ifdef UVM_AMAP_PPREF
1165 
1166 /*
1167  * amap_pp_establish: add a ppref array to an amap, if possible.
1168  *
1169  * => amap should be locked by caller.
1170  */
1171 void
1172 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1173 {
1174 	const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);
1175 
1176 	KASSERT(rw_write_held(amap->am_lock));
1177 
1178 	amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
1179 	if (amap->am_ppref == NULL) {
1180 		/* Failure - just do not use ppref. */
1181 		amap->am_ppref = PPREF_NONE;
1182 		return;
1183 	}
1184 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1185 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1186 	    amap->am_nslot - offset);
1187 }
1188 
/*
 * amap_pp_adjref: adjust reference count to a part of an amap using the
 * per-page reference count array.
 *
 * => caller must check that ppref != PPREF_NONE before calling.
 * => map and amap must be locked (the amap write lock is asserted).
 * => "curslot" is the first slot of the range, "slotlen" the number of
 *	slots in it, and "adjval" is added to the count of every chunk
 *	inside the range.
 * => chunks that straddle either end of the range are split first, so
 *	only slots in [curslot, curslot + slotlen) have their count changed.
 * => a chunk whose count drops to zero has its anons released through
 *	amap_wiperange().
 */
void
amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
{
	int stopslot, *ppref, lcv, prevlcv;
	int ref, len, prevref, prevlen;

	KASSERT(rw_write_held(amap->am_lock));

	stopslot = curslot + slotlen;
	ppref = amap->am_ppref;
	prevlcv = 0;

	/*
	 * Advance to the correct place in the array, fragment if needed.
	 * On exit lcv == curslot and prevlcv is the start of the last
	 * chunk that ends at curslot.
	 */

	for (lcv = 0 ; lcv < curslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > curslot) {     /* goes past start? */
			/* Split the chunk in two at curslot. */
			pp_setreflen(ppref, lcv, ref, curslot - lcv);
			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
			len = curslot - lcv;   /* new length of entry @ lcv */
		}
		prevlcv = lcv;
	}
	if (lcv == 0) {
		/*
		 * Ensure that the "prevref == ref" test below always
		 * fails, since we are starting from the beginning of
		 * the ppref array; that is, there is no previous chunk.
		 */
		prevref = -1;
		prevlen = 0;
	} else {
		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
	}

	/*
	 * Now adjust reference counts in range.  Merge the first
	 * changed entry with the last unchanged entry if possible.
	 * prevlcv/prevlen are not advanced inside the loop, so the
	 * adjacency test can only succeed for the chunk at curslot.
	 */
	KASSERT(lcv == curslot);
	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		if (lcv + len > stopslot) {     /* goes past end? */
			/* Split the chunk in two at stopslot. */
			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
			pp_setreflen(ppref, stopslot, ref,
			    len - (stopslot - lcv));
			len = stopslot - lcv;
		}
		ref += adjval;
		/* A chunk can hold between zero and am_ref references. */
		KASSERT(ref >= 0);
		KASSERT(ref <= amap->am_ref);
		if (lcv == prevlcv + prevlen && ref == prevref) {
			/* Same count as the preceding chunk: extend it. */
			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
		} else {
			pp_setreflen(ppref, lcv, ref, len);
		}
		if (ref == 0) {
			/* Nobody references these slots any more. */
			amap_wiperange(amap, lcv, len);
		}
	}
}
1259 
1260 /*
1261  * amap_wiperange: wipe out a range of an amap.
1262  * Note: different from amap_wipeout because the amap is kept intact.
1263  *
1264  * => Both map and amap must be locked by caller.
1265  */
1266 void
1267 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1268 {
1269 	u_int lcv, stop, slotend;
1270 	bool byanon;
1271 
1272 	KASSERT(rw_write_held(amap->am_lock));
1273 
1274 	/*
1275 	 * We can either traverse the amap by am_anon or by am_slots.
1276 	 * Determine which way is less expensive.
1277 	 */
1278 
1279 	if (slots < amap->am_nused) {
1280 		byanon = true;
1281 		lcv = slotoff;
1282 		stop = slotoff + slots;
1283 		slotend = 0;
1284 	} else {
1285 		byanon = false;
1286 		lcv = 0;
1287 		stop = amap->am_nused;
1288 		slotend = slotoff + slots;
1289 	}
1290 
1291 	while (lcv < stop) {
1292 		struct vm_anon *anon;
1293 		u_int curslot, ptr, last;
1294 
1295 		if (byanon) {
1296 			curslot = lcv++;	/* lcv advances here */
1297 			if (amap->am_anon[curslot] == NULL)
1298 				continue;
1299 		} else {
1300 			curslot = amap->am_slots[lcv];
1301 			if (curslot < slotoff || curslot >= slotend) {
1302 				lcv++;		/* lcv advances here */
1303 				continue;
1304 			}
1305 			stop--;	/* drop stop, since anon will be removed */
1306 		}
1307 		anon = amap->am_anon[curslot];
1308 		KASSERT(anon->an_lock == amap->am_lock);
1309 
1310 		/*
1311 		 * Remove anon from the amap.
1312 		 */
1313 
1314 		amap->am_anon[curslot] = NULL;
1315 		ptr = amap->am_bckptr[curslot];
1316 		last = amap->am_nused - 1;
1317 		if (ptr != last) {
1318 			amap->am_slots[ptr] = amap->am_slots[last];
1319 			amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1320 		}
1321 		amap->am_nused--;
1322 
1323 		/*
1324 		 * Drop its reference count.
1325 		 */
1326 
1327 		KASSERT(anon->an_lock == amap->am_lock);
1328 		if (--anon->an_ref == 0) {
1329 			uvm_anfree(anon);
1330 		}
1331 	}
1332 }
1333 
1334 #endif
1335 
1336 #if defined(VMSWAP)
1337 
1338 /*
1339  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1340  *
1341  * => called with swap_syscall_lock held.
1342  * => note that we don't always traverse all anons.
1343  *    eg. amaps being wiped out, released anons.
1344  * => return true if failed.
1345  */
1346 
1347 bool
1348 amap_swap_off(int startslot, int endslot)
1349 {
1350 	struct vm_amap *am;
1351 	struct vm_amap *am_next;
1352 	struct vm_amap marker_prev;
1353 	struct vm_amap marker_next;
1354 	bool rv = false;
1355 
1356 #if defined(DIAGNOSTIC)
1357 	memset(&marker_prev, 0, sizeof(marker_prev));
1358 	memset(&marker_next, 0, sizeof(marker_next));
1359 #endif /* defined(DIAGNOSTIC) */
1360 
1361 	mutex_enter(&amap_list_lock);
1362 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1363 		int i;
1364 
1365 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1366 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1367 
1368 		/* amap_list_lock prevents the lock pointer from changing. */
1369 		if (!amap_lock_try(am, RW_WRITER)) {
1370 			(void)kpause("amapswpo", false, 1, &amap_list_lock);
1371 			am_next = LIST_NEXT(&marker_prev, am_list);
1372 			if (am_next == &marker_next) {
1373 				am_next = LIST_NEXT(am_next, am_list);
1374 			} else {
1375 				KASSERT(LIST_NEXT(am_next, am_list) ==
1376 				    &marker_next);
1377 			}
1378 			LIST_REMOVE(&marker_prev, am_list);
1379 			LIST_REMOVE(&marker_next, am_list);
1380 			continue;
1381 		}
1382 
1383 		mutex_exit(&amap_list_lock);
1384 
1385 		/* If am_nused == 0, the amap could be free - careful. */
1386 		for (i = 0; i < am->am_nused; i++) {
1387 			int slot;
1388 			int swslot;
1389 			struct vm_anon *anon;
1390 
1391 			slot = am->am_slots[i];
1392 			anon = am->am_anon[slot];
1393 			KASSERT(anon->an_lock == am->am_lock);
1394 
1395 			swslot = anon->an_swslot;
1396 			if (swslot < startslot || endslot <= swslot) {
1397 				continue;
1398 			}
1399 
1400 			am->am_flags |= AMAP_SWAPOFF;
1401 
1402 			rv = uvm_anon_pagein(am, anon);
1403 			amap_lock(am, RW_WRITER);
1404 
1405 			am->am_flags &= ~AMAP_SWAPOFF;
1406 			if (amap_refs(am) == 0) {
1407 				amap_wipeout(am);
1408 				am = NULL;
1409 				break;
1410 			}
1411 			if (rv) {
1412 				break;
1413 			}
1414 			i = 0;
1415 		}
1416 
1417 		if (am) {
1418 			amap_unlock(am);
1419 		}
1420 
1421 		mutex_enter(&amap_list_lock);
1422 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1423 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1424 		    &marker_next);
1425 		am_next = LIST_NEXT(&marker_next, am_list);
1426 		LIST_REMOVE(&marker_prev, am_list);
1427 		LIST_REMOVE(&marker_next, am_list);
1428 	}
1429 	mutex_exit(&amap_list_lock);
1430 
1431 	return rv;
1432 }
1433 
1434 #endif /* defined(VMSWAP) */
1435 
1436 /*
1437  * amap_lookup: look up a page in an amap.
1438  *
1439  * => amap should be locked by caller.
1440  */
1441 struct vm_anon *
1442 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1443 {
1444 	struct vm_amap *amap = aref->ar_amap;
1445 	struct vm_anon *an;
1446 	u_int slot;
1447 
1448 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1449 	KASSERT(rw_lock_held(amap->am_lock));
1450 
1451 	AMAP_B2SLOT(slot, offset);
1452 	slot += aref->ar_pageoff;
1453 	an = amap->am_anon[slot];
1454 
1455 	UVMHIST_LOG(maphist,
1456 	    "<- done (amap=%#jx, offset=%#jx, result=%#jx)",
1457 	    (uintptr_t)amap, offset, (uintptr_t)an, 0);
1458 
1459 	KASSERT(slot < amap->am_nslot);
1460 	KASSERT(an == NULL || an->an_ref != 0);
1461 	KASSERT(an == NULL || an->an_lock == amap->am_lock);
1462 	return an;
1463 }
1464 
1465 /*
1466  * amap_lookups: look up a range of pages in an amap.
1467  *
1468  * => amap should be locked by caller.
1469  */
1470 void
1471 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1472     int npages)
1473 {
1474 	struct vm_amap *amap = aref->ar_amap;
1475 	u_int slot;
1476 
1477 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1478 	KASSERT(rw_lock_held(amap->am_lock));
1479 
1480 	AMAP_B2SLOT(slot, offset);
1481 	slot += aref->ar_pageoff;
1482 
1483 	UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
1484 	    slot, npages, amap->am_nslot, 0);
1485 
1486 	KASSERT((slot + (npages - 1)) < amap->am_nslot);
1487 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1488 
1489 #if defined(DIAGNOSTIC)
1490 	for (int i = 0; i < npages; i++) {
1491 		struct vm_anon * const an = anons[i];
1492 		if (an == NULL) {
1493 			continue;
1494 		}
1495 		KASSERT(an->an_ref != 0);
1496 		KASSERT(an->an_lock == amap->am_lock);
1497 	}
1498 #endif
1499 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1500 }
1501 
1502 /*
1503  * amap_add: add (or replace) a page to an amap.
1504  *
1505  * => amap should be locked by caller.
1506  * => anon must have the lock associated with this amap.
1507  */
1508 void
1509 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1510     bool replace)
1511 {
1512 	struct vm_amap *amap = aref->ar_amap;
1513 	u_int slot;
1514 
1515 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1516 	KASSERT(rw_write_held(amap->am_lock));
1517 	KASSERT(anon->an_lock == amap->am_lock);
1518 
1519 	AMAP_B2SLOT(slot, offset);
1520 	slot += aref->ar_pageoff;
1521 	KASSERT(slot < amap->am_nslot);
1522 
1523 	if (replace) {
1524 		struct vm_anon *oanon = amap->am_anon[slot];
1525 
1526 		KASSERT(oanon != NULL);
1527 		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
1528 			pmap_page_protect(oanon->an_page, VM_PROT_NONE);
1529 			/*
1530 			 * XXX: suppose page is supposed to be wired somewhere?
1531 			 */
1532 		}
1533 	} else {
1534 		KASSERT(amap->am_anon[slot] == NULL);
1535 		KASSERT(amap->am_nused < amap->am_maxslot);
1536 		amap->am_bckptr[slot] = amap->am_nused;
1537 		amap->am_slots[amap->am_nused] = slot;
1538 		amap->am_nused++;
1539 	}
1540 	amap->am_anon[slot] = anon;
1541 	UVMHIST_LOG(maphist,
1542 	    "<- done (amap=%#jx, offset=%#x, anon=%#jx, rep=%d)",
1543 	    (uintptr_t)amap, offset, (uintptr_t)anon, replace);
1544 }
1545 
1546 /*
1547  * amap_unadd: remove a page from an amap.
1548  *
1549  * => amap should be locked by caller.
1550  */
1551 void
1552 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1553 {
1554 	struct vm_amap *amap = aref->ar_amap;
1555 	u_int slot, ptr, last;
1556 
1557 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1558 	KASSERT(rw_write_held(amap->am_lock));
1559 
1560 	AMAP_B2SLOT(slot, offset);
1561 	slot += aref->ar_pageoff;
1562 	KASSERT(slot < amap->am_nslot);
1563 	KASSERT(amap->am_anon[slot] != NULL);
1564 	KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);
1565 
1566 	amap->am_anon[slot] = NULL;
1567 	ptr = amap->am_bckptr[slot];
1568 
1569 	last = amap->am_nused - 1;
1570 	if (ptr != last) {
1571 		/* Move the last entry to keep the slots contiguous. */
1572 		amap->am_slots[ptr] = amap->am_slots[last];
1573 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1574 	}
1575 	amap->am_nused--;
1576 	UVMHIST_LOG(maphist, "<- done (amap=%#jx, slot=%#jx)",
1577 	    (uintptr_t)amap, slot,0, 0);
1578 }
1579 
1580 /*
1581  * amap_adjref_anons: adjust the reference count(s) on amap and its anons.
1582  */
1583 static void
1584 amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
1585     int refv, bool all)
1586 {
1587 
1588 #ifdef UVM_AMAP_PPREF
1589 	KASSERT(rw_write_held(amap->am_lock));
1590 
1591 	/*
1592 	 * We must establish the ppref array before changing am_ref
1593 	 * so that the ppref values match the current amap refcount.
1594 	 */
1595 
1596 	if (amap->am_ppref == NULL) {
1597 		amap_pp_establish(amap, offset);
1598 	}
1599 #endif
1600 
1601 	amap->am_ref += refv;
1602 
1603 #ifdef UVM_AMAP_PPREF
1604 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1605 		amap_pp_adjref(amap, offset, len, refv);
1606 	}
1607 #endif
1608 	amap_unlock(amap);
1609 }
1610 
1611 /*
1612  * amap_ref: gain a reference to an amap.
1613  *
1614  * => amap must not be locked (we will lock).
1615  * => "offset" and "len" are in units of pages.
1616  * => Called at fork time to gain the child's reference.
1617  */
1618 void
1619 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1620 {
1621 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1622 
1623 	amap_lock(amap, RW_WRITER);
1624 	if (flags & AMAP_SHARED) {
1625 		amap->am_flags |= AMAP_SHARED;
1626 	}
1627 	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
1628 
1629 	UVMHIST_LOG(maphist,"<- done!  amap=%#jx", (uintptr_t)amap, 0, 0, 0);
1630 }
1631 
1632 /*
1633  * amap_unref: remove a reference to an amap.
1634  *
1635  * => All pmap-level references to this amap must be already removed.
1636  * => Called from uvm_unmap_detach(); entry is already removed from the map.
1637  * => We will lock amap, so it must be unlocked.
1638  */
1639 void
1640 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1641 {
1642 	UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist);
1643 
1644 	amap_lock(amap, RW_WRITER);
1645 
1646 	UVMHIST_LOG(maphist,"  amap=%#jx  refs=%d, nused=%d",
1647 	    (uintptr_t)amap, amap->am_ref, amap->am_nused, 0);
1648 	KASSERT(amap->am_ref > 0);
1649 
1650 	if (amap->am_ref == 1) {
1651 
1652 		/*
1653 		 * If the last reference - wipeout and destroy the amap.
1654 		 */
1655 		amap->am_ref--;
1656 		amap_wipeout(amap);
1657 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1658 		return;
1659 	}
1660 
1661 	/*
1662 	 * Otherwise, drop the reference count(s) on anons.
1663 	 */
1664 
1665 	if (amap->am_ref == 2 && (amap->am_flags & AMAP_SHARED) != 0) {
1666 		amap->am_flags &= ~AMAP_SHARED;
1667 	}
1668 	amap_adjref_anons(amap, offset, len, -1, all);
1669 
1670 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1671 }
1672