1 /*	$NetBSD: uvm_amap.c,v 1.95 2011/06/18 21:13:29 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * uvm_amap.c: amap operations
30  */
31 
32 /*
33  * this file contains functions that perform operations on amaps.  see
34  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.95 2011/06/18 21:13:29 rmind Exp $");
39 
40 #include "opt_uvmhist.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/pool.h>
47 #include <sys/atomic.h>
48 
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_swap.h>
51 
52 /*
53  * cache for allocation of vm_amap structures.  note that in order to
54  * avoid an endless loop, the amap cache's allocator cannot allocate
55  * memory from an amap (it currently goes through the kernel uobj, so
56  * we are ok).
57  */
58 static struct pool_cache uvm_amap_cache;
59 static kmutex_t amap_list_lock;
60 static LIST_HEAD(, vm_amap) amap_list;
61 
62 /*
63  * local functions
64  */
65 
66 static inline void
67 amap_list_insert(struct vm_amap *amap)
68 {
69 
70 	mutex_enter(&amap_list_lock);
71 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
72 	mutex_exit(&amap_list_lock);
73 }
74 
75 static inline void
76 amap_list_remove(struct vm_amap *amap)
77 {
78 
79 	mutex_enter(&amap_list_lock);
80 	LIST_REMOVE(amap, am_list);
81 	mutex_exit(&amap_list_lock);
82 }
83 
84 static int
85 amap_roundup_slots(int slots)
86 {
87 
88 	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
89 }
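/*
 * Worked example (illustrative; the exact granularity is whatever
 * kmem_roundup_size() uses on the platform): a request for 100 slots
 * is 400 bytes of ints.  If kmem rounds that up to 512 bytes, we
 * return 512 / 4 = 128 slots, so the allocator's padding becomes
 * extra amap capacity instead of being wasted.
 */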
90 
91 #ifdef UVM_AMAP_PPREF
92 /*
93  * what is ppref?   ppref is an _optional_ amap feature which is used
94  * to keep track of reference counts on a per-page basis.  it is enabled
95  * when UVM_AMAP_PPREF is defined.
96  *
97  * when enabled, an array of ints is allocated for the pprefs.  this
98  * array is allocated only when a partial reference is added to the
99  * amap (either by unmapping part of the amap, or gaining a reference
100  * to only a part of an amap).  if the allocation of the array fails
101  * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
102  * that we tried to do ppref's but couldn't alloc the array so just
103  * give up (after all, this is an optional feature!).
104  *
105  * the array is divided into page sized "chunks."   for chunks of length 1,
106  * the chunk reference count plus one is stored in that chunk's slot.
107  * for chunks of length > 1 the first slot contains (the reference count
108  * plus one) * -1.    [the negative value indicates that the length is
109  * greater than one.]   the second slot of the chunk contains the length
110  * of the chunk.   here is an example:
111  *
112  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
113  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
114  *              <----------><-><----><-------><----><-><------->
115  * (x = don't care)
116  *
117  * this allows us to use one int to contain the ref count for the whole
118  * chunk.    note that the "plus one" part is needed because a reference
119  * count of zero is neither positive nor negative (we need a way to tell
120  * if we've got one zero or a bunch of them).
121  *
122  * here are some in-line functions to help us.
123  */
124 
125 /*
126  * pp_getreflen: get the reference and length for a specific offset
127  *
128  * => ppref's amap must be locked
129  */
130 static inline void
131 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
132 {
133 
134 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
135 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
136 		*lenp = 1;
137 	} else {
138 		*refp = (ppref[offset] * -1) - 1;
139 		*lenp = ppref[offset+1];
140 	}
141 }
142 
143 /*
144  * pp_setreflen: set the reference and length for a specific offset
145  *
146  * => ppref's amap must be locked
147  */
148 static inline void
149 pp_setreflen(int *ppref, int offset, int ref, int len)
150 {
151 	if (len == 0)
152 		return;
153 	if (len == 1) {
154 		ppref[offset] = ref + 1;
155 	} else {
156 		ppref[offset] = (ref + 1) * -1;
157 		ppref[offset+1] = len;
158 	}
159 }
160 #endif /* UVM_AMAP_PPREF */
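
/*
 * To make the encoding above concrete, here is a sketch (kept under
 * "#if 0"; it is not part of the build) that decodes the example ppref
 * array from the big comment with pp_getreflen().  The "x" don't-care
 * slots are filled with zero.
 */
#if 0
static void
pp_example_walk(void)
{
	int ppref[16] = { -3, 4, 0, 0, 4, -2, 2, -1, 3, 0, -5, 2, 1, -2, 3, 0 };
	int slot, ref, len;

	for (slot = 0; slot < 16; slot += len) {
		pp_getreflen(ppref, slot, &ref, &len);
		printf("slots %d..%d: ref %d\n", slot, slot + len - 1, ref);
	}
	/*
	 * prints: 0..3 ref 2, 4..4 ref 3, 5..6 ref 1, 7..9 ref 0,
	 * 10..11 ref 4, 12..12 ref 0, 13..15 ref 1 -- matching the
	 * "actual REFS" row of the example above.
	 */
}
#endif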
161 
162 /*
163  * amap_alloc1: allocate an amap, but do not initialise the overlay.
164  *
165  * => Note: lock is not set.
166  */
167 static inline struct vm_amap *
168 amap_alloc1(int slots, int padslots, int flags)
169 {
170 	const bool nowait = (flags & UVM_FLAG_NOWAIT) != 0;
171 	const km_flag_t kmflags = nowait ? KM_NOSLEEP : KM_SLEEP;
172 	struct vm_amap *amap;
173 	int totalslots;
174 
175 	amap = pool_cache_get(&uvm_amap_cache, nowait ? PR_NOWAIT : PR_WAITOK);
176 	if (amap == NULL) {
177 		return NULL;
178 	}
179 	totalslots = amap_roundup_slots(slots + padslots);
180 	amap->am_lock = NULL;
181 	amap->am_ref = 1;
182 	amap->am_flags = 0;
183 #ifdef UVM_AMAP_PPREF
184 	amap->am_ppref = NULL;
185 #endif
186 	amap->am_maxslot = totalslots;
187 	amap->am_nslot = slots;
188 	amap->am_nused = 0;
189 
190 	/*
191 	 * Note: since allocations are likely big, we expect to reduce the
192 	 * memory fragmentation by allocating them in separate blocks.
193 	 */
194 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
195 	if (amap->am_slots == NULL)
196 		goto fail1;
197 
198 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
199 	if (amap->am_bckptr == NULL)
200 		goto fail2;
201 
202 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
203 	    kmflags);
204 	if (amap->am_anon == NULL)
205 		goto fail3;
206 
207 	return amap;
208 
209 fail3:
210 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
211 fail2:
212 	kmem_free(amap->am_slots, totalslots * sizeof(int));
213 fail1:
214 	pool_cache_put(&uvm_amap_cache, amap);
215 
216 	/*
217 	 * XXX hack to tell the pagedaemon how many pages we need,
218 	 * since we can need more than it would normally free.
219 	 */
220 	if (nowait) {
221 		extern u_int uvm_extrapages;
222 		atomic_add_int(&uvm_extrapages,
223 		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
224 		    totalslots) >> PAGE_SHIFT);
225 	}
226 	return NULL;
227 }
228 
229 /*
230  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
231  *
232  * => caller should ensure sz is a multiple of PAGE_SIZE
233  * => reference count to new amap is set to one
234  * => new amap is returned unlocked
235  */
236 
237 struct vm_amap *
238 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
239 {
240 	struct vm_amap *amap;
241 	int slots, padslots;
242 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
243 
244 	AMAP_B2SLOT(slots, sz);
245 	AMAP_B2SLOT(padslots, padsz);
246 
247 	amap = amap_alloc1(slots, padslots, waitf);
248 	if (amap) {
249 		memset(amap->am_anon, 0,
250 		    amap->am_maxslot * sizeof(struct vm_anon *));
251 		amap->am_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
252 		amap_list_insert(amap);
253 	}
254 
255 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
256 	return(amap);
257 }
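
/*
 * Minimal usage sketch (under "#if 0", not compiled): allocate an amap
 * covering four pages with no padding, then drop the initial reference,
 * which wipes out and frees the (still empty) amap via amap_unref().
 */
#if 0
static void
amap_alloc_example(void)
{
	struct vm_amap *amap;

	amap = amap_alloc(4 * PAGE_SIZE, 0, 0);	/* 0 = ok to sleep */
	if (amap == NULL)
		return;
	KASSERT(amap->am_nslot == 4 && amap->am_nused == 0);
	amap_unref(amap, 0, amap->am_nslot, false);	/* last ref: freed */
}
#endif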
258 
259 /*
260  * uvm_amap_init: initialize the amap system.
261  */
262 void
263 uvm_amap_init(void)
264 {
265 
266 	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
267 
268 	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0, 0,
269 	    "amappl", NULL, IPL_NONE, NULL, NULL, NULL);
270 }
271 
272 /*
273  * amap_free: free an amap
274  *
275  * => the amap must be unlocked
276  * => the amap should have a zero reference count and be empty
277  */
278 void
279 amap_free(struct vm_amap *amap)
280 {
281 	int slots;
282 
283 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
284 
285 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
286 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
287 	if (amap->am_lock != NULL) {
288 		KASSERT(!mutex_owned(amap->am_lock));
289 		mutex_obj_free(amap->am_lock);
290 	}
291 	slots = amap->am_maxslot;
292 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
293 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
294 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
295 #ifdef UVM_AMAP_PPREF
296 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
297 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
298 #endif
299 	pool_cache_put(&uvm_amap_cache, amap);
300 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
301 }
302 
303 /*
304  * amap_extend: extend the size of an amap (if needed)
305  *
306  * => called from uvm_map when we want to extend an amap to cover
307  *    a new mapping (rather than allocate a new one)
308  * => amap should be unlocked (we will lock it)
309  * => to safely extend an amap it should have a reference count of
310  *    one (thus it can't be shared)
311  */
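/*
 * Layout sketch for the backwards case (AMAP_EXTEND_FORWARDS not set),
 * which is the harder one to follow.  "slotoff" is the entry's first
 * slot, "slotmapped" its current slots, "slotadd" the slots wanted in
 * front of it, and slotspace = am_maxslot - slotmapped:
 *
 *	before:  [ .. unused .. | mapped (slotmapped) | .. spare .. ]
 *	         0              slotoff
 *	after:   [ unused | added (slotadd) | mapped, slid to the end ]
 *	                   ^ new ar_pageoff = slotspace - slotadd
 *
 * case 2 below realises this by sliding the mapped region up in place;
 * case 3 rebuilds it in a freshly allocated, larger amap.
 */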
312 int
313 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
314 {
315 	struct vm_amap *amap = entry->aref.ar_amap;
316 	int slotoff = entry->aref.ar_pageoff;
317 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
318 	int slotadj, slotspace;
319 	int oldnslots;
320 #ifdef UVM_AMAP_PPREF
321 	int *newppref, *oldppref;
322 #endif
323 	int i, *newsl, *newbck, *oldsl, *oldbck;
324 	struct vm_anon **newover, **oldover, *tofree;
325 	const km_flag_t kmflags =
326 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
327 
328 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
329 
330 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
331 	    entry, addsize, flags, 0);
332 
333 	/*
334 	 * first, determine how many slots we need in the amap.  don't
335 	 * forget that ar_pageoff could be non-zero: this means that
336 	 * there are some unused slots before us in the amap.
337 	 */
338 
339 	amap_lock(amap);
340 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
341 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
342 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
343 	if (flags & AMAP_EXTEND_FORWARDS) {
344 		slotneed = slotoff + slotmapped + slotadd;
345 		slotadj = 0;
346 		slotspace = 0;
347 	}
348 	else {
349 		slotneed = slotadd + slotmapped;
350 		slotadj = slotadd - slotoff;
351 		slotspace = amap->am_maxslot - slotmapped;
352 	}
353 	tofree = NULL;
354 
355 	/*
356 	 * case 1: we already have enough slots in the map and thus
357 	 * only need to bump the reference counts on the slots we are
358 	 * adding.
359 	 */
360 
361 	if (flags & AMAP_EXTEND_FORWARDS) {
362 		if (amap->am_nslot >= slotneed) {
363 #ifdef UVM_AMAP_PPREF
364 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
365 				amap_pp_adjref(amap, slotoff + slotmapped,
366 				    slotadd, 1, &tofree);
367 			}
368 			uvm_anfree(tofree);
369 #endif
370 			amap_unlock(amap);
371 			UVMHIST_LOG(maphist,
372 			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
373 			    amap, slotneed, 0, 0);
374 			return 0;
375 		}
376 	} else {
377 		if (slotadj <= 0) {
378 			slotoff -= slotadd;
379 			entry->aref.ar_pageoff = slotoff;
380 #ifdef UVM_AMAP_PPREF
381 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
382 				amap_pp_adjref(amap, slotoff, slotadd, 1,
383 				    &tofree);
384 			}
385 			uvm_anfree(tofree);
386 #endif
387 			amap_unlock(amap);
388 			UVMHIST_LOG(maphist,
389 			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
390 			    amap, slotneed, 0, 0);
391 			return 0;
392 		}
393 	}
394 
395 	/*
396 	 * case 2: we pre-allocated slots for use and we just need to
397 	 * bump nslot up to account for these slots.
398 	 */
399 
400 	if (amap->am_maxslot >= slotneed) {
401 		if (flags & AMAP_EXTEND_FORWARDS) {
402 #ifdef UVM_AMAP_PPREF
403 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
404 				if ((slotoff + slotmapped) < amap->am_nslot)
405 					amap_pp_adjref(amap,
406 					    slotoff + slotmapped,
407 					    (amap->am_nslot -
408 					    (slotoff + slotmapped)), 1,
409 					    &tofree);
410 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
411 				    slotneed - amap->am_nslot);
412 			}
413 #endif
414 			amap->am_nslot = slotneed;
415 			uvm_anfree(tofree);
416 			amap_unlock(amap);
417 
418 			/*
419 			 * no need to zero am_anon since that was done at
420 			 * alloc time and we never shrink an allocation.
421 			 */
422 
423 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
424 			    "slotneed=%d", amap, slotneed, 0, 0);
425 			return 0;
426 		} else {
427 #ifdef UVM_AMAP_PPREF
428 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
429 				/*
430 				 * Slide up the ref counts on the pages that
431 				 * are actually in use.
432 				 */
433 				memmove(amap->am_ppref + slotspace,
434 				    amap->am_ppref + slotoff,
435 				    slotmapped * sizeof(int));
436 				/*
437 				 * Mark the (adjusted) gap at the front as
438 				 * referenced/not referenced.
439 				 */
440 				pp_setreflen(amap->am_ppref,
441 				    0, 0, slotspace - slotadd);
442 				pp_setreflen(amap->am_ppref,
443 				    slotspace - slotadd, 1, slotadd);
444 			}
445 #endif
446 
447 			/*
448 			 * Slide the anon pointers up and clear out
449 			 * the space we just made.
450 			 */
451 			memmove(amap->am_anon + slotspace,
452 			    amap->am_anon + slotoff,
453 			    slotmapped * sizeof(struct vm_anon*));
454 			memset(amap->am_anon + slotoff, 0,
455 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
456 
457 			/*
458 			 * Slide the backpointers up, but don't bother
459 			 * wiping out the old slots.
460 			 */
461 			memmove(amap->am_bckptr + slotspace,
462 			    amap->am_bckptr + slotoff,
463 			    slotmapped * sizeof(int));
464 
465 			/*
466 			 * Adjust all the useful active slot numbers.
467 			 */
468 			for (i = 0; i < amap->am_nused; i++)
469 				amap->am_slots[i] += (slotspace - slotoff);
470 
471 			/*
472 			 * We just filled all the empty space in the
473 			 * front of the amap by activating a few new
474 			 * slots.
475 			 */
476 			amap->am_nslot = amap->am_maxslot;
477 			entry->aref.ar_pageoff = slotspace - slotadd;
478 			amap_unlock(amap);
479 
480 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
481 			    "slotneed=%d", amap, slotneed, 0, 0);
482 			return 0;
483 		}
484 	}
485 
486 	/*
487 	 * Case 3: we need to allocate a new amap and copy all the amap
488 	 * data over from old amap to the new one.  Drop the lock before
489 	 * performing allocation.
490 	 *
491 	 * Note: since allocations are likely big, we expect to reduce the
492 	 * memory fragmentation by allocating them in separate blocks.
493 	 */
494 
495 	amap_unlock(amap);
496 
497 	if (slotneed >= UVM_AMAP_LARGE) {
498 		return E2BIG;
499 	}
500 
501 	slotalloc = amap_roundup_slots(slotneed);
502 #ifdef UVM_AMAP_PPREF
503 	newppref = NULL;
504 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
505 		/* Will be handled later if it fails. */
506 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
507 	}
508 #endif
509 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
510 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
511 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
512 	if (newsl == NULL || newbck == NULL || newover == NULL) {
513 #ifdef UVM_AMAP_PPREF
514 		if (newppref != NULL) {
515 			kmem_free(newppref, slotalloc * sizeof(*newppref));
516 		}
517 #endif
518 		if (newsl != NULL) {
519 			kmem_free(newsl, slotalloc * sizeof(*newsl));
520 		}
521 		if (newbck != NULL) {
522 			kmem_free(newbck, slotalloc * sizeof(*newbck));
523 		}
524 		if (newover != NULL) {
525 			kmem_free(newover, slotalloc * sizeof(*newover));
526 		}
527 		return ENOMEM;
528 	}
529 	amap_lock(amap);
530 	KASSERT(amap->am_maxslot < slotneed);
531 
532 	/*
533 	 * Copy everything over to new allocated areas.
534 	 */
535 
536 	slotadded = slotalloc - amap->am_nslot;
537 	if (!(flags & AMAP_EXTEND_FORWARDS))
538 		slotspace = slotalloc - slotmapped;
539 
540 	/* do am_slots */
541 	oldsl = amap->am_slots;
542 	if (flags & AMAP_EXTEND_FORWARDS)
543 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
544 	else
545 		for (i = 0; i < amap->am_nused; i++)
546 			newsl[i] = oldsl[i] + slotspace - slotoff;
547 	amap->am_slots = newsl;
548 
549 	/* do am_anon */
550 	oldover = amap->am_anon;
551 	if (flags & AMAP_EXTEND_FORWARDS) {
552 		memcpy(newover, oldover,
553 		    sizeof(struct vm_anon *) * amap->am_nslot);
554 		memset(newover + amap->am_nslot, 0,
555 		    sizeof(struct vm_anon *) * slotadded);
556 	} else {
557 		memcpy(newover + slotspace, oldover + slotoff,
558 		    sizeof(struct vm_anon *) * slotmapped);
559 		memset(newover, 0,
560 		    sizeof(struct vm_anon *) * slotspace);
561 	}
562 	amap->am_anon = newover;
563 
564 	/* do am_bckptr */
565 	oldbck = amap->am_bckptr;
566 	if (flags & AMAP_EXTEND_FORWARDS)
567 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
568 	else
569 		memcpy(newbck + slotspace, oldbck + slotoff,
570 		    sizeof(int) * slotmapped);
571 	amap->am_bckptr = newbck;
572 
573 #ifdef UVM_AMAP_PPREF
574 	/* do ppref */
575 	oldppref = amap->am_ppref;
576 	if (newppref) {
577 		if (flags & AMAP_EXTEND_FORWARDS) {
578 			memcpy(newppref, oldppref,
579 			    sizeof(int) * amap->am_nslot);
580 			memset(newppref + amap->am_nslot, 0,
581 			    sizeof(int) * slotadded);
582 		} else {
583 			memcpy(newppref + slotspace, oldppref + slotoff,
584 			    sizeof(int) * slotmapped);
585 		}
586 		amap->am_ppref = newppref;
587 		if ((flags & AMAP_EXTEND_FORWARDS) &&
588 		    (slotoff + slotmapped) < amap->am_nslot)
589 			amap_pp_adjref(amap, slotoff + slotmapped,
590 			    (amap->am_nslot - (slotoff + slotmapped)), 1,
591 			    &tofree);
592 		if (flags & AMAP_EXTEND_FORWARDS)
593 			pp_setreflen(newppref, amap->am_nslot, 1,
594 			    slotneed - amap->am_nslot);
595 		else {
596 			pp_setreflen(newppref, 0, 0,
597 			    slotalloc - slotneed);
598 			pp_setreflen(newppref, slotalloc - slotneed, 1,
599 			    slotneed - slotmapped);
600 		}
601 	} else {
602 		if (amap->am_ppref)
603 			amap->am_ppref = PPREF_NONE;
604 	}
605 #endif
606 
607 	/* update master values */
608 	if (flags & AMAP_EXTEND_FORWARDS)
609 		amap->am_nslot = slotneed;
610 	else {
611 		entry->aref.ar_pageoff = slotspace - slotadd;
612 		amap->am_nslot = slotalloc;
613 	}
614 	oldnslots = amap->am_maxslot;
615 	amap->am_maxslot = slotalloc;
616 
617 	uvm_anfree(tofree);
618 	amap_unlock(amap);
619 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
620 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
621 	kmem_free(oldover, oldnslots * sizeof(*oldover));
622 #ifdef UVM_AMAP_PPREF
623 	if (oldppref && oldppref != PPREF_NONE)
624 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
625 #endif
626 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
627 	    amap, slotneed, 0, 0);
628 	return 0;
629 }
630 
631 /*
632  * amap_share_protect: change protection of anons in a shared amap
633  *
634  * for shared amaps, given the current data structure layout, it is
635  * not possible for us to directly locate all maps referencing the
636  * shared anon (to change the protection).  in order to protect data
637  * in shared maps we use pmap_page_protect().  [this is useful for IPC
638  * mechanisms like map entry passing that may want to write-protect
639  * all mappings of a shared amap.]  we traverse am_anon or am_slots
640  * depending on the current state of the amap.
641  *
642  * => entry's map and amap must be locked by the caller
643  */
644 void
645 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
646 {
647 	struct vm_amap *amap = entry->aref.ar_amap;
648 	u_int slots, lcv, slot, stop;
649 	struct vm_anon *anon;
650 
651 	KASSERT(mutex_owned(amap->am_lock));
652 
653 	AMAP_B2SLOT(slots, (entry->end - entry->start));
654 	stop = entry->aref.ar_pageoff + slots;
655 
656 	if (slots < amap->am_nused) {
657 		/*
658 		 * Cheaper to traverse am_anon.
659 		 */
660 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
661 			anon = amap->am_anon[lcv];
662 			if (anon == NULL) {
663 				continue;
664 			}
665 			if (anon->an_page) {
666 				pmap_page_protect(anon->an_page, prot);
667 			}
668 		}
669 		return;
670 	}
671 
672 	/*
673 	 * Cheaper to traverse am_slots.
674 	 */
675 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
676 		slot = amap->am_slots[lcv];
677 		if (slot < entry->aref.ar_pageoff || slot >= stop) {
678 			continue;
679 		}
680 		anon = amap->am_anon[slot];
681 		if (anon->an_page) {
682 			pmap_page_protect(anon->an_page, prot);
683 		}
684 	}
685 }
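
/*
 * Example of the traversal choice above: write-protecting a 2-slot
 * entry in an amap with 1000 anons in use touches just those two
 * am_anon[] entries, while protecting a 1000-slot entry in an amap
 * with only 3 anons in use walks the 3-entry am_slots[] array instead.
 */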
686 
687 /*
688  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
689  *
690  * => Called from amap_unref(), when reference count drops to zero.
691  * => amap must be locked.
692  */
693 
694 void
695 amap_wipeout(struct vm_amap *amap)
696 {
697 	u_int lcv;
698 
699 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
700 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
701 
702 	KASSERT(mutex_owned(amap->am_lock));
703 	KASSERT(amap->am_ref == 0);
704 
705 	if (__predict_false(amap->am_flags & AMAP_SWAPOFF)) {
706 		/*
707 		 * Note: amap_swap_off() will call us again.
708 		 */
709 		amap_unlock(amap);
710 		return;
711 	}
712 	amap_list_remove(amap);
713 
714 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
715 		struct vm_anon *anon;
716 		u_int slot;
717 
718 		slot = amap->am_slots[lcv];
719 		anon = amap->am_anon[slot];
720 		KASSERT(anon != NULL && anon->an_ref != 0);
721 
722 		KASSERT(anon->an_lock == amap->am_lock);
723 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
724 		    anon->an_ref, 0, 0);
725 
726 		/*
727 		 * Drop the reference, and free the anon, if it is last.
728 		 */
729 
730 		if (--anon->an_ref == 0) {
731 			uvm_anfree(anon);
732 		}
733 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) {
734 			preempt();
735 		}
736 	}
737 
738 	/*
739 	 * Finally, destroy the amap.
740 	 */
741 
742 	amap->am_nused = 0;
743 	amap_unlock(amap);
744 	amap_free(amap);
745 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
746 }
747 
748 /*
749  * amap_copy: ensure that a map entry's "needs_copy" flag is false
750  *	by copying the amap if necessary.
751  *
752  * => an entry with a null amap pointer will get a new (blank) one.
753  * => the map that the map entry belongs to must be locked by caller.
754  * => the amap currently attached to "entry" (if any) must be unlocked.
755  * => if canchunk is true, then we may clip the entry into a chunk
756  * => "startva" and "endva" are used only if canchunk is true.  they are
757  *     used to limit chunking (e.g. if you have a large space that you
758  *     know you are going to need to allocate amaps for, there is no point
759  *     in allowing that to be chunked)
760  */
761 
762 void
763 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
764     vaddr_t startva, vaddr_t endva)
765 {
766 	struct vm_amap *amap, *srcamap;
767 	struct vm_anon *tofree;
768 	int slots, lcv;
769 	vaddr_t chunksize;
770 	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
771 	const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
772 	kmutex_t *lock;
773 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
774 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
775 		    map, entry, flags, 0);
776 
777 	KASSERT(map != kernel_map);	/* we use nointr pool */
778 
779 	/*
780 	 * is there a map to copy?   if not, create one from scratch.
781 	 */
782 
783 	if (entry->aref.ar_amap == NULL) {
784 
785 		/*
786 		 * check to see if we have a large amap that we can
787 		 * chunk.  we align startva/endva to chunk-sized
788 		 * boundaries and then clip to them.
789 		 */
790 
791 		if (canchunk && atop(entry->end - entry->start) >=
792 		    UVM_AMAP_LARGE) {
793 			/* convert slots to bytes */
794 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
795 			startva = (startva / chunksize) * chunksize;
796 			endva = roundup(endva, chunksize);
797 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
798 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
799 			    endva);
800 			UVM_MAP_CLIP_START(map, entry, startva, NULL);
801 			/* watch out for endva wrap-around! */
802 			if (endva >= startva)
803 				UVM_MAP_CLIP_END(map, entry, endva, NULL);
804 		}
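		/*
		 * Worked example of the clipping above, with illustrative
		 * values UVM_AMAP_CHUNK = 16 and 4 KiB pages (so chunksize
		 * is 64 KiB): startva 0x12345000 rounds down to 0x12340000
		 * and endva 0x12346000 rounds up to 0x12350000, so a large
		 * entry grows its amaps one aligned chunk at a time instead
		 * of getting one huge amap up front.
		 */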
805 
806 		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
807 		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
808 			return;
809 		}
810 
811 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
812 		    entry->start, entry->end, 0, 0);
813 		entry->aref.ar_pageoff = 0;
814 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
815 		    waitf);
816 		if (entry->aref.ar_amap != NULL)
817 			entry->etype &= ~UVM_ET_NEEDSCOPY;
818 		return;
819 	}
820 
821 	/*
822 	 * first check and see if we are the only map entry
823 	 * referencing the amap we currently have.  if so, then we can
824 	 * just take it over rather than copying it.  note that we are
825 	 * reading am_ref with the amap unlocked... the value can only
826 	 * be one if we have the only reference to the amap (via our
827 	 * locked map).  if we are greater than one we fall through to
828 	 * locked map).  if it is greater than one we fall through to
829 	 */
830 
831 	if (entry->aref.ar_amap->am_ref == 1) {
832 		entry->etype &= ~UVM_ET_NEEDSCOPY;
833 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
834 		    0, 0, 0, 0);
835 		return;
836 	}
837 
838 	/*
839 	 * looks like we need to copy the map.
840 	 */
841 
842 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
843 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
844 	AMAP_B2SLOT(slots, entry->end - entry->start);
845 	amap = amap_alloc1(slots, 0, waitf);
846 	if (amap == NULL) {
847 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
848 		return;
849 	}
850 	srcamap = entry->aref.ar_amap;
851 	amap_lock(srcamap);
852 
853 	/*
854 	 * need to double check reference count now that we've got the
855 	 * src amap locked down.  the reference count could have
856 	 * changed while we were allocating.  if the reference count
857 	 * dropped down to one we take over the old map rather than
858 	 * copying the amap.
859 	 */
860 
861 	if (srcamap->am_ref == 1) {		/* take it over? */
862 		entry->etype &= ~UVM_ET_NEEDSCOPY;
863 		amap->am_ref--;		/* drop final reference to map */
864 		amap_free(amap);	/* dispose of new (unused) amap */
865 		amap_unlock(srcamap);
866 		return;
867 	}
868 
869 	/*
870 	 * we must copy it now.
871 	 */
872 
873 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
874 	for (lcv = 0 ; lcv < slots; lcv++) {
875 		amap->am_anon[lcv] =
876 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
877 		if (amap->am_anon[lcv] == NULL)
878 			continue;
879 		KASSERT(amap->am_anon[lcv]->an_lock == srcamap->am_lock);
880 		amap->am_anon[lcv]->an_ref++;
881 		amap->am_bckptr[lcv] = amap->am_nused;
882 		amap->am_slots[amap->am_nused] = lcv;
883 		amap->am_nused++;
884 	}
885 	memset(&amap->am_anon[lcv], 0,
886 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
887 
888 	/*
889 	 * drop our reference to the old amap (srcamap) and unlock.
890 	 * we know that the reference count on srcamap is greater than
891 	 * one (we checked above), so there is no way we could drop
892 	 * the count to zero.  [and no need to worry about freeing it]
893 	 */
894 
895 	srcamap->am_ref--;
896 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
897 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
898 	tofree = NULL;
899 #ifdef UVM_AMAP_PPREF
900 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
901 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
902 		    (entry->end - entry->start) >> PAGE_SHIFT, -1, &tofree);
903 	}
904 #endif
905 	uvm_anfree(tofree);
906 	amap_unlock(srcamap);
907 
908 	/*
909 	 * if we referenced any anons then share the source amap's lock.
910 	 * otherwise we have nothing in common, so allocate a new one.
911 	 */
912 
913 	if (amap->am_nused != 0) {
914 		lock = srcamap->am_lock;
915 		mutex_obj_hold(lock);
916 	} else {
917 		lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
918 	}
919 	amap->am_lock = lock;
920 	amap_list_insert(amap);
921 
922 	/*
923 	 * install new amap.
924 	 */
925 
926 	entry->aref.ar_pageoff = 0;
927 	entry->aref.ar_amap = amap;
928 	entry->etype &= ~UVM_ET_NEEDSCOPY;
929 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
930 }
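
/*
 * The take-over logic in amap_copy() is an instance of a common locking
 * idiom: peek at a reference count unlocked, allocate with no locks
 * held, then re-check under the lock before committing.  A stripped
 * down sketch (under "#if 0"; obj/take_over/allocate_copy/populate are
 * placeholders, not kernel interfaces):
 */
#if 0
	if (obj->refcnt == 1)			/* unlocked peek */
		return take_over(obj);
	copy = allocate_copy();			/* may sleep, no locks held */
	lock(obj);
	if (obj->refcnt == 1) {			/* raced to sole owner */
		free_copy(copy);
		take_over(obj);
	} else {
		populate(copy, obj);		/* the real copy, locked */
	}
	unlock(obj);
#endif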
931 
932 /*
933  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
934  *
935  *	called during fork(2) when the parent process has a wired map
936  *	entry.   in that case we want to avoid write-protecting pages
937  *	in the parent's map (e.g. like what you'd do for a COW page)
938  *	so we resolve the COW here.
939  *
940  * => assume parent's entry was wired, thus all pages are resident.
941  * => assume pages that are loaned out (loan_count) are already mapped
942  *	read-only in all maps, and thus no need for us to worry about them
943  * => assume both parent and child vm_map's are locked
944  * => caller passes child's map/entry in to us
945  * => if we run out of memory we will unlock the amap and sleep _with_ the
946  *	parent and child vm_map's locked(!).    we have to do this since
947  *	we are in the middle of a fork(2) and we can't let the parent
948 	 *	map change until we are done copying all the map entries.
949  * => XXXCDC: out of memory should cause fork to fail, but there is
950  *	currently no easy way to do this (needs fix)
951  * => page queues must be unlocked (we may lock them)
952  */
953 
954 void
955 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
956 {
957 	struct vm_amap *amap = entry->aref.ar_amap;
958 	struct vm_anon *anon, *nanon;
959 	struct vm_page *pg, *npg;
960 	u_int lcv, slot;
961 
962 	/*
963 	 * note that if we unlock the amap then we must ReStart the "lcv" for
964 	 * loop because some other process could reorder the anon's in the
965 	 * am_anon[] array on us while the lock is dropped.
966 	 */
967 
968 ReStart:
969 	amap_lock(amap);
970 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
971 		slot = amap->am_slots[lcv];
972 		anon = amap->am_anon[slot];
973 		KASSERT(anon->an_lock == amap->am_lock);
974 
975 		/*
976 		 * If anon has only one reference - we must have already
977 		 * copied it.  This can happen if we needed to sleep waiting
978 		 * for memory in a previous run through this loop.  The new
979 		 * page might even have been paged out, since it is not wired.
980 		 */
981 
982 		if (anon->an_ref == 1) {
983 			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
984 			continue;
985 		}
986 
987 		/*
988 		 * The old page must be resident since the parent is wired.
989 		 */
990 
991 		pg = anon->an_page;
992 		KASSERT(pg != NULL);
993 		KASSERT(pg->wire_count > 0);
994 
995 		/*
996 		 * If the page is loaned then it must already be mapped
997 		 * read-only and we don't need to copy it.
998 		 */
999 
1000 		if (pg->loan_count != 0) {
1001 			continue;
1002 		}
1003 		KASSERT(pg->uanon == anon && pg->uobject == NULL);
1004 
1005 		/*
1006 		 * If the page is busy, then we have to unlock, wait for
1007 		 * it and then restart.
1008 		 */
1009 
1010 		if (pg->flags & PG_BUSY) {
1011 			pg->flags |= PG_WANTED;
1012 			UVM_UNLOCK_AND_WAIT(pg, amap->am_lock, false,
1013 			    "cownow", 0);
1014 			goto ReStart;
1015 		}
1016 
1017 		/*
1018 		 * Perform a copy-on-write.
1019 		 * First - get a new anon and a page.
1020 		 */
1021 
1022 		nanon = uvm_analloc();
1023 		if (nanon) {
1024 			npg = uvm_pagealloc(NULL, 0, nanon, 0);
1025 		} else {
1026 			npg = NULL;
1027 		}
1028 		if (nanon == NULL || npg == NULL) {
1029 
1030 			/*
1031 			 * XXXCDC: we should cause fork to fail, but we can't.
1032 			 */
1033 
1034 			if (nanon) {
1035 				nanon->an_ref--;
1036 				KASSERT(nanon->an_ref == 0);
1037 				uvm_anfree(nanon);
1038 			}
1039 			amap_unlock(amap);
1040 			uvm_wait("cownowpage");
1041 			goto ReStart;
1042 		}
1043 
1044 		/*
1045 		 * Copy the data and replace anon with the new one.
1046 		 * Also, set up its lock (share the amap's lock).
1047 		 */
1048 
1049 		nanon->an_lock = amap->am_lock;
1050 		mutex_obj_hold(nanon->an_lock);
1051 		uvm_pagecopy(pg, npg);
1052 		anon->an_ref--;
1053 		KASSERT(anon->an_ref > 0);
1054 		amap->am_anon[slot] = nanon;
1055 
1056 		/*
1057 		 * Drop PG_BUSY on new page.  Since its owner was locked all
1058 		 * this time - it cannot be PG_RELEASED or PG_WANTED.
1059 		 */
1060 
1061 		mutex_enter(&uvm_pageqlock);
1062 		uvm_pageactivate(npg);
1063 		mutex_exit(&uvm_pageqlock);
1064 		npg->flags &= ~(PG_BUSY|PG_FAKE);
1065 		UVM_PAGE_OWN(npg, NULL);
1066 	}
1067 	amap_unlock(amap);
1068 }
1069 
1070 /*
1071  * amap_splitref: split a single reference into two separate references
1072  *
1073  * => called from uvm_map's clip routines
1074  * => origref's map should be locked
1075  * => origref->ar_amap should be unlocked (we will lock)
1076  */
1077 void
1078 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1079 {
1080 	struct vm_amap *amap = origref->ar_amap;
1081 	u_int leftslots;
1082 
1083 	KASSERT(splitref->ar_amap == origref->ar_amap);
1084 	AMAP_B2SLOT(leftslots, offset);
1085 	KASSERT(leftslots != 0);
1086 
1087 	amap_lock(amap);
1088 	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1089 
1090 #ifdef UVM_AMAP_PPREF
1091 	/* Establish ppref before we add a duplicate reference to the amap. */
1092 	if (amap->am_ppref == NULL) {
1093 		amap_pp_establish(amap, origref->ar_pageoff);
1094 	}
1095 #endif
1096 	/* Note: not a share reference. */
1097 	amap->am_ref++;
1098 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1099 	amap_unlock(amap);
1100 }
1101 
1102 #ifdef UVM_AMAP_PPREF
1103 
1104 /*
1105  * amap_pp_establish: add a ppref array to an amap, if possible.
1106  *
1107  * => amap should be locked by caller.
1108  */
1109 void
1110 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1111 {
1112 	const size_t sz = amap->am_maxslot * sizeof(*amap->am_ppref);
1113 
1114 	KASSERT(mutex_owned(amap->am_lock));
1115 
1116 	amap->am_ppref = kmem_zalloc(sz, KM_NOSLEEP);
1117 	if (amap->am_ppref == NULL) {
1118 		/* Failure - just do not use ppref. */
1119 		amap->am_ppref = PPREF_NONE;
1120 		return;
1121 	}
1122 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1123 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1124 	    amap->am_nslot - offset);
1125 }
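
/*
 * Example: for am_nslot = 6, am_ref = 3 and offset = 2, the two
 * pp_setreflen() calls above leave
 *
 *	ppref: -1  2  -4  4  x  x
 *
 * i.e. a ref-0 chunk of length 2 (the slots before the reference)
 * followed by a ref-3 chunk of length 4, in the "ref plus one, negated
 * for multi-slot chunks" encoding described near the top of this file.
 */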
1126 
1127 /*
1128  * amap_pp_adjref: adjust reference count to a part of an amap using the
1129  * per-page reference count array.
1130  *
1131  * => caller must check that ppref != PPREF_NONE before calling.
1132  * => map and amap must be locked.
1133  */
1134 void
1135 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval,
1136     struct vm_anon **tofree)
1137 {
1138 	int stopslot, *ppref, lcv, prevlcv;
1139 	int ref, len, prevref, prevlen;
1140 
1141 	KASSERT(mutex_owned(amap->am_lock));
1142 
1143 	stopslot = curslot + slotlen;
1144 	ppref = amap->am_ppref;
1145 	prevlcv = 0;
1146 
1147 	/*
1148 	 * Advance to the correct place in the array, fragment if needed.
1149 	 */
1150 
1151 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1152 		pp_getreflen(ppref, lcv, &ref, &len);
1153 		if (lcv + len > curslot) {     /* goes past start? */
1154 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1155 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1156 			len = curslot - lcv;   /* new length of entry @ lcv */
1157 		}
1158 		prevlcv = lcv;
1159 	}
1160 	if (lcv == 0) {
1161 		/*
1162 		 * Ensure that the "prevref == ref" test below always
1163 		 * fails, since we are starting from the beginning of
1164 		 * the ppref array; that is, there is no previous chunk.
1165 		 */
1166 		prevref = -1;
1167 		prevlen = 0;
1168 	} else {
1169 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1170 	}
1171 
1172 	/*
1173 	 * Now adjust reference counts in range.  Merge the first
1174 	 * changed entry with the last unchanged entry if possible.
1175 	 */
1176 	KASSERT(lcv == curslot);
1177 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1178 		pp_getreflen(ppref, lcv, &ref, &len);
1179 		if (lcv + len > stopslot) {     /* goes past end? */
1180 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1181 			pp_setreflen(ppref, stopslot, ref,
1182 			    len - (stopslot - lcv));
1183 			len = stopslot - lcv;
1184 		}
1185 		ref += adjval;
1186 		KASSERT(ref >= 0);
1187 		if (lcv == prevlcv + prevlen && ref == prevref) {
1188 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1189 		} else {
1190 			pp_setreflen(ppref, lcv, ref, len);
1191 		}
1192 		if (ref == 0) {
1193 			amap_wiperange(amap, lcv, len, tofree);
1194 		}
1195 	}
1196 }
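
/*
 * Example: starting from one ref-1 chunk covering all 8 slots
 * (ppref = { -2, 8, x, ... }), amap_pp_adjref(amap, 2, 4, 1, &tofree)
 * first splits off slots 0-1, then bumps slots 2-5 and splits the
 * tail, leaving three chunks:
 *
 *	slots 0-1: ref 1,  slots 2-5: ref 2,  slots 6-7: ref 1
 *	ppref: -2  2  -3  4  x  x  -2  2
 */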
1197 
1198 /*
1199  * amap_wiperange: wipe out a range of an amap
1200  * [different from amap_wipeout because the amap is kept intact]
1201  *
1202  * => both map and amap must be locked by caller.
1203  */
1204 void
1205 amap_wiperange(struct vm_amap *amap, int slotoff, int slots,
1206     struct vm_anon **tofree)
1207 {
1208 	u_int lcv, stop, curslot, ptr, slotend;
1209 	struct vm_anon *anon;
1210 	bool byanon;
1211 
1212 	KASSERT(mutex_owned(amap->am_lock));
1213 
1214 	/*
1215 	 * we can either traverse the amap by am_anon or by am_slots depending
1216 	 * on which is cheaper.    decide now.
1217 	 */
1218 
1219 	if (slots < amap->am_nused) {
1220 		byanon = true;
1221 		lcv = slotoff;
1222 		stop = slotoff + slots;
1223 		slotend = 0;
1224 	} else {
1225 		byanon = false;
1226 		lcv = 0;
1227 		stop = amap->am_nused;
1228 		slotend = slotoff + slots;
1229 	}
1230 
1231 	while (lcv < stop) {
1232 		if (byanon) {
1233 			curslot = lcv++;	/* lcv advances here */
1234 			if (amap->am_anon[curslot] == NULL)
1235 				continue;
1236 		} else {
1237 			curslot = amap->am_slots[lcv];
1238 			if (curslot < slotoff || curslot >= slotend) {
1239 				lcv++;		/* lcv advances here */
1240 				continue;
1241 			}
1242 			stop--;	/* drop stop, since anon will be removed */
1243 		}
1244 		anon = amap->am_anon[curslot];
1245 
1246 		/*
1247 		 * Remove anon from the amap.
1248 		 */
1249 
1250 		amap->am_anon[curslot] = NULL;
1251 		ptr = amap->am_bckptr[curslot];
1252 		if (ptr != (amap->am_nused - 1)) {
1253 			amap->am_slots[ptr] =
1254 			    amap->am_slots[amap->am_nused - 1];
1255 			amap->am_bckptr[amap->am_slots[ptr]] =
1256 			    ptr;    /* back ptr. */
1257 		}
1258 		amap->am_nused--;
1259 
1260 		/*
1261 		 * Drop its reference count.
1262 		 */
1263 
1264 		KASSERT(anon->an_lock == amap->am_lock);
1265 		if (--anon->an_ref == 0) {
1266 			/*
1267 			 * Eliminated the last reference to an anon - defer
1268 			 * freeing as uvm_anfree() can unlock the amap.
1269 			 */
1270 			anon->an_link = *tofree;
1271 			*tofree = anon;
1272 		}
1273 	}
1274 }
1275 
1276 #endif
1277 
1278 #if defined(VMSWAP)
1279 
1280 /*
1281  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1282  *
1283  * => called with swap_syscall_lock held.
1284  * => note that we don't always traverse all anons.
1285  *    e.g. amaps being wiped out, released anons.
1286  * => return true if failed.
1287  */
1288 
1289 bool
1290 amap_swap_off(int startslot, int endslot)
1291 {
1292 	struct vm_amap *am;
1293 	struct vm_amap *am_next;
1294 	struct vm_amap marker_prev;
1295 	struct vm_amap marker_next;
1296 	bool rv = false;
1297 
1298 #if defined(DIAGNOSTIC)
1299 	memset(&marker_prev, 0, sizeof(marker_prev));
1300 	memset(&marker_next, 0, sizeof(marker_next));
1301 #endif /* defined(DIAGNOSTIC) */
1302 
1303 	mutex_enter(&amap_list_lock);
1304 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1305 		int i;
1306 
1307 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1308 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1309 
1310 		if (!amap_lock_try(am)) {
1311 			mutex_exit(&amap_list_lock);
1312 			preempt();
1313 			mutex_enter(&amap_list_lock);
1314 			am_next = LIST_NEXT(&marker_prev, am_list);
1315 			if (am_next == &marker_next) {
1316 				am_next = LIST_NEXT(am_next, am_list);
1317 			} else {
1318 				KASSERT(LIST_NEXT(am_next, am_list) ==
1319 				    &marker_next);
1320 			}
1321 			LIST_REMOVE(&marker_prev, am_list);
1322 			LIST_REMOVE(&marker_next, am_list);
1323 			continue;
1324 		}
1325 
1326 		mutex_exit(&amap_list_lock);
1327 
1328 		if (am->am_nused <= 0) {
1329 			amap_unlock(am);
1330 			goto next;
1331 		}
1332 
1333 		for (i = 0; i < am->am_nused; i++) {
1334 			int slot;
1335 			int swslot;
1336 			struct vm_anon *anon;
1337 
1338 			slot = am->am_slots[i];
1339 			anon = am->am_anon[slot];
1340 			KASSERT(anon->an_lock == am->am_lock);
1341 
1342 			swslot = anon->an_swslot;
1343 			if (swslot < startslot || endslot <= swslot) {
1344 				continue;
1345 			}
1346 
1347 			am->am_flags |= AMAP_SWAPOFF;
1348 
1349 			rv = uvm_anon_pagein(anon);
1350 			amap_lock(am);
1351 
1352 			am->am_flags &= ~AMAP_SWAPOFF;
1353 			if (amap_refs(am) == 0) {
1354 				amap_wipeout(am);
1355 				am = NULL;
1356 				break;
1357 			}
1358 			if (rv) {
1359 				break;
1360 			}
1361 			i = 0;
1362 		}
1363 
1364 		if (am) {
1365 			amap_unlock(am);
1366 		}
1367 
1368 next:
1369 		mutex_enter(&amap_list_lock);
1370 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1371 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1372 		    &marker_next);
1373 		am_next = LIST_NEXT(&marker_next, am_list);
1374 		LIST_REMOVE(&marker_prev, am_list);
1375 		LIST_REMOVE(&marker_next, am_list);
1376 	}
1377 	mutex_exit(&amap_list_lock);
1378 
1379 	return rv;
1380 }
1381 
1382 #endif /* defined(VMSWAP) */
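
/*
 * amap_swap_off() keeps its place in amap_list across lock drops using
 * a pair of on-stack marker entries.  A stripped-down sketch of the
 * idiom with a single marker ("item"/"process" are placeholders, not
 * kernel interfaces):
 */
#if 0
	struct item marker, *it, *next;

	mutex_enter(&list_lock);
	for (it = LIST_FIRST(&list); it != NULL; it = next) {
		LIST_INSERT_AFTER(it, &marker, entries);
		mutex_exit(&list_lock);		/* list may change under us */
		process(it);
		mutex_enter(&list_lock);
		next = LIST_NEXT(&marker, entries);	/* marker = position */
		LIST_REMOVE(&marker, entries);
	}
	mutex_exit(&list_lock);
#endif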
1383 
1384 /*
1385  * amap_lookup: look up a page in an amap.
1386  *
1387  * => amap should be locked by caller.
1388  */
1389 struct vm_anon *
1390 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1391 {
1392 	struct vm_amap *amap = aref->ar_amap;
1393 	struct vm_anon *an;
1394 	u_int slot;
1395 
1396 	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
1397 	KASSERT(mutex_owned(amap->am_lock));
1398 
1399 	AMAP_B2SLOT(slot, offset);
1400 	slot += aref->ar_pageoff;
1401 	an = amap->am_anon[slot];
1402 
1403 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
1404 	    amap, offset, an, 0);
1405 
1406 	KASSERT(slot < amap->am_nslot);
1407 	KASSERT(an == NULL || an->an_ref != 0);
1408 	KASSERT(an == NULL || an->an_lock == amap->am_lock);
1409 	return an;
1410 }
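
/*
 * Example: with 4 KiB pages, a lookup at byte offset 0x3000 through an
 * aref with ar_pageoff = 5 reads slot 3 + 5 = 8 of the amap.
 */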
1411 
1412 /*
1413  * amap_lookups: look up a range of pages in an amap.
1414  *
1415  * => amap should be locked by caller.
1416  */
1417 void
1418 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1419     int npages)
1420 {
1421 	struct vm_amap *amap = aref->ar_amap;
1422 	u_int slot;
1423 
1424 	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
1425 	KASSERT(mutex_owned(amap->am_lock));
1426 
1427 	AMAP_B2SLOT(slot, offset);
1428 	slot += aref->ar_pageoff;
1429 
1430 	UVMHIST_LOG(maphist, "  slot=%u, npages=%d, nslot=%d",
1431 	    slot, npages, amap->am_nslot, 0);
1432 
1433 	KASSERT((slot + (npages - 1)) < amap->am_nslot);
1434 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1435 
1436 #if defined(DIAGNOSTIC)
1437 	for (int i = 0; i < npages; i++) {
1438 		struct vm_anon * const an = anons[i];
1439 		if (an == NULL) {
1440 			continue;
1441 		}
1442 		KASSERT(an->an_ref != 0);
1443 		KASSERT(an->an_lock == amap->am_lock);
1444 	}
1445 #endif
1446 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1447 }
1448 
1449 /*
1450  * amap_add: add (or replace) a page to an amap.
1451  *
1452  * => amap should be locked by caller.
1453  * => anon must have the lock associated with this amap.
1454  */
1455 void
1456 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1457     bool replace)
1458 {
1459 	struct vm_amap *amap = aref->ar_amap;
1460 	u_int slot;
1461 
1462 	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
1463 	KASSERT(mutex_owned(amap->am_lock));
1464 	KASSERT(anon->an_lock == amap->am_lock);
1465 
1466 	AMAP_B2SLOT(slot, offset);
1467 	slot += aref->ar_pageoff;
1468 	KASSERT(slot < amap->am_nslot);
1469 
1470 	if (replace) {
1471 		struct vm_anon *oanon = amap->am_anon[slot];
1472 
1473 		KASSERT(oanon != NULL);
1474 		if (oanon->an_page && (amap->am_flags & AMAP_SHARED) != 0) {
1475 			pmap_page_protect(oanon->an_page, VM_PROT_NONE);
1476 			/*
1477 			 * XXX: suppose page is supposed to be wired somewhere?
1478 			 */
1479 		}
1480 	} else {
1481 		KASSERT(amap->am_anon[slot] == NULL);
1482 		amap->am_bckptr[slot] = amap->am_nused;
1483 		amap->am_slots[amap->am_nused] = slot;
1484 		amap->am_nused++;
1485 	}
1486 	amap->am_anon[slot] = anon;
1487 	UVMHIST_LOG(maphist,
1488 	    "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
1489 	    amap, offset, anon, replace);
1490 }
1491 
1492 /*
1493  * amap_unadd: remove a page from an amap.
1494  *
1495  * => amap should be locked by caller.
1496  */
1497 void
1498 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1499 {
1500 	struct vm_amap *amap = aref->ar_amap;
1501 	u_int slot, ptr, last;
1502 
1503 	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
1504 	KASSERT(mutex_owned(amap->am_lock));
1505 
1506 	AMAP_B2SLOT(slot, offset);
1507 	slot += aref->ar_pageoff;
1508 	KASSERT(slot < amap->am_nslot);
1509 	KASSERT(amap->am_anon[slot] != NULL);
1510 	KASSERT(amap->am_anon[slot]->an_lock == amap->am_lock);
1511 
1512 	amap->am_anon[slot] = NULL;
1513 	ptr = amap->am_bckptr[slot];
1514 
1515 	last = amap->am_nused - 1;
1516 	if (ptr != last) {
1517 		/* Move the last entry to keep the slots contiguous. */
1518 		amap->am_slots[ptr] = amap->am_slots[last];
1519 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
1520 	}
1521 	amap->am_nused--;
1522 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0);
1523 }
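
/*
 * amap_add()/amap_unadd() maintain a classic dense-set structure:
 * am_slots[] packs the indices of every active slot into its first
 * am_nused entries, and am_bckptr[] maps a slot number back to its
 * position in am_slots[], making removal O(1) by swapping in the last
 * entry.  Standalone sketch of the same structure (under "#if 0"):
 */
#if 0
enum { N = 128 };			/* illustrative capacity */

struct dense_set {
	u_int	nused;			/* number of active slots */
	u_int	slots[N];		/* slots[0..nused-1]: active slots */
	u_int	bckptr[N];		/* bckptr[s]: index of s in slots[] */
};

static void
dense_set_remove(struct dense_set *ds, u_int slot)
{
	u_int pos = ds->bckptr[slot];
	u_int last = ds->nused - 1;

	if (pos != last) {
		/* move the last entry into the hole, re-aim its bckptr */
		ds->slots[pos] = ds->slots[last];
		ds->bckptr[ds->slots[pos]] = pos;
	}
	ds->nused--;
}
#endif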
1524 
1525 /*
1526  * amap_adjref_anons: adjust the reference count(s) on anons of the amap.
1527  */
1528 static void
1529 amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
1530     int refv, bool all)
1531 {
1532 #ifdef UVM_AMAP_PPREF
1533 	KASSERT(mutex_owned(amap->am_lock));
1534 
1535 	if (amap->am_ppref == NULL && !all && len != amap->am_nslot) {
1536 		amap_pp_establish(amap, offset);
1537 	}
1538 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1539 		struct vm_anon *tofree = NULL;
1540 
1541 		if (all) {
1542 			amap_pp_adjref(amap, 0, amap->am_nslot, refv, &tofree);
1543 		} else {
1544 			amap_pp_adjref(amap, offset, len, refv, &tofree);
1545 		}
1546 		uvm_anfree(tofree);
1547 	}
1548 #endif
1549 }
1550 
1551 /*
1552  * amap_ref: gain a reference to an amap.
1553  *
1554  * => amap must not be locked (we will lock).
1555  * => "offset" and "len" are in units of pages.
1556  * => Called at fork time to gain the child's reference.
1557  */
1558 void
1559 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1560 {
1561 	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
1562 
1563 	amap_lock(amap);
1564 	if (flags & AMAP_SHARED) {
1565 		amap->am_flags |= AMAP_SHARED;
1566 	}
1567 	amap->am_ref++;
1568 	amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
1569 	amap_unlock(amap);
1570 
1571 	UVMHIST_LOG(maphist,"<- done!  amap=0x%x", amap, 0, 0, 0);
1572 }
1573 
1574 /*
1575  * amap_unref: remove a reference to an amap.
1576  *
1577  * => All pmap-level references to this amap must be already removed.
1578  * => Called from uvm_unmap_detach(); entry is already removed from the map.
1579  * => We will lock amap, so it must be unlocked.
1580  */
1581 void
1582 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1583 {
1584 	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
1585 
1586 	amap_lock(amap);
1587 
1588 	UVMHIST_LOG(maphist,"  amap=0x%x  refs=%d, nused=%d",
1589 	    amap, amap->am_ref, amap->am_nused, 0);
1590 	KASSERT(amap->am_ref > 0);
1591 
1592 	if (--amap->am_ref == 0) {
1593 		/*
1594 		 * If the last reference - wipeout and destroy the amap.
1595 		 */
1596 		amap_wipeout(amap);
1597 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1598 		return;
1599 	}
1600 
1601 	/*
1602 	 * Otherwise, drop the reference count(s) on anons.
1603 	 */
1604 
1605 	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0) {
1606 		amap->am_flags &= ~AMAP_SHARED;
1607 	}
1608 	amap_adjref_anons(amap, offset, len, -1, all);
1609 	amap_unlock(amap);
1610 
1611 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1612 }
1613