xref: /netbsd-src/sys/uvm/uvm_amap.c (revision cb861154c176d3dcc8ff846f449e3c16a5f5edb5)
1 /*	$NetBSD: uvm_amap.c,v 1.90 2011/04/23 18:14:12 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * uvm_amap.c: amap operations
30  */
31 
32 /*
33  * this file contains functions that perform operations on amaps.  see
34  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.90 2011/04/23 18:14:12 rmind Exp $");
39 
40 #include "opt_uvmhist.h"
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/kmem.h>
46 #include <sys/pool.h>
47 #include <sys/atomic.h>
48 
49 #include <uvm/uvm.h>
50 #include <uvm/uvm_swap.h>
51 
52 /*
53  * cache for allocation of vm_amap structures.  note that in order to
54  * avoid an endless loop, the amap cache's allocator cannot allocate
55  * memory from an amap (it currently goes through the kernel uobj, so
56  * we are ok).
57  */
58 static struct pool_cache uvm_amap_cache;
59 static kmutex_t amap_list_lock;
60 static LIST_HEAD(, vm_amap) amap_list;
61 
62 /*
63  * local functions
64  */
65 
66 static inline void
67 amap_list_insert(struct vm_amap *amap)
68 {
69 
70 	mutex_enter(&amap_list_lock);
71 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
72 	mutex_exit(&amap_list_lock);
73 }
74 
75 static inline void
76 amap_list_remove(struct vm_amap *amap)
77 {
78 
79 	mutex_enter(&amap_list_lock);
80 	LIST_REMOVE(amap, am_list);
81 	mutex_exit(&amap_list_lock);
82 }
83 
84 static int
85 amap_roundup_slots(int slots)
86 {
87 
88 	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
89 }
90 
91 #ifdef UVM_AMAP_PPREF
92 /*
93  * what is ppref?   ppref is an _optional_ amap feature which is used
94  * to keep track of reference counts on a per-page basis.  it is enabled
95  * when UVM_AMAP_PPREF is defined.
96  *
97  * when enabled, an array of ints is allocated for the pprefs.  this
98  * array is allocated only when a partial reference is added to the
99  * map (either by unmapping part of the amap, or gaining a reference
100  * to only a part of an amap).  if the allocation of the array fails
101  * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
102  * that we tried to do ppref's but couldn't alloc the array so just
103  * give up (after all, this is an optional feature!).
104  *
105  * the array is divided into variable-length "chunks."   for chunks of length 1,
106  * the chunk reference count plus one is stored in that chunk's slot.
107  * for chunks of length > 1 the first slot contains (the reference count
108  * plus one) * -1.    [the negative value indicates that the length is
109  * greater than one.]   the second slot of the chunk contains the length
110  * of the chunk.   here is an example:
111  *
112  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
113  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
114  *              <----------><-><----><-------><----><-><------->
115  * (x = don't care)
116  *
117  * this allows one int to contain the ref count for the whole
118  * chunk.    note that the "plus one" part is needed because a reference
119  * count of zero is neither positive nor negative (need a way to tell
120  * if we've got one zero or a bunch of them).
121  *
122  * here are some in-line functions to help us.
123  */
124 
125 /*
126  * pp_getreflen: get the reference and length for a specific offset
127  *
128  * => ppref's amap must be locked
129  */
130 static inline void
131 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
132 {
133 
134 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
135 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
136 		*lenp = 1;
137 	} else {
138 		*refp = (ppref[offset] * -1) - 1;
139 		*lenp = ppref[offset+1];
140 	}
141 }
142 
143 /*
144  * pp_setreflen: set the reference and length for a specific offset
145  *
146  * => ppref's amap must be locked
147  */
148 static inline void
149 pp_setreflen(int *ppref, int offset, int ref, int len)
150 {
151 	if (len == 0)
152 		return;
153 	if (len == 1) {
154 		ppref[offset] = ref + 1;
155 	} else {
156 		ppref[offset] = (ref + 1) * -1;
157 		ppref[offset+1] = len;
158 	}
159 }
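
/*
 * Illustrative sketch of the encoding above (not compiled into the
 * kernel, hence the #if 0 guard; the function name is hypothetical).
 * It rebuilds the example table from the comment with pp_setreflen()
 * and walks it back with pp_getreflen(); the (ref, len) pairs are
 * exactly the chunks of the "actual REFS" row shown earlier.
 */
#if 0
static void
pp_example(void)
{
	int ppref[16];
	int off, ref, len;

	pp_setreflen(ppref,  0, 2, 4);	/* slots  0-3:  ref 2 -> -3, 4 */
	pp_setreflen(ppref,  4, 3, 1);	/* slot   4:    ref 3 ->  4    */
	pp_setreflen(ppref,  5, 1, 2);	/* slots  5-6:  ref 1 -> -2, 2 */
	pp_setreflen(ppref,  7, 0, 3);	/* slots  7-9:  ref 0 -> -1, 3 */
	pp_setreflen(ppref, 10, 4, 2);	/* slots 10-11: ref 4 -> -5, 2 */
	pp_setreflen(ppref, 12, 0, 1);	/* slot  12:    ref 0 ->  1    */
	pp_setreflen(ppref, 13, 1, 3);	/* slots 13-15: ref 1 -> -2, 3 */

	/* decoding visits each chunk start: (2,4) (3,1) (1,2) (0,3) ... */
	for (off = 0; off < 16; off += len)
		pp_getreflen(ppref, off, &ref, &len);
}
#endif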
160 #endif /* UVM_AMAP_PPREF */
161 
162 /*
163  * amap_alloc1: internal function that allocates an amap, but does not
164  *	init the overlay.
165  *
166  * => lock on returned amap is init'd
167  */
168 static inline struct vm_amap *
169 amap_alloc1(int slots, int padslots, int waitf)
170 {
171 	struct vm_amap *amap;
172 	int totalslots;
173 	km_flag_t kmflags;
174 
175 	amap = pool_cache_get(&uvm_amap_cache,
176 	    ((waitf & UVM_FLAG_NOWAIT) != 0) ? PR_NOWAIT : PR_WAITOK);
177 	if (amap == NULL)
178 		return(NULL);
179 
180 	kmflags = ((waitf & UVM_FLAG_NOWAIT) != 0) ? KM_NOSLEEP : KM_SLEEP;
181 	totalslots = amap_roundup_slots(slots + padslots);
182 	mutex_init(&amap->am_l, MUTEX_DEFAULT, IPL_NONE);
183 	amap->am_ref = 1;
184 	amap->am_flags = 0;
185 #ifdef UVM_AMAP_PPREF
186 	amap->am_ppref = NULL;
187 #endif
188 	amap->am_maxslot = totalslots;
189 	amap->am_nslot = slots;
190 	amap->am_nused = 0;
191 
192 	/*
193 	 * Note: since allocations are likely big, we expect to reduce the
194 	 * memory fragmentation by allocating them in separate blocks.
195 	 */
196 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
197 	if (amap->am_slots == NULL)
198 		goto fail1;
199 
200 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
201 	if (amap->am_bckptr == NULL)
202 		goto fail2;
203 
204 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
205 	    kmflags);
206 	if (amap->am_anon == NULL)
207 		goto fail3;
208 
209 	return(amap);
210 
211 fail3:
212 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
213 fail2:
214 	kmem_free(amap->am_slots, totalslots * sizeof(int));
215 fail1:
216 	mutex_destroy(&amap->am_l);
217 	pool_cache_put(&uvm_amap_cache, amap);
218 
219 	/*
220 	 * XXX hack to tell the pagedaemon how many pages we need,
221 	 * since we can need more than it would normally free.
222 	 */
223 	if ((waitf & UVM_FLAG_NOWAIT) != 0) {
224 		extern u_int uvm_extrapages;
225 		atomic_add_int(&uvm_extrapages,
226 		    ((sizeof(int) * 2 + sizeof(struct vm_anon *)) *
227 		    totalslots) >> PAGE_SHIFT);
228 	}
229 	return (NULL);
230 }
231 
232 /*
233  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
234  *
235  * => caller should ensure sz is a multiple of PAGE_SIZE
236  * => reference count to new amap is set to one
237  * => new amap is returned unlocked
238  */
239 
240 struct vm_amap *
241 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
242 {
243 	struct vm_amap *amap;
244 	int slots, padslots;
245 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
246 
247 	AMAP_B2SLOT(slots, sz);
248 	AMAP_B2SLOT(padslots, padsz);
249 
250 	amap = amap_alloc1(slots, padslots, waitf);
251 	if (amap) {
252 		memset(amap->am_anon, 0,
253 		    amap->am_maxslot * sizeof(struct vm_anon *));
254 		amap_list_insert(amap);
255 	}
256 
257 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
258 	return(amap);
259 }
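
/*
 * Minimal usage sketch (not compiled; the function name and "entry"
 * argument are hypothetical): this mirrors what amap_copy() does below
 * when a map entry needs a fresh amap sized to cover it.
 */
#if 0
static void
amap_alloc_example(struct vm_map_entry *entry)
{
	entry->aref.ar_pageoff = 0;
	entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0, 0);
	if (entry->aref.ar_amap != NULL)
		entry->etype &= ~UVM_ET_NEEDSCOPY;
}
#endif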
260 
261 /*
262  * uvm_amap_init: initialize the amap system.
263  */
264 void
265 uvm_amap_init(void)
266 {
267 
268 	mutex_init(&amap_list_lock, MUTEX_DEFAULT, IPL_NONE);
269 
270 	pool_cache_bootstrap(&uvm_amap_cache, sizeof(struct vm_amap), 0, 0, 0,
271 	    "amappl", NULL, IPL_NONE, NULL, NULL, NULL);
272 }
273 
274 /*
275  * amap_free: free an amap
276  *
277  * => the amap must be unlocked
278  * => the amap should have a zero reference count and be empty
279  */
280 void
281 amap_free(struct vm_amap *amap)
282 {
283 	int slots;
284 
285 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
286 
287 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
288 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
289 	KASSERT(!mutex_owned(&amap->am_l));
290 	slots = amap->am_maxslot;
291 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
292 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
293 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
294 #ifdef UVM_AMAP_PPREF
295 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
296 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
297 #endif
298 	mutex_destroy(&amap->am_l);
299 	pool_cache_put(&uvm_amap_cache, amap);
300 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
301 }
302 
303 /*
304  * amap_extend: extend the size of an amap (if needed)
305  *
306  * => called from uvm_map when we want to extend an amap to cover
307  *    a new mapping (rather than allocate a new one)
308  * => amap should be unlocked (we will lock it)
309  * => to safely extend an amap it should have a reference count of
310  *    one (thus it can't be shared)
311  */
312 int
313 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
314 {
315 	struct vm_amap *amap = entry->aref.ar_amap;
316 	int slotoff = entry->aref.ar_pageoff;
317 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
318 	int slotadj, slotspace;
319 	int oldnslots;
320 #ifdef UVM_AMAP_PPREF
321 	int *newppref, *oldppref;
322 #endif
323 	int i, *newsl, *newbck, *oldsl, *oldbck;
324 	struct vm_anon **newover, **oldover;
325 	const km_flag_t kmflags =
326 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
327 
328 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
329 
330 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
331 	    entry, addsize, flags, 0);
332 
333 	/*
334 	 * first, determine how many slots we need in the amap.  don't
335 	 * forget that ar_pageoff could be non-zero: this means that
336 	 * there are some unused slots before us in the amap.
337 	 */
338 
339 	amap_lock(amap);
340 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
341 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
342 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
343 	if (flags & AMAP_EXTEND_FORWARDS) {
344 		slotneed = slotoff + slotmapped + slotadd;
345 		slotadj = 0;
346 		slotspace = 0;
347 	}
348 	else {
349 		slotneed = slotadd + slotmapped;
350 		slotadj = slotadd - slotoff;
351 		slotspace = amap->am_maxslot - slotmapped;
352 	}
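
	/*
	 * Worked example with hypothetical numbers: slotoff = 2,
	 * slotmapped = 4, slotadd = 3, am_maxslot = 16.  Extending
	 * forwards gives slotneed = 2 + 4 + 3 = 9.  Extending backwards
	 * gives slotneed = 3 + 4 = 7, slotadj = 3 - 2 = 1 (one more slot
	 * is needed in front than ar_pageoff provides) and slotspace =
	 * 16 - 4 = 12 (where the mapped slots would start if slid to the
	 * back of the allocation).
	 */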
353 
354 	/*
355 	 * case 1: we already have enough slots in the map and thus
356 	 * only need to bump the reference counts on the slots we are
357 	 * adding.
358 	 */
359 
360 	if (flags & AMAP_EXTEND_FORWARDS) {
361 		if (amap->am_nslot >= slotneed) {
362 #ifdef UVM_AMAP_PPREF
363 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
364 				amap_pp_adjref(amap, slotoff + slotmapped,
365 				    slotadd, 1);
366 			}
367 #endif
368 			amap_unlock(amap);
369 			UVMHIST_LOG(maphist,
370 			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
371 			    amap, slotneed, 0, 0);
372 			return 0;
373 		}
374 	} else {
375 		if (slotadj <= 0) {
376 			slotoff -= slotadd;
377 			entry->aref.ar_pageoff = slotoff;
378 #ifdef UVM_AMAP_PPREF
379 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
380 				amap_pp_adjref(amap, slotoff, slotadd, 1);
381 			}
382 #endif
383 			amap_unlock(amap);
384 			UVMHIST_LOG(maphist,
385 			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
386 			    amap, slotneed, 0, 0);
387 			return 0;
388 		}
389 	}
390 
391 	/*
392 	 * case 2: we pre-allocated slots for use and we just need to
393 	 * bump nslot up to account for these slots.
394 	 */
395 
396 	if (amap->am_maxslot >= slotneed) {
397 		if (flags & AMAP_EXTEND_FORWARDS) {
398 #ifdef UVM_AMAP_PPREF
399 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
400 				if ((slotoff + slotmapped) < amap->am_nslot)
401 					amap_pp_adjref(amap,
402 					    slotoff + slotmapped,
403 					    (amap->am_nslot -
404 					    (slotoff + slotmapped)), 1);
405 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
406 				    slotneed - amap->am_nslot);
407 			}
408 #endif
409 			amap->am_nslot = slotneed;
410 			amap_unlock(amap);
411 
412 			/*
413 			 * no need to zero am_anon since that was done at
414 			 * alloc time and we never shrink an allocation.
415 			 */
416 
417 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
418 			    "slotneed=%d", amap, slotneed, 0, 0);
419 			return 0;
420 		} else {
421 #ifdef UVM_AMAP_PPREF
422 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
423 				/*
424 				 * Slide up the ref counts on the pages that
425 				 * are actually in use.
426 				 */
427 				memmove(amap->am_ppref + slotspace,
428 				    amap->am_ppref + slotoff,
429 				    slotmapped * sizeof(int));
430 				/*
431 				 * Mark the (adjusted) gap at the front as
432 				 * referenced/not referenced.
433 				 */
434 				pp_setreflen(amap->am_ppref,
435 				    0, 0, slotspace - slotadd);
436 				pp_setreflen(amap->am_ppref,
437 				    slotspace - slotadd, 1, slotadd);
438 			}
439 #endif
440 
441 			/*
442 			 * Slide the anon pointers up and clear out
443 			 * the space we just made.
444 			 */
445 			memmove(amap->am_anon + slotspace,
446 			    amap->am_anon + slotoff,
447 			    slotmapped * sizeof(struct vm_anon*));
448 			memset(amap->am_anon + slotoff, 0,
449 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
450 
451 			/*
452 			 * Slide the backpointers up, but don't bother
453 			 * wiping out the old slots.
454 			 */
455 			memmove(amap->am_bckptr + slotspace,
456 			    amap->am_bckptr + slotoff,
457 			    slotmapped * sizeof(int));
458 
459 			/*
460 			 * Adjust all the useful active slot numbers.
461 			 */
462 			for (i = 0; i < amap->am_nused; i++)
463 				amap->am_slots[i] += (slotspace - slotoff);
464 
465 			/*
466 			 * We just filled all the empty space in the
467 			 * front of the amap by activating a few new
468 			 * slots.
469 			 */
470 			amap->am_nslot = amap->am_maxslot;
471 			entry->aref.ar_pageoff = slotspace - slotadd;
472 			amap_unlock(amap);
473 
474 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
475 			    "slotneed=%d", amap, slotneed, 0, 0);
476 			return 0;
477 		}
478 	}
479 
480 	/*
481 	 * Case 3: we need to allocate a new amap and copy all the amap
482 	 * data over from old amap to the new one.  Drop the lock before
483 	 * performing allocation.
484 	 *
485 	 * Note: since allocations are likely big, we expect to reduce the
486 	 * memory fragmentation by allocating them in separate blocks.
487 	 */
488 
489 	amap_unlock(amap);
490 
491 	if (slotneed >= UVM_AMAP_LARGE) {
492 		return E2BIG;
493 	}
494 
495 	slotalloc = amap_roundup_slots(slotneed);
496 #ifdef UVM_AMAP_PPREF
497 	newppref = NULL;
498 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
499 		/* If this fails, ppref is simply disabled further below. */
500 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
501 	}
502 #endif
503 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
504 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
505 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
506 	if (newsl == NULL || newbck == NULL || newover == NULL) {
507 #ifdef UVM_AMAP_PPREF
508 		if (newppref != NULL) {
509 			kmem_free(newppref, slotalloc * sizeof(*newppref));
510 		}
511 #endif
512 		if (newsl != NULL) {
513 			kmem_free(newsl, slotalloc * sizeof(*newsl));
514 		}
515 		if (newbck != NULL) {
516 			kmem_free(newbck, slotalloc * sizeof(*newbck));
517 		}
518 		if (newover != NULL) {
519 			kmem_free(newover, slotalloc * sizeof(*newover));
520 		}
521 		return ENOMEM;
522 	}
523 	amap_lock(amap);
524 	KASSERT(amap->am_maxslot < slotneed);
525 
526 	/*
527 	 * Copy everything over to new allocated areas.
528 	 */
529 
530 	slotadded = slotalloc - amap->am_nslot;
531 	if (!(flags & AMAP_EXTEND_FORWARDS))
532 		slotspace = slotalloc - slotmapped;
533 
534 	/* do am_slots */
535 	oldsl = amap->am_slots;
536 	if (flags & AMAP_EXTEND_FORWARDS)
537 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
538 	else
539 		for (i = 0; i < amap->am_nused; i++)
540 			newsl[i] = oldsl[i] + slotspace - slotoff;
541 	amap->am_slots = newsl;
542 
543 	/* do am_anon */
544 	oldover = amap->am_anon;
545 	if (flags & AMAP_EXTEND_FORWARDS) {
546 		memcpy(newover, oldover,
547 		    sizeof(struct vm_anon *) * amap->am_nslot);
548 		memset(newover + amap->am_nslot, 0,
549 		    sizeof(struct vm_anon *) * slotadded);
550 	} else {
551 		memcpy(newover + slotspace, oldover + slotoff,
552 		    sizeof(struct vm_anon *) * slotmapped);
553 		memset(newover, 0,
554 		    sizeof(struct vm_anon *) * slotspace);
555 	}
556 	amap->am_anon = newover;
557 
558 	/* do am_bckptr */
559 	oldbck = amap->am_bckptr;
560 	if (flags & AMAP_EXTEND_FORWARDS)
561 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
562 	else
563 		memcpy(newbck + slotspace, oldbck + slotoff,
564 		    sizeof(int) * slotmapped);
565 	amap->am_bckptr = newbck;
566 
567 #ifdef UVM_AMAP_PPREF
568 	/* do ppref */
569 	oldppref = amap->am_ppref;
570 	if (newppref) {
571 		if (flags & AMAP_EXTEND_FORWARDS) {
572 			memcpy(newppref, oldppref,
573 			    sizeof(int) * amap->am_nslot);
574 			memset(newppref + amap->am_nslot, 0,
575 			    sizeof(int) * slotadded);
576 		} else {
577 			memcpy(newppref + slotspace, oldppref + slotoff,
578 			    sizeof(int) * slotmapped);
579 		}
580 		amap->am_ppref = newppref;
581 		if ((flags & AMAP_EXTEND_FORWARDS) &&
582 		    (slotoff + slotmapped) < amap->am_nslot)
583 			amap_pp_adjref(amap, slotoff + slotmapped,
584 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
585 		if (flags & AMAP_EXTEND_FORWARDS)
586 			pp_setreflen(newppref, amap->am_nslot, 1,
587 			    slotneed - amap->am_nslot);
588 		else {
589 			pp_setreflen(newppref, 0, 0,
590 			    slotalloc - slotneed);
591 			pp_setreflen(newppref, slotalloc - slotneed, 1,
592 			    slotneed - slotmapped);
593 		}
594 	} else {
595 		if (amap->am_ppref)
596 			amap->am_ppref = PPREF_NONE;
597 	}
598 #endif
599 
600 	/* update master values */
601 	if (flags & AMAP_EXTEND_FORWARDS)
602 		amap->am_nslot = slotneed;
603 	else {
604 		entry->aref.ar_pageoff = slotspace - slotadd;
605 		amap->am_nslot = slotalloc;
606 	}
607 	oldnslots = amap->am_maxslot;
608 	amap->am_maxslot = slotalloc;
609 
610 	amap_unlock(amap);
611 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
612 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
613 	kmem_free(oldover, oldnslots * sizeof(*oldover));
614 #ifdef UVM_AMAP_PPREF
615 	if (oldppref && oldppref != PPREF_NONE)
616 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
617 #endif
618 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
619 	    amap, slotneed, 0, 0);
620 	return 0;
621 }
622 
623 /*
624  * amap_share_protect: change protection of anons in a shared amap
625  *
626  * for shared amaps, given the current data structure layout, it is
627  * not possible for us to directly locate all maps referencing the
628  * shared anon (to change the protection).  in order to protect data
629  * in shared maps we use pmap_page_protect().  [this is useful for IPC
630  * mechanisms like map entry passing that may want to write-protect
631  * all mappings of a shared amap.]  we traverse am_anon or am_slots
632  * depending on the current state of the amap.
633  *
634  * => entry's map and amap must be locked by the caller
635  */
636 void
637 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
638 {
639 	struct vm_amap *amap = entry->aref.ar_amap;
640 	int slots, lcv, slot, stop;
641 
642 	KASSERT(mutex_owned(&amap->am_l));
643 
644 	AMAP_B2SLOT(slots, (entry->end - entry->start));
645 	stop = entry->aref.ar_pageoff + slots;
646 
647 	if (slots < amap->am_nused) {
648 		/* cheaper to traverse am_anon */
649 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
650 			if (amap->am_anon[lcv] == NULL)
651 				continue;
652 			if (amap->am_anon[lcv]->an_page != NULL)
653 				pmap_page_protect(amap->am_anon[lcv]->an_page,
654 						  prot);
655 		}
656 		return;
657 	}
658 
659 	/* cheaper to traverse am_slots */
660 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
661 		slot = amap->am_slots[lcv];
662 		if (slot < entry->aref.ar_pageoff || slot >= stop)
663 			continue;
664 		if (amap->am_anon[slot]->an_page != NULL)
665 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
666 	}
667 }
668 
669 /*
670  * amap_wipeout: wipe out all anons in an amap; then free the amap!
671  *
672  * => called from amap_unref when the final reference to an amap is
673  *	discarded (i.e. when reference count drops to 0)
674  * => the amap should be locked (by the caller)
675  */
676 
677 void
678 amap_wipeout(struct vm_amap *amap)
679 {
680 	int lcv, slot;
681 	struct vm_anon *anon;
682 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
683 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
684 
685 	KASSERT(amap->am_ref == 0);
686 
687 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
688 		/*
689 		 * amap_swap_off will call us again.
690 		 */
691 		amap_unlock(amap);
692 		return;
693 	}
694 	amap_list_remove(amap);
695 	amap_unlock(amap);
696 
697 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
698 		int refs;
699 
700 		slot = amap->am_slots[lcv];
701 		anon = amap->am_anon[slot];
702 		KASSERT(anon != NULL && anon->an_ref != 0);
703 
704 		mutex_enter(&anon->an_lock);
705 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
706 		    anon->an_ref, 0, 0);
707 		refs = --anon->an_ref;
708 		mutex_exit(&anon->an_lock);
709 		if (refs == 0) {
710 
711 			/*
712 			 * we had the last reference to a vm_anon. free it.
713 			 */
714 
715 			uvm_anfree(anon);
716 		}
717 
718 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
719 			preempt();
720 	}
721 
722 	/*
723 	 * now we free the map
724 	 */
725 
726 	amap->am_nused = 0;
727 	amap_free(amap);	/* will unlock and free amap */
728 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
729 }
730 
731 /*
732  * amap_copy: ensure that a map entry's "needs_copy" flag is false
733  *	by copying the amap if necessary.
734  *
735  * => an entry with a null amap pointer will get a new (blank) one.
736  * => the map that the map entry belongs to must be locked by caller.
737  * => the amap currently attached to "entry" (if any) must be unlocked.
738  * => if canchunk is true, then we may clip the entry into a chunk
739  * => "startva" and "endva" are used only if canchunk is true.  they are
740  *     used to limit chunking (e.g. if you have a large space that you
741  *     know you are going to need to allocate amaps for, there is no point
742  *     in allowing that to be chunked)
743  */
744 
745 void
746 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
747     vaddr_t startva, vaddr_t endva)
748 {
749 	struct vm_amap *amap, *srcamap;
750 	int slots, lcv;
751 	vaddr_t chunksize;
752 	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
753 	const bool canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
754 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
755 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
756 		    map, entry, flags, 0);
757 
758 	KASSERT(map != kernel_map);	/* we use nointr pool */
759 
760 	/*
761 	 * is there a map to copy?   if not, create one from scratch.
762 	 */
763 
764 	if (entry->aref.ar_amap == NULL) {
765 
766 		/*
767 		 * check to see if we have a large amap that we can
768 		 * chunk.  we align startva/endva to chunk-sized
769 		 * boundaries and then clip to them.
770 		 */
771 
772 		if (canchunk && atop(entry->end - entry->start) >=
773 		    UVM_AMAP_LARGE) {
774 			/* convert slots to bytes */
775 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
776 			startva = (startva / chunksize) * chunksize;
777 			endva = roundup(endva, chunksize);
778 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
779 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
780 			    endva);
781 			UVM_MAP_CLIP_START(map, entry, startva, NULL);
782 			/* watch out for endva wrap-around! */
783 			if (endva >= startva)
784 				UVM_MAP_CLIP_END(map, entry, endva, NULL);
785 		}
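
		/*
		 * Worked example with hypothetical numbers: if chunksize
		 * were 64KB, a startva of 0x12345 would round down to
		 * 0x10000 and an endva of 0x23456 would round up to
		 * 0x30000, clipping the entry to chunk boundaries on
		 * both sides.
		 */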
786 
787 		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
788 		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
789 			return;
790 		}
791 
792 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
793 		    entry->start, entry->end, 0, 0);
794 		entry->aref.ar_pageoff = 0;
795 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
796 		    waitf);
797 		if (entry->aref.ar_amap != NULL)
798 			entry->etype &= ~UVM_ET_NEEDSCOPY;
799 		return;
800 	}
801 
802 	/*
803 	 * first check and see if we are the only map entry
804 	 * referencing the amap we currently have.  if so, then we can
805 	 * just take it over rather than copying it.  note that we are
806 	 * reading am_ref with the amap unlocked... the value can only
807 	 * be one if we have the only reference to the amap (via our
808 	 * locked map).  if we are greater than one we fall through to
809 	 * the next case (where we double check the value).
810 	 */
811 
812 	if (entry->aref.ar_amap->am_ref == 1) {
813 		entry->etype &= ~UVM_ET_NEEDSCOPY;
814 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
815 		    0, 0, 0, 0);
816 		return;
817 	}
818 
819 	/*
820 	 * looks like we need to copy the map.
821 	 */
822 
823 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
824 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
825 	AMAP_B2SLOT(slots, entry->end - entry->start);
826 	amap = amap_alloc1(slots, 0, waitf);
827 	if (amap == NULL) {
828 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
829 		return;
830 	}
831 	srcamap = entry->aref.ar_amap;
832 	amap_lock(srcamap);
833 
834 	/*
835 	 * need to double check reference count now that we've got the
836 	 * src amap locked down.  the reference count could have
837 	 * changed while we were allocating.  if the reference count
838 	 * dropped down to one we take over the old map rather than
839 	 * copying the amap.
840 	 */
841 
842 	if (srcamap->am_ref == 1) {		/* take it over? */
843 		entry->etype &= ~UVM_ET_NEEDSCOPY;
844 		amap->am_ref--;		/* drop final reference to map */
845 		amap_free(amap);	/* dispose of new (unused) amap */
846 		amap_unlock(srcamap);
847 		return;
848 	}
849 
850 	/*
851 	 * we must copy it now.
852 	 */
853 
854 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
855 	for (lcv = 0 ; lcv < slots; lcv++) {
856 		amap->am_anon[lcv] =
857 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
858 		if (amap->am_anon[lcv] == NULL)
859 			continue;
860 		mutex_enter(&amap->am_anon[lcv]->an_lock);
861 		amap->am_anon[lcv]->an_ref++;
862 		mutex_exit(&amap->am_anon[lcv]->an_lock);
863 		amap->am_bckptr[lcv] = amap->am_nused;
864 		amap->am_slots[amap->am_nused] = lcv;
865 		amap->am_nused++;
866 	}
867 	memset(&amap->am_anon[lcv], 0,
868 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
869 
870 	/*
871 	 * drop our reference to the old amap (srcamap) and unlock.
872 	 * we know that the reference count on srcamap is greater than
873 	 * one (we checked above), so there is no way we could drop
874 	 * the count to zero.  [and no need to worry about freeing it]
875 	 */
876 
877 	srcamap->am_ref--;
878 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
879 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
880 #ifdef UVM_AMAP_PPREF
881 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
882 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
883 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
884 	}
885 #endif
886 
887 	amap_unlock(srcamap);
888 
889 	amap_list_insert(amap);
890 
891 	/*
892 	 * install new amap.
893 	 */
894 
895 	entry->aref.ar_pageoff = 0;
896 	entry->aref.ar_amap = amap;
897 	entry->etype &= ~UVM_ET_NEEDSCOPY;
898 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
899 }
900 
901 /*
902  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
903  *
904  *	called during fork(2) when the parent process has a wired map
905  *	entry.   in that case we want to avoid write-protecting pages
906  *	in the parent's map (e.g. like what you'd do for a COW page)
907  *	so we resolve the COW here.
908  *
909  * => assume parent's entry was wired, thus all pages are resident.
910  * => assume pages that are loaned out (loan_count) are already mapped
911  *	read-only in all maps, and thus no need for us to worry about them
912  * => assume both parent and child vm_map's are locked
913  * => caller passes child's map/entry in to us
914  * => if we run out of memory we will unlock the amap and sleep _with_ the
915  *	parent and child vm_map's locked(!).    we have to do this since
916  *	we are in the middle of a fork(2) and we can't let the parent
917  *	map change until we are done copying all the map entries.
918  * => XXXCDC: out of memory should cause fork to fail, but there is
919  *	currently no easy way to do this (needs fix)
920  * => page queues must be unlocked (we may lock them)
921  */
922 
923 void
924 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
925 {
926 	struct vm_amap *amap = entry->aref.ar_amap;
927 	int lcv, slot;
928 	struct vm_anon *anon, *nanon;
929 	struct vm_page *pg, *npg;
930 
931 	/*
932 	 * note that if we unlock the amap then we must ReStart the "lcv" for
933 	 * loop because some other process could reorder the anon's in the
934 	 * am_anon[] array on us while the lock is dropped.
935 	 */
936 
937 ReStart:
938 	amap_lock(amap);
939 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
940 
941 		/*
942 		 * get the page
943 		 */
944 
945 		slot = amap->am_slots[lcv];
946 		anon = amap->am_anon[slot];
947 		mutex_enter(&anon->an_lock);
948 
949 		/*
950 		 * If the anon has only one ref, we must have already copied it.
951 		 * This can happen if we needed to sleep waiting for memory
952 		 * in a previous run through this loop.  The new page might
953 		 * even have been paged out, since the new page is not wired.
954 		 */
955 
956 		if (anon->an_ref == 1) {
957 			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
958 			mutex_exit(&anon->an_lock);
959 			continue;
960 		}
961 
962 		/*
963 		 * The old page must be resident since the parent is wired.
964 		 */
965 
966 		pg = anon->an_page;
967 		KASSERT(pg != NULL);
968 		KASSERT(pg->wire_count > 0);
969 
970 		/*
971 		 * If the page is loaned then it must already be mapped
972 		 * read-only and we don't need to copy it.
973 		 */
974 
975 		if (pg->loan_count != 0) {
976 			mutex_exit(&anon->an_lock);
977 			continue;
978 		}
979 		KASSERT(pg->uanon == anon && pg->uobject == NULL);
980 
981 		/*
982 		 * if the page is busy then we have to unlock, wait for
983 		 * it and then restart.
984 		 */
985 
986 		if (pg->flags & PG_BUSY) {
987 			pg->flags |= PG_WANTED;
988 			amap_unlock(amap);
989 			UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, false,
990 			    "cownow", 0);
991 			goto ReStart;
992 		}
993 
994 		/*
995 		 * ok, time to do a copy-on-write to a new anon
996 		 */
997 
998 		nanon = uvm_analloc();
999 		if (nanon) {
1000 			npg = uvm_pagealloc(NULL, 0, nanon, 0);
1001 		} else
1002 			npg = NULL;	/* XXX: quiet gcc warning */
1003 		if (nanon == NULL || npg == NULL) {
1004 
1005 			/*
1006 			 * XXXCDC: we should cause fork to fail, but we can't.
1007 			 */
1008 
1009 			if (nanon) {
1010 				nanon->an_ref--;
1011 				mutex_exit(&nanon->an_lock);
1012 				uvm_anfree(nanon);
1013 			}
1014 			mutex_exit(&anon->an_lock);
1015 			amap_unlock(amap);
1016 			uvm_wait("cownowpage");
1017 			goto ReStart;
1018 		}
1019 
1020 		/*
1021 		 * got it... now we can copy the data and replace anon
1022 		 * with our new one...
1023 		 */
1024 
1025 		uvm_pagecopy(pg, npg);		/* old -> new */
1026 		anon->an_ref--;			/* can't drop to zero */
1027 		amap->am_anon[slot] = nanon;	/* replace */
1028 
1029 		/*
1030 		 * drop PG_BUSY on new page ... since we have had its owner
1031 		 * locked the whole time it can't be PG_RELEASED or PG_WANTED.
1032 		 */
1033 
1034 		mutex_enter(&uvm_pageqlock);
1035 		uvm_pageactivate(npg);
1036 		mutex_exit(&uvm_pageqlock);
1037 		npg->flags &= ~(PG_BUSY|PG_FAKE);
1038 		UVM_PAGE_OWN(npg, NULL);
1039 		mutex_exit(&nanon->an_lock);
1040 		mutex_exit(&anon->an_lock);
1041 	}
1042 	amap_unlock(amap);
1043 }
1044 
1045 /*
1046  * amap_splitref: split a single reference into two separate references
1047  *
1048  * => called from uvm_map's clip routines
1049  * => origref's map should be locked
1050  * => origref->ar_amap should be unlocked (we will lock)
1051  */
1052 void
1053 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1054 {
1055 	int leftslots;
1056 	struct vm_amap *amap;
1057 
1058 	KASSERT(splitref->ar_amap == origref->ar_amap);
1059 	AMAP_B2SLOT(leftslots, offset);
1060 	KASSERT(leftslots != 0);
1061 
1062 	amap = origref->ar_amap;
1063 	amap_lock(amap);
1064 
1065 	/*
1066 	 * now: amap is locked and we have a valid am_anon array.
1067 	 */
1068 	KASSERT(amap->am_nslot - origref->ar_pageoff - leftslots > 0);
1069 
1070 #ifdef UVM_AMAP_PPREF
1071 	/*
1072 	 * establish ppref before we add a duplicate reference to the amap
1073 	 */
1074 	if (amap->am_ppref == NULL)
1075 		amap_pp_establish(amap, origref->ar_pageoff);
1076 #endif
1077 
1078 	amap->am_ref++;		/* not a share reference */
1079 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1080 
1081 	amap_unlock(amap);
1082 }
1083 
1084 #ifdef UVM_AMAP_PPREF
1085 
1086 /*
1087  * amap_pp_establish: add a ppref array to an amap, if possible
1088  *
1089  * => amap locked by caller
1090  */
1091 void
1092 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1093 {
1094 
1095 	amap->am_ppref = kmem_alloc(amap->am_maxslot * sizeof(*amap->am_ppref),
1096 	    KM_NOSLEEP);
1097 
1098 	/*
1099 	 * if we fail then we just won't use ppref for this amap
1100 	 */
1101 
1102 	if (amap->am_ppref == NULL) {
1103 		amap->am_ppref = PPREF_NONE;	/* not using it */
1104 		return;
1105 	}
1106 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1107 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1108 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1109 	    amap->am_nslot - offset);
1110 	return;
1111 }
1112 
1113 /*
1114  * amap_pp_adjref: adjust reference count to a part of an amap using the
1115  * per-page reference count array.
1116  *
1117  * => map and amap locked by caller
1118  * => caller must check that ppref != PPREF_NONE before calling
1119  */
1120 void
1121 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
1122 {
1123 	int stopslot, *ppref, lcv, prevlcv;
1124 	int ref, len, prevref, prevlen;
1125 
1126 	stopslot = curslot + slotlen;
1127 	ppref = amap->am_ppref;
1128 	prevlcv = 0;
1129 
1130 	/*
1131 	 * first advance to the correct place in the ppref array,
1132 	 * fragment if needed.
1133 	 */
1134 
1135 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1136 		pp_getreflen(ppref, lcv, &ref, &len);
1137 		if (lcv + len > curslot) {     /* goes past start? */
1138 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1139 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1140 			len = curslot - lcv;   /* new length of entry @ lcv */
1141 		}
1142 		prevlcv = lcv;
1143 	}
1144 	if (lcv != 0)
1145 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1146 	else {
1147 		/* Ensure that the "prevref == ref" test below always
1148 		 * fails, since we're starting from the beginning of
1149 		 * the ppref array; that is, there is no previous
1150 		 * chunk.
1151 		 */
1152 		prevref = -1;
1153 		prevlen = 0;
1154 	}
1155 
1156 	/*
1157 	 * now adjust reference counts in range.  merge the first
1158 	 * changed entry with the last unchanged entry if possible.
1159 	 */
1160 	KASSERT(lcv == curslot);
1161 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1162 		pp_getreflen(ppref, lcv, &ref, &len);
1163 		if (lcv + len > stopslot) {     /* goes past end? */
1164 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1165 			pp_setreflen(ppref, stopslot, ref,
1166 			    len - (stopslot - lcv));
1167 			len = stopslot - lcv;
1168 		}
1169 		ref += adjval;
1170 		KASSERT(ref >= 0);
1171 		if (lcv == prevlcv + prevlen && ref == prevref) {
1172 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1173 		} else {
1174 			pp_setreflen(ppref, lcv, ref, len);
1175 		}
1176 		if (ref == 0)
1177 			amap_wiperange(amap, lcv, len);
1178 	}
1179 
1180 }
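
/*
 * Worked example (hypothetical): suppose ppref describes one chunk of
 * 4 slots with reference count 2, i.e. pp_setreflen(ppref, 0, 2, 4).
 * A call amap_pp_adjref(amap, 1, 2, 1) first splits that chunk at slot
 * 1 (leaving (ref 2, len 1) at slot 0), then splits again at the stop
 * slot and bumps the middle, ending with (ref 2, len 1) at slot 0,
 * (ref 3, len 2) at slot 1 and (ref 2, len 1) at slot 3.
 */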
1181 
1182 /*
1183  * amap_wiperange: wipe out a range of an amap
1184  * [different from amap_wipeout because the amap is kept intact]
1185  *
1186  * => both map and amap must be locked by caller.
1187  */
1188 void
1189 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1190 {
1191 	int byanon, lcv, stop, curslot, ptr, slotend;
1192 	struct vm_anon *anon;
1193 
1194 	/*
1195 	 * we can either traverse the amap by am_anon or by am_slots depending
1196 	 * on which is cheaper.    decide now.
1197 	 */
1198 
1199 	if (slots < amap->am_nused) {
1200 		byanon = true;
1201 		lcv = slotoff;
1202 		stop = slotoff + slots;
1203 		slotend = 0;
1204 	} else {
1205 		byanon = false;
1206 		lcv = 0;
1207 		stop = amap->am_nused;
1208 		slotend = slotoff + slots;
1209 	}
1210 
1211 	while (lcv < stop) {
1212 		int refs;
1213 
1214 		if (byanon) {
1215 			curslot = lcv++;	/* lcv advances here */
1216 			if (amap->am_anon[curslot] == NULL)
1217 				continue;
1218 		} else {
1219 			curslot = amap->am_slots[lcv];
1220 			if (curslot < slotoff || curslot >= slotend) {
1221 				lcv++;		/* lcv advances here */
1222 				continue;
1223 			}
1224 			stop--;	/* drop stop, since anon will be removed */
1225 		}
1226 		anon = amap->am_anon[curslot];
1227 
1228 		/*
1229 		 * remove it from the amap
1230 		 */
1231 
1232 		amap->am_anon[curslot] = NULL;
1233 		ptr = amap->am_bckptr[curslot];
1234 		if (ptr != (amap->am_nused - 1)) {
1235 			amap->am_slots[ptr] =
1236 			    amap->am_slots[amap->am_nused - 1];
1237 			amap->am_bckptr[amap->am_slots[ptr]] =
1238 			    ptr;    /* back ptr. */
1239 		}
1240 		amap->am_nused--;
1241 
1242 		/*
1243 		 * drop anon reference count
1244 		 */
1245 
1246 		mutex_enter(&anon->an_lock);
1247 		refs = --anon->an_ref;
1248 		mutex_exit(&anon->an_lock);
1249 		if (refs == 0) {
1250 
1251 			/*
1252 			 * we just eliminated the last reference to an anon.
1253 			 * free it.
1254 			 */
1255 
1256 			uvm_anfree(anon);
1257 		}
1258 	}
1259 }
1260 
1261 #endif
1262 
1263 #if defined(VMSWAP)
1264 
1265 /*
1266  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1267  *
1268  * => called with swap_syscall_lock held.
1269  * => note that we don't always traverse all anons.
1270  *    e.g. amaps being wiped out, released anons.
1271  * => return true if failed.
1272  */
1273 
1274 bool
1275 amap_swap_off(int startslot, int endslot)
1276 {
1277 	struct vm_amap *am;
1278 	struct vm_amap *am_next;
1279 	struct vm_amap marker_prev;
1280 	struct vm_amap marker_next;
1281 	bool rv = false;
1282 
1283 #if defined(DIAGNOSTIC)
1284 	memset(&marker_prev, 0, sizeof(marker_prev));
1285 	memset(&marker_next, 0, sizeof(marker_next));
1286 #endif /* defined(DIAGNOSTIC) */
1287 
1288 	mutex_enter(&amap_list_lock);
1289 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1290 		int i;
1291 
1292 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1293 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1294 
1295 		if (!amap_lock_try(am)) {
1296 			mutex_exit(&amap_list_lock);
1297 			preempt();
1298 			mutex_enter(&amap_list_lock);
1299 			am_next = LIST_NEXT(&marker_prev, am_list);
1300 			if (am_next == &marker_next) {
1301 				am_next = LIST_NEXT(am_next, am_list);
1302 			} else {
1303 				KASSERT(LIST_NEXT(am_next, am_list) ==
1304 				    &marker_next);
1305 			}
1306 			LIST_REMOVE(&marker_prev, am_list);
1307 			LIST_REMOVE(&marker_next, am_list);
1308 			continue;
1309 		}
1310 
1311 		mutex_exit(&amap_list_lock);
1312 
1313 		if (am->am_nused <= 0) {
1314 			amap_unlock(am);
1315 			goto next;
1316 		}
1317 
1318 		for (i = 0; i < am->am_nused; i++) {
1319 			int slot;
1320 			int swslot;
1321 			struct vm_anon *anon;
1322 
1323 			slot = am->am_slots[i];
1324 			anon = am->am_anon[slot];
1325 			mutex_enter(&anon->an_lock);
1326 
1327 			swslot = anon->an_swslot;
1328 			if (swslot < startslot || endslot <= swslot) {
1329 				mutex_exit(&anon->an_lock);
1330 				continue;
1331 			}
1332 
1333 			am->am_flags |= AMAP_SWAPOFF;
1334 			amap_unlock(am);
1335 
1336 			rv = uvm_anon_pagein(anon);
1337 
1338 			amap_lock(am);
1339 			am->am_flags &= ~AMAP_SWAPOFF;
1340 			if (amap_refs(am) == 0) {
1341 				amap_wipeout(am);
1342 				am = NULL;
1343 				break;
1344 			}
1345 			if (rv) {
1346 				break;
1347 			}
1348 			i = 0;
1349 		}
1350 
1351 		if (am) {
1352 			amap_unlock(am);
1353 		}
1354 
1355 next:
1356 		mutex_enter(&amap_list_lock);
1357 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1358 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1359 		    &marker_next);
1360 		am_next = LIST_NEXT(&marker_next, am_list);
1361 		LIST_REMOVE(&marker_prev, am_list);
1362 		LIST_REMOVE(&marker_next, am_list);
1363 	}
1364 	mutex_exit(&amap_list_lock);
1365 
1366 	return rv;
1367 }
1368 
1369 #endif /* defined(VMSWAP) */
1370 
1371 /*
1372  * amap_lookup: look up a page in an amap
1373  *
1374  * => amap should be locked by caller.
1375  */
1376 struct vm_anon *
1377 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1378 {
1379 	struct vm_anon *an;
1380 	int slot;
1381 	struct vm_amap *amap = aref->ar_amap;
1382 	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
1383 	KASSERT(mutex_owned(&amap->am_l));
1384 
1385 	AMAP_B2SLOT(slot, offset);
1386 	slot += aref->ar_pageoff;
1387 	KASSERT(slot < amap->am_nslot);
1388 
1389 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
1390 	    amap, offset, amap->am_anon[slot], 0);
1391 	an = amap->am_anon[slot];
1392 	KASSERT(an == NULL || an->an_ref != 0);
1393 	return an;
1394 }
1395 
1396 /*
1397  * amap_lookups: look up a range of pages in an amap
1398  *
1399  * => amap should be locked by caller.
1400  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
1401  */
1402 void
1403 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1404     int npages)
1405 {
1406 	int slot;
1407 	struct vm_amap *amap = aref->ar_amap;
1408 #if defined(DIAGNOSTIC)
1409 	int i;
1410 #endif /* defined(DIAGNOSTIC) */
1411 	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
1412 	KASSERT(mutex_owned(&amap->am_l));
1413 
1414 	AMAP_B2SLOT(slot, offset);
1415 	slot += aref->ar_pageoff;
1416 
1417 	UVMHIST_LOG(maphist, "  slot=%d, npages=%d, nslot=%d", slot, npages,
1418 		amap->am_nslot, 0);
1419 
1420 	KASSERT((slot + (npages - 1)) < amap->am_nslot);
1421 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1422 
1423 #if defined(DIAGNOSTIC)
1424 	for (i = 0; i < npages; i++) {
1425 		struct vm_anon * const an = anons[i];
1426 
1427 		if (an != NULL && an->an_ref == 0) {
1428 			panic("%s: ref=0 anon", __func__);
1429 		}
1430 	}
1431 #endif /* defined(DIAGNOSTIC) */
1432 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1433 	return;
1434 }
1435 
1436 /*
1437  * amap_add: add (or replace) a page to an amap
1438  *
1439  * => caller must lock amap.
1440  * => if (replace) caller must lock anon because we might have to call
1441  *	pmap_page_protect on the anon's page.
1442  */
1443 void
1444 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1445     bool replace)
1446 {
1447 	int slot;
1448 	struct vm_amap *amap = aref->ar_amap;
1449 	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
1450 	KASSERT(mutex_owned(&amap->am_l));
1451 
1452 	AMAP_B2SLOT(slot, offset);
1453 	slot += aref->ar_pageoff;
1454 	KASSERT(slot < amap->am_nslot);
1455 
1456 	if (replace) {
1457 		KASSERT(amap->am_anon[slot] != NULL);
1458 		if (amap->am_anon[slot]->an_page != NULL &&
1459 		    (amap->am_flags & AMAP_SHARED) != 0) {
1460 			pmap_page_protect(amap->am_anon[slot]->an_page,
1461 			    VM_PROT_NONE);
1462 			/*
1463 			 * XXX: suppose page is supposed to be wired somewhere?
1464 			 */
1465 		}
1466 	} else {   /* !replace */
1467 		KASSERT(amap->am_anon[slot] == NULL);
1468 		amap->am_bckptr[slot] = amap->am_nused;
1469 		amap->am_slots[amap->am_nused] = slot;
1470 		amap->am_nused++;
1471 	}
1472 	amap->am_anon[slot] = anon;
1473 	UVMHIST_LOG(maphist,
1474 	    "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
1475 	    amap, offset, anon, replace);
1476 }
1477 
1478 /*
1479  * amap_unadd: remove a page from an amap
1480  *
1481  * => caller must lock amap
1482  */
1483 void
1484 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1485 {
1486 	int ptr, slot;
1487 	struct vm_amap *amap = aref->ar_amap;
1488 	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
1489 	KASSERT(mutex_owned(&amap->am_l));
1490 
1491 	AMAP_B2SLOT(slot, offset);
1492 	slot += aref->ar_pageoff;
1493 	KASSERT(slot < amap->am_nslot);
1494 	KASSERT(amap->am_anon[slot] != NULL);
1495 
1496 	amap->am_anon[slot] = NULL;
1497 	ptr = amap->am_bckptr[slot];
1498 
1499 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1500 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1501 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1502 	}
1503 	amap->am_nused--;
1504 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0);
1505 }
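
/*
 * Illustrative sketch (not compiled; the names are hypothetical) of the
 * am_slots/am_bckptr bookkeeping used above: am_slots[] is a dense
 * array of the active slot numbers and am_bckptr[slot] records where
 * that slot lives in am_slots[], so removal is O(1): move the last
 * dense entry into the vacated position and repair its back pointer.
 */
#if 0
static void
dense_remove(int *slots, int *bckptr, int *nused, int slot)
{
	int ptr = bckptr[slot];			/* position in slots[] */

	if (ptr != *nused - 1) {
		slots[ptr] = slots[*nused - 1];	/* move last entry down */
		bckptr[slots[ptr]] = ptr;	/* fix its back pointer */
	}
	(*nused)--;
}
#endif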
1506 
1507 /*
1508  * amap_ref: gain a reference to an amap
1509  *
1510  * => amap must not be locked (we will lock)
1511  * => "offset" and "len" are in units of pages
1512  * => called at fork time to gain the child's reference
1513  */
1514 void
1515 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1516 {
1517 	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
1518 
1519 	amap_lock(amap);
1520 	if (flags & AMAP_SHARED)
1521 		amap->am_flags |= AMAP_SHARED;
1522 #ifdef UVM_AMAP_PPREF
1523 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1524 	    len != amap->am_nslot)
1525 		amap_pp_establish(amap, offset);
1526 #endif
1527 	amap->am_ref++;
1528 #ifdef UVM_AMAP_PPREF
1529 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1530 		if (flags & AMAP_REFALL)
1531 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1532 		else
1533 			amap_pp_adjref(amap, offset, len, 1);
1534 	}
1535 #endif
1536 	amap_unlock(amap);
1537 	UVMHIST_LOG(maphist,"<- done!  amap=0x%x", amap, 0, 0, 0);
1538 }
1539 
1540 /*
1541  * amap_unref: remove a reference to an amap
1542  *
1543  * => caller must remove all pmap-level references to this amap before
1544  *	dropping the reference
1545  * => called from uvm_unmap_detach [only]  ... note that entry is no
1546  *	longer part of a map and thus has no need for locking
1547  * => amap must be unlocked (we will lock it).
1548  */
1549 void
1550 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, bool all)
1551 {
1552 	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
1553 
1554 	/*
1555 	 * lock it
1556 	 */
1557 	amap_lock(amap);
1558 	UVMHIST_LOG(maphist,"  amap=0x%x  refs=%d, nused=%d",
1559 	    amap, amap->am_ref, amap->am_nused, 0);
1560 
1561 	KASSERT(amap_refs(amap) > 0);
1562 
1563 	/*
1564 	 * if we are the last reference, free the amap and return.
1565 	 */
1566 
1567 	amap->am_ref--;
1568 
1569 	if (amap_refs(amap) == 0) {
1570 		amap_wipeout(amap);	/* drops final ref and frees */
1571 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1572 		return;			/* no need to unlock */
1573 	}
1574 
1575 	/*
1576 	 * otherwise just drop the reference count(s)
1577 	 */
1578 
1579 	if (amap_refs(amap) == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1580 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1581 #ifdef UVM_AMAP_PPREF
1582 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1583 		amap_pp_establish(amap, offset);
1584 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1585 		if (all)
1586 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1587 		else
1588 			amap_pp_adjref(amap, offset, len, -1);
1589 	}
1590 #endif
1591 	amap_unlock(amap);
1592 
1593 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1594 }
1595