xref: /netbsd-src/sys/uvm/uvm_amap.c (revision fad4c9f71477ae11cea2ee75ec82151ac770a534)
1 /*	$NetBSD: uvm_amap.c,v 1.74 2006/06/25 08:03:46 yamt Exp $	*/
2 
3 /*
4  *
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Charles D. Cranor and
19  *      Washington University.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * uvm_amap.c: amap operations
37  */
38 
39 /*
40  * this file contains functions that perform operations on amaps.  see
41  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.74 2006/06/25 08:03:46 yamt Exp $");
46 
47 #include "opt_uvmhist.h"
48 
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/proc.h>
52 #include <sys/kernel.h>
53 #include <sys/kmem.h>
54 #include <sys/pool.h>
55 
56 #include <uvm/uvm.h>
57 #include <uvm/uvm_swap.h>
58 
59 /*
60  * pool for allocation of vm_amap structures.  note that the pool has
61  * its own simplelock for its protection.  also note that in order to
62  * avoid an endless loop, the amap pool's allocator cannot allocate
63  * memory from an amap (it currently goes through the kernel uobj, so
64  * we are ok).
65  */
66 POOL_INIT(uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, "amappl",
67     &pool_allocator_nointr);
68 
69 static struct simplelock amap_list_lock = SIMPLELOCK_INITIALIZER;
70 static LIST_HEAD(, vm_amap) amap_list;
71 
72 /*
73  * local functions
74  */
75 
76 static inline void
77 amap_list_insert(struct vm_amap *amap)
78 {
79 
80 	simple_lock(&amap_list_lock);
81 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
82 	simple_unlock(&amap_list_lock);
83 }
84 
85 static inline void
86 amap_list_remove(struct vm_amap *amap)
87 {
88 
89 	simple_lock(&amap_list_lock);
90 	LIST_REMOVE(amap, am_list);
91 	simple_unlock(&amap_list_lock);
92 }
93 
94 static int
95 amap_roundup_slots(int slots)
96 {
97 
98 	return kmem_roundup_size(slots * sizeof(int)) / sizeof(int);
99 }
100 
101 #ifdef UVM_AMAP_PPREF
102 /*
103  * what is ppref?   ppref is an _optional_ amap feature which is used
104  * to keep track of reference counts on a per-page basis.  it is enabled
105  * when UVM_AMAP_PPREF is defined.
106  *
107  * when enabled, an array of ints is allocated for the pprefs.  this
108  * array is allocated only when a partial reference is added to the
109  * amap (either by unmapping part of the amap, or by gaining a reference
110  * to only a part of an amap).  if the allocation of the array fails
111  * (KM_NOSLEEP), then we set the array pointer to PPREF_NONE to indicate
112  * that we tried to do pprefs but couldn't allocate the array, so we just
113  * give up (after all, this is an optional feature!).
114  *
115  * the array is divided into "chunks": runs of pages that share a reference
116  * count.   for a chunk of length 1, the chunk's reference count plus one is
117  * stored in that chunk's slot.   for a chunk of length > 1 the first slot
118  * contains (the reference count plus one) * -1.    [the negative value
119  * indicates that the length is greater than one.]   the second slot of the
120  * chunk contains the length of the chunk.   here is an example:
121  *
122  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
123  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
124  *              <----------><-><----><-------><----><-><------->
125  * (x = don't care)
126  *
127  * this allows one int to carry the ref count for an entire chunk.
128  * note that the "plus one" part is needed because a reference
129  * count of zero is neither positive nor negative (we need a way to
130  * tell a single zero from a run of them).
131  *
132  * here are some in-line functions to help us.
133  */
134 
135 /*
136  * pp_getreflen: get the reference and length for a specific offset
137  *
138  * => ppref's amap must be locked
139  */
140 static inline void
141 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
142 {
143 
144 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
145 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
146 		*lenp = 1;
147 	} else {
148 		*refp = (ppref[offset] * -1) - 1;
149 		*lenp = ppref[offset+1];
150 	}
151 }
152 
153 /*
154  * pp_setreflen: set the reference and length for a specific offset
155  *
156  * => ppref's amap must be locked
157  */
158 static inline void
159 pp_setreflen(int *ppref, int offset, int ref, int len)
160 {
161 	if (len == 0)
162 		return;
163 	if (len == 1) {
164 		ppref[offset] = ref + 1;
165 	} else {
166 		ppref[offset] = (ref + 1) * -1;
167 		ppref[offset+1] = len;
168 	}
169 }
170 #endif /* UVM_AMAP_PPREF */
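/*
 * editorial sketch (not part of the original source): assuming the ppref
 * encoding described above, the example REFS row can be produced and then
 * walked back with pp_setreflen()/pp_getreflen() like this:
 *
 *	int ppref[16], ref, len, lcv;
 *
 *	pp_setreflen(ppref,  0, 2, 4);	-> -3  4	(ref 2, len 4)
 *	pp_setreflen(ppref,  4, 3, 1);	->  4		(ref 3, len 1)
 *	pp_setreflen(ppref,  5, 1, 2);	-> -2  2	(ref 1, len 2)
 *	pp_setreflen(ppref,  7, 0, 3);	-> -1  3	(ref 0, len 3)
 *	pp_setreflen(ppref, 10, 4, 2);	-> -5  2	(ref 4, len 2)
 *	pp_setreflen(ppref, 12, 0, 1);	->  1		(ref 0, len 1)
 *	pp_setreflen(ppref, 13, 1, 3);	-> -2  3	(ref 1, len 3)
 *
 *	for (lcv = 0; lcv < 16; lcv += len)
 *		pp_getreflen(ppref, lcv, &ref, &len);	recovers each (ref, len)
 */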
171 
172 /*
173  * amap_alloc1: internal function that allocates an amap, but does not
174  *	init the overlay.
175  *
176  * => lock on returned amap is init'd
177  */
178 static inline struct vm_amap *
179 amap_alloc1(int slots, int padslots, int waitf)
180 {
181 	struct vm_amap *amap;
182 	int totalslots;
183 	km_flag_t kmflags;
184 
185 	amap = pool_get(&uvm_amap_pool,
186 	    ((waitf & UVM_FLAG_NOWAIT) != 0) ? PR_NOWAIT : PR_WAITOK);
187 	if (amap == NULL)
188 		return(NULL);
189 
190 	kmflags = ((waitf & UVM_FLAG_NOWAIT) != 0) ? KM_NOSLEEP : KM_SLEEP;
191 	totalslots = amap_roundup_slots(slots + padslots);
192 	simple_lock_init(&amap->am_l);
193 	amap->am_ref = 1;
194 	amap->am_flags = 0;
195 #ifdef UVM_AMAP_PPREF
196 	amap->am_ppref = NULL;
197 #endif
198 	amap->am_maxslot = totalslots;
199 	amap->am_nslot = slots;
200 	amap->am_nused = 0;
201 
202 	amap->am_slots = kmem_alloc(totalslots * sizeof(int), kmflags);
203 	if (amap->am_slots == NULL)
204 		goto fail1;
205 
206 	amap->am_bckptr = kmem_alloc(totalslots * sizeof(int), kmflags);
207 	if (amap->am_bckptr == NULL)
208 		goto fail2;
209 
210 	amap->am_anon = kmem_alloc(totalslots * sizeof(struct vm_anon *),
211 	    kmflags);
212 	if (amap->am_anon == NULL)
213 		goto fail3;
214 
215 	return(amap);
216 
217 fail3:
218 	kmem_free(amap->am_bckptr, totalslots * sizeof(int));
219 fail2:
220 	kmem_free(amap->am_slots, totalslots * sizeof(int));
221 fail1:
222 	pool_put(&uvm_amap_pool, amap);
223 
224 	/*
225 	 * XXX hack to tell the pagedaemon how many pages we need,
226 	 * since we can need more than it would normally free.
227 	 */
228 	if ((waitf & UVM_FLAG_NOWAIT) != 0) {
229 		extern int uvm_extrapages;
230 		uvm_extrapages += ((sizeof(int) * 2 +
231 				    sizeof(struct vm_anon *)) *
232 				   totalslots) >> PAGE_SHIFT;
233 	}
234 	return (NULL);
235 }
236 
237 /*
238  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
239  *
240  * => caller should ensure sz is a multiple of PAGE_SIZE
241  * => reference count to new amap is set to one
242  * => new amap is returned unlocked
243  */
244 
245 struct vm_amap *
246 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
247 {
248 	struct vm_amap *amap;
249 	int slots, padslots;
250 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
251 
252 	AMAP_B2SLOT(slots, sz);
253 	AMAP_B2SLOT(padslots, padsz);
254 
255 	amap = amap_alloc1(slots, padslots, waitf);
256 	if (amap) {
257 		memset(amap->am_anon, 0,
258 		    amap->am_maxslot * sizeof(struct vm_anon *));
259 		amap_list_insert(amap);
260 	}
261 
262 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
263 	return(amap);
264 }
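/*
 * editorial note (not part of the original source): a minimal usage sketch,
 * mirroring the call made in amap_copy() below, for attaching a fresh amap
 * to a map entry whose start/end addresses are page aligned:
 *
 *	entry->aref.ar_pageoff = 0;
 *	entry->aref.ar_amap = amap_alloc(entry->end - entry->start,
 *	    0, UVM_FLAG_NOWAIT);
 *	if (entry->aref.ar_amap != NULL)
 *		entry->etype &= ~UVM_ET_NEEDSCOPY;
 *
 * the second argument is padsz (extra bytes worth of slots to pre-allocate)
 * and the third is the wait flag; passing 0 instead of UVM_FLAG_NOWAIT lets
 * the allocation sleep until it succeeds.
 */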
265 
266 
267 /*
268  * amap_free: free an amap
269  *
270  * => the amap must be unlocked
271  * => the amap should have a zero reference count and be empty
272  */
273 void
274 amap_free(struct vm_amap *amap)
275 {
276 	int slots;
277 
278 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
279 
280 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
281 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
282 	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
283 	slots = amap->am_maxslot;
284 	kmem_free(amap->am_slots, slots * sizeof(*amap->am_slots));
285 	kmem_free(amap->am_bckptr, slots * sizeof(*amap->am_bckptr));
286 	kmem_free(amap->am_anon, slots * sizeof(*amap->am_anon));
287 #ifdef UVM_AMAP_PPREF
288 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
289 		kmem_free(amap->am_ppref, slots * sizeof(*amap->am_ppref));
290 #endif
291 	pool_put(&uvm_amap_pool, amap);
292 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
293 }
294 
295 /*
296  * amap_extend: extend the size of an amap (if needed)
297  *
298  * => called from uvm_map when we want to extend an amap to cover
299  *    a new mapping (rather than allocate a new one)
300  * => amap should be unlocked (we will lock it)
301  * => to safely extend an amap it should have a reference count of
302  *    one (thus it can't be shared)
303  */
304 int
305 amap_extend(struct vm_map_entry *entry, vsize_t addsize, int flags)
306 {
307 	struct vm_amap *amap = entry->aref.ar_amap;
308 	int slotoff = entry->aref.ar_pageoff;
309 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
310 	int slotadj, slotspace;
311 	int oldnslots;
312 #ifdef UVM_AMAP_PPREF
313 	int *newppref, *oldppref;
314 #endif
315 	int i, *newsl, *newbck, *oldsl, *oldbck;
316 	struct vm_anon **newover, **oldover;
317 	const km_flag_t kmflags =
318 	    (flags & AMAP_EXTEND_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
319 
320 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
321 
322 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
323 	    entry, addsize, flags, 0);
324 
325 	/*
326 	 * first, determine how many slots we need in the amap.  don't
327 	 * forget that ar_pageoff could be non-zero: this means that
328 	 * there are some unused slots before us in the amap.
329 	 */
330 
331 	amap_lock(amap);
332 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
333 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
334 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
335 	if (flags & AMAP_EXTEND_FORWARDS) {
336 		slotneed = slotoff + slotmapped + slotadd;
337 		slotadj = 0;
338 		slotspace = 0;
339 	}
340 	else {
341 		slotneed = slotadd + slotmapped;
342 		slotadj = slotadd - slotoff;
343 		slotspace = amap->am_maxslot - slotmapped;
344 	}
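	/*
	 * editorial note (not part of the original source): a worked example
	 * of the slot arithmetic above.  forwards: with slotoff = 2,
	 * slotmapped = 4 and slotadd = 3, slotneed = 9; case 1f below applies
	 * if am_nslot >= 9, case 2f if only am_maxslot >= 9, and case 3
	 * otherwise.  backwards: with slotoff = 5 and slotadd = 3,
	 * slotadj = -2 <= 0, so case 1b simply moves ar_pageoff back to 2;
	 * with slotoff = 1 and slotadd = 3, slotadj = 2 > 0 and the amap's
	 * arrays themselves must grow (case 2b or case 3).
	 */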
345 
346 	/*
347 	 * case 1: we already have enough slots in the map and thus
348 	 * only need to bump the reference counts on the slots we are
349 	 * adding.
350 	 */
351 
352 	if (flags & AMAP_EXTEND_FORWARDS) {
353 		if (amap->am_nslot >= slotneed) {
354 #ifdef UVM_AMAP_PPREF
355 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
356 				amap_pp_adjref(amap, slotoff + slotmapped,
357 				    slotadd, 1);
358 			}
359 #endif
360 			amap_unlock(amap);
361 			UVMHIST_LOG(maphist,
362 			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
363 			    amap, slotneed, 0, 0);
364 			return 0;
365 		}
366 	} else {
367 		if (slotadj <= 0) {
368 			slotoff -= slotadd;
369 			entry->aref.ar_pageoff = slotoff;
370 #ifdef UVM_AMAP_PPREF
371 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
372 				amap_pp_adjref(amap, slotoff, slotadd, 1);
373 			}
374 #endif
375 			amap_unlock(amap);
376 			UVMHIST_LOG(maphist,
377 			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
378 			    amap, slotneed, 0, 0);
379 			return 0;
380 		}
381 	}
382 
383 	/*
384 	 * case 2: we pre-allocated slots for use and we just need to
385 	 * bump nslot up to take account for these slots.
386 	 */
387 
388 	if (amap->am_maxslot >= slotneed) {
389 		if (flags & AMAP_EXTEND_FORWARDS) {
390 #ifdef UVM_AMAP_PPREF
391 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
392 				if ((slotoff + slotmapped) < amap->am_nslot)
393 					amap_pp_adjref(amap,
394 					    slotoff + slotmapped,
395 					    (amap->am_nslot -
396 					    (slotoff + slotmapped)), 1);
397 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
398 				    slotneed - amap->am_nslot);
399 			}
400 #endif
401 			amap->am_nslot = slotneed;
402 			amap_unlock(amap);
403 
404 			/*
405 			 * no need to zero am_anon since that was done at
406 			 * alloc time and we never shrink an allocation.
407 			 */
408 
409 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
410 			    "slotneed=%d", amap, slotneed, 0, 0);
411 			return 0;
412 		} else {
413 #ifdef UVM_AMAP_PPREF
414 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
415 				/*
416 				 * Slide up the ref counts on the pages that
417 				 * are actually in use.
418 				 */
419 				memmove(amap->am_ppref + slotspace,
420 				    amap->am_ppref + slotoff,
421 				    slotmapped * sizeof(int));
422 				/*
423 				 * Mark the (adjusted) gap at the front as
424 				 * referenced/not referenced.
425 				 */
426 				pp_setreflen(amap->am_ppref,
427 				    0, 0, slotspace - slotadd);
428 				pp_setreflen(amap->am_ppref,
429 				    slotspace - slotadd, 1, slotadd);
430 			}
431 #endif
432 
433 			/*
434 			 * Slide the anon pointers up and clear out
435 			 * the space we just made.
436 			 */
437 			memmove(amap->am_anon + slotspace,
438 			    amap->am_anon + slotoff,
439 			    slotmapped * sizeof(struct vm_anon*));
440 			memset(amap->am_anon + slotoff, 0,
441 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
442 
443 			/*
444 			 * Slide the backpointers up, but don't bother
445 			 * wiping out the old slots.
446 			 */
447 			memmove(amap->am_bckptr + slotspace,
448 			    amap->am_bckptr + slotoff,
449 			    slotmapped * sizeof(int));
450 
451 			/*
452 			 * Adjust all the useful active slot numbers.
453 			 */
454 			for (i = 0; i < amap->am_nused; i++)
455 				amap->am_slots[i] += (slotspace - slotoff);
456 
457 			/*
458 			 * We just filled all the empty space in the
459 			 * front of the amap by activating a few new
460 			 * slots.
461 			 */
462 			amap->am_nslot = amap->am_maxslot;
463 			entry->aref.ar_pageoff = slotspace - slotadd;
464 			amap_unlock(amap);
465 
466 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
467 			    "slotneed=%d", amap, slotneed, 0, 0);
468 			return 0;
469 		}
470 	}
471 
472 	/*
473  * case 3: we need to allocate new, larger arrays and copy all the
474  * amap data over from the old arrays to the new ones.
475 	 *
476 	 * note that the use of a kernel realloc() probably would not
477 	 * help here, since we wish to abort cleanly if one of the
478  * three (or four) allocations fails.
479 	 */
480 
481 	amap_unlock(amap);	/* unlock in case we sleep in the allocator */
482 
483 	if (slotneed >= UVM_AMAP_LARGE) {
484 		return E2BIG;
485 	}
486 
487 	slotalloc = amap_roundup_slots(slotneed);
488 #ifdef UVM_AMAP_PPREF
489 	newppref = NULL;
490 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
491 		newppref = kmem_alloc(slotalloc * sizeof(*newppref), kmflags);
492 #endif
493 	newsl = kmem_alloc(slotalloc * sizeof(*newsl), kmflags);
494 	newbck = kmem_alloc(slotalloc * sizeof(*newbck), kmflags);
495 	newover = kmem_alloc(slotalloc * sizeof(*newover), kmflags);
496 	if (newsl == NULL || newbck == NULL || newover == NULL) {
497 #ifdef UVM_AMAP_PPREF
498 		if (newppref != NULL) {
499 			kmem_free(newppref, slotalloc * sizeof(*newppref));
500 		}
501 #endif
502 		if (newsl != NULL) {
503 			kmem_free(newsl, slotalloc * sizeof(*newsl));
504 		}
505 		if (newbck != NULL) {
506 			kmem_free(newbck, slotalloc * sizeof(*newbck));
507 		}
508 		if (newover != NULL) {
509 			kmem_free(newover, slotalloc * sizeof(*newover));
510 		}
511 		return ENOMEM;
512 	}
513 	amap_lock(amap);
514 	KASSERT(amap->am_maxslot < slotneed);
515 
516 	/*
517 	 * now copy everything over to new malloc'd areas...
518 	 * now copy everything over to the newly allocated areas...
519 
520 	slotadded = slotalloc - amap->am_nslot;
521 	if (!(flags & AMAP_EXTEND_FORWARDS))
522 		slotspace = slotalloc - slotmapped;
523 
524 	/* do am_slots */
525 	oldsl = amap->am_slots;
526 	if (flags & AMAP_EXTEND_FORWARDS)
527 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
528 	else
529 		for (i = 0; i < amap->am_nused; i++)
530 			newsl[i] = oldsl[i] + slotspace - slotoff;
531 	amap->am_slots = newsl;
532 
533 	/* do am_anon */
534 	oldover = amap->am_anon;
535 	if (flags & AMAP_EXTEND_FORWARDS) {
536 		memcpy(newover, oldover,
537 		    sizeof(struct vm_anon *) * amap->am_nslot);
538 		memset(newover + amap->am_nslot, 0,
539 		    sizeof(struct vm_anon *) * slotadded);
540 	} else {
541 		memcpy(newover + slotspace, oldover + slotoff,
542 		    sizeof(struct vm_anon *) * slotmapped);
543 		memset(newover, 0,
544 		    sizeof(struct vm_anon *) * slotspace);
545 	}
546 	amap->am_anon = newover;
547 
548 	/* do am_bckptr */
549 	oldbck = amap->am_bckptr;
550 	if (flags & AMAP_EXTEND_FORWARDS)
551 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
552 	else
553 		memcpy(newbck + slotspace, oldbck + slotoff,
554 		    sizeof(int) * slotmapped);
555 	amap->am_bckptr = newbck;
556 
557 #ifdef UVM_AMAP_PPREF
558 	/* do ppref */
559 	oldppref = amap->am_ppref;
560 	if (newppref) {
561 		if (flags & AMAP_EXTEND_FORWARDS) {
562 			memcpy(newppref, oldppref,
563 			    sizeof(int) * amap->am_nslot);
564 			memset(newppref + amap->am_nslot, 0,
565 			    sizeof(int) * slotadded);
566 		} else {
567 			memcpy(newppref + slotspace, oldppref + slotoff,
568 			    sizeof(int) * slotmapped);
569 		}
570 		amap->am_ppref = newppref;
571 		if ((flags & AMAP_EXTEND_FORWARDS) &&
572 		    (slotoff + slotmapped) < amap->am_nslot)
573 			amap_pp_adjref(amap, slotoff + slotmapped,
574 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
575 		if (flags & AMAP_EXTEND_FORWARDS)
576 			pp_setreflen(newppref, amap->am_nslot, 1,
577 			    slotneed - amap->am_nslot);
578 		else {
579 			pp_setreflen(newppref, 0, 0,
580 			    slotalloc - slotneed);
581 			pp_setreflen(newppref, slotalloc - slotneed, 1,
582 			    slotneed - slotmapped);
583 		}
584 	} else {
585 		if (amap->am_ppref)
586 			amap->am_ppref = PPREF_NONE;
587 	}
588 #endif
589 
590 	/* update master values */
591 	if (flags & AMAP_EXTEND_FORWARDS)
592 		amap->am_nslot = slotneed;
593 	else {
594 		entry->aref.ar_pageoff = slotspace - slotadd;
595 		amap->am_nslot = slotalloc;
596 	}
597 	oldnslots = amap->am_maxslot;
598 	amap->am_maxslot = slotalloc;
599 
600 	amap_unlock(amap);
601 	kmem_free(oldsl, oldnslots * sizeof(*oldsl));
602 	kmem_free(oldbck, oldnslots * sizeof(*oldbck));
603 	kmem_free(oldover, oldnslots * sizeof(*oldover));
604 #ifdef UVM_AMAP_PPREF
605 	if (oldppref && oldppref != PPREF_NONE)
606 		kmem_free(oldppref, oldnslots * sizeof(*oldppref));
607 #endif
608 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
609 	    amap, slotneed, 0, 0);
610 	return 0;
611 }
612 
613 /*
614  * amap_share_protect: change protection of anons in a shared amap
615  *
616  * for shared amaps, given the current data structure layout, it is
617  * not possible for us to directly locate all maps referencing the
618  * shared anon (to change the protection).  in order to protect data
619  * in shared maps we use pmap_page_protect().  [this is useful for IPC
620  * mechanisms like map entry passing that may want to write-protect
621  * all mappings of a shared amap.]  we traverse am_anon or am_slots
622  * depending on the current state of the amap.
623  *
624  * => entry's map and amap must be locked by the caller
625  */
626 void
627 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
628 {
629 	struct vm_amap *amap = entry->aref.ar_amap;
630 	int slots, lcv, slot, stop;
631 
632 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
633 
634 	AMAP_B2SLOT(slots, (entry->end - entry->start));
635 	stop = entry->aref.ar_pageoff + slots;
636 
637 	if (slots < amap->am_nused) {
638 		/* cheaper to traverse am_anon */
639 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
640 			if (amap->am_anon[lcv] == NULL)
641 				continue;
642 			if (amap->am_anon[lcv]->an_page != NULL)
643 				pmap_page_protect(amap->am_anon[lcv]->an_page,
644 						  prot);
645 		}
646 		return;
647 	}
648 
649 	/* cheaper to traverse am_slots */
650 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
651 		slot = amap->am_slots[lcv];
652 		if (slot < entry->aref.ar_pageoff || slot >= stop)
653 			continue;
654 		if (amap->am_anon[slot]->an_page != NULL)
655 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
656 	}
657 }
658 
659 /*
660  * amap_wipeout: wipe out all anons in an amap; then free the amap!
661  *
662  * => called from amap_unref when the final reference to an amap is
663  *	discarded (i.e. when reference count drops to 0)
664  * => the amap should be locked (by the caller)
665  */
666 
667 void
668 amap_wipeout(struct vm_amap *amap)
669 {
670 	int lcv, slot;
671 	struct vm_anon *anon;
672 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
673 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
674 
675 	KASSERT(amap->am_ref == 0);
676 
677 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
678 		/*
679 		 * amap_swap_off will call us again.
680 		 */
681 		amap_unlock(amap);
682 		return;
683 	}
684 	amap_list_remove(amap);
685 	amap_unlock(amap);
686 
687 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
688 		int refs;
689 
690 		slot = amap->am_slots[lcv];
691 		anon = amap->am_anon[slot];
692 
693 		if (anon == NULL || anon->an_ref == 0)
694 			panic("amap_wipeout: corrupt amap");
695 
696 		simple_lock(&anon->an_lock);
697 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
698 		    anon->an_ref, 0, 0);
699 		refs = --anon->an_ref;
700 		simple_unlock(&anon->an_lock);
701 		if (refs == 0) {
702 
703 			/*
704 			 * we had the last reference to a vm_anon. free it.
705 			 */
706 
707 			uvm_anfree(anon);
708 		}
709 
710 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
711 			preempt(1);
712 	}
713 
714 	/*
715 	 * now we free the amap
716 	 */
717 
718 	amap->am_nused = 0;
719 	amap_free(amap);	/* will free the (already unlocked) amap */
720 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
721 }
722 
723 /*
724  * amap_copy: ensure that a map entry's "needs_copy" flag is false
725  *	by copying the amap if necessary.
726  *
727  * => an entry with a null amap pointer will get a new (blank) one.
728  * => the map that the map entry belongs to must be locked by caller.
729  * => the amap currently attached to "entry" (if any) must be unlocked.
730  * => if canchunk is true, then we may clip the entry into a chunk
731  * => "startva" and "endva" are used only if canchunk is true.  they are
732  *     used to limit chunking (e.g. if you have a large space that you
733  *     know you are going to need to allocate amaps for, there is no point
734  *     in allowing that to be chunked)
735  */
736 
737 void
738 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int flags,
739     vaddr_t startva, vaddr_t endva)
740 {
741 	struct vm_amap *amap, *srcamap;
742 	int slots, lcv;
743 	vaddr_t chunksize;
744 	const int waitf = (flags & AMAP_COPY_NOWAIT) ? UVM_FLAG_NOWAIT : 0;
745 	const boolean_t canchunk = (flags & AMAP_COPY_NOCHUNK) == 0;
746 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
747 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, flags=%d)",
748 		    map, entry, flags, 0);
749 
750 	KASSERT(map != kernel_map);	/* we use nointr pool */
751 
752 	/*
753 	 * is there an amap to copy?   if not, create one from scratch.
754 	 */
755 
756 	if (entry->aref.ar_amap == NULL) {
757 
758 		/*
759 		 * check to see if we have a large amap that we can
760 		 * chunk.  we align startva/endva to chunk-sized
761 		 * boundaries and then clip to them.
762 		 */
763 
764 		if (canchunk && atop(entry->end - entry->start) >=
765 		    UVM_AMAP_LARGE) {
766 			/* convert slots to bytes */
767 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
768 			startva = (startva / chunksize) * chunksize;
769 			endva = roundup(endva, chunksize);
770 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
771 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
772 			    endva);
773 			UVM_MAP_CLIP_START(map, entry, startva, NULL);
774 			/* watch out for endva wrap-around! */
775 			if (endva >= startva)
776 				UVM_MAP_CLIP_END(map, entry, endva, NULL);
777 		}
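		/*
		 * editorial note (not part of the original source): a worked
		 * example of the alignment above, assuming 4KB pages and a
		 * 16-slot UVM_AMAP_CHUNK (values are illustrative only):
		 * chunksize is then 64KB, so a startva of 0x12345000 rounds
		 * down to 0x12340000 and an endva of 0x12356000 rounds up
		 * to 0x12360000.
		 */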
778 
779 		if ((flags & AMAP_COPY_NOMERGE) == 0 &&
780 		    uvm_mapent_trymerge(map, entry, UVM_MERGE_COPYING)) {
781 			return;
782 		}
783 
784 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
785 		entry->start, entry->end, 0, 0);
786 		entry->aref.ar_pageoff = 0;
787 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
788 		    waitf);
789 		if (entry->aref.ar_amap != NULL)
790 			entry->etype &= ~UVM_ET_NEEDSCOPY;
791 		return;
792 	}
793 
794 	/*
795 	 * first check and see if we are the only map entry
796 	 * referencing the amap we currently have.  if so, then we can
797 	 * just take it over rather than copying it.  note that we are
798 	 * reading am_ref with the amap unlocked... the value can only
799 	 * be one if we have the only reference to the amap (via our
800 	 * locked map).  if the count is greater than one we fall through to
801 	 * the next case (where we double check the value).
802 	 */
803 
804 	if (entry->aref.ar_amap->am_ref == 1) {
805 		entry->etype &= ~UVM_ET_NEEDSCOPY;
806 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
807 		    0, 0, 0, 0);
808 		return;
809 	}
810 
811 	/*
812 	 * looks like we need to copy the amap.
813 	 */
814 
815 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
816 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
817 	AMAP_B2SLOT(slots, entry->end - entry->start);
818 	amap = amap_alloc1(slots, 0, waitf);
819 	if (amap == NULL) {
820 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
821 		return;
822 	}
823 	srcamap = entry->aref.ar_amap;
824 	amap_lock(srcamap);
825 
826 	/*
827 	 * need to double check reference count now that we've got the
828 	 * src amap locked down.  the reference count could have
829 	 * changed while we were allocating.  if the reference count
830 	 * dropped down to one we take over the old amap rather than
831 	 * copying it.
832 	 */
833 
834 	if (srcamap->am_ref == 1) {		/* take it over? */
835 		entry->etype &= ~UVM_ET_NEEDSCOPY;
836 		amap->am_ref--;		/* drop final reference to map */
837 		amap_free(amap);	/* dispose of new (unused) amap */
838 		amap_unlock(srcamap);
839 		return;
840 	}
841 
842 	/*
843 	 * we must copy it now.
844 	 */
845 
846 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
847 	for (lcv = 0 ; lcv < slots; lcv++) {
848 		amap->am_anon[lcv] =
849 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
850 		if (amap->am_anon[lcv] == NULL)
851 			continue;
852 		simple_lock(&amap->am_anon[lcv]->an_lock);
853 		amap->am_anon[lcv]->an_ref++;
854 		simple_unlock(&amap->am_anon[lcv]->an_lock);
855 		amap->am_bckptr[lcv] = amap->am_nused;
856 		amap->am_slots[amap->am_nused] = lcv;
857 		amap->am_nused++;
858 	}
859 	memset(&amap->am_anon[lcv], 0,
860 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
861 
862 	/*
863 	 * drop our reference to the old amap (srcamap) and unlock.
864 	 * we know that the reference count on srcamap is greater than
865 	 * one (we checked above), so there is no way we could drop
866 	 * the count to zero.  [and no need to worry about freeing it]
867 	 */
868 
869 	srcamap->am_ref--;
870 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
871 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
872 #ifdef UVM_AMAP_PPREF
873 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
874 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
875 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
876 	}
877 #endif
878 
879 	amap_unlock(srcamap);
880 
881 	amap_list_insert(amap);
882 
883 	/*
884 	 * install new amap.
885 	 */
886 
887 	entry->aref.ar_pageoff = 0;
888 	entry->aref.ar_amap = amap;
889 	entry->etype &= ~UVM_ET_NEEDSCOPY;
890 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
891 }
892 
893 /*
894  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
895  *
896  *	called during fork(2) when the parent process has a wired map
897  *	entry.   in that case we want to avoid write-protecting pages
898  *	in the parent's map (e.g. like what you'd do for a COW page)
899  *	so we resolve the COW here.
900  *
901  * => assume parent's entry was wired, thus all pages are resident.
902  * => assume pages that are loaned out (loan_count) are already mapped
903  *	read-only in all maps, and thus no need for us to worry about them
904  * => assume both parent and child vm_map's are locked
905  * => caller passes child's map/entry in to us
906  * => if we run out of memory we will unlock the amap and sleep _with_ the
907  *	parent and child vm_map's locked(!).    we have to do this since
908  *	we are in the middle of a fork(2) and we can't let the parent
909 	 *	map change until we are done copying all the map entries.
910  * => XXXCDC: out of memory should cause fork to fail, but there is
911  *	currently no easy way to do this (needs fix)
912  * => page queues must be unlocked (we may lock them)
913  */
914 
915 void
916 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
917 {
918 	struct vm_amap *amap = entry->aref.ar_amap;
919 	int lcv, slot;
920 	struct vm_anon *anon, *nanon;
921 	struct vm_page *pg, *npg;
922 
923 	/*
924 	 * note that if we unlock the amap then we must ReStart the "lcv" for
925 	 * loop because some other process could reorder the anon's in the
926 	 * loop because some other process could reorder the anons in the
927 	 */
928 
929 ReStart:
930 	amap_lock(amap);
931 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
932 
933 		/*
934 		 * get the page
935 		 */
936 
937 		slot = amap->am_slots[lcv];
938 		anon = amap->am_anon[slot];
939 		simple_lock(&anon->an_lock);
940 
941 		/*
942 		 * If the anon has only one ref, we must have already copied it.
943 		 * This can happen if we needed to sleep waiting for memory
944 		 * in a previous run through this loop.  The new page might
945 		 * even have been paged out, since the new page is not wired.
946 		 */
947 
948 		if (anon->an_ref == 1) {
949 			KASSERT(anon->an_page != NULL || anon->an_swslot != 0);
950 			simple_unlock(&anon->an_lock);
951 			continue;
952 		}
953 
954 		/*
955 		 * The old page must be resident since the parent is wired.
956 		 */
957 
958 		pg = anon->an_page;
959 		KASSERT(pg != NULL);
960 		KASSERT(pg->wire_count > 0);
961 
962 		/*
963 		 * If the page is loaned then it must already be mapped
964 		 * read-only and we don't need to copy it.
965 		 */
966 
967 		if (pg->loan_count != 0) {
968 			simple_unlock(&anon->an_lock);
969 			continue;
970 		}
971 		KASSERT(pg->uanon == anon && pg->uobject == NULL);
972 
973 		/*
974 		 * if the page is busy then we have to unlock, wait for
975 		 * it and then restart.
976 		 */
977 
978 		if (pg->flags & PG_BUSY) {
979 			pg->flags |= PG_WANTED;
980 			amap_unlock(amap);
981 			UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
982 			    "cownow", 0);
983 			goto ReStart;
984 		}
985 
986 		/*
987 		 * ok, time to do a copy-on-write to a new anon
988 		 */
989 
990 		nanon = uvm_analloc();
991 		if (nanon) {
992 			npg = uvm_pagealloc(NULL, 0, nanon, 0);
993 		} else
994 			npg = NULL;	/* XXX: quiet gcc warning */
995 		if (nanon == NULL || npg == NULL) {
996 
997 			/*
998 			 * XXXCDC: we should cause fork to fail, but we can't.
999 			 */
1000 
1001 			if (nanon) {
1002 				nanon->an_ref--;
1003 				simple_unlock(&nanon->an_lock);
1004 				uvm_anfree(nanon);
1005 			}
1006 			simple_unlock(&anon->an_lock);
1007 			amap_unlock(amap);
1008 			uvm_wait("cownowpage");
1009 			goto ReStart;
1010 		}
1011 
1012 		/*
1013 		 * got it... now we can copy the data and replace anon
1014 		 * with our new one...
1015 		 */
1016 
1017 		uvm_pagecopy(pg, npg);		/* old -> new */
1018 		anon->an_ref--;			/* can't drop to zero */
1019 		amap->am_anon[slot] = nanon;	/* replace */
1020 
1021 		/*
1022 		 * drop PG_BUSY on new page ... since we have had its owner
1023 		 * locked the whole time it can't be PG_RELEASED or PG_WANTED.
1024 		 */
1025 
1026 		uvm_lock_pageq();
1027 		uvm_pageactivate(npg);
1028 		uvm_unlock_pageq();
1029 		npg->flags &= ~(PG_BUSY|PG_FAKE);
1030 		UVM_PAGE_OWN(npg, NULL);
1031 		simple_unlock(&nanon->an_lock);
1032 		simple_unlock(&anon->an_lock);
1033 	}
1034 	amap_unlock(amap);
1035 }
1036 
1037 /*
1038  * amap_splitref: split a single reference into two separate references
1039  *
1040  * => called from uvm_map's clip routines
1041  * => origref's map should be locked
1042  * => origref->ar_amap should be unlocked (we will lock)
1043  */
1044 void
1045 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
1046 {
1047 	int leftslots;
1048 	struct vm_amap *amap;
1049 
1050 	KASSERT(splitref->ar_amap == origref->ar_amap);
1051 	AMAP_B2SLOT(leftslots, offset);
1052 	if (leftslots == 0)
1053 		panic("amap_splitref: split at zero offset");
1054 
1055 	amap = origref->ar_amap;
1056 	amap_lock(amap);
1057 
1058 	/*
1059 	 * now: amap is locked and we have a valid am_anon array.
1060 	 */
1061 
1062 	if (amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
1063 		panic("amap_splitref: map size check failed");
1064 
1065 #ifdef UVM_AMAP_PPREF
1066 	/*
1067 	 * establish ppref before we add a duplicate reference to the amap
1068 	 */
1069 	if (amap->am_ppref == NULL)
1070 		amap_pp_establish(amap, origref->ar_pageoff);
1071 #endif
1072 
1073 	amap->am_ref++;		/* not a share reference */
1074 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1075 
1076 	amap_unlock(amap);
1077 }
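/*
 * editorial note (not part of the original source): a small worked example,
 * assuming 4KB pages.  after amap_splitref(&orig, &split, 0x3000), both
 * arefs still point at the same amap (the caller set split.ar_amap
 * beforehand, as the KASSERT above checks), split.ar_pageoff is
 * orig.ar_pageoff + 3, and am_ref has been bumped by one to account for
 * the second reference.
 */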
1078 
1079 #ifdef UVM_AMAP_PPREF
1080 
1081 /*
1082  * amap_pp_establish: add a ppref array to an amap, if possible
1083  *
1084  * => amap locked by caller
1085  */
1086 void
1087 amap_pp_establish(struct vm_amap *amap, vaddr_t offset)
1088 {
1089 
1090 	amap->am_ppref = kmem_alloc(amap->am_maxslot * sizeof(*amap->am_ppref),
1091 	    KM_NOSLEEP);
1092 
1093 	/*
1094 	 * if we fail then we just won't use ppref for this amap
1095 	 */
1096 
1097 	if (amap->am_ppref == NULL) {
1098 		amap->am_ppref = PPREF_NONE;	/* not using it */
1099 		return;
1100 	}
1101 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1102 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1103 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1104 	    amap->am_nslot - offset);
1105 	return;
1106 }
1107 
1108 /*
1109  * amap_pp_adjref: adjust reference count to a part of an amap using the
1110  * per-page reference count array.
1111  *
1112  * => map and amap locked by caller
1113  * => caller must check that ppref != PPREF_NONE before calling
1114  */
1115 void
1116 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
1117 {
1118 	int stopslot, *ppref, lcv, prevlcv;
1119 	int ref, len, prevref, prevlen;
1120 
1121 	stopslot = curslot + slotlen;
1122 	ppref = amap->am_ppref;
1123 	prevlcv = 0;
1124 
1125 	/*
1126 	 * first advance to the correct place in the ppref array,
1127 	 * fragment if needed.
1128 	 */
1129 
1130 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1131 		pp_getreflen(ppref, lcv, &ref, &len);
1132 		if (lcv + len > curslot) {     /* goes past start? */
1133 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1134 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1135 			len = curslot - lcv;   /* new length of entry @ lcv */
1136 		}
1137 		prevlcv = lcv;
1138 	}
1139 	if (lcv != 0)
1140 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1141 	else {
1142 		/* Ensure that the "prevref == ref" test below always
1143 		 * fails, since we're starting from the beginning of
1144 		 * the ppref array; that is, there is no previous
1145 		 * chunk.
1146 		 */
1147 		prevref = -1;
1148 		prevlen = 0;
1149 	}
1150 
1151 	/*
1152 	 * now adjust reference counts in range.  merge the first
1153 	 * changed entry with the last unchanged entry if possible.
1154 	 */
1155 
1156 	if (lcv != curslot)
1157 		panic("amap_pp_adjref: overshot target");
1158 
1159 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1160 		pp_getreflen(ppref, lcv, &ref, &len);
1161 		if (lcv + len > stopslot) {     /* goes past end? */
1162 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1163 			pp_setreflen(ppref, stopslot, ref,
1164 			    len - (stopslot - lcv));
1165 			len = stopslot - lcv;
1166 		}
1167 		ref += adjval;
1168 		if (ref < 0)
1169 			panic("amap_pp_adjref: negative reference count");
1170 		if (lcv == prevlcv + prevlen && ref == prevref) {
1171 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1172 		} else {
1173 			pp_setreflen(ppref, lcv, ref, len);
1174 		}
1175 		if (ref == 0)
1176 			amap_wiperange(amap, lcv, len);
1177 	}
1178 
1179 }
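/*
 * editorial note (not part of the original source): a worked example of
 * amap_pp_adjref().  suppose the ppref array covers 16 slots as a single
 * chunk with ref count 1 (ppref[0] == -2, ppref[1] == 16).  then
 * amap_pp_adjref(amap, 4, 8, 1) first splits off slots 0-3, then splits and
 * bumps slots 4-11, leaving three chunks: slots 0-3 at ref 1, slots 4-11 at
 * ref 2 and slots 12-15 at ref 1.  a later amap_pp_adjref(amap, 4, 8, -1)
 * merges the middle chunk back into the preceding ref-1 chunk.
 */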
1180 
1181 /*
1182  * amap_wiperange: wipe out a range of an amap
1183  * [different from amap_wipeout because the amap is kept intact]
1184  *
1185  * => both map and amap must be locked by caller.
1186  */
1187 void
1188 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
1189 {
1190 	int byanon, lcv, stop, curslot, ptr, slotend;
1191 	struct vm_anon *anon;
1192 
1193 	/*
1194 	 * we can either traverse the amap by am_anon or by am_slots depending
1195 	 * on which is cheaper.    decide now.
1196 	 */
1197 
1198 	if (slots < amap->am_nused) {
1199 		byanon = TRUE;
1200 		lcv = slotoff;
1201 		stop = slotoff + slots;
1202 		slotend = 0;
1203 	} else {
1204 		byanon = FALSE;
1205 		lcv = 0;
1206 		stop = amap->am_nused;
1207 		slotend = slotoff + slots;
1208 	}
1209 
1210 	while (lcv < stop) {
1211 		int refs;
1212 
1213 		if (byanon) {
1214 			curslot = lcv++;	/* lcv advances here */
1215 			if (amap->am_anon[curslot] == NULL)
1216 				continue;
1217 		} else {
1218 			curslot = amap->am_slots[lcv];
1219 			if (curslot < slotoff || curslot >= slotend) {
1220 				lcv++;		/* lcv advances here */
1221 				continue;
1222 			}
1223 			stop--;	/* drop stop, since anon will be removed */
1224 		}
1225 		anon = amap->am_anon[curslot];
1226 
1227 		/*
1228 		 * remove it from the amap
1229 		 */
1230 
1231 		amap->am_anon[curslot] = NULL;
1232 		ptr = amap->am_bckptr[curslot];
1233 		if (ptr != (amap->am_nused - 1)) {
1234 			amap->am_slots[ptr] =
1235 			    amap->am_slots[amap->am_nused - 1];
1236 			amap->am_bckptr[amap->am_slots[ptr]] =
1237 			    ptr;    /* back ptr. */
1238 		}
1239 		amap->am_nused--;
1240 
1241 		/*
1242 		 * drop anon reference count
1243 		 */
1244 
1245 		simple_lock(&anon->an_lock);
1246 		refs = --anon->an_ref;
1247 		simple_unlock(&anon->an_lock);
1248 		if (refs == 0) {
1249 
1250 			/*
1251 			 * we just eliminated the last reference to an anon.
1252 			 * free it.
1253 			 */
1254 
1255 			uvm_anfree(anon);
1256 		}
1257 	}
1258 }
1259 
1260 #endif
1261 
1262 #if defined(VMSWAP)
1263 
1264 /*
1265  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1266  *
1267  * => called with swap_syscall_lock held.
1268  * => note that we don't always traverse all anons.
1269  *    e.g. amaps that are being wiped out, or anons that have been released.
1270  * => returns TRUE on failure.
1271  */
1272 
1273 boolean_t
1274 amap_swap_off(int startslot, int endslot)
1275 {
1276 	struct vm_amap *am;
1277 	struct vm_amap *am_next;
1278 	struct vm_amap marker_prev;
1279 	struct vm_amap marker_next;
1280 	struct lwp *l = curlwp;
1281 	boolean_t rv = FALSE;
1282 
1283 #if defined(DIAGNOSTIC)
1284 	memset(&marker_prev, 0, sizeof(marker_prev));
1285 	memset(&marker_next, 0, sizeof(marker_next));
1286 #endif /* defined(DIAGNOSTIC) */
1287 
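	/*
	 * editorial note (not part of the original source): marker_prev and
	 * marker_next are dummy list entries inserted around the amap being
	 * examined.  the loop below drops amap_list_lock (to preempt, or
	 * while paging anons back in), and the markers let it find its place
	 * in amap_list again even if the current amap has been wiped out or
	 * the list has been rearranged in the meantime.
	 */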
1288 	PHOLD(l);
1289 	simple_lock(&amap_list_lock);
1290 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1291 		int i;
1292 
1293 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1294 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1295 
1296 		if (!amap_lock_try(am)) {
1297 			simple_unlock(&amap_list_lock);
1298 			preempt(1);
1299 			simple_lock(&amap_list_lock);
1300 			am_next = LIST_NEXT(&marker_prev, am_list);
1301 			if (am_next == &marker_next) {
1302 				am_next = LIST_NEXT(am_next, am_list);
1303 			} else {
1304 				KASSERT(LIST_NEXT(am_next, am_list) ==
1305 				    &marker_next);
1306 			}
1307 			LIST_REMOVE(&marker_prev, am_list);
1308 			LIST_REMOVE(&marker_next, am_list);
1309 			continue;
1310 		}
1311 
1312 		simple_unlock(&amap_list_lock);
1313 
1314 		if (am->am_nused <= 0) {
1315 			amap_unlock(am);
1316 			goto next;
1317 		}
1318 
1319 		for (i = 0; i < am->am_nused; i++) {
1320 			int slot;
1321 			int swslot;
1322 			struct vm_anon *anon;
1323 
1324 			slot = am->am_slots[i];
1325 			anon = am->am_anon[slot];
1326 			simple_lock(&anon->an_lock);
1327 
1328 			swslot = anon->an_swslot;
1329 			if (swslot < startslot || endslot <= swslot) {
1330 				simple_unlock(&anon->an_lock);
1331 				continue;
1332 			}
1333 
1334 			am->am_flags |= AMAP_SWAPOFF;
1335 			amap_unlock(am);
1336 
1337 			rv = uvm_anon_pagein(anon);
1338 
1339 			amap_lock(am);
1340 			am->am_flags &= ~AMAP_SWAPOFF;
1341 			if (amap_refs(am) == 0) {
1342 				amap_wipeout(am);
1343 				am = NULL;
1344 				break;
1345 			}
1346 			if (rv) {
1347 				break;
1348 			}
1349 			i = 0;
1350 		}
1351 
1352 		if (am) {
1353 			amap_unlock(am);
1354 		}
1355 
1356 next:
1357 		simple_lock(&amap_list_lock);
1358 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1359 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1360 		    &marker_next);
1361 		am_next = LIST_NEXT(&marker_next, am_list);
1362 		LIST_REMOVE(&marker_prev, am_list);
1363 		LIST_REMOVE(&marker_next, am_list);
1364 	}
1365 	simple_unlock(&amap_list_lock);
1366 	PRELE(l);
1367 
1368 	return rv;
1369 }
1370 
1371 #endif /* defined(VMSWAP) */
1372 
1373 /*
1374  * amap_lookup: look up a page in an amap
1375  *
1376  * => amap should be locked by caller.
1377  */
1378 struct vm_anon *
1379 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1380 {
1381 	int slot;
1382 	struct vm_amap *amap = aref->ar_amap;
1383 	UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
1384 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
1385 
1386 	AMAP_B2SLOT(slot, offset);
1387 	slot += aref->ar_pageoff;
1388 
1389 	if (slot >= amap->am_nslot)
1390 		panic("amap_lookup: offset out of range");
1391 
1392 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
1393 	    amap, offset, amap->am_anon[slot], 0);
1394 	return(amap->am_anon[slot]);
1395 }
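/*
 * editorial note (not part of the original source): a small worked example
 * of the aref addressing used by the lookup routines, assuming 4KB pages.
 * for an aref with ar_pageoff == 3, looking up byte offset 0x2000 (two
 * pages into the mapping) consults slot 2 + 3 == 5, i.e. amap->am_anon[5].
 */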
1396 
1397 /*
1398  * amap_lookups: look up a range of pages in an amap
1399  *
1400  * => amap should be locked by caller.
1401  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
1402  */
1403 void
1404 amap_lookups(struct vm_aref *aref, vaddr_t offset, struct vm_anon **anons,
1405     int npages)
1406 {
1407 	int slot;
1408 	struct vm_amap *amap = aref->ar_amap;
1409 	UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
1410 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
1411 
1412 	AMAP_B2SLOT(slot, offset);
1413 	slot += aref->ar_pageoff;
1414 
1415 	UVMHIST_LOG(maphist, "  slot=%d, npages=%d, nslot=%d", slot, npages,
1416 		amap->am_nslot, 0);
1417 
1418 	if ((slot + (npages - 1)) >= amap->am_nslot)
1419 		panic("amap_lookups: offset out of range");
1420 
1421 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1422 
1423 	UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
1424 	return;
1425 }
1426 
1427 /*
1428  * amap_add: add (or replace) a page to an amap
1429  *
1430  * => caller must lock amap.
1431  * => if (replace) caller must lock anon because we might have to call
1432  *	pmap_page_protect on the anon's page.
1433  */
1434 void
1435 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1436     boolean_t replace)
1437 {
1438 	int slot;
1439 	struct vm_amap *amap = aref->ar_amap;
1440 	UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
1441 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
1442 
1443 	AMAP_B2SLOT(slot, offset);
1444 	slot += aref->ar_pageoff;
1445 
1446 	if (slot >= amap->am_nslot)
1447 		panic("amap_add: offset out of range");
1448 
1449 	if (replace) {
1450 
1451 		if (amap->am_anon[slot] == NULL)
1452 			panic("amap_add: replacing null anon");
1453 		if (amap->am_anon[slot]->an_page != NULL &&
1454 		    (amap->am_flags & AMAP_SHARED) != 0) {
1455 			pmap_page_protect(amap->am_anon[slot]->an_page,
1456 			    VM_PROT_NONE);
1457 			/*
1458 			 * XXX: suppose page is supposed to be wired somewhere?
1459 			 */
1460 		}
1461 	} else {   /* !replace */
1462 		if (amap->am_anon[slot] != NULL)
1463 			panic("amap_add: slot in use");
1464 
1465 		amap->am_bckptr[slot] = amap->am_nused;
1466 		amap->am_slots[amap->am_nused] = slot;
1467 		amap->am_nused++;
1468 	}
1469 	amap->am_anon[slot] = anon;
1470 	UVMHIST_LOG(maphist,
1471 	    "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
1472 	    amap, offset, anon, replace);
1473 }
1474 
1475 /*
1476  * amap_unadd: remove a page from an amap
1477  *
1478  * => caller must lock amap
1479  */
1480 void
1481 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1482 {
1483 	int ptr, slot;
1484 	struct vm_amap *amap = aref->ar_amap;
1485 	UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
1486 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
1487 
1488 	AMAP_B2SLOT(slot, offset);
1489 	slot += aref->ar_pageoff;
1490 
1491 	if (slot >= amap->am_nslot)
1492 		panic("amap_unadd: offset out of range");
1493 
1494 	if (amap->am_anon[slot] == NULL)
1495 		panic("amap_unadd: nothing there");
1496 
1497 	amap->am_anon[slot] = NULL;
1498 	ptr = amap->am_bckptr[slot];
1499 
1500 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1501 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1502 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1503 	}
1504 	amap->am_nused--;
1505 	UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0);
1506 }
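/*
 * editorial note (not part of the original source): amap_add() and
 * amap_unadd() maintain the invariant that am_slots[0 .. am_nused-1] is a
 * dense list of the slots that currently hold an anon, and that
 * am_bckptr[slot] is the index of "slot" within am_slots.  this is what
 * lets amap_unadd() above remove a slot in constant time: it moves the last
 * am_slots[] entry into the hole and repairs that entry's back pointer.
 */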
1507 
1508 /*
1509  * amap_ref: gain a reference to an amap
1510  *
1511  * => amap must not be locked (we will lock)
1512  * => "offset" and "len" are in units of pages
1513  * => called at fork time to gain the child's reference
1514  */
1515 void
1516 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1517 {
1518 	UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
1519 
1520 	amap_lock(amap);
1521 	if (flags & AMAP_SHARED)
1522 		amap->am_flags |= AMAP_SHARED;
1523 #ifdef UVM_AMAP_PPREF
1524 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1525 	    len != amap->am_nslot)
1526 		amap_pp_establish(amap, offset);
1527 #endif
1528 	amap->am_ref++;
1529 #ifdef UVM_AMAP_PPREF
1530 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1531 		if (flags & AMAP_REFALL)
1532 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1533 		else
1534 			amap_pp_adjref(amap, offset, len, 1);
1535 	}
1536 #endif
1537 	amap_unlock(amap);
1538 	UVMHIST_LOG(maphist,"<- done!  amap=0x%x", amap, 0, 0, 0);
1539 }
1540 
1541 /*
1542  * amap_unref: remove a reference to an amap
1543  *
1544  * => caller must remove all pmap-level references to this amap before
1545  *	dropping the reference
1546  * => called from uvm_unmap_detach [only]  ... note that entry is no
1547  *	longer part of a map and thus has no need for locking
1548  * => amap must be unlocked (we will lock it).
1549  */
1550 void
1551 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
1552 {
1553 	UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
1554 
1555 	/*
1556 	 * lock it
1557 	 */
1558 	amap_lock(amap);
1559 	UVMHIST_LOG(maphist,"  amap=0x%x  refs=%d, nused=%d",
1560 	    amap, amap->am_ref, amap->am_nused, 0);
1561 
1562 	KASSERT(amap_refs(amap) > 0);
1563 
1564 	/*
1565 	 * if we are the last reference, free the amap and return.
1566 	 */
1567 
1568 	amap->am_ref--;
1569 
1570 	if (amap_refs(amap) == 0) {
1571 		amap_wipeout(amap);	/* drops final ref and frees */
1572 		UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
1573 		return;			/* no need to unlock */
1574 	}
1575 
1576 	/*
1577 	 * otherwise just drop the reference count(s)
1578 	 */
1579 
1580 	if (amap_refs(amap) == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1581 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1582 #ifdef UVM_AMAP_PPREF
1583 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1584 		amap_pp_establish(amap, offset);
1585 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1586 		if (all)
1587 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1588 		else
1589 			amap_pp_adjref(amap, offset, len, -1);
1590 	}
1591 #endif
1592 	amap_unlock(amap);
1593 
1594 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1595 }
1596 
1597