1 /*	$OpenBSD: uvm_amap.c,v 1.46 2011/07/03 18:34:14 oga Exp $	*/
2 /*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/
3 
4 /*
5  *
6  * Copyright (c) 1997 Charles D. Cranor and Washington University.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed by Charles D. Cranor and
20  *      Washington University.
21  * 4. The name of the author may not be used to endorse or promote products
22  *    derived from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /*
37  * uvm_amap.c: amap operations
38  */
39 
40 /*
41  * this file contains functions that perform operations on amaps.  see
42  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/malloc.h>
49 #include <sys/kernel.h>
50 #include <sys/pool.h>
51 
52 #include <uvm/uvm.h>
53 #include <uvm/uvm_swap.h>
54 
55 /*
56  * pool for allocation of vm_amap structures.  note that the pool has
57  * its own simplelock for its protection.  also note that in order to
58  * avoid an endless loop, the amap pool's allocator cannot allocate
59  * memory from an amap (it currently goes through the kernel uobj, so
60  * we are ok).
61  */
62 
63 struct pool uvm_amap_pool;
64 
65 LIST_HEAD(, vm_amap) amap_list;
66 
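/*
 * note on MALLOC_SLOT_UNIT: every slot of an amap costs one int in
 * am_slots, one int in am_bckptr and one vm_anon pointer in am_anon.
 * amap_alloc1() below makes a single allocation of
 * "slots * MALLOC_SLOT_UNIT" bytes and carves the three arrays out of
 * it, which is where the "2 ints plus a pointer" unit comes from.
 */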
67 #define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
68 
69 /*
70  * local functions
71  */
72 
73 static struct vm_amap *amap_alloc1(int, int, int);
74 static __inline void amap_list_insert(struct vm_amap *);
75 static __inline void amap_list_remove(struct vm_amap *);
76 
77 static __inline void
78 amap_list_insert(struct vm_amap *amap)
79 {
80 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
81 }
82 
83 static __inline void
84 amap_list_remove(struct vm_amap *amap)
85 {
86 	LIST_REMOVE(amap, am_list);
87 }
88 
89 #ifdef UVM_AMAP_PPREF
90 /*
91  * what is ppref?   ppref is an _optional_ amap feature which is used
92  * to keep track of reference counts on a per-page basis.  it is enabled
93  * when UVM_AMAP_PPREF is defined.
94  *
95  * when enabled, an array of ints is allocated for the pprefs.  this
96  * array is allocated only when a partial reference is added to the
97  * map (either by unmapping part of the amap, or gaining a reference
98  * to only a part of an amap).  if the malloc of the array fails
99  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
100  * that we tried to do ppref's but couldn't alloc the array so just
101  * give up (after all, this is an optional feature!).
102  *
103  * the array is divided into page sized "chunks."   for chunks of length 1,
104  * the chunk reference count plus one is stored in that chunk's slot.
105  * for chunks of length > 1 the first slot contains (the reference count
106  * plus one) * -1.    [the negative value indicates that the length is
107  * greater than one.]   the second slot of the chunk contains the length
108  * of the chunk.   here is an example:
109  *
110  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
111  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
112  *              <----------><-><----><-------><----><-><------->
113  * (x = don't care)
114  *
115  * this lets a single int carry the ref count for an entire multi-slot
116  * chunk.    note that the "plus one" part is needed because a reference
117  * count of zero is neither positive nor negative (need a way to tell
118  * if we've got one zero or a bunch of them).
119  *
120  * here are some in-line functions to help us.
121  */
122 
123 static __inline void pp_getreflen(int *, int, int *, int *);
124 static __inline void pp_setreflen(int *, int, int, int);
125 
126 /*
127  * pp_getreflen: get the reference and length for a specific offset
128  *
129  * => ppref's amap must be locked
130  */
131 static __inline void
132 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
133 {
134 
135 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
136 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
137 		*lenp = 1;
138 	} else {
139 		*refp = (ppref[offset] * -1) - 1;
140 		*lenp = ppref[offset+1];
141 	}
142 }
143 
144 /*
145  * pp_setreflen: set the reference and length for a specific offset
146  *
147  * => ppref's amap must be locked
148  */
149 static __inline void
150 pp_setreflen(int *ppref, int offset, int ref, int len)
151 {
152 	if (len == 1) {
153 		ppref[offset] = ref + 1;
154 	} else {
155 		ppref[offset] = (ref + 1) * -1;
156 		ppref[offset+1] = len;
157 	}
158 }
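
/*
 * illustrative sketch (comment only, not compiled): how a reference
 * count of 2 over a 4-slot chunk starting at offset 0 round-trips
 * through the encoding described above.
 *
 *      pp_setreflen(ppref, 0, 2, 4);
 *              ppref[0] == -3          [-(2 + 1): chunk length > 1]
 *              ppref[1] ==  4          [chunk length]
 *
 *      pp_getreflen(ppref, 0, &ref, &len);
 *              ref == 2, len == 4
 *
 * a single-slot chunk with the same count would instead store
 * ppref[offset] == 3 (2 + 1, positive means length == 1).
 */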
159 #endif
160 
161 /*
162  * amap_init: called at boot time to init global amap data structures
163  */
164 
165 void
166 amap_init(void)
167 {
168 	/*
169 	 * Initialize the vm_amap pool.
170 	 */
171 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
172 	    "amappl", &pool_allocator_nointr);
173 	pool_sethiwat(&uvm_amap_pool, 4096);
174 }
175 
176 /*
177  * amap_alloc1: internal function that allocates an amap, but does not
178  *	init the overlay.
179  *
180  * => lock on returned amap is init'd
181  */
182 static inline struct vm_amap *
183 amap_alloc1(int slots, int padslots, int waitf)
184 {
185 	struct vm_amap *amap;
186 	int totalslots;
187 
188 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
189 	    : PR_NOWAIT);
190 	if (amap == NULL)
191 		return(NULL);
192 
193 	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
194 	    MALLOC_SLOT_UNIT;
195 	amap->am_ref = 1;
196 	amap->am_flags = 0;
197 #ifdef UVM_AMAP_PPREF
198 	amap->am_ppref = NULL;
199 #endif
200 	amap->am_maxslot = totalslots;
201 	amap->am_nslot = slots;
202 	amap->am_nused = 0;
203 
204 	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
205 	    waitf);
206 	if (amap->am_slots == NULL)
207 		goto fail1;
208 
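	/*
	 * carve am_bckptr and am_anon out of the single allocation made
	 * above; the buffer is laid out as [am_slots ints][am_bckptr ints]
	 * [am_anon pointers], which is what MALLOC_SLOT_UNIT accounts for.
	 */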
209 	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
210 	    sizeof(int));
211 	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
212 	    totalslots * sizeof(int));
213 
214 	return(amap);
215 
216 fail1:
217 	pool_put(&uvm_amap_pool, amap);
218 	return (NULL);
219 }
220 
221 /*
222  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
223  *
224  * => caller should ensure sz is a multiple of PAGE_SIZE
225  * => reference count to new amap is set to one
226  * => new amap is returned unlocked
227  */
228 
229 struct vm_amap *
230 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
231 {
232 	struct vm_amap *amap;
233 	int slots, padslots;
234 
235 	AMAP_B2SLOT(slots, sz);		/* load slots */
236 	AMAP_B2SLOT(padslots, padsz);
237 
238 	amap = amap_alloc1(slots, padslots, waitf);
239 	if (amap) {
240 		memset(amap->am_anon, 0,
241 		    amap->am_maxslot * sizeof(struct vm_anon *));
242 		amap_list_insert(amap);
243 	}
244 
245 	return(amap);
246 }
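
/*
 * illustrative sketch (not part of this file): allocating an amap to
 * back a four page anonymous mapping.  AMAP_B2SLOT converts the byte
 * length into a slot (page) count, so sz must be a multiple of
 * PAGE_SIZE.
 *
 *      struct vm_amap *amap;
 *
 *      amap = amap_alloc(4 * PAGE_SIZE, 0, M_WAITOK);
 *      if (amap != NULL) {
 *              ... amap->am_ref == 1, am_nslot == 4, am_nused == 0,
 *              ... and all four am_anon[] entries are NULL.
 *      }
 */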
247 
248 
249 /*
250  * amap_free: free an amap
251  *
252  * => the amap must be locked (mainly for simplelock accounting)
253  * => the amap should have a zero reference count and be empty
254  */
255 void
256 amap_free(struct vm_amap *amap)
257 {
258 
259 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
260 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
261 
262 	free(amap->am_slots, M_UVMAMAP);
263 #ifdef UVM_AMAP_PPREF
264 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
265 		free(amap->am_ppref, M_UVMAMAP);
266 #endif
267 	pool_put(&uvm_amap_pool, amap);
268 
269 }
270 
271 /*
272  * amap_extend: extend the size of an amap (if needed)
273  *
274  * => called from uvm_map when we want to extend an amap to cover
275  *    a new mapping (rather than allocate a new one)
276  * => amap should be unlocked (we will lock it)
277  * => to safely extend an amap it should have a reference count of
278  *    one (thus it can't be shared)
279  * => XXXCDC: support padding at this level?
280  */
281 int
282 amap_extend(struct vm_map_entry *entry, vsize_t addsize)
283 {
284 	struct vm_amap *amap = entry->aref.ar_amap;
285 	int slotoff = entry->aref.ar_pageoff;
286 	int slotmapped, slotadd, slotneed, slotalloc;
287 #ifdef UVM_AMAP_PPREF
288 	int *newppref, *oldppref;
289 #endif
290 	u_int *newsl, *newbck, *oldsl, *oldbck;
291 	struct vm_anon **newover, **oldover;
292 	int slotadded;
293 
294 	/*
295 	 * first, determine how many slots we need in the amap.  don't
296 	 * forget that ar_pageoff could be non-zero: this means that
297 	 * there are some unused slots before us in the amap.
298 	 */
299 
300 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
301 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
302 	slotneed = slotoff + slotmapped + slotadd;
303 
304 	/*
305 	 * case 1: we already have enough slots in the map and thus
306 	 * only need to bump the reference counts on the slots we are
307 	 * adding.
308 	 */
309 
310 	if (amap->am_nslot >= slotneed) {
311 #ifdef UVM_AMAP_PPREF
312 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
313 			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
314 		}
315 #endif
316 		return (0);
317 	}
318 
319 	/*
320 	 * case 2: we pre-allocated slots for use and we just need to
321 	 * bump nslot up to take account for these slots.
322 	 */
323 
324 	if (amap->am_maxslot >= slotneed) {
325 #ifdef UVM_AMAP_PPREF
326 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
327 			if ((slotoff + slotmapped) < amap->am_nslot)
328 				amap_pp_adjref(amap, slotoff + slotmapped,
329 				    (amap->am_nslot - (slotoff + slotmapped)),
330 				    1);
331 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
332 			   slotneed - amap->am_nslot);
333 		}
334 #endif
335 		amap->am_nslot = slotneed;
336 
337 		/*
338 		 * no need to zero am_anon since that was done at
339 		 * alloc time and we never shrink an allocation.
340 		 */
341 		return (0);
342 	}
343 
344 	/*
345 	 * case 3: we need to malloc a new amap and copy all the amap
346 	 * data over from old amap to the new one.
347 	 *
348 	 * XXXCDC: could we take advantage of a kernel realloc()?
349 	 */
350 
351 	if (slotneed >= UVM_AMAP_LARGE)
352 		return E2BIG;
353 
354 	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
355 	    MALLOC_SLOT_UNIT;
356 #ifdef UVM_AMAP_PPREF
357 	newppref = NULL;
358 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
359 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
360 		    M_WAITOK | M_CANFAIL);
361 		if (newppref == NULL) {
362 			/* give up if malloc fails */
363 			free(amap->am_ppref, M_UVMAMAP);
364 			amap->am_ppref = PPREF_NONE;
365 		}
366 	}
367 #endif
368 	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
369 	    M_WAITOK | M_CANFAIL);
370 	if (newsl == NULL) {
371 #ifdef UVM_AMAP_PPREF
372 		if (newppref != NULL) {
373 			free(newppref, M_UVMAMAP);
374 		}
375 #endif
376 		return (ENOMEM);
377 	}
378 	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
379 	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
380 	    sizeof(int));
381 	KASSERT(amap->am_maxslot < slotneed);
382 
383 	/*
384 	 * now copy everything over to new malloc'd areas...
385 	 */
386 
387 	slotadded = slotalloc - amap->am_nslot;
388 
389 	/* do am_slots */
390 	oldsl = amap->am_slots;
391 	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
392 	amap->am_slots = newsl;
393 
394 	/* do am_anon */
395 	oldover = amap->am_anon;
396 	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
397 	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
398 	    slotadded);
399 	amap->am_anon = newover;
400 
401 	/* do am_bckptr */
402 	oldbck = amap->am_bckptr;
403 	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
404 	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
405 	amap->am_bckptr = newbck;
406 
407 #ifdef UVM_AMAP_PPREF
408 	/* do ppref */
409 	oldppref = amap->am_ppref;
410 	if (newppref) {
411 		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
412 		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
413 		amap->am_ppref = newppref;
414 		if ((slotoff + slotmapped) < amap->am_nslot)
415 			amap_pp_adjref(amap, slotoff + slotmapped,
416 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
417 		pp_setreflen(newppref, amap->am_nslot, 1,
418 		    slotneed - amap->am_nslot);
419 	}
420 #endif
421 
422 	/* update master values */
423 	amap->am_nslot = slotneed;
424 	amap->am_maxslot = slotalloc;
425 
426 	/* and free */
427 	free(oldsl, M_UVMAMAP);
428 #ifdef UVM_AMAP_PPREF
429 	if (oldppref && oldppref != PPREF_NONE)
430 		free(oldppref, M_UVMAMAP);
431 #endif
432 	return (0);
433 }
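
/*
 * illustrative sketch of how the three cases above are selected, for an
 * entry at ar_pageoff 0 currently mapping 3 slots of an amap with
 * am_nslot == 4 and am_maxslot == 6:
 *
 *      add 1 slot:     slotneed == 4 <= am_nslot   -> case 1 (ref counts only)
 *      add 2 or 3:     slotneed <= am_maxslot      -> case 2 (bump am_nslot)
 *      add 4 or more:  slotneed > am_maxslot       -> case 3 (realloc and copy)
 */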
434 
435 /*
436  * amap_share_protect: change protection of anons in a shared amap
437  *
438  * for shared amaps, given the current data structure layout, it is
439  * not possible for us to directly locate all maps referencing the
440  * shared anon (to change the protection).  in order to protect data
441  * in shared maps we use pmap_page_protect().  [this is useful for IPC
442  * mechanisms like map entry passing that may want to write-protect
443  * all mappings of a shared amap.]  we traverse am_anon or am_slots
444  * depending on the current state of the amap.
445  *
446  * => entry's map and amap must be locked by the caller
447  */
448 void
449 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
450 {
451 	struct vm_amap *amap = entry->aref.ar_amap;
452 	int slots, lcv, slot, stop;
453 
454 	AMAP_B2SLOT(slots, (entry->end - entry->start));
455 	stop = entry->aref.ar_pageoff + slots;
456 
457 	if (slots < amap->am_nused) {
458 		/* cheaper to traverse am_anon */
459 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
460 			if (amap->am_anon[lcv] == NULL)
461 				continue;
462 			if (amap->am_anon[lcv]->an_page != NULL)
463 				pmap_page_protect(amap->am_anon[lcv]->an_page,
464 						  prot);
465 		}
466 		return;
467 	}
468 
469 	/* cheaper to traverse am_slots */
470 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
471 		slot = amap->am_slots[lcv];
472 		if (slot < entry->aref.ar_pageoff || slot >= stop)
473 			continue;
474 		if (amap->am_anon[slot]->an_page != NULL)
475 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
476 	}
477 	return;
478 }
479 
480 /*
481  * amap_wipeout: wipe out all anons in an amap; then free the amap!
482  *
483  * => called from amap_unref when the final reference to an amap is
484  *	discarded (i.e. when reference count == 1)
485  * => the amap should be locked (by the caller)
486  */
487 
488 void
489 amap_wipeout(struct vm_amap *amap)
490 {
491 	int lcv, slot;
492 	struct vm_anon *anon;
493 
494 	KASSERT(amap->am_ref == 0);
495 
496 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
497 		/*
498 		 * amap_swap_off will call us again.
499 		 */
500 		return;
501 	}
502 	amap_list_remove(amap);
503 
504 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
505 		int refs;
506 
507 		slot = amap->am_slots[lcv];
508 		anon = amap->am_anon[slot];
509 
510 		if (anon == NULL || anon->an_ref == 0)
511 			panic("amap_wipeout: corrupt amap");
512 
513 		simple_lock(&anon->an_lock); /* lock anon */
514 
515 		refs = --anon->an_ref;
516 		simple_unlock(&anon->an_lock);
517 		if (refs == 0) {
518 			/*
519 			 * we had the last reference to a vm_anon. free it.
520 			 */
521 			uvm_anfree(anon);
522 		}
523 	}
524 
525 	/*
526 	 * now we free the map
527 	 */
528 
529 	amap->am_ref = 0;	/* ... was one */
530 	amap->am_nused = 0;
531 	amap_free(amap);	/* will unlock and free amap */
532 }
533 
534 /*
535  * amap_copy: ensure that a map entry's "needs_copy" flag is false
536  *	by copying the amap if necessary.
537  *
538  * => an entry with a null amap pointer will get a new (blank) one.
539  * => the map that the map entry belongs to must be locked by caller.
540  * => the amap currently attached to "entry" (if any) must be unlocked.
541  * => if canchunk is true, then we may clip the entry into a chunk
542  * => "startva" and "endva" are used only if canchunk is true.  they are
543  *     used to limit chunking (e.g. if you have a large space that you
544  *     know you are going to need to allocate amaps for, there is no point
545  *     in allowing that to be chunked)
546  */
547 
548 void
549 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
550     boolean_t canchunk, vaddr_t startva, vaddr_t endva)
551 {
552 	struct vm_amap *amap, *srcamap;
553 	int slots, lcv;
554 	vaddr_t chunksize;
555 
556 	/*
557 	 * is there a map to copy?   if not, create one from scratch.
558 	 */
559 
560 	if (entry->aref.ar_amap == NULL) {
561 
562 		/*
563 		 * check to see if we have a large amap that we can
564 		 * chunk.  we align startva/endva to chunk-sized
565 		 * boundaries and then clip to them.
566 		 */
567 
568 		if (canchunk && atop(entry->end - entry->start) >=
569 		    UVM_AMAP_LARGE) {
570 			/* convert slots to bytes */
571 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
572 			startva = (startva / chunksize) * chunksize;
573 			endva = roundup(endva, chunksize);
574 			UVM_MAP_CLIP_START(map, entry, startva);
575 			/* watch out for endva wrap-around! */
576 			if (endva >= startva)
577 				UVM_MAP_CLIP_END(map, entry, endva);
578 		}
579 
580 		entry->aref.ar_pageoff = 0;
581 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
582 		    waitf);
583 		if (entry->aref.ar_amap != NULL)
584 			entry->etype &= ~UVM_ET_NEEDSCOPY;
585 		return;
586 	}
587 
588 	/*
589 	 * first check and see if we are the only map entry
590 	 * referencing the amap we currently have.  if so, then we can
591 	 * just take it over rather than copying it.  note that we are
592 	 * reading am_ref with the amap unlocked... the value can only
593 	 * be one if we have the only reference to the amap (via our
594 	 * locked map).  if we are greater than one we fall through to
595 	 * the next case (where we double check the value).
596 	 */
597 
598 	if (entry->aref.ar_amap->am_ref == 1) {
599 		entry->etype &= ~UVM_ET_NEEDSCOPY;
600 		return;
601 	}
602 
603 	/*
604 	 * looks like we need to copy the map.
605 	 */
606 
607 	AMAP_B2SLOT(slots, entry->end - entry->start);
608 	amap = amap_alloc1(slots, 0, waitf);
609 	if (amap == NULL)
610 		return;
611 	srcamap = entry->aref.ar_amap;
612 
613 	/*
614 	 * need to double check reference count now that we've got the
615 	 * src amap locked down.  the reference count could have
616 	 * changed while we were in malloc.  if the reference count
617 	 * dropped down to one we take over the old map rather than
618 	 * copying the amap.
619 	 */
620 
621 	if (srcamap->am_ref == 1) {		/* take it over? */
622 		entry->etype &= ~UVM_ET_NEEDSCOPY;
623 		amap->am_ref--;		/* drop final reference to map */
624 		amap_free(amap);	/* dispose of new (unused) amap */
625 		return;
626 	}
627 
628 	/*
629 	 * we must copy it now.
630 	 */
631 
632 	for (lcv = 0 ; lcv < slots; lcv++) {
633 		amap->am_anon[lcv] =
634 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
635 		if (amap->am_anon[lcv] == NULL)
636 			continue;
637 		simple_lock(&amap->am_anon[lcv]->an_lock);
638 		amap->am_anon[lcv]->an_ref++;
639 		simple_unlock(&amap->am_anon[lcv]->an_lock);
640 		amap->am_bckptr[lcv] = amap->am_nused;
641 		amap->am_slots[amap->am_nused] = lcv;
642 		amap->am_nused++;
643 	}
644 	memset(&amap->am_anon[lcv], 0,
645 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
646 
647 	/*
648 	 * drop our reference to the old amap (srcamap) and unlock.
649 	 * we know that the reference count on srcamap is greater than
650 	 * one (we checked above), so there is no way we could drop
651 	 * the count to zero.  [and no need to worry about freeing it]
652 	 */
653 
654 	srcamap->am_ref--;
655 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
656 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
657 #ifdef UVM_AMAP_PPREF
658 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
659 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
660 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
661 	}
662 #endif
663 
664 	/*
665 	 * install new amap.
666 	 */
667 
668 	entry->aref.ar_pageoff = 0;
669 	entry->aref.ar_amap = amap;
670 	entry->etype &= ~UVM_ET_NEEDSCOPY;
671 
672 	amap_list_insert(amap);
673 }
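
/*
 * note on the copy above: the new amap's am_anon[] entries point at the
 * same anons as the source (each anon's an_ref is bumped); no pages are
 * copied here.  the actual page duplication is deferred until a write
 * fault, or done immediately by amap_cow_now() below for wired entries.
 */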
674 
675 /*
676  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
677  *
678  *	called during fork(2) when the parent process has a wired map
679  *	entry.   in that case we want to avoid write-protecting pages
680  *	in the parent's map (e.g. like what you'd do for a COW page)
681  *	so we resolve the COW here.
682  *
683  * => assume parent's entry was wired, thus all pages are resident.
684  * => assume pages that are loaned out (loan_count) are already mapped
685  *	read-only in all maps, and thus no need for us to worry about them
686  * => assume both parent and child vm_map's are locked
687  * => caller passes child's map/entry in to us
688  * => if we run out of memory we will unlock the amap and sleep _with_ the
689  *	parent and child vm_map's locked(!).    we have to do this since
690  *	we are in the middle of a fork(2) and we can't let the parent
691  *	map change until we are done copying all the map entries.
692  * => XXXCDC: out of memory should cause fork to fail, but there is
693  *	currently no easy way to do this (needs fix)
694  * => page queues must be unlocked (we may lock them)
695  */
696 
697 void
698 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
699 {
700 	struct vm_amap *amap = entry->aref.ar_amap;
701 	int lcv, slot;
702 	struct vm_anon *anon, *nanon;
703 	struct vm_page *pg, *npg;
704 
705 	/*
706 	 * note that if we unlock the amap then we must ReStart the "lcv" for
707  * loop because some other process could reorder the anons in the
708 	 * am_anon[] array on us while the lock is dropped.
709 	 */
710 ReStart:
711 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
712 
713 		/*
714 		 * get the page
715 		 */
716 
717 		slot = amap->am_slots[lcv];
718 		anon = amap->am_anon[slot];
719 		simple_lock(&anon->an_lock);
720 		pg = anon->an_page;
721 
722 		/*
723 		 * page must be resident since parent is wired
724 		 */
725 
726 		if (pg == NULL)
727 		    panic("amap_cow_now: non-resident wired page in anon %p",
728 			anon);
729 
730 		/*
731 		 * if the anon ref count is one and the page is not loaned,
732 		 * then we are safe (the child has exclusive access to the
733 		 * page).  if the page is loaned, then it must already be
734 		 * mapped read-only.
735 		 *
736 		 * we only need to get involved when these are not true.
737 		 * [note: if loan_count == 0, then the anon must own the page]
738 		 */
739 
740 		if (anon->an_ref > 1 && pg->loan_count == 0) {
741 
742 			/*
743 			 * if the page is busy then we have to unlock, wait for
744 			 * it and then restart.
745 			 */
746 			if (pg->pg_flags & PG_BUSY) {
747 				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
748 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
749 				    "cownow", 0);
750 				goto ReStart;
751 			}
752 
753 			/*
754 			 * ok, time to do a copy-on-write to a new anon
755 			 */
756 			nanon = uvm_analloc();
757 			if (nanon) {
758 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
759 			} else
760 				npg = NULL;	/* XXX: quiet gcc warning */
761 
762 			if (nanon == NULL || npg == NULL) {
763 				/* out of memory */
764 				/*
765 				 * XXXCDC: we should cause fork to fail, but
766 				 * we can't ...
767 				 */
768 				if (nanon) {
769 					simple_lock(&nanon->an_lock);
770 					uvm_anfree(nanon);
771 				}
772 				simple_unlock(&anon->an_lock);
773 				uvm_wait("cownowpage");
774 				goto ReStart;
775 			}
776 
777 			/*
778 			 * got it... now we can copy the data and replace anon
779 			 * with our new one...
780 			 */
781 			uvm_pagecopy(pg, npg);		/* old -> new */
782 			anon->an_ref--;			/* can't drop to zero */
783 			amap->am_anon[slot] = nanon;	/* replace */
784 
785 			/*
786 			 * drop PG_BUSY on new page ... since we have had its
787 			 * owner locked the whole time it can't be
788 			 * PG_RELEASED | PG_WANTED.
789 			 */
790 			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
791 			UVM_PAGE_OWN(npg, NULL);
792 			uvm_lock_pageq();
793 			uvm_pageactivate(npg);
794 			uvm_unlock_pageq();
795 		}
796 
797 		simple_unlock(&anon->an_lock);
798 		/*
799 		 * done with this anon, next ...!
800 		 */
801 
802 	}	/* end of 'for' loop */
803 }
804 
805 /*
806  * amap_splitref: split a single reference into two separate references
807  *
808  * => called from uvm_map's clip routines
809  * => origref's map should be locked
810  * => origref->ar_amap should be unlocked (we will lock)
811  */
812 void
813 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
814 {
815 	int leftslots;
816 
817 	AMAP_B2SLOT(leftslots, offset);
818 	if (leftslots == 0)
819 		panic("amap_splitref: split at zero offset");
820 
821 	/*
822 	 * now: make sure some slots remain in the amap past the split point.
823 	 */
824 
825 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
826 		panic("amap_splitref: map size check failed");
827 
828 #ifdef UVM_AMAP_PPREF
829         /*
830 	 * establish ppref before we add a duplicate reference to the amap
831 	 */
832 	if (origref->ar_amap->am_ppref == NULL)
833 		amap_pp_establish(origref->ar_amap);
834 #endif
835 
836 	splitref->ar_amap = origref->ar_amap;
837 	splitref->ar_amap->am_ref++;		/* not a share reference */
838 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
839 }
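
/*
 * illustrative sketch: splitting an aref that covers 8 slots at byte
 * offset 3 * PAGE_SIZE.
 *
 *      origref:   { ar_amap = A, ar_pageoff = 0 }      covers slots 0..7
 *      amap_splitref(origref, splitref, 3 * PAGE_SIZE);
 *      splitref:  { ar_amap = A, ar_pageoff = 3 }      covers slots 3..7
 *
 * both arefs share the same amap (A's am_ref is bumped by one); only
 * the starting slot differs.
 */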
840 
841 #ifdef UVM_AMAP_PPREF
842 
843 /*
844  * amap_pp_establish: add a ppref array to an amap, if possible
845  *
846  * => amap locked by caller
847  */
848 void
849 amap_pp_establish(struct vm_amap *amap)
850 {
851 
852 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
853 	    M_UVMAMAP, M_NOWAIT|M_ZERO);
854 
855 	/*
856 	 * if we fail then we just won't use ppref for this amap
857 	 */
858 	if (amap->am_ppref == NULL) {
859 		amap->am_ppref = PPREF_NONE;	/* not using it */
860 		return;
861 	}
862 
863 	/*
864 	 * init ppref
865 	 */
866 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
867 }
868 
869 /*
870  * amap_pp_adjref: adjust reference count to a part of an amap using the
871  * per-page reference count array.
872  *
873  * => map and amap locked by caller
874  * => caller must check that ppref != PPREF_NONE before calling
875  */
876 void
877 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
878 {
879  	int stopslot, *ppref, lcv, prevlcv;
880  	int ref, len, prevref, prevlen;
881 
882 	stopslot = curslot + slotlen;
883 	ppref = amap->am_ppref;
884  	prevlcv = 0;
885 
886 	/*
887  	 * first advance to the correct place in the ppref array,
888  	 * fragment if needed.
889 	 */
890 
891 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
892 		pp_getreflen(ppref, lcv, &ref, &len);
893 		if (lcv + len > curslot) {     /* goes past start? */
894 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
895 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
896 			len = curslot - lcv;   /* new length of entry @ lcv */
897 		}
898 		prevlcv = lcv;
899 	}
900 	if (lcv != 0)
901 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
902 	else {
903 		/* Ensure that the "prevref == ref" test below always
904 		 * fails, since we're starting from the beginning of
905 		 * the ppref array; that is, there is no previous
906 		 * chunk.
907 		 */
908 		prevref = -1;
909 		prevlen = 0;
910 	}
911 
912 	/*
913 	 * now adjust reference counts in range.  merge the first
914 	 * changed entry with the last unchanged entry if possible.
915 	 */
916 
917 	if (lcv != curslot)
918 		panic("amap_pp_adjref: overshot target");
919 
920 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
921 		pp_getreflen(ppref, lcv, &ref, &len);
922 		if (lcv + len > stopslot) {     /* goes past end? */
923 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
924 			pp_setreflen(ppref, stopslot, ref,
925 			    len - (stopslot - lcv));
926 			len = stopslot - lcv;
927 		}
928 		ref += adjval;
929 		if (ref < 0)
930 			panic("amap_pp_adjref: negative reference count");
931 		if (lcv == prevlcv + prevlen && ref == prevref) {
932 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
933 		} else {
934 			pp_setreflen(ppref, lcv, ref, len);
935 		}
936 		if (ref == 0)
937 			amap_wiperange(amap, lcv, len);
938 	}
939 
940 }
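
/*
 * worked example (comment only): a ppref array for an 8 slot amap with
 * a uniform reference count of 2, after amap_pp_adjref(amap, 2, 4, -1)
 * drops the count on slots 2..5:
 *
 *      before:  slots 0..7 ref 2       ppref[0] == -3, ppref[1] == 8
 *      after:   slots 0..1 ref 2       ppref[0] == -3, ppref[1] == 2
 *               slots 2..5 ref 1       ppref[2] == -2, ppref[3] == 4
 *               slots 6..7 ref 2       ppref[6] == -3, ppref[7] == 2
 *
 * the first loop fragments the original chunk at slot 2, the second
 * fragments it again at slot 6 and applies the adjustment; nothing is
 * wiped because no count reached zero.
 */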
941 
942 /*
943  * amap_wiperange: wipe out a range of an amap
944  * [different from amap_wipeout because the amap is kept intact]
945  *
946  * => both map and amap must be locked by caller.
947  */
948 void
949 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
950 {
951 	int byanon, lcv, stop, curslot, ptr, slotend;
952 	struct vm_anon *anon;
953 
954 	/*
955 	 * we can either traverse the amap by am_anon or by am_slots depending
956 	 * on which is cheaper.    decide now.
957 	 */
958 
959 	if (slots < amap->am_nused) {
960 		byanon = TRUE;
961 		lcv = slotoff;
962 		stop = slotoff + slots;
963 	} else {
964 		byanon = FALSE;
965 		lcv = 0;
966 		stop = amap->am_nused;
967 		slotend = slotoff + slots;
968 	}
969 
970 	while (lcv < stop) {
971 		int refs;
972 
973   		if (byanon) {
974 			curslot = lcv++;	/* lcv advances here */
975 			if (amap->am_anon[curslot] == NULL)
976 				continue;
977 		} else {
978 			curslot = amap->am_slots[lcv];
979 			if (curslot < slotoff || curslot >= slotend) {
980 				lcv++;		/* lcv advances here */
981 				continue;
982 			}
983 			stop--;	/* drop stop, since anon will be removed */
984 		}
985 		anon = amap->am_anon[curslot];
986 
987 		/*
988 		 * remove it from the amap
989 		 */
990 		amap->am_anon[curslot] = NULL;
991 		ptr = amap->am_bckptr[curslot];
992 		if (ptr != (amap->am_nused - 1)) {
993 			amap->am_slots[ptr] =
994 			    amap->am_slots[amap->am_nused - 1];
995 			amap->am_bckptr[amap->am_slots[ptr]] =
996 			    ptr;    /* back ptr. */
997 		}
998 		amap->am_nused--;
999 
1000 		/*
1001 		 * drop anon reference count
1002 		 */
1003 		simple_lock(&anon->an_lock);
1004 		refs = --anon->an_ref;
1005 		simple_unlock(&anon->an_lock);
1006 		if (refs == 0) {
1007 			/*
1008 			 * we just eliminated the last reference to an anon.
1009 			 * free it.
1010 			 */
1011 			uvm_anfree(anon);
1012 		}
1013 	}
1014 }
1015 
1016 #endif
1017 
1018 /*
1019  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
1020  *
1021  * => called with swap_syscall_lock held.
1022  * => note that we don't always traverse all anons.
1023  *    eg. amaps being wiped out, released anons.
1024  * => return TRUE if failed.
1025  */
1026 
1027 boolean_t
1028 amap_swap_off(int startslot, int endslot)
1029 {
1030 	struct vm_amap *am;
1031 	struct vm_amap *am_next;
1032 	struct vm_amap marker_prev;
1033 	struct vm_amap marker_next;
1034 	boolean_t rv = FALSE;
1035 
1036 #if defined(DIAGNOSTIC)
1037 	memset(&marker_prev, 0, sizeof(marker_prev));
1038 	memset(&marker_next, 0, sizeof(marker_next));
1039 #endif /* defined(DIAGNOSTIC) */
1040 
1041 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
1042 		int i;
1043 
1044 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
1045 		LIST_INSERT_AFTER(am, &marker_next, am_list);
1046 
1047 		if (am->am_nused <= 0) {
1048 			goto next;
1049 		}
1050 
1051 		for (i = 0; i < am->am_nused; i++) {
1052 			int slot;
1053 			int swslot;
1054 			struct vm_anon *anon;
1055 
1056 			slot = am->am_slots[i];
1057 			anon = am->am_anon[slot];
1058 			simple_lock(&anon->an_lock);
1059 
1060 			swslot = anon->an_swslot;
1061 			if (swslot < startslot || endslot <= swslot) {
1062 				simple_unlock(&anon->an_lock);
1063 				continue;
1064 			}
1065 
1066 			am->am_flags |= AMAP_SWAPOFF;
1067 
1068 			rv = uvm_anon_pagein(anon);
1069 
1070 			am->am_flags &= ~AMAP_SWAPOFF;
1071 			if (amap_refs(am) == 0) {
1072 				amap_wipeout(am);
1073 				am = NULL;
1074 				break;
1075 			}
1076 			if (rv) {
1077 				break;
1078 			}
1079 			i = 0;
1080 		}
1081 
1082 next:
1083 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
1084 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
1085 		    &marker_next);
1086 		am_next = LIST_NEXT(&marker_next, am_list);
1087 		LIST_REMOVE(&marker_prev, am_list);
1088 		LIST_REMOVE(&marker_next, am_list);
1089 	}
1090 
1091 	return rv;
1092 }
1093 
1094 /*
1095  * amap_lookup: look up a page in an amap
1096  *
1097  * => amap should be locked by caller.
1098  */
1099 struct vm_anon *
1100 amap_lookup(struct vm_aref *aref, vaddr_t offset)
1101 {
1102 	int slot;
1103 	struct vm_amap *amap = aref->ar_amap;
1104 
1105 	AMAP_B2SLOT(slot, offset);
1106 	slot += aref->ar_pageoff;
1107 
1108 	if (slot >= amap->am_nslot)
1109 		panic("amap_lookup: offset out of range");
1110 
1111 	return(amap->am_anon[slot]);
1112 }
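
/*
 * illustrative note: "offset" is a byte offset into the aref, so a
 * caller resolving the anon behind virtual address "va" within a map
 * entry would typically do something like
 *
 *      anon = amap_lookup(&entry->aref, va - entry->start);
 *
 * and get NULL back if no anonymous page has been entered at that slot.
 */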
1113 
1114 /*
1115  * amap_lookups: look up a range of pages in an amap
1116  *
1117  * => amap should be locked by caller.
1118  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
1119  */
1120 void
1121 amap_lookups(struct vm_aref *aref, vaddr_t offset,
1122     struct vm_anon **anons, int npages)
1123 {
1124 	int slot;
1125 	struct vm_amap *amap = aref->ar_amap;
1126 
1127 	AMAP_B2SLOT(slot, offset);
1128 	slot += aref->ar_pageoff;
1129 
1130 	if ((slot + (npages - 1)) >= amap->am_nslot)
1131 		panic("amap_lookups: offset out of range");
1132 
1133 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1134 
1135 	return;
1136 }
1137 
1138 /*
1139  * amap_add: add (or replace) a page to an amap
1140  *
1141  * => caller must lock amap.
1142  * => if (replace) caller must lock anon because we might have to call
1143  *	pmap_page_protect on the anon's page.
1144  * => the "offset" passed in is the same one to later give amap_unadd().
1145  */
1146 void
1147 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1148     boolean_t replace)
1149 {
1150 	int slot;
1151 	struct vm_amap *amap = aref->ar_amap;
1152 
1153 	AMAP_B2SLOT(slot, offset);
1154 	slot += aref->ar_pageoff;
1155 
1156 	if (slot >= amap->am_nslot)
1157 		panic("amap_add: offset out of range");
1158 
1159 	if (replace) {
1160 
1161 		if (amap->am_anon[slot] == NULL)
1162 			panic("amap_add: replacing null anon");
1163 		if (amap->am_anon[slot]->an_page != NULL &&
1164 		    (amap->am_flags & AMAP_SHARED) != 0) {
1165 			pmap_page_protect(amap->am_anon[slot]->an_page,
1166 			    VM_PROT_NONE);
1167 			/*
1168 			 * XXX: suppose page is supposed to be wired somewhere?
1169 			 */
1170 		}
1171 	} else {   /* !replace */
1172 		if (amap->am_anon[slot] != NULL)
1173 			panic("amap_add: slot in use");
1174 
1175 		amap->am_bckptr[slot] = amap->am_nused;
1176 		amap->am_slots[amap->am_nused] = slot;
1177 		amap->am_nused++;
1178 	}
1179 	amap->am_anon[slot] = anon;
1180 }
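
/*
 * illustrative sketch of the bookkeeping above: adding anon "A" at byte
 * offset 5 * PAGE_SIZE (ar_pageoff 0) to an amap with am_nused == 0
 * leaves
 *
 *      am_anon[5]   == A
 *      am_slots[0]  == 5       (dense list of slots in use)
 *      am_bckptr[5] == 0       (index of slot 5 within am_slots)
 *      am_nused     == 1
 *
 * so am_slots[am_bckptr[i]] == i holds for every slot i that is in use.
 */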
1181 
1182 /*
1183  * amap_unadd: remove a page from an amap
1184  *
1185  * => caller must lock amap
1186  */
1187 void
1188 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1189 {
1190 	int ptr, slot;
1191 	struct vm_amap *amap = aref->ar_amap;
1192 
1193 	AMAP_B2SLOT(slot, offset);
1194 	slot += aref->ar_pageoff;
1195 
1196 	if (slot >= amap->am_nslot)
1197 		panic("amap_unadd: offset out of range");
1198 
1199 	if (amap->am_anon[slot] == NULL)
1200 		panic("amap_unadd: nothing there");
1201 
1202 	amap->am_anon[slot] = NULL;
1203 	ptr = amap->am_bckptr[slot];
1204 
1205 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1206 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1207 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1208 	}
1209 	amap->am_nused--;
1210 }
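
/*
 * note on the swap above: it keeps am_slots[0..am_nused-1] dense.
 * continuing the amap_add() example, if slots 5 and 9 are in use
 * (am_slots == { 5, 9 }) and slot 5 is removed, the last entry is moved
 * into its place:
 *
 *      am_slots[0]  == 9
 *      am_bckptr[9] == 0
 *      am_nused     == 1
 */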
1211 
1212 /*
1213  * amap_ref: gain a reference to an amap
1214  *
1215  * => amap must not be locked (we will lock)
1216  * => "offset" and "len" are in units of pages
1217  * => called at fork time to gain the child's reference
1218  */
1219 void
1220 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1221 {
1222 
1223 	amap->am_ref++;
1224 	if (flags & AMAP_SHARED)
1225 		amap->am_flags |= AMAP_SHARED;
1226 #ifdef UVM_AMAP_PPREF
1227 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1228 	    len != amap->am_nslot)
1229 		amap_pp_establish(amap);
1230 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1231 		if (flags & AMAP_REFALL)
1232 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1233 		else
1234 			amap_pp_adjref(amap, offset, len, 1);
1235 	}
1236 #endif
1237 }
1238 
1239 /*
1240  * amap_unref: remove a reference to an amap
1241  *
1242  * => caller must remove all pmap-level references to this amap before
1243  *	dropping the reference
1244  * => called from uvm_unmap_detach [only]  ... note that entry is no
1245  *	longer part of a map and thus has no need for locking
1246  * => amap must be unlocked (we will lock it).
1247  */
1248 void
1249 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
1250 {
1251 
1252 	/*
1253 	 * if we are the last reference, free the amap and return.
1254 	 */
1255 
1256 	if (amap->am_ref-- == 1) {
1257 		amap_wipeout(amap);	/* drops final ref and frees */
1258 		return;			/* no need to unlock */
1259 	}
1260 
1261 	/*
1262 	 * otherwise just drop the reference count(s)
1263 	 */
1264 	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1265 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1266 #ifdef UVM_AMAP_PPREF
1267 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1268 		amap_pp_establish(amap);
1269 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1270 		if (all)
1271 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1272 		else
1273 			amap_pp_adjref(amap, offset, len, -1);
1274 	}
1275 #endif
1276 }
1277