1 /*	$OpenBSD: uvm_amap.c,v 1.53 2014/07/12 18:44:01 tedu Exp $	*/
2 /*	$NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * uvm_amap.c: amap operations
31  *
32  * this file contains functions that perform operations on amaps.  see
33  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
34  */
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/proc.h>
39 #include <sys/malloc.h>
40 #include <sys/kernel.h>
41 #include <sys/pool.h>
42 
43 #include <uvm/uvm.h>
44 #include <uvm/uvm_swap.h>
45 
46 /*
47  * pool for allocation of vm_amap structures.  note that in order to
48  * avoid an endless loop, the amap pool's allocator cannot allocate
49  * memory from an amap (it currently goes through the kernel uobj, so
50  * we are ok).
51  */
52 
53 struct pool uvm_amap_pool;
54 
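/*
 * list of all amaps; amap_list_insert()/amap_list_remove() keep it
 * current, and amap_swap_off() walks it to find anons whose swap
 * slots fall in a given range.
 */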
55 LIST_HEAD(, vm_amap) amap_list;
56 
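/*
 * each slot costs MALLOC_SLOT_UNIT bytes: amap_alloc1() carves one
 * malloc'd buffer into three parallel arrays of am_maxslot entries,
 *
 *	am_slots:  am_maxslot * sizeof(int)	(dense list of in-use slots)
 *	am_bckptr: am_maxslot * sizeof(int)	(slot -> index in am_slots)
 *	am_anon:   am_maxslot * sizeof(struct vm_anon *)
 *
 * which is why amap_free() frees only am_slots (and am_ppref).
 */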
57 #define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
58 
59 /*
60  * local functions
61  */
62 
63 static struct vm_amap *amap_alloc1(int, int, int);
64 static __inline void amap_list_insert(struct vm_amap *);
65 static __inline void amap_list_remove(struct vm_amap *);
66 
67 static __inline void
68 amap_list_insert(struct vm_amap *amap)
69 {
70 	LIST_INSERT_HEAD(&amap_list, amap, am_list);
71 }
72 
73 static __inline void
74 amap_list_remove(struct vm_amap *amap)
75 {
76 	LIST_REMOVE(amap, am_list);
77 }
78 
79 #ifdef UVM_AMAP_PPREF
80 /*
81  * what is ppref?   ppref is an _optional_ amap feature which is used
82  * to keep track of reference counts on a per-page basis.  it is enabled
83  * when UVM_AMAP_PPREF is defined.
84  *
85  * when enabled, an array of ints is allocated for the pprefs.  this
86  * array is allocated only when a partial reference is added to the
87  * map (either by unmapping part of the amap, or gaining a reference
88  * to only a part of an amap).  if the malloc of the array fails
89  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
90  * that we tried to do ppref's but couldn't alloc the array so just
91  * give up (after all, this is an optional feature!).
92  *
93  * the array is divided into page sized "chunks."   for chunks of length 1,
94  * the chunk reference count plus one is stored in that chunk's slot.
95  * for chunks of length > 1 the first slot contains (the reference count
96  * plus one) * -1.    [the negative value indicates that the length is
97  * greater than one.]   the second slot of the chunk contains the length
98  * of the chunk.   here is an example:
99  *
100  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
101  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
102  *              <----------><-><----><-------><----><-><------->
103  * (x = don't care)
104  *
105  * this allows one int to hold the ref count for a whole chunk.  note
106  * that the "plus one" part is needed because a reference count of zero
107  * is neither positive nor negative (we need a way to tell one zero
108  * from a run of them).
109  *
110  * here are some in-line functions to help us.
111  */
112 
113 static __inline void pp_getreflen(int *, int, int *, int *);
114 static __inline void pp_setreflen(int *, int, int, int);
115 
116 /*
117  * pp_getreflen: get the reference and length for a specific offset
118  */
119 static __inline void
120 pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
121 {
122 
123 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
124 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
125 		*lenp = 1;
126 	} else {
127 		*refp = (ppref[offset] * -1) - 1;
128 		*lenp = ppref[offset+1];
129 	}
130 }
131 
132 /*
133  * pp_setreflen: set the reference and length for a specific offset
134  */
135 static __inline void
136 pp_setreflen(int *ppref, int offset, int ref, int len)
137 {
138 	if (len == 1) {
139 		ppref[offset] = ref + 1;
140 	} else {
141 		ppref[offset] = (ref + 1) * -1;
142 		ppref[offset+1] = len;
143 	}
144 }
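
/*
 * for example, pp_setreflen(ppref, 0, 2, 4) encodes a 4-slot chunk with
 * reference count 2 as ppref[0] = -3 and ppref[1] = 4, and pp_getreflen()
 * at offset 0 recovers ref = 2, len = 4 (the first chunk in the table
 * above).
 */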
145 #endif
146 
147 /*
148  * amap_init: called at boot time to init global amap data structures
149  */
150 
151 void
152 amap_init(void)
153 {
154 	/* Initialize the vm_amap pool. */
155 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
156 	    "amappl", &pool_allocator_nointr);
157 	pool_sethiwat(&uvm_amap_pool, 4096);
158 }
159 
160 /*
161  * amap_alloc1: internal function that allocates an amap, but does not
162  *	init the overlay.
163  */
164 static inline struct vm_amap *
165 amap_alloc1(int slots, int padslots, int waitf)
166 {
167 	struct vm_amap *amap;
168 	int totalslots;
169 
170 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
171 	    : PR_NOWAIT);
172 	if (amap == NULL)
173 		return(NULL);
174 
175 	totalslots = malloc_roundup((slots + padslots) * MALLOC_SLOT_UNIT) /
176 	    MALLOC_SLOT_UNIT;
177 	amap->am_ref = 1;
178 	amap->am_flags = 0;
179 #ifdef UVM_AMAP_PPREF
180 	amap->am_ppref = NULL;
181 #endif
182 	amap->am_maxslot = totalslots;
183 	amap->am_nslot = slots;
184 	amap->am_nused = 0;
185 
186 	amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT, M_UVMAMAP,
187 	    waitf);
188 	if (amap->am_slots == NULL)
189 		goto fail1;
190 
191 	amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
192 	    sizeof(int));
193 	amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
194 	    totalslots * sizeof(int));
195 
196 	return(amap);
197 
198 fail1:
199 	pool_put(&uvm_amap_pool, amap);
200 	return (NULL);
201 }
202 
203 /*
204  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
205  *
206  * => caller should ensure sz is a multiple of PAGE_SIZE
207  * => reference count to new amap is set to one
208  */
209 struct vm_amap *
210 amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
211 {
212 	struct vm_amap *amap;
213 	int slots, padslots;
214 
215 	AMAP_B2SLOT(slots, sz);		/* load slots */
216 	AMAP_B2SLOT(padslots, padsz);
217 
218 	amap = amap_alloc1(slots, padslots, waitf);
219 	if (amap) {
220 		memset(amap->am_anon, 0,
221 		    amap->am_maxslot * sizeof(struct vm_anon *));
222 		amap_list_insert(amap);
223 	}
224 
225 	return(amap);
226 }
227 
228 
229 /*
230  * amap_free: free an amap
231  *
232  * => the amap should have a zero reference count and be empty
233  */
234 void
235 amap_free(struct vm_amap *amap)
236 {
237 
238 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
239 	KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
240 
241 	free(amap->am_slots, M_UVMAMAP, 0);
242 #ifdef UVM_AMAP_PPREF
243 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
244 		free(amap->am_ppref, M_UVMAMAP, 0);
245 #endif
246 	pool_put(&uvm_amap_pool, amap);
247 
248 }
249 
250 /*
251  * amap_extend: extend the size of an amap (if needed)
252  *
253  * => called from uvm_map when we want to extend an amap to cover
254  *    a new mapping (rather than allocate a new one)
255  * => to safely extend an amap it should have a reference count of
256  *    one (thus it can't be shared)
257  * => XXXCDC: support padding at this level?
258  */
259 int
260 amap_extend(struct vm_map_entry *entry, vsize_t addsize)
261 {
262 	struct vm_amap *amap = entry->aref.ar_amap;
263 	int slotoff = entry->aref.ar_pageoff;
264 	int slotmapped, slotadd, slotneed, slotalloc;
265 #ifdef UVM_AMAP_PPREF
266 	int *newppref, *oldppref;
267 #endif
268 	u_int *newsl, *newbck, *oldsl, *oldbck;
269 	struct vm_anon **newover, **oldover;
270 	int slotadded;
271 
272 	/*
273 	 * first, determine how many slots we need in the amap.  don't
274 	 * forget that ar_pageoff could be non-zero: this means that
275 	 * there are some unused slots before us in the amap.
276 	 */
277 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
278 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
279 	slotneed = slotoff + slotmapped + slotadd;
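
	/*
	 * e.g. an entry with ar_pageoff 2 that currently maps 3 pages and is
	 * being extended by 1 page needs slotneed = 2 + 3 + 1 = 6 slots.
	 */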
280 
281 	/*
282 	 * case 1: we already have enough slots in the map and thus
283 	 * only need to bump the reference counts on the slots we are
284 	 * adding.
285 	 */
286 	if (amap->am_nslot >= slotneed) {
287 #ifdef UVM_AMAP_PPREF
288 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
289 			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
290 		}
291 #endif
292 		return (0);
293 	}
294 
295 	/*
296 	 * case 2: we pre-allocated extra slots and just need to bump
297 	 * nslot up to account for them.
298 	 */
299 	if (amap->am_maxslot >= slotneed) {
300 #ifdef UVM_AMAP_PPREF
301 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
302 			if ((slotoff + slotmapped) < amap->am_nslot)
303 				amap_pp_adjref(amap, slotoff + slotmapped,
304 				    (amap->am_nslot - (slotoff + slotmapped)),
305 				    1);
306 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
307 			   slotneed - amap->am_nslot);
308 		}
309 #endif
310 		amap->am_nslot = slotneed;
311 		/*
312 		 * no need to zero am_anon since that was done at
313 		 * alloc time and we never shrink an allocation.
314 		 */
315 		return (0);
316 	}
317 
318 	/*
319 	 * case 3: we need to malloc larger arrays and copy all the amap
320 	 * data over from the old arrays to the new ones.
321 	 *
322 	 * XXXCDC: could we take advantage of a kernel realloc()?
323 	 */
324 	if (slotneed >= UVM_AMAP_LARGE)
325 		return E2BIG;
326 
327 	slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
328 	    MALLOC_SLOT_UNIT;
329 #ifdef UVM_AMAP_PPREF
330 	newppref = NULL;
331 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
332 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
333 		    M_WAITOK | M_CANFAIL);
334 		if (newppref == NULL) {
335 			/* give up if malloc fails */
336 			free(amap->am_ppref, M_UVMAMAP, 0);
337 			amap->am_ppref = PPREF_NONE;
338 		}
339 	}
340 #endif
341 	newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
342 	    M_WAITOK | M_CANFAIL);
343 	if (newsl == NULL) {
344 #ifdef UVM_AMAP_PPREF
345 		if (newppref != NULL) {
346 			free(newppref, M_UVMAMAP, 0);
347 		}
348 #endif
349 		return (ENOMEM);
350 	}
351 	newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
352 	newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
353 	    sizeof(int));
354 	KASSERT(amap->am_maxslot < slotneed);
355 
356 	/* now copy everything over to new malloc'd areas... */
357 	slotadded = slotalloc - amap->am_nslot;
358 
359 	/* do am_slots */
360 	oldsl = amap->am_slots;
361 	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
362 	amap->am_slots = newsl;
363 
364 	/* do am_anon */
365 	oldover = amap->am_anon;
366 	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
367 	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
368 	    slotadded);
369 	amap->am_anon = newover;
370 
371 	/* do am_bckptr */
372 	oldbck = amap->am_bckptr;
373 	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
374 	memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
375 	amap->am_bckptr = newbck;
376 
377 #ifdef UVM_AMAP_PPREF
378 	/* do ppref */
379 	oldppref = amap->am_ppref;
380 	if (newppref) {
381 		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
382 		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
383 		amap->am_ppref = newppref;
384 		if ((slotoff + slotmapped) < amap->am_nslot)
385 			amap_pp_adjref(amap, slotoff + slotmapped,
386 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
387 		pp_setreflen(newppref, amap->am_nslot, 1,
388 		    slotneed - amap->am_nslot);
389 	}
390 #endif
391 
392 	/* update master values */
393 	amap->am_nslot = slotneed;
394 	amap->am_maxslot = slotalloc;
395 
396 	/* and free */
397 	free(oldsl, M_UVMAMAP, 0);
398 #ifdef UVM_AMAP_PPREF
399 	if (oldppref && oldppref != PPREF_NONE)
400 		free(oldppref, M_UVMAMAP, 0);
401 #endif
402 	return (0);
403 }
404 
405 /*
406  * amap_share_protect: change protection of anons in a shared amap
407  *
408  * for shared amaps, given the current data structure layout, it is
409  * not possible for us to directly locate all maps referencing the
410  * shared anon (to change the protection).  in order to protect data
411  * in shared maps we use pmap_page_protect().  [this is useful for IPC
412  * mechanisms like map entry passing that may want to write-protect
413  * all mappings of a shared amap.]  we traverse am_anon or am_slots
414  * depending on the current state of the amap.
415  */
416 void
417 amap_share_protect(struct vm_map_entry *entry, vm_prot_t prot)
418 {
419 	struct vm_amap *amap = entry->aref.ar_amap;
420 	int slots, lcv, slot, stop;
421 
422 	AMAP_B2SLOT(slots, (entry->end - entry->start));
423 	stop = entry->aref.ar_pageoff + slots;
424 
425 	if (slots < amap->am_nused) {
426 		/* cheaper to traverse am_anon */
427 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
428 			if (amap->am_anon[lcv] == NULL)
429 				continue;
430 			if (amap->am_anon[lcv]->an_page != NULL)
431 				pmap_page_protect(amap->am_anon[lcv]->an_page,
432 						  prot);
433 		}
434 		return;
435 	}
436 
437 	/* cheaper to traverse am_slots */
438 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
439 		slot = amap->am_slots[lcv];
440 		if (slot < entry->aref.ar_pageoff || slot >= stop)
441 			continue;
442 		if (amap->am_anon[slot]->an_page != NULL)
443 			pmap_page_protect(amap->am_anon[slot]->an_page, prot);
444 	}
445 	return;
446 }
447 
448 /*
449  * amap_wipeout: wipe out all anons in an amap; then free the amap!
450  *
451  * => called from amap_unref when the final reference to an amap is
452  *	discarded (i.e. the reference count has dropped to zero)
453  */
454 
455 void
456 amap_wipeout(struct vm_amap *amap)
457 {
458 	int lcv, slot;
459 	struct vm_anon *anon;
460 
461 	KASSERT(amap->am_ref == 0);
462 
463 	if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
464 		/* amap_swap_off will call us again. */
465 		return;
466 	}
467 	amap_list_remove(amap);
468 
469 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
470 		int refs;
471 
472 		slot = amap->am_slots[lcv];
473 		anon = amap->am_anon[slot];
474 
475 		if (anon == NULL || anon->an_ref == 0)
476 			panic("amap_wipeout: corrupt amap");
477 
478 		refs = --anon->an_ref;
479 		if (refs == 0) {
480 			/* we had the last reference to a vm_anon. free it. */
481 			uvm_anfree(anon);
482 		}
483 	}
484 
485 	/* now we free the map */
486 	amap->am_ref = 0;	/* ... was one */
487 	amap->am_nused = 0;
488 	amap_free(amap);	/* will free amap */
489 }
490 
491 /*
492  * amap_copy: ensure that a map entry's "needs_copy" flag is false
493  *	by copying the amap if necessary.
494  *
495  * => an entry with a null amap pointer will get a new (blank) one.
496  * => if canchunk is true, then we may clip the entry into a chunk
497  * => "startva" and "endva" are used only if canchunk is true.  they are
498  *     used to limit chunking (e.g. if you have a large space that you
499  *     know you are going to need to allocate amaps for, there is no point
500  *     in allowing that to be chunked)
501  */
502 
503 void
504 amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
505     boolean_t canchunk, vaddr_t startva, vaddr_t endva)
506 {
507 	struct vm_amap *amap, *srcamap;
508 	int slots, lcv;
509 	vaddr_t chunksize;
510 
511 	/* is there a map to copy?   if not, create one from scratch. */
512 	if (entry->aref.ar_amap == NULL) {
513 		/*
514 		 * check to see if we have a large amap that we can
515 		 * chunk.  we align startva/endva to chunk-sized
516 		 * boundaries and then clip to them.
517 		 */
518 		if (canchunk && atop(entry->end - entry->start) >=
519 		    UVM_AMAP_LARGE) {
520 			/* convert slots to bytes */
521 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
522 			startva = (startva / chunksize) * chunksize;
523 			endva = roundup(endva, chunksize);
524 			UVM_MAP_CLIP_START(map, entry, startva);
525 			/* watch out for endva wrap-around! */
526 			if (endva >= startva)
527 				UVM_MAP_CLIP_END(map, entry, endva);
528 		}
529 
530 		entry->aref.ar_pageoff = 0;
531 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
532 		    waitf);
533 		if (entry->aref.ar_amap != NULL)
534 			entry->etype &= ~UVM_ET_NEEDSCOPY;
535 		return;
536 	}
537 
538 	/*
539 	 * first check and see if we are the only map entry
540 	 * referencing the amap we currently have.  if so, then we can just
541 	 * take it over rather than copying it.  the reference count can
542 	 * only be one if we hold the only reference to the amap.
543 	 */
544 	if (entry->aref.ar_amap->am_ref == 1) {
545 		entry->etype &= ~UVM_ET_NEEDSCOPY;
546 		return;
547 	}
548 
549 	/* looks like we need to copy the map. */
550 	AMAP_B2SLOT(slots, entry->end - entry->start);
551 	amap = amap_alloc1(slots, 0, waitf);
552 	if (amap == NULL)
553 		return;
554 	srcamap = entry->aref.ar_amap;
555 
556 	/*
557 	 * need to double check reference count now.  the reference count
558 	 * could have changed while we were in malloc.  if the reference count
559 	 * dropped down to one we take over the old map rather than
560 	 * copying the amap.
561 	 */
562 	if (srcamap->am_ref == 1) {		/* take it over? */
563 		entry->etype &= ~UVM_ET_NEEDSCOPY;
564 		amap->am_ref--;		/* drop final reference to map */
565 		amap_free(amap);	/* dispose of new (unused) amap */
566 		return;
567 	}
568 
569 	/* we must copy it now. */
570 	for (lcv = 0 ; lcv < slots; lcv++) {
571 		amap->am_anon[lcv] =
572 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
573 		if (amap->am_anon[lcv] == NULL)
574 			continue;
575 		amap->am_anon[lcv]->an_ref++;
576 		amap->am_bckptr[lcv] = amap->am_nused;
577 		amap->am_slots[amap->am_nused] = lcv;
578 		amap->am_nused++;
579 	}
580 	memset(&amap->am_anon[lcv], 0,
581 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
582 
583 	/*
584 	 * drop our reference to the old amap (srcamap).
585 	 * we know that the reference count on srcamap is greater than
586 	 * one (we checked above), so there is no way we could drop
587 	 * the count to zero.  [and no need to worry about freeing it]
588 	 */
589 	srcamap->am_ref--;
590 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
591 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
592 #ifdef UVM_AMAP_PPREF
593 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
594 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
595 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
596 	}
597 #endif
598 
599 	/* install new amap. */
600 	entry->aref.ar_pageoff = 0;
601 	entry->aref.ar_amap = amap;
602 	entry->etype &= ~UVM_ET_NEEDSCOPY;
603 
604 	amap_list_insert(amap);
605 }
606 
607 /*
608  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
609  *
610  *	called during fork(2) when the parent process has a wired map
611  *	entry.   in that case we want to avoid write-protecting pages
612  *	in the parent's map (e.g. like what you'd do for a COW page)
613  *	so we resolve the COW here.
614  *
615  * => assume parent's entry was wired, thus all pages are resident.
616  * => assume pages that are loaned out (loan_count) are already mapped
617  *	read-only in all maps, and thus no need for us to worry about them
618  * => caller passes child's map/entry in to us
619  * => XXXCDC: out of memory should cause fork to fail, but there is
620  *	currently no easy way to do this (needs fix)
621  */
622 
623 void
624 amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
625 {
626 	struct vm_amap *amap = entry->aref.ar_amap;
627 	int lcv, slot;
628 	struct vm_anon *anon, *nanon;
629 	struct vm_page *pg, *npg;
630 
631 	/*
632 	 * note that if we wait, we must ReStart the "lcv" for loop because
633 	 * some other process could reorder the anons in the
634 	 * am_anon[] array on us.
635 	 */
636 ReStart:
637 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
638 		/* get the page */
639 		slot = amap->am_slots[lcv];
640 		anon = amap->am_anon[slot];
641 		pg = anon->an_page;
642 
643 		/* page must be resident since parent is wired */
644 		if (pg == NULL)
645 			panic("amap_cow_now: non-resident wired page"
646 			    " in anon %p", anon);
647 
648 		/*
649 		 * if the anon ref count is one and the page is not loaned,
650 		 * then we are safe (the child has exclusive access to the
651 		 * page).  if the page is loaned, then it must already be
652 		 * mapped read-only.
653 		 *
654 		 * we only need to get involved when these are not true.
655 		 * [note: if loan_count == 0, then the anon must own the page]
656 		 */
657 		if (anon->an_ref > 1 && pg->loan_count == 0) {
658 			/*
659 			 * if the page is busy then we have to wait for
660 			 * it and then restart.
661 			 */
662 			if (pg->pg_flags & PG_BUSY) {
663 				atomic_setbits_int(&pg->pg_flags, PG_WANTED);
664 				UVM_WAIT(pg, FALSE, "cownow", 0);
665 				goto ReStart;
666 			}
667 
668 			/* ok, time to do a copy-on-write to a new anon */
669 			nanon = uvm_analloc();
670 			if (nanon) {
671 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
672 			} else
673 				npg = NULL;	/* XXX: quiet gcc warning */
674 
675 			if (nanon == NULL || npg == NULL) {
676 				/* out of memory */
677 				/*
678 				 * XXXCDC: we should cause fork to fail, but
679 				 * we can't ...
680 				 */
681 				if (nanon) {
682 					uvm_anfree(nanon);
683 				}
684 				uvm_wait("cownowpage");
685 				goto ReStart;
686 			}
687 
688 			/*
689 			 * got it... now we can copy the data and replace anon
690 			 * with our new one...
691 			 */
692 			uvm_pagecopy(pg, npg);		/* old -> new */
693 			anon->an_ref--;			/* can't drop to zero */
694 			amap->am_anon[slot] = nanon;	/* replace */
695 
696 			/*
697 			 * drop PG_BUSY on new page ... since we have had its
698 			 * owner locked the whole time it can't be
699 			 * PG_RELEASED | PG_WANTED.
700 			 */
701 			atomic_clearbits_int(&npg->pg_flags, PG_BUSY|PG_FAKE);
702 			UVM_PAGE_OWN(npg, NULL);
703 			uvm_lock_pageq();
704 			uvm_pageactivate(npg);
705 			uvm_unlock_pageq();
706 		}
707 	}
708 }
709 
710 /*
711  * amap_splitref: split a single reference into two separate references
712  *
713  * => called from uvm_map's clip routines
714  */
715 void
716 amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
717 {
718 	int leftslots;
719 
720 	AMAP_B2SLOT(leftslots, offset);
721 	if (leftslots == 0)
722 		panic("amap_splitref: split at zero offset");
723 
724 	/* make sure the split point actually falls inside the amap. */
725 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
726 		panic("amap_splitref: map size check failed");
727 
728 #ifdef UVM_AMAP_PPREF
729         /* establish ppref before we add a duplicate reference to the amap */
730 	if (origref->ar_amap->am_ppref == NULL)
731 		amap_pp_establish(origref->ar_amap);
732 #endif
733 
734 	splitref->ar_amap = origref->ar_amap;
735 	splitref->ar_amap->am_ref++;		/* not a share reference */
736 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
737 }
738 
739 #ifdef UVM_AMAP_PPREF
740 
741 /*
742  * amap_pp_establish: add a ppref array to an amap, if possible
743  */
744 void
745 amap_pp_establish(struct vm_amap *amap)
746 {
747 
748 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
749 	    M_UVMAMAP, M_NOWAIT|M_ZERO);
750 
751 	/* if we fail then we just won't use ppref for this amap */
752 	if (amap->am_ppref == NULL) {
753 		amap->am_ppref = PPREF_NONE;	/* not using it */
754 		return;
755 	}
756 
757 	/* init ppref */
758 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
759 }
760 
761 /*
762  * amap_pp_adjref: adjust reference count to a part of an amap using the
763  * per-page reference count array.
764  *
765  * => caller must check that ppref != PPREF_NONE before calling
766  */
767 void
768 amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
769 {
770  	int stopslot, *ppref, lcv, prevlcv;
771  	int ref, len, prevref, prevlen;
772 
773 	stopslot = curslot + slotlen;
774 	ppref = amap->am_ppref;
775  	prevlcv = 0;
776 
777 	/*
778  	 * first advance to the correct place in the ppref array,
779  	 * fragment if needed.
780 	 */
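	/*
	 * e.g. if slots 0-7 form a single chunk with ref 1 and curslot is 3,
	 * the loop below splits it into chunks 0-2 and 3-7 (both ref 1) so
	 * that the adjustment further down starts exactly at curslot.
	 */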
781 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
782 		pp_getreflen(ppref, lcv, &ref, &len);
783 		if (lcv + len > curslot) {     /* goes past start? */
784 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
785 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
786 			len = curslot - lcv;   /* new length of entry @ lcv */
787 		}
788 		prevlcv = lcv;
789 	}
790 	if (lcv != 0)
791 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
792 	else {
793 		/* Ensure that the "prevref == ref" test below always
794 		 * fails, since we're starting from the beginning of
795 		 * the ppref array; that is, there is no previous
796 		 * chunk.
797 		 */
798 		prevref = -1;
799 		prevlen = 0;
800 	}
801 
802 	/*
803 	 * now adjust reference counts in range.  merge the first
804 	 * changed entry with the last unchanged entry if possible.
805 	 */
806 	if (lcv != curslot)
807 		panic("amap_pp_adjref: overshot target");
808 
809 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
810 		pp_getreflen(ppref, lcv, &ref, &len);
811 		if (lcv + len > stopslot) {     /* goes past end? */
812 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
813 			pp_setreflen(ppref, stopslot, ref,
814 			    len - (stopslot - lcv));
815 			len = stopslot - lcv;
816 		}
817 		ref += adjval;
818 		if (ref < 0)
819 			panic("amap_pp_adjref: negative reference count");
820 		if (lcv == prevlcv + prevlen && ref == prevref) {
821 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
822 		} else {
823 			pp_setreflen(ppref, lcv, ref, len);
824 		}
825 		if (ref == 0)
826 			amap_wiperange(amap, lcv, len);
827 	}
828 
829 }
830 
831 /*
832  * amap_wiperange: wipe out a range of an amap
833  * [different from amap_wipeout because the amap is kept intact]
834  */
835 void
836 amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
837 {
838 	int byanon, lcv, stop, curslot, ptr, slotend;
839 	struct vm_anon *anon;
840 
841 	/*
842 	 * we can either traverse the amap by am_anon or by am_slots depending
843 	 * on which is cheaper.    decide now.
844 	 */
845 	if (slots < amap->am_nused) {
846 		byanon = TRUE;
847 		lcv = slotoff;
848 		stop = slotoff + slots;
849 	} else {
850 		byanon = FALSE;
851 		lcv = 0;
852 		stop = amap->am_nused;
853 		slotend = slotoff + slots;
854 	}
855 
856 	while (lcv < stop) {
857 		int refs;
858 
859   		if (byanon) {
860 			curslot = lcv++;	/* lcv advances here */
861 			if (amap->am_anon[curslot] == NULL)
862 				continue;
863 		} else {
864 			curslot = amap->am_slots[lcv];
865 			if (curslot < slotoff || curslot >= slotend) {
866 				lcv++;		/* lcv advances here */
867 				continue;
868 			}
869 			stop--;	/* drop stop, since anon will be removed */
870 		}
871 		anon = amap->am_anon[curslot];
872 
873 		/* remove it from the amap */
874 		amap->am_anon[curslot] = NULL;
875 		ptr = amap->am_bckptr[curslot];
876 		if (ptr != (amap->am_nused - 1)) {
877 			amap->am_slots[ptr] =
878 			    amap->am_slots[amap->am_nused - 1];
879 			amap->am_bckptr[amap->am_slots[ptr]] =
880 			    ptr;    /* back ptr. */
881 		}
882 		amap->am_nused--;
883 
884 		/* drop anon reference count */
885 		refs = --anon->an_ref;
886 		if (refs == 0) {
887 			/*
888 			 * we just eliminated the last reference to an anon.
889 			 * free it.
890 			 */
891 			uvm_anfree(anon);
892 		}
893 	}
894 }
895 
896 #endif
897 
898 /*
899  * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
900  *
901  * => note that we don't always traverse all anons, e.g. when amaps
902  *    are being wiped out or anons have been released.
903  * => return TRUE on failure.
904  */
905 
906 boolean_t
907 amap_swap_off(int startslot, int endslot)
908 {
909 	struct vm_amap *am;
910 	struct vm_amap *am_next;
911 	struct vm_amap marker_prev;
912 	struct vm_amap marker_next;
913 	boolean_t rv = FALSE;
914 
915 #if defined(DIAGNOSTIC)
916 	memset(&marker_prev, 0, sizeof(marker_prev));
917 	memset(&marker_next, 0, sizeof(marker_next));
918 #endif /* defined(DIAGNOSTIC) */
919 
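	/*
	 * marker_prev and marker_next are dummy list entries bracketing the
	 * current amap.  they let us find our place in amap_list again even
	 * if the current amap is wiped out while its anons are paged in.
	 */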
920 	for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
921 		int i;
922 
923 		LIST_INSERT_BEFORE(am, &marker_prev, am_list);
924 		LIST_INSERT_AFTER(am, &marker_next, am_list);
925 
926 		if (am->am_nused <= 0) {
927 			goto next;
928 		}
929 
930 		for (i = 0; i < am->am_nused; i++) {
931 			int slot;
932 			int swslot;
933 			struct vm_anon *anon;
934 
935 			slot = am->am_slots[i];
936 			anon = am->am_anon[slot];
937 
938 			swslot = anon->an_swslot;
939 			if (swslot < startslot || endslot <= swslot) {
940 				continue;
941 			}
942 
943 			am->am_flags |= AMAP_SWAPOFF;
944 
945 			rv = uvm_anon_pagein(anon);
946 
947 			am->am_flags &= ~AMAP_SWAPOFF;
948 			if (amap_refs(am) == 0) {
949 				amap_wipeout(am);
950 				am = NULL;
951 				break;
952 			}
953 			if (rv) {
954 				break;
955 			}
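			/*
			 * uvm_anon_pagein() can sleep, and the amap's
			 * am_slots array may have been rearranged while we
			 * slept, so rescan this amap from the start.
			 */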
956 			i = 0;
957 		}
958 
959 next:
960 		KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
961 		    LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
962 		    &marker_next);
963 		am_next = LIST_NEXT(&marker_next, am_list);
964 		LIST_REMOVE(&marker_prev, am_list);
965 		LIST_REMOVE(&marker_next, am_list);
966 	}
967 
968 	return rv;
969 }
970 
971 /*
972  * amap_lookup: look up a page in an amap
973  */
974 struct vm_anon *
975 amap_lookup(struct vm_aref *aref, vaddr_t offset)
976 {
977 	int slot;
978 	struct vm_amap *amap = aref->ar_amap;
979 
980 	AMAP_B2SLOT(slot, offset);
981 	slot += aref->ar_pageoff;
982 
983 	if (slot >= amap->am_nslot)
984 		panic("amap_lookup: offset out of range");
985 
986 	return(amap->am_anon[slot]);
987 }
988 
989 /*
990  * amap_lookups: look up a range of pages in an amap
991  *
992  * => XXXCDC: this interface is biased toward array-based amaps.  fix.
993  */
994 void
995 amap_lookups(struct vm_aref *aref, vaddr_t offset,
996     struct vm_anon **anons, int npages)
997 {
998 	int slot;
999 	struct vm_amap *amap = aref->ar_amap;
1000 
1001 	AMAP_B2SLOT(slot, offset);
1002 	slot += aref->ar_pageoff;
1003 
1004 	if ((slot + (npages - 1)) >= amap->am_nslot)
1005 		panic("amap_lookups: offset out of range");
1006 
1007 	memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
1008 
1009 	return;
1010 }
1011 
1012 /*
1013  * amap_add: add (or replace) a page to an amap
1014  *
1015  * => the "offset" passed in is the same one later given to amap_unadd().
1016  */
1017 void
1018 amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
1019     boolean_t replace)
1020 {
1021 	int slot;
1022 	struct vm_amap *amap = aref->ar_amap;
1023 
1024 	AMAP_B2SLOT(slot, offset);
1025 	slot += aref->ar_pageoff;
1026 
1027 	if (slot >= amap->am_nslot)
1028 		panic("amap_add: offset out of range");
1029 
1030 	if (replace) {
1031 		if (amap->am_anon[slot] == NULL)
1032 			panic("amap_add: replacing null anon");
1033 		if (amap->am_anon[slot]->an_page != NULL &&
1034 		    (amap->am_flags & AMAP_SHARED) != 0) {
1035 			pmap_page_protect(amap->am_anon[slot]->an_page,
1036 			    VM_PROT_NONE);
1037 			/*
1038 			 * XXX: suppose page is supposed to be wired somewhere?
1039 			 */
1040 		}
1041 	} else {   /* !replace */
1042 		if (amap->am_anon[slot] != NULL)
1043 			panic("amap_add: slot in use");
1044 
1045 		amap->am_bckptr[slot] = amap->am_nused;
1046 		amap->am_slots[amap->am_nused] = slot;
1047 		amap->am_nused++;
1048 	}
1049 	amap->am_anon[slot] = anon;
1050 }
1051 
1052 /*
1053  * amap_unadd: remove a page from an amap
1054  */
1055 void
1056 amap_unadd(struct vm_aref *aref, vaddr_t offset)
1057 {
1058 	int ptr, slot;
1059 	struct vm_amap *amap = aref->ar_amap;
1060 
1061 	AMAP_B2SLOT(slot, offset);
1062 	slot += aref->ar_pageoff;
1063 
1064 	if (slot >= amap->am_nslot)
1065 		panic("amap_unadd: offset out of range");
1066 
1067 	if (amap->am_anon[slot] == NULL)
1068 		panic("amap_unadd: nothing there");
1069 
1070 	amap->am_anon[slot] = NULL;
1071 	ptr = amap->am_bckptr[slot];
1072 
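	/*
	 * am_slots is kept dense: if this was not the last entry, move the
	 * last entry into the hole and repair its back pointer before
	 * shrinking am_nused.
	 */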
1073 	if (ptr != (amap->am_nused - 1)) {	/* swap to keep slots contig? */
1074 		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
1075 		amap->am_bckptr[amap->am_slots[ptr]] = ptr;	/* back link */
1076 	}
1077 	amap->am_nused--;
1078 }
1079 
1080 /*
1081  * amap_ref: gain a reference to an amap
1082  *
1083  * => "offset" and "len" are in units of pages
1084  * => called at fork time to gain the child's reference
1085  */
1086 void
1087 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
1088 {
1089 
1090 	amap->am_ref++;
1091 	if (flags & AMAP_SHARED)
1092 		amap->am_flags |= AMAP_SHARED;
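
	/*
	 * a ppref array is only needed once a reference covers part of the
	 * amap; establish one lazily here so per-page counts stay exact.
	 */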
1093 #ifdef UVM_AMAP_PPREF
1094 	if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
1095 	    len != amap->am_nslot)
1096 		amap_pp_establish(amap);
1097 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1098 		if (flags & AMAP_REFALL)
1099 			amap_pp_adjref(amap, 0, amap->am_nslot, 1);
1100 		else
1101 			amap_pp_adjref(amap, offset, len, 1);
1102 	}
1103 #endif
1104 }
1105 
1106 /*
1107  * amap_unref: remove a reference to an amap
1108  *
1109  * => caller must remove all pmap-level references to this amap before
1110  *	dropping the reference
1111  * => called from uvm_unmap_detach [only]  ... note that entry is no
1112  *	longer part of a map
1113  */
1114 void
1115 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
1116 {
1117 
1118 	/* if we are the last reference, free the amap and return. */
1119 	if (amap->am_ref-- == 1) {
1120 		amap_wipeout(amap);	/* drops final ref and frees */
1121 		return;
1122 	}
1123 
1124 	/* otherwise just drop the reference count(s) */
1125 	if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
1126 		amap->am_flags &= ~AMAP_SHARED;	/* clear shared flag */
1127 #ifdef UVM_AMAP_PPREF
1128 	if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
1129 		amap_pp_establish(amap);
1130 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
1131 		if (all)
1132 			amap_pp_adjref(amap, 0, amap->am_nslot, -1);
1133 		else
1134 			amap_pp_adjref(amap, offset, len, -1);
1135 	}
1136 #endif
1137 }
1138