xref: /netbsd-src/sys/uvm/uvm_amap.c (revision de1dfb1250df962f1ff3a011772cf58e605aed11)
1 /*	$NetBSD: uvm_amap.c,v 1.55 2004/05/12 20:09:50 yamt Exp $	*/
2 
3 /*
4  *
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Charles D. Cranor and
19  *      Washington University.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * uvm_amap.c: amap operations
37  */
38 
39 /*
40  * this file contains functions that perform operations on amaps.  see
41  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.55 2004/05/12 20:09:50 yamt Exp $");
46 
47 #undef UVM_AMAP_INLINE		/* enable/disable amap inlines */
48 
49 #include "opt_uvmhist.h"
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/malloc.h>
55 #include <sys/kernel.h>
56 #include <sys/pool.h>
57 
58 #define UVM_AMAP_C		/* ensure disabled inlines are in */
59 #include <uvm/uvm.h>
60 #include <uvm/uvm_swap.h>
61 
62 /*
63  * pool for allocation of vm_amap structures.  note that the pool has
64  * its own simplelock for its protection.  also note that in order to
65  * avoid an endless loop, the amap pool's allocator cannot allocate
66  * memory from an amap (it currently goes through the kernel uobj, so
67  * we are ok).
68  */
69 POOL_INIT(uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, "amappl",
70     &pool_allocator_nointr);
71 
72 MALLOC_DEFINE(M_UVMAMAP, "UVM amap", "UVM amap and related structures");
73 
74 /*
75  * local functions
76  */
77 
78 static struct vm_amap *amap_alloc1(int, int, int);
79 
80 #ifdef UVM_AMAP_PPREF
81 /*
82  * what is ppref?   ppref is an _optional_ amap feature which is used
83  * to keep track of reference counts on a per-page basis.  it is enabled
84  * when UVM_AMAP_PPREF is defined.
85  *
86  * when enabled, an array of ints is allocated for the pprefs.  this
87  * array is allocated only when a partial reference is added to the
88  * map (either by unmapping part of the amap, or gaining a reference
89  * to only a part of an amap).  if the malloc of the array fails
90  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
91  * that we tried to do ppref's but couldn't alloc the array so just
92  * give up (after all, this is an optional feature!).
93  *
94  * the array is divided into page sized "chunks."   for chunks of length 1,
95  * the chunk reference count plus one is stored in that chunk's slot.
96  * for chunks of length > 1 the first slot contains (the reference count
97  * plus one) * -1.    [the negative value indicates that the length is
98  * greater than one.]   the second slot of the chunk contains the length
99  * of the chunk.   here is an example:
100  *
101  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
102  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
103  *              <----------><-><----><-------><----><-><------->
104  * (x = don't care)
105  *
106  * this allows one int to contain the ref count for the whole
107  * chunk.    note that the "plus one" part is needed because a reference
108  * count of zero is neither positive nor negative (need a way to tell
109  * if we've got one zero or a bunch of them).
110  *
111  * here are some in-line functions to help us.
112  */
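
/*
 * a worked decode of the example above (a sketch only; "off", "ref" and
 * "len" are illustrative names): ppref[0] == -3 is negative, so the
 * chunk starting at slot 0 has reference count -(-3) - 1 == 2 and its
 * length is stored in ppref[1] == 4.  a positive entry such as
 * ppref[6] == 2 describes a single-slot chunk with reference count
 * 2 - 1 == 1.  this mirrors pp_getreflen() below:
 *
 *	if (ppref[off] > 0) {
 *		ref = ppref[off] - 1;	len = 1;
 *	} else {
 *		ref = -ppref[off] - 1;	len = ppref[off + 1];
 *	}
 */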
113 
114 static __inline void pp_getreflen(int *, int, int *, int *);
115 static __inline void pp_setreflen(int *, int, int, int);
116 
117 /*
118  * pp_getreflen: get the reference and length for a specific offset
119  *
120  * => ppref's amap must be locked
121  */
122 static __inline void
123 pp_getreflen(ppref, offset, refp, lenp)
124 	int *ppref, offset, *refp, *lenp;
125 {
126 
127 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
128 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
129 		*lenp = 1;
130 	} else {
131 		*refp = (ppref[offset] * -1) - 1;
132 		*lenp = ppref[offset+1];
133 	}
134 }
135 
136 /*
137  * pp_setreflen: set the reference and length for a specific offset
138  *
139  * => ppref's amap must be locked
140  */
141 static __inline void
142 pp_setreflen(ppref, offset, ref, len)
143 	int *ppref, offset, ref, len;
144 {
145 	if (len == 0)
146 		return;
147 	if (len == 1) {
148 		ppref[offset] = ref + 1;
149 	} else {
150 		ppref[offset] = (ref + 1) * -1;
151 		ppref[offset+1] = len;
152 	}
153 }
154 #endif
155 
156 /*
157  * amap_alloc1: internal function that allocates an amap, but does not
158  *	init the overlay.
159  *
160  * => lock on returned amap is init'd
161  */
162 static inline struct vm_amap *
163 amap_alloc1(slots, padslots, waitf)
164 	int slots, padslots, waitf;
165 {
166 	struct vm_amap *amap;
167 	int totalslots;
168 
169 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
170 	if (amap == NULL)
171 		return(NULL);
172 
173 	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
174 	    sizeof(int);
175 	simple_lock_init(&amap->am_l);
176 	amap->am_ref = 1;
177 	amap->am_flags = 0;
178 #ifdef UVM_AMAP_PPREF
179 	amap->am_ppref = NULL;
180 #endif
181 	amap->am_maxslot = totalslots;
182 	amap->am_nslot = slots;
183 	amap->am_nused = 0;
184 
185 	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
186 	    waitf);
187 	if (amap->am_slots == NULL)
188 		goto fail1;
189 
190 	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
191 	if (amap->am_bckptr == NULL)
192 		goto fail2;
193 
194 	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
195 	    M_UVMAMAP, waitf);
196 	if (amap->am_anon == NULL)
197 		goto fail3;
198 
199 	return(amap);
200 
201 fail3:
202 	free(amap->am_bckptr, M_UVMAMAP);
203 fail2:
204 	free(amap->am_slots, M_UVMAMAP);
205 fail1:
206 	pool_put(&uvm_amap_pool, amap);
207 	return (NULL);
208 }
209 
210 /*
211  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
212  *
213  * => caller should ensure sz is a multiple of PAGE_SIZE
214  * => reference count to new amap is set to one
215  * => new amap is returned unlocked
216  */
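
/*
 * for instance, a caller needing anon backing for a four-page mapping
 * might do (a sketch only; error handling omitted):
 *
 *	amap = amap_alloc(4 << PAGE_SHIFT, 0, M_WAITOK);
 */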
217 
218 struct vm_amap *
219 amap_alloc(sz, padsz, waitf)
220 	vaddr_t sz, padsz;
221 	int waitf;
222 {
223 	struct vm_amap *amap;
224 	int slots, padslots;
225 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
226 
227 	AMAP_B2SLOT(slots, sz);
228 	AMAP_B2SLOT(padslots, padsz);
229 
230 	amap = amap_alloc1(slots, padslots, waitf);
231 	if (amap)
232 		memset(amap->am_anon, 0,
233 		    amap->am_maxslot * sizeof(struct vm_anon *));
234 
235 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
236 	return(amap);
237 }
238 
239 
240 /*
241  * amap_free: free an amap
242  *
243  * => the amap must be unlocked
244  * => the amap should have a zero reference count and be empty
245  */
246 void
247 amap_free(amap)
248 	struct vm_amap *amap;
249 {
250 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
251 
252 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
253 	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
254 	free(amap->am_slots, M_UVMAMAP);
255 	free(amap->am_bckptr, M_UVMAMAP);
256 	free(amap->am_anon, M_UVMAMAP);
257 #ifdef UVM_AMAP_PPREF
258 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
259 		free(amap->am_ppref, M_UVMAMAP);
260 #endif
261 	pool_put(&uvm_amap_pool, amap);
262 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
263 }
264 
265 /*
266  * amap_extend: extend the size of an amap (if needed)
267  *
268  * => called from uvm_map when we want to extend an amap to cover
269  *    a new mapping (rather than allocate a new one)
270  * => amap should be unlocked (we will lock it)
271  * => to safely extend an amap it should have a reference count of
272  *    one (thus it can't be shared)
273  */
274 int
275 amap_extend(entry, addsize, flags)
276 	struct vm_map_entry *entry;
277 	vsize_t addsize;
278 	int flags;
279 {
280 	struct vm_amap *amap = entry->aref.ar_amap;
281 	int slotoff = entry->aref.ar_pageoff;
282 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
283 	int slotadj, slotspace;
284 #ifdef UVM_AMAP_PPREF
285 	int *newppref, *oldppref;
286 #endif
287 	int i, *newsl, *newbck, *oldsl, *oldbck;
288 	struct vm_anon **newover, **oldover;
289 	int mflag = (flags & AMAP_EXTEND_NOWAIT) ? M_NOWAIT :
290 		        (M_WAITOK | M_CANFAIL);
291 
292 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
293 
294 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
295 	    entry, addsize, flags, 0);
296 
297 	/*
298 	 * first, determine how many slots we need in the amap.  don't
299 	 * forget that ar_pageoff could be non-zero: this means that
300 	 * there are some unused slots before us in the amap.
301 	 */
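
	/*
	 * a worked example (hypothetical numbers, assuming PAGE_SIZE is
	 * 4096): an entry mapping 3 pages at ar_pageoff == 2 that is being
	 * extended forwards by addsize == 8192 gives slotmapped = 3,
	 * slotadd = 2 and slotneed = slotoff + slotmapped + slotadd = 7.
	 * extending the same entry backwards instead gives slotneed = 5
	 * and slotadj = slotadd - slotoff = 0, so the unused slots in
	 * front of the mapping absorb the extension (case 1b below).
	 */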
302 
303 	amap_lock(amap);
304 	KASSERT(amap_refs(amap) == 1); /* amap can't be shared */
305 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
306 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
307 	if (flags & AMAP_EXTEND_FORWARDS) {
308 		slotneed = slotoff + slotmapped + slotadd;
309 		slotadj = 0;
310 		slotspace = 0;
311 	}
312 	else {
313 		slotneed = slotadd + slotmapped;
314 		slotadj = slotadd - slotoff;
315 		slotspace = amap->am_maxslot - slotmapped;
316 	}
317 
318 	/*
319 	 * case 1: we already have enough slots in the map and thus
320 	 * only need to bump the reference counts on the slots we are
321 	 * adding.
322 	 */
323 
324 	if (flags & AMAP_EXTEND_FORWARDS) {
325 		if (amap->am_nslot >= slotneed) {
326 #ifdef UVM_AMAP_PPREF
327 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
328 				amap_pp_adjref(amap, slotoff + slotmapped,
329 				    slotadd, 1);
330 			}
331 #endif
332 			amap_unlock(amap);
333 			UVMHIST_LOG(maphist,
334 			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
335 			    amap, slotneed, 0, 0);
336 			return 0;
337 		}
338 	} else {
339 		if (slotadj <= 0) {
340 			slotoff -= slotadd;
341 			entry->aref.ar_pageoff = slotoff;
342 #ifdef UVM_AMAP_PPREF
343 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
344 				amap_pp_adjref(amap, slotoff, slotadd, 1);
345 			}
346 #endif
347 			amap_unlock(amap);
348 			UVMHIST_LOG(maphist,
349 			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
350 			    amap, slotneed, 0, 0);
351 			return 0;
352 		}
353 	}
354 
355 	/*
356 	 * case 2: we pre-allocated slots for use and we just need to
357 	 * bump nslot up to account for these slots.
358 	 */
359 
360 	if (amap->am_maxslot >= slotneed) {
361 		if (flags & AMAP_EXTEND_FORWARDS) {
362 #ifdef UVM_AMAP_PPREF
363 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
364 				if ((slotoff + slotmapped) < amap->am_nslot)
365 					amap_pp_adjref(amap,
366 					    slotoff + slotmapped,
367 					    (amap->am_nslot -
368 					    (slotoff + slotmapped)), 1);
369 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
370 				    slotneed - amap->am_nslot);
371 			}
372 #endif
373 			amap->am_nslot = slotneed;
374 			amap_unlock(amap);
375 
376 			/*
377 			 * no need to zero am_anon since that was done at
378 			 * alloc time and we never shrink an allocation.
379 			 */
380 
381 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
382 			    "slotneed=%d", amap, slotneed, 0, 0);
383 			return 0;
384 		} else {
385 #ifdef UVM_AMAP_PPREF
386 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
387 				/*
388 				 * Slide up the ref counts on the pages that
389 				 * are actually in use.
390 				 */
391 				memmove(amap->am_ppref + slotspace,
392 				    amap->am_ppref + slotoff,
393 				    slotmapped * sizeof(int));
394 				/*
395 				 * Mark the (adjusted) gap at the front as
396 				 * referenced/not referenced.
397 				 */
398 				pp_setreflen(amap->am_ppref,
399 				    0, 0, slotspace - slotadd);
400 				pp_setreflen(amap->am_ppref,
401 				    slotspace - slotadd, 1, slotadd);
402 			}
403 #endif
404 
405 			/*
406 			 * Slide the anon pointers up and clear out
407 			 * the space we just made.
408 			 */
409 			memmove(amap->am_anon + slotspace,
410 			    amap->am_anon + slotoff,
411 			    slotmapped * sizeof(struct vm_anon*));
412 			memset(amap->am_anon + slotoff, 0,
413 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
414 
415 			/*
416 			 * Slide the backpointers up, but don't bother
417 			 * wiping out the old slots.
418 			 */
419 			memmove(amap->am_bckptr + slotspace,
420 			    amap->am_bckptr + slotoff,
421 			    slotmapped * sizeof(int));
422 
423 			/*
424 			 * Adjust all the useful active slot numbers.
425 			 */
426 			for (i = 0; i < amap->am_nused; i++)
427 				amap->am_slots[i] += (slotspace - slotoff);
428 
429 			/*
430 			 * We just filled all the empty space in the
431 			 * front of the amap by activating a few new
432 			 * slots.
433 			 */
434 			amap->am_nslot = amap->am_maxslot;
435 			entry->aref.ar_pageoff = slotspace - slotadd;
436 			amap_unlock(amap);
437 
438 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
439 			    "slotneed=%d", amap, slotneed, 0, 0);
440 			return 0;
441 		}
442 	}
443 
444 	/*
445 	 * case 3: we need to malloc a new amap and copy all the amap
446 	 * data over from the old amap to the new one.
447 	 *
448 	 * note that the use of a kernel realloc() probably would not
449 	 * help here, since we wish to abort cleanly if one of the
450 	 * three (or four) mallocs fails.
451 	 */
452 
453 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
454 	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
455 #ifdef UVM_AMAP_PPREF
456 	newppref = NULL;
457 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
458 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
459 #endif
460 	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
461 	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
462 	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
463 		    mflag);
464 	if (newsl == NULL || newbck == NULL || newover == NULL) {
465 #ifdef UVM_AMAP_PPREF
466 		if (newppref != NULL) {
467 			free(newppref, M_UVMAMAP);
468 		}
469 #endif
470 		if (newsl != NULL) {
471 			free(newsl, M_UVMAMAP);
472 		}
473 		if (newbck != NULL) {
474 			free(newbck, M_UVMAMAP);
475 		}
476 		if (newover != NULL) {
477 			free(newover, M_UVMAMAP);
478 		}
479 		return ENOMEM;
480 	}
481 	amap_lock(amap);
482 	KASSERT(amap->am_maxslot < slotneed);
483 
484 	/*
485 	 * now copy everything over to new malloc'd areas...
486 	 */
487 
488 	slotadded = slotalloc - amap->am_nslot;
489 	if (!(flags & AMAP_EXTEND_FORWARDS))
490 		slotspace = slotalloc - slotmapped;
491 
492 	/* do am_slots */
493 	oldsl = amap->am_slots;
494 	if (flags & AMAP_EXTEND_FORWARDS)
495 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
496 	else
497 		for (i = 0; i < amap->am_nused; i++)
498 			newsl[i] = oldsl[i] + slotspace - slotoff;
499 	amap->am_slots = newsl;
500 
501 	/* do am_anon */
502 	oldover = amap->am_anon;
503 	if (flags & AMAP_EXTEND_FORWARDS) {
504 		memcpy(newover, oldover,
505 		    sizeof(struct vm_anon *) * amap->am_nslot);
506 		memset(newover + amap->am_nslot, 0,
507 		    sizeof(struct vm_anon *) * slotadded);
508 	} else {
509 		memcpy(newover + slotspace, oldover + slotoff,
510 		    sizeof(struct vm_anon *) * slotmapped);
511 		memset(newover, 0,
512 		    sizeof(struct vm_anon *) * slotspace);
513 	}
514 	amap->am_anon = newover;
515 
516 	/* do am_bckptr */
517 	oldbck = amap->am_bckptr;
518 	if (flags & AMAP_EXTEND_FORWARDS)
519 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
520 	else
521 		memcpy(newbck + slotspace, oldbck + slotoff,
522 		    sizeof(int) * slotmapped);
523 	amap->am_bckptr = newbck;
524 
525 #ifdef UVM_AMAP_PPREF
526 	/* do ppref */
527 	oldppref = amap->am_ppref;
528 	if (newppref) {
529 		if (flags & AMAP_EXTEND_FORWARDS) {
530 			memcpy(newppref, oldppref,
531 			    sizeof(int) * amap->am_nslot);
532 			memset(newppref + amap->am_nslot, 0,
533 			    sizeof(int) * slotadded);
534 		} else {
535 			memcpy(newppref + slotspace, oldppref + slotoff,
536 			    sizeof(int) * slotmapped);
537 		}
538 		amap->am_ppref = newppref;
539 		if ((flags & AMAP_EXTEND_FORWARDS) &&
540 		    (slotoff + slotmapped) < amap->am_nslot)
541 			amap_pp_adjref(amap, slotoff + slotmapped,
542 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
543 		if (flags & AMAP_EXTEND_FORWARDS)
544 			pp_setreflen(newppref, amap->am_nslot, 1,
545 			    slotneed - amap->am_nslot);
546 		else {
547 			pp_setreflen(newppref, 0, 0,
548 			    slotalloc - slotneed);
549 			pp_setreflen(newppref, slotalloc - slotneed, 1,
550 			    slotneed - slotmapped);
551 		}
552 	} else {
553 		if (amap->am_ppref)
554 			amap->am_ppref = PPREF_NONE;
555 	}
556 #endif
557 
558 	/* update master values */
559 	if (flags & AMAP_EXTEND_FORWARDS)
560 		amap->am_nslot = slotneed;
561 	else {
562 		entry->aref.ar_pageoff = slotspace - slotadd;
563 		amap->am_nslot = slotalloc;
564 	}
565 	amap->am_maxslot = slotalloc;
566 
567 	amap_unlock(amap);
568 	free(oldsl, M_UVMAMAP);
569 	free(oldbck, M_UVMAMAP);
570 	free(oldover, M_UVMAMAP);
571 #ifdef UVM_AMAP_PPREF
572 	if (oldppref && oldppref != PPREF_NONE)
573 		free(oldppref, M_UVMAMAP);
574 #endif
575 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
576 	    amap, slotneed, 0, 0);
577 	return 0;
578 }
579 
580 /*
581  * amap_share_protect: change protection of anons in a shared amap
582  *
583  * for shared amaps, given the current data structure layout, it is
584  * not possible for us to directly locate all maps referencing the
585  * shared anon (to change the protection).  in order to protect data
586  * in shared maps we use pmap_page_protect().  [this is useful for IPC
587  * mechanisms like map entry passing that may want to write-protect
588  * all mappings of a shared amap.]  we traverse am_anon or am_slots
589  * depending on the current state of the amap.
590  *
591  * => entry's map and amap must be locked by the caller
592  */
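
/*
 * to make the traversal choice concrete: write-protecting a 4-slot entry
 * in an amap with hundreds of anons in use scans just those 4 am_anon[]
 * entries, while an amap with only a couple of anons in use is walked
 * via am_slots[] instead (the "slots < am_nused" test below).
 */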
593 void
594 amap_share_protect(entry, prot)
595 	struct vm_map_entry *entry;
596 	vm_prot_t prot;
597 {
598 	struct vm_amap *amap = entry->aref.ar_amap;
599 	int slots, lcv, slot, stop;
600 
601 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
602 
603 	AMAP_B2SLOT(slots, (entry->end - entry->start));
604 	stop = entry->aref.ar_pageoff + slots;
605 
606 	if (slots < amap->am_nused) {
607 		/* cheaper to traverse am_anon */
608 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
609 			if (amap->am_anon[lcv] == NULL)
610 				continue;
611 			if (amap->am_anon[lcv]->u.an_page != NULL)
612 				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
613 						  prot);
614 		}
615 		return;
616 	}
617 
618 	/* cheaper to traverse am_slots */
619 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
620 		slot = amap->am_slots[lcv];
621 		if (slot < entry->aref.ar_pageoff || slot >= stop)
622 			continue;
623 		if (amap->am_anon[slot]->u.an_page != NULL)
624 			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
625 	}
626 }
627 
628 /*
629  * amap_wipeout: wipe out all anons in an amap; then free the amap!
630  *
631  * => called from amap_unref when the final reference to an amap is
632  *	discarded (i.e. when reference count == 1)
633  * => the amap should be locked (by the caller)
634  */
635 
636 void
637 amap_wipeout(amap)
638 	struct vm_amap *amap;
639 {
640 	int lcv, slot;
641 	struct vm_anon *anon;
642 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
643 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
644 
645 	amap_unlock(amap);
646 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
647 		int refs;
648 
649 		slot = amap->am_slots[lcv];
650 		anon = amap->am_anon[slot];
651 
652 		if (anon == NULL || anon->an_ref == 0)
653 			panic("amap_wipeout: corrupt amap");
654 
655 		simple_lock(&anon->an_lock);
656 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
657 		    anon->an_ref, 0, 0);
658 		refs = --anon->an_ref;
659 		simple_unlock(&anon->an_lock);
660 		if (refs == 0) {
661 
662 			/*
663 			 * we had the last reference to a vm_anon. free it.
664 			 */
665 
666 			uvm_anfree(anon);
667 		}
668 
669 		/*
670 		 * XXX
671 		 * releasing the swap space held by N anons is an O(N^2)
672 		 * operation because of the implementation of extents.
673 		 * if there are many anons, tearing down an exiting process'
674 		 * address space can take many seconds, which causes very
675 		 * annoying pauses.  we yield here to give other processes
676 		 * a chance to run.  this should be removed once the performance
677 		 * of swap space management is improved.
678 		 */
679 
680 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
681 			preempt(1);
682 	}
683 
684 	/*
685 	 * now we free the map
686 	 */
687 
688 	amap->am_ref = 0;	/* ... was one */
689 	amap->am_nused = 0;
690 	amap_free(amap);	/* will unlock and free amap */
691 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
692 }
693 
694 /*
695  * amap_copy: ensure that a map entry's "needs_copy" flag is false
696  *	by copying the amap if necessary.
697  *
698  * => an entry with a null amap pointer will get a new (blank) one.
699  * => the map that the map entry belongs to must be locked by caller.
700  * => the amap currently attached to "entry" (if any) must be unlocked.
701  * => if canchunk is true, then we may clip the entry into a chunk
702  * => "startva" and "endva" are used only if canchunk is true.  they are
703  *     used to limit chunking (e.g. if you have a large space that you
704  *     know you are going to need to allocate amaps for, there is no point
705  *     in allowing that to be chunked)
706  */
707 
708 void
709 amap_copy(map, entry, waitf, canchunk, startva, endva)
710 	struct vm_map *map;
711 	struct vm_map_entry *entry;
712 	int waitf;
713 	boolean_t canchunk;
714 	vaddr_t startva, endva;
715 {
716 	struct vm_amap *amap, *srcamap;
717 	int slots, lcv;
718 	vaddr_t chunksize;
719 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
720 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
721 		    map, entry, waitf, 0);
722 
723 	/*
724 	 * is there a map to copy?   if not, create one from scratch.
725 	 */
726 
727 	if (entry->aref.ar_amap == NULL) {
728 
729 		/*
730 		 * check to see if we have a large amap that we can
731 		 * chunk.  we align startva/endva to chunk-sized
732 		 * boundaries and then clip to them.
733 		 */
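
		/*
		 * for example (hypothetical configuration: UVM_AMAP_CHUNK
		 * of 16 and PAGE_SHIFT of 12, i.e. chunksize == 0x10000):
		 * startva == 0x211000 is rounded down to 0x210000 and
		 * endva == 0x223000 is rounded up to 0x230000 before the
		 * entry is clipped.
		 */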
734 
735 		if (canchunk && atop(entry->end - entry->start) >=
736 		    UVM_AMAP_LARGE) {
737 			/* convert slots to bytes */
738 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
739 			startva = (startva / chunksize) * chunksize;
740 			endva = roundup(endva, chunksize);
741 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
742 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
743 			    endva);
744 			UVM_MAP_CLIP_START(map, entry, startva);
745 			/* watch out for endva wrap-around! */
746 			if (endva >= startva)
747 				UVM_MAP_CLIP_END(map, entry, endva);
748 		}
749 
750 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
751 		entry->start, entry->end, 0, 0);
752 		entry->aref.ar_pageoff = 0;
753 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
754 		    waitf);
755 		if (entry->aref.ar_amap != NULL)
756 			entry->etype &= ~UVM_ET_NEEDSCOPY;
757 		return;
758 	}
759 
760 	/*
761 	 * first check and see if we are the only map entry
762 	 * referencing the amap we currently have.  if so, then we can
763 	 * just take it over rather than copying it.  note that we are
764 	 * reading am_ref with the amap unlocked... the value can only
765 	 * be one if we have the only reference to the amap (via our
766 	 * locked map).  if it is greater than one we fall through to
767 	 * the next case (where we double check the value).
768 	 */
769 
770 	if (entry->aref.ar_amap->am_ref == 1) {
771 		entry->etype &= ~UVM_ET_NEEDSCOPY;
772 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
773 		    0, 0, 0, 0);
774 		return;
775 	}
776 
777 	/*
778 	 * looks like we need to copy the map.
779 	 */
780 
781 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
782 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
783 	AMAP_B2SLOT(slots, entry->end - entry->start);
784 	amap = amap_alloc1(slots, 0, waitf);
785 	if (amap == NULL) {
786 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
787 		return;
788 	}
789 	srcamap = entry->aref.ar_amap;
790 	amap_lock(srcamap);
791 
792 	/*
793 	 * need to double check reference count now that we've got the
794 	 * src amap locked down.  the reference count could have
795 	 * changed while we were in malloc.  if the reference count
796 	 * dropped down to one we take over the old map rather than
797 	 * copying the amap.
798 	 */
799 
800 	if (srcamap->am_ref == 1) {		/* take it over? */
801 		entry->etype &= ~UVM_ET_NEEDSCOPY;
802 		amap->am_ref--;		/* drop final reference to map */
803 		amap_free(amap);	/* dispose of new (unused) amap */
804 		amap_unlock(srcamap);
805 		return;
806 	}
807 
808 	/*
809 	 * we must copy it now.
810 	 */
811 
812 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
813 	for (lcv = 0 ; lcv < slots; lcv++) {
814 		amap->am_anon[lcv] =
815 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
816 		if (amap->am_anon[lcv] == NULL)
817 			continue;
818 		simple_lock(&amap->am_anon[lcv]->an_lock);
819 		amap->am_anon[lcv]->an_ref++;
820 		simple_unlock(&amap->am_anon[lcv]->an_lock);
821 		amap->am_bckptr[lcv] = amap->am_nused;
822 		amap->am_slots[amap->am_nused] = lcv;
823 		amap->am_nused++;
824 	}
825 	memset(&amap->am_anon[lcv], 0,
826 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
827 
828 	/*
829 	 * drop our reference to the old amap (srcamap) and unlock.
830 	 * we know that the reference count on srcamap is greater than
831 	 * one (we checked above), so there is no way we could drop
832 	 * the count to zero.  [and no need to worry about freeing it]
833 	 */
834 
835 	srcamap->am_ref--;
836 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
837 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
838 #ifdef UVM_AMAP_PPREF
839 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
840 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
841 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
842 	}
843 #endif
844 
845 	amap_unlock(srcamap);
846 
847 	/*
848 	 * install new amap.
849 	 */
850 
851 	entry->aref.ar_pageoff = 0;
852 	entry->aref.ar_amap = amap;
853 	entry->etype &= ~UVM_ET_NEEDSCOPY;
854 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
855 }
856 
857 /*
858  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
859  *
860  *	called during fork(2) when the parent process has a wired map
861  *	entry.   in that case we want to avoid write-protecting pages
862  *	in the parent's map (e.g. like what you'd do for a COW page)
863  *	so we resolve the COW here.
864  *
865  * => assume parent's entry was wired, thus all pages are resident.
866  * => assume pages that are loaned out (loan_count) are already mapped
867  *	read-only in all maps, and thus no need for us to worry about them
868  * => assume both parent and child vm_map's are locked
869  * => caller passes child's map/entry in to us
870  * => if we run out of memory we will unlock the amap and sleep _with_ the
871  *	parent and child vm_map's locked(!).    we have to do this since
872  *	we are in the middle of a fork(2) and we can't let the parent
873  *	map change until we are done copying all the map entries.
874  * => XXXCDC: out of memory should cause fork to fail, but there is
875  *	currently no easy way to do this (needs fix)
876  * => page queues must be unlocked (we may lock them)
877  */
878 
879 void
880 amap_cow_now(map, entry)
881 	struct vm_map *map;
882 	struct vm_map_entry *entry;
883 {
884 	struct vm_amap *amap = entry->aref.ar_amap;
885 	int lcv, slot;
886 	struct vm_anon *anon, *nanon;
887 	struct vm_page *pg, *npg;
888 
889 	/*
890 	 * note that if we unlock the amap then we must ReStart the "lcv" for
891 	 * loop because some other process could reorder the anon's in the
892 	 * am_anon[] array on us while the lock is dropped.
893 	 */
894 
895 ReStart:
896 	amap_lock(amap);
897 
898 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
899 
900 		/*
901 		 * get the page
902 		 */
903 
904 		slot = amap->am_slots[lcv];
905 		anon = amap->am_anon[slot];
906 		simple_lock(&anon->an_lock);
907 		pg = anon->u.an_page;
908 
909 		/*
910 		 * page must be resident since parent is wired
911 		 */
912 
913 		if (pg == NULL)
914 		    panic("amap_cow_now: non-resident wired page in anon %p",
915 			anon);
916 
917 		/*
918 		 * if the anon ref count is one and the page is not loaned,
919 		 * then we are safe (the child has exclusive access to the
920 		 * page).  if the page is loaned, then it must already be
921 		 * mapped read-only.
922 		 *
923 		 * we only need to get involved when these are not true.
924 		 * [note: if loan_count == 0, then the anon must own the page]
925 		 */
926 
927 		if (anon->an_ref > 1 && pg->loan_count == 0) {
928 
929 			/*
930 			 * if the page is busy then we have to unlock, wait for
931 			 * it and then restart.
932 			 */
933 			if (pg->flags & PG_BUSY) {
934 				pg->flags |= PG_WANTED;
935 				amap_unlock(amap);
936 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
937 				    "cownow", 0);
938 				goto ReStart;
939 			}
940 
941 			/*
942 			 * ok, time to do a copy-on-write to a new anon
943 			 */
944 			nanon = uvm_analloc();
945 			if (nanon) {
946 				/* nanon is locked! */
947 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
948 			} else
949 				npg = NULL;	/* XXX: quiet gcc warning */
950 
951 			if (nanon == NULL || npg == NULL) {
952 				/* out of memory */
953 				/*
954 				 * XXXCDC: we should cause fork to fail, but
955 				 * we can't ...
956 				 */
957 				if (nanon) {
958 					nanon->an_ref--;
959 					simple_unlock(&nanon->an_lock);
960 					uvm_anfree(nanon);
961 				}
962 				simple_unlock(&anon->an_lock);
963 				amap_unlock(amap);
964 				uvm_wait("cownowpage");
965 				goto ReStart;
966 			}
967 
968 			/*
969 			 * got it... now we can copy the data and replace anon
970 			 * with our new one...
971 			 */
972 
973 			uvm_pagecopy(pg, npg);		/* old -> new */
974 			anon->an_ref--;			/* can't drop to zero */
975 			amap->am_anon[slot] = nanon;	/* replace */
976 
977 			/*
978 			 * drop PG_BUSY on new page ... since we have had its
979 			 * owner locked the whole time it can't be
980 			 * PG_RELEASED | PG_WANTED.
981 			 */
982 
983 			uvm_lock_pageq();
984 			uvm_pageactivate(npg);
985 			uvm_unlock_pageq();
986 			npg->flags &= ~(PG_BUSY|PG_FAKE);
987 			UVM_PAGE_OWN(npg, NULL);
988 			simple_unlock(&nanon->an_lock);
989 		}
990 		simple_unlock(&anon->an_lock);
991 	}
992 	amap_unlock(amap);
993 }
994 
995 /*
996  * amap_splitref: split a single reference into two separate references
997  *
998  * => called from uvm_map's clip routines
999  * => origref's map should be locked
1000  * => origref->ar_amap should be unlocked (we will lock)
1001  */
1002 void
1003 amap_splitref(origref, splitref, offset)
1004 	struct vm_aref *origref, *splitref;
1005 	vaddr_t offset;
1006 {
1007 	int leftslots;
1008 
1009 	AMAP_B2SLOT(leftslots, offset);
1010 	if (leftslots == 0)
1011 		panic("amap_splitref: split at zero offset");
1012 
1013 	amap_lock(origref->ar_amap);
1014 
1015 	/*
1016 	 * now: amap is locked and we have a valid am_anon array.
1017 	 */
1018 
1019 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
1020 		panic("amap_splitref: map size check failed");
1021 
1022 #ifdef UVM_AMAP_PPREF
1023 	/*
1024 	 * establish ppref before we add a duplicate reference to the amap
1025 	 */
1026 	if (origref->ar_amap->am_ppref == NULL)
1027 		amap_pp_establish(origref->ar_amap, origref->ar_pageoff);
1028 #endif
1029 
1030 	splitref->ar_amap = origref->ar_amap;
1031 	splitref->ar_amap->am_ref++;		/* not a share reference */
1032 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1033 
1034 	amap_unlock(origref->ar_amap);
1035 }
1036 
1037 #ifdef UVM_AMAP_PPREF
1038 
1039 /*
1040  * amap_pp_establish: add a ppref array to an amap, if possible
1041  *
1042  * => amap locked by caller
1043  */
1044 void
1045 amap_pp_establish(amap, offset)
1046 	struct vm_amap *amap;
1047 	vaddr_t offset;
1048 {
1049 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
1050 	    M_UVMAMAP, M_NOWAIT);
1051 
1052 	/*
1053 	 * if we fail then we just won't use ppref for this amap
1054 	 */
1055 
1056 	if (amap->am_ppref == NULL) {
1057 		amap->am_ppref = PPREF_NONE;	/* not using it */
1058 		return;
1059 	}
1060 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1061 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1062 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1063 	    amap->am_nslot - offset);
1064 	return;
1065 }
1066 
1067 /*
1068  * amap_pp_adjref: adjust reference count to a part of an amap using the
1069  * per-page reference count array.
1070  *
1071  * => map and amap locked by caller
1072  * => caller must check that ppref != PPREF_NONE before calling
1073  */
1074 void
1075 amap_pp_adjref(amap, curslot, slotlen, adjval)
1076 	struct vm_amap *amap;
1077 	int curslot;
1078 	vsize_t slotlen;
1079 	int adjval;
1080 {
1081 	int stopslot, *ppref, lcv, prevlcv;
1082 	int ref, len, prevref, prevlen;
1083 
1084 	stopslot = curslot + slotlen;
1085 	ppref = amap->am_ppref;
1086 	prevlcv = 0;
1087 
1088 	/*
1089 	 * first advance to the correct place in the ppref array,
1090 	 * fragment if needed.
1091 	 */
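
	/*
	 * illustration (hypothetical values): if the array holds a single
	 * chunk <ref 2, len 8> starting at slot 0 and curslot == 3, the
	 * loop below splits it into <ref 2, len 3> at slot 0 and
	 * <ref 2, len 5> at slot 3, so that the adjustment can start
	 * exactly at curslot.
	 */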
1092 
1093 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1094 		pp_getreflen(ppref, lcv, &ref, &len);
1095 		if (lcv + len > curslot) {     /* goes past start? */
1096 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1097 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1098 			len = curslot - lcv;   /* new length of entry @ lcv */
1099 		}
1100 		prevlcv = lcv;
1101 	}
1102 	if (lcv != 0)
1103 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1104 	else {
1105 		/* Ensure that the "prevref == ref" test below always
1106 		 * fails, since we're starting from the beginning of
1107 		 * the ppref array; that is, there is no previous
1108 		 * chunk.
1109 		 */
1110 		prevref = -1;
1111 		prevlen = 0;
1112 	}
1113 
1114 	/*
1115 	 * now adjust reference counts in range.  merge the first
1116 	 * changed entry with the last unchanged entry if possible.
1117 	 */
1118 
1119 	if (lcv != curslot)
1120 		panic("amap_pp_adjref: overshot target");
1121 
1122 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1123 		pp_getreflen(ppref, lcv, &ref, &len);
1124 		if (lcv + len > stopslot) {     /* goes past end? */
1125 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1126 			pp_setreflen(ppref, stopslot, ref,
1127 			    len - (stopslot - lcv));
1128 			len = stopslot - lcv;
1129 		}
1130 		ref += adjval;
1131 		if (ref < 0)
1132 			panic("amap_pp_adjref: negative reference count");
1133 		if (lcv == prevlcv + prevlen && ref == prevref) {
1134 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1135 		} else {
1136 			pp_setreflen(ppref, lcv, ref, len);
1137 		}
1138 		if (ref == 0)
1139 			amap_wiperange(amap, lcv, len);
1140 	}
1141 
1142 }
1143 
1144 /*
1145  * amap_wiperange: wipe out a range of an amap
1146  * [different from amap_wipeout because the amap is kept intact]
1147  *
1148  * => both map and amap must be locked by caller.
1149  */
1150 void
1151 amap_wiperange(amap, slotoff, slots)
1152 	struct vm_amap *amap;
1153 	int slotoff, slots;
1154 {
1155 	int byanon, lcv, stop, curslot, ptr, slotend;
1156 	struct vm_anon *anon;
1157 
1158 	/*
1159 	 * we can either traverse the amap by am_anon or by am_slots depending
1160 	 * on which is cheaper.    decide now.
1161 	 */
1162 
1163 	if (slots < amap->am_nused) {
1164 		byanon = TRUE;
1165 		lcv = slotoff;
1166 		stop = slotoff + slots;
1167 		slotend = 0;
1168 	} else {
1169 		byanon = FALSE;
1170 		lcv = 0;
1171 		stop = amap->am_nused;
1172 		slotend = slotoff + slots;
1173 	}
1174 
1175 	while (lcv < stop) {
1176 		int refs;
1177 
1178 		if (byanon) {
1179 			curslot = lcv++;	/* lcv advances here */
1180 			if (amap->am_anon[curslot] == NULL)
1181 				continue;
1182 		} else {
1183 			curslot = amap->am_slots[lcv];
1184 			if (curslot < slotoff || curslot >= slotend) {
1185 				lcv++;		/* lcv advances here */
1186 				continue;
1187 			}
1188 			stop--;	/* drop stop, since anon will be removed */
1189 		}
1190 		anon = amap->am_anon[curslot];
1191 
1192 		/*
1193 		 * remove it from the amap
1194 		 */
1195 
1196 		amap->am_anon[curslot] = NULL;
1197 		ptr = amap->am_bckptr[curslot];
1198 		if (ptr != (amap->am_nused - 1)) {
1199 			amap->am_slots[ptr] =
1200 			    amap->am_slots[amap->am_nused - 1];
1201 			amap->am_bckptr[amap->am_slots[ptr]] =
1202 			    ptr;    /* back ptr. */
1203 		}
1204 		amap->am_nused--;
1205 
1206 		/*
1207 		 * drop anon reference count
1208 		 */
1209 
1210 		simple_lock(&anon->an_lock);
1211 		refs = --anon->an_ref;
1212 		simple_unlock(&anon->an_lock);
1213 		if (refs == 0) {
1214 
1215 			/*
1216 			 * we just eliminated the last reference to an anon.
1217 			 * free it.
1218 			 */
1219 
1220 			uvm_anfree(anon);
1221 		}
1222 	}
1223 }
1224 
1225 #endif
1226