xref: /netbsd-src/sys/uvm/uvm_amap.c (revision d710132b4b8ce7f7cccaaf660cb16aa16b4077a0)
1 /*	$NetBSD: uvm_amap.c,v 1.52 2003/02/01 06:23:54 thorpej Exp $	*/
2 
3 /*
4  *
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Charles D. Cranor and
19  *      Washington University.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * uvm_amap.c: amap operations
37  */
38 
39 /*
40  * this file contains functions that perform operations on amaps.  see
41  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.52 2003/02/01 06:23:54 thorpej Exp $");
46 
47 #undef UVM_AMAP_INLINE		/* enable/disable amap inlines */
48 
49 #include "opt_uvmhist.h"
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/malloc.h>
55 #include <sys/kernel.h>
56 #include <sys/pool.h>
57 
58 #define UVM_AMAP_C		/* ensure disabled inlines are in */
59 #include <uvm/uvm.h>
60 #include <uvm/uvm_swap.h>
61 
62 /*
63  * pool for allocation of vm_amap structures.  note that the pool has
64  * its own simplelock for its protection.  also note that in order to
65  * avoid an endless loop, the amap pool's allocator cannot allocate
66  * memory from an amap (it currently goes through the kernel uobj, so
67  * we are ok).
68  */
69 
70 struct pool uvm_amap_pool;
71 
72 MALLOC_DEFINE(M_UVMAMAP, "UVM amap", "UVM amap and related structures");
73 
74 /*
75  * local functions
76  */
77 
78 static struct vm_amap *amap_alloc1 __P((int, int, int));
79 
80 #ifdef UVM_AMAP_PPREF
81 /*
82  * what is ppref?   ppref is an _optional_ amap feature which is used
83  * to keep track of reference counts on a per-page basis.  it is enabled
84  * when UVM_AMAP_PPREF is defined.
85  *
86  * when enabled, an array of ints is allocated for the pprefs.  this
87  * array is allocated only when a partial reference is added to the
88  * map (either by unmapping part of the amap, or gaining a reference
89  * to only a part of an amap).  if the malloc of the array fails
90  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
91  * that we tried to do ppref's but couldn't alloc the array so just
92  * give up (after all, this is an optional feature!).
93  *
94  * the array is divided into page sized "chunks."   for chunks of length 1,
95  * the chunk reference count plus one is stored in that chunk's slot.
96  * for chunks of length > 1 the first slot contains (the reference count
97  * plus one) * -1.    [the negative value indicates that the length is
98  * greater than one.]   the second slot of the chunk contains the length
99  * of the chunk.   here is an example:
100  *
101  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
102  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
103  *              <----------><-><----><-------><----><-><------->
104  * (x = don't care)
105  *
106  * this allows us to use one int to contain the ref count for the whole
107  * chunk.    note that the "plus one" part is needed because a reference
108  * count of zero is neither positive nor negative (we need a way to tell
109  * if we've got one zero or a bunch of them).
110  *
111  * here are some in-line functions to help us.
112  */
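
/*
 * illustrative sketch (not compiled): decoding the example layout above
 * by hand.  the array below is copied from the comment ("x" entries are
 * written as 0), and the rules are the ones pp_getreflen() implements.
 */
#if 0
	int ppref[16] = { -3, 4, 0, 0, 4, -2, 2, -1, 3, 0, -5, 2, 1, -2, 3, 0 };

	/*
	 * slot 0: ppref[0] == -3 is negative, so the chunk is longer than
	 *   one: ref = (-3 * -1) - 1 = 2, length = ppref[1] = 4 (slots 0-3).
	 * slot 4: ppref[4] == 4 is positive, so the chunk length is 1 and
	 *   ref = 4 - 1 = 3.
	 * slot 7: ppref[7] == -1 gives ref = 0 with length ppref[8] = 3,
	 *   i.e. three slots with no references at all.
	 */
#endif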
113 
114 static __inline void pp_getreflen __P((int *, int, int *, int *));
115 static __inline void pp_setreflen __P((int *, int, int, int));
116 
117 /*
118  * pp_getreflen: get the reference and length for a specific offset
119  *
120  * => ppref's amap must be locked
121  */
122 static __inline void
123 pp_getreflen(ppref, offset, refp, lenp)
124 	int *ppref, offset, *refp, *lenp;
125 {
126 
127 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
128 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
129 		*lenp = 1;
130 	} else {
131 		*refp = (ppref[offset] * -1) - 1;
132 		*lenp = ppref[offset+1];
133 	}
134 }
135 
136 /*
137  * pp_setreflen: set the reference and length for a specific offset
138  *
139  * => ppref's amap must be locked
140  */
141 static __inline void
142 pp_setreflen(ppref, offset, ref, len)
143 	int *ppref, offset, ref, len;
144 {
145 	if (len == 0)
146 		return;
147 	if (len == 1) {
148 		ppref[offset] = ref + 1;
149 	} else {
150 		ppref[offset] = (ref + 1) * -1;
151 		ppref[offset+1] = len;
152 	}
153 }
154 #endif
155 
156 /*
157  * amap_init: called at boot time to init global amap data structures
158  */
159 
160 void
161 amap_init(void)
162 {
163 
164 	/*
165 	 * Initialize the vm_amap pool.
166 	 */
167 
168 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
169 	    "amappl", &pool_allocator_nointr);
170 }
171 
172 /*
173  * amap_alloc1: internal function that allocates an amap, but does not
174  *	init the overlay.
175  *
176  * => lock on returned amap is init'd
177  */
178 static inline struct vm_amap *
179 amap_alloc1(slots, padslots, waitf)
180 	int slots, padslots, waitf;
181 {
182 	struct vm_amap *amap;
183 	int totalslots;
184 
185 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
186 	if (amap == NULL)
187 		return(NULL);
188 
189 	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
190 	    sizeof(int);
191 	simple_lock_init(&amap->am_l);
192 	amap->am_ref = 1;
193 	amap->am_flags = 0;
194 #ifdef UVM_AMAP_PPREF
195 	amap->am_ppref = NULL;
196 #endif
197 	amap->am_maxslot = totalslots;
198 	amap->am_nslot = slots;
199 	amap->am_nused = 0;
200 
201 	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
202 	    waitf);
203 	if (amap->am_slots == NULL)
204 		goto fail1;
205 
206 	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
207 	if (amap->am_bckptr == NULL)
208 		goto fail2;
209 
210 	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
211 	    M_UVMAMAP, waitf);
212 	if (amap->am_anon == NULL)
213 		goto fail3;
214 
215 	return(amap);
216 
217 fail3:
218 	free(amap->am_bckptr, M_UVMAMAP);
219 fail2:
220 	free(amap->am_slots, M_UVMAMAP);
221 fail1:
222 	pool_put(&uvm_amap_pool, amap);
223 	return (NULL);
224 }
225 
226 /*
227  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
228  *
229  * => caller should ensure sz is a multiple of PAGE_SIZE
230  * => reference count to new amap is set to one
231  * => new amap is returned unlocked
232  */
233 
234 struct vm_amap *
235 amap_alloc(sz, padsz, waitf)
236 	vaddr_t sz, padsz;
237 	int waitf;
238 {
239 	struct vm_amap *amap;
240 	int slots, padslots;
241 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
242 
243 	AMAP_B2SLOT(slots, sz);
244 	AMAP_B2SLOT(padslots, padsz);
245 
246 	amap = amap_alloc1(slots, padslots, waitf);
247 	if (amap)
248 		memset(amap->am_anon, 0,
249 		    amap->am_maxslot * sizeof(struct vm_anon *));
250 
251 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
252 	return(amap);
253 }
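
/*
 * illustrative sketch (not compiled): a minimal allocate/destroy pairing
 * for an amap.  the four-page size and the error handling are assumptions
 * for illustration only; a real in-tree caller is amap_copy() below.
 */
#if 0
	struct vm_amap *amap;

	/* manage four pages of anonymous memory, no padding, ok to sleep */
	amap = amap_alloc(4 * PAGE_SIZE, 0, M_WAITOK);
	if (amap == NULL)
		return ENOMEM;		/* can only happen with M_NOWAIT */

	/* ... install it in a map entry via entry->aref ... */

	/* when the final reference is dropped: lock, then wipe out and free */
	amap_lock(amap);
	amap_wipeout(amap);
#endif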
254 
255 
256 /*
257  * amap_free: free an amap
258  *
259  * => the amap must be unlocked
260  * => the amap should have a zero reference count and be empty
261  */
262 void
263 amap_free(amap)
264 	struct vm_amap *amap;
265 {
266 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
267 
268 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
269 	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
270 	free(amap->am_slots, M_UVMAMAP);
271 	free(amap->am_bckptr, M_UVMAMAP);
272 	free(amap->am_anon, M_UVMAMAP);
273 #ifdef UVM_AMAP_PPREF
274 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
275 		free(amap->am_ppref, M_UVMAMAP);
276 #endif
277 	pool_put(&uvm_amap_pool, amap);
278 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
279 }
280 
281 /*
282  * amap_extend: extend the size of an amap (if needed)
283  *
284  * => called from uvm_map when we want to extend an amap to cover
285  *    a new mapping (rather than allocate a new one)
286  * => amap should be unlocked (we will lock it)
287  * => to safely extend an amap it should have a reference count of
288  *    one (thus it can't be shared)
289  */
290 int
291 amap_extend(entry, addsize, flags)
292 	struct vm_map_entry *entry;
293 	vsize_t addsize;
294 	int flags;
295 {
296 	struct vm_amap *amap = entry->aref.ar_amap;
297 	int slotoff = entry->aref.ar_pageoff;
298 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
299 	int slotadj, slotspace;
300 #ifdef UVM_AMAP_PPREF
301 	int *newppref, *oldppref;
302 #endif
303 	int i, *newsl, *newbck, *oldsl, *oldbck;
304 	struct vm_anon **newover, **oldover;
305 	int mflag = (flags & AMAP_EXTEND_NOWAIT) ? M_NOWAIT :
306 		        (M_WAITOK | M_CANFAIL);
307 
308 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
309 
310 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x, flags=0x%x)",
311 	    entry, addsize, flags, 0);
312 
313 	/*
314 	 * first, determine how many slots we need in the amap.  don't
315 	 * forget that ar_pageoff could be non-zero: this means that
316 	 * there are some unused slots before us in the amap.
317 	 */
318 
319 	amap_lock(amap);
320 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
321 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
322 	if (flags & AMAP_EXTEND_FORWARDS) {
323 		slotneed = slotoff + slotmapped + slotadd;
324 		slotadj = 0;
325 		slotspace = 0;
326 	}
327 	else {
328 		slotneed = slotadd + slotmapped;
329 		slotadj = slotadd - slotoff;
330 		slotspace = amap->am_maxslot - slotmapped;
331 	}
332 
333 	/*
334 	 * case 1: we already have enough slots in the map and thus
335 	 * only need to bump the reference counts on the slots we are
336 	 * adding.
337 	 */
338 
339 	if (flags & AMAP_EXTEND_FORWARDS) {
340 		if (amap->am_nslot >= slotneed) {
341 #ifdef UVM_AMAP_PPREF
342 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
343 				amap_pp_adjref(amap, slotoff + slotmapped,
344 				    slotadd, 1);
345 			}
346 #endif
347 			amap_unlock(amap);
348 			UVMHIST_LOG(maphist,
349 			    "<- done (case 1f), amap = 0x%x, slotneed=%d",
350 			    amap, slotneed, 0, 0);
351 			return 0;
352 		}
353 	} else {
354 		if (slotadj <= 0) {
355 			slotoff -= slotadd;
356 			entry->aref.ar_pageoff = slotoff;
357 #ifdef UVM_AMAP_PPREF
358 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
359 				amap_pp_adjref(amap, slotoff, slotadd, 1);
360 			}
361 #endif
362 			amap_unlock(amap);
363 			UVMHIST_LOG(maphist,
364 			    "<- done (case 1b), amap = 0x%x, slotneed=%d",
365 			    amap, slotneed, 0, 0);
366 			return 0;
367 		}
368 	}
369 
370 	/*
371 	 * case 2: we pre-allocated slots for use and we just need to
372 	 * bump nslot up to account for these slots.
373 	 */
374 
375 	if (amap->am_maxslot >= slotneed) {
376 		if (flags & AMAP_EXTEND_FORWARDS) {
377 #ifdef UVM_AMAP_PPREF
378 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
379 				if ((slotoff + slotmapped) < amap->am_nslot)
380 					amap_pp_adjref(amap,
381 					    slotoff + slotmapped,
382 					    (amap->am_nslot -
383 					    (slotoff + slotmapped)), 1);
384 				pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
385 				    slotneed - amap->am_nslot);
386 			}
387 #endif
388 			amap->am_nslot = slotneed;
389 			amap_unlock(amap);
390 
391 			/*
392 			 * no need to zero am_anon since that was done at
393 			 * alloc time and we never shrink an allocation.
394 			 */
395 
396 			UVMHIST_LOG(maphist,"<- done (case 2f), amap = 0x%x, "
397 			    "slotneed=%d", amap, slotneed, 0, 0);
398 			return 0;
399 		} else {
400 #ifdef UVM_AMAP_PPREF
401 			if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
402 				/*
403 				 * Slide up the ref counts on the pages that
404 				 * are actually in use.
405 				 */
406 				memmove(amap->am_ppref + slotspace,
407 				    amap->am_ppref + slotoff,
408 				    slotmapped * sizeof(int));
409 				/*
410 				 * Mark the (adjusted) gap at the front as
411 				 * referenced/not referenced.
412 				 */
413 				pp_setreflen(amap->am_ppref,
414 				    0, 0, slotspace - slotadd);
415 				pp_setreflen(amap->am_ppref,
416 				    slotspace - slotadd, 1, slotadd);
417 			}
418 #endif
419 
420 			/*
421 			 * Slide the anon pointers up and clear out
422 			 * the space we just made.
423 			 */
424 			memmove(amap->am_anon + slotspace,
425 			    amap->am_anon + slotoff,
426 			    slotmapped * sizeof(struct vm_anon*));
427 			memset(amap->am_anon + slotoff, 0,
428 			    (slotspace - slotoff) * sizeof(struct vm_anon *));
429 
430 			/*
431 			 * Slide the backpointers up, but don't bother
432 			 * wiping out the old slots.
433 			 */
434 			memmove(amap->am_bckptr + slotspace,
435 			    amap->am_bckptr + slotoff,
436 			    slotmapped * sizeof(int));
437 
438 			/*
439 			 * Adjust all the useful active slot numbers.
440 			 */
441 			for (i = 0; i < amap->am_nused; i++)
442 				amap->am_slots[i] += (slotspace - slotoff);
443 
444 			/*
445 			 * We just filled all the empty space in the
446 			 * front of the amap by activating a few new
447 			 * slots.
448 			 */
449 			amap->am_nslot = amap->am_maxslot;
450 			entry->aref.ar_pageoff = slotspace - slotadd;
451 			amap_unlock(amap);
452 
453 			UVMHIST_LOG(maphist,"<- done (case 2b), amap = 0x%x, "
454 			    "slotneed=%d", amap, slotneed, 0, 0);
455 			return 0;
456 		}
457 	}
458 
459 	/*
460 	 * case 3: we need to malloc a new amap and copy all the amap
461 	 * data over from old amap to the new one.
462 	 *
463 	 * note that the use of a kernel realloc() probably would not
464 	 * help here, since we wish to abort cleanly if one of the
465 	 * three (or four) mallocs fails.
466 	 */
467 
468 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
469 	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
470 #ifdef UVM_AMAP_PPREF
471 	newppref = NULL;
472 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
473 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
474 #endif
475 	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
476 	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, mflag);
477 	newover = malloc(slotalloc * sizeof(struct vm_anon *), M_UVMAMAP,
478 		    mflag);
479 	if (newsl == NULL || newbck == NULL || newover == NULL) {
480 #ifdef UVM_AMAP_PPREF
481 		if (newppref != NULL) {
482 			free(newppref, M_UVMAMAP);
483 		}
484 #endif
485 		if (newsl != NULL) {
486 			free(newsl, M_UVMAMAP);
487 		}
488 		if (newbck != NULL) {
489 			free(newbck, M_UVMAMAP);
490 		}
491 		if (newover != NULL) {
492 			free(newover, M_UVMAMAP);
493 		}
494 		return ENOMEM;
495 	}
496 	amap_lock(amap);
497 	KASSERT(amap->am_maxslot < slotneed);
498 
499 	/*
500 	 * now copy everything over to new malloc'd areas...
501 	 */
502 
503 	slotadded = slotalloc - amap->am_nslot;
504 	if (!(flags & AMAP_EXTEND_FORWARDS))
505 		slotspace = slotalloc - slotmapped;
506 
507 	/* do am_slots */
508 	oldsl = amap->am_slots;
509 	if (flags & AMAP_EXTEND_FORWARDS)
510 		memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
511 	else
512 		for (i = 0; i < amap->am_nused; i++)
513 			newsl[i] = oldsl[i] + slotspace - slotoff;
514 	amap->am_slots = newsl;
515 
516 	/* do am_anon */
517 	oldover = amap->am_anon;
518 	if (flags & AMAP_EXTEND_FORWARDS) {
519 		memcpy(newover, oldover,
520 		    sizeof(struct vm_anon *) * amap->am_nslot);
521 		memset(newover + amap->am_nslot, 0,
522 		    sizeof(struct vm_anon *) * slotadded);
523 	} else {
524 		memcpy(newover + slotspace, oldover + slotoff,
525 		    sizeof(struct vm_anon *) * slotmapped);
526 		memset(newover, 0,
527 		    sizeof(struct vm_anon *) * slotspace);
528 	}
529 	amap->am_anon = newover;
530 
531 	/* do am_bckptr */
532 	oldbck = amap->am_bckptr;
533 	if (flags & AMAP_EXTEND_FORWARDS)
534 		memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
535 	else
536 		memcpy(newbck + slotspace, oldbck + slotoff,
537 		    sizeof(int) * slotmapped);
538 	amap->am_bckptr = newbck;
539 
540 #ifdef UVM_AMAP_PPREF
541 	/* do ppref */
542 	oldppref = amap->am_ppref;
543 	if (newppref) {
544 		if (flags & AMAP_EXTEND_FORWARDS) {
545 			memcpy(newppref, oldppref,
546 			    sizeof(int) * amap->am_nslot);
547 			memset(newppref + amap->am_nslot, 0,
548 			    sizeof(int) * slotadded);
549 		} else {
550 			memcpy(newppref + slotspace, oldppref + slotoff,
551 			    sizeof(int) * slotmapped);
552 		}
553 		amap->am_ppref = newppref;
554 		if ((flags & AMAP_EXTEND_FORWARDS) &&
555 		    (slotoff + slotmapped) < amap->am_nslot)
556 			amap_pp_adjref(amap, slotoff + slotmapped,
557 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
558 		if (flags & AMAP_EXTEND_FORWARDS)
559 			pp_setreflen(newppref, amap->am_nslot, 1,
560 			    slotneed - amap->am_nslot);
561 		else {
562 			pp_setreflen(newppref, 0, 0,
563 			    slotalloc - slotneed);
564 			pp_setreflen(newppref, slotalloc - slotneed, 1,
565 			    slotneed - slotmapped);
566 		}
567 	} else {
568 		if (amap->am_ppref)
569 			amap->am_ppref = PPREF_NONE;
570 	}
571 #endif
572 
573 	/* update master values */
574 	if (flags & AMAP_EXTEND_FORWARDS)
575 		amap->am_nslot = slotneed;
576 	else {
577 		entry->aref.ar_pageoff = slotspace - slotadd;
578 		amap->am_nslot = slotalloc;
579 	}
580 	amap->am_maxslot = slotalloc;
581 
582 	amap_unlock(amap);
583 	free(oldsl, M_UVMAMAP);
584 	free(oldbck, M_UVMAMAP);
585 	free(oldover, M_UVMAMAP);
586 #ifdef UVM_AMAP_PPREF
587 	if (oldppref && oldppref != PPREF_NONE)
588 		free(oldppref, M_UVMAMAP);
589 #endif
590 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
591 	    amap, slotneed, 0, 0);
592 	return 0;
593 }
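
/*
 * illustrative sketch (not compiled): growing an existing amap when a new
 * mapping is merged onto the end of "entry", roughly the way uvm_map()
 * uses this routine.  the name "new_size" and the fallback comment are
 * assumptions for illustration only.
 */
#if 0
	vsize_t new_size = 2 * PAGE_SIZE;	/* bytes being added */
	int error;

	/* entry's amap must be unlocked and have a reference count of one */
	error = amap_extend(entry, new_size,
	    AMAP_EXTEND_FORWARDS | AMAP_EXTEND_NOWAIT);
	if (error) {
		/* e.g. give up on merging and allocate a fresh amap */
		return error;
	}
#endif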
594 
595 /*
596  * amap_share_protect: change protection of anons in a shared amap
597  *
598  * for shared amaps, given the current data structure layout, it is
599  * not possible for us to directly locate all maps referencing the
600  * shared anon (to change the protection).  in order to protect data
601  * in shared maps we use pmap_page_protect().  [this is useful for IPC
602  * mechanisms like map entry passing that may want to write-protect
603  * all mappings of a shared amap.]  we traverse am_anon or am_slots
604  * depending on the current state of the amap.
605  *
606  * => entry's map and amap must be locked by the caller
607  */
608 void
609 amap_share_protect(entry, prot)
610 	struct vm_map_entry *entry;
611 	vm_prot_t prot;
612 {
613 	struct vm_amap *amap = entry->aref.ar_amap;
614 	int slots, lcv, slot, stop;
615 
616 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
617 
618 	AMAP_B2SLOT(slots, (entry->end - entry->start));
619 	stop = entry->aref.ar_pageoff + slots;
620 
621 	if (slots < amap->am_nused) {
622 		/* cheaper to traverse am_anon */
623 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
624 			if (amap->am_anon[lcv] == NULL)
625 				continue;
626 			if (amap->am_anon[lcv]->u.an_page != NULL)
627 				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
628 						  prot);
629 		}
630 		return;
631 	}
632 
633 	/* cheaper to traverse am_slots */
634 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
635 		slot = amap->am_slots[lcv];
636 		if (slot < entry->aref.ar_pageoff || slot >= stop)
637 			continue;
638 		if (amap->am_anon[slot]->u.an_page != NULL)
639 			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
640 	}
641 }
642 
643 /*
644  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
645  *
646  * => called from amap_unref when the final reference to an amap is
647  *	discarded (i.e. when reference count == 1)
648  * => the amap should be locked (by the caller)
649  */
650 
651 void
652 amap_wipeout(amap)
653 	struct vm_amap *amap;
654 {
655 	int lcv, slot;
656 	struct vm_anon *anon;
657 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
658 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
659 
660 	amap_unlock(amap);
661 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
662 		int refs;
663 
664 		slot = amap->am_slots[lcv];
665 		anon = amap->am_anon[slot];
666 
667 		if (anon == NULL || anon->an_ref == 0)
668 			panic("amap_wipeout: corrupt amap");
669 
670 		simple_lock(&anon->an_lock);
671 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
672 		    anon->an_ref, 0, 0);
673 		refs = --anon->an_ref;
674 		simple_unlock(&anon->an_lock);
675 		if (refs == 0) {
676 
677 			/*
678 			 * we had the last reference to a vm_anon. free it.
679 			 */
680 
681 			uvm_anfree(anon);
682 		}
683 
684 		/*
685 		 * XXX
686 		 * releasing the swap space held by N anons is an O(N^2)
687 		 * operation because of the implementation of extents.
688 		 * if there are many anons, tearing down an exiting process'
689 		 * address space can take many seconds, which causes very
690 		 * annoying pauses.  we yield here to give other processes
691 		 * a chance to run.  this should be removed once the performance
692 		 * of swap space management is improved.
693 		 */
694 
695 		if (curlwp->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
696 			preempt(1);
697 	}
698 
699 	/*
700 	 * now we free the map
701 	 */
702 
703 	amap->am_ref = 0;	/* ... was one */
704 	amap->am_nused = 0;
705 	amap_free(amap);	/* will unlock and free amap */
706 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
707 }
708 
709 /*
710  * amap_copy: ensure that a map entry's "needs_copy" flag is false
711  *	by copying the amap if necessary.
712  *
713  * => an entry with a null amap pointer will get a new (blank) one.
714  * => the map that the map entry belongs to must be locked by caller.
715  * => the amap currently attached to "entry" (if any) must be unlocked.
716  * => if canchunk is true, then we may clip the entry into a chunk
717  * => "startva" and "endva" are used only if canchunk is true.  they are
718  *     used to limit chunking (e.g. if you have a large space that you
719  *     know you are going to need to allocate amaps for, there is no point
720  *     in allowing that to be chunked)
721  */
722 
723 void
724 amap_copy(map, entry, waitf, canchunk, startva, endva)
725 	struct vm_map *map;
726 	struct vm_map_entry *entry;
727 	int waitf;
728 	boolean_t canchunk;
729 	vaddr_t startva, endva;
730 {
731 	struct vm_amap *amap, *srcamap;
732 	int slots, lcv;
733 	vaddr_t chunksize;
734 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
735 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
736 		    map, entry, waitf, 0);
737 
738 	/*
739 	 * is there a map to copy?   if not, create one from scratch.
740 	 */
741 
742 	if (entry->aref.ar_amap == NULL) {
743 
744 		/*
745 		 * check to see if we have a large amap that we can
746 		 * chunk.  we align startva/endva to chunk-sized
747 		 * boundaries and then clip to them.
748 		 */
749 
750 		if (canchunk && atop(entry->end - entry->start) >=
751 		    UVM_AMAP_LARGE) {
752 			/* convert slots to bytes */
753 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
754 			startva = (startva / chunksize) * chunksize;
755 			endva = roundup(endva, chunksize);
756 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
757 			    "to 0x%x->0x%x", entry->start, entry->end, startva,
758 			    endva);
759 			UVM_MAP_CLIP_START(map, entry, startva);
760 			/* watch out for endva wrap-around! */
761 			if (endva >= startva)
762 				UVM_MAP_CLIP_END(map, entry, endva);
763 		}
764 
765 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
766 		entry->start, entry->end, 0, 0);
767 		entry->aref.ar_pageoff = 0;
768 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
769 		    waitf);
770 		if (entry->aref.ar_amap != NULL)
771 			entry->etype &= ~UVM_ET_NEEDSCOPY;
772 		return;
773 	}
774 
775 	/*
776 	 * first check and see if we are the only map entry
777 	 * referencing the amap we currently have.  if so, then we can
778 	 * just take it over rather than copying it.  note that we are
779 	 * reading am_ref with the amap unlocked... the value can only
780 	 * be one if we have the only reference to the amap (via our
781 	 * locked map).  if we are greater than one we fall through to
782 	 * the next case (where we double check the value).
783 	 */
784 
785 	if (entry->aref.ar_amap->am_ref == 1) {
786 		entry->etype &= ~UVM_ET_NEEDSCOPY;
787 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
788 		    0, 0, 0, 0);
789 		return;
790 	}
791 
792 	/*
793 	 * looks like we need to copy the map.
794 	 */
795 
796 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
797 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
798 	AMAP_B2SLOT(slots, entry->end - entry->start);
799 	amap = amap_alloc1(slots, 0, waitf);
800 	if (amap == NULL) {
801 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
802 		return;
803 	}
804 	srcamap = entry->aref.ar_amap;
805 	amap_lock(srcamap);
806 
807 	/*
808 	 * need to double check reference count now that we've got the
809 	 * src amap locked down.  the reference count could have
810 	 * changed while we were in malloc.  if the reference count
811 	 * dropped down to one we take over the old map rather than
812 	 * copying the amap.
813 	 */
814 
815 	if (srcamap->am_ref == 1) {		/* take it over? */
816 		entry->etype &= ~UVM_ET_NEEDSCOPY;
817 		amap->am_ref--;		/* drop final reference to map */
818 		amap_free(amap);	/* dispose of new (unused) amap */
819 		amap_unlock(srcamap);
820 		return;
821 	}
822 
823 	/*
824 	 * we must copy it now.
825 	 */
826 
827 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
828 	for (lcv = 0 ; lcv < slots; lcv++) {
829 		amap->am_anon[lcv] =
830 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
831 		if (amap->am_anon[lcv] == NULL)
832 			continue;
833 		simple_lock(&amap->am_anon[lcv]->an_lock);
834 		amap->am_anon[lcv]->an_ref++;
835 		simple_unlock(&amap->am_anon[lcv]->an_lock);
836 		amap->am_bckptr[lcv] = amap->am_nused;
837 		amap->am_slots[amap->am_nused] = lcv;
838 		amap->am_nused++;
839 	}
840 	memset(&amap->am_anon[lcv], 0,
841 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
842 
843 	/*
844 	 * drop our reference to the old amap (srcamap) and unlock.
845 	 * we know that the reference count on srcamap is greater than
846 	 * one (we checked above), so there is no way we could drop
847 	 * the count to zero.  [and no need to worry about freeing it]
848 	 */
849 
850 	srcamap->am_ref--;
851 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
852 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
853 #ifdef UVM_AMAP_PPREF
854 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
855 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
856 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
857 	}
858 #endif
859 
860 	amap_unlock(srcamap);
861 
862 	/*
863 	 * install new amap.
864 	 */
865 
866 	entry->aref.ar_pageoff = 0;
867 	entry->aref.ar_amap = amap;
868 	entry->etype &= ~UVM_ET_NEEDSCOPY;
869 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
870 }
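
/*
 * illustrative sketch (not compiled): clearing a "needs copy" entry before
 * a write, loosely modeled on the fault path.  "fault_va" and the one-byte
 * chunking window around it are assumptions for illustration only.
 */
#if 0
	if (UVM_ET_ISNEEDSCOPY(entry))
		amap_copy(map, entry, M_NOWAIT, TRUE, fault_va, fault_va + 1);
	if (UVM_ET_ISNEEDSCOPY(entry)) {
		/* amap_copy() couldn't allocate; wait for memory and retry */
		uvm_wait("amapcopy");
	}
#endif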
871 
872 /*
873  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
874  *
875  *	called during fork(2) when the parent process has a wired map
876  *	entry.   in that case we want to avoid write-protecting pages
877  *	in the parent's map (e.g. like what you'd do for a COW page)
878  *	so we resolve the COW here.
879  *
880  * => assume parent's entry was wired, thus all pages are resident.
881  * => assume pages that are loaned out (loan_count) are already mapped
882  *	read-only in all maps, and thus no need for us to worry about them
883  * => assume both parent and child vm_map's are locked
884  * => caller passes child's map/entry in to us
885  * => if we run out of memory we will unlock the amap and sleep _with_ the
886  *	parent and child vm_map's locked(!).    we have to do this since
887  *	we are in the middle of a fork(2) and we can't let the parent
888  *	map change until we are done copying all the map entries.
889  * => XXXCDC: out of memory should cause fork to fail, but there is
890  *	currently no easy way to do this (needs fix)
891  * => page queues must be unlocked (we may lock them)
892  */
893 
894 void
895 amap_cow_now(map, entry)
896 	struct vm_map *map;
897 	struct vm_map_entry *entry;
898 {
899 	struct vm_amap *amap = entry->aref.ar_amap;
900 	int lcv, slot;
901 	struct vm_anon *anon, *nanon;
902 	struct vm_page *pg, *npg;
903 
904 	/*
905 	 * note that if we unlock the amap then we must ReStart the "lcv" for
906  * loop because some other process could reorder the anons in the
907 	 * am_anon[] array on us while the lock is dropped.
908 	 */
909 
910 ReStart:
911 	amap_lock(amap);
912 
913 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
914 
915 		/*
916 		 * get the page
917 		 */
918 
919 		slot = amap->am_slots[lcv];
920 		anon = amap->am_anon[slot];
921 		simple_lock(&anon->an_lock);
922 		pg = anon->u.an_page;
923 
924 		/*
925 		 * page must be resident since parent is wired
926 		 */
927 
928 		if (pg == NULL)
929 		    panic("amap_cow_now: non-resident wired page in anon %p",
930 			anon);
931 
932 		/*
933 		 * if the anon ref count is one and the page is not loaned,
934 		 * then we are safe (the child has exclusive access to the
935 		 * page).  if the page is loaned, then it must already be
936 		 * mapped read-only.
937 		 *
938 		 * we only need to get involved when these are not true.
939 		 * [note: if loan_count == 0, then the anon must own the page]
940 		 */
941 
942 		if (anon->an_ref > 1 && pg->loan_count == 0) {
943 
944 			/*
945 			 * if the page is busy then we have to unlock, wait for
946 			 * it and then restart.
947 			 */
948 			if (pg->flags & PG_BUSY) {
949 				pg->flags |= PG_WANTED;
950 				amap_unlock(amap);
951 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
952 				    "cownow", 0);
953 				goto ReStart;
954 			}
955 
956 			/*
957 			 * ok, time to do a copy-on-write to a new anon
958 			 */
959 			nanon = uvm_analloc();
960 			if (nanon) {
961 				/* nanon is locked! */
962 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
963 			} else
964 				npg = NULL;	/* XXX: quiet gcc warning */
965 
966 			if (nanon == NULL || npg == NULL) {
967 				/* out of memory */
968 				/*
969 				 * XXXCDC: we should cause fork to fail, but
970 				 * we can't ...
971 				 */
972 				if (nanon) {
973 					nanon->an_ref--;
974 					simple_unlock(&nanon->an_lock);
975 					uvm_anfree(nanon);
976 				}
977 				simple_unlock(&anon->an_lock);
978 				amap_unlock(amap);
979 				uvm_wait("cownowpage");
980 				goto ReStart;
981 			}
982 
983 			/*
984 			 * got it... now we can copy the data and replace anon
985 			 * with our new one...
986 			 */
987 
988 			uvm_pagecopy(pg, npg);		/* old -> new */
989 			anon->an_ref--;			/* can't drop to zero */
990 			amap->am_anon[slot] = nanon;	/* replace */
991 
992 			/*
993 			 * drop PG_BUSY on new page ... since we have had its
994 			 * owner locked the whole time it can't be
995 			 * PG_RELEASED | PG_WANTED.
996 			 */
997 
998 			uvm_lock_pageq();
999 			uvm_pageactivate(npg);
1000 			uvm_unlock_pageq();
1001 			npg->flags &= ~(PG_BUSY|PG_FAKE);
1002 			UVM_PAGE_OWN(npg, NULL);
1003 			simple_unlock(&nanon->an_lock);
1004 		}
1005 		simple_unlock(&anon->an_lock);
1006 	}
1007 	amap_unlock(amap);
1008 }
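
/*
 * illustrative sketch (not compiled): how a fork-style caller resolves COW
 * for a wired entry, standing in for the real code in uvm_map.c.  the
 * names "new_map", "new_entry" and "old_entry" are assumptions here.
 */
#if 0
	if (UVM_ET_ISNEEDSCOPY(new_entry))
		amap_copy(new_map, new_entry, M_WAITOK, FALSE, 0, 0);
	if (old_entry->wired_count != 0)
		amap_cow_now(new_map, new_entry);
#endif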
1009 
1010 /*
1011  * amap_splitref: split a single reference into two separate references
1012  *
1013  * => called from uvm_map's clip routines
1014  * => origref's map should be locked
1015  * => origref->ar_amap should be unlocked (we will lock)
1016  */
1017 void
1018 amap_splitref(origref, splitref, offset)
1019 	struct vm_aref *origref, *splitref;
1020 	vaddr_t offset;
1021 {
1022 	int leftslots;
1023 
1024 	AMAP_B2SLOT(leftslots, offset);
1025 	if (leftslots == 0)
1026 		panic("amap_splitref: split at zero offset");
1027 
1028 	amap_lock(origref->ar_amap);
1029 
1030 	/*
1031 	 * now: amap is locked and its arrays are valid.
1032 	 */
1033 
1034 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
1035 		panic("amap_splitref: map size check failed");
1036 
1037 #ifdef UVM_AMAP_PPREF
1038 	/*
1039 	 * establish ppref before we add a duplicate reference to the amap
1040 	 */
1041 	if (origref->ar_amap->am_ppref == NULL)
1042 		amap_pp_establish(origref->ar_amap, origref->ar_pageoff);
1043 #endif
1044 
1045 	splitref->ar_amap = origref->ar_amap;
1046 	splitref->ar_amap->am_ref++;		/* not a share reference */
1047 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
1048 
1049 	amap_unlock(origref->ar_amap);
1050 }
1051 
1052 #ifdef UVM_AMAP_PPREF
1053 
1054 /*
1055  * amap_pp_establish: add a ppref array to an amap, if possible
1056  *
1057  * => amap locked by caller
1058  */
1059 void
1060 amap_pp_establish(amap, offset)
1061 	struct vm_amap *amap;
1062 	vaddr_t offset;
1063 {
1064 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
1065 	    M_UVMAMAP, M_NOWAIT);
1066 
1067 	/*
1068 	 * if we fail then we just won't use ppref for this amap
1069 	 */
1070 
1071 	if (amap->am_ppref == NULL) {
1072 		amap->am_ppref = PPREF_NONE;	/* not using it */
1073 		return;
1074 	}
1075 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
1076 	pp_setreflen(amap->am_ppref, 0, 0, offset);
1077 	pp_setreflen(amap->am_ppref, offset, amap->am_ref,
1078 	    amap->am_nslot - offset);
1079 	return;
1080 }
1081 
1082 /*
1083  * amap_pp_adjref: adjust reference count to a part of an amap using the
1084  * per-page reference count array.
1085  *
1086  * => map and amap locked by caller
1087  * => caller must check that ppref != PPREF_NONE before calling
1088  */
1089 void
1090 amap_pp_adjref(amap, curslot, slotlen, adjval)
1091 	struct vm_amap *amap;
1092 	int curslot;
1093 	vsize_t slotlen;
1094 	int adjval;
1095 {
1096 	int stopslot, *ppref, lcv, prevlcv;
1097 	int ref, len, prevref, prevlen;
1098 
1099 	stopslot = curslot + slotlen;
1100 	ppref = amap->am_ppref;
1101 	prevlcv = 0;
1102 
1103 	/*
1104 	 * first advance to the correct place in the ppref array,
1105 	 * fragment if needed.
1106 	 */
1107 
1108 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
1109 		pp_getreflen(ppref, lcv, &ref, &len);
1110 		if (lcv + len > curslot) {     /* goes past start? */
1111 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
1112 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
1113 			len = curslot - lcv;   /* new length of entry @ lcv */
1114 		}
1115 		prevlcv = lcv;
1116 	}
1117 	if (lcv != 0)
1118 		pp_getreflen(ppref, prevlcv, &prevref, &prevlen);
1119 	else {
1120 		/* Ensure that the "prevref == ref" test below always
1121 		 * fails, since we're starting from the beginning of
1122 		 * the ppref array; that is, there is no previous
1123 		 * chunk.
1124 		 */
1125 		prevref = -1;
1126 		prevlen = 0;
1127 	}
1128 
1129 	/*
1130 	 * now adjust reference counts in range.  merge the first
1131 	 * changed entry with the last unchanged entry if possible.
1132 	 */
1133 
1134 	if (lcv != curslot)
1135 		panic("amap_pp_adjref: overshot target");
1136 
1137 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
1138 		pp_getreflen(ppref, lcv, &ref, &len);
1139 		if (lcv + len > stopslot) {     /* goes past end? */
1140 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
1141 			pp_setreflen(ppref, stopslot, ref,
1142 			    len - (stopslot - lcv));
1143 			len = stopslot - lcv;
1144 		}
1145 		ref += adjval;
1146 		if (ref < 0)
1147 			panic("amap_pp_adjref: negative reference count");
1148 		if (lcv == prevlcv + prevlen && ref == prevref) {
1149 			pp_setreflen(ppref, prevlcv, ref, prevlen + len);
1150 		} else {
1151 			pp_setreflen(ppref, lcv, ref, len);
1152 		}
1153 		if (ref == 0)
1154 			amap_wiperange(amap, lcv, len);
1155 	}
1156 
1157 }
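
/*
 * illustrative sketch (not compiled): a worked amap_pp_adjref() call.
 * assume an 8-slot amap whose ppref array says "ref 2 for all 8 slots",
 * i.e. ppref[0] == -3 and ppref[1] == 8.  dropping one reference on
 * slots 2-5 (say, after losing a partial reference):
 */
#if 0
	amap_pp_adjref(amap, 2, 4, -1);

	/*
	 * afterwards the array describes three chunks:
	 *   slots 0-1: ref 2	(ppref[0] == -3, ppref[1] == 2)
	 *   slots 2-5: ref 1	(ppref[2] == -2, ppref[3] == 4)
	 *   slots 6-7: ref 2	(ppref[6] == -3, ppref[7] == 2)
	 */
#endif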
1158 
1159 /*
1160  * amap_wiperange: wipe out a range of an amap
1161  * [different from amap_wipeout because the amap is kept intact]
1162  *
1163  * => both map and amap must be locked by caller.
1164  */
1165 void
1166 amap_wiperange(amap, slotoff, slots)
1167 	struct vm_amap *amap;
1168 	int slotoff, slots;
1169 {
1170 	int byanon, lcv, stop, curslot, ptr, slotend;
1171 	struct vm_anon *anon;
1172 
1173 	/*
1174 	 * we can either traverse the amap by am_anon or by am_slots depending
1175 	 * on which is cheaper.    decide now.
1176 	 */
1177 
1178 	if (slots < amap->am_nused) {
1179 		byanon = TRUE;
1180 		lcv = slotoff;
1181 		stop = slotoff + slots;
1182 		slotend = 0;
1183 	} else {
1184 		byanon = FALSE;
1185 		lcv = 0;
1186 		stop = amap->am_nused;
1187 		slotend = slotoff + slots;
1188 	}
1189 
1190 	while (lcv < stop) {
1191 		int refs;
1192 
1193 		if (byanon) {
1194 			curslot = lcv++;	/* lcv advances here */
1195 			if (amap->am_anon[curslot] == NULL)
1196 				continue;
1197 		} else {
1198 			curslot = amap->am_slots[lcv];
1199 			if (curslot < slotoff || curslot >= slotend) {
1200 				lcv++;		/* lcv advances here */
1201 				continue;
1202 			}
1203 			stop--;	/* drop stop, since anon will be removed */
1204 		}
1205 		anon = amap->am_anon[curslot];
1206 
1207 		/*
1208 		 * remove it from the amap
1209 		 */
1210 
1211 		amap->am_anon[curslot] = NULL;
1212 		ptr = amap->am_bckptr[curslot];
1213 		if (ptr != (amap->am_nused - 1)) {
1214 			amap->am_slots[ptr] =
1215 			    amap->am_slots[amap->am_nused - 1];
1216 			amap->am_bckptr[amap->am_slots[ptr]] =
1217 			    ptr;    /* back ptr. */
1218 		}
1219 		amap->am_nused--;
1220 
1221 		/*
1222 		 * drop anon reference count
1223 		 */
1224 
1225 		simple_lock(&anon->an_lock);
1226 		refs = --anon->an_ref;
1227 		simple_unlock(&anon->an_lock);
1228 		if (refs == 0) {
1229 
1230 			/*
1231 			 * we just eliminated the last reference to an anon.
1232 			 * free it.
1233 			 */
1234 
1235 			uvm_anfree(anon);
1236 		}
1237 	}
1238 }
1239 
1240 #endif
1241