1 /*	$NetBSD: uvm_amap.c,v 1.40 2001/12/05 01:33:09 enami Exp $	*/
2 
3 /*
4  *
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *      This product includes software developed by Charles D. Cranor and
19  *      Washington University.
20  * 4. The name of the author may not be used to endorse or promote products
21  *    derived from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 /*
36  * uvm_amap.c: amap operations
37  */
38 
39 /*
40  * this file contains functions that perform operations on amaps.  see
41  * uvm_amap.h for a brief explanation of the role of amaps in uvm.
42  */
43 
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: uvm_amap.c,v 1.40 2001/12/05 01:33:09 enami Exp $");
46 
47 #undef UVM_AMAP_INLINE		/* enable/disable amap inlines */
48 
49 #include "opt_uvmhist.h"
50 
51 #include <sys/param.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/malloc.h>
55 #include <sys/kernel.h>
56 #include <sys/pool.h>
57 
58 #define UVM_AMAP_C		/* ensure disabled inlines are in */
59 #include <uvm/uvm.h>
60 #include <uvm/uvm_swap.h>
61 
62 /*
63  * pool for allocation of vm_amap structures.  note that the pool has
64  * its own simplelock for its protection.  also note that in order to
65  * avoid an endless loop, the amap pool's allocator cannot allocate
66  * memory from an amap (it currently goes through the kernel uobj, so
67  * we are ok).
68  */
69 
70 struct pool uvm_amap_pool;
71 
72 /*
73  * local functions
74  */
75 
76 static struct vm_amap *amap_alloc1 __P((int, int, int));
77 
78 #ifdef UVM_AMAP_PPREF
79 /*
80  * what is ppref?   ppref is an _optional_ amap feature which is used
81  * to keep track of reference counts on a per-page basis.  it is enabled
82  * when UVM_AMAP_PPREF is defined.
83  *
84  * when enabled, an array of ints is allocated for the pprefs.  this
85  * array is allocated only when a partial reference is added to the
86  * map (either by unmapping part of the amap, or gaining a reference
87  * to only a part of an amap).  if the malloc of the array fails
88  * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
89  * that we tried to do pprefs but couldn't alloc the array, so we just
90  * give up (after all, this is an optional feature!).
91  *
92  * the array is divided into page sized "chunks."   for chunks of length 1,
93  * the chunk reference count plus one is stored in that chunk's slot.
94  * for chunks of length > 1 the first slot contains (the reference count
95  * plus one) * -1.    [the negative value indicates that the length is
96  * greater than one.]   the second slot of the chunk contains the length
97  * of the chunk.   here is an example:
98  *
99  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
100  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
101  *              <----------><-><----><-------><----><-><------->
102  * (x = don't care)
103  *
104  * this lets one int hold the ref count for the whole chunk.    note
105  * that the "plus one" part is needed because a reference count of
106  * zero is neither positive nor negative (we need a way to tell a
107  * single zero from a run of them).
108  *
109  * here are some in-line functions to help us.
110  */
111 
112 static __inline void pp_getreflen __P((int *, int, int *, int *));
113 static __inline void pp_setreflen __P((int *, int, int, int));
114 
115 /*
116  * pp_getreflen: get the reference and length for a specific offset
117  *
118  * => ppref's amap must be locked
119  */
120 static __inline void
121 pp_getreflen(ppref, offset, refp, lenp)
122 	int *ppref, offset, *refp, *lenp;
123 {
124 
125 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
126 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
127 		*lenp = 1;
128 	} else {
129 		*refp = (ppref[offset] * -1) - 1;
130 		*lenp = ppref[offset+1];
131 	}
132 }
133 
134 /*
135  * pp_setreflen: set the reference and length for a specific offset
136  *
137  * => ppref's amap must be locked
138  */
139 static __inline void
140 pp_setreflen(ppref, offset, ref, len)
141 	int *ppref, offset, ref, len;
142 {
143 	if (len == 1) {
144 		ppref[offset] = ref + 1;
145 	} else {
146 		ppref[offset] = (ref + 1) * -1;
147 		ppref[offset+1] = len;
148 	}
149 }
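/*
 * Illustrative sketch (kept under "#if 0", not compiled): how the first
 * few chunks of the example table in the comment above would be built
 * and read back with pp_setreflen() and pp_getreflen().  The array size
 * and values are for demonstration only.
 */
#if 0
static void
pp_example(void)
{
	int ppref[16];
	int ref, len;

	pp_setreflen(ppref, 0, 2, 4);	/* slots 0-3, ref 2: -3, 4 */
	pp_setreflen(ppref, 4, 3, 1);	/* slot 4, ref 3:     4    */
	pp_setreflen(ppref, 5, 1, 2);	/* slots 5-6, ref 1: -2, 2 */
	pp_setreflen(ppref, 7, 0, 3);	/* slots 7-9, ref 0: -1, 3 */

	pp_getreflen(ppref, 0, &ref, &len);	/* ref == 2, len == 4 */
	pp_getreflen(ppref, 4, &ref, &len);	/* ref == 3, len == 1 */
	pp_getreflen(ppref, 7, &ref, &len);	/* ref == 0, len == 3 */
}
#endif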
150 #endif
151 
152 /*
153  * amap_init: called at boot time to init global amap data structures
154  */
155 
156 void
157 amap_init(void)
158 {
159 
160 	/*
161 	 * Initialize the vm_amap pool.
162 	 */
163 
164 	pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
165 	    "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
166 	    M_UVMAMAP);
167 }
168 
169 /*
170  * amap_alloc1: internal function that allocates an amap, but does not
171  *	init the overlay.
172  *
173  * => lock on returned amap is init'd
174  */
175 static struct vm_amap *
176 amap_alloc1(slots, padslots, waitf)
177 	int slots, padslots, waitf;
178 {
179 	struct vm_amap *amap;
180 	int totalslots;
181 
182 	amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
183 	if (amap == NULL)
184 		return(NULL);
185 
186 	totalslots = malloc_roundup((slots + padslots) * sizeof(int)) /
187 	    sizeof(int);
188 	simple_lock_init(&amap->am_l);
189 	amap->am_ref = 1;
190 	amap->am_flags = 0;
191 #ifdef UVM_AMAP_PPREF
192 	amap->am_ppref = NULL;
193 #endif
194 	amap->am_maxslot = totalslots;
195 	amap->am_nslot = slots;
196 	amap->am_nused = 0;
197 
198 	amap->am_slots = malloc(totalslots * sizeof(int), M_UVMAMAP,
199 	    waitf);
200 	if (amap->am_slots == NULL)
201 		goto fail1;
202 
203 	amap->am_bckptr = malloc(totalslots * sizeof(int), M_UVMAMAP, waitf);
204 	if (amap->am_bckptr == NULL)
205 		goto fail2;
206 
207 	amap->am_anon = malloc(totalslots * sizeof(struct vm_anon *),
208 	    M_UVMAMAP, waitf);
209 	if (amap->am_anon == NULL)
210 		goto fail3;
211 
212 	return(amap);
213 
214 fail3:
215 	free(amap->am_bckptr, M_UVMAMAP);
216 fail2:
217 	free(amap->am_slots, M_UVMAMAP);
218 fail1:
219 	pool_put(&uvm_amap_pool, amap);
220 	return (NULL);
221 }
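/*
 * The three arrays allocated above are kept consistent by the rest of
 * this file as follows: am_anon[] is indexed by slot and holds the anon
 * pointers (or NULL), am_slots[] is a dense list of the am_nused slots
 * currently in use, and am_bckptr[] maps an in-use slot back to its
 * position in am_slots[].  A sketch of the invariant (kept under
 * "#if 0", not compiled; the helper name is made up for illustration):
 */
#if 0
static void
amap_check_slots(amap)
	struct vm_amap *amap;
{
	int lcv;

	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
		KASSERT(amap->am_anon[amap->am_slots[lcv]] != NULL);
		KASSERT(amap->am_bckptr[amap->am_slots[lcv]] == lcv);
	}
}
#endif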
222 
223 /*
224  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
225  *
226  * => caller should ensure sz is a multiple of PAGE_SIZE
227  * => reference count to new amap is set to one
228  * => new amap is returned unlocked
229  */
230 
231 struct vm_amap *
232 amap_alloc(sz, padsz, waitf)
233 	vaddr_t sz, padsz;
234 	int waitf;
235 {
236 	struct vm_amap *amap;
237 	int slots, padslots;
238 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
239 
240 	AMAP_B2SLOT(slots, sz);
241 	AMAP_B2SLOT(padslots, padsz);
242 
243 	amap = amap_alloc1(slots, padslots, waitf);
244 	if (amap)
245 		memset(amap->am_anon, 0,
246 		    amap->am_maxslot * sizeof(struct vm_anon *));
247 
248 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
249 	return(amap);
250 }
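/*
 * Example (illustrative, assuming 4 KB pages): an 8 KB anonymous region
 * with no padding would be set up with
 *
 *	amap = amap_alloc(8192, 0, M_WAITOK);
 *
 * giving an amap with am_nslot == 2; am_maxslot may be larger because
 * amap_alloc1() rounds the slot arrays up to a malloc bucket size.
 */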
251 
252 
253 /*
254  * amap_free: free an amap
255  *
256  * => the amap must be unlocked
257  * => the amap should have a zero reference count and be empty
258  */
259 void
260 amap_free(amap)
261 	struct vm_amap *amap;
262 {
263 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
264 
265 	KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
266 	LOCK_ASSERT(!simple_lock_held(&amap->am_l));
267 	free(amap->am_slots, M_UVMAMAP);
268 	free(amap->am_bckptr, M_UVMAMAP);
269 	free(amap->am_anon, M_UVMAMAP);
270 #ifdef UVM_AMAP_PPREF
271 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
272 		free(amap->am_ppref, M_UVMAMAP);
273 #endif
274 	pool_put(&uvm_amap_pool, amap);
275 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
276 }
277 
278 /*
279  * amap_extend: extend the size of an amap (if needed)
280  *
281  * => called from uvm_map when we want to extend an amap to cover
282  *    a new mapping (rather than allocate a new one)
283  * => amap should be unlocked (we will lock it)
284  * => to safely extend an amap it should have a reference count of
285  *    one (thus it can't be shared)
286  * => XXXCDC: needs a waitflag or failure return value?
287  * => XXXCDC: support padding at this level?
288  */
289 void
290 amap_extend(entry, addsize)
291 	struct vm_map_entry *entry;
292 	vsize_t addsize;
293 {
294 	struct vm_amap *amap = entry->aref.ar_amap;
295 	int slotoff = entry->aref.ar_pageoff;
296 	int slotmapped, slotadd, slotneed, slotadded, slotalloc;
297 #ifdef UVM_AMAP_PPREF
298 	int *newppref, *oldppref;
299 #endif
300 	int *newsl, *newbck, *oldsl, *oldbck;
301 	struct vm_anon **newover, **oldover;
302 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
303 
304 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x)", entry,addsize,0,0);
305 
306 	/*
307 	 * first, determine how many slots we need in the amap.  don't
308 	 * forget that ar_pageoff could be non-zero: this means that
309 	 * there are some unused slots before us in the amap.
310 	 */
311 
312 	amap_lock(amap);					/* lock! */
313 
314 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
315 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
316 	slotneed = slotoff + slotmapped + slotadd;
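	/*
	 * for example (illustrative numbers): with ar_pageoff of 2, an
	 * entry mapping 8 pages and an addsize of 4 pages, we need
	 * slotneed = 2 + 8 + 4 = 14 slots.
	 */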
317 
318 	/*
319 	 * case 1: we already have enough slots in the map and thus
320 	 * only need to bump the reference counts on the slots we are
321 	 * adding.
322 	 */
323 
324 	if (amap->am_nslot >= slotneed) {
325 #ifdef UVM_AMAP_PPREF
326 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
327 			amap_pp_adjref(amap, slotoff + slotmapped, slotadd, 1);
328 		}
329 #endif
330 		amap_unlock(amap);
331 		UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, slotneed=%d",
332 		    amap, slotneed, 0, 0);
333 		return;				/* done! */
334 	}
335 
336 	/*
337 	 * case 2: we pre-allocated slots for use and we just need to
338 	 * bump nslot up to account for these slots.
339 	 */
340 	if (amap->am_maxslot >= slotneed) {
341 #ifdef UVM_AMAP_PPREF
342 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
343 			if ((slotoff + slotmapped) < amap->am_nslot)
344 				amap_pp_adjref(amap, slotoff + slotmapped,
345 				    (amap->am_nslot - (slotoff + slotmapped)),
346 				    1);
347 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
348 			   slotneed - amap->am_nslot);
349 		}
350 #endif
351 		amap->am_nslot = slotneed;
352 		amap_unlock(amap);
353 		/*
354 		 * no need to zero am_anon since that was done at
355 		 * alloc time and we never shrink an allocation.
356 		 */
357 		UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d",
358 		    amap, slotneed, 0, 0);
359 		return;
360 	}
361 
362 	/*
363 	 * case 3: we need to malloc a new amap and copy all the amap
364 	 * data over from old amap to the new one.
365 	 *
366 	 * XXXCDC: could we take advantage of a kernel realloc()?
367 	 */
368 
369 	amap_unlock(amap);	/* unlock in case we sleep in malloc */
370 	slotalloc = malloc_roundup(slotneed * sizeof(int)) / sizeof(int);
371 #ifdef UVM_AMAP_PPREF
372 	newppref = NULL;
373 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
374 		newppref = malloc(slotalloc * sizeof(int), M_UVMAMAP,
375 		    M_NOWAIT);
376 		if (newppref == NULL) {
377 			/* give up if malloc fails */
378 			free(amap->am_ppref, M_UVMAMAP);
379 			amap->am_ppref = PPREF_NONE;
380 		}
381 	}
382 #endif
383 	newsl = malloc(slotalloc * sizeof(int), M_UVMAMAP, M_WAITOK);
384 	newbck = malloc(slotalloc * sizeof(int), M_UVMAMAP, M_WAITOK);
385 	newover = malloc(slotalloc * sizeof(struct vm_anon *),
386 	    M_UVMAMAP, M_WAITOK);
387 	amap_lock(amap);			/* re-lock! */
388 	KASSERT(amap->am_maxslot < slotneed);
389 
390 	/*
391 	 * now copy everything over to new malloc'd areas...
392 	 */
393 
394 	slotadded = slotalloc - amap->am_nslot;
395 
396 	/* do am_slots */
397 	oldsl = amap->am_slots;
398 	memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
399 	amap->am_slots = newsl;
400 
401 	/* do am_anon */
402 	oldover = amap->am_anon;
403 	memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
404 	memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) * slotadded);
405 	amap->am_anon = newover;
406 
407 	/* do am_bckptr */
408 	oldbck = amap->am_bckptr;
409 	memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
410 	amap->am_bckptr = newbck;
411 
412 #ifdef UVM_AMAP_PPREF
413 	/* do ppref */
414 	oldppref = amap->am_ppref;
415 	if (newppref) {
416 		memcpy(newppref, oldppref, sizeof(int) * amap->am_nslot);
417 		memset(newppref + amap->am_nslot, 0, sizeof(int) * slotadded);
418 		amap->am_ppref = newppref;
419 		if ((slotoff + slotmapped) < amap->am_nslot)
420 			amap_pp_adjref(amap, slotoff + slotmapped,
421 			    (amap->am_nslot - (slotoff + slotmapped)), 1);
422 		pp_setreflen(newppref, amap->am_nslot, 1,
423 		    slotneed - amap->am_nslot);
424 	}
425 #endif
426 
427 	/* update master values */
428 	amap->am_nslot = slotneed;
429 	amap->am_maxslot = slotalloc;
430 
431 	amap_unlock(amap);
432 	free(oldsl, M_UVMAMAP);
433 	free(oldbck, M_UVMAMAP);
434 	free(oldover, M_UVMAMAP);
435 #ifdef UVM_AMAP_PPREF
436 	if (oldppref && oldppref != PPREF_NONE)
437 		free(oldppref, M_UVMAMAP);
438 #endif
439 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
440 	    amap, slotneed, 0, 0);
441 }
442 
443 /*
444  * amap_share_protect: change protection of anons in a shared amap
445  *
446  * for shared amaps, given the current data structure layout, it is
447  * not possible for us to directly locate all maps referencing the
448  * shared anon (to change the protection).  in order to protect data
449  * in shared maps we use pmap_page_protect().  [this is useful for IPC
450  * mechanisms like map entry passing that may want to write-protect
451  * all mappings of a shared amap.]  we traverse am_anon or am_slots
452  * depending on the current state of the amap.
453  *
454  * => entry's map and amap must be locked by the caller
455  */
456 void
457 amap_share_protect(entry, prot)
458 	struct vm_map_entry *entry;
459 	vm_prot_t prot;
460 {
461 	struct vm_amap *amap = entry->aref.ar_amap;
462 	int slots, lcv, slot, stop;
463 
464 	LOCK_ASSERT(simple_lock_held(&amap->am_l));
465 
466 	AMAP_B2SLOT(slots, (entry->end - entry->start));
467 	stop = entry->aref.ar_pageoff + slots;
468 
469 	if (slots < amap->am_nused) {
470 		/* cheaper to traverse am_anon */
471 		for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
472 			if (amap->am_anon[lcv] == NULL)
473 				continue;
474 			if (amap->am_anon[lcv]->u.an_page != NULL)
475 				pmap_page_protect(amap->am_anon[lcv]->u.an_page,
476 						  prot);
477 		}
478 		return;
479 	}
480 
481 	/* cheaper to traverse am_slots */
482 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
483 		slot = amap->am_slots[lcv];
484 		if (slot < entry->aref.ar_pageoff || slot >= stop)
485 			continue;
486 		if (amap->am_anon[slot]->u.an_page != NULL)
487 			pmap_page_protect(amap->am_anon[slot]->u.an_page, prot);
488 	}
489 }
490 
491 /*
492  * amap_wipeout: wipe out all anons in an amap; then free the amap!
493  *
494  * => called from amap_unref when the final reference to an amap is
495  *	discarded (i.e. when reference count == 1)
496  * => the amap should be locked (by the caller)
497  */
498 
499 void
500 amap_wipeout(amap)
501 	struct vm_amap *amap;
502 {
503 	int lcv, slot;
504 	struct vm_anon *anon;
505 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
506 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
507 
508 	amap_unlock(amap);
509 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
510 		int refs;
511 
512 		slot = amap->am_slots[lcv];
513 		anon = amap->am_anon[slot];
514 
515 		if (anon == NULL || anon->an_ref == 0)
516 			panic("amap_wipeout: corrupt amap");
517 
518 		simple_lock(&anon->an_lock);
519 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
520 		    anon->an_ref, 0, 0);
521 		refs = --anon->an_ref;
522 		simple_unlock(&anon->an_lock);
523 		if (refs == 0) {
524 
525 			/*
526 			 * we had the last reference to a vm_anon. free it.
527 			 */
528 
529 			uvm_anfree(anon);
530 		}
531 
532 		/*
533 		 * XXX
534 		 * releasing the swap space held by N anons is an O(N^2)
535 		 * operation because of the implementation of extents.
536 		 * if there are many anons, tearing down an exiting process'
537 		 * address space can take many seconds, which causes very
538 		 * annoying pauses.  we yield here to give other processes
539 		 * a chance to run.  this should be removed once the performance
540 		 * of swap space management is improved.
541 		 */
542 
543 		if (curproc->p_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD)
544 			preempt(NULL);
545 	}
546 
547 	/*
548 	 * now we free the map
549 	 */
550 
551 	amap->am_ref = 0;	/* ... was one */
552 	amap->am_nused = 0;
553 	amap_free(amap);	/* will free amap */
554 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
555 }
556 
557 /*
558  * amap_copy: ensure that a map entry's "needs_copy" flag is false
559  *	by copying the amap if necessary.
560  *
561  * => an entry with a null amap pointer will get a new (blank) one.
562  * => the map that the map entry belongs to must be locked by caller.
563  * => the amap currently attached to "entry" (if any) must be unlocked.
564  * => if canchunk is true, then we may clip the entry into a chunk
565  * => "startva" and "endva" are used only if canchunk is true.  they are
566  *     used to limit chunking (e.g. if you have a large space that you
567  *     know you are going to need to allocate amaps for, there is no point
568  *     in allowing that to be chunked)
569  */
570 
571 void
572 amap_copy(map, entry, waitf, canchunk, startva, endva)
573 	struct vm_map *map;
574 	struct vm_map_entry *entry;
575 	int waitf;
576 	boolean_t canchunk;
577 	vaddr_t startva, endva;
578 {
579 	struct vm_amap *amap, *srcamap;
580 	int slots, lcv;
581 	vaddr_t chunksize;
582 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
583 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)",
584 		    map, entry, waitf, 0);
585 
586 	/*
587 	 * is there a map to copy?   if not, create one from scratch.
588 	 */
589 
590 	if (entry->aref.ar_amap == NULL) {
591 
592 		/*
593 		 * check to see if we have a large amap that we can
594 		 * chunk.  we align startva/endva to chunk-sized
595 		 * boundaries and then clip to them.
596 		 */
597 
598 		if (canchunk && atop(entry->end - entry->start) >=
599 		    UVM_AMAP_LARGE) {
600 			/* convert slots to bytes */
601 			chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
602 			startva = (startva / chunksize) * chunksize;
603 			endva = roundup(endva, chunksize);
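			/*
			 * e.g. (illustrative, assuming 4 KB pages and a
			 * 16 slot UVM_AMAP_CHUNK): chunksize is 64 KB,
			 * so startva 0x12345000 rounds down to
			 * 0x12340000 and endva 0x12356000 rounds up to
			 * 0x12360000 before clipping.
			 */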
604 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
605 			    " to 0x%x->0x%x", entry->start, entry->end, startva,
606 			    endva);
607 			UVM_MAP_CLIP_START(map, entry, startva);
608 			/* watch out for endva wrap-around! */
609 			if (endva >= startva)
610 				UVM_MAP_CLIP_END(map, entry, endva);
611 		}
612 
613 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
614 		entry->start, entry->end, 0, 0);
615 		entry->aref.ar_pageoff = 0;
616 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
617 		    waitf);
618 		if (entry->aref.ar_amap != NULL)
619 			entry->etype &= ~UVM_ET_NEEDSCOPY;
620 		return;
621 	}
622 
623 	/*
624 	 * first check and see if we are the only map entry
625 	 * referencing the amap we currently have.  if so, then we can
626 	 * just take it over rather than copying it.  note that we are
627 	 * reading am_ref with the amap unlocked... the value can only
628 	 * be one if we have the only reference to the amap (via our
629 	 * locked map).  if we are greater than one we fall through to
630 	 * the next case (where we double check the value).
631 	 */
632 
633 	if (entry->aref.ar_amap->am_ref == 1) {
634 		entry->etype &= ~UVM_ET_NEEDSCOPY;
635 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
636 		    0, 0, 0, 0);
637 		return;
638 	}
639 
640 	/*
641 	 * looks like we need to copy the map.
642 	 */
643 
644 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
645 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
646 	AMAP_B2SLOT(slots, entry->end - entry->start);
647 	amap = amap_alloc1(slots, 0, waitf);
648 	if (amap == NULL) {
649 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
650 		return;
651 	}
652 	srcamap = entry->aref.ar_amap;
653 	amap_lock(srcamap);
654 
655 	/*
656 	 * need to double check reference count now that we've got the
657 	 * src amap locked down.  the reference count could have
658 	 * changed while we were in malloc.  if the reference count
659 	 * dropped down to one we take over the old map rather than
660 	 * copying the amap.
661 	 */
662 
663 	if (srcamap->am_ref == 1) {		/* take it over? */
664 		entry->etype &= ~UVM_ET_NEEDSCOPY;
665 		amap->am_ref--;		/* drop final reference to map */
666 		amap_unlock(amap);
667 		amap_free(amap);	/* dispose of new (unused) amap */
668 		amap_unlock(srcamap);
669 		return;
670 	}
671 
672 	/*
673 	 * we must copy it now.
674 	 */
675 
676 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
677 	for (lcv = 0 ; lcv < slots; lcv++) {
678 		amap->am_anon[lcv] =
679 		    srcamap->am_anon[entry->aref.ar_pageoff + lcv];
680 		if (amap->am_anon[lcv] == NULL)
681 			continue;
682 		simple_lock(&amap->am_anon[lcv]->an_lock);
683 		amap->am_anon[lcv]->an_ref++;
684 		simple_unlock(&amap->am_anon[lcv]->an_lock);
685 		amap->am_bckptr[lcv] = amap->am_nused;
686 		amap->am_slots[amap->am_nused] = lcv;
687 		amap->am_nused++;
688 	}
689 	memset(&amap->am_anon[lcv], 0,
690 	    (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
691 
692 	/*
693 	 * drop our reference to the old amap (srcamap) and unlock.
694 	 * we know that the reference count on srcamap is greater than
695 	 * one (we checked above), so there is no way we could drop
696 	 * the count to zero.  [and no need to worry about freeing it]
697 	 */
698 
699 	srcamap->am_ref--;
700 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
701 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
702 #ifdef UVM_AMAP_PPREF
703 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
704 		amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
705 		    (entry->end - entry->start) >> PAGE_SHIFT, -1);
706 	}
707 #endif
708 
709 	amap_unlock(srcamap);
710 
711 	/*
712 	 * install new amap.
713 	 */
714 
715 	entry->aref.ar_pageoff = 0;
716 	entry->aref.ar_amap = amap;
717 	entry->etype &= ~UVM_ET_NEEDSCOPY;
718 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
719 }
720 
721 /*
722  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
723  *
724  *	called during fork(2) when the parent process has a wired map
725  *	entry.   in that case we want to avoid write-protecting pages
726  *	in the parent's map (e.g. like what you'd do for a COW page)
727  *	so we resolve the COW here.
728  *
729  * => assume parent's entry was wired, thus all pages are resident.
730  * => assume pages that are loaned out (loan_count) are already mapped
731  *	read-only in all maps, and thus no need for us to worry about them
732  * => assume both parent and child vm_map's are locked
733  * => caller passes child's map/entry in to us
734  * => if we run out of memory we will unlock the amap and sleep _with_ the
735  *	parent and child vm_map's locked(!).    we have to do this since
736  *	we are in the middle of a fork(2) and we can't let the parent
737 	 *	map change until we are done copying all the map entries.
738  * => XXXCDC: out of memory should cause fork to fail, but there is
739  *	currently no easy way to do this (needs fix)
740  * => page queues must be unlocked (we may lock them)
741  */
742 
743 void
744 amap_cow_now(map, entry)
745 	struct vm_map *map;
746 	struct vm_map_entry *entry;
747 {
748 	struct vm_amap *amap = entry->aref.ar_amap;
749 	int lcv, slot;
750 	struct vm_anon *anon, *nanon;
751 	struct vm_page *pg, *npg;
752 
753 	/*
754 	 * note that if we unlock the amap then we must ReStart the "lcv" for
755 	 * loop because some other process could reorder the anons in the
756 	 * am_anon[] array on us while the lock is dropped.
757 	 */
758 
759 ReStart:
760 	amap_lock(amap);
761 
762 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
763 
764 		/*
765 		 * get the page
766 		 */
767 
768 		slot = amap->am_slots[lcv];
769 		anon = amap->am_anon[slot];
770 		simple_lock(&anon->an_lock);
771 		pg = anon->u.an_page;
772 
773 		/*
774 		 * page must be resident since parent is wired
775 		 */
776 
777 		if (pg == NULL)
778 		    panic("amap_cow_now: non-resident wired page in anon %p",
779 			anon);
780 
781 		/*
782 		 * if the anon ref count is one and the page is not loaned,
783 		 * then we are safe (the child has exclusive access to the
784 		 * page).  if the page is loaned, then it must already be
785 		 * mapped read-only.
786 		 *
787 		 * we only need to get involved when these are not true.
788 		 * [note: if loan_count == 0, then the anon must own the page]
789 		 */
790 
791 		if (anon->an_ref > 1 && pg->loan_count == 0) {
792 
793 			/*
794 			 * if the page is busy then we have to unlock, wait for
795 			 * it and then restart.
796 			 */
797 			if (pg->flags & PG_BUSY) {
798 				pg->flags |= PG_WANTED;
799 				amap_unlock(amap);
800 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
801 				    "cownow", 0);
802 				goto ReStart;
803 			}
804 
805 			/*
806 			 * ok, time to do a copy-on-write to a new anon
807 			 */
808 			nanon = uvm_analloc();
809 			if (nanon) {
810 				/* nanon is locked! */
811 				npg = uvm_pagealloc(NULL, 0, nanon, 0);
812 			} else
813 				npg = NULL;	/* XXX: quiet gcc warning */
814 
815 			if (nanon == NULL || npg == NULL) {
816 				/* out of memory */
817 				/*
818 				 * XXXCDC: we should cause fork to fail, but
819 				 * we can't ...
820 				 */
821 				if (nanon) {
822 					nanon->an_ref--;
823 					simple_unlock(&nanon->an_lock);
824 					uvm_anfree(nanon);
825 				}
826 				simple_unlock(&anon->an_lock);
827 				amap_unlock(amap);
828 				uvm_wait("cownowpage");
829 				goto ReStart;
830 			}
831 
832 			/*
833 			 * got it... now we can copy the data and replace anon
834 			 * with our new one...
835 			 */
836 
837 			uvm_pagecopy(pg, npg);		/* old -> new */
838 			anon->an_ref--;			/* can't drop to zero */
839 			amap->am_anon[slot] = nanon;	/* replace */
840 
841 			/*
842 			 * drop PG_BUSY on new page ... since we have had its
843 			 * owner locked the whole time it can't be
844 			 * PG_RELEASED | PG_WANTED.
845 			 */
846 
847 			npg->flags &= ~(PG_BUSY|PG_FAKE);
848 			UVM_PAGE_OWN(npg, NULL);
849 			uvm_lock_pageq();
850 			uvm_pageactivate(npg);
851 			uvm_unlock_pageq();
852 			simple_unlock(&nanon->an_lock);
853 		}
854 		simple_unlock(&anon->an_lock);
855 	}
856 	amap_unlock(amap);
857 }
858 
859 /*
860  * amap_splitref: split a single reference into two separate references
861  *
862  * => called from uvm_map's clip routines
863  * => origref's map should be locked
864  * => origref->ar_amap should be unlocked (we will lock)
865  */
866 void
867 amap_splitref(origref, splitref, offset)
868 	struct vm_aref *origref, *splitref;
869 	vaddr_t offset;
870 {
871 	int leftslots;
872 
873 	AMAP_B2SLOT(leftslots, offset);
874 	if (leftslots == 0)
875 		panic("amap_splitref: split at zero offset");
876 
877 	amap_lock(origref->ar_amap);
878 
879 	/*
880 	 * now: the amap is locked and its slot arrays are stable.
881 	 */
882 
883 	if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
884 		panic("amap_splitref: map size check failed");
885 
886 #ifdef UVM_AMAP_PPREF
887         /*
888 	 * establish ppref before we add a duplicate reference to the amap
889 	 */
890 	if (origref->ar_amap->am_ppref == NULL)
891 		amap_pp_establish(origref->ar_amap);
892 #endif
893 
894 	splitref->ar_amap = origref->ar_amap;
895 	splitref->ar_amap->am_ref++;		/* not a share reference */
896 	splitref->ar_pageoff = origref->ar_pageoff + leftslots;
897 
898 	amap_unlock(origref->ar_amap);
899 }
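/*
 * Example (illustrative): splitting at a 3-page offset gives leftslots
 * of 3, so the first 3 slots of the original range stay with origref
 * and splitref->ar_pageoff becomes origref->ar_pageoff + 3; both arefs
 * now share the same amap, whose am_ref has been bumped by one.
 */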
900 
901 #ifdef UVM_AMAP_PPREF
902 
903 /*
904  * amap_pp_establish: add a ppref array to an amap, if possible
905  *
906  * => amap locked by caller
907  */
908 void
909 amap_pp_establish(amap)
910 	struct vm_amap *amap;
911 {
912 	amap->am_ppref = malloc(sizeof(int) * amap->am_maxslot,
913 	    M_UVMAMAP, M_NOWAIT);
914 
915 	/*
916 	 * if we fail then we just won't use ppref for this amap
917 	 */
918 
919 	if (amap->am_ppref == NULL) {
920 		amap->am_ppref = PPREF_NONE;	/* not using it */
921 		return;
922 	}
923 	memset(amap->am_ppref, 0, sizeof(int) * amap->am_maxslot);
924 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
925 	return;
926 }
927 
928 /*
929  * amap_pp_adjref: adjust reference count to a part of an amap using the
930  * per-page reference count array.
931  *
932  * => map and amap locked by caller
933  * => caller must check that ppref != PPREF_NONE before calling
934  */
935 void
936 amap_pp_adjref(amap, curslot, slotlen, adjval)
937 	struct vm_amap *amap;
938 	int curslot;
939 	vsize_t slotlen;
940 	int adjval;
941 {
942 	int stopslot, *ppref, lcv;
943 	int ref, len;
944 
945 	stopslot = curslot + slotlen;
946 	ppref = amap->am_ppref;
947 
948 	/*
949 	 * first advance to the correct place in the ppref array, fragment
950 	 * if needed.
951 	 */
952 
953 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
954 		pp_getreflen(ppref, lcv, &ref, &len);
955 		if (lcv + len > curslot) {     /* goes past start? */
956 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
957 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
958 			len = curslot - lcv;   /* new length of entry @ lcv */
959 		}
960 	}
961 
962 	 * now adjust reference counts in range (make sure we don't overshoot)
963 	 * now adjust reference counts in range (make sure we dont overshoot)
964 	 */
965 
966 	if (lcv != curslot)
967 		panic("amap_pp_adjref: overshot target");
968 
969 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
970 		pp_getreflen(ppref, lcv, &ref, &len);
971 		if (lcv + len > stopslot) {     /* goes past end? */
972 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
973 			pp_setreflen(ppref, stopslot, ref,
974 			    len - (stopslot - lcv));
975 			len = stopslot - lcv;
976 		}
977 		ref = ref + adjval;    /* ADJUST! */
978 		if (ref < 0)
979 			panic("amap_pp_adjref: negative reference count");
980 		pp_setreflen(ppref, lcv, ref, len);
981 		if (ref == 0)
982 			amap_wiperange(amap, lcv, len);
983 	}
984 
985 }
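/*
 * Worked example (illustrative): with a single chunk <ref 1, len 16>
 * starting at slot 0, amap_pp_adjref(amap, 4, 8, 1) first fragments it
 * into <1,4> at slot 0 and <1,12> at slot 4, then splits the second
 * chunk into <1,8> at slot 4 and <1,4> at slot 12, and finally bumps
 * the middle chunk to <2,8>.
 */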
986 
987 /*
988  * amap_wiperange: wipe out a range of an amap
989  * [different from amap_wipeout because the amap is kept intact]
990  *
991  * => both map and amap must be locked by caller.
992  */
993 void
994 amap_wiperange(amap, slotoff, slots)
995 	struct vm_amap *amap;
996 	int slotoff, slots;
997 {
998 	int byanon, lcv, stop, curslot, ptr, slotend;
999 	struct vm_anon *anon;
1000 
1001 	/*
1002 	 * we can either traverse the amap by am_anon or by am_slots depending
1003 	 * on which is cheaper.    decide now.
1004 	 */
1005 
1006 	if (slots < amap->am_nused) {
1007 		byanon = TRUE;
1008 		lcv = slotoff;
1009 		stop = slotoff + slots;
1010 	} else {
1011 		byanon = FALSE;
1012 		lcv = 0;
1013 		stop = amap->am_nused;
1014 		slotend = slotoff + slots;
1015 	}
1016 
1017 	while (lcv < stop) {
1018 		int refs;
1019 
1020 		if (byanon) {
1021 			curslot = lcv++;	/* lcv advances here */
1022 			if (amap->am_anon[curslot] == NULL)
1023 				continue;
1024 		} else {
1025 			curslot = amap->am_slots[lcv];
1026 			if (curslot < slotoff || curslot >= slotend) {
1027 				lcv++;		/* lcv advances here */
1028 				continue;
1029 			}
1030 			stop--;	/* drop stop, since anon will be removed */
1031 		}
1032 		anon = amap->am_anon[curslot];
1033 
1034 		/*
1035 		 * remove it from the amap
1036 		 */
1037 
1038 		amap->am_anon[curslot] = NULL;
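		/*
		 * keep am_slots[] dense: if this slot was not the last
		 * entry in am_slots[], move the last entry into its
		 * place and repair the moved slot's back pointer.
		 */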
1039 		ptr = amap->am_bckptr[curslot];
1040 		if (ptr != (amap->am_nused - 1)) {
1041 			amap->am_slots[ptr] =
1042 			    amap->am_slots[amap->am_nused - 1];
1043 			amap->am_bckptr[amap->am_slots[ptr]] =
1044 			    ptr;    /* back ptr. */
1045 		}
1046 		amap->am_nused--;
1047 
1048 		/*
1049 		 * drop anon reference count
1050 		 */
1051 
1052 		simple_lock(&anon->an_lock);
1053 		refs = --anon->an_ref;
1054 		simple_unlock(&anon->an_lock);
1055 		if (refs == 0) {
1056 
1057 			/*
1058 			 * we just eliminated the last reference to an anon.
1059 			 * free it.
1060 			 */
1061 
1062 			uvm_anfree(anon);
1063 		}
1064 	}
1065 }
1066 
1067 #endif
1068