1 /*	$NetBSD: uvm_amap.c,v 1.7 1998/03/09 00:58:55 mrg Exp $	*/
2 
3 /*
4  * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
5  *	   >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
6  */
7 /*
8  *
9  * Copyright (c) 1997 Charles D. Cranor and Washington University.
10  * All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *      This product includes software developed by Charles D. Cranor and
23  *      Washington University.
24  * 4. The name of the author may not be used to endorse or promote products
25  *    derived from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
28  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
29  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
31  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
32  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
36  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37  *
38  * from: Id: uvm_amap.c,v 1.1.2.25 1998/02/06 22:49:23 chs Exp
39  */
40 
41 /*
42  * uvm_amap.c: uvm amap ops
43  */
44 
45 #include "opt_uvmhist.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/mount.h>
50 #include <sys/proc.h>
51 #include <sys/malloc.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_page.h>
55 #include <vm/vm_kern.h>
56 
57 #include <sys/syscallargs.h>
58 
59 #define UVM_AMAP		/* pull in uvm_amap.h functions */
60 #include <uvm/uvm.h>
61 #include <uvm/uvm_swap.h>
62 
63 /*
64  * local functions
65  */
66 
67 static struct vm_amap *amap_alloc1 __P((int, int, int));
68 
69 #ifdef VM_AMAP_PPREF
70 /*
71  * what is ppref?   ppref is an _optional_ amap feature which is used
72  * to keep track of reference counts on a per-page basis.  it is enabled
73  * when VM_AMAP_PPREF is defined.
74  *
75  * when enabled, an array of ints is allocated for the pprefs.  this
76  * array is allocated only when a partial reference is added to the
77  * map (either by unmapping part of the amap, or gaining a reference
78  * to only a part of an amap).  if the malloc of the array fails
79  * (M_NOWAIT), we set the array pointer to PPREF_NONE to record that
80  * we tried to use ppref but couldn't allocate the array, so we just
81  * give up (after all, this is an optional feature!).
82  *
83  * the array is divided into variable-length "chunks."   for chunks of length 1,
84  * the chunk reference count plus one is stored in that chunk's slot.
85  * for chunks of length > 1 the first slot contains (the reference count
86  * plus one) * -1.    [the negative value indicates that the length is
87  * greater than one.]   the second slot of the chunk contains the length
88  * of the chunk.   here is an example:
89  *
90  * actual REFS:  2  2  2  2  3  1  1  0  0  0  4  4  0  1  1  1
91  *       ppref: -3  4  x  x  4 -2  2 -1  3  x -5  2  1 -2  3  x
92  *              <----------><-><----><-------><----><-><------->
93  * (x = don't care)
94  *
95  * this encoding lets a single int hold the ref count for a whole
96  * chunk.    note that the "plus one" part is needed because a reference
97  * count of zero is neither positive nor negative (we need a way to tell
98  * one zero apart from a run of them).
99  *
100  * here are some in-line functions to help us.
101  */
102 
103 static __inline void pp_getreflen __P((int *, int, int *, int *));
104 static __inline void pp_setreflen __P((int *, int, int, int));
105 
106 /*
107  * pp_getreflen: get the reference and length for a specific offset
108  */
109 static __inline void
110 pp_getreflen(ppref, offset, refp, lenp)
111 	int *ppref, offset, *refp, *lenp;
112 {
113 
114 	if (ppref[offset] > 0) {		/* chunk size must be 1 */
115 		*refp = ppref[offset] - 1;	/* don't forget to adjust */
116 		*lenp = 1;
117 	} else {
118 		*refp = (ppref[offset] * -1) - 1;
119 		*lenp = ppref[offset+1];
120 	}
121 }
122 
123 /*
124  * pp_setreflen: set the reference and length for a specific offset
125  */
126 static __inline void
127 pp_setreflen(ppref, offset, ref, len)
128 	int *ppref, offset, ref, len;
129 {
130 	if (len == 1) {
131 		ppref[offset] = ref + 1;
132 	} else {
133 		ppref[offset] = (ref + 1) * -1;
134 		ppref[offset+1] = len;
135 	}
136 }
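
/*
 * illustrative sketch (not from the original source): a ppref array is
 * walked chunk by chunk with pp_getreflen(), advancing by the returned
 * length each iteration (see amap_pp_adjref() below).   a hypothetical
 * debug helper, pp_dump(), might look like this:
 */
#if 0
static void pp_dump __P((int *, int));

static void
pp_dump(ppref, nslot)
	int *ppref, nslot;
{
	int lcv, ref, len;

	for (lcv = 0 ; lcv < nslot ; lcv += len) {
		pp_getreflen(ppref, lcv, &ref, &len);
		printf("slots %d-%d: ref %d\n", lcv, lcv + len - 1, ref);
	}
}
#endif	/* 0: pp_dump sketch */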
137 #endif
138 
139 /*
140  * amap_alloc1: internal function that allocates an amap, but does not
141  *	init the overlay.
142  *
143  * => lock on returned amap is init'd
144  */
145 static inline struct vm_amap *
146 amap_alloc1(slots, padslots, waitf)
147 	int slots, padslots, waitf;
148 {
149 	struct vm_amap *amap;
150 	int totalslots = slots + padslots;
151 
152 	MALLOC(amap, struct vm_amap *, sizeof(*amap), M_UVMAMAP, waitf);
153 	if (amap == NULL)
154 		return(NULL);
155 
156 	simple_lock_init(&amap->am_l);
157 	amap->am_ref = 1;
158 	amap->am_flags = 0;
159 #ifdef VM_AMAP_PPREF
160 	amap->am_ppref = NULL;
161 #endif
162 	amap->am_maxslot = totalslots;
163 	amap->am_nslot = slots;
164 	amap->am_nused = 0;
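	/*
	 * the amap keeps three parallel arrays of "totalslots" entries each:
	 * am_anon[] holds the anon pointer for each slot (or NULL),
	 * am_slots[] is a packed list of the slot numbers currently in use,
	 * and am_bckptr[] maps a slot number back to its index in am_slots[]
	 * so a slot can be removed in constant time (see amap_wiperange()).
	 */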
165 	MALLOC(amap->am_slots,  int *, totalslots * sizeof(int), M_UVMAMAP, waitf);
166 	if (amap->am_slots == NULL)
167 		goto fail1;
168 	MALLOC(amap->am_bckptr, int *, totalslots * sizeof(int), M_UVMAMAP, waitf);
169 	if (amap->am_bckptr == NULL)
170 		goto fail2;
171 	MALLOC(amap->am_anon, struct vm_anon **,
172 	    totalslots * sizeof(struct vm_anon *), M_UVMAMAP, waitf);
173 	if (amap->am_anon == NULL)
174 		goto fail3;
175 	return(amap);
176 
177 fail3:
178 	FREE(amap->am_bckptr, M_UVMAMAP);
179 fail2:
180 	FREE(amap->am_slots, M_UVMAMAP);
181 fail1:
182 	FREE(amap, M_UVMAMAP);
183 	return (NULL);
184 }
185 
186 /*
187  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
188  *
189  * => caller should ensure sz is a multiple of PAGE_SIZE
190  * => reference count to new amap is set to one
191  * => new amap is returned unlocked
192  */
193 
194 struct vm_amap *
195 amap_alloc(sz, padsz, waitf)
196 	vm_offset_t sz, padsz;
197 	int waitf;
198 {
199 	struct vm_amap *amap;
200 	int slots, padslots;
201 	UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
202 
203 	AMAP_B2SLOT(slots, sz);		/* load slots */
204 	AMAP_B2SLOT(padslots, padsz);
205 
206 	amap = amap_alloc1(slots, padslots, waitf);
207 	if (amap)
208 		bzero(amap->am_anon, (slots + padslots) * sizeof(struct vm_anon *));
209 
210 	UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
211 	return(amap);
212 }
213 
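/*
 * note on the amap life cycle: the reference returned by amap_alloc() is
 * dropped by the owning map entry; amap_free() is only called once both
 * am_ref and am_nused have dropped to zero (e.g. amap_wipeout() drops the
 * final reference and frees the anons before calling it).
 */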
214 
215 /*
216  * amap_free: free an amap
217  *
218  * => there should not be any valid references to the amap, so locking
219  *	of the amap being freed is not an issue (doesn't matter).
220  * => the amap is "gone" after we are done with it.
221  */
222 void
223 amap_free(amap)
224 	struct vm_amap *amap;
225 {
226 	UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
227 
228 #ifdef DIAGNOSTIC
229 	if (amap->am_ref || amap->am_nused)
230 		panic("amap_free");
231 #endif
232 
233 	FREE(amap->am_slots, M_UVMAMAP);
234 	FREE(amap->am_bckptr, M_UVMAMAP);
235 	FREE(amap->am_anon, M_UVMAMAP);
236 #ifdef VM_AMAP_PPREF
237 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
238 		FREE(amap->am_ppref, M_UVMAMAP);
239 #endif
240 	FREE(amap, M_UVMAMAP);
241 
242 	UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
243 }
244 
245 /*
246  * amap_extend: extend the size of an amap (if needed)
247  *
248  * => amap being extended should be passed in unlocked (we will lock
249  *	it as needed).
250  * => amap has a reference count of one (our map entry)
251  * => XXXCDC: should it have a waitflag???
252  */
253 void
254 amap_extend(entry, addsize)
255 	vm_map_entry_t entry;
256 	vm_size_t addsize;
257 {
258 	struct vm_amap *amap = entry->aref.ar_amap;
259 	int slotoff = entry->aref.ar_slotoff;
260 	int slotmapped, slotadd, slotneed;
261 #ifdef VM_AMAP_PPREF
262 	int *newppref, *oldppref;
263 #endif
264 	int *newsl, *newbck, *oldsl, *oldbck;
265 	struct vm_anon **newover, **oldover;
266 	int slotadded;
267 	UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
268 
269 	UVMHIST_LOG(maphist, "  (entry=0x%x, addsize=0x%x)", entry,addsize,0,0);
270 
271 	/*
272 	 * first, determine how many slots we need in the amap.   don't forget
273 	 * that ar_slotoff could be non-zero: this means that there are some
274 	 * unused slots before us in the amap.
275 	 */
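	/*
	 * for example: slotoff = 2, slotmapped = 4 and slotadd = 3 gives
	 * slotneed = 9.   if am_nslot >= 9 we are in case 1 below, else if
	 * am_maxslot >= 9 we are in case 2, otherwise we must reallocate
	 * and copy (case 3).
	 */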
276 
277 	simple_lock(&amap->am_l);				/* lock! */
278 
279 	AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
280 	AMAP_B2SLOT(slotadd, addsize);			/* slots to add */
281 	slotneed = slotoff + slotmapped + slotadd;
282 
283 	/*
284 	 * case 1: we already have enough slots in the map and thus only need
285 	 * to bump the reference counts on the slots we are adding.
286 	 */
287 
288 	if (amap->am_nslot >= slotneed) {
289 #ifdef VM_AMAP_PPREF
290 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
291 			amap_pp_adjref(amap, slotoff + slotmapped, addsize, 1);
292 		}
293 #endif
294 		simple_unlock(&amap->am_l);
295 		UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, slotneed=%d",
296 		    amap, slotneed, 0, 0);
297 		return;				/* done! */
298 	}
299 
300 	/*
301 	 * case 2: we pre-allocated slots for use and we just need to bump
302 	 * nslot up to take account for these slots.
303 	 */
304 	if (amap->am_maxslot >= slotneed) {
305 #ifdef VM_AMAP_PPREF
306 		if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
307 			if ((slotoff + slotmapped) < amap->am_nslot)
308 				amap_pp_adjref(amap, slotoff + slotmapped,
309 				    (amap->am_nslot - (slotoff + slotmapped)) *
310 				    PAGE_SIZE, 1);
311 			pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
312 			   slotneed - amap->am_nslot);
313 		}
314 #endif
315 		amap->am_nslot = slotneed;
316 		simple_unlock(&amap->am_l);
317 		/*
318 		 * no need to zero am_anon since that was done at alloc time and we
319 		 * never shrink an allocation.
320 		 */
321 		UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d",
322 		    amap, slotneed, 0, 0);
323 		return;
324 	}
325 
326 	/*
327 	 * case 3: we need to malloc a new amap and copy all the amap data over
328 	 *
329 	 * XXX: should we pad out this allocation in hopes of avoiding future
330 	 * case 3 extends?
331 	 * XXX: how about using kernel realloc?
332 	 *
333 	 * NOTE: we have the only map that has a reference to this amap locked.
334 	 * thus, no one else is going to try and change the amap while it is
335 	 * unlocked (but we unlock just to be safe).
336 	 */
337 
338 	simple_unlock(&amap->am_l);		/* unlock in case we sleep in malloc */
339 #ifdef VM_AMAP_PPREF
340 	newppref = NULL;
341 	if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
342 		MALLOC(newppref, int *, slotneed * sizeof(int), M_UVMAMAP,
343 		    M_NOWAIT);
344 		if (newppref == NULL) {
345 			/* give up if malloc fails */
346 			FREE(amap->am_ppref, M_UVMAMAP);
347 			amap->am_ppref = PPREF_NONE;
348 		}
349 	}
350 #endif
351 	MALLOC(newsl, int *, slotneed * sizeof(int), M_UVMAMAP, M_WAITOK);
352 	MALLOC(newbck, int *, slotneed * sizeof(int), M_UVMAMAP, M_WAITOK);
353 	MALLOC(newover, struct vm_anon **, slotneed * sizeof(struct vm_anon *),
354 						   M_UVMAMAP, M_WAITOK);
355 	simple_lock(&amap->am_l);		/* re-lock! */
356 
357 #ifdef DIAGNOSTIC
358 	if (amap->am_maxslot >= slotneed)
359 		panic("amap_extend: amap changed during malloc");
360 #endif
361 
362 	/*
363 	 * now copy everything over to new malloc'd areas...
364 	 */
365 
366 	slotadded = slotneed - amap->am_nslot;
367 
368 	/* do am_slots */
369 	oldsl = amap->am_slots;
370 	bcopy(oldsl, newsl, sizeof(int) * amap->am_nused);
371 	amap->am_slots = newsl;
372 
373 	/* do am_anon */
374 	oldover = amap->am_anon;
375 	bcopy(oldover, newover, sizeof(struct vm_anon *) * amap->am_nslot);
376 	bzero(newover + amap->am_nslot, sizeof(struct vm_anon *) * slotadded);
377 	amap->am_anon = newover;
378 
379 	/* do am_bckptr */
380 	oldbck = amap->am_bckptr;
381 	bcopy(oldbck, newbck, sizeof(int) * amap->am_nslot);
382 	bzero(newbck + amap->am_nslot, sizeof(int) * slotadded); /* XXX: needed? */
383 	amap->am_bckptr = newbck;
384 
385 #ifdef VM_AMAP_PPREF
386 	/* do ppref */
387 	oldppref = amap->am_ppref;
388 	if (newppref) {
389 		bcopy(oldppref, newppref, sizeof(int) * amap->am_nslot);
390 		bzero(newppref + amap->am_nslot, sizeof(int) * slotadded);
391 		amap->am_ppref = newppref;
392 		if ((slotoff + slotmapped) < amap->am_nslot)
393 			amap_pp_adjref(amap, slotoff + slotmapped,
394 			    (amap->am_nslot - (slotoff + slotmapped)) *
395 			    PAGE_SIZE, 1);
396 		pp_setreflen(newppref, amap->am_nslot, 1, slotadded);
397 	}
398 #endif
399 
400 	/* update master values */
401 	amap->am_nslot = slotneed;
402 	amap->am_maxslot = slotneed;
403 
404 	/* unlock */
405 	simple_unlock(&amap->am_l);
406 
407 	/* and free */
408 	FREE(oldsl, M_UVMAMAP);
409 	FREE(oldbck, M_UVMAMAP);
410 	FREE(oldover, M_UVMAMAP);
411 #ifdef VM_AMAP_PPREF
412 	if (oldppref && oldppref != PPREF_NONE)
413 		FREE(oldppref, M_UVMAMAP);
414 #endif
415 	UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
416 	    amap, slotneed, 0, 0);
417 }
418 
419 /*
420  * amap_share_protect: change protection of an amap in a sharemap
421  *
422  * for sharemaps it is not possible to find all of the maps which
423  * reference the sharemap (e.g. to remove or change a mapping).
424  * in order to get around this (and support sharemaps) we use
425  * pmap_page_protect to change the protection on all mappings of the
426  * page.   we traverse am_anon or am_slots depending on the current
427  * state of the amap.
428  *
429  * => the map that entry belongs to must be locked by the caller.
430  * => the amap pointed to by entry->aref.ar_amap must be locked by caller.
431  * => the map should be locked before the amap (by the caller).
432  */
433 void
434 amap_share_protect(entry, prot)
435 	vm_map_entry_t entry;
436 	vm_prot_t prot;
437 {
438 	struct vm_amap *amap = entry->aref.ar_amap;
439 	int slots, lcv, slot, stop;
440 
441 	AMAP_B2SLOT(slots, (entry->end - entry->start));
442 	stop = entry->aref.ar_slotoff + slots;
443 
444 	if (slots < amap->am_nused) {
445 		/* cheaper to traverse am_anon */
446 		for (lcv = entry->aref.ar_slotoff ; lcv < stop ; lcv++) {
447 			if (amap->am_anon[lcv] == NULL)
448 				continue;
449 			if (amap->am_anon[lcv]->u.an_page != NULL)
450 				pmap_page_protect(
451 				    PMAP_PGARG(amap->am_anon[lcv]->u.an_page),
452 				prot);
453 		}
454 		return;
455 	}
456 
457 	/* cheaper to traverse am_slots */
458 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
459 		slot = amap->am_slots[lcv];
460 		if (slot < entry->aref.ar_slotoff || slot >= stop)
461 			continue;
462 		if (amap->am_anon[slot]->u.an_page != NULL)
463 			pmap_page_protect(
464 			    PMAP_PGARG(amap->am_anon[slot]->u.an_page), prot);
465 	}
466 	return;
467 }
468 
469 /*
470  * amap_wipeout: wipeout all anon's in an amap; then free the amap!
471  *
472  * => if amap is part of an active map entry, then the map that contains
473  *	the map entry must be locked.
474  * => amap's reference count should be one (the final reference).
475  * => the amap must be locked by the caller.
476  */
477 
478 void
479 amap_wipeout(amap)
480 	struct vm_amap *amap;
481 {
482 	int lcv, slot;
483 	struct vm_anon *anon;
484 	UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
485 	UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
486 
487 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
488 
489 		slot = amap->am_slots[lcv];
490 		anon = amap->am_anon[slot];
491 
492 		if (anon == NULL || anon->an_ref == 0)
493 			panic("amap_wipeout: corrupt amap");
494 
495 		simple_lock(&anon->an_lock); /* lock anon */
496 
497 		UVMHIST_LOG(maphist,"  processing anon 0x%x, ref=%d", anon,
498 		    anon->an_ref, 0, 0);
499 
500 		if (--anon->an_ref != 0) {
501 			simple_unlock(&anon->an_lock);
502 			continue;
503 		}
504 
505 		/*
506 		 * we have the last reference to a vm_anon.   free the vm_anon.
507 		 */
508 		uvm_anfree(anon);
509 	}
510 
511 	/*
512 	 * now we free the map
513 	 */
514 
515 	amap->am_nused = 0;
516 	amap->am_ref--;		/* drop final reference */
517 	amap_free(amap);
518 	UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
519 }
520 
521 /*
522  * amap_copy: ensure that a map entry's "needs_copy" flag is false
523  *	by copying the amap if necessary.
524  *
525  * => an entry with a null amap pointer will get a new (blank) one.
526  * => the map that the map entry belongs to must be locked by caller.
527  * => the amap currently attached to "entry" (if any) must be unlocked.
528  * => if canchunk is true, then we may clip the entry into a chunk
529  */
530 
531 void
532 amap_copy(map, entry, waitf, canchunk, startva, endva)
533 	vm_map_t map;
534 	vm_map_entry_t entry;
535 	int waitf;
536 	boolean_t canchunk;
537 	vm_offset_t startva, endva;
538 {
539 	struct vm_amap *amap, *srcamap;
540 	int slots, lcv;
541 	vm_offset_t chunksize;
542 	UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
543 	UVMHIST_LOG(maphist, "  (map=%p, entry=%p, waitf=%d)", map, entry, waitf, 0);
544 
545 	/*
546 	 * is there a map to copy?   if not, create one from scratch.
547 	 */
548 
549 	if (entry->aref.ar_amap == NULL) {
550 
551 		/*
552 		 * check to see if we have a large amap that we can chunk.
553 		 * we align startva/endva to chunk-sized boundaries and then
554 		 * clip to them.
555 		 */
556 
557 		if (canchunk && atop(entry->end - entry->start) >=
558 		    UVM_AMAP_LARGE) {
559 			/* convert slots to bytes */
560 			chunksize = UVM_AMAP_CHUNK * PAGE_SIZE;
561 			startva = (startva / chunksize) * chunksize;
562 			endva = roundup(endva, chunksize);
563 			UVMHIST_LOG(maphist, "  chunk amap ==> clip 0x%x->0x%x"
564 			    " to 0x%x->0x%x", entry->start, entry->end, startva,
565 			    endva);
566 			UVM_MAP_CLIP_START(map, entry, startva);
567 			UVM_MAP_CLIP_END(map, entry, endva);
568 		}
569 
570 		UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
571 		    entry->start, entry->end, 0, 0);
572 		entry->aref.ar_slotoff = 0;
573 		entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
574 		    waitf);
575 		if (entry->aref.ar_amap != NULL)
576 			entry->etype &= ~UVM_ET_NEEDSCOPY;
577 		return;
578 	}
579 
580 	/*
581 	 * first check and see if we are the only map entry referencing the amap
582 	 * we currently have.   if so, then we can just take it over rather
583 	 * than copying it.
584 	 */
585 
586 	if (entry->aref.ar_amap->am_ref == 1) {
587 		entry->etype &= ~UVM_ET_NEEDSCOPY;
588 		UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
589 		    0, 0, 0, 0);
590 		return;
591 	}
592 
593 	/*
594 	 * looks like we need to copy the map.
595 	 */
596 
597 	UVMHIST_LOG(maphist,"  amap=%p, ref=%d, must copy it",
598 	    entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
599 	AMAP_B2SLOT(slots, entry->end - entry->start);
600 	amap = amap_alloc1(slots, 0, waitf);
601 	if (amap == NULL) {
602 		UVMHIST_LOG(maphist, "  amap_alloc1 failed", 0,0,0,0);
603 		return;
604 	}
605 	srcamap = entry->aref.ar_amap;
606 	simple_lock(&srcamap->am_l);
607 
608 	/*
609 	 * need to double check the reference count now that we've got the src
610 	 * amap locked down: it may have dropped to one while we were mallocing
611 	 * the new amap, in which case we can just take the old amap over.
612 	 */
613 
614 	if (srcamap->am_ref == 1) {
615 		/*
616 		 * take over the old amap, get rid of the new one we just
617 		 * allocated.
618 		 */
619 		entry->etype &= ~UVM_ET_NEEDSCOPY;
620 		amap->am_ref--;		/* drop final reference to map */
621 		amap_free(amap);
622 		simple_unlock(&srcamap->am_l);
623 		return;
624 	}
625 
626 	/*
627 	 * copy it now.
628 	 */
629 
630 	UVMHIST_LOG(maphist, "  copying amap now",0, 0, 0, 0);
631 	for (lcv = 0 ; lcv < slots; lcv++) {
632 		amap->am_anon[lcv] =
633 		    srcamap->am_anon[entry->aref.ar_slotoff + lcv];
634 		if (amap->am_anon[lcv] == NULL)
635 			continue;
636 		simple_lock(&amap->am_anon[lcv]->an_lock);
637 		amap->am_anon[lcv]->an_ref++;
638 		simple_unlock(&amap->am_anon[lcv]->an_lock);
639 		amap->am_bckptr[lcv] = amap->am_nused;
640 		amap->am_slots[amap->am_nused] = lcv;
641 		amap->am_nused++;
642 	}
643 
644 	/*
645 	 * drop our reference to the old amap (srcamap) and unlock.  we will
646 	 * not have the very last reference to srcamap so there is no need
647 	 * to worry about freeing it.
648 	 */
649 
650 	srcamap->am_ref--;
651 	if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
652 		srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
653 #ifdef VM_AMAP_PPREF
654 	if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
655 		amap_pp_adjref(srcamap, entry->aref.ar_slotoff,
656 		    entry->end - entry->start, -1);
657 	}
658 #endif
659 
660 	simple_unlock(&srcamap->am_l);
661 
662 	/*
663 	 * install new amap.
664 	 */
665 
666 	entry->aref.ar_slotoff = 0;
667 	entry->aref.ar_amap = amap;
668 	entry->etype &= ~UVM_ET_NEEDSCOPY;
669 
670 	/*
671 	 * done!
672 	 */
673 	UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
674 }
675 
676 /*
677  * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
678  *
679  *	called during fork(2) when the parent process has a wired map
680  *	entry.   in that case we want to avoid write-protecting pages
681  *	in the parent's map (e.g. like what you'd do for a COW page)
682  *	so we resolve the COW here.
683  *
684  * => assume parent's entry was wired, thus all pages are resident.
685  * => assume pages that are loaned out (loan_count) are already mapped
686  *	read-only in all maps, and thus no need for us to worry about them
687  * => assume both parent and child vm_map's are locked
688  * => caller passes child's map/entry in to us
689  * => if we run out of memory we will unlock the amap and sleep _with_ the
690  *	parent and child vm_map's locked(!).    we have to do this since
691  *	we are in the middle of a fork(2) and we can't let the parent
692  *	map change until we are done copying all the map entries.
693  * => XXXCDC: out of memory should cause fork to fail, but there is
694  *	currently no easy way to do this (needs fix)
695  * => page queues must be unlocked (we may lock them)
696  */
697 
698 void
699 amap_cow_now(map, entry)
700 	struct vm_map *map;
701 	struct vm_map_entry *entry;
702 {
703 	struct vm_amap *amap = entry->aref.ar_amap;
704 	int lcv, slot;
705 	struct vm_anon *anon, *nanon;
706 	struct vm_page *pg, *npg;
707 
708 	/*
709 	 * note that if we unlock the amap then we must ReStart the "lcv" for
710 	 * loop because some other process could reorder the anon's in the
711 	 * am_anon[] array on us while the lock is dropped.
712 	 */
713 ReStart:
714 	simple_lock(&amap->am_l);
715 
716 	for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
717 
718 		/*
719 		 * get the page
720 		 */
721 
722 		slot = amap->am_slots[lcv];
723 		anon = amap->am_anon[slot];
724 		simple_lock(&anon->an_lock);
725 		pg = anon->u.an_page;
726 
727 		/*
728 		 * page must be resident since parent is wired
729 		 */
730 
731 		if (pg == NULL)
732 		    panic("amap_cow_now: non-resident wired page in anon %p",
733 			anon);
734 
735 		/*
736 		 * if the anon ref count is one and the page is not loaned,
737 		 * then we are safe (the child has exclusive access to the
738 		 * page).  if the page is loaned, then it must already be
739 		 * mapped read-only.
740 		 *
741 		 * we only need to get involved when these are not true.
742 		 * [note: if loan_count == 0, then the anon must own the page]
743 		 */
744 
745 		if (anon->an_ref > 1 && pg->loan_count == 0) {
746 
747 			/*
748 			 * if the page is busy then we have to unlock, wait for
749 			 * it and then restart.
750 			 */
751 			if (pg->flags & PG_BUSY) {
752 				pg->flags |= PG_WANTED;
753 				simple_unlock(&amap->am_l);
754 				UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
755 				    "cownow", 0);
756 				goto ReStart;
757 			}
758 
759 			/*
760 			 * ok, time to do a copy-on-write to a new anon
761 			 */
762 			nanon = uvm_analloc();
763 			if (nanon)
764 				npg = uvm_pagealloc(NULL, 0, nanon);
765 			else
766 				npg = NULL;	/* XXX: quiet gcc warning */
767 
768 			if (nanon == NULL || npg == NULL) {
769 				/* out of memory */
770 				/*
771 				 * XXXCDC: we should cause fork to fail, but
772 				 * we can't ...
773 				 */
774 				if (nanon)
775 					uvm_anfree(nanon);
776 				simple_unlock(&anon->an_lock);
777 				simple_unlock(&amap->am_l);
778 				uvm_wait("cownowpage");
779 				goto ReStart;
780 			}
781 
782 			/*
783 			 * got it... now we can copy the data and replace anon
784 			 * with our new one...
785 			 */
786 			uvm_pagecopy(pg, npg);		/* old -> new */
787 			anon->an_ref--;			/* can't drop to zero */
788 			amap->am_anon[slot] = nanon;	/* replace */
789 
790 			/*
791 			 * drop PG_BUSY on new page ... since we have had its
792 			 * owner locked the whole time it can't be
793 			 * PG_RELEASED | PG_WANTED.
794 			 */
795 			npg->flags &= ~(PG_BUSY|PG_FAKE);
796 			UVM_PAGE_OWN(npg, NULL);
797 			uvm_lock_pageq();
798 			uvm_pageactivate(npg);
799 			uvm_unlock_pageq();
800 		}
801 
802 		simple_unlock(&anon->an_lock);
803 		/*
804 		 * done with this anon, next ...!
805 		 */
806 
807 	}	/* end of 'for' loop */
808 
809 	return;
810 }
811 
812 /*
813  * amap_splitref: split a single reference into two separate references
814  *
815  * => caller must lock map which is referencing the amap
816  * => caller must not lock amap referenced (we will do it)
817  */
818 void
819 amap_splitref(origref, splitref, offset)
820 	struct vm_aref *origref, *splitref;
821 	vm_offset_t offset;
822 {
823 	int leftslots;
824 	UVMHIST_FUNC("amap_splitref"); UVMHIST_CALLED(maphist);
825 
826 	AMAP_B2SLOT(leftslots, offset);
827 	if (leftslots == 0)
828 		panic("amap_splitref: split at zero offset");
829 
830 	/*
831 	 * lock the amap
832 	 */
833 	simple_lock(&origref->ar_amap->am_l);
834 
835 	/*
836 	 * now: amap is locked.   check that the split point falls within the amap.
837 	 */
838 
839 	if (origref->ar_amap->am_nslot - origref->ar_slotoff - leftslots <= 0)
840 		panic("amap_splitref: map size check failed");
841 
842 	splitref->ar_amap = origref->ar_amap;
843 	splitref->ar_amap->am_ref++;		/* not a share reference */
844 	splitref->ar_slotoff = origref->ar_slotoff + leftslots;
845 
846 	simple_unlock(&origref->ar_amap->am_l);
847 }
848 
849 #ifdef VM_AMAP_PPREF
850 
851 /*
852  * amap_pp_establish: add a ppref array to an amap, if possible
853  *
854  * => amap locked by caller
855  */
856 void
857 amap_pp_establish(amap)
858 	struct vm_amap *amap;
859 {
860 
861 	MALLOC(amap->am_ppref, int *, sizeof(int) * amap->am_maxslot,
862 	    M_UVMAMAP, M_NOWAIT);
863 
864 	/*
865 	 * if we fail then we just won't use ppref for this amap
866 	 */
867 	if (amap->am_ppref == NULL) {
868 		amap->am_ppref = PPREF_NONE;	/* not using it */
869 		return;
870 	}
871 
872 	/*
873 	 * init ppref
874 	 */
875 	bzero(amap->am_ppref, sizeof(int) * amap->am_maxslot);
876 	pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
877 	return;
878 }
879 
880 /*
881  * amap_pp_adjref: adjust reference count to a part of an amap using the
882  * per-page reference count array.
883  *
884  * => map and amap locked by caller
885  * => caller must check that ppref != PPREF_NONE before calling
886  */
887 void
888 amap_pp_adjref(amap, curslot, bytelen, adjval)
889 	struct vm_amap *amap;
890 	int curslot;
891 	vm_size_t bytelen;
892 	int adjval;
893 {
894 	int slots, stopslot, *ppref, lcv;
895 	int ref, len;
896 
897 	/*
898 	 * get init values
899 	 */
900 
901 	AMAP_B2SLOT(slots, bytelen);
902 	stopslot = curslot + slots;
903 	ppref = amap->am_ppref;
904 
905 	/*
906 	 * first advance to the correct place in the ppref array, fragment
907 	 * if needed.
908 	 */
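	/*
	 * e.g. if a chunk of length 4 starts at slot 0 and curslot is 2, the
	 * chunk is split into a [0..1] piece and a [2..3] piece so that the
	 * adjustment loop below starts exactly at curslot.
	 */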
909 
910 	for (lcv = 0 ; lcv < curslot ; lcv += len) {
911 		pp_getreflen(ppref, lcv, &ref, &len);
912 		if (lcv + len > curslot) {     /* goes past start? */
913 			pp_setreflen(ppref, lcv, ref, curslot - lcv);
914 			pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
915 			len = curslot - lcv;   /* new length of entry @ lcv */
916 		}
917 	}
918 
919 	/*
920 	 * now adjust reference counts in range (make sure we don't overshoot)
921 	 */
922 
923 	if (lcv != curslot)
924 		panic("ADJREF");
925 
926 	for (/* lcv already set */; lcv < stopslot ; lcv += len) {
927 		pp_getreflen(ppref, lcv, &ref, &len);
928 		if (lcv + len > stopslot) {     /* goes past end? */
929 			pp_setreflen(ppref, lcv, ref, stopslot - lcv);
930 			pp_setreflen(ppref, stopslot, ref,
931 			    len - (stopslot - lcv));
932 			len = stopslot - lcv;
933 		}
934 		ref = ref + adjval;    /* ADJUST! */
935 		if (ref < 0)
936 			panic("amap_pp_adjref: negative reference count");
937 		pp_setreflen(ppref, lcv, ref, len);
938 		if (ref == 0)
939 			amap_wiperange(amap, lcv, len);
940 	}
941 
942 }
943 
944 /*
945  * amap_wiperange: wipe out a range of an amap
946  * [different from amap_wipeout because the amap is kept intact]
947  *
948  * => both map and amap must be locked by caller.
949  */
950 void
951 amap_wiperange(amap, slotoff, slots)
952 	struct vm_amap *amap;
953 	int slotoff, slots;
954 {
955 	int byanon, lcv, stop, curslot, ptr;
956 	struct vm_anon *anon;
957 	UVMHIST_FUNC("amap_wiperange"); UVMHIST_CALLED(maphist);
958 
959 	/*
960 	 * we can either traverse the amap by am_anon or by am_slots depending
961 	 * on which is cheaper.    decide now.
962 	 */
963 
964 	if (slots < amap->am_nused) {
965 		byanon = TRUE;
966 		lcv = slotoff;
967 		stop = slotoff + slots;
968 	} else {
969 		byanon = FALSE;
970 		lcv = 0;
971 		stop = amap->am_nused;
972 	}
973 
974 	/*
975 	 * ok, now do it!
976 	 */
977 
978 	for (; lcv < stop; lcv++) {
979 
980 		/*
981 		 * verify the anon is ok.
982 		 */
983 		if (byanon) {
984 			if (amap->am_anon[lcv] == NULL)
985 				continue;
986 			curslot = lcv;
987 		} else {
988 			curslot = amap->am_slots[lcv];
989 			if (curslot < slotoff || curslot >= slotoff + slots)
990 				continue;
991 		}
992 		anon = amap->am_anon[curslot];
993 
994 		/*
995 		 * remove it from the amap
996 		 */
997 		amap->am_anon[curslot] = NULL;
998 		ptr = amap->am_bckptr[curslot];
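		/*
		 * keep am_slots[] packed: move the last in-use entry into
		 * the hole left by this slot and fix up its back pointer,
		 * then shrink am_nused.
		 */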
999 		if (ptr != (amap->am_nused - 1)) {
1000 			amap->am_slots[ptr] =
1001 			    amap->am_slots[amap->am_nused - 1];
1002 			amap->am_bckptr[amap->am_slots[ptr]] =
1003 			    ptr;    /* back ptr. */
1004 		}
1005 		amap->am_nused--;
1006 
1007 		/*
1008 		 * drop anon reference count
1009 		 */
1010 		simple_lock(&anon->an_lock);
1011 		if (--anon->an_ref != 0) {
1012 			simple_unlock(&anon->an_lock);
1013 			continue;
1014 		}
1015 
1016 		/*
1017 		 * we just eliminated the last reference to an anon.   free it.
1018 		 */
1019 		uvm_anfree(anon);
1020 	}
1021 }
1022 
1023 #endif
1024 
1025 /*
1026  * allocate anons
1027  */
1028 void
1029 uvm_anon_init()
1030 {
1031 	struct vm_anon *anon;
1032 	int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */
1033 	int lcv;
1034 
1035 	MALLOC(anon, struct vm_anon *, sizeof(*anon) * nanon, M_UVMAMAP, M_NOWAIT);
1036 	if (anon == NULL) {
1037 		printf("uvm_anon_init: can not allocate %d anons\n", nanon);
1038 		panic("uvm_anon_init");
1039 	}
1040 
1041 	bzero(anon, sizeof(*anon) * nanon);
1042 	uvm.afree = NULL;
1043 	uvmexp.nanon = uvmexp.nfreeanon = nanon;
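	/* thread each anon onto the global free list (LIFO, via u.an_nxt) */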
1044 	for (lcv = 0 ; lcv < nanon ; lcv++) {
1045 		anon[lcv].u.an_nxt = uvm.afree;
1046 		uvm.afree = &anon[lcv];
1047 	}
1048 	simple_lock_init(&uvm.afreelock);
1049 }
1050 
1051 /*
1052  * add some more anons to the free pool.  called when we add
1053  * more swap space.
1054  */
1055 void
1056 uvm_anon_add(pages)
1057 	int	pages;
1058 {
1059 	struct vm_anon *anon;
1060 	int lcv;
1061 
1062 	MALLOC(anon, struct vm_anon *, sizeof(*anon) * pages, M_UVMAMAP,
1063 	    M_WAITOK);
1064 
1065 	simple_lock(&uvm.afreelock);
1066 	bzero(anon, sizeof(*anon) * pages);
1067 	uvmexp.nanon += pages;
1068 	uvmexp.nfreeanon += pages;
1069 	for (lcv = 0; lcv < pages; lcv++) {
1070 		anon[lcv].u.an_nxt = uvm.afree;
1071 		uvm.afree = &anon[lcv];
1072 	}
1073 	simple_unlock(&uvm.afreelock);
1074 }
1075 
1076 /*
1077  * allocate an anon
1078  */
1079 struct vm_anon *
1080 uvm_analloc()
1081 {
1082 	struct vm_anon *a;
1083 
1084 	simple_lock(&uvm.afreelock);
1085 	a = uvm.afree;
1086 	if (a) {
1087 		uvm.afree = a->u.an_nxt;
1088 		uvmexp.nfreeanon--;
1089 		a->an_ref = 1;
1090 		a->an_swslot = 0;
1091 		a->u.an_page = NULL;		/* so we can free quickly */
1092 		simple_lock_init(&a->an_lock);
1093 	}
1094 	simple_unlock(&uvm.afreelock);
1095 	return(a);
1096 }
1097 
1098 /*
1099  * uvm_anfree: free a single anon structure
1100  *
1101  * => caller must remove anon from its amap before calling (if it was in
1102  *	an amap).
1103  * => if anon was in use, then it must be locked by the caller and the
1104  *	caller must have dropped the reference count to zero.
1105  * => we may lock the pageq's.
1106  */
1107 void
1108 uvm_anfree(anon)
1109 	struct vm_anon *anon;
1110 {
1111 	struct vm_page *pg;
1112 	UVMHIST_FUNC("uvm_anfree"); UVMHIST_CALLED(maphist);
1113 	UVMHIST_LOG(maphist,"(anon=0x%x)", anon, 0,0,0);
1114 
1115 	/*
1116 	 * get page
1117 	 */
1118 
1119 	pg = anon->u.an_page;
1120 
1121 	/*
1122 	 * if there is a resident page and it is loaned, then anon may not
1123 	 * own it.   call out to uvm_anon_lockloanpg() to ensure the real owner
1124 	 * of the page has been identified and locked.
1125 	 */
1126 
1127 	if (pg && pg->loan_count)
1128 		pg = uvm_anon_lockloanpg(anon);
1129 
1130 	/*
1131 	 * if we have a resident page, we must dispose of it before freeing
1132 	 * the anon.
1133 	 */
1134 
1135 	if (pg) {
1136 
1137 		/*
1138 		 * if the page is owned by a uobject (now locked), then we must
1139 		 * kill the loan on the page rather than free it.
1140 		 */
1141 
1142 		if (pg->uobject) {
1143 
1144 			/* kill loan */
1145 			uvm_lock_pageq();
1146 #ifdef DIAGNOSTIC
1147 			if (pg->loan_count < 1)
1148 		panic("uvm_anfree: obj owned page with no loan count");
1149 #endif
1150 			pg->loan_count--;
1151 			pg->uanon = NULL;
1152 			uvm_unlock_pageq();
1153 			simple_unlock(&pg->uobject->vmobjlock);
1154 
1155 		} else {
1156 
1157 			/*
1158 			 * page has no uobject, so we must be the owner of it.
1159 			 *
1160 			 * if page is busy then we just mark it as released
1161 			 * (whoever has it busy must check for this when they
1162 			 * wake up).    if the page is not busy then we can
1163 			 * free it now.
1164 			 */
1165 
1166 			if ((pg->flags & PG_BUSY) != 0) {
1167 				/* tell them to dump it when done */
1168 				pg->flags |= PG_RELEASED;
1169 				simple_unlock(&anon->an_lock);
1170 				UVMHIST_LOG(maphist,
1171 				    "  anon 0x%x, page 0x%x: BUSY (released!)",
1172 				    anon, pg, 0, 0);
1173 				return;
1174 			}
1175 
1176 			pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
1177 			uvm_lock_pageq();	/* lock out pagedaemon */
1178 			uvm_pagefree(pg);	/* bye bye */
1179 			uvm_unlock_pageq();	/* free the daemon */
1180 
1181 			UVMHIST_LOG(maphist,"  anon 0x%x, page 0x%x: freed now!",
1182 			    anon, pg, 0, 0);
1183 		}
1184 	}
1185 
1186 	/*
1187 	 * are we using any backing store resources?   if so, free them.
1188 	 */
1189 	if (anon->an_swslot) {
1190 		/*
1191 		 * on backing store: no I/O in progress.  sole amap reference
1192 		 * is ours and we've got it locked down.   thus we can free,
1193 		 * and be done.
1194 		 */
1195 		UVMHIST_LOG(maphist,"  freeing anon 0x%x, paged to swslot 0x%x",
1196 		    anon, anon->an_swslot, 0, 0);
1197 		uvm_swap_free(anon->an_swslot, 1);
1198 		anon->an_swslot = 0;
1199 	}
1200 
1201 	/*
1202 	 * now that we've stripped the data areas from the anon, free the anon
1203 	 * itself!
1204 	 */
1205 	simple_lock(&uvm.afreelock);
1206 	anon->u.an_nxt = uvm.afree;
1207 	uvm.afree = anon;
1208 	uvmexp.nfreeanon++;
1209 	simple_unlock(&uvm.afreelock);
1210 }
1211 
1212 /*
1213  * uvm_anon_lockloanpg: given a locked anon, lock its resident page
1214  *
1215  * => anon is locked by caller
1216  * => on return: anon is locked
1217  *		 if there is a resident page:
1218  *			if it has a uobject, it is locked by us
1219  *			if it is ownerless, we take over as owner
1220  *		 we return the resident page (it can change during
1221  *		 this function)
1222  * => note that the only time an anon has an ownerless resident page
1223  *	is if the page was loaned from a uvm_object and the uvm_object
1224  *	disowned it
1225  * => this only needs to be called when you want to do an operation
1226  *	on an anon's resident page and that page has a non-zero loan
1227  *	count.
1228  */
1229 struct vm_page *
1230 uvm_anon_lockloanpg(anon)
1231 	struct vm_anon *anon;
1232 {
1233 	struct vm_page *pg;
1234 	boolean_t locked = FALSE;
1235 
1236 	/*
1237 	 * loop while we have a resident page that has a non-zero loan count.
1238 	 * if we successfully get our lock, we will "break" the loop.
1239 	 * note that the test for pg->loan_count is not protected -- this
1240 	 * may produce false positive results.   note that a false positive
1241 	 * result may cause us to do more work than we need to, but it will
1242 	 * not produce an incorrect result.
1243 	 */
1244 
1245 	while (((pg = anon->u.an_page) != NULL) && pg->loan_count != 0) {
1246 
1247 		/*
1248 		 * quickly check to see if the page has an object before
1249 		 * bothering to lock the page queues.   this may also produce
1250 		 * a false positive result, but that's ok because we do a real
1251 		 * check after that.
1252 		 *
1253 		 * XXX: quick check -- worth it?   need volatile?
1254 		 */
1255 
1256 		if (pg->uobject) {
1257 
1258 			uvm_lock_pageq();
1259 			if (pg->uobject) {	/* the "real" check */
1260 				locked =
1261 				    simple_lock_try(&pg->uobject->vmobjlock);
1262 			} else {
1263 				/* object disowned before we got PQ lock */
1264 				locked = TRUE;
1265 			}
1266 			uvm_unlock_pageq();
1267 
1268 			/*
1269 			 * if we didn't get a lock (try lock failed), then we
1270 			 * toggle our anon lock and try again
1271 			 */
1272 
1273 			if (!locked) {
1274 				simple_unlock(&anon->an_lock);
1275 				/*
1276 				 * someone locking the object has a chance to
1277 				 * lock us right now
1278 				 */
1279 				simple_lock(&anon->an_lock);
1280 				continue;		/* start over */
1281 			}
1282 		}
1283 
1284 		/*
1285 		 * if page is un-owned [i.e. the object dropped its ownership],
1286 		 * then we can take over as owner!
1287 		 */
1288 
1289 		if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) {
1290 			uvm_lock_pageq();
1291 			pg->pqflags |= PQ_ANON;		/* take ownership... */
1292 			pg->loan_count--;	/* ... and drop our loan */
1293 			uvm_unlock_pageq();
1294 		}
1295 
1296 		/*
1297 		 * we did it!   break the loop
1298 		 */
1299 		break;
1300 	}
1301 
1302 	/*
1303 	 * done!
1304 	 */
1305 
1306 	return(pg);
1307 }
1308