xref: /openbsd-src/sys/uvm/uvm_map.c (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1 /*	$OpenBSD: uvm_map.c,v 1.145 2011/07/05 03:10:29 dhill Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * Copyright (c) 1991, 1993, The Regents of the University of California.
7  *
8  * All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * The Mach Operating System project at Carnegie-Mellon University.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by Charles D. Cranor,
24  *      Washington University, the University of California, Berkeley and
25  *      its contributors.
26  * 4. Neither the name of the University nor the names of its contributors
27  *    may be used to endorse or promote products derived from this software
28  *    without specific prior written permission.
29  *
30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40  * SUCH DAMAGE.
41  *
42  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
43  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
44  *
45  *
46  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
47  * All rights reserved.
48  *
49  * Permission to use, copy, modify and distribute this software and
50  * its documentation is hereby granted, provided that both the copyright
51  * notice and this permission notice appear in all copies of the
52  * software, derivative works or modified versions, and any portions
53  * thereof, and that both notices appear in supporting documentation.
54  *
55  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
56  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
57  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
58  *
59  * Carnegie Mellon requests users of this software to return to
60  *
61  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
62  *  School of Computer Science
63  *  Carnegie Mellon University
64  *  Pittsburgh PA 15213-3890
65  *
66  * any improvements or extensions that they make and grant Carnegie the
67  * rights to redistribute these changes.
68  */
69 
70 /*
71  * uvm_map.c: uvm map operations
72  */
73 
74 #include <sys/param.h>
75 #include <sys/systm.h>
76 #include <sys/mman.h>
77 #include <sys/proc.h>
78 #include <sys/malloc.h>
79 #include <sys/pool.h>
80 #include <sys/kernel.h>
81 
82 #include <dev/rndvar.h>
83 
84 #ifdef SYSVSHM
85 #include <sys/shm.h>
86 #endif
87 
88 #include <uvm/uvm.h>
89 #undef RB_AUGMENT
90 #define RB_AUGMENT(x) uvm_rb_augment(x)
91 
92 #ifdef DDB
93 #include <uvm/uvm_ddb.h>
94 #endif
95 
96 static struct timeval uvm_kmapent_last_warn_time;
97 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
98 
99 const char vmmapbsy[] = "vmmapbsy";
100 
101 /*
102  * pool for vmspace structures.
103  */
104 
105 struct pool uvm_vmspace_pool;
106 
107 /*
108  * pool for dynamically-allocated map entries.
109  */
110 
111 struct pool uvm_map_entry_pool;
112 struct pool uvm_map_entry_kmem_pool;
113 
114 #ifdef PMAP_GROWKERNEL
115 /*
116  * This global represents the end of the kernel virtual address
117  * space.  If we want to exceed this, we must grow the kernel
118  * virtual address space dynamically.
119  *
120  * Note, this variable is locked by kernel_map's lock.
121  */
122 vaddr_t uvm_maxkaddr;
123 #endif
124 
125 /*
126  * macros
127  */
128 
129 /*
130  * uvm_map_entry_link: insert entry into a map
131  *
132  * => map must be locked
133  */
134 #define uvm_map_entry_link(map, after_where, entry) do { \
135 	(map)->nentries++; \
136 	(entry)->prev = (after_where); \
137 	(entry)->next = (after_where)->next; \
138 	(entry)->prev->next = (entry); \
139 	(entry)->next->prev = (entry); \
140 	uvm_rb_insert(map, entry); \
141 } while (0)
142 
143 /*
144  * uvm_map_entry_unlink: remove entry from a map
145  *
146  * => map must be locked
147  */
148 #define uvm_map_entry_unlink(map, entry) do { \
149 	(map)->nentries--; \
150 	(entry)->next->prev = (entry)->prev; \
151 	(entry)->prev->next = (entry)->next; \
152 	uvm_rb_remove(map, entry); \
153 } while (0)
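
/*
 * Note on the entry list: the entries of a map form a circular,
 * doubly-linked list whose sentinel is &map->header, so linking
 * "entry" after "after_where" splices it in between "after_where" and
 * its old successor, and "e->next == &map->header" is the usual
 * end-of-map test (see e.g. uvm_rb_space() and uvm_unmap_remove()
 * below).  Both macros also keep the red-black tree in sync via
 * uvm_rb_insert()/uvm_rb_remove().
 */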
154 
155 /*
156  * SAVE_HINT: saves the specified entry as the hint for future lookups.
157  *
158  * => map need not be locked (protected by hint_lock).
159  */
160 #define SAVE_HINT(map,check,value) do { \
161 	simple_lock(&(map)->hint_lock); \
162 	if ((map)->hint == (check)) \
163 		(map)->hint = (value); \
164 	simple_unlock(&(map)->hint_lock); \
165 } while (0)
166 
167 /*
168  * VM_MAP_RANGE_CHECK: check and correct range
169  *
170  * => map must at least be read locked
171  */
172 
173 #define VM_MAP_RANGE_CHECK(map, start, end) do { \
174 	if (start < vm_map_min(map)) 		\
175 		start = vm_map_min(map);        \
176 	if (end > vm_map_max(map))              \
177 		end = vm_map_max(map);          \
178 	if (start > end)                        \
179 		start = end;                    \
180 } while (0)
181 
182 /*
183  * local prototypes
184  */
185 
186 void uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
187 void uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
188 void uvm_map_reference_amap(struct vm_map_entry *, int);
189 void uvm_map_unreference_amap(struct vm_map_entry *, int);
190 int uvm_map_spacefits(struct vm_map *, vaddr_t *, vsize_t,
191     struct vm_map_entry *, voff_t, vsize_t);
192 
193 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map *, int);
194 void			uvm_mapent_free(struct vm_map_entry *);
195 
196 #ifdef KVA_GUARDPAGES
197 /*
198  * Number of kva guardpages in use.
199  */
200 int kva_guardpages;
201 #endif
202 
203 
204 /*
205  * Tree manipulation.
206  */
207 void uvm_rb_insert(struct vm_map *, struct vm_map_entry *);
208 void uvm_rb_remove(struct vm_map *, struct vm_map_entry *);
209 vsize_t uvm_rb_space(struct vm_map *, struct vm_map_entry *);
210 
211 #ifdef DEBUG
212 int _uvm_tree_sanity(struct vm_map *map, const char *name);
213 #endif
214 vsize_t uvm_rb_subtree_space(struct vm_map_entry *);
215 void uvm_rb_fixup(struct vm_map *, struct vm_map_entry *);
216 
217 static __inline int
218 uvm_compare(struct vm_map_entry *a, struct vm_map_entry *b)
219 {
220 	if (a->start < b->start)
221 		return (-1);
222 	else if (a->start > b->start)
223 		return (1);
224 
225 	return (0);
226 }
227 
228 
229 static __inline void
230 uvm_rb_augment(struct vm_map_entry *entry)
231 {
232 	entry->space = uvm_rb_subtree_space(entry);
233 }
234 
235 RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
236 
237 RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
238 
239 vsize_t
240 uvm_rb_space(struct vm_map *map, struct vm_map_entry *entry)
241 {
242 	struct vm_map_entry *next;
243 	vaddr_t space;
244 
245 	if ((next = entry->next) == &map->header)
246 		space = map->max_offset - entry->end;
247 	else {
248 		KASSERT(next);
249 		space = next->start - entry->end;
250 	}
251 	return (space);
252 }
253 
254 vsize_t
255 uvm_rb_subtree_space(struct vm_map_entry *entry)
256 {
257 	vaddr_t space, tmp;
258 
259 	space = entry->ownspace;
260 	if (RB_LEFT(entry, rb_entry)) {
261 		tmp = RB_LEFT(entry, rb_entry)->space;
262 		if (tmp > space)
263 			space = tmp;
264 	}
265 
266 	if (RB_RIGHT(entry, rb_entry)) {
267 		tmp = RB_RIGHT(entry, rb_entry)->space;
268 		if (tmp > space)
269 			space = tmp;
270 	}
271 
272 	return (space);
273 }
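
/*
 * Augmentation invariant, for illustration: "ownspace" is the gap
 * between an entry and its successor (or the end of the map), and
 * "space" is the largest such gap anywhere in the entry's subtree,
 * i.e. max(ownspace, left child's space, right child's space).  For
 * example, if an entry maps [0x1000, 0x2000) and its successor starts
 * at 0x5000, its ownspace is 0x3000; if neither child advertises a
 * larger gap, its space is 0x3000 as well.  RB_AUGMENT (uvm_rb_augment)
 * keeps "space" current across rotations, and uvm_rb_fixup()
 * re-propagates it toward the root whenever a gap changes.
 */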
274 
275 void
276 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
277 {
278 	/* We need to traverse to the very top */
279 	do {
280 		entry->ownspace = uvm_rb_space(map, entry);
281 		entry->space = uvm_rb_subtree_space(entry);
282 	} while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
283 }
284 
285 void
286 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
287 {
288 	vaddr_t space = uvm_rb_space(map, entry);
289 	struct vm_map_entry *tmp;
290 
291 	entry->ownspace = entry->space = space;
292 	tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
293 #ifdef DIAGNOSTIC
294 	if (tmp != NULL)
295 		panic("uvm_rb_insert: duplicate entry?");
296 #endif
297 	uvm_rb_fixup(map, entry);
298 	if (entry->prev != &map->header)
299 		uvm_rb_fixup(map, entry->prev);
300 }
301 
302 void
303 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
304 {
305 	struct vm_map_entry *parent;
306 
307 	parent = RB_PARENT(entry, rb_entry);
308 	RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
309 	if (entry->prev != &map->header)
310 		uvm_rb_fixup(map, entry->prev);
311 	if (parent)
312 		uvm_rb_fixup(map, parent);
313 }
314 
315 #ifdef DEBUG
316 #define uvm_tree_sanity(x,y) _uvm_tree_sanity(x,y)
317 #else
318 #define uvm_tree_sanity(x,y)
319 #endif
320 
321 #ifdef DEBUG
322 int
323 _uvm_tree_sanity(struct vm_map *map, const char *name)
324 {
325 	struct vm_map_entry *tmp, *trtmp;
326 	int n = 0, i = 1;
327 
328 	RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
329 		if (tmp->ownspace != uvm_rb_space(map, tmp)) {
330 			printf("%s: %d/%d ownspace %lx != %lx %s\n",
331 			    name, n + 1, map->nentries,
332 			    (u_long)tmp->ownspace, (u_long)uvm_rb_space(map, tmp),
333 			    tmp->next == &map->header ? "(last)" : "");
334 			goto error;
335 		}
336 	}
337 	trtmp = NULL;
338 	RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
339 		if (tmp->space != uvm_rb_subtree_space(tmp)) {
340 			printf("%s: space %lx != %lx\n", name,
341 			    (u_long)tmp->space, (u_long)uvm_rb_subtree_space(tmp));
342 			goto error;
343 		}
344 		if (trtmp != NULL && trtmp->start >= tmp->start) {
345 			printf("%s: corrupt: 0x%lx >= 0x%lx\n",
346 			    name, trtmp->start, tmp->start);
347 			goto error;
348 		}
349 		n++;
350 
351 		trtmp = tmp;
352 	}
353 
354 	if (n != map->nentries) {
355 		printf("%s: nentries: %d vs %d\n",
356 		    name, n, map->nentries);
357 		goto error;
358 	}
359 
360 	for (tmp = map->header.next; tmp && tmp != &map->header;
361 	    tmp = tmp->next, i++) {
362 		trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
363 		if (trtmp != tmp) {
364 			printf("%s: lookup: %d: %p - %p: %p\n",
365 			    name, i, tmp, trtmp,
366 			    RB_PARENT(tmp, rb_entry));
367 			goto error;
368 		}
369 	}
370 
371 	return (0);
372  error:
373 #ifdef	DDB
374 	/* handy breakpoint location for error case */
375 	__asm(".globl treesanity_label\ntreesanity_label:");
376 #endif
377 	return (-1);
378 }
379 #endif
380 
381 /*
382  * uvm_mapent_alloc: allocate a map entry
383  */
384 
385 struct vm_map_entry *
386 uvm_mapent_alloc(struct vm_map *map, int flags)
387 {
388 	struct vm_map_entry *me, *ne;
389 	int s, i;
390 	int pool_flags;
391 
392 	pool_flags = PR_WAITOK;
393 	if (flags & UVM_FLAG_TRYLOCK)
394 		pool_flags = PR_NOWAIT;
395 
396 	if (map->flags & VM_MAP_INTRSAFE || cold) {
397 		s = splvm();
398 		simple_lock(&uvm.kentry_lock);
399 		me = uvm.kentry_free;
400 		if (me == NULL) {
401 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
402 			    &kd_nowait);
403 			if (ne == NULL)
404 				panic("uvm_mapent_alloc: cannot allocate map "
405 				    "entry");
406 			for (i = 0;
407 			    i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
408 			    i++)
409 				ne[i].next = &ne[i + 1];
410 			ne[i].next = NULL;
411 			me = ne;
412 			if (ratecheck(&uvm_kmapent_last_warn_time,
413 			    &uvm_kmapent_warn_rate))
414 				printf("uvm_mapent_alloc: out of static "
415 				    "map entries\n");
416 		}
417 		uvm.kentry_free = me->next;
418 		uvmexp.kmapent++;
419 		simple_unlock(&uvm.kentry_lock);
420 		splx(s);
421 		me->flags = UVM_MAP_STATIC;
422 	} else if (map == kernel_map) {
423 		splassert(IPL_NONE);
424 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
425 		if (me == NULL)
426 			goto out;
427 		me->flags = UVM_MAP_KMEM;
428 	} else {
429 		splassert(IPL_NONE);
430 		me = pool_get(&uvm_map_entry_pool, pool_flags);
431 		if (me == NULL)
432 			goto out;
433 		me->flags = 0;
434 	}
435 
436 out:
437 	return(me);
438 }
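
/*
 * To summarize the three allocation paths above: intrsafe maps (and
 * allocations made while the system is still cold) take entries from
 * the static kentry_free list, kernel_map takes them from
 * uvm_map_entry_kmem_pool, and all other maps take them from
 * uvm_map_entry_pool.  me->flags records which source was used so
 * that uvm_mapent_free() can return the entry to the right place.
 */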
439 
440 /*
441  * uvm_mapent_free: free map entry
442  *
443  * => XXX: static pool for kernel map?
444  */
445 
446 void
447 uvm_mapent_free(struct vm_map_entry *me)
448 {
449 	int s;
450 
451 	if (me->flags & UVM_MAP_STATIC) {
452 		s = splvm();
453 		simple_lock(&uvm.kentry_lock);
454 		me->next = uvm.kentry_free;
455 		uvm.kentry_free = me;
456 		uvmexp.kmapent--;
457 		simple_unlock(&uvm.kentry_lock);
458 		splx(s);
459 	} else if (me->flags & UVM_MAP_KMEM) {
460 		splassert(IPL_NONE);
461 		pool_put(&uvm_map_entry_kmem_pool, me);
462 	} else {
463 		splassert(IPL_NONE);
464 		pool_put(&uvm_map_entry_pool, me);
465 	}
466 }
467 
468 /*
469  * uvm_mapent_copy: copy a map entry, preserving flags
470  */
471 
472 void
473 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
474 {
475 	memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
476 	    ((char *)src));
477 }
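
/*
 * The copy above deliberately stops at uvm_map_entry_stop_copy, which
 * (per the struct vm_map_entry layout in uvm_map.h) is an alias for
 * the "flags" member; the destination therefore keeps its own
 * allocator flags (UVM_MAP_STATIC/UVM_MAP_KMEM) instead of inheriting
 * the source's, which is what "preserving flags" refers to.
 */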
478 
479 /*
480  * uvm_map_entry_unwire: unwire a map entry
481  *
482  * => map should be locked by caller
483  */
484 void
485 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
486 {
487 
488 	entry->wired_count = 0;
489 	uvm_fault_unwire_locked(map, entry->start, entry->end);
490 }
491 
492 
493 /*
494  * wrapper for calling amap_ref()
495  */
496 void
497 uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
498 {
499 	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
500 	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
501 }
502 
503 
504 /*
505  * wrapper for calling amap_unref()
506  */
507 void
508 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
509 {
510 	amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
511 	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
512 }
513 
514 
515 /*
516  * uvm_map_init: init mapping system at boot time.   note that we allocate
517  * and init the static pool of structs vm_map_entry for the kernel here.
518  */
519 
520 void
521 uvm_map_init(void)
522 {
523 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
524 	int lcv;
525 
526 	/*
527 	 * set up static pool of kernel map entries ...
528 	 */
529 
530 	simple_lock_init(&uvm.kentry_lock);
531 	uvm.kentry_free = NULL;
532 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
533 		kernel_map_entry[lcv].next = uvm.kentry_free;
534 		uvm.kentry_free = &kernel_map_entry[lcv];
535 	}
536 
537 	/*
538 	 * initialize the map-related pools.
539 	 */
540 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
541 	    0, 0, 0, "vmsppl", &pool_allocator_nointr);
542 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
543 	    0, 0, 0, "vmmpepl", &pool_allocator_nointr);
544 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
545 	    0, 0, 0, "vmmpekpl", NULL);
546 	pool_sethiwat(&uvm_map_entry_pool, 8192);
547 }
548 
549 /*
550  * clippers
551  */
552 
553 /*
554  * uvm_map_clip_start: ensure that the entry begins at or after
555  *	the starting address, if it doesn't we split the entry.
556  *
557  * => caller should use UVM_MAP_CLIP_START macro rather than calling
558  *    this directly
559  * => map must be locked by caller
560  */
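
/*
 * For illustration: clipping an entry that covers [A, C) at address B
 * (A < B < C) allocates a new entry for the front piece [A, B) and
 * shrinks the existing entry to [B, C), shifting the existing entry's
 * object offset and amap reference forward by (B - A) so both halves
 * still map the same backing pages.
 */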
561 
562 void
563 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
564     vaddr_t start)
565 {
566 	struct vm_map_entry *new_entry;
567 	vaddr_t new_adj;
568 
569 	/* uvm_map_simplify_entry(map, entry); */ /* XXX */
570 
571 	uvm_tree_sanity(map, "clip_start entry");
572 
573 	/*
574 	 * Split off the front portion.  note that we must insert the new
575 	 * entry BEFORE this one, so that this entry has the specified
576 	 * starting address.
577 	 */
578 
579 	new_entry = uvm_mapent_alloc(map, 0);
580 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
581 
582 	new_entry->end = start;
583 	new_adj = start - new_entry->start;
584 	if (entry->object.uvm_obj)
585 		entry->offset += new_adj;	/* shift start over */
586 
587 	/* Does not change order for the RB tree */
588 	entry->start = start;
589 
590 	if (new_entry->aref.ar_amap) {
591 		amap_splitref(&new_entry->aref, &entry->aref, new_adj);
592 	}
593 
594 	uvm_map_entry_link(map, entry->prev, new_entry);
595 
596 	if (UVM_ET_ISSUBMAP(entry)) {
597 		/* ... unlikely to happen, but play it safe */
598 		 uvm_map_reference(new_entry->object.sub_map);
599 	} else {
600 		if (UVM_ET_ISOBJ(entry) &&
601 		    entry->object.uvm_obj->pgops &&
602 		    entry->object.uvm_obj->pgops->pgo_reference)
603 			entry->object.uvm_obj->pgops->pgo_reference(
604 			    entry->object.uvm_obj);
605 	}
606 
607 	uvm_tree_sanity(map, "clip_start leave");
608 }
609 
610 /*
611  * uvm_map_clip_end: ensure that the entry ends at or before
612  *	the ending address, if it doesn't we split the entry
613  *
614  * => caller should use UVM_MAP_CLIP_END macro rather than calling
615  *    this directly
616  * => map must be locked by caller
617  */
618 
619 void
620 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
621 {
622 	struct vm_map_entry *new_entry;
623 	vaddr_t new_adj; /* #bytes we move start forward */
624 
625 	uvm_tree_sanity(map, "clip_end entry");
626 	/*
627 	 *	Create a new entry and insert it
628 	 *	AFTER the specified entry
629 	 */
630 
631 	new_entry = uvm_mapent_alloc(map, 0);
632 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
633 
634 	new_entry->start = entry->end = end;
635 	new_adj = end - entry->start;
636 	if (new_entry->object.uvm_obj)
637 		new_entry->offset += new_adj;
638 
639 	if (entry->aref.ar_amap)
640 		amap_splitref(&entry->aref, &new_entry->aref, new_adj);
641 
642 	uvm_rb_fixup(map, entry);
643 
644 	uvm_map_entry_link(map, entry, new_entry);
645 
646 	if (UVM_ET_ISSUBMAP(entry)) {
647 		/* ... unlikely to happen, but play it safe */
648 	 	uvm_map_reference(new_entry->object.sub_map);
649 	} else {
650 		if (UVM_ET_ISOBJ(entry) &&
651 		    entry->object.uvm_obj->pgops &&
652 		    entry->object.uvm_obj->pgops->pgo_reference)
653 			entry->object.uvm_obj->pgops->pgo_reference(
654 			    entry->object.uvm_obj);
655 	}
656 	uvm_tree_sanity(map, "clip_end leave");
657 }
658 
659 
660 /*
661  *   M A P   -   m a i n   e n t r y   p o i n t
662  */
663 /*
664  * uvm_map: establish a valid mapping in a map
665  *
666  * => assume startp is page aligned.
667  * => assume size is a multiple of PAGE_SIZE.
668  * => assume sys_mmap provides enough of a "hint" to have us skip
669  *	over text/data/bss area.
670  * => map must be unlocked (we will lock it)
671  * => <uobj,uoffset> value meanings (4 cases):
672  *	 [1] <NULL,uoffset> 		== uoffset is a hint for PMAP_PREFER
673  *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
674  *	 [3] <uobj,uoffset>		== normal mapping
675  *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
676  *
677  *    case [4] is for kernel mappings where we don't know the offset until
678  *    we've found a virtual address.   note that kernel object offsets are
679  *    always relative to vm_map_min(kernel_map).
680  *
681  * => if `align' is non-zero, we try to align the virtual address to
682  *	the specified alignment.  this is only a hint; if we can't
683  *	do it, the address will be unaligned.  this is provided as
684  *	a mechanism for large pages.
685  *
686  * => XXXCDC: need way to map in external amap?
687  */
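
/*
 * For example (illustrative only), the anonymous, blank reservation
 * made by uvm_map_reserve() later in this file boils down to:
 *
 *	uvm_map(map, &addr, round_page(size), NULL, offset, 0,
 *	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
 *	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE));
 *
 * i.e. cases [1]/[2] above: no backing object, with "offset" serving
 * only as a PMAP_PREFER hint for the eventual real mapping.
 */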
688 
689 int
690 uvm_map_p(struct vm_map *map, vaddr_t *startp, vsize_t size,
691     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
692     struct proc *p)
693 {
694 	struct vm_map_entry *prev_entry, *new_entry;
695 #ifdef KVA_GUARDPAGES
696 	struct vm_map_entry *guard_entry;
697 #endif
698 	vm_prot_t prot = UVM_PROTECTION(flags), maxprot =
699 	    UVM_MAXPROTECTION(flags);
700 	vm_inherit_t inherit = UVM_INHERIT(flags);
701 	int advice = UVM_ADVICE(flags);
702 	int error;
703 
704 	/*
705 	 * Holes are incompatible with other types of mappings.
706 	 */
707 	if (flags & UVM_FLAG_HOLE) {
708 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) != 0 &&
709 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
710 	}
711 
712 #ifdef KVA_GUARDPAGES
713 	if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) {
714 		/*
715 		 * kva_guardstart is initialized to the start of the kernelmap
716 		 * and cycles through the kva space.
717 		 * This way we should have a long time between re-use of kva.
718 		 */
719 		static vaddr_t kva_guardstart = 0;
720 		if (kva_guardstart == 0) {
721 			kva_guardstart = vm_map_min(map);
722 			printf("uvm_map: kva guard pages enabled: 0x%lx\n",
723 			    (u_long)kva_guardstart);
724 		}
725 		size += PAGE_SIZE;	/* Add guard page at the end. */
726 		/*
727 		 * Try to fully exhaust kva prior to wrap-around.
728 		 * (This may eat your ram!)
729 		 */
730 		if (VM_MAX_KERNEL_ADDRESS - kva_guardstart < size) {
731 			static int wrap_counter = 0;
732 			printf("uvm_map: kva guard page wrap-around %d\n",
733 			    ++wrap_counter);
734 			kva_guardstart = vm_map_min(map);
735 		}
736 		*startp = kva_guardstart;
737 		/*
738 		 * Prepare for next round.
739 		 */
740 		kva_guardstart += size;
741 	}
742 #endif
743 
744 	uvm_tree_sanity(map, "map entry");
745 
746 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
747 		splassert(IPL_NONE);
748 	else
749 		splassert(IPL_VM);
750 
751 	/*
752 	 * step 0: sanity check of protection code
753 	 */
754 
755 	if ((prot & maxprot) != prot) {
756 		return (EACCES);
757 	}
758 
759 	/*
760 	 * step 1: figure out where to put new VM range
761 	 */
762 
763 	if (vm_map_lock_try(map) == FALSE) {
764 		if (flags & UVM_FLAG_TRYLOCK)
765 			return (EFAULT);
766 		vm_map_lock(map); /* could sleep here */
767 	}
768 	if ((prev_entry = uvm_map_findspace(map, *startp, size, startp,
769 	    uobj, uoffset, align, flags)) == NULL) {
770 		vm_map_unlock(map);
771 		return (ENOMEM);
772 	}
773 
774 #ifdef PMAP_GROWKERNEL
775 	{
776 		/*
777 		 * If the kernel pmap can't map the requested space,
778 		 * then allocate more resources for it.
779 		 */
780 		if (map == kernel_map && !(flags & UVM_FLAG_FIXED) &&
781 		    uvm_maxkaddr < (*startp + size))
782 			uvm_maxkaddr = pmap_growkernel(*startp + size);
783 	}
784 #endif
785 
786 	/*
787 	 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
788 	 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
789 	 * either case we want to zero it  before storing it in the map entry
790 	 * (because it looks strange and confusing when debugging...)
791 	 *
792 	 * if uobj is not null
793 	 *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
794 	 *      and we do not need to change uoffset.
795 	 *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
796 	 *      now (based on the starting address of the map).   this case is
797 	 *      for kernel object mappings where we don't know the offset until
798 	 *      the virtual address is found (with uvm_map_findspace).   the
799 	 *      offset is the distance we are from the start of the map.
800 	 */
801 
802 	if (uobj == NULL) {
803 		uoffset = 0;
804 	} else {
805 		if (uoffset == UVM_UNKNOWN_OFFSET) {
806 			KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
807 			uoffset = *startp - vm_map_min(kernel_map);
808 		}
809 	}
810 
811 	/*
812 	 * step 2: try and insert in map by extending previous entry, if
813 	 * possible
814 	 * XXX: we don't try and pull back the next entry.   might be useful
815 	 * for a stack, but we are currently allocating our stack in advance.
816 	 */
817 
818 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
819 	    prev_entry->end == *startp && prev_entry != &map->header &&
820 	    prev_entry->object.uvm_obj == uobj) {
821 
822 		if (uobj && prev_entry->offset +
823 		    (prev_entry->end - prev_entry->start) != uoffset)
824 			goto step3;
825 
826 		if (UVM_ET_ISSUBMAP(prev_entry))
827 			goto step3;
828 
829 		if (prev_entry->protection != prot ||
830 		    prev_entry->max_protection != maxprot)
831 			goto step3;
832 
833 		if (prev_entry->inheritance != inherit ||
834 		    prev_entry->advice != advice)
835 			goto step3;
836 
837 		/* wiring status must match (new area is unwired) */
838 		if (VM_MAPENT_ISWIRED(prev_entry))
839 			goto step3;
840 
841 		/*
842 		 * can't extend a shared amap.  note: no need to lock amap to
843 		 * look at refs since we don't care about its exact value.
844 		 * if it is one (i.e. we have the only reference) it will stay there
845 		 */
846 
847 		if (prev_entry->aref.ar_amap &&
848 		    amap_refs(prev_entry->aref.ar_amap) != 1) {
849 			goto step3;
850 		}
851 
852 		/*
853 		 * Only merge kernel mappings, but keep track
854 		 * of how much we skipped.
855 		 */
856 		if (map != kernel_map && map != kmem_map) {
857 			goto step3;
858 		}
859 
860 		if (prev_entry->aref.ar_amap) {
861 			error = amap_extend(prev_entry, size);
862 			if (error)
863 				goto step3;
864 		}
865 
866 		/*
867 		 * drop our reference to uobj since we are extending a reference
868 		 * that we already have (the ref count can not drop to zero).
869 		 */
870 
871 		if (uobj && uobj->pgops->pgo_detach)
872 			uobj->pgops->pgo_detach(uobj);
873 
874 		prev_entry->end += size;
875 		uvm_rb_fixup(map, prev_entry);
876 		map->size += size;
877 		if (p && uobj == NULL)
878 			p->p_vmspace->vm_dused += atop(size);
879 
880 		uvm_tree_sanity(map, "map leave 2");
881 
882 		vm_map_unlock(map);
883 		return (0);
884 
885 	}
886 step3:
887 
888 	/*
889 	 * step 3: allocate new entry and link it in
890 	 */
891 
892 #ifdef KVA_GUARDPAGES
893 	if (map == kernel_map && !(flags & UVM_FLAG_FIXED))
894 		size -= PAGE_SIZE;
895 #endif
896 
897 	new_entry = uvm_mapent_alloc(map, flags);
898 	if (new_entry == NULL) {
899 		vm_map_unlock(map);
900 		return (ENOMEM);
901 	}
902 	new_entry->start = *startp;
903 	new_entry->end = new_entry->start + size;
904 	new_entry->object.uvm_obj = uobj;
905 	new_entry->offset = uoffset;
906 
907 	if (uobj)
908 		new_entry->etype = UVM_ET_OBJ;
909 	else
910 		new_entry->etype = 0;
911 
912 	if (flags & UVM_FLAG_COPYONW) {
913 		new_entry->etype |= UVM_ET_COPYONWRITE;
914 		if ((flags & UVM_FLAG_OVERLAY) == 0)
915 			new_entry->etype |= UVM_ET_NEEDSCOPY;
916 	}
917 	if (flags & UVM_FLAG_HOLE)
918 		new_entry->etype |= UVM_ET_HOLE;
919 
920 	new_entry->protection = prot;
921 	new_entry->max_protection = maxprot;
922 	new_entry->inheritance = inherit;
923 	new_entry->wired_count = 0;
924 	new_entry->advice = advice;
925 	if (flags & UVM_FLAG_OVERLAY) {
926 		/*
927 		 * to_add: for BSS we overallocate a little since we
928 		 * are likely to extend it
929 		 */
930 		vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
931 			UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
932 		struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK);
933 		new_entry->aref.ar_pageoff = 0;
934 		new_entry->aref.ar_amap = amap;
935 	} else {
936 		new_entry->aref.ar_pageoff = 0;
937 		new_entry->aref.ar_amap = NULL;
938 	}
939 
940 	uvm_map_entry_link(map, prev_entry, new_entry);
941 
942 	map->size += size;
943 	if (p && uobj == NULL)
944 		p->p_vmspace->vm_dused += atop(size);
945 
946 
947 	/*
948 	 *      Update the free space hint
949 	 */
950 
951 	if ((map->first_free == prev_entry) &&
952 	    (prev_entry->end >= new_entry->start))
953 		map->first_free = new_entry;
954 
955 #ifdef KVA_GUARDPAGES
956 	/*
957 	 * Create the guard entry.
958 	 */
959 	if (map == kernel_map && !(flags & UVM_FLAG_FIXED)) {
960 		guard_entry = uvm_mapent_alloc(map, flags);
961 		if (guard_entry != NULL) {
962 			guard_entry->start = new_entry->end;
963 			guard_entry->end = guard_entry->start + PAGE_SIZE;
964 			guard_entry->object.uvm_obj = uobj;
965 			guard_entry->offset = uoffset;
966 			guard_entry->etype = MAP_ET_KVAGUARD;
967 			guard_entry->protection = prot;
968 			guard_entry->max_protection = maxprot;
969 			guard_entry->inheritance = inherit;
970 			guard_entry->wired_count = 0;
971 			guard_entry->advice = advice;
972 			guard_entry->aref.ar_pageoff = 0;
973 			guard_entry->aref.ar_amap = NULL;
974 			uvm_map_entry_link(map, new_entry, guard_entry);
975 			map->size += PAGE_SIZE;
976 			kva_guardpages++;
977 		}
978 	}
979 #endif
980 
981 	uvm_tree_sanity(map, "map leave");
982 
983 	vm_map_unlock(map);
984 	return (0);
985 }
986 
987 /*
988  * uvm_map_lookup_entry: find map entry at or before an address
989  *
990  * => map must at least be read-locked by caller
991  * => entry is returned in "entry"
992  * => return value is true if address is in the returned entry
993  */
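
/*
 * In other words (illustrative): if the map has an entry covering
 * [0x1000, 0x2000) and the caller asks about 0x1800, that entry is
 * returned and the result is TRUE; asking about 0x2800 in the gap
 * that follows returns FALSE with *entry set to the entry preceding
 * the gap (or &map->header if the address lies before the first
 * entry), which is exactly the "prev_entry" that callers such as
 * uvm_map_findspace() want for insertion.
 */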
994 
995 boolean_t
996 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
997     struct vm_map_entry **entry)
998 {
999 	struct vm_map_entry *cur;
1000 	struct vm_map_entry *last;
1001 	int			use_tree = 0;
1002 
1003 	/*
1004 	 * start looking either from the head of the
1005 	 * list, or from the hint.
1006 	 */
1007 
1008 	simple_lock(&map->hint_lock);
1009 	cur = map->hint;
1010 	simple_unlock(&map->hint_lock);
1011 
1012 	if (cur == &map->header)
1013 		cur = cur->next;
1014 
1015 	if (address >= cur->start) {
1016 	    	/*
1017 		 * go from hint to end of list.
1018 		 *
1019 		 * but first, make a quick check to see if
1020 		 * we are already looking at the entry we
1021 		 * want (which is usually the case).
1022 		 * note also that we don't need to save the hint
1023 		 * here... it is the same hint (unless we are
1024 		 * at the header, in which case the hint didn't
1025 		 * buy us anything anyway).
1026 		 */
1027 		last = &map->header;
1028 		if ((cur != last) && (cur->end > address)) {
1029 			*entry = cur;
1030 			return (TRUE);
1031 		}
1032 
1033 		if (map->nentries > 30)
1034 			use_tree = 1;
1035 	} else {
1036 	    	/*
1037 		 * go from start to hint, *inclusively*
1038 		 */
1039 		last = cur->next;
1040 		cur = map->header.next;
1041 		use_tree = 1;
1042 	}
1043 
1044 	uvm_tree_sanity(map, __func__);
1045 
1046 	if (use_tree) {
1047 		struct vm_map_entry *prev = &map->header;
1048 		cur = RB_ROOT(&map->rbhead);
1049 
1050 		/*
1051 		 * Simple lookup in the tree.  Happens when the hint is
1052 		 * invalid, or nentries reaches a threshold.
1053 		 */
1054 		while (cur) {
1055 			if (address >= cur->start) {
1056 				if (address < cur->end) {
1057 					*entry = cur;
1058 					SAVE_HINT(map, map->hint, cur);
1059 					return (TRUE);
1060 				}
1061 				prev = cur;
1062 				cur = RB_RIGHT(cur, rb_entry);
1063 			} else
1064 				cur = RB_LEFT(cur, rb_entry);
1065 		}
1066 		*entry = prev;
1067 		return (FALSE);
1068 	}
1069 
1070 	/*
1071 	 * search linearly
1072 	 */
1073 
1074 	while (cur != last) {
1075 		if (cur->end > address) {
1076 			if (address >= cur->start) {
1077 			    	/*
1078 				 * save this lookup for future
1079 				 * hints, and return
1080 				 */
1081 
1082 				*entry = cur;
1083 				SAVE_HINT(map, map->hint, cur);
1084 				return (TRUE);
1085 			}
1086 			break;
1087 		}
1088 		cur = cur->next;
1089 	}
1090 
1091 	*entry = cur->prev;
1092 	SAVE_HINT(map, map->hint, *entry);
1093 	return (FALSE);
1094 }
1095 
1096 /*
1097  * Checks if the address pointed to by phint fits into the empty
1098  * space before the vm_map_entry "after".  Takes alignment and
1099  * offset into consideration.
1100  */
1101 
1102 int
1103 uvm_map_spacefits(struct vm_map *map, vaddr_t *phint, vsize_t length,
1104     struct vm_map_entry *after, voff_t uoffset, vsize_t align)
1105 {
1106 	vaddr_t hint = *phint;
1107 	vaddr_t end;
1108 
1109 #ifdef PMAP_PREFER
1110 	/*
1111 	 * push hint forward as needed to avoid VAC alias problems.
1112 	 * we only do this if a valid offset is specified.
1113 	 */
1114 	if (uoffset != UVM_UNKNOWN_OFFSET)
1115 		hint = PMAP_PREFER(uoffset, hint);
1116 #endif
1117 	if (align != 0)
1118 		if ((hint & (align - 1)) != 0)
1119 			hint = roundup(hint, align);
1120 	*phint = hint;
1121 
1122 	end = hint + length;
1123 	if (end > map->max_offset || end < hint)
1124 		return (FALSE);
1125 	if (after != NULL && after != &map->header && after->start < end)
1126 		return (FALSE);
1127 
1128 	return (TRUE);
1129 }
1130 
1131 /*
1132  * uvm_map_pie: return a random load address for a PIE executable
1133  * properly aligned.
1134  */
1135 
1136 #ifndef VM_PIE_MAX_ADDR
1137 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1138 #endif
1139 
1140 #ifndef VM_PIE_MIN_ADDR
1141 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1142 #endif
1143 
1144 #ifndef VM_PIE_MIN_ALIGN
1145 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1146 #endif
1147 
1148 vaddr_t
1149 uvm_map_pie(vaddr_t align)
1150 {
1151 	vaddr_t addr, space, min;
1152 
1153 	align = MAX(align, VM_PIE_MIN_ALIGN);
1154 
1155 	/* round up to next alignment */
1156 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1157 
1158 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1159 		return (align);
1160 
1161 	space = (VM_PIE_MAX_ADDR - min) / align;
1162 	space = MIN(space, (u_int32_t)-1);
1163 
1164 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1165 	addr += min;
1166 
1167 	return (addr);
1168 }
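
/*
 * Worked example (illustrative, using the defaults above): with
 * align = 0x10000, "min" is VM_PIE_MIN_ADDR rounded up to a 64KB
 * boundary, "space" is the number of 64KB slots between min and
 * VM_PIE_MAX_ADDR (clamped to 32 bits), and the return value is min
 * plus a uniformly random slot index times 64KB, so the result is
 * always 64KB aligned and stays below VM_PIE_MAX_ADDR.
 */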
1169 
1170 /*
1171  * uvm_map_hint: return the beginning of the best area suitable for
1172  * creating a new mapping with "prot" protection.
1173  */
1174 vaddr_t
1175 uvm_map_hint1(struct proc *p, vm_prot_t prot, int skipheap)
1176 {
1177 	vaddr_t addr;
1178 
1179 #ifdef __i386__
1180 	/*
1181 	 * If executable skip first two pages, otherwise start
1182 	 * after data + heap region.
1183 	 */
1184 	if ((prot & VM_PROT_EXECUTE) &&
1185 	    ((vaddr_t)p->p_vmspace->vm_daddr >= I386_MAX_EXE_ADDR)) {
1186 		addr = (PAGE_SIZE*2) +
1187 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
1188 		return (round_page(addr));
1189 	}
1190 #endif
1191 	/* start malloc/mmap after the brk */
1192 	addr = (vaddr_t)p->p_vmspace->vm_daddr;
1193 	if (skipheap)
1194 		addr += BRKSIZ;
1195 #if !defined(__vax__)
1196 	addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1);
1197 #endif
1198 	return (round_page(addr));
1199 }
1200 
1201 /*
1202  * uvm_map_findspace: find "length" sized space in "map".
1203  *
1204  * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
1205  *	set (in which case we insist on using "hint").
1206  * => "result" is VA returned
1207  * => uobj/uoffset are to be used to handle VAC alignment, if required
1208  * => if `align' is non-zero, we attempt to align to that value.
1209  * => caller must at least have read-locked map
1210  * => returns NULL on failure, or pointer to prev. map entry if success
1211  * => note this is a cross between the old vm_map_findspace and vm_map_find
1212  */
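
/*
 * Rough shape of the search below, as a reader's guide: first the
 * slot implied by the hint (or by map->first_free) is tested directly
 * with uvm_map_spacefits(); failing that, the red-black tree is
 * descended using the "space"/"ownspace" augmentation to find a gap
 * of at least "length" bytes near the hint; and if offset or
 * alignment constraints defeat the tree, the code falls back to a
 * linear walk of the entry list ("listsearch").  If everything fails
 * and a non-zero alignment was requested, the whole lookup is retried
 * once with align == 0.
 */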
1213 
1214 struct vm_map_entry *
1215 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
1216     vaddr_t *result, struct uvm_object *uobj, voff_t uoffset, vsize_t align,
1217     int flags)
1218 {
1219 	struct vm_map_entry *entry, *next, *tmp;
1220 	struct vm_map_entry *child, *prev = NULL;
1221 	vaddr_t end, orig_hint;
1222 
1223 	KASSERT((align & (align - 1)) == 0);
1224 	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1225 
1226 	uvm_tree_sanity(map, "map_findspace entry");
1227 
1228 	/*
1229 	 * remember the original hint.  if we are aligning, then we
1230 	 * may have to try again with no alignment constraint if
1231 	 * we fail the first time.
1232 	 */
1233 
1234 	orig_hint = hint;
1235 	if (hint < map->min_offset) {	/* check ranges ... */
1236 		if (flags & UVM_FLAG_FIXED) {
1237 			return(NULL);
1238 		}
1239 		hint = map->min_offset;
1240 	}
1241 	if (hint > map->max_offset) {
1242 		return(NULL);
1243 	}
1244 
1245 	/*
1246 	 * Look for the first possible address; if there's already
1247 	 * something at this address, we have to start after it.
1248 	 */
1249 
1250 	if ((flags & UVM_FLAG_FIXED) == 0 && hint == map->min_offset) {
1251 		if ((entry = map->first_free) != &map->header)
1252 			hint = entry->end;
1253 	} else {
1254 		if (uvm_map_lookup_entry(map, hint, &tmp)) {
1255 			/* "hint" address already in use ... */
1256 			if (flags & UVM_FLAG_FIXED) {
1257 				return(NULL);
1258 			}
1259 			hint = tmp->end;
1260 		}
1261 		entry = tmp;
1262 	}
1263 
1264 	if (flags & UVM_FLAG_FIXED) {
1265 		end = hint + length;
1266 		if (end > map->max_offset || end < hint) {
1267 			goto error;
1268 		}
1269 		next = entry->next;
1270 		if (next == &map->header || next->start >= end)
1271 			goto found;
1272 		return(NULL); /* only one shot at it ... */
1273 	}
1274 
1275 	/* Try to find the space in the red-black tree */
1276 
1277 	/* Check slot before any entry */
1278 	if (uvm_map_spacefits(map, &hint, length, entry->next, uoffset, align))
1279 		goto found;
1280 
1281 	/* If there is not enough space in the whole tree, we fail */
1282 	tmp = RB_ROOT(&map->rbhead);
1283 	if (tmp == NULL || tmp->space < length)
1284 		goto error;
1285 
1286 	/* Find an entry close to hint that has enough space */
1287 	for (; tmp;) {
1288 		if (tmp->end >= hint &&
1289 		    (prev == NULL || tmp->end < prev->end)) {
1290 			if (tmp->ownspace >= length)
1291 				prev = tmp;
1292 			else if ((child = RB_RIGHT(tmp, rb_entry)) != NULL &&
1293 			    child->space >= length)
1294 				prev = tmp;
1295 		}
1296 		if (tmp->end < hint)
1297 			child = RB_RIGHT(tmp, rb_entry);
1298 		else if (tmp->end > hint)
1299 			child = RB_LEFT(tmp, rb_entry);
1300 		else {
1301 			if (tmp->ownspace >= length)
1302 				break;
1303 			child = RB_RIGHT(tmp, rb_entry);
1304 		}
1305 		if (child == NULL || child->space < length)
1306 			break;
1307 		tmp = child;
1308 	}
1309 
1310 	if (tmp != NULL && hint < tmp->end + tmp->ownspace) {
1311 		/*
1312 		 * Check if the entry that we found satisfies the
1313 		 * space requirement
1314 		 */
1315 		if (hint < tmp->end)
1316 			hint = tmp->end;
1317 		if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset,
1318 			align)) {
1319 			entry = tmp;
1320 			goto found;
1321 		} else if (tmp->ownspace >= length)
1322 			goto listsearch;
1323 	}
1324 	if (prev == NULL)
1325 		goto error;
1326 
1327 	hint = prev->end;
1328 	if (uvm_map_spacefits(map, &hint, length, prev->next, uoffset,
1329 		align)) {
1330 		entry = prev;
1331 		goto found;
1332 	} else if (prev->ownspace >= length)
1333 		goto listsearch;
1334 
1335 	tmp = RB_RIGHT(prev, rb_entry);
1336 	for (;;) {
1337 		KASSERT(tmp && tmp->space >= length);
1338 		child = RB_LEFT(tmp, rb_entry);
1339 		if (child && child->space >= length) {
1340 			tmp = child;
1341 			continue;
1342 		}
1343 		if (tmp->ownspace >= length)
1344 			break;
1345 		tmp = RB_RIGHT(tmp, rb_entry);
1346 	}
1347 
1348 	hint = tmp->end;
1349 	if (uvm_map_spacefits(map, &hint, length, tmp->next, uoffset, align)) {
1350 		entry = tmp;
1351 		goto found;
1352 	}
1353 
1354 	/*
1355 	 * The tree fails to find an entry because of offset or alignment
1356 	 * restrictions.  Search the list instead.
1357 	 */
1358  listsearch:
1359 	/*
1360 	 * Look through the rest of the map, trying to fit a new region in
1361 	 * the gap between existing regions, or after the very last region.
1362 	 * note: entry->end   = base VA of current gap,
1363 	 *	 next->start  = VA of end of current gap
1364 	 */
1365 	for (;; hint = (entry = next)->end) {
1366 		/*
1367 		 * Find the end of the proposed new region.  Be sure we didn't
1368 		 * go beyond the end of the map, or wrap around the address;
1369 		 * if so, we lose.  Otherwise, if this is the last entry, or
1370 		 * if the proposed new region fits before the next entry, we
1371 		 * win.
1372 		 */
1373 
1374 #ifdef PMAP_PREFER
1375 		/*
1376 		 * push hint forward as needed to avoid VAC alias problems.
1377 		 * we only do this if a valid offset is specified.
1378 		 */
1379 		if (uoffset != UVM_UNKNOWN_OFFSET)
1380 			hint = PMAP_PREFER(uoffset, hint);
1381 #endif
1382 		if (align != 0) {
1383 			if ((hint & (align - 1)) != 0)
1384 				hint = roundup(hint, align);
1385 			/*
1386 			 * XXX Should we PMAP_PREFER() here again?
1387 			 */
1388 		}
1389 		end = hint + length;
1390 		if (end > map->max_offset || end < hint) {
1391 			goto error;
1392 		}
1393 		next = entry->next;
1394 		if (next == &map->header || next->start >= end)
1395 			break;
1396 	}
1397  found:
1398 	SAVE_HINT(map, map->hint, entry);
1399 	*result = hint;
1400 	return (entry);
1401 
1402  error:
1403 	if (align != 0) {
1404 		return (uvm_map_findspace(map, orig_hint,
1405 			    length, result, uobj, uoffset, 0, flags));
1406 	}
1407 	return (NULL);
1408 }
1409 
1410 /*
1411  *   U N M A P   -   m a i n   e n t r y   p o i n t
1412  */
1413 
1414 /*
1415  * uvm_unmap: remove mappings from a vm_map (from "start" up to "end")
1416  *
1417  * => caller must check alignment and size
1418  * => map must be unlocked (we will lock it)
1419  */
1420 void
1421 uvm_unmap_p(vm_map_t map, vaddr_t start, vaddr_t end, struct proc *p)
1422 {
1423 	vm_map_entry_t dead_entries;
1424 
1425 	/*
1426 	 * work now done by helper functions.   wipe the pmap's and then
1427 	 * detach from the dead entries...
1428 	 */
1429 	vm_map_lock(map);
1430 	uvm_unmap_remove(map, start, end, &dead_entries, p, FALSE);
1431 	vm_map_unlock(map);
1432 
1433 	if (dead_entries != NULL)
1434 		uvm_unmap_detach(dead_entries, 0);
1435 
1436 }
1437 
1438 
1439 /*
1440  *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
1441  */
1442 
1443 /*
1444  * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "end")
1445  *
1446  * => caller must check alignment and size
1447  * => map must be locked by caller
1448  * => we return a list of map entries that we've removed from the map
1449  *    in "entry_list"
1450  */
1451 
1452 void
1453 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1454     struct vm_map_entry **entry_list, struct proc *p, boolean_t remove_holes)
1455 {
1456 	struct vm_map_entry *entry, *first_entry, *next;
1457 	vaddr_t len;
1458 
1459 	VM_MAP_RANGE_CHECK(map, start, end);
1460 
1461 	uvm_tree_sanity(map, "unmap_remove entry");
1462 
1463 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1464 		splassert(IPL_NONE);
1465 	else
1466 		splassert(IPL_VM);
1467 
1468 	/*
1469 	 * find first entry
1470 	 */
1471 	if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
1472 		/* clip and go... */
1473 		entry = first_entry;
1474 		UVM_MAP_CLIP_START(map, entry, start);
1475 		/* critical!  prevents stale hint */
1476 		SAVE_HINT(map, entry, entry->prev);
1477 
1478 	} else {
1479 		entry = first_entry->next;
1480 	}
1481 
1482 	/*
1483 	 * Save the free space hint
1484 	 */
1485 
1486 	if (map->first_free->start >= start)
1487 		map->first_free = entry->prev;
1488 
1489 	/*
1490 	 * note: we now re-use first_entry for a different task.  we remove
1491 	 * a number of map entries from the map and save them in a linked
1492 	 * list headed by "first_entry".  once we remove them from the map
1493 	 * the caller should unlock the map and drop the references to the
1494 	 * backing objects [c.f. uvm_unmap_detach].  the object is to
1495 	 * separate unmapping from reference dropping.  why?
1496 	 *   [1] the map has to be locked for unmapping
1497 	 *   [2] the map need not be locked for reference dropping
1498 	 *   [3] dropping references may trigger pager I/O, and if we hit
1499 	 *       a pager that does synchronous I/O we may have to wait for it.
1500 	 *   [4] we would like all waiting for I/O to occur with maps unlocked
1501 	 *       so that we don't block other threads.
1502 	 */
1503 	first_entry = NULL;
1504 	*entry_list = NULL;		/* to be safe */
1505 
1506 	/*
1507 	 * break up the area into map entry sized regions and unmap.  note
1508 	 * that all mappings have to be removed before we can even consider
1509 	 * dropping references to amaps or VM objects (otherwise we could end
1510 	 * up with a mapping to a page on the free list which would be very bad)
1511 	 */
1512 
1513 	while ((entry != &map->header) && (entry->start < end)) {
1514 
1515 		UVM_MAP_CLIP_END(map, entry, end);
1516 		next = entry->next;
1517 		len = entry->end - entry->start;
1518 		if (p && entry->object.uvm_obj == NULL)
1519 			p->p_vmspace->vm_dused -= atop(len);
1520 
1521 		/*
1522 		 * unwire before removing addresses from the pmap; otherwise
1523 		 * unwiring will put the entries back into the pmap (XXX).
1524 		 */
1525 
1526 		if (VM_MAPENT_ISWIRED(entry))
1527 			uvm_map_entry_unwire(map, entry);
1528 
1529 		/*
1530 		 * special case: handle mappings to anonymous kernel objects.
1531 		 * we want to free these pages right away...
1532 		 */
1533 #ifdef KVA_GUARDPAGES
1534 		if (map == kernel_map && entry->etype & MAP_ET_KVAGUARD) {
1535 			entry->etype &= ~MAP_ET_KVAGUARD;
1536 			kva_guardpages--;
1537 		} else		/* (code continues across line-break) */
1538 #endif
1539 		if (UVM_ET_ISHOLE(entry)) {
1540 			if (!remove_holes) {
1541 				entry = next;
1542 				continue;
1543 			}
1544 		} else if (map->flags & VM_MAP_INTRSAFE) {
1545 			uvm_km_pgremove_intrsafe(entry->start, entry->end);
1546 			pmap_kremove(entry->start, len);
1547 		} else if (UVM_ET_ISOBJ(entry) &&
1548 		    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1549 			KASSERT(vm_map_pmap(map) == pmap_kernel());
1550 
1551 			/*
1552 			 * note: kernel object mappings are currently used in
1553 			 * two ways:
1554 			 *  [1] "normal" mappings of pages in the kernel object
1555 			 *  [2] uvm_km_valloc'd allocations in which we
1556 			 *      pmap_enter in some non-kernel-object page
1557 			 *      (e.g. vmapbuf).
1558 			 *
1559 			 * for case [1], we need to remove the mapping from
1560 			 * the pmap and then remove the page from the kernel
1561 			 * object (because, once pages in a kernel object are
1562 			 * unmapped they are no longer needed, unlike, say,
1563 			 * a vnode where you might want the data to persist
1564 			 * until flushed out of a queue).
1565 			 *
1566 			 * for case [2], we need to remove the mapping from
1567 			 * the pmap.  there shouldn't be any pages at the
1568 			 * specified offset in the kernel object [but it
1569 			 * doesn't hurt to call uvm_km_pgremove just to be
1570 			 * safe?]
1571 			 *
1572 			 * uvm_km_pgremove currently does the following:
1573 			 *   for pages in the kernel object in range:
1574 			 *     - drops the swap slot
1575 			 *     - uvm_pagefree the page
1576 			 *
1577 			 * note there is a version of uvm_km_pgremove() that
1578 			 * is used for "intrsafe" objects.
1579 			 */
1580 
1581 			/*
1582 			 * remove mappings from pmap and drop the pages
1583 			 * from the object.  offsets are always relative
1584 			 * to vm_map_min(kernel_map).
1585 			 */
1586 			pmap_remove(pmap_kernel(), entry->start, entry->end);
1587 			uvm_km_pgremove(entry->object.uvm_obj,
1588 			    entry->start - vm_map_min(kernel_map),
1589 			    entry->end - vm_map_min(kernel_map));
1590 
1591 			/*
1592 			 * null out kernel_object reference, we've just
1593 			 * dropped it
1594 			 */
1595 			entry->etype &= ~UVM_ET_OBJ;
1596 			entry->object.uvm_obj = NULL;	/* to be safe */
1597 
1598 		} else {
1599 			/*
1600 		 	 * remove mappings the standard way.
1601 		 	 */
1602 			pmap_remove(map->pmap, entry->start, entry->end);
1603 		}
1604 
1605 		/*
1606 		 * remove entry from map and put it on our list of entries
1607 		 * that we've nuked.  then go do next entry.
1608 		 */
1609 		/* critical! prevents stale hint */
1610 		SAVE_HINT(map, entry, entry->prev);
1611 
1612 		uvm_map_entry_unlink(map, entry);
1613 		map->size -= len;
1614 		entry->next = first_entry;
1615 		first_entry = entry;
1616 		entry = next;		/* next entry, please */
1617 	}
1618 #ifdef KVA_GUARDPAGES
1619 	/*
1620 	 * entry points at the map-entry after the last-removed map-entry.
1621 	 */
1622 	if (map == kernel_map && entry != &map->header &&
1623 	    entry->etype & MAP_ET_KVAGUARD && entry->start == end) {
1624 		/*
1625 		 * Removed range is followed by guard page;
1626 		 * remove that guard page now (or it will stay forever).
1627 		 */
1628 		entry->etype &= ~MAP_ET_KVAGUARD;
1629 		kva_guardpages--;
1630 
1631 		uvm_map_entry_unlink(map, entry);
1632 		map->size -= len;
1633 		entry->next = first_entry;
1634 		first_entry = entry;
1635 		entry = next;		/* next entry, please */
1636 	}
1637 #endif
1638 	/* if ((map->flags & VM_MAP_DYING) == 0) { */
1639 		pmap_update(vm_map_pmap(map));
1640 	/* } */
1641 
1642 
1643 	uvm_tree_sanity(map, "unmap_remove leave");
1644 
1645 	/*
1646 	 * now we've cleaned up the map and are ready for the caller to drop
1647 	 * references to the mapped objects.
1648 	 */
1649 
1650 	*entry_list = first_entry;
1651 }
1652 
1653 /*
1654  * uvm_unmap_detach: drop references in a chain of map entries
1655  *
1656  * => we will free the map entries as we traverse the list.
1657  */
1658 
1659 void
1660 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
1661 {
1662 	struct vm_map_entry *next_entry;
1663 
1664 	while (first_entry) {
1665 		KASSERT(!VM_MAPENT_ISWIRED(first_entry));
1666 
1667 		/*
1668 		 * drop reference to amap, if we've got one
1669 		 */
1670 
1671 		if (first_entry->aref.ar_amap)
1672 			uvm_map_unreference_amap(first_entry, flags);
1673 
1674 		/*
1675 		 * drop reference to our backing object, if we've got one
1676 		 */
1677 
1678 		if (UVM_ET_ISSUBMAP(first_entry)) {
1679 			/* ... unlikely to happen, but play it safe */
1680 			uvm_map_deallocate(first_entry->object.sub_map);
1681 		} else {
1682 			if (UVM_ET_ISOBJ(first_entry) &&
1683 			    first_entry->object.uvm_obj->pgops->pgo_detach)
1684 				first_entry->object.uvm_obj->pgops->
1685 				    pgo_detach(first_entry->object.uvm_obj);
1686 		}
1687 
1688 		next_entry = first_entry->next;
1689 		uvm_mapent_free(first_entry);
1690 		first_entry = next_entry;
1691 	}
1692 }
1693 
1694 /*
1695  *   E X T R A C T I O N   F U N C T I O N S
1696  */
1697 
1698 /*
1699  * uvm_map_reserve: reserve space in a vm_map for future use.
1700  *
1701  * => we reserve space in a map by putting a dummy map entry in the
1702  *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
1703  * => map should be unlocked (we will write lock it)
1704  * => we return true if we were able to reserve space
1705  * => XXXCDC: should be inline?
1706  */
1707 
1708 int
1709 uvm_map_reserve(struct vm_map *map, vsize_t size, vaddr_t offset,
1710     vsize_t align, vaddr_t *raddr)
1711 {
1712 
1713 	size = round_page(size);
1714 	if (*raddr < vm_map_min(map))
1715 		*raddr = vm_map_min(map);                /* hint */
1716 
1717 	/*
1718 	 * reserve some virtual space.
1719 	 */
1720 
1721 	if (uvm_map(map, raddr, size, NULL, offset, 0,
1722 	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
1723 	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
1724 		return (FALSE);
1725 	}
1726 
1727 	return (TRUE);
1728 }
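
/*
 * Typical use (illustrative), as in step 1 of uvm_map_extract() below:
 *
 *	dstaddr = vm_map_min(dstmap);
 *	if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
 *		return (ENOMEM);
 *
 * after which the blank entry covering [dstaddr, dstaddr + len) is
 * filled in with real mappings via uvm_map_replace().
 */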
1729 
1730 /*
1731  * uvm_map_replace: replace a reserved (blank) area of memory with
1732  * real mappings.
1733  *
1734  * => caller must WRITE-LOCK the map
1735  * => we return TRUE if replacement was a success
1736  * => we expect the newents chain to have nnewents entries on it and
1737  *    we expect newents->prev to point to the last entry on the list
1738  * => note newents is allowed to be NULL
1739  */
1740 
1741 int
1742 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
1743     struct vm_map_entry *newents, int nnewents)
1744 {
1745 	struct vm_map_entry *oldent, *last;
1746 
1747 	uvm_tree_sanity(map, "map_replace entry");
1748 
1749 	/*
1750 	 * first find the blank map entry at the specified address
1751 	 */
1752 
1753 	if (!uvm_map_lookup_entry(map, start, &oldent)) {
1754 		return(FALSE);
1755 	}
1756 
1757 	/*
1758 	 * check to make sure we have a proper blank entry
1759 	 */
1760 
1761 	if (oldent->start != start || oldent->end != end ||
1762 	    oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
1763 		return (FALSE);
1764 	}
1765 
1766 #ifdef DIAGNOSTIC
1767 	/*
1768 	 * sanity check the newents chain
1769 	 */
1770 	{
1771 		struct vm_map_entry *tmpent = newents;
1772 		int nent = 0;
1773 		vaddr_t cur = start;
1774 
1775 		while (tmpent) {
1776 			nent++;
1777 			if (tmpent->start < cur)
1778 				panic("uvm_map_replace1");
1779 			if (tmpent->start > tmpent->end || tmpent->end > end) {
1780 		printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
1781 			    tmpent->start, tmpent->end, end);
1782 				panic("uvm_map_replace2");
1783 			}
1784 			cur = tmpent->end;
1785 			if (tmpent->next) {
1786 				if (tmpent->next->prev != tmpent)
1787 					panic("uvm_map_replace3");
1788 			} else {
1789 				if (newents->prev != tmpent)
1790 					panic("uvm_map_replace4");
1791 			}
1792 			tmpent = tmpent->next;
1793 		}
1794 		if (nent != nnewents)
1795 			panic("uvm_map_replace5");
1796 	}
1797 #endif
1798 
1799 	/*
1800 	 * map entry is a valid blank!   replace it.   (this does all the
1801 	 * work of map entry link/unlink...).
1802 	 */
1803 
1804 	if (newents) {
1805 		last = newents->prev;		/* we expect this */
1806 
1807 		/* critical: flush stale hints out of map */
1808 		SAVE_HINT(map, map->hint, newents);
1809 		if (map->first_free == oldent)
1810 			map->first_free = last;
1811 
1812 		last->next = oldent->next;
1813 		last->next->prev = last;
1814 
1815 		/* Fix RB tree */
1816 		uvm_rb_remove(map, oldent);
1817 
1818 		newents->prev = oldent->prev;
1819 		newents->prev->next = newents;
1820 		map->nentries = map->nentries + (nnewents - 1);
1821 
1822 		/* Fixup the RB tree */
1823 		{
1824 			int i;
1825 			struct vm_map_entry *tmp;
1826 
1827 			tmp = newents;
1828 			for (i = 0; i < nnewents && tmp; i++) {
1829 				uvm_rb_insert(map, tmp);
1830 				tmp = tmp->next;
1831 			}
1832 		}
1833 	} else {
1834 
1835 		/* critical: flush stale hints out of map */
1836 		SAVE_HINT(map, map->hint, oldent->prev);
1837 		if (map->first_free == oldent)
1838 			map->first_free = oldent->prev;
1839 
1840 		/* NULL list of new entries: just remove the old one */
1841 		uvm_map_entry_unlink(map, oldent);
1842 	}
1843 
1844 
1845 	uvm_tree_sanity(map, "map_replace leave");
1846 
1847 	/*
1848 	 * now we can free the old blank entry, unlock the map and return.
1849 	 */
1850 
1851 	uvm_mapent_free(oldent);
1852 	return(TRUE);
1853 }
1854 
1855 /*
1856  * uvm_map_extract: extract a mapping from a map and put it somewhere
1857  *	(maybe removing the old mapping)
1858  *
1859  * => maps should be unlocked (we will write lock them)
1860  * => returns 0 on success, error code otherwise
1861  * => start must be page aligned
1862  * => len must be page sized
1863  * => flags:
1864  *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
1865  *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
1866  *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
1867  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
1868  *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
1869  *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
1870  *             be used from within the kernel in a kernel level map <<<
1871  */
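
/*
 * A sketch of a typical temporary extraction (illustrative; the names
 * and error handling are made up): to reach a chunk of another map
 * from the kernel, one might do roughly
 *
 *	error = uvm_map_extract(srcmap, uva, chunklen, kernel_map, &kva,
 *	    UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | UVM_EXTRACT_FIXPROT);
 *
 * work through the temporary kernel mapping at "kva", and then tear it
 * down again via the QREF teardown path mentioned in the note above
 * (an uvm_unmap_remove()/uvm_unmap_detach() pair on kernel_map) rather
 * than leaving it mapped.
 */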
1872 
1873 int
1874 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
1875     struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
1876 {
1877 	vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge,
1878 	    oldstart;
1879 	struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry;
1880 	struct vm_map_entry *deadentry, *oldentry;
1881 	vsize_t elen;
1882 	int nchain, error, copy_ok;
1883 
1884 	uvm_tree_sanity(srcmap, "map_extract src enter");
1885 	uvm_tree_sanity(dstmap, "map_extract dst enter");
1886 
1887 	/*
1888 	 * step 0: sanity check: start must be on a page boundary, length
1889 	 * must be page sized.  can't ask for CONTIG/QREF if you asked for
1890 	 * REMOVE.
1891 	 */
1892 
1893 	KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
1894 	KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
1895 		(flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
1896 
1897 	/*
1898 	 * step 1: reserve space in the target map for the extracted area
1899 	 */
1900 
1901 	dstaddr = vm_map_min(dstmap);
1902 	if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
1903 		return(ENOMEM);
1904 	*dstaddrp = dstaddr;	/* pass address back to caller */
1905 
1906 	/*
1907 	 * step 2: setup for the extraction process loop by init'ing the
1908 	 * map entry chain, locking src map, and looking up the first useful
1909 	 * entry in the map.
1910 	 */
1911 
1912 	end = start + len;
1913 	newend = dstaddr + len;
1914 	chain = endchain = NULL;
1915 	nchain = 0;
1916 	vm_map_lock(srcmap);
1917 
1918 	if (uvm_map_lookup_entry(srcmap, start, &entry)) {
1919 
1920 		/* "start" is within an entry */
1921 		if (flags & UVM_EXTRACT_QREF) {
1922 
1923 			/*
1924 			 * for quick references we don't clip the entry, so
1925 			 * the entry may map space "before" the starting
1926 			 * virtual address... this is the "fudge" factor
1927 			 * (which can be non-zero only the first time
1928 			 * through the "while" loop in step 3).
1929 			 */
1930 
1931 			fudge = start - entry->start;
1932 		} else {
1933 
1934 			/*
1935 			 * normal reference: we clip the map to fit (thus
1936 			 * fudge is zero)
1937 			 */
1938 
1939 			UVM_MAP_CLIP_START(srcmap, entry, start);
1940 			SAVE_HINT(srcmap, srcmap->hint, entry->prev);
1941 			fudge = 0;
1942 		}
1943 	} else {
1944 
1945 		/* "start" is not within an entry ... skip to next entry */
1946 		if (flags & UVM_EXTRACT_CONTIG) {
1947 			error = EINVAL;
1948 			goto bad;    /* definite hole here ... */
1949 		}
1950 
1951 		entry = entry->next;
1952 		fudge = 0;
1953 	}
1954 
1955 	/* save values from srcmap for step 6 */
1956 	orig_entry = entry;
1957 	orig_fudge = fudge;
1958 
1959 	/*
1960 	 * step 3: now start looping through the map entries, extracting
1961 	 * as we go.
1962 	 */
1963 
1964 	while (entry->start < end && entry != &srcmap->header) {
1965 
1966 		/* if we are not doing a quick reference, clip it */
1967 		if ((flags & UVM_EXTRACT_QREF) == 0)
1968 			UVM_MAP_CLIP_END(srcmap, entry, end);
1969 
1970 		/* clear needs_copy (allow chunking) */
1971 		if (UVM_ET_ISNEEDSCOPY(entry)) {
1972 			if (fudge)
1973 				oldstart = entry->start;
1974 			else
1975 				oldstart = 0;	/* XXX: gcc */
1976 			amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
1977 			if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
1978 				error = ENOMEM;
1979 				goto bad;
1980 			}
1981 
1982 			/* amap_copy could clip (during chunk)!  update fudge */
1983 			if (fudge) {
1984 				fudge = fudge - (entry->start - oldstart);
1985 				orig_fudge = fudge;
1986 			}
1987 		}
1988 
1989 		/* calculate the offset of this from "start" */
1990 		oldoffset = (entry->start + fudge) - start;
1991 
1992 		/* allocate a new map entry */
1993 		newentry = uvm_mapent_alloc(dstmap, flags);
1994 		if (newentry == NULL) {
1995 			error = ENOMEM;
1996 			goto bad;
1997 		}
1998 
1999 		/* set up new map entry */
2000 		newentry->next = NULL;
2001 		newentry->prev = endchain;
2002 		newentry->start = dstaddr + oldoffset;
2003 		newentry->end =
2004 		    newentry->start + (entry->end - (entry->start + fudge));
2005 		if (newentry->end > newend || newentry->end < newentry->start)
2006 			newentry->end = newend;
2007 		newentry->object.uvm_obj = entry->object.uvm_obj;
2008 		if (newentry->object.uvm_obj) {
2009 			if (newentry->object.uvm_obj->pgops->pgo_reference)
2010 				newentry->object.uvm_obj->pgops->
2011 				    pgo_reference(newentry->object.uvm_obj);
2012 			newentry->offset = entry->offset + fudge;
2013 		} else {
2014 			newentry->offset = 0;
2015 		}
2016 		newentry->etype = entry->etype;
2017 		newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
2018 			entry->max_protection : entry->protection;
2019 		newentry->max_protection = entry->max_protection;
2020 		newentry->inheritance = entry->inheritance;
2021 		newentry->wired_count = 0;
2022 		newentry->aref.ar_amap = entry->aref.ar_amap;
2023 		if (newentry->aref.ar_amap) {
2024 			newentry->aref.ar_pageoff =
2025 			    entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
2026 			uvm_map_reference_amap(newentry, AMAP_SHARED |
2027 			    ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
2028 		} else {
2029 			newentry->aref.ar_pageoff = 0;
2030 		}
2031 		newentry->advice = entry->advice;
2032 
2033 		/* now link it on the chain */
2034 		nchain++;
2035 		if (endchain == NULL) {
2036 			chain = endchain = newentry;
2037 		} else {
2038 			endchain->next = newentry;
2039 			endchain = newentry;
2040 		}
2041 
2042 		/* end of 'while' loop! */
2043 		if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
2044 		    (entry->next == &srcmap->header ||
2045 		    entry->next->start != entry->end)) {
2046 			error = EINVAL;
2047 			goto bad;
2048 		}
2049 		entry = entry->next;
2050 		fudge = 0;
2051 	}
2052 
2053 	/*
2054 	 * step 4: close off chain (in format expected by uvm_map_replace)
2055 	 */
2056 
2057 	if (chain)
2058 		chain->prev = endchain;
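	/*
	 * the chain built above is singly linked through "next"; chain->prev
	 * is (re)used to record the tail so that uvm_map_replace can find
	 * the last entry (newents->prev) without walking the list.
	 */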
2059 
2060 	/*
2061 	 * step 5: attempt to lock the dest map so we can pmap_copy.
2062 	 * note usage of copy_ok:
2063 	 *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
2064 	 *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
2065 	 */
2066 
2067 	if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
2068 		copy_ok = 1;
2069 		if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2070 		    nchain)) {
2071 			if (srcmap != dstmap)
2072 				vm_map_unlock(dstmap);
2073 			error = EIO;
2074 			goto bad;
2075 		}
2076 	} else {
2077 		copy_ok = 0;
2078 		/* replace defered until step 7 */
2079 		/* replace deferred until step 7 */
2080 
2081 	/*
2082 	 * step 6: traverse the srcmap a second time to do the following:
2083 	 *  - if we got a lock on the dstmap do pmap_copy
2084 	 *  - if UVM_EXTRACT_REMOVE remove the entries
2085 	 * we make use of orig_entry and orig_fudge (saved in step 2)
2086 	 */
2087 
2088 	if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
2089 
2090 		/* purge possible stale hints from srcmap */
2091 		if (flags & UVM_EXTRACT_REMOVE) {
2092 			SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
2093 			if (srcmap->first_free->start >= start)
2094 				srcmap->first_free = orig_entry->prev;
2095 		}
2096 
2097 		entry = orig_entry;
2098 		fudge = orig_fudge;
2099 		deadentry = NULL;	/* for UVM_EXTRACT_REMOVE */
2100 
2101 		while (entry->start < end && entry != &srcmap->header) {
2102 			if (copy_ok) {
2103 				oldoffset = (entry->start + fudge) - start;
2104 				elen = MIN(end, entry->end) -
2105 				    (entry->start + fudge);
2106 				pmap_copy(dstmap->pmap, srcmap->pmap,
2107 				    dstaddr + oldoffset, elen,
2108 				    entry->start + fudge);
2109 			}
2110 
2111 			/* we advance "entry" in the following if statement */
2112 			if (flags & UVM_EXTRACT_REMOVE) {
2113 				pmap_remove(srcmap->pmap, entry->start,
2114 						entry->end);
2115 				oldentry = entry;	/* save entry */
2116 				entry = entry->next;	/* advance */
2117 				uvm_map_entry_unlink(srcmap, oldentry);
2118 				/* add to dead list */
2119 				oldentry->next = deadentry;
2120 				deadentry = oldentry;
2121 			} else {
2122 				entry = entry->next;	/* advance */
2123 			}
2124 
2125 			/* end of 'while' loop */
2126 			fudge = 0;
2127 		}
2128 		pmap_update(srcmap->pmap);
2129 
2130 		/*
2131 		 * unlock dstmap.  we will dispose of deadentry in
2132 		 * step 7 if needed
2133 		 */
2134 
2135 		if (copy_ok && srcmap != dstmap)
2136 			vm_map_unlock(dstmap);
2137 
2138 	} else {
2139 		deadentry = NULL;	/* XXX: gcc */
2140 	}
2141 
2142 	/*
2143 	 * step 7: we are done with the source map, unlock.   if copy_ok
2144 	 * is 0 then we have not replaced the dummy mapping in dstmap yet
2145 	 * and we need to do so now.
2146 	 */
2147 
2148 	vm_map_unlock(srcmap);
2149 	if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
2150 		uvm_unmap_detach(deadentry, 0);   /* dispose of old entries */
2151 
2152 	/* now do the replacement if we didn't do it in step 5 */
2153 	if (copy_ok == 0) {
2154 		vm_map_lock(dstmap);
2155 		error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2156 		    nchain);
2157 		vm_map_unlock(dstmap);
2158 
2159 		if (error == FALSE) {
2160 			error = EIO;
2161 			goto bad2;
2162 		}
2163 	}
2164 
2165 	uvm_tree_sanity(srcmap, "map_extract src leave");
2166 	uvm_tree_sanity(dstmap, "map_extract dst leave");
2167 
2168 	return(0);
2169 
2170 	/*
2171 	 * bad: failure recovery
2172 	 */
2173 bad:
2174 	vm_map_unlock(srcmap);
2175 bad2:			/* src already unlocked */
2176 	if (chain)
2177 		uvm_unmap_detach(chain,
2178 		    (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
2179 
2180 	uvm_tree_sanity(srcmap, "map_extract src err leave");
2181 	uvm_tree_sanity(dstmap, "map_extract dst err leave");
2182 
2183 	uvm_unmap(dstmap, dstaddr, dstaddr+len);   /* ??? */
2184 	return(error);
2185 }
2186 
2187 /* end of extraction functions */
2188 
2189 /*
2190  * uvm_map_submap: punch down part of a map into a submap
2191  *
2192  * => only the kernel_map is allowed to be submapped
2193  * => the purpose of submapping is to break up the locking granularity
2194  *	of a larger map
2195  * => the range specified must have been mapped previously with a uvm_map()
2196  *	call [with uobj==NULL] to create a blank map entry in the main map.
2197  *	[And it had better still be blank!]
2198  * => maps which contain submaps should never be copied or forked.
2199  * => to remove a submap, use uvm_unmap() on the main map
2200  *	and then uvm_map_deallocate() the submap.
2201  * => main map must be unlocked.
2202  * => submap must have been init'd and have a zero reference count.
2203  *	[need not be locked as we don't actually reference it]
2204  */
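/*
 * Illustrative sequence (hypothetical, not from this file): the caller first
 * reserves a blank range [sva, eva) in kernel_map with uvm_map() (uobj ==
 * NULL, no amap), then punches the submap into exactly that range:
 *
 *	if (uvm_map_submap(kernel_map, sva, eva, submap))
 *		panic("could not install submap");
 *
 * where "submap" is a map that has been initialized but never put into
 * service.  In-tree kernel submaps are normally created through
 * uvm_km_suballoc(), which wraps these steps.
 */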
2205 
2206 int
2207 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
2208     struct vm_map *submap)
2209 {
2210 	struct vm_map_entry *entry;
2211 	int result;
2212 
2213 	vm_map_lock(map);
2214 
2215 	VM_MAP_RANGE_CHECK(map, start, end);
2216 
2217 	if (uvm_map_lookup_entry(map, start, &entry)) {
2218 		UVM_MAP_CLIP_START(map, entry, start);
2219 		UVM_MAP_CLIP_END(map, entry, end);		/* to be safe */
2220 	} else {
2221 		entry = NULL;
2222 	}
2223 
2224 	if (entry != NULL &&
2225 	    entry->start == start && entry->end == end &&
2226 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
2227 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
2228 		entry->etype |= UVM_ET_SUBMAP;
2229 		entry->object.sub_map = submap;
2230 		entry->offset = 0;
2231 		uvm_map_reference(submap);
2232 		result = 0;
2233 	} else {
2234 		result = EINVAL;
2235 	}
2236 	vm_map_unlock(map);
2237 	return(result);
2238 }
2239 
2240 
2241 /*
2242  * uvm_map_protect: change map protection
2243  *
2244  * => set_max means set max_protection.
2245  * => map must be unlocked.
2246  */
2247 
2248 #define MASK(entry)     (UVM_ET_ISCOPYONWRITE(entry) ? \
2249 			 ~VM_PROT_WRITE : VM_PROT_ALL)
2250 #define max(a,b)        ((a) > (b) ? (a) : (b))
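/*
 * MASK(entry) withholds VM_PROT_WRITE from the protection pushed into the
 * pmap for copy-on-write entries, so the first write to such an entry still
 * faults and triggers the amap copy; all other entries get their full
 * protection.
 */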
2251 
2252 int
2253 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
2254     vm_prot_t new_prot, boolean_t set_max)
2255 {
2256 	struct vm_map_entry *current, *entry;
2257 	int error = 0;
2258 
2259 	vm_map_lock(map);
2260 
2261 	VM_MAP_RANGE_CHECK(map, start, end);
2262 
2263 	if (uvm_map_lookup_entry(map, start, &entry)) {
2264 		UVM_MAP_CLIP_START(map, entry, start);
2265 	} else {
2266 		entry = entry->next;
2267 	}
2268 
2269 	/*
2270 	 * make a first pass to check for protection violations.
2271 	 */
2272 
2273 	current = entry;
2274 	while ((current != &map->header) && (current->start < end)) {
2275 		if (UVM_ET_ISSUBMAP(current)) {
2276 			error = EINVAL;
2277 			goto out;
2278 		}
2279 		if ((new_prot & current->max_protection) != new_prot) {
2280 			error = EACCES;
2281 			goto out;
2282 		}
2283 		current = current->next;
2284 	}
2285 
2286 	/* go back and fix up protections (no need to clip this time). */
2287 
2288 	current = entry;
2289 
2290 	while ((current != &map->header) && (current->start < end)) {
2291 		vm_prot_t old_prot;
2292 
2293 		UVM_MAP_CLIP_END(map, current, end);
2294 
2295 		old_prot = current->protection;
2296 		if (set_max)
2297 			current->protection =
2298 			    (current->max_protection = new_prot) & old_prot;
2299 		else
2300 			current->protection = new_prot;
2301 
2302 		/*
2303 		 * update physical map if necessary.  worry about copy-on-write
2304 		 * here -- CHECK THIS XXX
2305 		 */
2306 
2307 		if (current->protection != old_prot) {
2308 			/* update pmap! */
2309 			if ((current->protection & MASK(entry)) == PROT_NONE &&
2310 			    VM_MAPENT_ISWIRED(entry))
2311 				current->wired_count--;
2312 			pmap_protect(map->pmap, current->start, current->end,
2313 			    current->protection & MASK(entry));
2314 		}
2315 
2316 		/*
2317 		 * If the map is configured to lock any future mappings,
2318 		 * wire this entry now if the old protection was VM_PROT_NONE
2319 		 * and the new protection is not VM_PROT_NONE.
2320 		 */
2321 
2322 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
2323 		    VM_MAPENT_ISWIRED(entry) == 0 &&
2324 		    old_prot == VM_PROT_NONE &&
2325 		    new_prot != VM_PROT_NONE) {
2326 			if (uvm_map_pageable(map, entry->start, entry->end,
2327 			    FALSE, UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
2328 				/*
2329 				 * If locking the entry fails, remember the
2330 				 * error if it's the first one.  Note we
2331 				 * still continue setting the protection in
2332 				 * the map, but will return the resource
2333 				 * shortage condition regardless.
2334 				 *
2335 				 * XXX Ignore what the actual error is,
2336 				 * XXX just call it a resource shortage
2337 				 * XXX so that it doesn't get confused
2338 				 * XXX what uvm_map_protect() itself would
2339 				 * XXX normally return.
2340 				 */
2341 				error = ENOMEM;
2342 			}
2343 		}
2344 
2345 		current = current->next;
2346 	}
2347 	pmap_update(map->pmap);
2348 
2349  out:
2350 	vm_map_unlock(map);
2351 	return (error);
2352 }
2353 
2354 #undef  max
2355 #undef  MASK
2356 
2357 /*
2358  * uvm_map_inherit: set inheritance code for range of addrs in map.
2359  *
2360  * => map must be unlocked
2361  * => note that the inherit code is used during a "fork".  see fork
2362  *	code for details.
2363  */
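/*
 * Caller sketch (hypothetical values): sys_minherit() passes the user range
 * and MAP_INHERIT_* value straight through, e.g.
 *
 *	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
 *	    MAP_INHERIT_SHARE);
 *
 * anything other than NONE/COPY/SHARE is rejected with EINVAL below.
 */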
2364 
2365 int
2366 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
2367     vm_inherit_t new_inheritance)
2368 {
2369 	struct vm_map_entry *entry;
2370 
2371 	switch (new_inheritance) {
2372 	case MAP_INHERIT_NONE:
2373 	case MAP_INHERIT_COPY:
2374 	case MAP_INHERIT_SHARE:
2375 		break;
2376 	default:
2377 		return (EINVAL);
2378 	}
2379 
2380 	vm_map_lock(map);
2381 
2382 	VM_MAP_RANGE_CHECK(map, start, end);
2383 
2384 	if (uvm_map_lookup_entry(map, start, &entry)) {
2385 		UVM_MAP_CLIP_START(map, entry, start);
2386 	} else {
2387 		entry = entry->next;
2388 	}
2389 
2390 	while ((entry != &map->header) && (entry->start < end)) {
2391 		UVM_MAP_CLIP_END(map, entry, end);
2392 		entry->inheritance = new_inheritance;
2393 		entry = entry->next;
2394 	}
2395 
2396 	vm_map_unlock(map);
2397 	return (0);
2398 }
2399 
2400 /*
2401  * uvm_map_advice: set advice code for range of addrs in map.
2402  *
2403  * => map must be unlocked
2404  */
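/*
 * Only the access-pattern advice values (MADV_NORMAL, MADV_RANDOM,
 * MADV_SEQUENTIAL) are recorded per map entry here; page-disposal advice
 * from madvise(2) (e.g. MADV_DONTNEED, MADV_FREE) is handled by the caller
 * through uvm_map_clean() rather than this routine.
 */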
2405 
2406 int
2407 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
2408 {
2409 	struct vm_map_entry *entry;
2410 
2411 	switch (new_advice) {
2412 	case MADV_NORMAL:
2413 	case MADV_RANDOM:
2414 	case MADV_SEQUENTIAL:
2415 		/* nothing special here */
2416 		break;
2417 
2418 	default:
2419 		return (EINVAL);
2420 	}
2421 	vm_map_lock(map);
2422 	VM_MAP_RANGE_CHECK(map, start, end);
2423 	if (uvm_map_lookup_entry(map, start, &entry)) {
2424 		UVM_MAP_CLIP_START(map, entry, start);
2425 	} else {
2426 		entry = entry->next;
2427 	}
2428 
2429 	/*
2430 	 * XXXJRT: disallow holes?
2431 	 */
2432 
2433 	while ((entry != &map->header) && (entry->start < end)) {
2434 		UVM_MAP_CLIP_END(map, entry, end);
2435 
2436 		entry->advice = new_advice;
2437 		entry = entry->next;
2438 	}
2439 
2440 	vm_map_unlock(map);
2441 	return (0);
2442 }
2443 
2444 /*
2445  * uvm_map_pageable: sets the pageability of a range in a map.
2446  *
2447  * => wires map entries.  should not be used for transient page locking.
2448  *	for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
2449  * => regions specified as not pageable require lock-down (wired) memory
2450  *	and page tables.
2451  * => map must never be read-locked
2452  * => if islocked is TRUE, map is already write-locked
2453  * => we always unlock the map, since we must downgrade to a read-lock
2454  *	to call uvm_fault_wire()
2455  * => XXXCDC: check this and try and clean it up.
2456  */
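/*
 * Caller sketch (hypothetical, mirroring the mlock(2)/munlock(2) paths).
 * new_pageable == FALSE wires the range, TRUE unwires it:
 *
 *	error = uvm_map_pageable(&p->p_vmspace->vm_map,
 *	    trunc_page(addr), round_page(addr + size), FALSE, 0);
 */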
2457 
2458 int
2459 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2460     boolean_t new_pageable, int lockflags)
2461 {
2462 	struct vm_map_entry *entry, *start_entry, *failed_entry;
2463 	int rv;
2464 #ifdef DIAGNOSTIC
2465 	u_int timestamp_save;
2466 #endif
2467 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2468 
2469 	if ((lockflags & UVM_LK_ENTER) == 0)
2470 		vm_map_lock(map);
2471 
2472 	VM_MAP_RANGE_CHECK(map, start, end);
2473 
2474 	/*
2475 	 * only one pageability change may take place at one time, since
2476 	 * uvm_fault_wire assumes it will be called only once for each
2477 	 * wiring/unwiring.  therefore, we have to make sure we're actually
2478 	 * changing the pageability for the entire region.  we do so before
2479 	 * making any changes.
2480 	 */
2481 
2482 	if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
2483 		if ((lockflags & UVM_LK_EXIT) == 0)
2484 			vm_map_unlock(map);
2485 
2486 		return (EFAULT);
2487 	}
2488 	entry = start_entry;
2489 
2490 	/*
2491 	 * handle wiring and unwiring separately.
2492 	 */
2493 
2494 	if (new_pageable) {		/* unwire */
2495 		UVM_MAP_CLIP_START(map, entry, start);
2496 
2497 		/*
2498 		 * unwiring.  first ensure that the range to be unwired is
2499 		 * really wired down and that there are no holes.
2500 		 */
2501 
2502 		while ((entry != &map->header) && (entry->start < end)) {
2503 			if (entry->wired_count == 0 ||
2504 			    (entry->end < end &&
2505 			     (entry->next == &map->header ||
2506 			      entry->next->start > entry->end))) {
2507 				if ((lockflags & UVM_LK_EXIT) == 0)
2508 					vm_map_unlock(map);
2509 				return (EINVAL);
2510 			}
2511 			entry = entry->next;
2512 		}
2513 
2514 		/*
2515 		 * POSIX 1003.1b - a single munlock call unlocks a region,
2516 		 * regardless of the number of mlock calls made on that
2517 		 * region.
2518 		 */
2519 
2520 		entry = start_entry;
2521 		while ((entry != &map->header) && (entry->start < end)) {
2522 			UVM_MAP_CLIP_END(map, entry, end);
2523 			if (VM_MAPENT_ISWIRED(entry))
2524 				uvm_map_entry_unwire(map, entry);
2525 			entry = entry->next;
2526 		}
2527 		if ((lockflags & UVM_LK_EXIT) == 0)
2528 			vm_map_unlock(map);
2529 		return (0);
2530 	}
2531 
2532 	/*
2533 	 * wire case: in two passes [XXXCDC: ugly block of code here]
2534 	 *
2535 	 * 1: holding the write lock, we create any anonymous maps that need
2536 	 *    to be created.  then we clip each map entry to the region to
2537 	 *    be wired and increment its wiring count.
2538 	 *
2539 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2540 	 *    in the pages for any newly wired area (wired_count == 1).
2541 	 *
2542 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2543 	 *    deadlock with another thread that may have faulted on one of
2544 	 *    the pages to be wired (it would mark the page busy, blocking
2545 	 *    us, then in turn block on the map lock that we hold).  because
2546 	 *    of problems in the recursive lock package, we cannot upgrade
2547 	 *    to a write lock in vm_map_lookup.  thus, any actions that
2548 	 *    require the write lock must be done beforehand.  because we
2549 	 *    keep the read lock on the map, the copy-on-write status of the
2550 	 *    entries we modify here cannot change.
2551 	 */
2552 
2553 	while ((entry != &map->header) && (entry->start < end)) {
2554 		if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2555 
2556 			/*
2557 			 * perform actions of vm_map_lookup that need the
2558 			 * write lock on the map: create an anonymous map
2559 			 * for a copy-on-write region, or an anonymous map
2560 			 * for a zero-fill region.  (XXXCDC: submap case
2561 			 * ok?)
2562 			 */
2563 
2564 			if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
2565 				if (UVM_ET_ISNEEDSCOPY(entry) &&
2566 				    ((entry->protection & VM_PROT_WRITE) ||
2567 				     (entry->object.uvm_obj == NULL))) {
2568 					amap_copy(map, entry, M_WAITOK, TRUE,
2569 					    start, end);
2570 					/* XXXCDC: wait OK? */
2571 				}
2572 			}
2573 		}
2574 		UVM_MAP_CLIP_START(map, entry, start);
2575 		UVM_MAP_CLIP_END(map, entry, end);
2576 		entry->wired_count++;
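		/*
		 * wired_count is a nesting count: only the 0 -> 1 transition
		 * (checked as wired_count == 1 in pass 2 below) actually
		 * faults the pages in via uvm_fault_wire.
		 */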
2577 
2578 		/*
2579 		 * Check for holes
2580 		 */
2581 
2582 		if (entry->protection == VM_PROT_NONE ||
2583 		    (entry->end < end &&
2584 		     (entry->next == &map->header ||
2585 		      entry->next->start > entry->end))) {
2586 
2587 			/*
2588 			 * found one.  amap creation actions do not need to
2589 			 * be undone, but the wired counts need to be restored.
2590 			 */
2591 
2592 			while (entry != &map->header && entry->end > start) {
2593 				entry->wired_count--;
2594 				entry = entry->prev;
2595 			}
2596 			if ((lockflags & UVM_LK_EXIT) == 0)
2597 				vm_map_unlock(map);
2598 			return (EINVAL);
2599 		}
2600 		entry = entry->next;
2601 	}
2602 
2603 	/*
2604 	 * Pass 2.
2605 	 */
2606 
2607 #ifdef DIAGNOSTIC
2608 	timestamp_save = map->timestamp;
2609 #endif
2610 	vm_map_busy(map);
2611 	vm_map_downgrade(map);
2612 
2613 	rv = 0;
2614 	entry = start_entry;
2615 	while (entry != &map->header && entry->start < end) {
2616 		if (entry->wired_count == 1) {
2617 			rv = uvm_fault_wire(map, entry->start, entry->end,
2618 			    entry->protection);
2619 			if (rv) {
2620 				/*
2621 				 * wiring failed.  break out of the loop.
2622 				 * we'll clean up the map below, once we
2623 				 * have a write lock again.
2624 				 */
2625 				break;
2626 			}
2627 		}
2628 		entry = entry->next;
2629 	}
2630 
2631 	if (rv) {        /* failed? */
2632 
2633 		/*
2634 		 * Get back to an exclusive (write) lock.
2635 		 */
2636 
2637 		vm_map_upgrade(map);
2638 		vm_map_unbusy(map);
2639 
2640 #ifdef DIAGNOSTIC
2641 		if (timestamp_save != map->timestamp)
2642 			panic("uvm_map_pageable: stale map");
2643 #endif
2644 
2645 		/*
2646 		 * first drop the wiring count on all the entries
2647 		 * which haven't actually been wired yet.
2648 		 */
2649 
2650 		failed_entry = entry;
2651 		while (entry != &map->header && entry->start < end) {
2652 			entry->wired_count--;
2653 			entry = entry->next;
2654 		}
2655 
2656 		/*
2657 		 * now, unwire all the entries that were successfully
2658 		 * wired above.
2659 		 */
2660 
2661 		entry = start_entry;
2662 		while (entry != failed_entry) {
2663 			entry->wired_count--;
2664 			if (VM_MAPENT_ISWIRED(entry) == 0)
2665 				uvm_map_entry_unwire(map, entry);
2666 			entry = entry->next;
2667 		}
2668 		if ((lockflags & UVM_LK_EXIT) == 0)
2669 			vm_map_unlock(map);
2670 		return(rv);
2671 	}
2672 
2673 	/* We are holding a read lock here. */
2674 	if ((lockflags & UVM_LK_EXIT) == 0) {
2675 		vm_map_unbusy(map);
2676 		vm_map_unlock_read(map);
2677 	} else {
2678 
2679 		/*
2680 		 * Get back to an exclusive (write) lock.
2681 		 */
2682 
2683 		vm_map_upgrade(map);
2684 		vm_map_unbusy(map);
2685 	}
2686 
2687 	return (0);
2688 }
2689 
2690 /*
2691  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2692  * all mapped regions.
2693  *
2694  * => map must not be locked.
2695  * => if no flags are specified, all regions are unwired.
2696  * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
2697  */
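/*
 * Caller sketch (hypothetical, mirroring mlockall(2)/munlockall(2)):
 *
 *	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
 *	    MCL_CURRENT | MCL_FUTURE, lim);
 *	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
 *
 * where "lim" would typically be the process' locked-memory resource limit;
 * flags == 0 is the unwire-everything case handled first below.
 */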
2698 
2699 int
2700 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2701 {
2702 	struct vm_map_entry *entry, *failed_entry;
2703 	vsize_t size;
2704 	int error;
2705 #ifdef DIAGNOSTIC
2706 	u_int timestamp_save;
2707 #endif
2708 
2709 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2710 
2711 	vm_map_lock(map);
2712 
2713 	/*
2714 	 * handle wiring and unwiring separately.
2715 	 */
2716 
2717 	if (flags == 0) {			/* unwire */
2718 		/*
2719 		 * POSIX 1003.1b -- munlockall unlocks all regions,
2720 		 * regardless of how many times mlockall has been called.
2721 		 */
2722 		for (entry = map->header.next; entry != &map->header;
2723 		     entry = entry->next) {
2724 			if (VM_MAPENT_ISWIRED(entry))
2725 				uvm_map_entry_unwire(map, entry);
2726 		}
2727 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2728 		vm_map_unlock(map);
2729 		return (0);
2730 
2731 		/*
2732 		 * end of unwire case!
2733 		 */
2734 	}
2735 
2736 	if (flags & MCL_FUTURE) {
2737 		/*
2738 		 * must wire all future mappings; remember this.
2739 		 */
2740 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2741 	}
2742 
2743 	if ((flags & MCL_CURRENT) == 0) {
2744 		/*
2745 		 * no more work to do!
2746 		 */
2747 		vm_map_unlock(map);
2748 		return (0);
2749 	}
2750 
2751 	/*
2752 	 * wire case: in three passes [XXXCDC: ugly block of code here]
2753 	 *
2754 	 * 1: holding the write lock, count all pages mapped by non-wired
2755 	 *    entries.  if this would cause us to go over our limit, we fail.
2756 	 *
2757 	 * 2: still holding the write lock, we create any anonymous maps that
2758 	 *    need to be created.  then we increment each entry's wiring count.
2759 	 *
2760 	 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
2761 	 *    in the pages for any newly wired area (wired_count == 1).
2762 	 *
2763 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2764 	 *    deadlock with another thread that may have faulted on one of
2765 	 *    the pages to be wired (it would mark the page busy, blocking
2766 	 *    us, then in turn block on the map lock that we hold).  because
2767 	 *    of problems in the recursive lock package, we cannot upgrade
2768 	 *    to a write lock in vm_map_lookup.  thus, any actions that
2769 	 *    require the write lock must be done beforehand.  because we
2770 	 *    keep the read lock on the map, the copy-on-write status of the
2771 	 *    entries we modify here cannot change.
2772 	 */
2773 
2774 	for (size = 0, entry = map->header.next; entry != &map->header;
2775 	     entry = entry->next) {
2776 		if (entry->protection != VM_PROT_NONE &&
2777 		    VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2778 			size += entry->end - entry->start;
2779 		}
2780 	}
2781 
2782 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2783 		vm_map_unlock(map);
2784 		return (ENOMEM);		/* XXX overloaded */
2785 	}
2786 
2787 	/* XXX non-pmap_wired_count case must be handled by caller */
2788 #ifdef pmap_wired_count
2789 	if (limit != 0 &&
2790 	    (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
2791 		vm_map_unlock(map);
2792 		return (ENOMEM);		/* XXX overloaded */
2793 	}
2794 #endif
2795 
2796 	/*
2797 	 * Pass 2.
2798 	 */
2799 
2800 	for (entry = map->header.next; entry != &map->header;
2801 	     entry = entry->next) {
2802 		if (entry->protection == VM_PROT_NONE)
2803 			continue;
2804 		if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
2805 			/*
2806 			 * perform actions of vm_map_lookup that need the
2807 			 * write lock on the map: create an anonymous map
2808 			 * for a copy-on-write region, or an anonymous map
2809 			 * for a zero-fill region.  (XXXCDC: submap case
2810 			 * ok?)
2811 			 */
2812 			if (!UVM_ET_ISSUBMAP(entry)) {	/* not submap */
2813 				if (UVM_ET_ISNEEDSCOPY(entry) &&
2814 				    ((entry->protection & VM_PROT_WRITE) ||
2815 				     (entry->object.uvm_obj == NULL))) {
2816 					amap_copy(map, entry, M_WAITOK, TRUE,
2817 					    entry->start, entry->end);
2818 					/* XXXCDC: wait OK? */
2819 				}
2820 			}
2821 		}
2822 		entry->wired_count++;
2823 	}
2824 
2825 	/*
2826 	 * Pass 3.
2827 	 */
2828 
2829 #ifdef DIAGNOSTIC
2830 	timestamp_save = map->timestamp;
2831 #endif
2832 	vm_map_busy(map);
2833 	vm_map_downgrade(map);
2834 
2835 	for (error = 0, entry = map->header.next;
2836 	    entry != &map->header && error == 0;
2837 	    entry = entry->next) {
2838 		if (entry->wired_count == 1) {
2839 			error = uvm_fault_wire(map, entry->start, entry->end,
2840 			     entry->protection);
2841 		}
2842 	}
2843 
2844 	if (error) {	/* failed? */
2845 		/*
2846 		 * Get back to an exclusive (write) lock.
2847 		 */
2848 		vm_map_upgrade(map);
2849 		vm_map_unbusy(map);
2850 
2851 #ifdef DIAGNOSTIC
2852 		if (timestamp_save != map->timestamp)
2853 			panic("uvm_map_pageable_all: stale map");
2854 #endif
2855 
2856 		/*
2857 		 * first drop the wiring count on all the entries
2858 		 * which haven't actually been wired yet.
2859 		 *
2860 		 * Skip VM_PROT_NONE entries like we did above.
2861 		 */
2862 		failed_entry = entry;
2863 		for (/* nothing */; entry != &map->header;
2864 		     entry = entry->next) {
2865 			if (entry->protection == VM_PROT_NONE)
2866 				continue;
2867 			entry->wired_count--;
2868 		}
2869 
2870 		/*
2871 		 * now, unwire all the entries that were successfully
2872 		 * wired above.
2873 		 *
2874 		 * Skip VM_PROT_NONE entries like we did above.
2875 		 */
2876 		for (entry = map->header.next; entry != failed_entry;
2877 		     entry = entry->next) {
2878 			if (entry->protection == VM_PROT_NONE)
2879 				continue;
2880 			entry->wired_count--;
2881 			if (VM_MAPENT_ISWIRED(entry))
2882 				uvm_map_entry_unwire(map, entry);
2883 		}
2884 		vm_map_unlock(map);
2885 		return (error);
2886 	}
2887 
2888 	/* We are holding a read lock here. */
2889 	vm_map_unbusy(map);
2890 	vm_map_unlock_read(map);
2891 
2892 	return (0);
2893 }
2894 
2895 /*
2896  * uvm_map_clean: clean out a map range
2897  *
2898  * => valid flags:
2899  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
2900  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
2901  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
2902  *   if (flags & PGO_FREE): any cached pages are freed after clean
2903  * => returns an error if any part of the specified range isn't mapped
2904  * => never a need to flush amap layer since the anonymous memory has
2905  *	no permanent home, but may deactivate pages there
2906  * => called from sys_msync() and sys_madvise()
2907  * => caller must not write-lock map (read OK).
2908  * => we may sleep while cleaning if SYNCIO [with map read-locked]
2909  */
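/*
 * Rough flag mapping (see the msync/madvise syscall code for the
 * authoritative translation): MS_SYNC-style writes arrive as
 * PGO_CLEANIT|PGO_SYNCIO, MADV_DONTNEED as PGO_DEACTIVATE and
 * MADV_FREE as PGO_FREE.
 */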
2910 
2911 int	amap_clean_works = 1;	/* XXX for now, just in case... */
2912 
2913 int
2914 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
2915 {
2916 	struct vm_map_entry *current, *entry;
2917 	struct uvm_object *uobj;
2918 	struct vm_amap *amap;
2919 	struct vm_anon *anon;
2920 	struct vm_page *pg;
2921 	vaddr_t offset;
2922 	vsize_t size;
2923 	int rv, error, refs;
2924 
2925 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
2926 		(PGO_FREE|PGO_DEACTIVATE));
2927 
2928 	vm_map_lock_read(map);
2929 	VM_MAP_RANGE_CHECK(map, start, end);
2930 	if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
2931 		vm_map_unlock_read(map);
2932 		return (EFAULT);
2933 	}
2934 
2935 	/*
2936 	 * Make a first pass to check for holes.
2937 	 */
2938 
2939 	for (current = entry; current->start < end; current = current->next) {
2940 		if (UVM_ET_ISSUBMAP(current)) {
2941 			vm_map_unlock_read(map);
2942 			return (EINVAL);
2943 		}
2944 		if (end > current->end && (current->next == &map->header ||
2945 		    current->end != current->next->start)) {
2946 			vm_map_unlock_read(map);
2947 			return (EFAULT);
2948 		}
2949 	}
2950 
2951 	error = 0;
2952 
2953 	for (current = entry; current->start < end; current = current->next) {
2954 		amap = current->aref.ar_amap;	/* top layer */
2955 		uobj = current->object.uvm_obj;	/* bottom layer */
2956 		KASSERT(start >= current->start);
2957 
2958 		/*
2959 		 * No amap cleaning necessary if:
2960 		 *
2961 		 *	(1) There's no amap.
2962 		 *
2963 		 *	(2) We're not deactivating or freeing pages.
2964 		 */
2965 
2966 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
2967 			goto flush_object;
2968 
2969 		/* XXX for now, just in case... */
2970 		if (amap_clean_works == 0)
2971 			goto flush_object;
2972 
2973 		offset = start - current->start;
2974 		size = MIN(end, current->end) - start;
2975 		for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
2976 			anon = amap_lookup(&current->aref, offset);
2977 			if (anon == NULL)
2978 				continue;
2979 
2980 			simple_lock(&anon->an_lock);
2981 
2982 			pg = anon->an_page;
2983 			if (pg == NULL) {
2984 				simple_unlock(&anon->an_lock);
2985 				continue;
2986 			}
2987 
2988 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
2989 
2990 			/*
2991 			 * XXX In these first 3 cases, we always just
2992 			 * XXX deactivate the page.  We may want to
2993 			 * XXX handle the different cases more
2994 			 * XXX specifically, in the future.
2995 			 */
2996 
2997 			case PGO_CLEANIT|PGO_FREE:
2998 			case PGO_CLEANIT|PGO_DEACTIVATE:
2999 			case PGO_DEACTIVATE:
3000  deactivate_it:
3001 				/* skip the page if it's loaned or wired */
3002 				if (pg->loan_count != 0 ||
3003 				    pg->wire_count != 0) {
3004 					simple_unlock(&anon->an_lock);
3005 					continue;
3006 				}
3007 
3008 				uvm_lock_pageq();
3009 
3010 				/*
3011 				 * skip the page if it's not actually owned
3012 				 * by the anon (may simply be loaned to the
3013 				 * anon).
3014 				 */
3015 
3016 				if ((pg->pg_flags & PQ_ANON) == 0) {
3017 					KASSERT(pg->uobject == NULL);
3018 					uvm_unlock_pageq();
3019 					simple_unlock(&anon->an_lock);
3020 					continue;
3021 				}
3022 				KASSERT(pg->uanon == anon);
3023 
3024 #ifdef UBC
3025 				/* ...and deactivate the page. */
3026 				pmap_clear_reference(pg);
3027 #else
3028 				/* zap all mappings for the page. */
3029 				pmap_page_protect(pg, VM_PROT_NONE);
3030 
3031 				/* ...and deactivate the page. */
3032 #endif
3033 				uvm_pagedeactivate(pg);
3034 
3035 				uvm_unlock_pageq();
3036 				simple_unlock(&anon->an_lock);
3037 				continue;
3038 
3039 			case PGO_FREE:
3040 
3041 				/*
3042 				 * If there are multiple references to
3043 				 * the amap, just deactivate the page.
3044 				 */
3045 
3046 				if (amap_refs(amap) > 1)
3047 					goto deactivate_it;
3048 
3049 				/* XXX skip the page if it's wired */
3050 				if (pg->wire_count != 0) {
3051 					simple_unlock(&anon->an_lock);
3052 					continue;
3053 				}
3054 				amap_unadd(&current->aref, offset);
3055 				refs = --anon->an_ref;
3056 				simple_unlock(&anon->an_lock);
3057 				if (refs == 0)
3058 					uvm_anfree(anon);
3059 				continue;
3060 
3061 			default:
3062 				panic("uvm_map_clean: weird flags");
3063 			}
3064 		}
3065 
3066 flush_object:
3067 		/*
3068 		 * flush pages if we've got a valid backing object.
3069 		 *
3070 		 * Don't PGO_FREE if we don't have write permission
3071 		 * and don't flush if this is a copy-on-write object
3072 		 * since we can't know our permissions on it.
3073 		 */
3074 
3075 		offset = current->offset + (start - current->start);
3076 		size = MIN(end, current->end) - start;
3077 		if (uobj != NULL &&
3078 		    ((flags & PGO_FREE) == 0 ||
3079 		     ((entry->max_protection & VM_PROT_WRITE) != 0 &&
3080 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
3081 			simple_lock(&uobj->vmobjlock);
3082 			rv = uobj->pgops->pgo_flush(uobj, offset,
3083 			    offset + size, flags);
3084 			simple_unlock(&uobj->vmobjlock);
3085 
3086 			if (rv == FALSE)
3087 				error = EFAULT;
3088 		}
3089 		start += size;
3090 	}
3091 	vm_map_unlock_read(map);
3092 	return (error);
3093 }
3094 
3095 
3096 /*
3097  * uvm_map_checkprot: check protection in map
3098  *
3099  * => must allow specified protection in a fully allocated region.
3100  * => map must be read or write locked by caller.
3101  */
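/*
 * The walk below requires the range to be fully mapped: any hole, or any
 * entry lacking the requested protection bits, fails the whole check.
 * Callers such as uvm_kernacc() use this for cheap access checks.
 */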
3102 
3103 boolean_t
3104 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3105     vm_prot_t protection)
3106 {
3107 	struct vm_map_entry *entry;
3108 	struct vm_map_entry *tmp_entry;
3109 
3110 	if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3111 		return(FALSE);
3112 	}
3113 	entry = tmp_entry;
3114 	while (start < end) {
3115 		if (entry == &map->header) {
3116 			return(FALSE);
3117 		}
3118 
3119 		/*
3120 		 * no holes allowed
3121 		 */
3122 
3123 		if (start < entry->start) {
3124 			return(FALSE);
3125 		}
3126 
3127 		/*
3128 		 * check protection associated with entry
3129 		 */
3130 
3131 		if ((entry->protection & protection) != protection) {
3132 			return(FALSE);
3133 		}
3134 
3135 		/* go to next entry */
3136 
3137 		start = entry->end;
3138 		entry = entry->next;
3139 	}
3140 	return(TRUE);
3141 }
3142 
3143 /*
3144  * uvmspace_alloc: allocate a vmspace structure.
3145  *
3146  * - structure includes vm_map and pmap
3147  * - XXX: no locking on this structure
3148  * - refcnt set to 1, rest must be init'd by caller
3149  */
3150 struct vmspace *
3151 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3152     boolean_t remove_holes)
3153 {
3154 	struct vmspace *vm;
3155 
3156 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3157 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3158 	return (vm);
3159 }
3160 
3161 /*
3162  * uvmspace_init: initialize a vmspace structure.
3163  *
3164  * - XXX: no locking on this structure
3165  * - refcnt set to 1, rest must be init'd by caller
3166  */
3167 void
3168 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3169     boolean_t pageable, boolean_t remove_holes)
3170 {
3171 
3172 	uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);
3173 
3174 	if (pmap)
3175 		pmap_reference(pmap);
3176 	else
3177 		pmap = pmap_create();
3178 	vm->vm_map.pmap = pmap;
3179 
3180 	vm->vm_refcnt = 1;
3181 
3182 	if (remove_holes)
3183 		pmap_remove_holes(&vm->vm_map);
3184 }
3185 
3186 /*
3187  * uvmspace_share: share a vmspace between two processes
3188  *
3189  * - XXX: no locking on vmspace
3190  * - used for vfork, threads(?)
3191  */
3192 
3193 void
3194 uvmspace_share(struct proc *p1, struct proc *p2)
3195 {
3196 	p2->p_vmspace = p1->p_vmspace;
3197 	p1->p_vmspace->vm_refcnt++;
3198 }
3199 
3200 /*
3201  * uvmspace_exec: the process wants to exec a new program
3202  *
3203  * - XXX: no locking on vmspace
3204  */
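/*
 * Two cases below: if the old vmspace is unshared (vm_refcnt == 1) it is
 * emptied and resized in place; otherwise a fresh vmspace is allocated and
 * installed, and our reference on the shared one is dropped.
 */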
3205 
3206 void
3207 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3208 {
3209 	struct vmspace *nvm, *ovm = p->p_vmspace;
3210 	struct vm_map *map = &ovm->vm_map;
3211 
3212 	pmap_unuse_final(p);   /* before stack addresses go away */
3213 
3214 	/*
3215 	 * see if more than one process is using this vmspace...
3216 	 */
3217 
3218 	if (ovm->vm_refcnt == 1) {
3219 
3220 		/*
3221 		 * if p is the only process using its vmspace then we can safely
3222 		 * recycle that vmspace for the program that is being exec'd.
3223 		 */
3224 
3225 #ifdef SYSVSHM
3226 		/*
3227 		 * SYSV SHM semantics require us to kill all segments on an exec
3228 		 */
3229 		if (ovm->vm_shm)
3230 			shmexit(ovm);
3231 #endif
3232 
3233 		/*
3234 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3235 		 * when a process execs another program image.
3236 		 */
3237 		vm_map_lock(map);
3238 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3239 		vm_map_unlock(map);
3240 
3241 		/*
3242 		 * now unmap the old program
3243 		 */
3244 		uvm_unmap(map, map->min_offset, map->max_offset);
3245 
3246 		/*
3247 		 * but keep MMU holes unavailable
3248 		 */
3249 		pmap_remove_holes(map);
3250 
3251 		/*
3252 		 * resize the map
3253 		 */
3254 		vm_map_lock(map);
3255 		map->min_offset = start;
3256 		uvm_tree_sanity(map, "resize enter");
3257 		map->max_offset = end;
3258 		if (map->header.prev != &map->header)
3259 			uvm_rb_fixup(map, map->header.prev);
3260 		uvm_tree_sanity(map, "resize leave");
3261 		vm_map_unlock(map);
3262 
3263 
3264 	} else {
3265 
3266 		/*
3267 		 * p's vmspace is being shared, so we can't reuse it for p since
3268 		 * it is still being used for others.   allocate a new vmspace
3269 		 * for p
3270 		 */
3271 		nvm = uvmspace_alloc(start, end,
3272 			 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3273 
3274 		/*
3275 		 * install new vmspace and drop our ref to the old one.
3276 		 */
3277 
3278 		pmap_deactivate(p);
3279 		p->p_vmspace = nvm;
3280 		pmap_activate(p);
3281 
3282 		uvmspace_free(ovm);
3283 	}
3284 }
3285 
3286 /*
3287  * uvmspace_free: free a vmspace data structure
3288  *
3289  * - XXX: no locking on vmspace
3290  */
3291 
3292 void
3293 uvmspace_free(struct vmspace *vm)
3294 {
3295 	struct vm_map_entry *dead_entries;
3296 
3297 	if (--vm->vm_refcnt == 0) {
3298 		/*
3299 		 * lock the map, to wait out all other references to it.  delete
3300 		 * all of the mappings and pages they hold, then call the pmap
3301 		 * module to reclaim anything left.
3302 		 */
3303 #ifdef SYSVSHM
3304 		/* Get rid of any SYSV shared memory segments. */
3305 		if (vm->vm_shm != NULL)
3306 			shmexit(vm);
3307 #endif
3308 		vm_map_lock(&vm->vm_map);
3309 		if (vm->vm_map.nentries) {
3310 			uvm_unmap_remove(&vm->vm_map,
3311 			    vm->vm_map.min_offset, vm->vm_map.max_offset,
3312 			    &dead_entries, NULL, TRUE);
3313 			if (dead_entries != NULL)
3314 				uvm_unmap_detach(dead_entries, 0);
3315 		}
3316 		pmap_destroy(vm->vm_map.pmap);
3317 		vm->vm_map.pmap = NULL;
3318 		pool_put(&uvm_vmspace_pool, vm);
3319 	}
3320 }
3321 
3322 /*
3323  * uvm_map_create: create map
3324  */
3325 vm_map_t
3326 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
3327 {
3328 	vm_map_t result;
3329 
3330 	result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK);
3331 	uvm_map_setup(result, min, max, flags);
3332 	result->pmap = pmap;
3333 	return(result);
3334 }
3335 
3336 /*
3337  * uvm_map_setup: init map
3338  *
3339  * => map must not be in service yet.
3340  */
3341 void
3342 uvm_map_setup(vm_map_t map, vaddr_t min, vaddr_t max, int flags)
3343 {
3344 
3345 	RB_INIT(&map->rbhead);
3346 	map->header.next = map->header.prev = &map->header;
3347 	map->nentries = 0;
3348 	map->size = 0;
3349 	map->ref_count = 1;
3350 	map->min_offset = min;
3351 	map->max_offset = max;
3352 	map->flags = flags;
3353 	map->first_free = &map->header;
3354 	map->hint = &map->header;
3355 	map->timestamp = 0;
3356 	rw_init(&map->lock, "vmmaplk");
3357 	simple_lock_init(&map->ref_lock);
3358 	simple_lock_init(&map->hint_lock);
3359 }
3360 
3361 
3362 
3363 /*
3364  * uvm_map_reference: add reference to a map
3365  *
3366  * => map need not be locked (we use ref_lock).
3367  */
3368 void
3369 uvm_map_reference(vm_map_t map)
3370 {
3371 	simple_lock(&map->ref_lock);
3372 	map->ref_count++;
3373 	simple_unlock(&map->ref_lock);
3374 }
3375 
3376 /*
3377  * uvm_map_deallocate: drop reference to a map
3378  *
3379  * => caller must not lock map
3380  * => we will zap map if ref count goes to zero
3381  */
3382 void
3383 uvm_map_deallocate(vm_map_t map)
3384 {
3385 	int c;
3386 
3387 	simple_lock(&map->ref_lock);
3388 	c = --map->ref_count;
3389 	simple_unlock(&map->ref_lock);
3390 	if (c > 0) {
3391 		return;
3392 	}
3393 
3394 	/*
3395 	 * all references gone.   unmap and free.
3396 	 */
3397 
3398 	uvm_unmap(map, map->min_offset, map->max_offset);
3399 	pmap_destroy(map->pmap);
3400 	free(map, M_VMMAP);
3401 }
3402 
3403 /*
3404  *   F O R K   -   m a i n   e n t r y   p o i n t
3405  */
3406 /*
3407  * uvmspace_fork: fork a process' main map
3408  *
3409  * => create a new vmspace for child process from parent.
3410  * => parent's map must not be locked.
3411  */
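/*
 * Summary of the per-entry policy applied below:
 *	MAP_INHERIT_NONE  - the mapping is simply not copied to the child.
 *	MAP_INHERIT_SHARE - the child gets its own map entry but shares the
 *			    parent's amap/object references.
 *	MAP_INHERIT_COPY  - the child gets a copy-on-write view; wired parent
 *			    entries are resolved right away with amap_cow_now()
 *			    since they cannot be write-protected.
 */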
3412 
3413 struct vmspace *
3414 uvmspace_fork(struct vmspace *vm1)
3415 {
3416 	struct vmspace *vm2;
3417 	struct vm_map *old_map = &vm1->vm_map;
3418 	struct vm_map *new_map;
3419 	struct vm_map_entry *old_entry;
3420 	struct vm_map_entry *new_entry;
3421 	pmap_t          new_pmap;
3422 	boolean_t	protect_child;
3423 
3424 	vm_map_lock(old_map);
3425 
3426 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3427 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3428 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3429 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3430 	new_map = &vm2->vm_map;		  /* XXX */
3431 	new_pmap = new_map->pmap;
3432 
3433 	old_entry = old_map->header.next;
3434 
3435 	/*
3436 	 * go entry-by-entry
3437 	 */
3438 
3439 	while (old_entry != &old_map->header) {
3440 
3441 		/*
3442 		 * first, some sanity checks on the old entry
3443 		 */
3444 		if (UVM_ET_ISSUBMAP(old_entry))
3445 			panic("fork: encountered a submap during fork (illegal)");
3446 
3447 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3448 		    UVM_ET_ISNEEDSCOPY(old_entry))
3449 			panic("fork: non-copy_on_write map entry marked needs_copy (illegal)");
3450 
3451 
3452 		switch (old_entry->inheritance) {
3453 		case MAP_INHERIT_NONE:
3454 			/*
3455 			 * drop the mapping
3456 			 */
3457 			break;
3458 
3459 		case MAP_INHERIT_SHARE:
3460 			/*
3461 			 * share the mapping: this means we want the old and
3462 			 * new entries to share amaps and backing objects.
3463 			 */
3464 
3465 			/*
3466 			 * if the old_entry needs a new amap (due to prev fork)
3467 			 * then we need to allocate it now so that we have
3468 			 * something we own to share with the new_entry.   [in
3469 			 * other words, we need to clear needs_copy]
3470 			 */
3471 
3472 			if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3473 				/* get our own amap, clears needs_copy */
3474 				amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3475 				    0, 0);
3476 				/* XXXCDC: WAITOK??? */
3477 			}
3478 
3479 			new_entry = uvm_mapent_alloc(new_map, 0);
3480 			/* old_entry -> new_entry */
3481 			uvm_mapent_copy(old_entry, new_entry);
3482 
3483 			/* new pmap has nothing wired in it */
3484 			new_entry->wired_count = 0;
3485 
3486 			/*
3487 			 * gain reference to object backing the map (can't
3488 			 * be a submap, already checked this case).
3489 			 */
3490 			if (new_entry->aref.ar_amap)
3491 				/* share reference */
3492 				uvm_map_reference_amap(new_entry, AMAP_SHARED);
3493 
3494 			if (new_entry->object.uvm_obj &&
3495 			    new_entry->object.uvm_obj->pgops->pgo_reference)
3496 				new_entry->object.uvm_obj->
3497 				    pgops->pgo_reference(
3498 				        new_entry->object.uvm_obj);
3499 
3500 			/* insert entry at end of new_map's entry list */
3501 			uvm_map_entry_link(new_map, new_map->header.prev,
3502 			    new_entry);
3503 
3504 			/*
3505 			 * pmap_copy the mappings: this routine is optional
3506 			 * but if it is there it will reduce the number of
3507 			 * page faults in the new proc.
3508 			 */
3509 
3510 			pmap_copy(new_pmap, old_map->pmap, new_entry->start,
3511 			    (old_entry->end - old_entry->start),
3512 			    old_entry->start);
3513 
3514 			break;
3515 
3516 		case MAP_INHERIT_COPY:
3517 
3518 			/*
3519 			 * copy-on-write the mapping (using mmap's
3520 			 * MAP_PRIVATE semantics)
3521 			 *
3522 			 * allocate new_entry, adjust reference counts.
3523 			 * (note that new references are read-only).
3524 			 */
3525 
3526 			new_entry = uvm_mapent_alloc(new_map, 0);
3527 			/* old_entry -> new_entry */
3528 			uvm_mapent_copy(old_entry, new_entry);
3529 
3530 			if (new_entry->aref.ar_amap)
3531 				uvm_map_reference_amap(new_entry, 0);
3532 
3533 			if (new_entry->object.uvm_obj &&
3534 			    new_entry->object.uvm_obj->pgops->pgo_reference)
3535 				new_entry->object.uvm_obj->pgops->pgo_reference
3536 				    (new_entry->object.uvm_obj);
3537 
3538 			/* new pmap has nothing wired in it */
3539 			new_entry->wired_count = 0;
3540 
3541 			new_entry->etype |=
3542 			    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3543 			uvm_map_entry_link(new_map, new_map->header.prev,
3544 			    new_entry);
3545 
3546 			/*
3547 			 * the new entry will need an amap.  it will either
3548 			 * need to be copied from the old entry or created
3549 			 * from scratch (if the old entry does not have an
3550 			 * amap).  can we defer this process until later
3551 			 * (by setting "needs_copy") or do we need to copy
3552 			 * the amap now?
3553 			 *
3554 			 * we must copy the amap now if any of the following
3555 			 * conditions hold:
3556 			 * 1. the old entry has an amap and that amap is
3557 			 *    being shared.  this means that the old (parent)
3558 			 *    process is sharing the amap with another
3559 			 *    process.  if we do not clear needs_copy here
3560 			 *    we will end up in a situation where both the
3561 			 *    parent and child process are referring to the
3562 			 *    same amap with "needs_copy" set.  if the
3563 			 *    parent write-faults, the fault routine will
3564 			 *    clear "needs_copy" in the parent by allocating
3565 			 *    a new amap.   this is wrong because the
3566 			 *    parent is supposed to be sharing the old amap
3567 			 *    and the new amap will break that.
3568 			 *
3569 			 * 2. if the old entry has an amap and a non-zero
3570 			 *    wire count then we are going to have to call
3571 			 *    amap_cow_now to avoid page faults in the
3572 			 *    parent process.   since amap_cow_now requires
3573 			 *    "needs_copy" to be clear we might as well
3574 			 *    clear it here as well.
3575 			 *
3576 			 */
3577 
3578 			if (old_entry->aref.ar_amap != NULL) {
3579 
3580 			  if ((amap_flags(old_entry->aref.ar_amap) &
3581 			       AMAP_SHARED) != 0 ||
3582 			      VM_MAPENT_ISWIRED(old_entry)) {
3583 
3584 			    amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3585 				      0, 0);
3586 			    /* XXXCDC: M_WAITOK ... ok? */
3587 			  }
3588 			}
3589 
3590 			/*
3591 			 * if the parent's entry is wired down, then the
3592 			 * parent process does not want page faults on
3593 			 * access to that memory.  this means that we
3594 			 * cannot do copy-on-write because we can't write
3595 			 * protect the old entry.   in this case we
3596 			 * resolve all copy-on-write faults now, using
3597 			 * amap_cow_now.   note that we have already
3598 			 * allocated any needed amap (above).
3599 			 */
3600 
3601 			if (VM_MAPENT_ISWIRED(old_entry)) {
3602 
3603 			  /*
3604 			   * resolve all copy-on-write faults now
3605 			   * (note that there is nothing to do if
3606 			   * the old mapping does not have an amap).
3607 			   * XXX: is it worthwhile to bother with pmap_copy
3608 			   * in this case?
3609 			   */
3610 			  if (old_entry->aref.ar_amap)
3611 			    amap_cow_now(new_map, new_entry);
3612 
3613 			} else {
3614 
3615 			  /*
3616 			   * setup mappings to trigger copy-on-write faults
3617 			   * we must write-protect the parent if it has
3618 			   * an amap and it is not already "needs_copy"...
3619 			   * if it is already "needs_copy" then the parent
3620 			   * has already been write-protected by a previous
3621 			   * fork operation.
3622 			   *
3623 			   * if we do not write-protect the parent, then
3624 			   * we must be sure to write-protect the child
3625 			   * after the pmap_copy() operation.
3626 			   *
3627 			   * XXX: pmap_copy should have some way of telling
3628 			   * us that it didn't do anything so we can avoid
3629 			   * calling pmap_protect needlessly.
3630 			   */
3631 
3632 			  if (old_entry->aref.ar_amap) {
3633 
3634 			    if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3635 			      if (old_entry->max_protection & VM_PROT_WRITE) {
3636 				pmap_protect(old_map->pmap,
3637 					     old_entry->start,
3638 					     old_entry->end,
3639 					     old_entry->protection &
3640 					     ~VM_PROT_WRITE);
3641 			        pmap_update(old_map->pmap);
3642 
3643 			      }
3644 			      old_entry->etype |= UVM_ET_NEEDSCOPY;
3645 			    }
3646 
3647 			    /*
3648 			     * parent must now be write-protected
3649 			     */
3650 			    protect_child = FALSE;
3651 			  } else {
3652 
3653 			    /*
3654 			     * we only need to protect the child if the
3655 			     * parent has write access.
3656 			     */
3657 			    if (old_entry->max_protection & VM_PROT_WRITE)
3658 			      protect_child = TRUE;
3659 			    else
3660 			      protect_child = FALSE;
3661 
3662 			  }
3663 
3664 			  /*
3665 			   * copy the mappings
3666 			   * XXX: need a way to tell if this does anything
3667 			   */
3668 
3669 			  pmap_copy(new_pmap, old_map->pmap,
3670 				    new_entry->start,
3671 				    (old_entry->end - old_entry->start),
3672 				    old_entry->start);
3673 
3674 			  /*
3675 			   * protect the child's mappings if necessary
3676 			   */
3677 			  if (protect_child) {
3678 			    pmap_protect(new_pmap, new_entry->start,
3679 					 new_entry->end,
3680 					 new_entry->protection &
3681 					          ~VM_PROT_WRITE);
3682 			  }
3683 
3684 			}
3685 			break;
3686 		}  /* end of switch statement */
3687 		old_entry = old_entry->next;
3688 	}
3689 
3690 	new_map->size = old_map->size;
3691 	vm_map_unlock(old_map);
3692 
3693 #ifdef SYSVSHM
3694 	if (vm1->vm_shm)
3695 		shmfork(vm1, vm2);
3696 #endif
3697 
3698 #ifdef PMAP_FORK
3699 	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
3700 #endif
3701 
3702 	return(vm2);
3703 }
3704 
3705 #if defined(DDB)
3706 
3707 /*
3708  * DDB hooks
3709  */
3710 
3711 /*
3712  * uvm_map_printit: actually prints the map
3713  */
3714 
3715 void
3716 uvm_map_printit(struct vm_map *map, boolean_t full,
3717     int (*pr)(const char *, ...))
3718 {
3719 	struct vm_map_entry *entry;
3720 
3721 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
3722 	(*pr)("\t#ent=%d, sz=%u, ref=%d, version=%u, flags=0x%x\n",
3723 	    map->nentries, map->size, map->ref_count, map->timestamp,
3724 	    map->flags);
3725 #ifdef pmap_resident_count
3726 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
3727 	    pmap_resident_count(map->pmap));
3728 #else
3729 	/* XXXCDC: this should be required ... */
3730 	(*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
3731 #endif
3732 	if (!full)
3733 		return;
3734 	for (entry = map->header.next; entry != &map->header;
3735 	    entry = entry->next) {
3736 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3737 		    entry, entry->start, entry->end, entry->object.uvm_obj,
3738 		    (long long)entry->offset, entry->aref.ar_amap,
3739 		    entry->aref.ar_pageoff);
3740 		(*pr)(
3741 		    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
3742 		    "wc=%d, adv=%d\n",
3743 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3744 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3745 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3746 		    entry->protection, entry->max_protection,
3747 		    entry->inheritance, entry->wired_count, entry->advice);
3748 	}
3749 }
3750 
3751 /*
3752  * uvm_object_printit: actually prints the object
3753  */
3754 
3755 void
3756 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
3757     int (*pr)(const char *, ...))
3758 {
3759 	struct vm_page *pg;
3760 	int cnt = 0;
3761 
3762 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3763 	    uobj, uobj->pgops, uobj->uo_npages);
3764 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3765 		(*pr)("refs=<SYSTEM>\n");
3766 	else
3767 		(*pr)("refs=%d\n", uobj->uo_refs);
3768 
3769 	if (!full) {
3770 		return;
3771 	}
3772 	(*pr)("  PAGES <pg,offset>:\n  ");
3773 	RB_FOREACH(pg, uvm_objtree, &uobj->memt) {
3774 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3775 		if ((cnt % 3) == 2) {
3776 			(*pr)("\n  ");
3777 		}
3778 		cnt++;
3779 	}
3780 	if ((cnt % 3) != 2) {
3781 		(*pr)("\n");
3782 	}
3783 }
3784 
3785 /*
3786  * uvm_page_printit: actually print the page
3787  */
3788 
3789 static const char page_flagbits[] =
3790 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3791 	"\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0"
3792 	"\31PMAP1\32PMAP2\33PMAP3";
3793 
3794 void
3795 uvm_page_printit(struct vm_page *pg, boolean_t full,
3796     int (*pr)(const char *, ...))
3797 {
3798 	struct vm_page *tpg;
3799 	struct uvm_object *uobj;
3800 	struct pglist *pgl;
3801 
3802 	(*pr)("PAGE %p:\n", pg);
3803 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3804 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3805 	    (long long)pg->phys_addr);
3806 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
3807 	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
3808 #if defined(UVM_PAGE_TRKOWN)
3809 	if (pg->pg_flags & PG_BUSY)
3810 		(*pr)("  owning process = %d, tag=%s\n",
3811 		    pg->owner, pg->owner_tag);
3812 	else
3813 		(*pr)("  page not busy, no owner\n");
3814 #else
3815 	(*pr)("  [page ownership tracking disabled]\n");
3816 #endif
3817 
3818 	if (!full)
3819 		return;
3820 
3821 	/* cross-verify object/anon */
3822 	if ((pg->pg_flags & PQ_FREE) == 0) {
3823 		if (pg->pg_flags & PQ_ANON) {
3824 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3825 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3826 				(pg->uanon) ? pg->uanon->an_page : NULL);
3827 			else
3828 				(*pr)("  anon backpointer is OK\n");
3829 		} else {
3830 			uobj = pg->uobject;
3831 			if (uobj) {
3832 				(*pr)("  checking object list\n");
3833 				RB_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3834 					if (tpg == pg) {
3835 						break;
3836 					}
3837 				}
3838 				if (tpg)
3839 					(*pr)("  page found on object list\n");
3840 				else
3841 			(*pr)("  >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
3842 			}
3843 		}
3844 	}
3845 
3846 	/* cross-verify page queue */
3847 	if (pg->pg_flags & PQ_FREE) {
3848 		if (uvm_pmr_isfree(pg))
3849 			printf("  page found in uvm_pmemrange\n");
3850 		else
3851 			printf("  >>> page not found in uvm_pmemrange <<<\n");
3852 		pgl = NULL;
3853 	} else if (pg->pg_flags & PQ_INACTIVE) {
3854 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3855 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3856 	} else if (pg->pg_flags & PQ_ACTIVE) {
3857 		pgl = &uvm.page_active;
3858 	} else {
3859 		pgl = NULL;
3860 	}
3861 
3862 	if (pgl) {
3863 		(*pr)("  checking pageq list\n");
3864 		TAILQ_FOREACH(tpg, pgl, pageq) {
3865 			if (tpg == pg) {
3866 				break;
3867 			}
3868 		}
3869 		if (tpg)
3870 			(*pr)("  page found on pageq list\n");
3871 		else
3872 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3873 	}
3874 }
3875 #endif
3876