1 /*	$OpenBSD: uvm_map.c,v 1.173 2014/07/13 15:33:28 pirofti Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/mman.h>
90 #include <sys/proc.h>
91 #include <sys/malloc.h>
92 #include <sys/pool.h>
93 #include <sys/kernel.h>
94 
95 #include <dev/rndvar.h>
96 
97 #ifdef SYSVSHM
98 #include <sys/shm.h>
99 #endif
100 
101 #include <uvm/uvm.h>
102 
103 #ifdef DDB
104 #include <uvm/uvm_ddb.h>
105 #endif
106 
107 #include <uvm/uvm_addr.h>
108 
109 
110 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
111 int			 uvm_mapent_isjoinable(struct vm_map*,
112 			    struct vm_map_entry*, struct vm_map_entry*);
113 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
114 			    struct vm_map_entry*, struct uvm_map_deadq*);
115 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
116 			    struct vm_map_entry*, struct uvm_map_deadq*);
117 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
118 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
119 			    struct uvm_map_deadq*, struct vm_map_entry*);
120 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
121 void			 uvm_mapent_free(struct vm_map_entry*);
122 void			 uvm_unmap_kill_entry(struct vm_map*,
123 			    struct vm_map_entry*);
124 void			 uvm_mapent_mkfree(struct vm_map*,
125 			    struct vm_map_entry*, struct vm_map_entry**,
126 			    struct uvm_map_deadq*, boolean_t);
127 void			 uvm_map_pageable_pgon(struct vm_map*,
128 			    struct vm_map_entry*, struct vm_map_entry*,
129 			    vaddr_t, vaddr_t);
130 int			 uvm_map_pageable_wire(struct vm_map*,
131 			    struct vm_map_entry*, struct vm_map_entry*,
132 			    vaddr_t, vaddr_t, int);
133 void			 uvm_map_setup_entries(struct vm_map*);
134 void			 uvm_map_setup_md(struct vm_map*);
135 void			 uvm_map_teardown(struct vm_map*);
136 void			 uvm_map_vmspace_update(struct vm_map*,
137 			    struct uvm_map_deadq*, int);
138 void			 uvm_map_kmem_grow(struct vm_map*,
139 			    struct uvm_map_deadq*, vsize_t, int);
140 void			 uvm_map_freelist_update_clear(struct vm_map*,
141 			    struct uvm_map_deadq*);
142 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
143 void			 uvm_map_freelist_update(struct vm_map*,
144 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
145 			    vaddr_t, vaddr_t, int);
146 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
147 			    vaddr_t, vaddr_t, int);
148 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
149 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
150 			    int);
151 int			 uvm_map_findspace(struct vm_map*,
152 			    struct vm_map_entry**, struct vm_map_entry**,
153 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
154 			    vaddr_t);
155 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
156 void			 uvm_map_addr_augment(struct vm_map_entry*);
157 
158 /*
159  * Tree management functions.
160  */
161 
162 static __inline void	 uvm_mapent_copy(struct vm_map_entry*,
163 			    struct vm_map_entry*);
164 static int		 uvm_mapentry_addrcmp(struct vm_map_entry*,
165 			    struct vm_map_entry*);
166 static int		 uvm_mapentry_freecmp(struct vm_map_entry*,
167 			    struct vm_map_entry*);
168 void			 uvm_mapent_free_insert(struct vm_map*,
169 			    struct uvm_addr_state*, struct vm_map_entry*);
170 void			 uvm_mapent_free_remove(struct vm_map*,
171 			    struct uvm_addr_state*, struct vm_map_entry*);
172 void			 uvm_mapent_addr_insert(struct vm_map*,
173 			    struct vm_map_entry*);
174 void			 uvm_mapent_addr_remove(struct vm_map*,
175 			    struct vm_map_entry*);
176 void			 uvm_map_splitentry(struct vm_map*,
177 			    struct vm_map_entry*, struct vm_map_entry*,
178 			    vaddr_t);
179 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
180 int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
181 
182 /*
183  * uvm_vmspace_fork helper functions.
184  */
185 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
186 			    vsize_t, struct vm_map_entry*,
187 			    struct uvm_map_deadq*, int, int);
188 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
189 			    struct vm_map*, struct vm_map_entry*,
190 			    struct uvm_map_deadq*);
191 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
192 			    struct vm_map*, struct vm_map_entry*,
193 			    struct uvm_map_deadq*);
194 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
195 			    struct vm_map*, struct vm_map_entry*,
196 			    struct uvm_map_deadq*);
197 
198 /*
199  * Tree validation.
200  */
201 #ifdef VMMAP_DEBUG
202 void			 uvm_tree_assert(struct vm_map*, int, char*,
203 			    char*, int);
204 #define UVM_ASSERT(map, cond, file, line)				\
205 	uvm_tree_assert((map), (cond), #cond, (file), (line))
206 void			 uvm_tree_sanity(struct vm_map*, char*, int);
207 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
208 void			 vmspace_validate(struct vm_map*);
209 #else
210 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
211 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
212 #define vmspace_validate(_map)				do {} while (0)
213 #endif
214 
215 /*
216  * All architectures provide pmap_prefer; these are no-op fall-backs.
217  */
218 #ifndef PMAP_PREFER
219 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
220 #define PMAP_PREFER_OFFSET(off)	0
221 #define PMAP_PREFER(addr, off)	(addr)
222 #endif
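/*
 * Illustrative note (not authoritative): with the fall-back definitions
 * above, callers such as uvm_map() below can use the pmap_prefer hooks
 * unconditionally.  On architectures without PMAP_PREFER they degrade to
 * plain page alignment with no offset:
 *
 *	pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);   -> PAGE_SIZE
 *	pmap_offset = PMAP_PREFER_OFFSET(uoffset);          -> 0
 *	addr = PMAP_PREFER(addr, uoffset);                  -> addr unchanged
 */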
223 
224 
225 /*
226  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
227  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
228  *
229  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
230  * each time.
231  */
232 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
233 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
234 #define VM_MAP_KSIZE_ALLOCMUL	4
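/*
 * Worked example (assuming 4KB pages and that uvm_map_kmem_grow() follows
 * the policy described above): the kernel map starts out at
 * 512 * PAGE_SIZE = 2MB.  A 1MB allocation that no longer fits would then
 * request roughly MAX(VM_MAP_KSIZE_DELTA, VM_MAP_KSIZE_ALLOCMUL * 1MB) =
 * MAX(1MB, 4MB) = 4MB of additional kernel VA.
 */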
235 /*
236  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
237  * ahead.
238  */
239 #define FSPACE_DELTA		8
240 /*
241  * Put allocations adjacent to previous allocations when the free-space tree
242  * is larger than FSPACE_COMPACT entries.
243  *
244  * Alignment and PMAP_PREFER may still cause the entry to not be fully
245  * adjacent. Note that this strategy reduces memory fragmentation (by leaving
246  * a large space before or after the allocation).
247  */
248 #define FSPACE_COMPACT		128
249 /*
250  * Make the address selection skip at most this many bytes from the start of
251  * the free space in which the allocation takes place.
252  *
253  * The main idea behind a randomized address space is that an attacker cannot
254  * know where to target his attack. Therefore, the location of objects must be
255  * as random as possible. However, the goal is not to create the most sparse
256  * map that is possible.
257  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
258  * sizes, thereby reducing the sparseness. The biggest randomization comes
259  * from fragmentation, i.e. FSPACE_COMPACT.
260  */
261 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
262 /*
263  * Allow for small gaps in the overflow areas.
264  * Gap size is in bytes and does not have to be a multiple of page-size.
265  */
266 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
267 
268 /* auto-allocate address lower bound */
269 #define VMMAP_MIN_ADDR		PAGE_SIZE
270 
271 
272 #ifdef DEADBEEF0
273 #define UVMMAP_DEADBEEF		((void*)DEADBEEF0)
274 #else
275 #define UVMMAP_DEADBEEF		((void*)0xdeadd0d0)
276 #endif
277 
278 #ifdef DEBUG
279 int uvm_map_printlocks = 0;
280 
281 #define LPRINTF(_args)							\
282 	do {								\
283 		if (uvm_map_printlocks)					\
284 			printf _args;					\
285 	} while (0)
286 #else
287 #define LPRINTF(_args)	do {} while (0)
288 #endif
289 
290 static struct timeval uvm_kmapent_last_warn_time;
291 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
292 
293 const char vmmapbsy[] = "vmmapbsy";
294 
295 /*
296  * pool for vmspace structures.
297  */
298 struct pool uvm_vmspace_pool;
299 
300 /*
301  * pool for dynamically-allocated map entries.
302  */
303 struct pool uvm_map_entry_pool;
304 struct pool uvm_map_entry_kmem_pool;
305 
306 /*
307  * This global represents the end of the kernel virtual address
308  * space. If we want to exceed this, we must grow the kernel
309  * virtual address space dynamically.
310  *
311  * Note, this variable is locked by kernel_map's lock.
312  */
313 vaddr_t uvm_maxkaddr;
314 
315 /*
316  * Locking predicate.
317  */
318 #define UVM_MAP_REQ_WRITE(_map)						\
319 	do {								\
320 		if (((_map)->flags & VM_MAP_INTRSAFE) == 0)		\
321 			rw_assert_wrlock(&(_map)->lock);		\
322 	} while (0)
323 
324 /*
325  * Tree describing entries by address.
326  *
327  * Addresses are unique.
328  * Entries with start == end may only exist if they are the first entry
329  * (sorted by address) within a free-memory tree.
330  */
331 
332 static __inline int
333 uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
334 {
335 	return e1->start < e2->start ? -1 : e1->start > e2->start;
336 }
337 
338 /*
339  * Tree describing free memory.
340  *
341  * Free memory is indexed (so we can use array semantics in O(log N)).
342  * Free memory is ordered by size (so we can reduce fragmentation).
343  *
344  * The address range in the tree can be limited, having part of the
345  * free memory not in the free-memory tree. Only free memory in the
346  * tree will be considered during 'any address' allocations.
347  */
348 
349 static __inline int
350 uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
351 {
352 	int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace;
353 	return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2);
354 }
355 
356 /*
357  * Copy mapentry.
358  */
359 static __inline void
360 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
361 {
362 	caddr_t csrc, cdst;
363 	size_t sz;
364 
365 	csrc = (caddr_t)src;
366 	cdst = (caddr_t)dst;
367 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
368 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
369 
370 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
371 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
372 	memcpy(cdst, csrc, sz);
373 }
374 
375 /*
376  * Handle free-list insertion.
377  */
378 void
379 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
380     struct vm_map_entry *entry)
381 {
382 	const struct uvm_addr_functions *fun;
383 #ifdef VMMAP_DEBUG
384 	vaddr_t min, max, bound;
385 #endif
386 
387 #ifdef VMMAP_DEBUG
388 	/*
389 	 * Boundary check.
390 	 * Boundaries are folded if they go on the same free list.
391 	 */
392 	min = VMMAP_FREE_START(entry);
393 	max = VMMAP_FREE_END(entry);
394 
395 	while (min < max) {
396 		bound = uvm_map_boundary(map, min, max);
397 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
398 		min = bound;
399 	}
400 #endif
401 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
402 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
403 
404 	UVM_MAP_REQ_WRITE(map);
405 
406 	/* Actual insert: forward to uaddr pointer. */
407 	if (uaddr != NULL) {
408 		fun = uaddr->uaddr_functions;
409 		KDASSERT(fun != NULL);
410 		if (fun->uaddr_free_insert != NULL)
411 			(*fun->uaddr_free_insert)(map, uaddr, entry);
412 		entry->etype |= UVM_ET_FREEMAPPED;
413 	}
414 
415 	/* Update fspace augmentation. */
416 	uvm_map_addr_augment(entry);
417 }
418 
419 /*
420  * Handle free-list removal.
421  */
422 void
423 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
424     struct vm_map_entry *entry)
425 {
426 	const struct uvm_addr_functions *fun;
427 
428 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
429 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
430 	UVM_MAP_REQ_WRITE(map);
431 
432 	if (uaddr != NULL) {
433 		fun = uaddr->uaddr_functions;
434 		if (fun->uaddr_free_remove != NULL)
435 			(*fun->uaddr_free_remove)(map, uaddr, entry);
436 		entry->etype &= ~UVM_ET_FREEMAPPED;
437 	}
438 }
439 
440 /*
441  * Handle address tree insertion.
442  */
443 void
444 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
445 {
446 	struct vm_map_entry *res;
447 
448 	if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF ||
449 	    RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF ||
450 	    RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF)
451 		panic("uvm_mapent_addr_insert: entry still in addr list");
452 	KDASSERT(entry->start <= entry->end);
453 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
454 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
455 
456 	UVM_MAP_REQ_WRITE(map);
457 	res = RB_INSERT(uvm_map_addr, &map->addr, entry);
458 	if (res != NULL) {
459 		panic("uvm_mapent_addr_insert: map %p entry %p "
460 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
461 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
462 		    map, entry,
463 		    entry->start, entry->end, entry->guard, entry->fspace,
464 		    res, res->start, res->end, res->guard, res->fspace);
465 	}
466 }
467 
468 /*
469  * Handle address tree removal.
470  */
471 void
472 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
473 {
474 	struct vm_map_entry *res;
475 
476 	UVM_MAP_REQ_WRITE(map);
477 	res = RB_REMOVE(uvm_map_addr, &map->addr, entry);
478 	if (res != entry)
479 		panic("uvm_mapent_addr_remove");
480 	RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) =
481 	    RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF;
482 }
483 
484 /*
485  * uvm_map_reference: add reference to a map
486  *
487  * XXX check map reference counter lock
488  */
489 #define uvm_map_reference(_map)						\
490 	do {								\
491 		(_map)->ref_count++;					\
492 	} while (0)
493 
494 /*
495  * Calculate the dused delta.
496  */
497 vsize_t
498 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
499 {
500 	struct vmspace *vm;
501 	vsize_t sz;
502 	vaddr_t lmax;
503 	vaddr_t stack_begin, stack_end; /* Position of stack. */
504 
505 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
506 	vm = (struct vmspace *)map;
507 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
508 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
509 
510 	sz = 0;
511 	while (min != max) {
512 		lmax = max;
513 		if (min < stack_begin && lmax > stack_begin)
514 			lmax = stack_begin;
515 		else if (min < stack_end && lmax > stack_end)
516 			lmax = stack_end;
517 
518 		if (min >= stack_begin && min < stack_end) {
519 			/* nothing */
520 		} else
521 			sz += lmax - min;
522 		min = lmax;
523 	}
524 
525 	return sz >> PAGE_SHIFT;
526 }
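/*
 * Worked example for uvmspace_dused() (hypothetical addresses, 4KB pages):
 * with the stack occupying [0x7000, 0x9000), a call with min = 0x5000 and
 * max = 0xb000 counts [0x5000, 0x7000) and [0x9000, 0xb000), skips the
 * stack range, and returns 0x4000 >> PAGE_SHIFT = 4 pages.
 */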
527 
528 /*
529  * Find the entry describing the given address.
530  */
531 struct vm_map_entry*
532 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
533 {
534 	struct vm_map_entry *iter;
535 
536 	iter = RB_ROOT(atree);
537 	while (iter != NULL) {
538 		if (iter->start > addr)
539 			iter = RB_LEFT(iter, daddrs.addr_entry);
540 		else if (VMMAP_FREE_END(iter) <= addr)
541 			iter = RB_RIGHT(iter, daddrs.addr_entry);
542 		else
543 			return iter;
544 	}
545 	return NULL;
546 }
547 
548 /*
549  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
550  *
551  * Push dead entries into a linked list.
552  * Since the linked list abuses the address tree for storage, the entry
553  * may not be linked in a map.
554  *
555  * The deadq must be initialized with TAILQ_INIT() before its first use.
556  * uvm_unmap_detach(deadq, 0) will remove and free the dead entries.
557  */
558 static __inline void
559 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
560 {
561 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
562 }
563 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
564 	dead_entry_push((_headptr), (_entry))
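/*
 * Typical usage sketch (mirrors uvm_map() and uvm_unmap() below):
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	... entries taken out of the map go through DEAD_ENTRY_PUSH(&dead, e) ...
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);
 */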
565 
566 /*
567  * Helper function for address selection.
568  *
569  * Given allocation constraints and pmap constraints, finds the
570  * lowest and highest address in a range that can be used for the
571  * allocation.
572  *
573  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
574  *
575  *
576  * Big chunk of math with a seasoning of dragons.
577  */
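/*
 * Worked example (hypothetical numbers; no PMAP_PREFER, no bias): for a
 * free range [0x1000, 0x9000) with sz = 0x2000, guardpg = 0 and
 * align = 0x4000, the code below yields
 * sel_min = roundup(0x1000, 0x4000) = 0x4000 and
 * sel_max = (0x9000 - 0x2000) & ~0x3fff = 0x4000, i.e. the only candidate
 * start address is 0x4000.
 */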
578 int
579 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
580     struct vm_map_entry *sel, vaddr_t align,
581     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
582 {
583 	vaddr_t sel_min, sel_max;
584 #ifdef PMAP_PREFER
585 	vaddr_t pmap_min, pmap_max;
586 #endif /* PMAP_PREFER */
587 #ifdef DIAGNOSTIC
588 	int bad;
589 #endif /* DIAGNOSTIC */
590 
591 	sel_min = VMMAP_FREE_START(sel);
592 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
593 
594 #ifdef PMAP_PREFER
595 
596 	/*
597 	 * There are two special cases in which we can satisfy both the align
598 	 * requirement and the pmap_prefer requirement.
599 	 * - when pmap_off == 0, we always select the largest of the two
600 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
601 	 *   satisfy the pmap_align requirement and automatically
602 	 *   satisfy the align requirement.
603 	 */
604 	if (align > PAGE_SIZE &&
605 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
606 		/*
607 		 * Simple case: only use align.
608 		 */
609 		sel_min = roundup(sel_min, align);
610 		sel_max &= ~(align - 1);
611 
612 		if (sel_min > sel_max)
613 			return ENOMEM;
614 
615 		/* Correct for bias. */
616 		if (sel_max - sel_min > FSPACE_BIASGAP) {
617 			if (bias > 0) {
618 				sel_min = sel_max - FSPACE_BIASGAP;
619 				sel_min = roundup(sel_min, align);
620 			} else if (bias < 0) {
621 				sel_max = sel_min + FSPACE_BIASGAP;
622 				sel_max &= ~(align - 1);
623 			}
624 		}
625 	} else if (pmap_align != 0) {
626 		/*
627 		 * Special case: satisfy both pmap_prefer and
628 		 * align argument.
629 		 */
630 		pmap_max = sel_max & ~(pmap_align - 1);
631 		pmap_min = sel_min;
632 		if (pmap_max < sel_min)
633 			return ENOMEM;
634 
635 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
636 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
637 			pmap_min = pmap_max - FSPACE_BIASGAP;
638 		/* Align pmap_min. */
639 		pmap_min &= ~(pmap_align - 1);
640 		if (pmap_min < sel_min)
641 			pmap_min += pmap_align;
642 		if (pmap_min > pmap_max)
643 			return ENOMEM;
644 
645 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
646 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
647 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
648 			    ~(pmap_align - 1);
649 		}
650 		if (pmap_min > pmap_max)
651 			return ENOMEM;
652 
653 		/* Apply pmap prefer offset. */
654 		pmap_max |= pmap_off;
655 		if (pmap_max > sel_max)
656 			pmap_max -= pmap_align;
657 		pmap_min |= pmap_off;
658 		if (pmap_min < sel_min)
659 			pmap_min += pmap_align;
660 
661 		/*
662 		 * Fixup: it's possible that pmap_min and pmap_max
663 		 * cross each other. In this case, try to find one
664 		 * address that is allowed.
665 		 * (This usually happens in biased case.)
666 		 */
667 		if (pmap_min > pmap_max) {
668 			if (pmap_min < sel_max)
669 				pmap_max = pmap_min;
670 			else if (pmap_max > sel_min)
671 				pmap_min = pmap_max;
672 			else
673 				return ENOMEM;
674 		}
675 
676 		/* Internal validation. */
677 		KDASSERT(pmap_min <= pmap_max);
678 
679 		sel_min = pmap_min;
680 		sel_max = pmap_max;
681 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
682 		sel_min = sel_max - FSPACE_BIASGAP;
683 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
684 		sel_max = sel_min + FSPACE_BIASGAP;
685 
686 #else
687 
688 	if (align > PAGE_SIZE) {
689 		sel_min = roundup(sel_min, align);
690 		sel_max &= ~(align - 1);
691 		if (sel_min > sel_max)
692 			return ENOMEM;
693 
694 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
695 			if (bias > 0) {
696 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
697 				    align);
698 			} else {
699 				sel_max = (sel_min + FSPACE_BIASGAP) &
700 				    ~(align - 1);
701 			}
702 		}
703 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
704 		sel_min = sel_max - FSPACE_BIASGAP;
705 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
706 		sel_max = sel_min + FSPACE_BIASGAP;
707 
708 #endif
709 
710 	if (sel_min > sel_max)
711 		return ENOMEM;
712 
713 #ifdef DIAGNOSTIC
714 	bad = 0;
715 	/* Lower boundary check. */
716 	if (sel_min < VMMAP_FREE_START(sel)) {
717 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
718 		    sel_min, VMMAP_FREE_START(sel));
719 		bad++;
720 	}
721 	/* Upper boundary check. */
722 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
723 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
724 		    sel_max,
725 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
726 		bad++;
727 	}
728 	/* Lower boundary alignment. */
729 	if (align != 0 && (sel_min & (align - 1)) != 0) {
730 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
731 		    sel_min, align);
732 		bad++;
733 	}
734 	/* Upper boundary alignment. */
735 	if (align != 0 && (sel_max & (align - 1)) != 0) {
736 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
737 		    sel_max, align);
738 		bad++;
739 	}
740 	/* Lower boundary PMAP_PREFER check. */
741 	if (pmap_align != 0 && align == 0 &&
742 	    (sel_min & (pmap_align - 1)) != pmap_off) {
743 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
744 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
745 		bad++;
746 	}
747 	/* Upper boundary PMAP_PREFER check. */
748 	if (pmap_align != 0 && align == 0 &&
749 	    (sel_max & (pmap_align - 1)) != pmap_off) {
750 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
751 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
752 		bad++;
753 	}
754 
755 	if (bad) {
756 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
757 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
758 		    "bias = %d, "
759 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
760 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
761 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
762 	}
763 #endif /* DIAGNOSTIC */
764 
765 	*min = sel_min;
766 	*max = sel_max;
767 	return 0;
768 }
769 
770 /*
771  * Test if memory starting at addr with sz bytes is free.
772  *
773  * Fills in *start_ptr and *end_ptr with the first and last entries describing
774  * the space.
775  * If called with prefilled *start_ptr and *end_ptr, they must be correct.
776  */
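/*
 * Usage sketch (mirrors the callers in uvm_map() below); start_ptr and
 * end_ptr must point at NULL to request a fresh lookup:
 *
 *	struct vm_map_entry *first = NULL, *last = NULL;
 *
 *	if (uvm_map_isavail(map, NULL, &first, &last, addr, sz)) {
 *		... <addr, addr+sz> is free; first and last bracket it ...
 *	}
 */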
777 int
778 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
779     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
780     vaddr_t addr, vsize_t sz)
781 {
782 	struct uvm_addr_state *free;
783 	struct uvm_map_addr *atree;
784 	struct vm_map_entry *i, *i_end;
785 
786 	/*
787 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
788 	 */
789 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
790 		if (addr + sz > uvm_maxkaddr)
791 			return 0;
792 	}
793 
794 	atree = &map->addr;
795 
796 	/*
797 	 * Fill in first, last, so they point at the entries containing the
798 	 * first and last address of the range.
799 	 * Note that if they are not NULL, we don't perform the lookup.
800 	 */
801 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
802 	if (*start_ptr == NULL) {
803 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
804 		if (*start_ptr == NULL)
805 			return 0;
806 	} else
807 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
808 	if (*end_ptr == NULL) {
809 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
810 			*end_ptr = *start_ptr;
811 		else {
812 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
813 			if (*end_ptr == NULL)
814 				return 0;
815 		}
816 	} else
817 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
818 
819 	/* Validation. */
820 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
821 	KDASSERT((*start_ptr)->start <= addr &&
822 	    VMMAP_FREE_END(*start_ptr) > addr &&
823 	    (*end_ptr)->start < addr + sz &&
824 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
825 
826 	/*
827 	 * Check that none of the entries intersects with <addr, addr+sz>.
828 	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
829 	 * considered unavailable unless called by those allocators.
830 	 */
831 	i = *start_ptr;
832 	i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr);
833 	for (; i != i_end;
834 	    i = RB_NEXT(uvm_map_addr, atree, i)) {
835 		if (i->start != i->end && i->end > addr)
836 			return 0;
837 
838 		/*
839 		 * uaddr_exe and uaddr_brk_stack may only be used
840 		 * by these allocators and the NULL uaddr (i.e. no
841 		 * uaddr).
842 		 * Reject if this requirement is not met.
843 		 */
844 		if (uaddr != NULL) {
845 			free = uvm_map_uaddr_e(map, i);
846 
847 			if (uaddr != free && free != NULL &&
848 			    (free == map->uaddr_exe ||
849 			     free == map->uaddr_brk_stack))
850 				return 0;
851 		}
852 	}
853 
854 	return -1;
855 }
856 
857 /*
858  * Invoke each address selector until an address is found.
859  * Will not invoke uaddr_exe.
860  */
861 int
862 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
863     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
864     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
865 {
866 	struct uvm_addr_state *uaddr;
867 	int i;
868 
869 	/*
870 	 * Allocation for sz bytes at any address,
871 	 * using the addr selectors in order.
872 	 */
873 	for (i = 0; i < nitems(map->uaddr_any); i++) {
874 		uaddr = map->uaddr_any[i];
875 
876 		if (uvm_addr_invoke(map, uaddr, first, last,
877 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
878 			return 0;
879 	}
880 
881 	/* Fall back to brk() and stack() address selectors. */
882 	uaddr = map->uaddr_brk_stack;
883 	if (uvm_addr_invoke(map, uaddr, first, last,
884 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
885 		return 0;
886 
887 	return ENOMEM;
888 }
889 
890 /* Calculate entry augmentation value. */
891 vsize_t
892 uvm_map_addr_augment_get(struct vm_map_entry *entry)
893 {
894 	vsize_t			 augment;
895 	struct vm_map_entry	*left, *right;
896 
897 	augment = entry->fspace;
898 	if ((left = RB_LEFT(entry, daddrs.addr_entry)) != NULL)
899 		augment = MAX(augment, left->fspace_augment);
900 	if ((right = RB_RIGHT(entry, daddrs.addr_entry)) != NULL)
901 		augment = MAX(augment, right->fspace_augment);
902 	return augment;
903 }
904 
905 /*
906  * Update augmentation data in entry.
907  */
908 void
909 uvm_map_addr_augment(struct vm_map_entry *entry)
910 {
911 	vsize_t			 augment;
912 
913 	while (entry != NULL) {
914 		/* Calculate value for augmentation. */
915 		augment = uvm_map_addr_augment_get(entry);
916 
917 		/*
918 		 * Descend update.
919 		 * Once we find an entry that already has the correct value,
920 		 * stop, since it means all its parents will use the correct
921 		 * value too.
922 		 */
923 		if (entry->fspace_augment == augment)
924 			return;
925 		entry->fspace_augment = augment;
926 		entry = RB_PARENT(entry, daddrs.addr_entry);
927 	}
928 }
929 
930 /*
931  * uvm_map: establish a valid mapping in map
932  *
933  * => *addr and sz must be a multiple of PAGE_SIZE.
934  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
935  * => map must be unlocked.
936  * => <uobj,uoffset> value meanings (4 cases):
937  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
938  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
939  *	[3] <uobj,uoffset>		== normal mapping
940  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
941  *
942  *   case [4] is for kernel mappings where we don't know the offset until
943  *   we've found a virtual address.   note that kernel object offsets are
944  *   always relative to vm_map_min(kernel_map).
945  *
946  * => align: align vaddr, must be a power-of-2.
947  *    Align is only a hint and will be ignored if the alignment fails.
948  */
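/*
 * Example call (illustrative sketch only; the UVM_MAPFLAG() flag macros
 * come from uvm_extern.h and the exact combination depends on the caller):
 *
 *	vaddr_t va = 0;
 *	error = uvm_map(kernel_map, &va, PAGE_SIZE, NULL, UVM_UNKNOWN_OFFSET,
 *	    0, UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
 *	    UVM_ADV_RANDOM, 0));
 *
 * On success, va holds the virtual address that was chosen.
 */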
949 int
950 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
951     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
952 {
953 	struct vm_map_entry	*first, *last, *entry, *new;
954 	struct uvm_map_deadq	 dead;
955 	vm_prot_t		 prot;
956 	vm_prot_t		 maxprot;
957 	vm_inherit_t		 inherit;
958 	int			 advice;
959 	int			 error;
960 	vaddr_t			 pmap_align, pmap_offset;
961 	vaddr_t			 hint;
962 
963 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
964 		splassert(IPL_NONE);
965 	else
966 		splassert(IPL_VM);
967 
968 	/*
969 	 * We use pmap_align and pmap_offset as alignment and offset variables.
970 	 *
971 	 * Because the align parameter takes precedence over pmap prefer,
972 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
973 	 * if the pmap_prefer offset cannot satisfy the requested alignment.
974 	 */
975 	if (uoffset == UVM_UNKNOWN_OFFSET) {
976 		pmap_align = MAX(align, PAGE_SIZE);
977 		pmap_offset = 0;
978 	} else {
979 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
980 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
981 
982 		if (align == 0 ||
983 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
984 			/* pmap_offset satisfies align, no change. */
985 		} else {
986 			/* Align takes precedence over pmap prefer. */
987 			pmap_align = align;
988 			pmap_offset = 0;
989 		}
990 	}
991 
992 	/* Decode parameters. */
993 	prot = UVM_PROTECTION(flags);
994 	maxprot = UVM_MAXPROTECTION(flags);
995 	advice = UVM_ADVICE(flags);
996 	inherit = UVM_INHERIT(flags);
997 	error = 0;
998 	hint = trunc_page(*addr);
999 	TAILQ_INIT(&dead);
1000 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1001 	KASSERT((align & (align - 1)) == 0);
1002 
1003 	/* Holes are incompatible with other types of mappings. */
1004 	if (flags & UVM_FLAG_HOLE) {
1005 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1006 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1007 	}
1008 
1009 	/* Unset hint for kernel_map non-fixed allocations. */
1010 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1011 		hint = 0;
1012 
1013 	/* Check protection. */
1014 	if ((prot & maxprot) != prot)
1015 		return EACCES;
1016 
1017 	/*
1018 	 * Before grabbing the lock, allocate a map entry for later
1019 	 * use to ensure we don't wait for memory while holding the
1020 	 * vm_map_lock.
1021 	 */
1022 	new = uvm_mapent_alloc(map, flags);
1023 	if (new == NULL)
1024 		return(ENOMEM);
1025 
1026 	if (flags & UVM_FLAG_TRYLOCK) {
1027 		if (vm_map_lock_try(map) == FALSE) {
1028 			error = EFAULT;
1029 			goto out;
1030 		}
1031 	} else
1032 		vm_map_lock(map);
1033 
1034 	first = last = NULL;
1035 	if (flags & UVM_FLAG_FIXED) {
1036 		/*
1037 		 * Fixed location.
1038 		 *
1039 		 * Note: we ignore align, pmap_prefer.
1040 		 * Fill in first, last and *addr.
1041 		 */
1042 		KASSERT((*addr & PAGE_MASK) == 0);
1043 
1044 		/*
1045 		 * Grow pmap to include allocated address.
1046 		 * If the growth fails, the allocation will fail too.
1047 		 */
1048 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1049 		    uvm_maxkaddr < (*addr + sz)) {
1050 			uvm_map_kmem_grow(map, &dead,
1051 			    *addr + sz - uvm_maxkaddr, flags);
1052 		}
1053 
1054 		/* Check that the space is available. */
1055 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1056 			error = ENOMEM;
1057 			goto unlock;
1058 		}
1059 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1060 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1061 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1062 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1063 		/*
1064 		 * Address used as hint.
1065 		 *
1066 		 * Note: we enforce the alignment restriction,
1067 		 * but ignore pmap_prefer.
1068 		 */
1069 	} else if ((maxprot & VM_PROT_EXECUTE) != 0 &&
1070 	    map->uaddr_exe != NULL) {
1071 		/* Run selection algorithm for executables. */
1072 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1073 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1074 
1075 		/* Grow kernel memory and try again. */
1076 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1077 			uvm_map_kmem_grow(map, &dead, sz, flags);
1078 
1079 			error = uvm_addr_invoke(map, map->uaddr_exe,
1080 			    &first, &last, addr, sz,
1081 			    pmap_align, pmap_offset, prot, hint);
1082 		}
1083 
1084 		if (error != 0)
1085 			goto unlock;
1086 	} else {
1087 		/* Update freelists from vmspace. */
1088 		if (map->flags & VM_MAP_ISVMSPACE)
1089 			uvm_map_vmspace_update(map, &dead, flags);
1090 
1091 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1092 		    pmap_align, pmap_offset, prot, hint);
1093 
1094 		/* Grow kernel memory and try again. */
1095 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1096 			uvm_map_kmem_grow(map, &dead, sz, flags);
1097 
1098 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1099 			    pmap_align, pmap_offset, prot, hint);
1100 		}
1101 
1102 		if (error != 0)
1103 			goto unlock;
1104 	}
1105 
1106 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1107 	    uvm_maxkaddr >= *addr + sz);
1108 
1109 	/* If we only want a query, return now. */
1110 	if (flags & UVM_FLAG_QUERY) {
1111 		error = 0;
1112 		goto unlock;
1113 	}
1114 
1115 	if (uobj == NULL)
1116 		uoffset = 0;
1117 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1118 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1119 		uoffset = *addr - vm_map_min(kernel_map);
1120 	}
1121 
1122 	/*
1123 	 * Create new entry.
1124 	 * first and last may be invalidated after this call.
1125 	 */
1126 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1127 	    new);
1128 	if (entry == NULL) {
1129 		error = ENOMEM;
1130 		goto unlock;
1131 	}
1132 	new = NULL;
1133 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1134 	entry->object.uvm_obj = uobj;
1135 	entry->offset = uoffset;
1136 	entry->protection = prot;
1137 	entry->max_protection = maxprot;
1138 	entry->inheritance = inherit;
1139 	entry->wired_count = 0;
1140 	entry->advice = advice;
1141 	if (uobj)
1142 		entry->etype |= UVM_ET_OBJ;
1143 	else if (flags & UVM_FLAG_HOLE)
1144 		entry->etype |= UVM_ET_HOLE;
1145 	if (flags & UVM_FLAG_COPYONW) {
1146 		entry->etype |= UVM_ET_COPYONWRITE;
1147 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1148 			entry->etype |= UVM_ET_NEEDSCOPY;
1149 	}
1150 	if (flags & UVM_FLAG_OVERLAY) {
1151 		entry->aref.ar_pageoff = 0;
1152 		entry->aref.ar_amap = amap_alloc(sz,
1153 		    ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0),
1154 		    M_WAITOK);
1155 	}
1156 
1157 	/* Update map and process statistics. */
1158 	if (!(flags & UVM_FLAG_HOLE)) {
1159 		map->size += sz;
1160 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) {
1161 			((struct vmspace *)map)->vm_dused +=
1162 			    uvmspace_dused(map, *addr, *addr + sz);
1163 		}
1164 	}
1165 
1166 	/*
1167 	 * Try to merge entry.
1168 	 *
1169 	 * Userland allocations are kept separated most of the time.
1170 	 * Forgo the effort of merging what most of the time can't be merged
1171 	 * and only try the merge if it concerns a kernel entry.
1172 	 */
1173 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1174 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1175 		uvm_mapent_tryjoin(map, entry, &dead);
1176 
1177 unlock:
1178 	vm_map_unlock(map);
1179 
1180 	/*
1181 	 * Remove dead entries.
1182 	 *
1183 	 * Dead entries may be the result of merging.
1184 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1185 	 * destroy free-space entries.
1186 	 */
1187 	uvm_unmap_detach(&dead, 0);
1188 out:
1189 	if (new)
1190 		uvm_mapent_free(new);
1191 	return error;
1192 }
1193 
1194 /*
1195  * True iff e1 and e2 can be joined together.
1196  */
1197 int
1198 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1199     struct vm_map_entry *e2)
1200 {
1201 	KDASSERT(e1 != NULL && e2 != NULL);
1202 
1203 	/* Must be the same entry type and not have free memory between. */
1204 	if (e1->etype != e2->etype || e1->end != e2->start)
1205 		return 0;
1206 
1207 	/* Submaps are never joined. */
1208 	if (UVM_ET_ISSUBMAP(e1))
1209 		return 0;
1210 
1211 	/* Never merge wired memory. */
1212 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1213 		return 0;
1214 
1215 	/* Protection, inheritance and advice must be equal. */
1216 	if (e1->protection != e2->protection ||
1217 	    e1->max_protection != e2->max_protection ||
1218 	    e1->inheritance != e2->inheritance ||
1219 	    e1->advice != e2->advice)
1220 		return 0;
1221 
1222 	/* If uvm_object: object itself and offsets within object must match. */
1223 	if (UVM_ET_ISOBJ(e1)) {
1224 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1225 			return 0;
1226 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1227 			return 0;
1228 	}
1229 
1230 	/*
1231 	 * Cannot join shared amaps.
1232 	 * Note: no need to lock amap to look at refs, since we don't care
1233 	 * about its exact value.
1234 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1235 	 */
1236 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1237 		return 0;
1238 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1239 		return 0;
1240 
1241 	/* Apparently, e1 and e2 match. */
1242 	return 1;
1243 }
1244 
1245 /*
1246  * Join support function.
1247  *
1248  * Returns the merged entry on success.
1249  * Returns NULL if the merge failed.
1250  */
1251 struct vm_map_entry*
1252 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1253     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1254 {
1255 	struct uvm_addr_state *free;
1256 
1257 	/*
1258 	 * Amap of e1 must be extended to include e2.
1259 	 * e2 contains no real information in its amap,
1260 	 * so it can be erased immediately.
1261 	 */
1262 	if (e1->aref.ar_amap) {
1263 		if (amap_extend(e1, e2->end - e2->start))
1264 			return NULL;
1265 	}
1266 
1267 	/*
1268 	 * Don't drop obj reference:
1269 	 * uvm_unmap_detach will do this for us.
1270 	 */
1271 	free = uvm_map_uaddr_e(map, e1);
1272 	uvm_mapent_free_remove(map, free, e1);
1273 
1274 	free = uvm_map_uaddr_e(map, e2);
1275 	uvm_mapent_free_remove(map, free, e2);
1276 	uvm_mapent_addr_remove(map, e2);
1277 	e1->end = e2->end;
1278 	e1->guard = e2->guard;
1279 	e1->fspace = e2->fspace;
1280 	uvm_mapent_free_insert(map, free, e1);
1281 
1282 	DEAD_ENTRY_PUSH(dead, e2);
1283 	return e1;
1284 }
1285 
1286 /*
1287  * Attempt forward and backward joining of entry.
1288  *
1289  * Returns entry after joins.
1290  * We are guaranteed that the amap of entry is either non-existent or
1291  * has never been used.
1292  */
1293 struct vm_map_entry*
1294 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1295     struct uvm_map_deadq *dead)
1296 {
1297 	struct vm_map_entry *other;
1298 	struct vm_map_entry *merged;
1299 
1300 	/* Merge with previous entry. */
1301 	other = RB_PREV(uvm_map_addr, &map->addr, entry);
1302 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1303 		merged = uvm_mapent_merge(map, other, entry, dead);
1304 		if (merged)
1305 			entry = merged;
1306 	}
1307 
1308 	/*
1309 	 * Merge with next entry.
1310 	 *
1311 	 * Because amap can only extend forward and the next entry
1312 	 * probably contains sensible info, only perform forward merging
1313 	 * in the absence of an amap.
1314 	 */
1315 	other = RB_NEXT(uvm_map_addr, &map->addr, entry);
1316 	if (other && entry->aref.ar_amap == NULL &&
1317 	    other->aref.ar_amap == NULL &&
1318 	    uvm_mapent_isjoinable(map, entry, other)) {
1319 		merged = uvm_mapent_merge(map, entry, other, dead);
1320 		if (merged)
1321 			entry = merged;
1322 	}
1323 
1324 	return entry;
1325 }
1326 
1327 /*
1328  * Kill entries that are no longer in a map.
1329  */
1330 void
1331 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1332 {
1333 	struct vm_map_entry *entry;
1334 	int waitok;
1335 
1336 	waitok = flags & UVM_PLA_WAITOK;
1337 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1338 		if (waitok)
1339 			uvm_pause();
1340 		/* Drop reference to amap, if we've got one. */
1341 		if (entry->aref.ar_amap)
1342 			amap_unref(entry->aref.ar_amap,
1343 			    entry->aref.ar_pageoff,
1344 			    atop(entry->end - entry->start),
1345 			    flags & AMAP_REFALL);
1346 
1347 		/* Drop reference to our backing object, if we've got one. */
1348 		if (UVM_ET_ISSUBMAP(entry)) {
1349 			/* ... unlikely to happen, but play it safe */
1350 			uvm_map_deallocate(entry->object.sub_map);
1351 		} else if (UVM_ET_ISOBJ(entry) &&
1352 		    entry->object.uvm_obj->pgops->pgo_detach) {
1353 			entry->object.uvm_obj->pgops->pgo_detach(
1354 			    entry->object.uvm_obj);
1355 		}
1356 
1357 		/* Step to next. */
1358 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1359 		uvm_mapent_free(entry);
1360 	}
1361 }
1362 
1363 /*
1364  * Create and insert new entry.
1365  *
1366  * Returned entry contains new addresses and is inserted properly in the tree.
1367  * first and last are (probably) no longer valid.
1368  */
1369 struct vm_map_entry*
1370 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1371     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1372     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1373 {
1374 	struct vm_map_entry *entry, *prev;
1375 	struct uvm_addr_state *free;
1376 	vaddr_t min, max;	/* free space boundaries for new entry */
1377 
1378 	KDASSERT(map != NULL);
1379 	KDASSERT(first != NULL);
1380 	KDASSERT(last != NULL);
1381 	KDASSERT(dead != NULL);
1382 	KDASSERT(sz > 0);
1383 	KDASSERT(addr + sz > addr);
1384 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1385 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1386 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1387 	uvm_tree_sanity(map, __FILE__, __LINE__);
1388 
1389 	min = addr + sz;
1390 	max = VMMAP_FREE_END(last);
1391 
1392 	/* Initialize new entry. */
1393 	if (new == NULL)
1394 		entry = uvm_mapent_alloc(map, flags);
1395 	else
1396 		entry = new;
1397 	if (entry == NULL)
1398 		return NULL;
1399 	entry->offset = 0;
1400 	entry->etype = 0;
1401 	entry->wired_count = 0;
1402 	entry->aref.ar_pageoff = 0;
1403 	entry->aref.ar_amap = NULL;
1404 
1405 	entry->start = addr;
1406 	entry->end = min;
1407 	entry->guard = 0;
1408 	entry->fspace = 0;
1409 
1410 	/* Reset free space in first. */
1411 	free = uvm_map_uaddr_e(map, first);
1412 	uvm_mapent_free_remove(map, free, first);
1413 	first->guard = 0;
1414 	first->fspace = 0;
1415 
1416 	/*
1417 	 * Remove all entries that are fully replaced.
1418 	 * We are iterating using last in reverse order.
1419 	 */
1420 	for (; first != last; last = prev) {
1421 		prev = RB_PREV(uvm_map_addr, &map->addr, last);
1422 
1423 		KDASSERT(last->start == last->end);
1424 		free = uvm_map_uaddr_e(map, last);
1425 		uvm_mapent_free_remove(map, free, last);
1426 		uvm_mapent_addr_remove(map, last);
1427 		DEAD_ENTRY_PUSH(dead, last);
1428 	}
1429 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1430 	if (first->start == addr) {
1431 		uvm_mapent_addr_remove(map, first);
1432 		DEAD_ENTRY_PUSH(dead, first);
1433 	} else {
1434 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1435 		    addr, flags);
1436 	}
1437 
1438 	/* Finally, link in entry. */
1439 	uvm_mapent_addr_insert(map, entry);
1440 	uvm_map_fix_space(map, entry, min, max, flags);
1441 
1442 	uvm_tree_sanity(map, __FILE__, __LINE__);
1443 	return entry;
1444 }
1445 
1446 /*
1447  * uvm_mapent_alloc: allocate a map entry
1448  */
1449 struct vm_map_entry *
1450 uvm_mapent_alloc(struct vm_map *map, int flags)
1451 {
1452 	struct vm_map_entry *me, *ne;
1453 	int s, i;
1454 	int pool_flags;
1455 
1456 	pool_flags = PR_WAITOK;
1457 	if (flags & UVM_FLAG_TRYLOCK)
1458 		pool_flags = PR_NOWAIT;
1459 
1460 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1461 		s = splvm();
1462 		me = uvm.kentry_free;
1463 		if (me == NULL) {
1464 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1465 			    &kd_nowait);
1466 			if (ne == NULL)
1467 				panic("uvm_mapent_alloc: cannot allocate map "
1468 				    "entry");
1469 			for (i = 0;
1470 			    i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
1471 			    i++)
1472 				RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
1473 			RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
1474 			me = ne;
1475 			if (ratecheck(&uvm_kmapent_last_warn_time,
1476 			    &uvm_kmapent_warn_rate))
1477 				printf("uvm_mapent_alloc: out of static "
1478 				    "map entries\n");
1479 		}
1480 		uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
1481 		uvmexp.kmapent++;
1482 		splx(s);
1483 		me->flags = UVM_MAP_STATIC;
1484 	} else if (map == kernel_map) {
1485 		splassert(IPL_NONE);
1486 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1487 		if (me == NULL)
1488 			goto out;
1489 		me->flags = UVM_MAP_KMEM;
1490 	} else {
1491 		splassert(IPL_NONE);
1492 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1493 		if (me == NULL)
1494 			goto out;
1495 		me->flags = 0;
1496 	}
1497 
1498 	if (me != NULL) {
1499 		RB_LEFT(me, daddrs.addr_entry) =
1500 		    RB_RIGHT(me, daddrs.addr_entry) =
1501 		    RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF;
1502 	}
1503 
1504 out:
1505 	return(me);
1506 }
1507 
1508 /*
1509  * uvm_mapent_free: free map entry
1510  *
1511  * => XXX: static pool for kernel map?
1512  */
1513 void
1514 uvm_mapent_free(struct vm_map_entry *me)
1515 {
1516 	int s;
1517 
1518 	if (me->flags & UVM_MAP_STATIC) {
1519 		s = splvm();
1520 		RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
1521 		uvm.kentry_free = me;
1522 		uvmexp.kmapent--;
1523 		splx(s);
1524 	} else if (me->flags & UVM_MAP_KMEM) {
1525 		splassert(IPL_NONE);
1526 		pool_put(&uvm_map_entry_kmem_pool, me);
1527 	} else {
1528 		splassert(IPL_NONE);
1529 		pool_put(&uvm_map_entry_pool, me);
1530 	}
1531 }
1532 
1533 /*
1534  * uvm_map_lookup_entry: find map entry at or before an address.
1535  *
1536  * => map must at least be read-locked by caller
1537  * => entry is returned in "entry"
1538  * => return value is true if address is in the returned entry
1539  * => ET_HOLE entries are considered to not contain a mapping; FALSE is
1540  *    returned for them.
1541  */
1542 boolean_t
1543 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1544     struct vm_map_entry **entry)
1545 {
1546 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1547 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1548 	    (*entry)->start <= address && (*entry)->end > address;
1549 }
1550 
1551 /*
1552  * uvm_map_pie: return a random load address for a PIE executable
1553  * properly aligned.
1554  */
1555 #ifndef VM_PIE_MAX_ADDR
1556 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1557 #endif
1558 
1559 #ifndef VM_PIE_MIN_ADDR
1560 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1561 #endif
1562 
1563 #ifndef VM_PIE_MIN_ALIGN
1564 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1565 #endif
1566 
1567 vaddr_t
1568 uvm_map_pie(vaddr_t align)
1569 {
1570 	vaddr_t addr, space, min;
1571 
1572 	align = MAX(align, VM_PIE_MIN_ALIGN);
1573 
1574 	/* round up to next alignment */
1575 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1576 
1577 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1578 		return (align);
1579 
1580 	space = (VM_PIE_MAX_ADDR - min) / align;
1581 	space = MIN(space, (u_int32_t)-1);
1582 
1583 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1584 	addr += min;
1585 
1586 	return (addr);
1587 }
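/*
 * Worked example for uvm_map_pie() (hypothetical values): with
 * VM_PIE_MAX_ADDR = 0x30000000 and align = 0x10000, min rounds up to
 * 0x10000, space = (0x30000000 - 0x10000) / 0x10000 slots remain, and the
 * result is a 64KB-aligned address in [0x10000, 0x30000000).
 */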
1588 
1589 void
1590 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1591 {
1592 	struct uvm_map_deadq dead;
1593 
1594 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1595 	    (end & (vaddr_t)PAGE_MASK) == 0);
1596 	TAILQ_INIT(&dead);
1597 	vm_map_lock(map);
1598 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
1599 	vm_map_unlock(map);
1600 
1601 	uvm_unmap_detach(&dead, 0);
1602 }
1603 
1604 /*
1605  * Mark entry as free.
1606  *
1607  * entry will be put on the dead list.
1608  * The free space will be merged into the previous or a new entry,
1609  * unless markfree is false.
1610  */
1611 void
1612 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1613     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1614     boolean_t markfree)
1615 {
1616 	struct uvm_addr_state	*free;
1617 	struct vm_map_entry	*prev;
1618 	vaddr_t			 addr;	/* Start of freed range. */
1619 	vaddr_t			 end;	/* End of freed range. */
1620 
1621 	prev = *prev_ptr;
1622 	if (prev == entry)
1623 		*prev_ptr = prev = NULL;
1624 
1625 	if (prev == NULL ||
1626 	    VMMAP_FREE_END(prev) != entry->start)
1627 		prev = RB_PREV(uvm_map_addr, &map->addr, entry);
1628 
1629 	/* Entry is describing only free memory and has nothing to drain into. */
1630 	if (prev == NULL && entry->start == entry->end && markfree) {
1631 		*prev_ptr = entry;
1632 		return;
1633 	}
1634 
1635 	addr = entry->start;
1636 	end = VMMAP_FREE_END(entry);
1637 	free = uvm_map_uaddr_e(map, entry);
1638 	uvm_mapent_free_remove(map, free, entry);
1639 	uvm_mapent_addr_remove(map, entry);
1640 	DEAD_ENTRY_PUSH(dead, entry);
1641 
1642 	if (markfree) {
1643 		if (prev) {
1644 			free = uvm_map_uaddr_e(map, prev);
1645 			uvm_mapent_free_remove(map, free, prev);
1646 		}
1647 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1648 	}
1649 }
1650 
1651 /*
1652  * Unwire and release referenced amap and object from map entry.
1653  */
1654 void
1655 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1656 {
1657 	/* Unwire removed map entry. */
1658 	if (VM_MAPENT_ISWIRED(entry)) {
1659 		entry->wired_count = 0;
1660 		uvm_fault_unwire_locked(map, entry->start, entry->end);
1661 	}
1662 
1663 	/* Entry-type specific code. */
1664 	if (UVM_ET_ISHOLE(entry)) {
1665 		/* Nothing to be done for holes. */
1666 	} else if (map->flags & VM_MAP_INTRSAFE) {
1667 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1668 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
1669 		pmap_kremove(entry->start, entry->end - entry->start);
1670 	} else if (UVM_ET_ISOBJ(entry) &&
1671 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1672 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1673 		/*
1674 		 * Note: kernel object mappings are currently used in
1675 		 * two ways:
1676 		 *  [1] "normal" mappings of pages in the kernel object
1677 		 *  [2] uvm_km_valloc'd allocations in which we
1678 		 *      pmap_enter in some non-kernel-object page
1679 		 *      (e.g. vmapbuf).
1680 		 *
1681 		 * for case [1], we need to remove the mapping from
1682 		 * the pmap and then remove the page from the kernel
1683 		 * object (because, once pages in a kernel object are
1684 		 * unmapped they are no longer needed, unlike, say,
1685 		 * a vnode where you might want the data to persist
1686 		 * until flushed out of a queue).
1687 		 *
1688 		 * for case [2], we need to remove the mapping from
1689 		 * the pmap.  there shouldn't be any pages at the
1690 		 * specified offset in the kernel object [but it
1691 		 * doesn't hurt to call uvm_km_pgremove just to be
1692 		 * safe?]
1693 		 *
1694 		 * uvm_km_pgremove currently does the following:
1695 		 *   for pages in the kernel object range:
1696 		 *     - drops the swap slot
1697 		 *     - uvm_pagefree the page
1698 		 *
1699 		 * note there is a version of uvm_km_pgremove() that
1700 		 * is used for "intrsafe" objects.
1701 		 */
1702 		/*
1703 		 * remove mappings from pmap and drop the pages
1704 		 * from the object.  offsets are always relative
1705 		 * to vm_map_min(kernel_map).
1706 		 */
1707 		pmap_remove(pmap_kernel(), entry->start, entry->end);
1708 		uvm_km_pgremove(entry->object.uvm_obj,
1709 		    entry->start - vm_map_min(kernel_map),
1710 		    entry->end - vm_map_min(kernel_map));
1711 
1712 		/*
1713 		 * null out kernel_object reference, we've just
1714 		 * dropped it
1715 		 */
1716 		entry->etype &= ~UVM_ET_OBJ;
1717 		entry->object.uvm_obj = NULL;  /* to be safe */
1718 	} else {
1719 		/* remove mappings the standard way. */
1720 		pmap_remove(map->pmap, entry->start, entry->end);
1721 	}
1722 }
1723 
1724 /*
1725  * Remove all entries from start to end.
1726  *
1727  * If remove_holes, then remove ET_HOLE entries as well.
1728  * If markfree, entry will be properly marked free; otherwise, no replacement
1729  * entry will be put in the tree (corrupting the tree).
1730  */
1731 void
1732 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1733     struct uvm_map_deadq *dead, boolean_t remove_holes,
1734     boolean_t markfree)
1735 {
1736 	struct vm_map_entry *prev_hint, *next, *entry;
1737 
1738 	start = MAX(start, map->min_offset);
1739 	end = MIN(end, map->max_offset);
1740 	if (start >= end)
1741 		return;
1742 
1743 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1744 		splassert(IPL_NONE);
1745 	else
1746 		splassert(IPL_VM);
1747 
1748 	/* Find first affected entry. */
1749 	entry = uvm_map_entrybyaddr(&map->addr, start);
1750 	KDASSERT(entry != NULL && entry->start <= start);
1751 	if (entry->end <= start && markfree)
1752 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
1753 	else
1754 		UVM_MAP_CLIP_START(map, entry, start);
1755 
1756 	/*
1757 	 * Iterate entries until we reach end address.
1758 	 * prev_hint hints where the freed space can be appended to.
1759 	 */
1760 	prev_hint = NULL;
1761 	for (; entry != NULL && entry->start < end; entry = next) {
1762 		KDASSERT(entry->start >= start);
1763 		if (entry->end > end || !markfree)
1764 			UVM_MAP_CLIP_END(map, entry, end);
1765 		KDASSERT(entry->start >= start && entry->end <= end);
1766 		next = RB_NEXT(uvm_map_addr, &map->addr, entry);
1767 
1768 		/* Don't remove holes unless asked to do so. */
1769 		if (UVM_ET_ISHOLE(entry)) {
1770 			if (!remove_holes) {
1771 				prev_hint = entry;
1772 				continue;
1773 			}
1774 		}
1775 
1776 		/* Kill entry. */
1777 		uvm_unmap_kill_entry(map, entry);
1778 
1779 		/* Update space usage. */
1780 		if ((map->flags & VM_MAP_ISVMSPACE) &&
1781 		    entry->object.uvm_obj == NULL &&
1782 		    !UVM_ET_ISHOLE(entry)) {
1783 			((struct vmspace *)map)->vm_dused -=
1784 			    uvmspace_dused(map, entry->start, entry->end);
1785 		}
1786 		if (!UVM_ET_ISHOLE(entry))
1787 			map->size -= entry->end - entry->start;
1788 
1789 		/* Actual removal of entry. */
1790 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
1791 	}
1792 
1793 	pmap_update(vm_map_pmap(map));
1794 
1795 #ifdef VMMAP_DEBUG
1796 	if (markfree) {
1797 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
1798 		    entry != NULL && entry->start < end;
1799 		    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
1800 			KDASSERT(entry->end <= start ||
1801 			    entry->start == entry->end ||
1802 			    UVM_ET_ISHOLE(entry));
1803 		}
1804 	} else {
1805 		vaddr_t a;
1806 		for (a = start; a < end; a += PAGE_SIZE)
1807 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
1808 	}
1809 #endif
1810 }
1811 
1812 /*
1813  * Mark all entries from first until end (exclusive) as pageable.
1814  *
1815  * Lock must be exclusive on entry and will not be touched.
1816  */
1817 void
1818 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
1819     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
1820 {
1821 	struct vm_map_entry *iter;
1822 
1823 	for (iter = first; iter != end;
1824 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1825 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
1826 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
1827 			continue;
1828 
1829 		iter->wired_count = 0;
1830 		uvm_fault_unwire_locked(map, iter->start, iter->end);
1831 	}
1832 }
1833 
1834 /*
1835  * Mark all entries from first until end (exclusive) as wired.
1836  *
1837  * Lockflags determines the lock state on return from this function.
1838  * Lock must be exclusive on entry.
1839  */
1840 int
1841 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
1842     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
1843     int lockflags)
1844 {
1845 	struct vm_map_entry *iter;
1846 #ifdef DIAGNOSTIC
1847 	unsigned int timestamp_save;
1848 #endif
1849 	int error;
1850 
1851 	/*
1852 	 * Wire pages in two passes:
1853 	 *
1854 	 * 1: holding the write lock, we create any anonymous maps that need
1855 	 *    to be created.  then we clip each map entry to the region to
1856 	 *    be wired and increment its wiring count.
1857 	 *
1858 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
1859 	 *    in the pages for any newly wired area (wired_count == 1).
1860 	 *
1861 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
1862 	 *    deadlock with another thread that may have faulted on one of
1863 	 *    the pages to be wired (it would mark the page busy, blocking
1864 	 *    us, then in turn block on the map lock that we hold).
1865 	 *    because we keep the read lock on the map, the copy-on-write
1866 	 *    status of the entries we modify here cannot change.
1867 	 */
1868 	for (iter = first; iter != end;
1869 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1870 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
1871 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
1872 		    iter->protection == VM_PROT_NONE)
1873 			continue;
1874 
1875 		/*
1876 		 * Perform actions of vm_map_lookup that need the write lock.
1877 		 * - create an anonymous map for copy-on-write
1878 		 * - anonymous map for zero-fill
1879 		 * Skip submaps.
1880 		 */
1881 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
1882 		    UVM_ET_ISNEEDSCOPY(iter) &&
1883 		    ((iter->protection & VM_PROT_WRITE) ||
1884 		    iter->object.uvm_obj == NULL)) {
1885 			amap_copy(map, iter, M_WAITOK, TRUE,
1886 			    iter->start, iter->end);
1887 		}
1888 		iter->wired_count++;
1889 	}
1890 
1891 	/*
1892 	 * Pass 2.
1893 	 */
1894 #ifdef DIAGNOSTIC
1895 	timestamp_save = map->timestamp;
1896 #endif
1897 	vm_map_busy(map);
1898 	vm_map_downgrade(map);
1899 
1900 	error = 0;
1901 	for (iter = first; error == 0 && iter != end;
1902 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1903 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
1904 		    iter->protection == VM_PROT_NONE)
1905 			continue;
1906 
1907 		error = uvm_fault_wire(map, iter->start, iter->end,
1908 		    iter->protection);
1909 	}
1910 
1911 	if (error) {
1912 		/*
1913 		 * uvm_fault_wire failure
1914 		 *
1915 		 * Reacquire lock and undo our work.
1916 		 */
1917 		vm_map_upgrade(map);
1918 		vm_map_unbusy(map);
1919 #ifdef DIAGNOSTIC
1920 		if (timestamp_save != map->timestamp)
1921 			panic("uvm_map_pageable_wire: stale map");
1922 #endif
1923 
1924 		/*
1925 		 * first is no longer needed to restart loops.
1926 		 * Use it as iterator to unmap successful mappings.
1927 		 */
1928 		for (; first != iter;
1929 		    first = RB_NEXT(uvm_map_addr, &map->addr, first)) {
1930 			if (UVM_ET_ISHOLE(first) ||
1931 			    first->start == first->end ||
1932 			    first->protection == VM_PROT_NONE)
1933 				continue;
1934 
1935 			first->wired_count--;
1936 			if (!VM_MAPENT_ISWIRED(first)) {
1937 				uvm_fault_unwire_locked(map,
1938 				    first->start, first->end);
1939 			}
1940 		}
1941 
1942 		/* decrease counter in the rest of the entries */
1943 		for (; iter != end;
1944 		    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1945 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
1946 			    iter->protection == VM_PROT_NONE)
1947 				continue;
1948 
1949 			iter->wired_count--;
1950 		}
1951 
1952 		if ((lockflags & UVM_LK_EXIT) == 0)
1953 			vm_map_unlock(map);
1954 		return error;
1955 	}
1956 
1957 	/* We are currently holding a read lock. */
1958 	if ((lockflags & UVM_LK_EXIT) == 0) {
1959 		vm_map_unbusy(map);
1960 		vm_map_unlock_read(map);
1961 	} else {
1962 		vm_map_upgrade(map);
1963 		vm_map_unbusy(map);
1964 #ifdef DIAGNOSTIC
1965 		if (timestamp_save != map->timestamp)
1966 			panic("uvm_map_pageable_wire: stale map");
1967 #endif
1968 	}
1969 	return 0;
1970 }
1971 
1972 /*
1973  * uvm_map_pageable: set pageability of a range in a map.
1974  *
1975  * Flags:
1976  * UVM_LK_ENTER: map is already locked by caller
1977  * UVM_LK_EXIT:  don't unlock map on exit
1978  *
1979  * The full range must be in use (entries may not have fspace != 0).
1980  * UVM_ET_HOLE counts as unmapped.
1981  */
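/*
 * Illustrative sketch (comment only, not compiled): wiring and later
 * unwiring a page-aligned user range, roughly how the mlock(2) and
 * munlock(2) paths are expected to drive this function.
 *
 *	error = uvm_map_pageable(map, addr, addr + size, FALSE, 0);
 *	...
 *	error = uvm_map_pageable(map, addr, addr + size, TRUE, 0);
 */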
1982 int
1983 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
1984     boolean_t new_pageable, int lockflags)
1985 {
1986 	struct vm_map_entry *first, *last, *tmp;
1987 	int error;
1988 
1989 	start = trunc_page(start);
1990 	end = round_page(end);
1991 
1992 	if (start > end)
1993 		return EINVAL;
1994 	if (start == end)
1995 		return 0;	/* nothing to do */
1996 	if (start < map->min_offset)
1997 		return EFAULT; /* why? see first XXX below */
1998 	if (end > map->max_offset)
1999 		return EINVAL; /* why? see second XXX below */
2000 
2001 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2002 	if ((lockflags & UVM_LK_ENTER) == 0)
2003 		vm_map_lock(map);
2004 
2005 	/*
2006 	 * Find first entry.
2007 	 *
2008 	 * Initial test on start is different, because of the different
2009 	 * error returned. Rest is tested further down.
2010 	 */
2011 	first = uvm_map_entrybyaddr(&map->addr, start);
2012 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2013 		/*
2014 		 * XXX if the first address is not mapped, it is EFAULT?
2015 		 */
2016 		error = EFAULT;
2017 		goto out;
2018 	}
2019 
2020 	/* Check that the range has no holes. */
2021 	for (last = first; last != NULL && last->start < end;
2022 	    last = RB_NEXT(uvm_map_addr, &map->addr, last)) {
2023 		if (UVM_ET_ISHOLE(last) ||
2024 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2025 			/*
2026 			 * XXX unmapped memory in range, why is it EINVAL
2027 			 * instead of EFAULT?
2028 			 */
2029 			error = EINVAL;
2030 			goto out;
2031 		}
2032 	}
2033 
2034 	/*
2035 	 * Last ended at the first entry after the range.
2036 	 * Move back one step.
2037 	 *
2038 	 * Note that last may be NULL.
2039 	 */
2040 	if (last == NULL) {
2041 		last = RB_MAX(uvm_map_addr, &map->addr);
2042 		if (last->end < end) {
2043 			error = EINVAL;
2044 			goto out;
2045 		}
2046 	} else {
2047 		KASSERT(last != first);
2048 		last = RB_PREV(uvm_map_addr, &map->addr, last);
2049 	}
2050 
2051 	/* Wire/unwire pages here. */
2052 	if (new_pageable) {
2053 		/*
2054 		 * Mark pageable.
2055 		 * entries that are not wired are untouched.
2056 		 */
2057 		if (VM_MAPENT_ISWIRED(first))
2058 			UVM_MAP_CLIP_START(map, first, start);
2059 		/*
2060 		 * Split last at end.
2061 		 * Make tmp be the first entry after what is to be touched.
2062 		 * If last is not wired, don't touch it.
2063 		 */
2064 		if (VM_MAPENT_ISWIRED(last)) {
2065 			UVM_MAP_CLIP_END(map, last, end);
2066 			tmp = RB_NEXT(uvm_map_addr, &map->addr, last);
2067 		} else
2068 			tmp = last;
2069 
2070 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2071 		error = 0;
2072 
2073 out:
2074 		if ((lockflags & UVM_LK_EXIT) == 0)
2075 			vm_map_unlock(map);
2076 		return error;
2077 	} else {
2078 		/*
2079 		 * Mark entries wired.
2080 		 * entries are always touched (because recovery needs this).
2081 		 */
2082 		if (!VM_MAPENT_ISWIRED(first))
2083 			UVM_MAP_CLIP_START(map, first, start);
2084 		/*
2085 		 * Split last at end.
2086 		 * Make tmp be the first entry after what is to be touched.
2087 		 * If last is not wired, don't touch it.
2088 		 */
2089 		if (!VM_MAPENT_ISWIRED(last)) {
2090 			UVM_MAP_CLIP_END(map, last, end);
2091 			tmp = RB_NEXT(uvm_map_addr, &map->addr, last);
2092 		} else
2093 			tmp = last;
2094 
2095 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2096 		    lockflags);
2097 	}
2098 }
2099 
2100 /*
2101  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2102  * all mapped regions.
2103  *
2104  * Map must not be locked.
2105  * If no flags are specified, all regions are unwired.
2106  */
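/*
 * Illustrative sketch (comment only, not compiled): an mlockall(2)-style
 * caller, where `limit' stands for whatever wired-memory resource limit
 * the caller wants enforced.
 *
 *	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
 *	    MCL_CURRENT | MCL_FUTURE, limit);
 */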
2107 int
2108 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2109 {
2110 	vsize_t size;
2111 	struct vm_map_entry *iter;
2112 
2113 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2114 	vm_map_lock(map);
2115 
2116 	if (flags == 0) {
2117 		uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr),
2118 		    NULL, map->min_offset, map->max_offset);
2119 
2120 		atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE);
2121 		vm_map_unlock(map);
2122 		return 0;
2123 	}
2124 
2125 	if (flags & MCL_FUTURE)
2126 		atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE);
2127 	if (!(flags & MCL_CURRENT)) {
2128 		vm_map_unlock(map);
2129 		return 0;
2130 	}
2131 
2132 	/*
2133 	 * Count number of pages in all non-wired entries.
2134 	 * If the number exceeds the limit, abort.
2135 	 */
2136 	size = 0;
2137 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2138 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2139 			continue;
2140 
2141 		size += iter->end - iter->start;
2142 	}
2143 
2144 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2145 		vm_map_unlock(map);
2146 		return ENOMEM;
2147 	}
2148 
2149 	/* XXX non-pmap_wired_count case must be handled by caller */
2150 #ifdef pmap_wired_count
2151 	if (limit != 0 &&
2152 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2153 		vm_map_unlock(map);
2154 		return ENOMEM;
2155 	}
2156 #endif
2157 
2158 	/*
2159 	 * uvm_map_pageable_wire will release the lock.
2160 	 */
2161 	return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr),
2162 	    NULL, map->min_offset, map->max_offset, 0);
2163 }
2164 
2165 /*
2166  * Initialize map.
2167  *
2168  * Allocates sufficient entries to describe the free memory in the map.
2169  */
2170 void
2171 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags)
2172 {
2173 	int i;
2174 
2175 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2176 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2177 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2178 
2179 	/*
2180 	 * Update parameters.
2181 	 *
2182 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2183 	 * properly.
2184 	 * We lose the top page if the full virtual address space is used.
2185 	 */
2186 	if (max & (vaddr_t)PAGE_MASK) {
2187 		max += 1;
2188 		if (max == 0) /* overflow */
2189 			max -= PAGE_SIZE;
2190 	}
2191 
2192 	RB_INIT(&map->addr);
2193 	map->uaddr_exe = NULL;
2194 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2195 		map->uaddr_any[i] = NULL;
2196 	map->uaddr_brk_stack = NULL;
2197 
2198 	map->size = 0;
2199 	map->ref_count = 1;
2200 	map->min_offset = min;
2201 	map->max_offset = max;
2202 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2203 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2204 	map->flags = flags;
2205 	map->timestamp = 0;
2206 	rw_init(&map->lock, "vmmaplk");
2207 
2208 	/* Configure the allocators. */
2209 	if (flags & VM_MAP_ISVMSPACE)
2210 		uvm_map_setup_md(map);
2211 	else
2212 		map->uaddr_any[3] = &uaddr_kbootstrap;
2213 
2214 	/*
2215 	 * Fill map entries.
2216 	 * This requires a write-locked map (because of diagnostic assertions
2217 	 * in insert code).
2218 	 */
2219 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
2220 		if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0)
2221 			panic("uvm_map_setup: rw_enter failed on new map");
2222 	}
2223 	uvm_map_setup_entries(map);
2224 	uvm_tree_sanity(map, __FILE__, __LINE__);
2225 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2226 		rw_exit(&map->lock);
2227 }
2228 
2229 /*
2230  * Destroy the map.
2231  *
2232  * This is the inverse operation to uvm_map_setup.
2233  */
2234 void
2235 uvm_map_teardown(struct vm_map *map)
2236 {
2237 	struct uvm_map_deadq	 dead_entries;
2238 	int			 i, waitok = 0;
2239 	struct vm_map_entry	*entry, *tmp;
2240 #ifdef VMMAP_DEBUG
2241 	size_t			 numq, numt;
2242 #endif
2243 
2244 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2245 		waitok = 1;
2246 	if (waitok) {
2247 		if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0)
2248 			panic("uvm_map_teardown: rw_enter failed on free map");
2249 	}
2250 
2251 	/* Remove address selectors. */
2252 	uvm_addr_destroy(map->uaddr_exe);
2253 	map->uaddr_exe = NULL;
2254 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2255 		uvm_addr_destroy(map->uaddr_any[i]);
2256 		map->uaddr_any[i] = NULL;
2257 	}
2258 	uvm_addr_destroy(map->uaddr_brk_stack);
2259 	map->uaddr_brk_stack = NULL;
2260 
2261 	/*
2262 	 * Remove entries.
2263 	 *
2264 	 * The following is based on graph breadth-first search.
2265 	 *
2266 	 * In color terms:
2267 	 * - the dead_entries set contains all nodes that are reachable
2268 	 *   (i.e. both the black and the grey nodes)
2269 	 * - any entry not in dead_entries is white
2270 	 * - any entry that appears in dead_entries before entry
2271 	 *   is black; the rest is grey.
2272 	 * The set [entry, end] is also referred to as the wavefront.
2273 	 *
2274 	 * Since the tree is always a fully connected graph, the breadth-first
2275 	 * search guarantees that each vmmap_entry is visited exactly once.
2276 	 * The vm_map is broken down in linear time.
2277 	 */
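	/*
	 * Illustrative example (comment only): with a three-entry tree
	 *
	 *	    B
	 *	   / \
	 *	  A   C
	 *
	 * B is pushed first; visiting B pushes A and C, so the wavefront
	 * goes [B] -> [A, C] -> [] and every entry is pushed onto
	 * dead_entries exactly once.
	 */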
2278 	TAILQ_INIT(&dead_entries);
2279 	if ((entry = RB_ROOT(&map->addr)) != NULL)
2280 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2281 	while (entry != NULL) {
2282 		if (waitok)
2283 			uvm_pause();
2284 		uvm_unmap_kill_entry(map, entry);
2285 		if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL)
2286 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2287 		if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL)
2288 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2289 		/* Update wave-front. */
2290 		entry = TAILQ_NEXT(entry, dfree.deadq);
2291 	}
2292 
2293 	if (waitok)
2294 		rw_exit(&map->lock);
2295 
2296 #ifdef VMMAP_DEBUG
2297 	numt = numq = 0;
2298 	RB_FOREACH(entry, uvm_map_addr, &map->addr)
2299 		numt++;
2300 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2301 		numq++;
2302 	KASSERT(numt == numq);
2303 #endif
2304 	uvm_unmap_detach(&dead_entries, waitok ? UVM_PLA_WAITOK : 0);
2305 	pmap_destroy(map->pmap);
2306 	map->pmap = NULL;
2307 }
2308 
2309 /*
2310  * Populate map with free-memory entries.
2311  *
2312  * Map must be initialized and empty.
2313  */
2314 void
2315 uvm_map_setup_entries(struct vm_map *map)
2316 {
2317 	KDASSERT(RB_EMPTY(&map->addr));
2318 
2319 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2320 }
2321 
2322 /*
2323  * Split entry at given address.
2324  *
2325  * orig:  entry that is to be split.
2326  * next:  a newly allocated map entry that is not linked.
2327  * split: address at which the split is done.
2328  */
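/*
 * Illustrative example (comment only): an entry mapping [start, end)
 * that also owns trailing free space up to fend, split at address s.
 *
 * With end <= s < fend (split inside the free space):
 *	orig: maps [start, end), free space [end, s)
 *	next: empty at s,        free space [s, fend)
 *
 * With start < s < end (split inside the mapping), orig keeps
 * [start, s) and loses its free space, while next takes [s, end)
 * plus the free space up to fend.
 */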
2329 void
2330 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2331     struct vm_map_entry *next, vaddr_t split)
2332 {
2333 	struct uvm_addr_state *free, *free_before;
2334 	vsize_t adj;
2335 
2336 	if ((split & PAGE_MASK) != 0) {
2337 		panic("uvm_map_splitentry: split address 0x%lx "
2338 		    "not on page boundary!", split);
2339 	}
2340 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2341 	uvm_tree_sanity(map, __FILE__, __LINE__);
2342 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2343 
2344 #ifdef VMMAP_DEBUG
2345 	KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig);
2346 	KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next);
2347 #endif /* VMMAP_DEBUG */
2348 
2349 	/*
2350 	 * Free space will change, unlink from free space tree.
2351 	 */
2352 	free = uvm_map_uaddr_e(map, orig);
2353 	uvm_mapent_free_remove(map, free, orig);
2354 
2355 	adj = split - orig->start;
2356 
2357 	uvm_mapent_copy(orig, next);
2358 	if (split >= orig->end) {
2359 		next->etype = 0;
2360 		next->offset = 0;
2361 		next->wired_count = 0;
2362 		next->start = next->end = split;
2363 		next->guard = 0;
2364 		next->fspace = VMMAP_FREE_END(orig) - split;
2365 		next->aref.ar_amap = NULL;
2366 		next->aref.ar_pageoff = 0;
2367 		orig->guard = MIN(orig->guard, split - orig->end);
2368 		orig->fspace = split - VMMAP_FREE_START(orig);
2369 	} else {
2370 		orig->fspace = 0;
2371 		orig->guard = 0;
2372 		orig->end = next->start = split;
2373 
2374 		if (next->aref.ar_amap)
2375 			amap_splitref(&orig->aref, &next->aref, adj);
2376 		if (UVM_ET_ISSUBMAP(orig)) {
2377 			uvm_map_reference(next->object.sub_map);
2378 			next->offset += adj;
2379 		} else if (UVM_ET_ISOBJ(orig)) {
2380 			if (next->object.uvm_obj->pgops &&
2381 			    next->object.uvm_obj->pgops->pgo_reference) {
2382 				next->object.uvm_obj->pgops->pgo_reference(
2383 				    next->object.uvm_obj);
2384 			}
2385 			next->offset += adj;
2386 		}
2387 	}
2388 
2389 	/*
2390 	 * Link next into address tree.
2391 	 * Link orig and next into free-space tree.
2392 	 *
2393 	 * Don't insert 'next' into the addr tree until orig has been linked,
2394 	 * in case the free-list looks at adjacent entries in the addr tree
2395 	 * for its decisions.
2396 	 */
2397 	if (orig->fspace > 0)
2398 		free_before = free;
2399 	else
2400 		free_before = uvm_map_uaddr_e(map, orig);
2401 	uvm_mapent_free_insert(map, free_before, orig);
2402 	uvm_mapent_addr_insert(map, next);
2403 	uvm_mapent_free_insert(map, free, next);
2404 
2405 	uvm_tree_sanity(map, __FILE__, __LINE__);
2406 }
2407 
2408 
2409 #ifdef VMMAP_DEBUG
2410 
2411 void
2412 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2413     char *file, int line)
2414 {
2415 	char* map_special;
2416 
2417 	if (test)
2418 		return;
2419 
2420 	if (map == kernel_map)
2421 		map_special = " (kernel_map)";
2422 	else if (map == kmem_map)
2423 		map_special = " (kmem_map)";
2424 	else
2425 		map_special = "";
2426 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2427 	    line, test_str);
2428 }
2429 
2430 /*
2431  * Check that map is sane.
2432  */
2433 void
2434 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2435 {
2436 	struct vm_map_entry	*iter;
2437 	vaddr_t			 addr;
2438 	vaddr_t			 min, max, bound; /* Bounds checker. */
2439 	struct uvm_addr_state	*free;
2440 
2441 	addr = vm_map_min(map);
2442 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2443 		/*
2444 		 * Valid start, end.
2445 		 * Catch overflow for end+fspace.
2446 		 */
2447 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2448 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2449 
2450 		/* May not be empty. */
2451 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2452 		    file, line);
2453 
2454 		/* Addresses for entry must lie within map boundaries. */
2455 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2456 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2457 
2458 		/* Tree may not have gaps. */
2459 		UVM_ASSERT(map, iter->start == addr, file, line);
2460 		addr = VMMAP_FREE_END(iter);
2461 
2462 		/*
2463 		 * Free space may not cross boundaries, unless the same
2464 		 * free list is used on both sides of the border.
2465 		 */
2466 		min = VMMAP_FREE_START(iter);
2467 		max = VMMAP_FREE_END(iter);
2468 
2469 		while (min < max &&
2470 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2471 			UVM_ASSERT(map,
2472 			    uvm_map_uaddr(map, bound - 1) ==
2473 			    uvm_map_uaddr(map, bound),
2474 			    file, line);
2475 			min = bound;
2476 		}
2477 
2478 		free = uvm_map_uaddr_e(map, iter);
2479 		if (free) {
2480 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2481 			    file, line);
2482 		} else {
2483 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2484 			    file, line);
2485 		}
2486 	}
2487 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2488 }
2489 
2490 void
2491 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2492 {
2493 	struct vm_map_entry *iter;
2494 	vsize_t size;
2495 
2496 	size = 0;
2497 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2498 		if (!UVM_ET_ISHOLE(iter))
2499 			size += iter->end - iter->start;
2500 	}
2501 
2502 	if (map->size != size)
2503 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2504 	UVM_ASSERT(map, map->size == size, file, line);
2505 
2506 	vmspace_validate(map);
2507 }
2508 
2509 /*
2510  * This function validates the statistics on vmspace.
2511  */
2512 void
2513 vmspace_validate(struct vm_map *map)
2514 {
2515 	struct vmspace *vm;
2516 	struct vm_map_entry *iter;
2517 	vaddr_t imin, imax;
2518 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2519 	vsize_t stack, heap; /* Measured sizes. */
2520 
2521 	if (!(map->flags & VM_MAP_ISVMSPACE))
2522 		return;
2523 
2524 	vm = (struct vmspace *)map;
2525 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2526 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2527 
2528 	stack = heap = 0;
2529 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2530 		imin = imax = iter->start;
2531 
2532 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL)
2533 			continue;
2534 
2535 		/*
2536 		 * Update stack, heap.
2537 		 * Keep in mind that (theoretically) the entries of
2538 		 * userspace and stack may be joined.
2539 		 */
2540 		while (imin != iter->end) {
2541 			/*
2542 			 * Set imax to the first boundary crossed between
2543 			 * imin and stack addresses.
2544 			 */
2545 			imax = iter->end;
2546 			if (imin < stack_begin && imax > stack_begin)
2547 				imax = stack_begin;
2548 			else if (imin < stack_end && imax > stack_end)
2549 				imax = stack_end;
2550 
2551 			if (imin >= stack_begin && imin < stack_end)
2552 				stack += imax - imin;
2553 			else
2554 				heap += imax - imin;
2555 			imin = imax;
2556 		}
2557 	}
2558 
2559 	heap >>= PAGE_SHIFT;
2560 	if (heap != vm->vm_dused) {
2561 		printf("vmspace stack range: 0x%lx-0x%lx\n",
2562 		    stack_begin, stack_end);
2563 		panic("vmspace_validate: vmspace.vm_dused invalid, "
2564 		    "expected %ld pgs, got %ld pgs in map %p",
2565 		    heap, vm->vm_dused,
2566 		    map);
2567 	}
2568 }
2569 
2570 #endif /* VMMAP_DEBUG */
2571 
2572 /*
2573  * uvm_map_init: init mapping system at boot time.   note that we allocate
2574  * and init the static pool of structs vm_map_entry for the kernel here.
2575  */
2576 void
2577 uvm_map_init(void)
2578 {
2579 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2580 	int lcv;
2581 
2582 	/* now set up static pool of kernel map entries ... */
2583 	uvm.kentry_free = NULL;
2584 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2585 		RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
2586 		    uvm.kentry_free;
2587 		uvm.kentry_free = &kernel_map_entry[lcv];
2588 	}
2589 
2590 	/* initialize the map-related pools. */
2591 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
2592 	    0, 0, 0, "vmsppl", &pool_allocator_nointr);
2593 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
2594 	    0, 0, 0, "vmmpepl", &pool_allocator_nointr);
2595 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
2596 	    0, 0, 0, "vmmpekpl", NULL);
2597 	pool_sethiwat(&uvm_map_entry_pool, 8192);
2598 
2599 	uvm_addr_init();
2600 }
2601 
2602 #if defined(DDB)
2603 
2604 /*
2605  * DDB hooks
2606  */
2607 
2608 /*
2609  * uvm_map_printit: actually prints the map
2610  */
2611 void
2612 uvm_map_printit(struct vm_map *map, boolean_t full,
2613     int (*pr)(const char *, ...))
2614 {
2615 	struct vmspace			*vm;
2616 	struct vm_map_entry		*entry;
2617 	struct uvm_addr_state		*free;
2618 	int				 in_free, i;
2619 	char				 buf[8];
2620 
2621 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2622 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2623 	    map->b_start, map->b_end);
2624 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2625 	    map->s_start, map->s_end);
2626 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2627 	    map->size, map->ref_count, map->timestamp,
2628 	    map->flags);
2629 #ifdef pmap_resident_count
2630 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2631 	    pmap_resident_count(map->pmap));
2632 #else
2633 	/* XXXCDC: this should be required ... */
2634 	(*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
2635 #endif
2636 
2637 	/* struct vmspace handling. */
2638 	if (map->flags & VM_MAP_ISVMSPACE) {
2639 		vm = (struct vmspace *)map;
2640 
2641 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2642 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2643 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2644 		    vm->vm_tsize, vm->vm_dsize);
2645 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2646 		    vm->vm_taddr, vm->vm_daddr);
2647 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2648 		    vm->vm_maxsaddr, vm->vm_minsaddr);
2649 	}
2650 
2651 	if (!full)
2652 		goto print_uaddr;
2653 	RB_FOREACH(entry, uvm_map_addr, &map->addr) {
2654 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
2655 		    entry, entry->start, entry->end, entry->object.uvm_obj,
2656 		    (long long)entry->offset, entry->aref.ar_amap,
2657 		    entry->aref.ar_pageoff);
2658 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
2659 		    "wc=%d, adv=%d\n",
2660 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
2661 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
2662 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
2663 		    entry->protection, entry->max_protection,
2664 		    entry->inheritance, entry->wired_count, entry->advice);
2665 
2666 		free = uvm_map_uaddr_e(map, entry);
2667 		in_free = (free != NULL);
2668 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
2669 		    "free=0x%lx-0x%lx\n",
2670 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
2671 		    in_free ? 'T' : 'F',
2672 		    entry->guard,
2673 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
2674 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
2675 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
2676 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
2677 		if (free) {
2678 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
2679 			    free->uaddr_minaddr, free->uaddr_maxaddr,
2680 			    free->uaddr_functions->uaddr_name);
2681 		}
2682 	}
2683 
2684 print_uaddr:
2685 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
2686 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2687 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
2688 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
2689 	}
2690 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
2691 }
2692 
2693 /*
2694  * uvm_object_printit: actually prints the object
2695  */
2696 void
2697 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
2698     int (*pr)(const char *, ...))
2701 {
2702 	struct vm_page *pg;
2703 	int cnt = 0;
2704 
2705 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
2706 	    uobj, uobj->pgops, uobj->uo_npages);
2707 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
2708 		(*pr)("refs=<SYSTEM>\n");
2709 	else
2710 		(*pr)("refs=%d\n", uobj->uo_refs);
2711 
2712 	if (!full) {
2713 		return;
2714 	}
2715 	(*pr)("  PAGES <pg,offset>:\n  ");
2716 	RB_FOREACH(pg, uvm_objtree, &uobj->memt) {
2717 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
2718 		if ((cnt % 3) == 2) {
2719 			(*pr)("\n  ");
2720 		}
2721 		cnt++;
2722 	}
2723 	if ((cnt % 3) != 2) {
2724 		(*pr)("\n");
2725 	}
2726 }
2727 
2728 /*
2729  * uvm_page_printit: actually print the page
2730  */
2731 static const char page_flagbits[] =
2732 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
2733 	"\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0"
2734 	"\31PMAP1\32PMAP2\33PMAP3";
2735 
2736 void
2737 uvm_page_printit(struct vm_page *pg, boolean_t full,
2738     int (*pr)(const char *, ...))
2741 {
2742 	struct vm_page *tpg;
2743 	struct uvm_object *uobj;
2744 	struct pglist *pgl;
2745 
2746 	(*pr)("PAGE %p:\n", pg);
2747 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
2748 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
2749 	    (long long)pg->phys_addr);
2750 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
2751 	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
2752 #if defined(UVM_PAGE_TRKOWN)
2753 	if (pg->pg_flags & PG_BUSY)
2754 		(*pr)("  owning process = %d, tag=%s",
2755 		    pg->owner, pg->owner_tag);
2756 	else
2757 		(*pr)("  page not busy, no owner");
2758 #else
2759 	(*pr)("  [page ownership tracking disabled]");
2760 #endif
2761 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
2762 
2763 	if (!full)
2764 		return;
2765 
2766 	/* cross-verify object/anon */
2767 	if ((pg->pg_flags & PQ_FREE) == 0) {
2768 		if (pg->pg_flags & PQ_ANON) {
2769 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
2770 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
2771 				(pg->uanon) ? pg->uanon->an_page : NULL);
2772 			else
2773 				(*pr)("  anon backpointer is OK\n");
2774 		} else {
2775 			uobj = pg->uobject;
2776 			if (uobj) {
2777 				(*pr)("  checking object list\n");
2778 				RB_FOREACH(tpg, uvm_objtree, &uobj->memt) {
2779 					if (tpg == pg) {
2780 						break;
2781 					}
2782 				}
2783 				if (tpg)
2784 					(*pr)("  page found on object list\n");
2785 				else
2786 					(*pr)("  >>> PAGE NOT FOUND "
2787 					    "ON OBJECT LIST! <<<\n");
2788 			}
2789 		}
2790 	}
2791 
2792 	/* cross-verify page queue */
2793 	if (pg->pg_flags & PQ_FREE) {
2794 		if (uvm_pmr_isfree(pg))
2795 			(*pr)("  page found in uvm_pmemrange\n");
2796 		else
2797 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
2798 		pgl = NULL;
2799 	} else if (pg->pg_flags & PQ_INACTIVE) {
2800 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
2801 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
2802 	} else if (pg->pg_flags & PQ_ACTIVE) {
2803 		pgl = &uvm.page_active;
2804  	} else {
2805 		pgl = NULL;
2806 	}
2807 
2808 	if (pgl) {
2809 		(*pr)("  checking pageq list\n");
2810 		TAILQ_FOREACH(tpg, pgl, pageq) {
2811 			if (tpg == pg) {
2812 				break;
2813 			}
2814 		}
2815 		if (tpg)
2816 			(*pr)("  page found on pageq list\n");
2817 		else
2818 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
2819 	}
2820 }
2821 #endif
2822 
2823 /*
2824  * uvm_map_protect: change map protection
2825  *
2826  * => set_max means set max_protection.
2827  * => map must be unlocked.
2828  */
2829 int
2830 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
2831     vm_prot_t new_prot, boolean_t set_max)
2832 {
2833 	struct vm_map_entry *first, *iter;
2834 	vm_prot_t old_prot;
2835 	vm_prot_t mask;
2836 	int error;
2837 
2838 	if (start > end)
2839 		return EINVAL;
2840 	start = MAX(start, map->min_offset);
2841 	end = MIN(end, map->max_offset);
2842 	if (start >= end)
2843 		return 0;
2844 
2845 	error = 0;
2846 	vm_map_lock(map);
2847 
2848 	/*
2849 	 * Set up first and last.
2850 	 * - first will contain first entry at or after start.
2851 	 */
2852 	first = uvm_map_entrybyaddr(&map->addr, start);
2853 	KDASSERT(first != NULL);
2854 	if (first->end < start)
2855 		first = RB_NEXT(uvm_map_addr, &map->addr, first);
2856 
2857 	/* First, check for protection violations. */
2858 	for (iter = first; iter != NULL && iter->start < end;
2859 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
2860 		/* Treat memory holes as free space. */
2861 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
2862 			continue;
2863 
2864 		if (UVM_ET_ISSUBMAP(iter)) {
2865 			error = EINVAL;
2866 			goto out;
2867 		}
2868 		if ((new_prot & iter->max_protection) != new_prot) {
2869 			error = EACCES;
2870 			goto out;
2871 		}
2872 	}
2873 
2874 	/* Fix protections.  */
2875 	for (iter = first; iter != NULL && iter->start < end;
2876 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
2877 		/* Treat memory holes as free space. */
2878 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
2879 			continue;
2880 
2881 		old_prot = iter->protection;
2882 
2883 		/*
2884 		 * Skip adapting protection iff old and new protection
2885 		 * are equal.
2886 		 */
2887 		if (set_max) {
2888 			if (old_prot == (new_prot & old_prot) &&
2889 			    iter->max_protection == new_prot)
2890 				continue;
2891 		} else {
2892 			if (old_prot == new_prot)
2893 				continue;
2894 		}
2895 
2896 		UVM_MAP_CLIP_START(map, iter, start);
2897 		UVM_MAP_CLIP_END(map, iter, end);
2898 
2899 		if (set_max) {
2900 			iter->max_protection = new_prot;
2901 			iter->protection &= new_prot;
2902 		} else
2903 			iter->protection = new_prot;
2904 
2905 		/*
2906 		 * update physical map if necessary.  worry about copy-on-write
2907 		 * here -- CHECK THIS XXX
2908 		 */
2909 		if (iter->protection != old_prot) {
2910 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
2911 			    ~VM_PROT_WRITE : VM_PROT_ALL;
2912 
2913 			/* update pmap */
2914 			if ((iter->protection & mask) == PROT_NONE &&
2915 			    VM_MAPENT_ISWIRED(iter)) {
2916 				/*
2917 				 * TODO(ariane) this is stupid. wired_count
2918 				 * is 0 if not wired, otherwise anything
2919 				 * larger than 0 (incremented once each time
2920 				 * wire is called).
2921 				 * Mostly to be able to undo the damage on
2922 				 * failure. Not intended to actually be a
2923 				 * wired refcounter...
2924 				 * Originally: iter->wired_count--;
2925 				 * (don't we have to unwire this in the pmap
2926 				 * as well?)
2927 				 */
2928 				iter->wired_count = 0;
2929 			}
2930 			pmap_protect(map->pmap, iter->start, iter->end,
2931 			    iter->protection & mask);
2932 		}
2933 
2934 		/*
2935 		 * If the map is configured to lock any future mappings,
2936 		 * wire this entry now if the old protection was VM_PROT_NONE
2937 		 * and the new protection is not VM_PROT_NONE.
2938 		 */
2939 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
2940 		    VM_MAPENT_ISWIRED(iter) == 0 &&
2941 		    old_prot == VM_PROT_NONE &&
2942 		    new_prot != VM_PROT_NONE) {
2943 			if (uvm_map_pageable(map, iter->start, iter->end,
2944 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
2945 				/*
2946 				 * If locking the entry fails, remember the
2947 				 * error if it's the first one.  Note we
2948 				 * still continue setting the protection in
2949 				 * the map, but it will return the resource
2950 				 * shortage condition regardless.
2951 				 *
2952 				 * XXX Ignore what the actual error is,
2953 				 * XXX just call it a resource shortage
2954 				 * XXX so that it doesn't get confused
2955 				 * XXX what uvm_map_protect() itself would
2956 				 * XXX normally return.
2957 				 */
2958 				error = ENOMEM;
2959 			}
2960 		}
2961 	}
2962 	pmap_update(map->pmap);
2963 
2964 out:
2965 	vm_map_unlock(map);
2966 	return error;
2967 }
2968 
2969 /*
2970  * uvmspace_alloc: allocate a vmspace structure.
2971  *
2972  * - structure includes vm_map and pmap
2973  * - XXX: no locking on this structure
2974  * - refcnt set to 1, rest must be init'd by caller
2975  */
2976 struct vmspace *
2977 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
2978     boolean_t remove_holes)
2979 {
2980 	struct vmspace *vm;
2981 
2982 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
2983 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
2984 	return (vm);
2985 }
2986 
2987 /*
2988  * uvmspace_init: initialize a vmspace structure.
2989  *
2990  * - XXX: no locking on this structure
2991  * - refcnt set to 1, rest must be init'd by caller
2992  */
2993 void
2994 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
2995     boolean_t pageable, boolean_t remove_holes)
2996 {
2997 	if (pmap)
2998 		pmap_reference(pmap);
2999 	else
3000 		pmap = pmap_create();
3001 	vm->vm_map.pmap = pmap;
3002 
3003 	uvm_map_setup(&vm->vm_map, min, max,
3004 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3005 
3006 	vm->vm_refcnt = 1;
3007 
3008 	if (remove_holes)
3009 		pmap_remove_holes(&vm->vm_map);
3010 }
3011 
3012 /*
3013  * uvmspace_share: share a vmspace between two processes
3014  *
3015  * - XXX: no locking on vmspace
3016  * - used for vfork
3017  */
3018 
3019 struct vmspace *
3020 uvmspace_share(struct process *pr)
3021 {
3022 	struct vmspace *vm = pr->ps_vmspace;
3023 
3024 	vm->vm_refcnt++;
3025 	return vm;
3026 }
3027 
3028 /*
3029  * uvmspace_exec: the process wants to exec a new program
3030  *
3031  * - XXX: no locking on vmspace
3032  */
3033 
3034 void
3035 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3036 {
3037 	struct process *pr = p->p_p;
3038 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3039 	struct vm_map *map = &ovm->vm_map;
3040 	struct uvm_map_deadq dead_entries;
3041 
3042 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3043 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3044 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3045 
3046 	pmap_unuse_final(p);   /* before stack addresses go away */
3047 	TAILQ_INIT(&dead_entries);
3048 
3049 	/* see if more than one process is using this vmspace...  */
3050 	if (ovm->vm_refcnt == 1) {
3051 		/*
3052 		 * If pr is the only process using its vmspace then
3053 		 * we can safely recycle that vmspace for the program
3054 		 * that is being exec'd.
3055 		 */
3056 
3057 #ifdef SYSVSHM
3058 		/*
3059 		 * SYSV SHM semantics require us to kill all segments on an exec
3060 		 */
3061 		if (ovm->vm_shm)
3062 			shmexit(ovm);
3063 #endif
3064 
3065 		/*
3066 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3067 		 * when a process execs another program image.
3068 		 */
3069 		vm_map_lock(map);
3070 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3071 
3072 		/*
3073 		 * now unmap the old program
3074 		 *
3075 		 * Instead of attempting to keep the map valid, we simply
3076 		 * nuke all entries and ask uvm_map_setup to reinitialize
3077 		 * the map to the new boundaries.
3078 		 *
3079 		 * uvm_unmap_remove will actually nuke all entries for us
3080 		 * (as in, not replace them with free-memory entries).
3081 		 */
3082 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3083 		    &dead_entries, TRUE, FALSE);
3084 
3085 		KDASSERT(RB_EMPTY(&map->addr));
3086 
3087 		/* Nuke statistics and boundaries. */
3088 		bzero(&ovm->vm_startcopy,
3089 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3090 
3091 
3092 		if (end & (vaddr_t)PAGE_MASK) {
3093 			end += 1;
3094 			if (end == 0) /* overflow */
3095 				end -= PAGE_SIZE;
3096 		}
3097 
3098 		/* Setup new boundaries and populate map with entries. */
3099 		map->min_offset = start;
3100 		map->max_offset = end;
3101 		uvm_map_setup_entries(map);
3102 		vm_map_unlock(map);
3103 
3104 		/* but keep MMU holes unavailable */
3105 		pmap_remove_holes(map);
3106 	} else {
3107 		/*
3108 		 * pr's vmspace is being shared, so we can't reuse
3109 		 * it for pr since it is still being used for others.
3110 		 * allocate a new vmspace for pr
3111 		 */
3112 		nvm = uvmspace_alloc(start, end,
3113 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3114 
3115 		/* install new vmspace and drop our ref to the old one. */
3116 		pmap_deactivate(p);
3117 		p->p_vmspace = pr->ps_vmspace = nvm;
3118 		pmap_activate(p);
3119 
3120 		uvmspace_free(ovm);
3121 	}
3122 
3123 	/* Release dead entries */
3124 	uvm_unmap_detach(&dead_entries, 0);
3125 }
3126 
3127 /*
3128  * uvmspace_free: free a vmspace data structure
3129  *
3130  * - XXX: no locking on vmspace
3131  */
3132 void
3133 uvmspace_free(struct vmspace *vm)
3134 {
3135 	if (--vm->vm_refcnt == 0) {
3136 		/*
3137 		 * lock the map, to wait out all other references to it.  delete
3138 		 * all of the mappings and pages they hold, then call the pmap
3139 		 * module to reclaim anything left.
3140 		 */
3141 #ifdef SYSVSHM
3142 		/* Get rid of any SYSV shared memory segments. */
3143 		if (vm->vm_shm != NULL)
3144 			shmexit(vm);
3145 #endif
3146 
3147 		uvm_map_teardown(&vm->vm_map);
3148 		pool_put(&uvm_vmspace_pool, vm);
3149 	}
3150 }
3151 
3152 /*
3153  * Clone map entry into other map.
3154  *
3155  * Mapping will be placed at dstaddr, for the same length.
3156  * Space must be available.
3157  * Reference counters are incremented.
3158  */
3159 struct vm_map_entry *
3160 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3161     vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3162     int mapent_flags, int amap_share_flags)
3163 {
3164 	struct vm_map_entry *new_entry, *first, *last;
3165 
3166 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3167 
3168 	/* Create new entry (linked in on creation). Fill in first, last. */
3169 	first = last = NULL;
3170 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3171 		panic("uvmspace_fork: no space in map for "
3172 		    "entry in empty map");
3173 	}
3174 	new_entry = uvm_map_mkentry(dstmap, first, last,
3175 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3176 	if (new_entry == NULL)
3177 		return NULL;
3178 	/* old_entry -> new_entry */
3179 	new_entry->object = old_entry->object;
3180 	new_entry->offset = old_entry->offset;
3181 	new_entry->aref = old_entry->aref;
3182 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3183 	new_entry->protection = old_entry->protection;
3184 	new_entry->max_protection = old_entry->max_protection;
3185 	new_entry->inheritance = old_entry->inheritance;
3186 	new_entry->advice = old_entry->advice;
3187 
3188 	/* gain reference to object backing the map (can't be a submap). */
3189 	if (new_entry->aref.ar_amap) {
3190 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3191 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3192 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3193 		    amap_share_flags);
3194 	}
3195 
3196 	if (UVM_ET_ISOBJ(new_entry) &&
3197 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3198 		new_entry->offset += off;
3199 		new_entry->object.uvm_obj->pgops->pgo_reference
3200 		    (new_entry->object.uvm_obj);
3201 	}
3202 
3203 	return new_entry;
3204 }
3205 
3206 /*
3207  * share the mapping: this means we want the old and
3208  * new entries to share amaps and backing objects.
3209  */
3210 struct vm_map_entry *
3211 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3212     struct vm_map *old_map,
3213     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3214 {
3215 	struct vm_map_entry *new_entry;
3216 
3217 	/*
3218 	 * if the old_entry needs a new amap (due to prev fork)
3219 	 * then we need to allocate it now so that we have
3220 	 * something we own to share with the new_entry.   [in
3221 	 * other words, we need to clear needs_copy]
3222 	 */
3223 
3224 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3225 		/* get our own amap, clears needs_copy */
3226 		amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3227 		    0, 0);
3228 		/* XXXCDC: WAITOK??? */
3229 	}
3230 
3231 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3232 	    old_entry->end - old_entry->start, 0, old_entry,
3233 	    dead, 0, AMAP_SHARED);
3234 
3235 	/*
3236 	 * pmap_copy the mappings: this routine is optional
3237 	 * but if it is there it will reduce the number of
3238 	 * page faults in the new proc.
3239 	 */
3240 	if (!UVM_ET_ISHOLE(new_entry))
3241 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3242 		    (new_entry->end - new_entry->start), new_entry->start);
3243 
3244 	return (new_entry);
3245 }
3246 
3247 /*
3248  * copy-on-write the mapping (using mmap's
3249  * MAP_PRIVATE semantics)
3250  *
3251  * allocate new_entry, adjust reference counts.
3252  * (note that new references are read-only).
3253  */
3254 struct vm_map_entry *
3255 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3256     struct vm_map *old_map,
3257     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3258 {
3259 	struct vm_map_entry	*new_entry;
3260 	boolean_t		 protect_child;
3261 
3262 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3263 	    old_entry->end - old_entry->start, 0, old_entry,
3264 	    dead, 0, 0);
3265 
3266 	new_entry->etype |=
3267 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3268 
3269 	/*
3270 	 * the new entry will need an amap.  it will either
3271 	 * need to be copied from the old entry or created
3272 	 * from scratch (if the old entry does not have an
3273 	 * amap).  can we defer this process until later
3274 	 * (by setting "needs_copy") or do we need to copy
3275 	 * the amap now?
3276 	 *
3277 	 * we must copy the amap now if any of the following
3278 	 * conditions hold:
3279 	 * 1. the old entry has an amap and that amap is
3280 	 *    being shared.  this means that the old (parent)
3281 	 *    process is sharing the amap with another
3282 	 *    process.  if we do not clear needs_copy here
3283 	 *    we will end up in a situation where both the
3284 	 *    parent and child process are referring to the
3285 	 *    same amap with "needs_copy" set.  if the
3286 	 *    parent write-faults, the fault routine will
3287 	 *    clear "needs_copy" in the parent by allocating
3288 	 *    a new amap.   this is wrong because the
3289 	 *    parent is supposed to be sharing the old amap
3290 	 *    and the new amap will break that.
3291 	 *
3292 	 * 2. if the old entry has an amap and a non-zero
3293 	 *    wire count then we are going to have to call
3294 	 *    amap_cow_now to avoid page faults in the
3295 	 *    parent process.   since amap_cow_now requires
3296 	 *    "needs_copy" to be clear we might as well
3297 	 *    clear it here as well.
3298 	 *
3299 	 */
3300 	if (old_entry->aref.ar_amap != NULL &&
3301 	    ((amap_flags(old_entry->aref.ar_amap) &
3302 	    AMAP_SHARED) != 0 ||
3303 	    VM_MAPENT_ISWIRED(old_entry))) {
3304 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3305 		    0, 0);
3306 		/* XXXCDC: M_WAITOK ... ok? */
3307 	}
3308 
3309 	/*
3310 	 * if the parent's entry is wired down, then the
3311 	 * parent process does not want page faults on
3312 	 * access to that memory.  this means that we
3313 	 * cannot do copy-on-write because we can't write
3314 	 * protect the old entry.   in this case we
3315 	 * resolve all copy-on-write faults now, using
3316 	 * amap_cow_now.   note that we have already
3317 	 * allocated any needed amap (above).
3318 	 */
3319 	if (VM_MAPENT_ISWIRED(old_entry)) {
3320 		/*
3321 		 * resolve all copy-on-write faults now
3322 		 * (note that there is nothing to do if
3323 		 * the old mapping does not have an amap).
3324 		 * XXX: is it worthwhile to bother with
3325 		 * pmap_copy in this case?
3326 		 */
3327 		if (old_entry->aref.ar_amap)
3328 			amap_cow_now(new_map, new_entry);
3329 	} else {
3330 		if (old_entry->aref.ar_amap) {
3331 			/*
3332 			 * setup mappings to trigger copy-on-write faults
3333 			 * we must write-protect the parent if it has
3334 			 * an amap and it is not already "needs_copy"...
3335 			 * if it is already "needs_copy" then the parent
3336 			 * has already been write-protected by a previous
3337 			 * fork operation.
3338 			 *
3339 			 * if we do not write-protect the parent, then
3340 			 * we must be sure to write-protect the child
3341 			 * after the pmap_copy() operation.
3342 			 *
3343 			 * XXX: pmap_copy should have some way of telling
3344 			 * us that it didn't do anything so we can avoid
3345 			 * calling pmap_protect needlessly.
3346 			 */
3347 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3348 				if (old_entry->max_protection &
3349 				    VM_PROT_WRITE) {
3350 					pmap_protect(old_map->pmap,
3351 					    old_entry->start,
3352 					    old_entry->end,
3353 					    old_entry->protection &
3354 					    ~VM_PROT_WRITE);
3355 					pmap_update(old_map->pmap);
3356 				}
3357 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3358 			}
3359 
3360 	  		/* parent must now be write-protected */
3361 	  		protect_child = FALSE;
3362 		} else {
3363 			/*
3364 			 * we only need to protect the child if the
3365 			 * parent has write access.
3366 			 */
3367 			if (old_entry->max_protection & VM_PROT_WRITE)
3368 				protect_child = TRUE;
3369 			else
3370 				protect_child = FALSE;
3371 		}
3372 		/*
3373 		 * copy the mappings
3374 		 * XXX: need a way to tell if this does anything
3375 		 */
3376 		if (!UVM_ET_ISHOLE(new_entry))
3377 			pmap_copy(new_map->pmap, old_map->pmap,
3378 			    new_entry->start,
3379 			    (old_entry->end - old_entry->start),
3380 			    old_entry->start);
3381 
3382 		/* protect the child's mappings if necessary */
3383 		if (protect_child) {
3384 			pmap_protect(new_map->pmap, new_entry->start,
3385 			    new_entry->end,
3386 			    new_entry->protection &
3387 			    ~VM_PROT_WRITE);
3388 		}
3389 	}
3390 
3391 	return (new_entry);
3392 }
3393 
3394 /*
3395  * zero the mapping: the new entry will be zero initialized
3396  */
3397 struct vm_map_entry *
3398 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3399     struct vm_map *old_map,
3400     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3401 {
3402 	struct vm_map_entry *new_entry;
3403 
3404 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3405 	    old_entry->end - old_entry->start, 0, old_entry,
3406 	    dead, 0, 0);
3407 
3408 	new_entry->etype |=
3409 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3410 
3411 	if (new_entry->aref.ar_amap) {
3412 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3413 		    atop(new_entry->end - new_entry->start), 0);
3414 		new_entry->aref.ar_amap = NULL;
3415 		new_entry->aref.ar_pageoff = 0;
3416 	}
3417 
3418 	if (UVM_ET_ISOBJ(new_entry)) {
3419 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
3420 			new_entry->object.uvm_obj->pgops->pgo_detach(
3421 			    new_entry->object.uvm_obj);
3422 		new_entry->object.uvm_obj = NULL;
3423 		new_entry->etype &= ~UVM_ET_OBJ;
3424 	}
3425 
3426 	return (new_entry);
3427 }
3428 
3429 /*
3430  * uvmspace_fork: fork a process' main map
3431  *
3432  * => create a new vmspace for child process from parent.
3433  * => parent's map must not be locked.
3434  */
3435 struct vmspace *
3436 uvmspace_fork(struct process *pr)
3437 {
3438 	struct vmspace *vm1 = pr->ps_vmspace;
3439 	struct vmspace *vm2;
3440 	struct vm_map *old_map = &vm1->vm_map;
3441 	struct vm_map *new_map;
3442 	struct vm_map_entry *old_entry, *new_entry;
3443 	struct uvm_map_deadq dead;
3444 
3445 	vm_map_lock(old_map);
3446 
3447 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3448 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3449 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3450 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3451 	vm2->vm_dused = 0; /* Statistic managed by us. */
3452 	new_map = &vm2->vm_map;
3453 	vm_map_lock(new_map);
3454 
3455 	/* go entry-by-entry */
3456 	TAILQ_INIT(&dead);
3457 	RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3458 		if (old_entry->start == old_entry->end)
3459 			continue;
3460 
3461 		/* first, some sanity checks on the old entry */
3462 		if (UVM_ET_ISSUBMAP(old_entry)) {
3463 			panic("fork: encountered a submap during fork "
3464 			    "(illegal)");
3465 		}
3466 
3467 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3468 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
3469 			panic("fork: non-copy_on_write map entry marked "
3470 			    "needs_copy (illegal)");
3471 		}
3472 
3473 		/* Apply inheritance. */
3474 		switch (old_entry->inheritance) {
3475 		case MAP_INHERIT_SHARE:
3476 			new_entry = uvm_mapent_forkshared(vm2, new_map,
3477 			    old_map, old_entry, &dead);
3478 			break;
3479 		case MAP_INHERIT_COPY:
3480 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
3481 			    old_map, old_entry, &dead);
3482 			break;
3483 		case MAP_INHERIT_ZERO:
3484 			new_entry = uvm_mapent_forkzero(vm2, new_map,
3485 			    old_map, old_entry, &dead);
3486 			break;
3487 		default:
3488 			continue;
3489 		}
3490 
3491 	 	/* Update process statistics. */
3492 		if (!UVM_ET_ISHOLE(new_entry))
3493 			new_map->size += new_entry->end - new_entry->start;
3494 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
3495 			vm2->vm_dused += uvmspace_dused(
3496 			    new_map, new_entry->start, new_entry->end);
3497 		}
3498 	}
3499 
3500 	vm_map_unlock(old_map);
3501 	vm_map_unlock(new_map);
3502 
3503 	/*
3504 	 * This can actually happen, if multiple entries described a
3505 	 * space in which an entry was inherited.
3506 	 */
3507 	uvm_unmap_detach(&dead, 0);
3508 
3509 #ifdef SYSVSHM
3510 	if (vm1->vm_shm)
3511 		shmfork(vm1, vm2);
3512 #endif
3513 
3514 #ifdef PMAP_FORK
3515 	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
3516 #endif
3517 
3518 	return vm2;
3519 }
3520 
3521 /*
3522  * uvm_map_hint: return the beginning of the best area suitable for
3523  * creating a new mapping with "prot" protection.
3524  */
3525 vaddr_t
3526 uvm_map_hint(struct vmspace *vm, vm_prot_t prot)
3527 {
3528 	vaddr_t addr;
3529 	vaddr_t spacing;
3530 
3531 #ifdef __i386__
3532 	/*
3533 	 * If executable skip first two pages, otherwise start
3534 	 * after data + heap region.
3535 	 */
3536 	if ((prot & VM_PROT_EXECUTE) != 0 &&
3537 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
3538 		addr = (PAGE_SIZE*2) +
3539 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
3540 		return (round_page(addr));
3541 	}
3542 #endif
3543 
3544 #if defined (__LP64__) && !defined (__mips64__)
3545 	spacing = (MIN((4UL * 1024 * 1024 * 1024), BRKSIZ) - 1);
3546 #else
3547 	spacing = (MIN((256 * 1024 * 1024), BRKSIZ) - 1);
3548 #endif
3549 
3550 	addr = (vaddr_t)vm->vm_daddr;
3551 	/*
3552 	 * Start malloc/mmap after the brk.
3553 	 * If the random spacing area has been used up,
3554 	 * the brk area becomes fair game for mmap as well.
3555 	 */
3556 	if (vm->vm_dused < spacing >> PAGE_SHIFT)
3557 		addr += BRKSIZ;
3558 #if !defined(__vax__)
3559 	addr += arc4random() & spacing;
3560 #endif
3561 	return (round_page(addr));
3562 }
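/*
 * Worked example (assuming __LP64__, PAGE_SHIFT == 12 and BRKSIZ >= 4GB):
 * spacing is 4GB - 1, i.e. roughly one million pages.  While vm_dused is
 * below that, the hint is vm_daddr + BRKSIZ plus a random offset of up to
 * 4GB - 1; once the data segment has consumed the spacing, the random
 * offset is applied directly above vm_daddr and the brk area becomes fair
 * game for mmap, as noted above.
 */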
3563 
3564 /*
3565  * uvm_map_submap: punch down part of a map into a submap
3566  *
3567  * => only the kernel_map is allowed to be submapped
3568  * => the purpose of submapping is to break up the locking granularity
3569  *	of a larger map
3570  * => the range specified must have been mapped previously with a uvm_map()
3571  *	call [with uobj==NULL] to create a blank map entry in the main map.
3572  *	[And it had better still be blank!]
3573  * => maps which contain submaps should never be copied or forked.
3574  * => to remove a submap, use uvm_unmap() on the main map
3575  *	and then uvm_map_deallocate() the submap.
3576  * => main map must be unlocked.
3577  * => submap must have been init'd and have a zero reference count.
3578  *	[need not be locked as we don't actually reference it]
3579  */
3580 int
3581 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3582     struct vm_map *submap)
3583 {
3584 	struct vm_map_entry *entry;
3585 	int result;
3586 
3587 	if (start > map->max_offset || end > map->max_offset ||
3588 	    start < map->min_offset || end < map->min_offset)
3589 		return EINVAL;
3590 
3591 	vm_map_lock(map);
3592 
3593 	if (uvm_map_lookup_entry(map, start, &entry)) {
3594 		UVM_MAP_CLIP_START(map, entry, start);
3595 		UVM_MAP_CLIP_END(map, entry, end);
3596 	} else
3597 		entry = NULL;
3598 
3599 	if (entry != NULL &&
3600 	    entry->start == start && entry->end == end &&
3601 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
3602 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
3603 		entry->etype |= UVM_ET_SUBMAP;
3604 		entry->object.sub_map = submap;
3605 		entry->offset = 0;
3606 		uvm_map_reference(submap);
3607 		result = 0;
3608 	} else
3609 		result = EINVAL;
3610 
3611 	vm_map_unlock(map);
3612 	return(result);
3613 }
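/*
 * Illustrative kernel-side sketch (example only, under stated assumptions):
 * installing a submap over a range of kernel_map.  "start" and "end" are
 * hypothetical; per the rules above, [start, end) must already be a blank
 * entry in kernel_map, created by an earlier uvm_map() call with
 * uobj == NULL (that call is omitted here), and the flags passed to
 * uvm_map_create() depend on the intended use of the submap.
 */
#if 0	/* example only */
	struct vm_map *submap;

	submap = uvm_map_create(pmap_kernel(), start, end, VM_MAP_PAGEABLE);
	if (uvm_map_submap(kernel_map, start, end, submap) != 0)
		panic("could not install submap");

	/* Teardown: unmap the range in the main map, then drop the ref. */
	/* uvm_unmap(kernel_map, start, end); */
	/* uvm_map_deallocate(submap); */
#endif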
3614 
3615 /*
3616  * uvm_map_checkprot: check protection in map
3617  *
3618  * => must allow specific protection in a fully allocated region.
3619  * => map must be read or write locked by caller.
3620  */
3621 boolean_t
3622 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3623     vm_prot_t protection)
3624 {
3625 	struct vm_map_entry *entry;
3626 
3627 	if (start < map->min_offset || end > map->max_offset || start > end)
3628 		return FALSE;
3629 	if (start == end)
3630 		return TRUE;
3631 
3632 	/*
3633 	 * Iterate entries.
3634 	 */
3635 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
3636 	    entry != NULL && entry->start < end;
3637 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
3638 		/* Fail if a hole is found. */
3639 		if (UVM_ET_ISHOLE(entry) ||
3640 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
3641 			return FALSE;
3642 
3643 		/* Check protection. */
3644 		if ((entry->protection & protection) != protection)
3645 			return FALSE;
3646 	}
3647 	return TRUE;
3648 }
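/*
 * Illustrative sketch (example only): verifying that an already-mapped
 * kernel range is writable before touching it.  "kva" and "len" are
 * hypothetical; the map must be read- or write-locked around the call,
 * per the comment above.
 */
#if 0	/* example only */
	vm_map_lock_read(kernel_map);
	if (!uvm_map_checkprot(kernel_map, kva, kva + len,
	    VM_PROT_READ | VM_PROT_WRITE))
		panic("kernel range not writable");
	vm_map_unlock_read(kernel_map);
#endif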
3649 
3650 /*
3651  * uvm_map_create: create map
3652  */
3653 vm_map_t
3654 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
3655 {
3656 	vm_map_t result;
3657 
3658 	result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK);
3659 	result->pmap = pmap;
3660 	uvm_map_setup(result, min, max, flags);
3661 	return(result);
3662 }
3663 
3664 /*
3665  * uvm_map_deallocate: drop reference to a map
3666  *
3667  * => caller must not lock map
3668  * => we will zap map if ref count goes to zero
3669  */
3670 void
3671 uvm_map_deallocate(vm_map_t map)
3672 {
3673 	int c;
3674 	struct uvm_map_deadq dead;
3675 
3676 	c = --map->ref_count;
3677 	if (c > 0) {
3678 		return;
3679 	}
3680 
3681 	/*
3682 	 * all references gone.   unmap and free.
3683 	 *
3684 	 * No lock required: we are only one to access this map.
3685 	 */
3686 	TAILQ_INIT(&dead);
3687 	uvm_tree_sanity(map, __FILE__, __LINE__);
3688 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
3689 	    TRUE, FALSE);
3690 	pmap_destroy(map->pmap);
3691 	KASSERT(RB_EMPTY(&map->addr));
3692 	free(map, M_VMMAP, 0);
3693 
3694 	uvm_unmap_detach(&dead, 0);
3695 }
3696 
3697 /*
3698  * uvm_map_inherit: set inheritance code for range of addrs in map.
3699  *
3700  * => map must be unlocked
3701  * => note that the inherit code is used during a "fork".  see fork
3702  *	code for details.
3703  */
3704 int
3705 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
3706     vm_inherit_t new_inheritance)
3707 {
3708 	struct vm_map_entry *entry;
3709 
3710 	switch (new_inheritance) {
3711 	case MAP_INHERIT_NONE:
3712 	case MAP_INHERIT_COPY:
3713 	case MAP_INHERIT_SHARE:
3714 	case MAP_INHERIT_ZERO:
3715 		break;
3716 	default:
3717 		return (EINVAL);
3718 	}
3719 
3720 	if (start > end)
3721 		return EINVAL;
3722 	start = MAX(start, map->min_offset);
3723 	end = MIN(end, map->max_offset);
3724 	if (start >= end)
3725 		return 0;
3726 
3727 	vm_map_lock(map);
3728 
3729 	entry = uvm_map_entrybyaddr(&map->addr, start);
3730 	if (entry->end > start)
3731 		UVM_MAP_CLIP_START(map, entry, start);
3732 	else
3733 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3734 
3735 	while (entry != NULL && entry->start < end) {
3736 		UVM_MAP_CLIP_END(map, entry, end);
3737 		entry->inheritance = new_inheritance;
3738 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3739 	}
3740 
3741 	vm_map_unlock(map);
3742 	return (0);
3743 }
3744 
3745 /*
3746  * uvm_map_advice: set advice code for range of addrs in map.
3747  *
3748  * => map must be unlocked
3749  */
3750 int
3751 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
3752 {
3753 	struct vm_map_entry *entry;
3754 
3755 	switch (new_advice) {
3756 	case MADV_NORMAL:
3757 	case MADV_RANDOM:
3758 	case MADV_SEQUENTIAL:
3759 		break;
3760 	default:
3761 		return (EINVAL);
3762 	}
3763 
3764 	if (start > end)
3765 		return EINVAL;
3766 	start = MAX(start, map->min_offset);
3767 	end = MIN(end, map->max_offset);
3768 	if (start >= end)
3769 		return 0;
3770 
3771 	vm_map_lock(map);
3772 
3773 	entry = uvm_map_entrybyaddr(&map->addr, start);
3774 	if (entry != NULL && entry->end > start)
3775 		UVM_MAP_CLIP_START(map, entry, start);
3776 	else if (entry != NULL)
3777 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3778 
3779 	/*
3780 	 * XXXJRT: disallow holes?
3781 	 */
3782 	while (entry != NULL && entry->start < end) {
3783 		UVM_MAP_CLIP_END(map, entry, end);
3784 		entry->advice = new_advice;
3785 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3786 	}
3787 
3788 	vm_map_unlock(map);
3789 	return (0);
3790 }
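/*
 * Illustrative userland counterpart (example only): the advice codes
 * accepted above come from madvise(2).  "buf" and "filesz" are
 * hypothetical and assumed to describe a file mapping that will be read
 * once, front to back.
 */
#if 0	/* example only */
#include <sys/mman.h>

	madvise(buf, filesz, MADV_SEQUENTIAL);
	/* ... stream through the mapping ... */
	madvise(buf, filesz, MADV_NORMAL);
#endif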
3791 
3792 /*
3793  * uvm_map_extract: extract a mapping from a map and put it somewhere
3794  * in the kernel_map, setting protection to max_prot.
3795  *
3796  * => map should be unlocked (we will write lock it and kernel_map)
3797  * => returns 0 on success, error code otherwise
3798  * => start must be page aligned
3799  * => len must be page sized
3800  * => flags:
3801  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
3802  * Mappings are QREF's.
3803  */
3804 int
3805 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
3806     vaddr_t *dstaddrp, int flags)
3807 {
3808 	struct uvm_map_deadq dead;
3809 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
3810 	vaddr_t dstaddr;
3811 	vaddr_t end;
3812 	vaddr_t cp_start;
3813 	vsize_t cp_len, cp_off;
3814 	int error;
3815 
3816 	TAILQ_INIT(&dead);
3817 	end = start + len;
3818 
3819 	/*
3820 	 * Sanity check on the parameters.
3821 	 * Also, since the mapping may not contain gaps, error out if the
3822 	 * mapped area is not in the source map.
3823 	 */
3824 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
3825 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
3826 		return EINVAL;
3827 	if (start < srcmap->min_offset || end > srcmap->max_offset)
3828 		return EINVAL;
3829 
3830 	/* Handle the len == 0 case; the dead queue was set up above. */
3831 	if (len == 0)
3832 		return 0;
3833 
3834 	/* Acquire lock on srcmap. */
3835 	vm_map_lock(srcmap);
3836 
3837 	/* srcmap is locked; look up the first entry in <start,len>. */
3838 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
3839 
3840 	/* Check that the range is contiguous. */
3841 	for (entry = first; entry != NULL && entry->end < end;
3842 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
3843 		if (VMMAP_FREE_END(entry) != entry->end ||
3844 		    UVM_ET_ISHOLE(entry)) {
3845 			error = EINVAL;
3846 			goto fail;
3847 		}
3848 	}
3849 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
3850 		error = EINVAL;
3851 		goto fail;
3852 	}
3853 
3854 	/*
3855 	 * Handle need-copy flag.
3856 	 * This may invalidate last, hence the re-initialization during the
3857 	 * loop.
3858 	 *
3859 	 * Also, perform clipping of last if not UVM_EXTRACT_QREF.
3860 	 */
3861 	for (entry = first; entry != NULL && entry->start < end;
3862 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
3863 		if (UVM_ET_ISNEEDSCOPY(entry))
3864 			amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
3865 		if (UVM_ET_ISNEEDSCOPY(entry)) {
3866 			/*
3867 			 * amap_copy failure
3868 			 */
3869 			error = ENOMEM;
3870 			goto fail;
3871 		}
3872 	}
3873 
3874 	/* Lock destination map (kernel_map). */
3875 	vm_map_lock(kernel_map);
3876 
3877 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
3878 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
3879 	    VM_PROT_NONE, 0) != 0) {
3880 		error = ENOMEM;
3881 		goto fail2;
3882 	}
3883 	*dstaddrp = dstaddr;
3884 
3885 	/*
3886 	 * We now have srcmap and kernel_map locked.
3887 	 * dstaddr contains the destination offset in dstmap.
3888 	 */
3889 	/* step 1: start looping through map entries, performing extraction. */
3890 	for (entry = first; entry != NULL && entry->start < end;
3891 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
3892 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
3893 		if (UVM_ET_ISHOLE(entry))
3894 			continue;
3895 
3896 		/* Calculate uvm_mapent_clone parameters. */
3897 		cp_start = entry->start;
3898 		if (cp_start < start) {
3899 			cp_off = start - cp_start;
3900 			cp_start = start;
3901 		} else
3902 			cp_off = 0;
3903 		cp_len = MIN(entry->end, end) - cp_start;
3904 
3905 		newentry = uvm_mapent_clone(kernel_map,
3906 		    cp_start - start + dstaddr, cp_len, cp_off,
3907 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
3908 		if (newentry == NULL) {
3909 			error = ENOMEM;
3910 			goto fail2_unmap;
3911 		}
3912 		kernel_map->size += cp_len;
3913 		if (flags & UVM_EXTRACT_FIXPROT)
3914 			newentry->protection = newentry->max_protection;
3915 
3916 		/*
3917 		 * Step 2: perform pmap copy.
3918 		 * (Doing this in the loop saves one RB traversal.)
3919 		 */
3920 		pmap_copy(kernel_map->pmap, srcmap->pmap,
3921 		    cp_start - start + dstaddr, cp_len, cp_start);
3922 	}
3923 	pmap_update(kernel_map->pmap);
3924 
3925 	error = 0;
3926 
3927 	/* Unmap copied entries on failure. */
3928 fail2_unmap:
3929 	if (error) {
3930 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
3931 		    FALSE, TRUE);
3932 	}
3933 
3934 	/* Release maps, release dead entries. */
3935 fail2:
3936 	vm_map_unlock(kernel_map);
3937 
3938 fail:
3939 	vm_map_unlock(srcmap);
3940 
3941 	uvm_unmap_detach(&dead, 0);
3942 
3943 	return error;
3944 }
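/*
 * Illustrative kernel-side sketch (example only, under stated assumptions):
 * borrowing a page-aligned user range into kernel_map and dropping it
 * again.  "pr", "uaddr" and "len" are hypothetical, with uaddr and len
 * page aligned; error handling is minimal.
 */
#if 0	/* example only */
	vaddr_t kva;

	if (uvm_map_extract(&pr->ps_vmspace->vm_map, uaddr, len, &kva,
	    UVM_EXTRACT_FIXPROT) != 0)
		return ENOMEM;
	/* ... access the user pages through kva ... */
	uvm_unmap(kernel_map, kva, kva + len);
#endif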
3945 
3946 /*
3947  * uvm_map_clean: clean out a map range
3948  *
3949  * => valid flags:
3950  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
3951  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
3952  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
3953  *   if (flags & PGO_FREE): any cached pages are freed after clean
3954  * => returns an error if any part of the specified range isn't mapped
3955  * => never a need to flush amap layer since the anonymous memory has
3956  *	no permanent home, but may deactivate pages there
3957  * => called from sys_msync() and sys_madvise()
3958  * => caller must not write-lock map (read OK).
3959  * => we may sleep while cleaning if SYNCIO [with map read-locked]
3960  */
3961 int	amap_clean_works = 1;	/* XXX for now, just in case... */
3962 
3963 int
3964 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
3965 {
3966 	struct vm_map_entry *first, *entry;
3967 	struct vm_amap *amap;
3968 	struct vm_anon *anon;
3969 	struct vm_page *pg;
3970 	struct uvm_object *uobj;
3971 	vaddr_t cp_start, cp_end;
3972 	int refs;
3973 	int error;
3974 	boolean_t rv;
3975 
3976 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
3977 	    (PGO_FREE|PGO_DEACTIVATE));
3978 
3979 	if (start > end || start < map->min_offset || end > map->max_offset)
3980 		return EINVAL;
3981 
3982 	vm_map_lock_read(map);
3983 	first = uvm_map_entrybyaddr(&map->addr, start);
3984 
3985 	/* Make a first pass to check for holes. */
3986 	for (entry = first; entry->start < end;
3987 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
3988 		if (UVM_ET_ISSUBMAP(entry)) {
3989 			vm_map_unlock_read(map);
3990 			return EINVAL;
3991 		}
3992 		if (UVM_ET_ISSUBMAP(entry) ||
3993 		    UVM_ET_ISHOLE(entry) ||
3994 		    (entry->end < end &&
3995 		    VMMAP_FREE_END(entry) != entry->end)) {
3996 			vm_map_unlock_read(map);
3997 			return EFAULT;
3998 		}
3999 	}
4000 
4001 	error = 0;
4002 	for (entry = first; entry != NULL && entry->start < end;
4003 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
4004 		amap = entry->aref.ar_amap;	/* top layer */
4005 		if (UVM_ET_ISOBJ(entry))
4006 			uobj = entry->object.uvm_obj;
4007 		else
4008 			uobj = NULL;
4009 
4010 		/*
4011 		 * No amap cleaning necessary if:
4012 		 *  - there's no amap
4013 		 *  - we're not deactivating or freeing pages.
4014 		 */
4015 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4016 			goto flush_object;
4017 		if (!amap_clean_works)
4018 			goto flush_object;
4019 
4020 		cp_start = MAX(entry->start, start);
4021 		cp_end = MIN(entry->end, end);
4022 
4023 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4024 			anon = amap_lookup(&entry->aref,
4025 			    cp_start - entry->start);
4026 			if (anon == NULL)
4027 				continue;
4028 
4029 			pg = anon->an_page;
4030 			if (pg == NULL) {
4031 				continue;
4032 			}
4033 
4034 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4035 			/*
4036 			 * XXX In these first 3 cases, we always just
4037 			 * XXX deactivate the page.  We may want to
4038 			 * XXX handle the different cases more
4039 			 * XXX specifically, in the future.
4040 			 */
4041 			case PGO_CLEANIT|PGO_FREE:
4042 			case PGO_CLEANIT|PGO_DEACTIVATE:
4043 			case PGO_DEACTIVATE:
4044 deactivate_it:
4045 				/* skip the page if it's loaned or wired */
4046 				if (pg->loan_count != 0 ||
4047 				    pg->wire_count != 0) {
4048 					break;
4049 				}
4050 
4051 				uvm_lock_pageq();
4052 
4053 				/*
4054 				 * skip the page if it's not actually owned
4055 				 * by the anon (may simply be loaned to the
4056 				 * anon).
4057 				 */
4058 				if ((pg->pg_flags & PQ_ANON) == 0) {
4059 					KASSERT(pg->uobject == NULL);
4060 					uvm_unlock_pageq();
4061 					break;
4062 				}
4063 				KASSERT(pg->uanon == anon);
4064 
4065 				/* zap all mappings for the page. */
4066 				pmap_page_protect(pg, VM_PROT_NONE);
4067 
4068 				/* ...and deactivate the page. */
4069 				uvm_pagedeactivate(pg);
4070 
4071 				uvm_unlock_pageq();
4072 				break;
4073 			case PGO_FREE:
4074 				/*
4075 			 * If there are multiple references to
4076 				 * the amap, just deactivate the page.
4077 				 */
4078 				if (amap_refs(amap) > 1)
4079 					goto deactivate_it;
4080 
4081 				/* XXX skip the page if it's wired */
4082 				if (pg->wire_count != 0) {
4083 					break;
4084 				}
4085 				amap_unadd(&entry->aref,
4086 				    cp_start - entry->start);
4087 				refs = --anon->an_ref;
4088 				if (refs == 0)
4089 					uvm_anfree(anon);
4090 				break;
4091 			default:
4092 				panic("uvm_map_clean: weird flags");
4093 			}
4094 		}
4095 
4096 flush_object:
4097 		cp_start = MAX(entry->start, start);
4098 		cp_end = MIN(entry->end, end);
4099 
4100 		/*
4101 		 * flush pages if we've got a valid backing object.
4102 		 *
4103 		 * Don't PGO_FREE if we don't have write permission
4104 		 * and don't flush if this is a copy-on-write object
4105 		 * since we can't know our permissions on it.
4106 		 */
4107 		if (uobj != NULL &&
4108 		    ((flags & PGO_FREE) == 0 ||
4109 		     ((entry->max_protection & VM_PROT_WRITE) != 0 &&
4110 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4111 			rv = uobj->pgops->pgo_flush(uobj,
4112 			    cp_start - entry->start + entry->offset,
4113 			    cp_end - entry->start + entry->offset, flags);
4114 
4115 			if (rv == FALSE)
4116 				error = EFAULT;
4117 		}
4118 	}
4119 
4120 	vm_map_unlock_read(map);
4121 	return error;
4122 }
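/*
 * Illustrative userland counterpart (example only): uvm_map_clean backs
 * msync(2).  "buf" and "filesz" are hypothetical and assumed to describe
 * a shared file mapping; a synchronous write-back corresponds to
 * PGO_CLEANIT|PGO_SYNCIO above.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <err.h>

	if (msync(buf, filesz, MS_SYNC) == -1)
		warn("msync");
#endif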
4123 
4124 /*
4125  * UVM_MAP_CLIP_END implementation
4126  */
4127 void
4128 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4129 {
4130 	struct vm_map_entry *tmp;
4131 
4132 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4133 	tmp = uvm_mapent_alloc(map, 0);
4134 
4135 	/* Invoke splitentry. */
4136 	uvm_map_splitentry(map, entry, tmp, addr);
4137 }
4138 
4139 /*
4140  * UVM_MAP_CLIP_START implementation
4141  *
4142  * Clippers are required to not change the pointers to the entry they are
4143  * clipping on.
4144  * Since uvm_map_splitentry turns the original entry into the lowest
4145  * entry (address-wise), we swap the new entry and the original entry
4146  * prior to calling uvm_map_splitentry.
4147  */
4148 void
4149 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4150 {
4151 	struct vm_map_entry *tmp;
4152 	struct uvm_addr_state *free;
4153 
4154 	/* Unlink original. */
4155 	free = uvm_map_uaddr_e(map, entry);
4156 	uvm_mapent_free_remove(map, free, entry);
4157 	uvm_mapent_addr_remove(map, entry);
4158 
4159 	/* Copy entry. */
4160 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4161 	tmp = uvm_mapent_alloc(map, 0);
4162 	uvm_mapent_copy(entry, tmp);
4163 
4164 	/* Put new entry in place of original entry. */
4165 	uvm_mapent_addr_insert(map, tmp);
4166 	uvm_mapent_free_insert(map, free, tmp);
4167 
4168 	/* Invoke splitentry. */
4169 	uvm_map_splitentry(map, tmp, entry, addr);
4170 }
4171 
4172 /*
4173  * Boundary fixer.
4174  */
4175 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4176 static __inline vaddr_t
4177 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4178 {
4179 	return (min < bound && max > bound) ? bound : max;
4180 }
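/*
 * Worked example (arbitrary values): the boundary is returned only when
 * [min, max) actually crosses it; otherwise max is returned unchanged.
 */
#if 0	/* example only */
	/* 0x4000 lies inside [0x1000, 0x8000), so max is clamped to it. */
	KASSERT(uvm_map_boundfix(0x1000, 0x8000, 0x4000) == 0x4000);
	/* 0x4000 lies below [0x5000, 0x8000), so max is left alone. */
	KASSERT(uvm_map_boundfix(0x5000, 0x8000, 0x4000) == 0x8000);
#endif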
4181 
4182 /*
4183  * Choose free list based on address at start of free space.
4184  *
4185  * The uvm_addr_state returned contains addr and is the first of:
4186  * - uaddr_exe
4187  * - uaddr_brk_stack
4188  * - uaddr_any
4189  */
4190 struct uvm_addr_state*
4191 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4192 {
4193 	struct uvm_addr_state *uaddr;
4194 	int i;
4195 
4196 	/* Special case the first page, to prevent mmap from returning 0. */
4197 	if (addr < VMMAP_MIN_ADDR)
4198 		return NULL;
4199 
4200 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4201 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4202 		if (addr >= uvm_maxkaddr)
4203 			return NULL;
4204 	}
4205 
4206 	/* Is the address inside the exe-only map? */
4207 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4208 	    addr < map->uaddr_exe->uaddr_maxaddr)
4209 		return map->uaddr_exe;
4210 
4211 	/* Check if the space falls inside brk/stack area. */
4212 	if ((addr >= map->b_start && addr < map->b_end) ||
4213 	    (addr >= map->s_start && addr < map->s_end)) {
4214 		if (map->uaddr_brk_stack != NULL &&
4215 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4216 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4217 			return map->uaddr_brk_stack;
4218 		} else
4219 			return NULL;
4220 	}
4221 
4222 	/*
4223 	 * Check the other selectors.
4224 	 *
4225 	 * These selectors are only marked as the owner if they have insert
4226 	 * functions.
4227 	 */
4228 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4229 		uaddr = map->uaddr_any[i];
4230 		if (uaddr == NULL)
4231 			continue;
4232 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4233 			continue;
4234 
4235 		if (addr >= uaddr->uaddr_minaddr &&
4236 		    addr < uaddr->uaddr_maxaddr)
4237 			return uaddr;
4238 	}
4239 
4240 	return NULL;
4241 }
4242 
4243 /*
4244  * Choose free list based on address at start of free space.
4245  *
4246  * The uvm_addr_state returned contains addr and is the first of:
4247  * - uaddr_exe
4248  * - uaddr_brk_stack
4249  * - uaddr_any
4250  */
4251 struct uvm_addr_state*
4252 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4253 {
4254 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4255 }
4256 
4257 /*
4258  * Returns the first free-memory boundary that is crossed by [min-max].
4259  */
4260 vsize_t
4261 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4262 {
4263 	struct uvm_addr_state	*uaddr;
4264 	int			 i;
4265 
4266 	/* Never return first page. */
4267 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4268 
4269 	/* Treat uvm_maxkaddr specially if this is a kernel map. */
4270 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4271 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4272 
4273 	/* Check for exe-only boundaries. */
4274 	if (map->uaddr_exe != NULL) {
4275 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4276 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4277 	}
4278 
4279 	/* Check for brk/stack boundaries. */
4280 	if (map->uaddr_brk_stack != NULL) {
4281 		max = uvm_map_boundfix(min, max,
4282 		    map->uaddr_brk_stack->uaddr_minaddr);
4283 		max = uvm_map_boundfix(min, max,
4284 		    map->uaddr_brk_stack->uaddr_maxaddr);
4285 	}
4286 
4287 	/* Check other boundaries. */
4288 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4289 		uaddr = map->uaddr_any[i];
4290 		if (uaddr != NULL) {
4291 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4292 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4293 		}
4294 	}
4295 
4296 	/* Boundaries at stack and brk() area. */
4297 	max = uvm_map_boundfix(min, max, map->s_start);
4298 	max = uvm_map_boundfix(min, max, map->s_end);
4299 	max = uvm_map_boundfix(min, max, map->b_start);
4300 	max = uvm_map_boundfix(min, max, map->b_end);
4301 
4302 	return max;
4303 }
4304 
4305 /*
4306  * Update map allocation start and end addresses from proc vmspace.
4307  */
4308 void
4309 uvm_map_vmspace_update(struct vm_map *map,
4310     struct uvm_map_deadq *dead, int flags)
4311 {
4312 	struct vmspace *vm;
4313 	vaddr_t b_start, b_end, s_start, s_end;
4314 
4315 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4316 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4317 
4318 	/*
4319 	 * Derive actual allocation boundaries from vmspace.
4320 	 */
4321 	vm = (struct vmspace *)map;
4322 	b_start = (vaddr_t)vm->vm_daddr;
4323 	b_end   = b_start + BRKSIZ;
4324 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4325 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4326 #ifdef DIAGNOSTIC
4327 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4328 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4329 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4330 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4331 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4332 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4333 		    vm, b_start, b_end, s_start, s_end);
4334 	}
4335 #endif
4336 
4337 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4338 	    map->s_start == s_start && map->s_end == s_end))
4339 		return;
4340 
4341 	uvm_map_freelist_update(map, dead, b_start, b_end,
4342 	    s_start, s_end, flags);
4343 }
4344 
4345 /*
4346  * Grow kernel memory.
4347  *
4348  * This function is only called for kernel maps when an allocation fails.
4349  *
4350  * If the map has a gap that is large enough to accommodate alloc_sz, this
4351  * function will make sure map->free will include it.
4352  */
4353 void
4354 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4355     vsize_t alloc_sz, int flags)
4356 {
4357 	vsize_t sz;
4358 	vaddr_t end;
4359 	struct vm_map_entry *entry;
4360 
4361 	/* Kernel memory only. */
4362 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4363 	/* Destroy free list. */
4364 	uvm_map_freelist_update_clear(map, dead);
4365 
4366 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
4367 	if (map->flags & VM_MAP_GUARDPAGES)
4368 		alloc_sz += PAGE_SIZE;
4369 
4370 	/*
4371 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4372 	 *
4373 	 * Don't handle the case where the multiplication overflows:
4374 	 * if that happens, the allocation is probably too big anyway.
4375 	 */
4376 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4377 
4378 	/*
4379 	 * Walk forward until a gap large enough for alloc_sz shows up.
4380 	 *
4381 	 * We assume the kernel map has no boundaries.
4382 	 * uvm_maxkaddr may be zero.
4383 	 */
4384 	end = MAX(uvm_maxkaddr, map->min_offset);
4385 	entry = uvm_map_entrybyaddr(&map->addr, end);
4386 	while (entry && entry->fspace < alloc_sz)
4387 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4388 	if (entry) {
4389 		end = MAX(VMMAP_FREE_START(entry), end);
4390 		end += MIN(sz, map->max_offset - end);
4391 	} else
4392 		end = map->max_offset;
4393 
4394 	/* Reserve pmap entries. */
4395 #ifdef PMAP_GROWKERNEL
4396 	uvm_maxkaddr = pmap_growkernel(end);
4397 #else
4398 	uvm_maxkaddr = end;
4399 #endif
4400 
4401 	/* Rebuild free list. */
4402 	uvm_map_freelist_update_refill(map, flags);
4403 }
4404 
4405 /*
4406  * Freelist update subfunction: unlink all entries from freelists.
4407  */
4408 void
4409 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4410 {
4411 	struct uvm_addr_state *free;
4412 	struct vm_map_entry *entry, *prev, *next;
4413 
4414 	prev = NULL;
4415 	for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL;
4416 	    entry = next) {
4417 		next = RB_NEXT(uvm_map_addr, &map->addr, entry);
4418 
4419 		free = uvm_map_uaddr_e(map, entry);
4420 		uvm_mapent_free_remove(map, free, entry);
4421 
4422 		if (prev != NULL && entry->start == entry->end) {
4423 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4424 			uvm_mapent_addr_remove(map, entry);
4425 			DEAD_ENTRY_PUSH(dead, entry);
4426 		} else
4427 			prev = entry;
4428 	}
4429 }
4430 
4431 /*
4432  * Freelist update subfunction: refill the freelists with entries.
4433  */
4434 void
4435 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4436 {
4437 	struct vm_map_entry *entry;
4438 	vaddr_t min, max;
4439 
4440 	RB_FOREACH(entry, uvm_map_addr, &map->addr) {
4441 		min = VMMAP_FREE_START(entry);
4442 		max = VMMAP_FREE_END(entry);
4443 		entry->fspace = 0;
4444 
4445 		entry = uvm_map_fix_space(map, entry, min, max, flags);
4446 	}
4447 
4448 	uvm_tree_sanity(map, __FILE__, __LINE__);
4449 }
4450 
4451 /*
4452  * Change {b,s}_{start,end} allocation ranges and associated free lists.
4453  */
4454 void
4455 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4456     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4457 {
4458 	KDASSERT(b_end >= b_start && s_end >= s_start);
4459 
4460 	/* Clear all free lists. */
4461 	uvm_map_freelist_update_clear(map, dead);
4462 
4463 	/* Apply new bounds. */
4464 	map->b_start = b_start;
4465 	map->b_end   = b_end;
4466 	map->s_start = s_start;
4467 	map->s_end   = s_end;
4468 
4469 	/* Refill free lists. */
4470 	uvm_map_freelist_update_refill(map, flags);
4471 }
4472 
4473 /*
4474  * Assign a uvm_addr_state to the specified pointer in vm_map.
4475  *
4476  * May sleep.
4477  */
4478 void
4479 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
4480     struct uvm_addr_state *newval)
4481 {
4482 	struct uvm_map_deadq dead;
4483 
4484 	/* Pointer which must be in this map. */
4485 	KASSERT(which != NULL);
4486 	KASSERT((void*)map <= (void*)(which) &&
4487 	    (void*)(which) < (void*)(map + 1));
4488 
4489 	vm_map_lock(map);
4490 	TAILQ_INIT(&dead);
4491 	uvm_map_freelist_update_clear(map, &dead);
4492 
4493 	uvm_addr_destroy(*which);
4494 	*which = newval;
4495 
4496 	uvm_map_freelist_update_refill(map, 0);
4497 	vm_map_unlock(map);
4498 	uvm_unmap_detach(&dead, 0);
4499 }
4500 
4501 /*
4502  * Correct space insert.
4503  *
4504  * Entry must not be on any freelist.
4505  */
4506 struct vm_map_entry*
4507 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
4508     vaddr_t min, vaddr_t max, int flags)
4509 {
4510 	struct uvm_addr_state	*free, *entfree;
4511 	vaddr_t			 lmax;
4512 
4513 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
4514 	KDASSERT(min <= max);
4515 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
4516 	    min == map->min_offset);
4517 
4518 	/*
4519 	 * During the function, entfree will always point at the uaddr state
4520 	 * for entry.
4521 	 */
4522 	entfree = (entry == NULL ? NULL :
4523 	    uvm_map_uaddr_e(map, entry));
4524 
4525 	while (min != max) {
4526 		/* Claim guard page for entry. */
4527 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
4528 		    VMMAP_FREE_END(entry) == entry->end &&
4529 		    entry->start != entry->end) {
4530 			if (max - min == 2 * PAGE_SIZE) {
4531 				/*
4532 				 * If the free-space gap is exactly 2 pages,
4533 				 * we make the guard 2 pages instead of 1.
4534 				 * Because in a guarded map, an area needs
4535 				 * at least 2 pages to allocate from:
4536 				 * one page for the allocation and one for
4537 				 * the guard.
4538 				 */
4539 				entry->guard = 2 * PAGE_SIZE;
4540 				min = max;
4541 			} else {
4542 				entry->guard = PAGE_SIZE;
4543 				min += PAGE_SIZE;
4544 			}
4545 			continue;
4546 		}
4547 
4548 		/*
4549 		 * Handle the case where entry has a 2-page guard, but the
4550 		 * space after entry is freed.
4551 		 */
4552 		if (entry != NULL && entry->fspace == 0 &&
4553 		    entry->guard > PAGE_SIZE) {
4554 			entry->guard = PAGE_SIZE;
4555 			min = VMMAP_FREE_START(entry);
4556 		}
4557 
4558 		lmax = uvm_map_boundary(map, min, max);
4559 		free = uvm_map_uaddr(map, min);
4560 
4561 		/*
4562 		 * Entries are merged if they point at the same uvm_free().
4563 		 * Exception to that rule: if min == uvm_maxkaddr, a new
4564 		 * entry is started regardless (otherwise the allocators
4565 		 * will get confused).
4566 		 */
4567 		if (entry != NULL && free == entfree &&
4568 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
4569 		    min == uvm_maxkaddr)) {
4570 			KDASSERT(VMMAP_FREE_END(entry) == min);
4571 			entry->fspace += lmax - min;
4572 		} else {
4573 			/*
4574 			 * Commit entry to the free list: no more free space
4575 			 * will be added to it.
4576 			 * We'll start a new entry and add to that entry
4577 			 * instead.
4578 			 */
4579 			if (entry != NULL)
4580 				uvm_mapent_free_insert(map, entfree, entry);
4581 
4582 			/* New entry for new uaddr. */
4583 			entry = uvm_mapent_alloc(map, flags);
4584 			KDASSERT(entry != NULL);
4585 			entry->end = entry->start = min;
4586 			entry->guard = 0;
4587 			entry->fspace = lmax - min;
4588 			entry->object.uvm_obj = NULL;
4589 			entry->offset = 0;
4590 			entry->etype = 0;
4591 			entry->protection = entry->max_protection = 0;
4592 			entry->inheritance = 0;
4593 			entry->wired_count = 0;
4594 			entry->advice = 0;
4595 			entry->aref.ar_pageoff = 0;
4596 			entry->aref.ar_amap = NULL;
4597 			uvm_mapent_addr_insert(map, entry);
4598 
4599 			entfree = free;
4600 		}
4601 
4602 		min = lmax;
4603 	}
4604 	/* Finally put entry on the uaddr state. */
4605 	if (entry != NULL)
4606 		uvm_mapent_free_insert(map, entfree, entry);
4607 
4608 	return entry;
4609 }
4610 
4611 /*
4612  * MQuery style of allocation.
4613  *
4614  * This allocator searches forward until sufficient space is found to map
4615  * the given size.
4616  *
4617  * XXX: factor in offset (via pmap_prefer) and protection?
4618  */
4619 int
4620 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
4621     int flags)
4622 {
4623 	struct vm_map_entry *entry, *last;
4624 	vaddr_t addr;
4625 	vaddr_t tmp, pmap_align, pmap_offset;
4626 	int error;
4627 
4628 	addr = *addr_p;
4629 	vm_map_lock_read(map);
4630 
4631 	/* Configure pmap prefer. */
4632 	if (offset != UVM_UNKNOWN_OFFSET) {
4633 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
4634 		pmap_offset = PMAP_PREFER_OFFSET(offset);
4635 	} else {
4636 		pmap_align = PAGE_SIZE;
4637 		pmap_offset = 0;
4638 	}
4639 
4640 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
4641 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
4642 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
4643 		if (tmp < addr)
4644 			tmp += pmap_align;
4645 		addr = tmp;
4646 	}
4647 
4648 	/* First, check if the requested range is fully available. */
4649 	entry = uvm_map_entrybyaddr(&map->addr, addr);
4650 	last = NULL;
4651 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
4652 		error = 0;
4653 		goto out;
4654 	}
4655 	if (flags & UVM_FLAG_FIXED) {
4656 		error = EINVAL;
4657 		goto out;
4658 	}
4659 
4660 	error = ENOMEM; /* Default error from here. */
4661 
4662 	/*
4663 	 * At this point, the memory at <addr, sz> is not available.
4664 	 * The reasons are:
4665 	 * [1] it's outside the map,
4666 	 * [2] it starts in used memory (and therefore needs to move
4667 	 *     toward the first free page in entry),
4668 	 * [3] it starts in free memory but bumps into used memory.
4669 	 *
4670 	 * Note that for case [2], the forward moving is handled by the
4671 	 * for loop below.
4672 	 */
4673 	if (entry == NULL) {
4674 		/* [1] Outside the map. */
4675 		if (addr >= map->max_offset)
4676 			goto out;
4677 		else
4678 			entry = RB_MIN(uvm_map_addr, &map->addr);
4679 	} else if (VMMAP_FREE_START(entry) <= addr) {
4680 		/* [3] Bumped into used memory. */
4681 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4682 	}
4683 
4684 	/* Test if the next entry is sufficient for the allocation. */
4685 	for (; entry != NULL;
4686 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
4687 		if (entry->fspace == 0)
4688 			continue;
4689 		addr = VMMAP_FREE_START(entry);
4690 
4691 restart:	/* Restart address checks on address change. */
4692 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
4693 		if (tmp < addr)
4694 			tmp += pmap_align;
4695 		addr = tmp;
4696 		if (addr >= VMMAP_FREE_END(entry))
4697 			continue;
4698 
4699 		/* Skip brk() allocation addresses. */
4700 		if (addr + sz > map->b_start && addr < map->b_end) {
4701 			if (VMMAP_FREE_END(entry) > map->b_end) {
4702 				addr = map->b_end;
4703 				goto restart;
4704 			} else
4705 				continue;
4706 		}
4707 		/* Skip stack allocation addresses. */
4708 		if (addr + sz > map->s_start && addr < map->s_end) {
4709 			if (VMMAP_FREE_END(entry) > map->s_end) {
4710 				addr = map->s_end;
4711 				goto restart;
4712 			} else
4713 				continue;
4714 		}
4715 
4716 		last = NULL;
4717 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
4718 			error = 0;
4719 			goto out;
4720 		}
4721 	}
4722 
4723 out:
4724 	vm_map_unlock_read(map);
4725 	if (error == 0)
4726 		*addr_p = addr;
4727 	return error;
4728 }
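/*
 * Illustrative userland counterpart (example only): this allocator backs
 * mquery(2), which probes for a free region without mapping it (ld.so
 * uses it to place shared objects).  "len" and "fd" are hypothetical and
 * the flag usage is an assumption: with MAP_FIXED the probe fails rather
 * than searching forward from the hint.
 */
#if 0	/* example only */
#include <sys/mman.h>
#include <err.h>

	void *hint = (void *)0x200000000UL;
	void *where;

	where = mquery(hint, len, PROT_READ, MAP_FIXED, fd, 0);
	if (where == MAP_FAILED)
		warn("mquery");
#endif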
4729 
4730 /*
4731  * Determine allocation bias.
4732  *
4733  * Returns 1 if we should bias to high addresses, -1 for a bias towards low
4734  * addresses, or 0 for no bias.
4735  * The bias mechanism is intended to avoid clashing with brk() and stack
4736  * areas.
4737  */
4738 int
4739 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
4740 {
4741 	vaddr_t start, end;
4742 
4743 	start = VMMAP_FREE_START(entry);
4744 	end = VMMAP_FREE_END(entry);
4745 
4746 	/* Stay at the top of brk() area. */
4747 	if (end >= map->b_start && start < map->b_end)
4748 		return 1;
4749 	/* Stay at the far end of the stack area. */
4750 	if (end >= map->s_start && start < map->s_end) {
4751 #ifdef MACHINE_STACK_GROWS_UP
4752 		return 1;
4753 #else
4754 		return -1;
4755 #endif
4756 	}
4757 
4758 	/* No bias, this area is meant for us. */
4759 	return 0;
4760 }
4761 
4762 
4763 boolean_t
4764 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
4765 {
4766 	boolean_t rv;
4767 
4768 	if (map->flags & VM_MAP_INTRSAFE) {
4769 		rv = TRUE;
4770 	} else {
4771 		if (map->flags & VM_MAP_BUSY) {
4772 			return (FALSE);
4773 		}
4774 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
4775 	}
4776 
4777 	if (rv) {
4778 		map->timestamp++;
4779 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4780 		uvm_tree_sanity(map, file, line);
4781 		uvm_tree_size_chk(map, file, line);
4782 	}
4783 
4784 	return (rv);
4785 }
4786 
4787 void
4788 vm_map_lock_ln(struct vm_map *map, char *file, int line)
4789 {
4790 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
4791 		do {
4792 			while (map->flags & VM_MAP_BUSY) {
4793 				map->flags |= VM_MAP_WANTLOCK;
4794 				tsleep(&map->flags, PVM, (char *)vmmapbsy, 0);
4795 			}
4796 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
4797 	}
4798 
4799 	map->timestamp++;
4800 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4801 	uvm_tree_sanity(map, file, line);
4802 	uvm_tree_size_chk(map, file, line);
4803 }
4804 
4805 void
4806 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
4807 {
4808 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4809 		rw_enter_read(&map->lock);
4810 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4811 	uvm_tree_sanity(map, file, line);
4812 	uvm_tree_size_chk(map, file, line);
4813 }
4814 
4815 void
4816 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
4817 {
4818 	uvm_tree_sanity(map, file, line);
4819 	uvm_tree_size_chk(map, file, line);
4820 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4821 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4822 		rw_exit(&map->lock);
4823 }
4824 
4825 void
4826 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
4827 {
4828 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
4829 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
4830 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4831 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4832 		rw_exit_read(&map->lock);
4833 }
4834 
4835 void
4836 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
4837 {
4838 	uvm_tree_sanity(map, file, line);
4839 	uvm_tree_size_chk(map, file, line);
4840 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4841 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4842 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4843 		rw_enter(&map->lock, RW_DOWNGRADE);
4844 }
4845 
4846 void
4847 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
4848 {
4849 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
4850 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
4851 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4852 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
4853 		rw_exit_read(&map->lock);
4854 		rw_enter_write(&map->lock);
4855 	}
4856 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4857 	uvm_tree_sanity(map, file, line);
4858 }
4859 
4860 void
4861 vm_map_busy_ln(struct vm_map *map, char *file, int line)
4862 {
4863 	map->flags |= VM_MAP_BUSY;
4864 }
4865 
4866 void
4867 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
4868 {
4869 	int oflags;
4870 
4871 	oflags = map->flags;
4872 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
4873 	if (oflags & VM_MAP_WANTLOCK)
4874 		wakeup(&map->flags);
4875 }
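/*
 * Sketch of the busy/unbusy pattern the helpers above implement (example
 * only; the exact sequence is an assumption modeled on the wiring code):
 * mark the map busy while write-locked, downgrade around a sleeping
 * operation, then upgrade and clear the busy flag so writers blocked in
 * vm_map_lock() are woken up.
 */
#if 0	/* example only */
	vm_map_lock(map);
	vm_map_busy(map);
	vm_map_downgrade(map);
	/* ... fault in / wire pages while only read-locked ... */
	vm_map_upgrade(map);
	vm_map_unbusy(map);
	vm_map_unlock(map);
#endif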
4876 
4877 
4878 #undef RB_AUGMENT
4879 #define RB_AUGMENT(x)	uvm_map_addr_augment((x))
4880 RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
4881     uvm_mapentry_addrcmp);
4882 #undef RB_AUGMENT
4883 
4884 
4885 /*
4886  * MD code: vmspace allocator setup.
4887  */
4888 
4889 #ifdef __i386__
4890 void
4891 uvm_map_setup_md(struct vm_map *map)
4892 {
4893 	vaddr_t		min, max;
4894 
4895 	min = map->min_offset;
4896 	max = map->max_offset;
4897 
4898 	/*
4899 	 * Ensure the selectors will not try to manage page 0;
4900 	 * it's too special.
4901 	 */
4902 	if (min < VMMAP_MIN_ADDR)
4903 		min = VMMAP_MIN_ADDR;
4904 
4905 #if 0	/* Cool stuff, not yet */
4906 	/* Hinted allocations. */
4907 	map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max,
4908 	    1024 * 1024 * 1024);
4909 
4910 	/* Executable code is special. */
4911 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
4912 	/* Place normal allocations beyond executable mappings. */
4913 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
4914 #else	/* Crappy stuff, for now */
4915 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
4916 #endif
4917 
4918 #ifndef SMALL_KERNEL
4919 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
4920 #endif /* !SMALL_KERNEL */
4921 }
4922 #elif __LP64__
4923 void
4924 uvm_map_setup_md(struct vm_map *map)
4925 {
4926 	vaddr_t		min, max;
4927 
4928 	min = map->min_offset;
4929 	max = map->max_offset;
4930 
4931 	/*
4932 	 * Ensure the selectors will not try to manage page 0;
4933 	 * it's too special.
4934 	 */
4935 	if (min < VMMAP_MIN_ADDR)
4936 		min = VMMAP_MIN_ADDR;
4937 
4938 #if 0	/* Cool stuff, not yet */
4939 	/* Hinted allocations above 4GB */
4940 	map->uaddr_any[0] =
4941 	    uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024);
4942 	/* Hinted allocations below 4GB */
4943 	map->uaddr_any[1] =
4944 	    uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL,
4945 	    1024 * 1024 * 1024);
4946 	/* Normal allocations, always above 4GB */
4947 	map->uaddr_any[3] =
4948 	    uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
4949 #else	/* Crappy stuff, for now */
4950 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
4951 #endif
4952 
4953 #ifndef SMALL_KERNEL
4954 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
4955 #endif /* !SMALL_KERNEL */
4956 }
4957 #else	/* non-i386, 32 bit */
4958 void
4959 uvm_map_setup_md(struct vm_map *map)
4960 {
4961 	vaddr_t		min, max;
4962 
4963 	min = map->min_offset;
4964 	max = map->max_offset;
4965 
4966 	/*
4967 	 * Ensure the selectors will not try to manage page 0;
4968 	 * it's too special.
4969 	 */
4970 	if (min < VMMAP_MIN_ADDR)
4971 		min = VMMAP_MIN_ADDR;
4972 
4973 #if 0	/* Cool stuff, not yet */
4974 	/* Hinted allocations. */
4975 	map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max,
4976 	    1024 * 1024 * 1024);
4977 	/* Normal allocations. */
4978 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
4979 #else	/* Crappy stuff, for now */
4980 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
4981 #endif
4982 
4983 #ifndef SMALL_KERNEL
4984 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
4985 #endif /* !SMALL_KERNEL */
4986 }
4987 #endif
4988