xref: /openbsd-src/sys/uvm/uvm_map.c (revision 4e1ee0786f11cc571bd0be17d38e46f635c719fc)
1 /*	$OpenBSD: uvm_map.c,v 1.278 2021/10/05 15:37:21 mpi Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/acct.h>
90 #include <sys/mman.h>
91 #include <sys/proc.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/sysctl.h>
95 #include <sys/signalvar.h>
96 #include <sys/syslog.h>
97 #include <sys/user.h>
98 #include <sys/tracepoint.h>
99 
100 #ifdef SYSVSHM
101 #include <sys/shm.h>
102 #endif
103 
104 #include <uvm/uvm.h>
105 
106 #ifdef DDB
107 #include <uvm/uvm_ddb.h>
108 #endif
109 
110 #include <uvm/uvm_addr.h>
111 
112 
113 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
114 int			 uvm_mapent_isjoinable(struct vm_map*,
115 			    struct vm_map_entry*, struct vm_map_entry*);
116 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
117 			    struct vm_map_entry*, struct uvm_map_deadq*);
118 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
119 			    struct vm_map_entry*, struct uvm_map_deadq*);
120 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
121 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
122 			    struct uvm_map_deadq*, struct vm_map_entry*);
123 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
124 void			 uvm_mapent_free(struct vm_map_entry*);
125 void			 uvm_unmap_kill_entry(struct vm_map*,
126 			    struct vm_map_entry*);
127 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
128 void			 uvm_mapent_mkfree(struct vm_map*,
129 			    struct vm_map_entry*, struct vm_map_entry**,
130 			    struct uvm_map_deadq*, boolean_t);
131 void			 uvm_map_pageable_pgon(struct vm_map*,
132 			    struct vm_map_entry*, struct vm_map_entry*,
133 			    vaddr_t, vaddr_t);
134 int			 uvm_map_pageable_wire(struct vm_map*,
135 			    struct vm_map_entry*, struct vm_map_entry*,
136 			    vaddr_t, vaddr_t, int);
137 void			 uvm_map_setup_entries(struct vm_map*);
138 void			 uvm_map_setup_md(struct vm_map*);
139 void			 uvm_map_teardown(struct vm_map*);
140 void			 uvm_map_vmspace_update(struct vm_map*,
141 			    struct uvm_map_deadq*, int);
142 void			 uvm_map_kmem_grow(struct vm_map*,
143 			    struct uvm_map_deadq*, vsize_t, int);
144 void			 uvm_map_freelist_update_clear(struct vm_map*,
145 			    struct uvm_map_deadq*);
146 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
147 void			 uvm_map_freelist_update(struct vm_map*,
148 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
149 			    vaddr_t, vaddr_t, int);
150 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
151 			    vaddr_t, vaddr_t, int);
152 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
153 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
154 			    int);
155 int			 uvm_map_findspace(struct vm_map*,
156 			    struct vm_map_entry**, struct vm_map_entry**,
157 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
158 			    vaddr_t);
159 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
160 void			 uvm_map_addr_augment(struct vm_map_entry*);
161 
162 int			 uvm_map_inentry_recheck(u_long, vaddr_t,
163 			     struct p_inentry *);
164 boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
165 			     vaddr_t, int (*)(vm_map_entry_t), u_long);
166 /*
167  * Tree management functions.
168  */
169 
170 static inline void	 uvm_mapent_copy(struct vm_map_entry*,
171 			    struct vm_map_entry*);
172 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
173 			    const struct vm_map_entry*);
174 void			 uvm_mapent_free_insert(struct vm_map*,
175 			    struct uvm_addr_state*, struct vm_map_entry*);
176 void			 uvm_mapent_free_remove(struct vm_map*,
177 			    struct uvm_addr_state*, struct vm_map_entry*);
178 void			 uvm_mapent_addr_insert(struct vm_map*,
179 			    struct vm_map_entry*);
180 void			 uvm_mapent_addr_remove(struct vm_map*,
181 			    struct vm_map_entry*);
182 void			 uvm_map_splitentry(struct vm_map*,
183 			    struct vm_map_entry*, struct vm_map_entry*,
184 			    vaddr_t);
185 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
186 
187 /*
188  * uvm_vmspace_fork helper functions.
189  */
190 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
191 			    vsize_t, vm_prot_t, vm_prot_t,
192 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
193 			    int);
194 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
195 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
196 			    struct vm_map_entry*, struct uvm_map_deadq*);
197 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
198 			    struct vm_map*, struct vm_map_entry*,
199 			    struct uvm_map_deadq*);
200 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
201 			    struct vm_map*, struct vm_map_entry*,
202 			    struct uvm_map_deadq*);
203 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
204 			    struct vm_map*, struct vm_map_entry*,
205 			    struct uvm_map_deadq*);
206 
207 /*
208  * Tree validation.
209  */
210 #ifdef VMMAP_DEBUG
211 void			 uvm_tree_assert(struct vm_map*, int, char*,
212 			    char*, int);
213 #define UVM_ASSERT(map, cond, file, line)				\
214 	uvm_tree_assert((map), (cond), #cond, (file), (line))
215 void			 uvm_tree_sanity(struct vm_map*, char*, int);
216 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
217 void			 vmspace_validate(struct vm_map*);
218 #else
219 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
220 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
221 #define vmspace_validate(_map)				do {} while (0)
222 #endif
223 
224 /*
225  * All architectures will have pmap_prefer.
226  */
227 #ifndef PMAP_PREFER
228 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
229 #define PMAP_PREFER_OFFSET(off)	0
230 #define PMAP_PREFER(addr, off)	(addr)
231 #endif
232 
233 /*
234  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
235  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
236  *
237  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
238  * each time.
239  */
240 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
241 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
242 #define VM_MAP_KSIZE_ALLOCMUL	4
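/*
 * Illustrative arithmetic, going by the comment above and assuming 4 KiB
 * pages (not part of the original source): the kernel map starts out at
 * 512 pages (2 MiB).  When a 64 KiB allocation no longer fits, the map
 * attempts to grow by VM_MAP_KSIZE_ALLOCMUL * 64 KiB = 256 KiB, but never
 * by less than VM_MAP_KSIZE_DELTA = 256 pages (1 MiB), so the growth step
 * in that case is 1 MiB.
 */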
243 /*
244  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
245  * ahead.
246  */
247 #define FSPACE_DELTA		8
248 /*
249  * Put allocations adjacent to previous allocations when the free-space tree
250  * is larger than FSPACE_COMPACT entries.
251  *
252  * Alignment and PMAP_PREFER may still cause the entry to not be fully
253  * adjacent. Note that this strategy reduces memory fragmentation (by leaving
254  * a large space before or after the allocation).
255  */
256 #define FSPACE_COMPACT		128
257 /*
258  * Make the address selection skip at most this many bytes from the start of
259  * the free space in which the allocation takes place.
260  *
261  * The main idea behind a randomized address space is that an attacker cannot
262  * know where to target his attack. Therefore, the location of objects must be
263  * as random as possible. However, the goal is not to create the most sparse
264  * map that is possible.
265  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
266  * sizes, thereby reducing the sparseness. The biggest randomization comes
267  * from fragmentation, i.e. FSPACE_COMPACT.
268  */
269 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
270 /*
271  * Allow for small gaps in the overflow areas.
272  * Gap size is in bytes and does not have to be a multiple of page-size.
273  */
274 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
275 
276 /* auto-allocate address lower bound */
277 #define VMMAP_MIN_ADDR		PAGE_SIZE
278 
279 
280 #ifdef DEADBEEF0
281 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
282 #else
283 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
284 #endif
285 
286 #ifdef DEBUG
287 int uvm_map_printlocks = 0;
288 
289 #define LPRINTF(_args)							\
290 	do {								\
291 		if (uvm_map_printlocks)					\
292 			printf _args;					\
293 	} while (0)
294 #else
295 #define LPRINTF(_args)	do {} while (0)
296 #endif
297 
298 static struct mutex uvm_kmapent_mtx;
299 static struct timeval uvm_kmapent_last_warn_time;
300 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
301 
302 const char vmmapbsy[] = "vmmapbsy";
303 
304 /*
305  * pool for vmspace structures.
306  */
307 struct pool uvm_vmspace_pool;
308 
309 /*
310  * pool for dynamically-allocated map entries.
311  */
312 struct pool uvm_map_entry_pool;
313 struct pool uvm_map_entry_kmem_pool;
314 
315 /*
316  * This global represents the end of the kernel virtual address
317  * space. If we want to exceed this, we must grow the kernel
318  * virtual address space dynamically.
319  *
320  * Note, this variable is locked by kernel_map's lock.
321  */
322 vaddr_t uvm_maxkaddr;
323 
324 /*
325  * Locking predicate.
326  */
327 #define UVM_MAP_REQ_WRITE(_map)						\
328 	do {								\
329 		if ((_map)->ref_count > 0) {				\
330 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
331 				rw_assert_wrlock(&(_map)->lock);	\
332 			else						\
333 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
334 		}							\
335 	} while (0)
336 
337 #define	vm_map_modflags(map, set, clear)				\
338 	do {								\
339 		mtx_enter(&(map)->flags_lock);				\
340 		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
341 		mtx_leave(&(map)->flags_lock);				\
342 	} while (0)
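/*
 * Usage sketch for vm_map_modflags() (illustrative only; VM_MAP_WIREFUTURE
 * is assumed to be one of the flag bits from uvm_map.h): set a flag, then
 * clear it again, both under flags_lock:
 *
 *	vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
 *	...
 *	vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
 */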
343 
344 
345 /*
346  * Tree describing entries by address.
347  *
348  * Addresses are unique.
349  * Entries with start == end may only exist if they are the first entry
350  * (sorted by address) within a free-memory tree.
351  */
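/*
 * Each entry maps [start, end) and also tracks the free virtual space that
 * follows it (the guard and fspace fields).  VMMAP_FREE_START() and
 * VMMAP_FREE_END(), from uvm_map.h, delimit that free range; it is this
 * range that the free-space allocators index.
 */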
352 
353 static inline int
354 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
355     const struct vm_map_entry *e2)
356 {
357 	return e1->start < e2->start ? -1 : e1->start > e2->start;
358 }
359 
360 /*
361  * Copy mapentry.
362  */
363 static inline void
364 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
365 {
366 	caddr_t csrc, cdst;
367 	size_t sz;
368 
369 	csrc = (caddr_t)src;
370 	cdst = (caddr_t)dst;
371 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
372 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
373 
374 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
375 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
376 	memcpy(cdst, csrc, sz);
377 }
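/*
 * Only the fields between the uvm_map_entry_start_copy and
 * uvm_map_entry_stop_copy markers of struct vm_map_entry are copied;
 * anything outside that window (going by the marker names in uvm_map.h,
 * notably the tree linkage) is left untouched in dst.
 */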
378 
379 /*
380  * Handle free-list insertion.
381  */
382 void
383 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
384     struct vm_map_entry *entry)
385 {
386 	const struct uvm_addr_functions *fun;
387 #ifdef VMMAP_DEBUG
388 	vaddr_t min, max, bound;
389 #endif
390 
391 #ifdef VMMAP_DEBUG
392 	/*
393 	 * Boundary check.
394 	 * Boundaries are folded if they go on the same free list.
395 	 */
396 	min = VMMAP_FREE_START(entry);
397 	max = VMMAP_FREE_END(entry);
398 
399 	while (min < max) {
400 		bound = uvm_map_boundary(map, min, max);
401 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
402 		min = bound;
403 	}
404 #endif
405 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
406 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
407 
408 	UVM_MAP_REQ_WRITE(map);
409 
410 	/* Actual insert: forward to uaddr pointer. */
411 	if (uaddr != NULL) {
412 		fun = uaddr->uaddr_functions;
413 		KDASSERT(fun != NULL);
414 		if (fun->uaddr_free_insert != NULL)
415 			(*fun->uaddr_free_insert)(map, uaddr, entry);
416 		entry->etype |= UVM_ET_FREEMAPPED;
417 	}
418 
419 	/* Update fspace augmentation. */
420 	uvm_map_addr_augment(entry);
421 }
422 
423 /*
424  * Handle free-list removal.
425  */
426 void
427 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
428     struct vm_map_entry *entry)
429 {
430 	const struct uvm_addr_functions *fun;
431 
432 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
433 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
434 	UVM_MAP_REQ_WRITE(map);
435 
436 	if (uaddr != NULL) {
437 		fun = uaddr->uaddr_functions;
438 		if (fun->uaddr_free_remove != NULL)
439 			(*fun->uaddr_free_remove)(map, uaddr, entry);
440 		entry->etype &= ~UVM_ET_FREEMAPPED;
441 	}
442 }
443 
444 /*
445  * Handle address tree insertion.
446  */
447 void
448 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
449 {
450 	struct vm_map_entry *res;
451 
452 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
453 		panic("uvm_mapent_addr_insert: entry still in addr list");
454 	KDASSERT(entry->start <= entry->end);
455 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
456 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
457 
458 	TRACEPOINT(uvm, map_insert,
459 	    entry->start, entry->end, entry->protection, NULL);
460 
461 	UVM_MAP_REQ_WRITE(map);
462 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
463 	if (res != NULL) {
464 		panic("uvm_mapent_addr_insert: map %p entry %p "
465 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
466 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
467 		    map, entry,
468 		    entry->start, entry->end, entry->guard, entry->fspace,
469 		    res, res->start, res->end, res->guard, res->fspace);
470 	}
471 }
472 
473 /*
474  * Handle address tree removal.
475  */
476 void
477 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
478 {
479 	struct vm_map_entry *res;
480 
481 	TRACEPOINT(uvm, map_remove,
482 	    entry->start, entry->end, entry->protection, NULL);
483 
484 	UVM_MAP_REQ_WRITE(map);
485 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
486 	if (res != entry)
487 		panic("uvm_mapent_addr_remove");
488 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
489 }
490 
491 /*
492  * uvm_map_reference: add reference to a map
493  *
494  * => map need not be locked
495  */
496 void
497 uvm_map_reference(struct vm_map *map)
498 {
499 	atomic_inc_int(&map->ref_count);
500 }
501 
502 /*
503  * Calculate the dused delta.
504  */
505 vsize_t
506 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
507 {
508 	struct vmspace *vm;
509 	vsize_t sz;
510 	vaddr_t lmax;
511 	vaddr_t stack_begin, stack_end; /* Position of stack. */
512 
513 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
514 	vm = (struct vmspace *)map;
515 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
516 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
517 
518 	sz = 0;
519 	while (min != max) {
520 		lmax = max;
521 		if (min < stack_begin && lmax > stack_begin)
522 			lmax = stack_begin;
523 		else if (min < stack_end && lmax > stack_end)
524 			lmax = stack_end;
525 
526 		if (min >= stack_begin && min < stack_end) {
527 			/* nothing */
528 		} else
529 			sz += lmax - min;
530 		min = lmax;
531 	}
532 
533 	return sz >> PAGE_SHIFT;
534 }
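/*
 * Worked example (illustrative, assuming 4 KiB pages): with the stack
 * occupying [0x7000, 0x9000), uvmspace_dused(map, 0x5000, 0xa000) skips
 * the stack range and counts (0x7000 - 0x5000) + (0xa000 - 0x9000) =
 * 0x3000 bytes, i.e. it returns 3 pages.
 */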
535 
536 /*
537  * Find the entry describing the given address.
538  */
539 struct vm_map_entry*
540 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
541 {
542 	struct vm_map_entry *iter;
543 
544 	iter = RBT_ROOT(uvm_map_addr, atree);
545 	while (iter != NULL) {
546 		if (iter->start > addr)
547 			iter = RBT_LEFT(uvm_map_addr, iter);
548 		else if (VMMAP_FREE_END(iter) <= addr)
549 			iter = RBT_RIGHT(uvm_map_addr, iter);
550 		else
551 			return iter;
552 	}
553 	return NULL;
554 }
555 
556 /*
557  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
558  *
559  * Push dead entries into a linked list.
560  * Since the linked list abuses the address tree for storage, the entry
561  * may not be linked in a map.
562  *
563  * The deadq must be initialized with TAILQ_INIT() before the first call
564  * to this macro.  uvm_unmap_detach(deadq, 0) will remove the dead entries.
565  */
566 static inline void
567 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
568 {
569 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
570 }
571 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
572 	dead_entry_push((_headptr), (_entry))
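/*
 * Typical usage pattern, as seen further down in this file:
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	...
 *	DEAD_ENTRY_PUSH(&dead, entry);
 *	...
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);
 */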
573 
574 /*
575  * Helper function for uvm_map_findspace_tree.
576  *
577  * Given allocation constraints and pmap constraints, finds the
578  * lowest and highest address in a range that can be used for the
579  * allocation.
580  *
581  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
582  *
583  *
584  * Big chunk of math with a seasoning of dragons.
585  */
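/*
 * Worked example of the alignment clamping below (illustrative, assuming
 * 4 KiB pages): for a free range [0x1000, 0x9000), sz = 0x2000,
 * guardpg = 1 and align = 0x4000, sel_min starts at 0x1000 and sel_max at
 * 0x9000 - 0x2000 - 0x1000 = 0x6000; rounding sel_min up and masking
 * sel_max down to the alignment leaves sel_min = sel_max = 0x4000, the
 * only aligned start address that fits.
 */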
586 int
587 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
588     struct vm_map_entry *sel, vaddr_t align,
589     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
590 {
591 	vaddr_t sel_min, sel_max;
592 #ifdef PMAP_PREFER
593 	vaddr_t pmap_min, pmap_max;
594 #endif /* PMAP_PREFER */
595 #ifdef DIAGNOSTIC
596 	int bad;
597 #endif /* DIAGNOSTIC */
598 
599 	sel_min = VMMAP_FREE_START(sel);
600 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
601 
602 #ifdef PMAP_PREFER
603 
604 	/*
605 	 * There are two special cases in which we can satisfy both the align
606 	 * requirement and the pmap_prefer requirement:
607 	 * - when pmap_off == 0, we always select the larger of the two
608 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
609 	 *   satisfy the pmap_align requirement and automatically
610 	 *   satisfy the align requirement.
611 	 */
612 	if (align > PAGE_SIZE &&
613 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
614 		/*
615 		 * Simple case: only use align.
616 		 */
617 		sel_min = roundup(sel_min, align);
618 		sel_max &= ~(align - 1);
619 
620 		if (sel_min > sel_max)
621 			return ENOMEM;
622 
623 		/* Correct for bias. */
624 		if (sel_max - sel_min > FSPACE_BIASGAP) {
625 			if (bias > 0) {
626 				sel_min = sel_max - FSPACE_BIASGAP;
627 				sel_min = roundup(sel_min, align);
628 			} else if (bias < 0) {
629 				sel_max = sel_min + FSPACE_BIASGAP;
630 				sel_max &= ~(align - 1);
631 			}
632 		}
633 	} else if (pmap_align != 0) {
634 		/*
635 		 * Special case: satisfy both pmap_prefer and
636 		 * align argument.
637 		 */
638 		pmap_max = sel_max & ~(pmap_align - 1);
639 		pmap_min = sel_min;
640 		if (pmap_max < sel_min)
641 			return ENOMEM;
642 
643 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
644 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
645 			pmap_min = pmap_max - FSPACE_BIASGAP;
646 		/* Align pmap_min. */
647 		pmap_min &= ~(pmap_align - 1);
648 		if (pmap_min < sel_min)
649 			pmap_min += pmap_align;
650 		if (pmap_min > pmap_max)
651 			return ENOMEM;
652 
653 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
654 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
655 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
656 			    ~(pmap_align - 1);
657 		}
658 		if (pmap_min > pmap_max)
659 			return ENOMEM;
660 
661 		/* Apply pmap prefer offset. */
662 		pmap_max |= pmap_off;
663 		if (pmap_max > sel_max)
664 			pmap_max -= pmap_align;
665 		pmap_min |= pmap_off;
666 		if (pmap_min < sel_min)
667 			pmap_min += pmap_align;
668 
669 		/*
670 		 * Fixup: it's possible that pmap_min and pmap_max
671 		 * cross each other. In this case, try to find one
672 		 * address that is allowed.
673 		 * (This usually happens in biased case.)
674 		 */
675 		if (pmap_min > pmap_max) {
676 			if (pmap_min < sel_max)
677 				pmap_max = pmap_min;
678 			else if (pmap_max > sel_min)
679 				pmap_min = pmap_max;
680 			else
681 				return ENOMEM;
682 		}
683 
684 		/* Internal validation. */
685 		KDASSERT(pmap_min <= pmap_max);
686 
687 		sel_min = pmap_min;
688 		sel_max = pmap_max;
689 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
690 		sel_min = sel_max - FSPACE_BIASGAP;
691 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
692 		sel_max = sel_min + FSPACE_BIASGAP;
693 
694 #else
695 
696 	if (align > PAGE_SIZE) {
697 		sel_min = roundup(sel_min, align);
698 		sel_max &= ~(align - 1);
699 		if (sel_min > sel_max)
700 			return ENOMEM;
701 
702 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
703 			if (bias > 0) {
704 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
705 				    align);
706 			} else {
707 				sel_max = (sel_min + FSPACE_BIASGAP) &
708 				    ~(align - 1);
709 			}
710 		}
711 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
712 		sel_min = sel_max - FSPACE_BIASGAP;
713 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
714 		sel_max = sel_min + FSPACE_BIASGAP;
715 
716 #endif
717 
718 	if (sel_min > sel_max)
719 		return ENOMEM;
720 
721 #ifdef DIAGNOSTIC
722 	bad = 0;
723 	/* Lower boundary check. */
724 	if (sel_min < VMMAP_FREE_START(sel)) {
725 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
726 		    sel_min, VMMAP_FREE_START(sel));
727 		bad++;
728 	}
729 	/* Upper boundary check. */
730 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
731 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
732 		    sel_max,
733 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
734 		bad++;
735 	}
736 	/* Lower boundary alignment. */
737 	if (align != 0 && (sel_min & (align - 1)) != 0) {
738 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
739 		    sel_min, align);
740 		bad++;
741 	}
742 	/* Upper boundary alignment. */
743 	if (align != 0 && (sel_max & (align - 1)) != 0) {
744 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
745 		    sel_max, align);
746 		bad++;
747 	}
748 	/* Lower boundary PMAP_PREFER check. */
749 	if (pmap_align != 0 && align == 0 &&
750 	    (sel_min & (pmap_align - 1)) != pmap_off) {
751 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
752 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
753 		bad++;
754 	}
755 	/* Upper boundary PMAP_PREFER check. */
756 	if (pmap_align != 0 && align == 0 &&
757 	    (sel_max & (pmap_align - 1)) != pmap_off) {
758 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
759 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
760 		bad++;
761 	}
762 
763 	if (bad) {
764 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
765 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
766 		    "bias = %d, "
767 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
768 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
769 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
770 	}
771 #endif /* DIAGNOSTIC */
772 
773 	*min = sel_min;
774 	*max = sel_max;
775 	return 0;
776 }
777 
778 /*
779  * Test if memory starting at addr with sz bytes is free.
780  *
781  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
782  * the space.
783  * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
784  */
785 int
786 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
787     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
788     vaddr_t addr, vsize_t sz)
789 {
790 	struct uvm_addr_state *free;
791 	struct uvm_map_addr *atree;
792 	struct vm_map_entry *i, *i_end;
793 
794 	if (addr + sz < addr)
795 		return 0;
796 
797 	/*
798 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
799 	 */
800 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
801 		if (addr + sz > uvm_maxkaddr)
802 			return 0;
803 	}
804 
805 	atree = &map->addr;
806 
807 	/*
808 	 * Fill in first, last, so they point at the entries containing the
809 	 * first and last address of the range.
810 	 * Note that if they are not NULL, we don't perform the lookup.
811 	 */
812 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
813 	if (*start_ptr == NULL) {
814 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
815 		if (*start_ptr == NULL)
816 			return 0;
817 	} else
818 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
819 	if (*end_ptr == NULL) {
820 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
821 			*end_ptr = *start_ptr;
822 		else {
823 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
824 			if (*end_ptr == NULL)
825 				return 0;
826 		}
827 	} else
828 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
829 
830 	/* Validation. */
831 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
832 	KDASSERT((*start_ptr)->start <= addr &&
833 	    VMMAP_FREE_END(*start_ptr) > addr &&
834 	    (*end_ptr)->start < addr + sz &&
835 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
836 
837 	/*
838 	 * Check that none of the entries intersects with <addr, addr+sz>.
839 	 * Also, if an entry belongs to uaddr_exe or uaddr_brk_stack, it is
840 	 * considered unavailable unless called by those allocators.
841 	 */
842 	i = *start_ptr;
843 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
844 	for (; i != i_end;
845 	    i = RBT_NEXT(uvm_map_addr, i)) {
846 		if (i->start != i->end && i->end > addr)
847 			return 0;
848 
849 		/*
850 		 * uaddr_exe and uaddr_brk_stack may only be used
851 		 * by these allocators and the NULL uaddr (i.e. no
852 		 * uaddr).
853 		 * Reject if this requirement is not met.
854 		 */
855 		if (uaddr != NULL) {
856 			free = uvm_map_uaddr_e(map, i);
857 
858 			if (uaddr != free && free != NULL &&
859 			    (free == map->uaddr_exe ||
860 			     free == map->uaddr_brk_stack))
861 				return 0;
862 		}
863 	}
864 
865 	return -1;
866 }
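/*
 * Usage sketch (mirrors the callers below): start_ptr and end_ptr are set
 * to NULL to request the lookup and, on success, bracket the range:
 *
 *	first = last = NULL;
 *	if (uvm_map_isavail(map, NULL, &first, &last, addr, sz))
 *		... <addr, addr+sz> is free ...
 */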
867 
868 /*
869  * Invoke each address selector until an address is found.
870  * Will not invoke uaddr_exe.
871  */
872 int
873 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
874     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
875     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
876 {
877 	struct uvm_addr_state *uaddr;
878 	int i;
879 
880 	/*
881 	 * Allocation for sz bytes at any address,
882 	 * using the addr selectors in order.
883 	 */
884 	for (i = 0; i < nitems(map->uaddr_any); i++) {
885 		uaddr = map->uaddr_any[i];
886 
887 		if (uvm_addr_invoke(map, uaddr, first, last,
888 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
889 			return 0;
890 	}
891 
892 	/* Fall back to brk() and stack() address selectors. */
893 	uaddr = map->uaddr_brk_stack;
894 	if (uvm_addr_invoke(map, uaddr, first, last,
895 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
896 		return 0;
897 
898 	return ENOMEM;
899 }
900 
901 /* Calculate entry augmentation value. */
902 vsize_t
903 uvm_map_addr_augment_get(struct vm_map_entry *entry)
904 {
905 	vsize_t			 augment;
906 	struct vm_map_entry	*left, *right;
907 
908 	augment = entry->fspace;
909 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
910 		augment = MAX(augment, left->fspace_augment);
911 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
912 		augment = MAX(augment, right->fspace_augment);
913 	return augment;
914 }
915 
916 /*
917  * Update augmentation data in entry.
918  */
919 void
920 uvm_map_addr_augment(struct vm_map_entry *entry)
921 {
922 	vsize_t			 augment;
923 
924 	while (entry != NULL) {
925 		/* Calculate value for augmentation. */
926 		augment = uvm_map_addr_augment_get(entry);
927 
928 		/*
929 		 * Propagate the update towards the root.
930 		 * Once we find an entry that already has the correct value,
931 		 * stop, since it means all its parents will use the correct
932 		 * value too.
933 		 */
934 		if (entry->fspace_augment == augment)
935 			return;
936 		entry->fspace_augment = augment;
937 		entry = RBT_PARENT(uvm_map_addr, entry);
938 	}
939 }
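/*
 * In other words, fspace_augment caches the largest fspace value found in
 * the subtree rooted at the entry, as maintained by the two functions
 * above.  Address selectors can presumably use it to skip whole subtrees
 * that cannot hold an allocation of a given size.
 */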
940 
941 /*
942  * uvm_mapanon: establish a valid mapping in map for an anon
943  *
944  * => *addr and sz must be a multiple of PAGE_SIZE.
945  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
946  * => map must be unlocked.
947  *
948  * => align: align vaddr, must be a power-of-2.
949  *    Align is only a hint and will be ignored if the alignment fails.
950  */
951 int
952 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
953     vsize_t align, unsigned int flags)
954 {
955 	struct vm_map_entry	*first, *last, *entry, *new;
956 	struct uvm_map_deadq	 dead;
957 	vm_prot_t		 prot;
958 	vm_prot_t		 maxprot;
959 	vm_inherit_t		 inherit;
960 	int			 advice;
961 	int			 error;
962 	vaddr_t			 pmap_align, pmap_offset;
963 	vaddr_t			 hint;
964 
965 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
966 	KASSERT(map != kernel_map);
967 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
968 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
969 	splassert(IPL_NONE);
970 	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
971 
972 	/*
973 	 * We use pmap_align and pmap_offset as alignment and offset variables.
974 	 *
975 	 * Because the align parameter takes precedence over pmap prefer,
976 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
977 	 * if pmap_prefer cannot provide the required alignment.
978 	 */
979 	pmap_align = MAX(align, PAGE_SIZE);
980 	pmap_offset = 0;
981 
982 	/* Decode parameters. */
983 	prot = UVM_PROTECTION(flags);
984 	maxprot = UVM_MAXPROTECTION(flags);
985 	advice = UVM_ADVICE(flags);
986 	inherit = UVM_INHERIT(flags);
987 	error = 0;
988 	hint = trunc_page(*addr);
989 	TAILQ_INIT(&dead);
990 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
991 	KASSERT((align & (align - 1)) == 0);
992 
993 	/* Check protection. */
994 	if ((prot & maxprot) != prot)
995 		return EACCES;
996 
997 	/*
998 	 * Before grabbing the lock, allocate a map entry for later
999 	 * use to ensure we don't wait for memory while holding the
1000 	 * vm_map_lock.
1001 	 */
1002 	new = uvm_mapent_alloc(map, flags);
1003 	if (new == NULL)
1004 		return ENOMEM;
1005 
1006 	vm_map_lock(map);
1007 	first = last = NULL;
1008 	if (flags & UVM_FLAG_FIXED) {
1009 		/*
1010 		 * Fixed location.
1011 		 *
1012 		 * Note: we ignore align, pmap_prefer.
1013 		 * Fill in first, last and *addr.
1014 		 */
1015 		KASSERT((*addr & PAGE_MASK) == 0);
1016 
1017 		/* Check that the space is available. */
1018 		if (flags & UVM_FLAG_UNMAP) {
1019 			if ((flags & UVM_FLAG_STACK) &&
1020 			    !uvm_map_is_stack_remappable(map, *addr, sz)) {
1021 				error = EINVAL;
1022 				goto unlock;
1023 			}
1024 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1025 		}
1026 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1027 			error = ENOMEM;
1028 			goto unlock;
1029 		}
1030 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1031 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1032 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1033 		/*
1034 		 * Address used as hint.
1035 		 *
1036 		 * Note: we enforce the alignment restriction,
1037 		 * but ignore pmap_prefer.
1038 		 */
1039 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1040 		/* Run selection algorithm for executables. */
1041 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1042 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1043 
1044 		if (error != 0)
1045 			goto unlock;
1046 	} else {
1047 		/* Update freelists from vmspace. */
1048 		uvm_map_vmspace_update(map, &dead, flags);
1049 
1050 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1051 		    pmap_align, pmap_offset, prot, hint);
1052 
1053 		if (error != 0)
1054 			goto unlock;
1055 	}
1056 
1057 	/* Double-check if selected address doesn't cause overflow. */
1058 	if (*addr + sz < *addr) {
1059 		error = ENOMEM;
1060 		goto unlock;
1061 	}
1062 
1063 	/* If we only want a query, return now. */
1064 	if (flags & UVM_FLAG_QUERY) {
1065 		error = 0;
1066 		goto unlock;
1067 	}
1068 
1069 	/*
1070 	 * Create new entry.
1071 	 * first and last may be invalidated after this call.
1072 	 */
1073 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1074 	    new);
1075 	if (entry == NULL) {
1076 		error = ENOMEM;
1077 		goto unlock;
1078 	}
1079 	new = NULL;
1080 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1081 	entry->object.uvm_obj = NULL;
1082 	entry->offset = 0;
1083 	entry->protection = prot;
1084 	entry->max_protection = maxprot;
1085 	entry->inheritance = inherit;
1086 	entry->wired_count = 0;
1087 	entry->advice = advice;
1088 	if (prot & PROT_WRITE)
1089 		map->wserial++;
1090 	if (flags & UVM_FLAG_SYSCALL) {
1091 		entry->etype |= UVM_ET_SYSCALL;
1092 		map->wserial++;
1093 	}
1094 	if (flags & UVM_FLAG_STACK) {
1095 		entry->etype |= UVM_ET_STACK;
1096 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
1097 			map->sserial++;
1098 	}
1099 	if (flags & UVM_FLAG_COPYONW) {
1100 		entry->etype |= UVM_ET_COPYONWRITE;
1101 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1102 			entry->etype |= UVM_ET_NEEDSCOPY;
1103 	}
1104 	if (flags & UVM_FLAG_CONCEAL)
1105 		entry->etype |= UVM_ET_CONCEAL;
1106 	if (flags & UVM_FLAG_OVERLAY) {
1107 		entry->aref.ar_pageoff = 0;
1108 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1109 	}
1110 
1111 	/* Update map and process statistics. */
1112 	map->size += sz;
1113 	if (prot != PROT_NONE) {
1114 		((struct vmspace *)map)->vm_dused +=
1115 		    uvmspace_dused(map, *addr, *addr + sz);
1116 	}
1117 
1118 unlock:
1119 	vm_map_unlock(map);
1120 
1121 	/*
1122 	 * Remove dead entries.
1123 	 *
1124 	 * Dead entries may be the result of merging.
1125 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1126 	 * destroy free-space entries.
1127 	 */
1128 	uvm_unmap_detach(&dead, 0);
1129 
1130 	if (new)
1131 		uvm_mapent_free(new);
1132 	return error;
1133 }
1134 
1135 /*
1136  * uvm_map: establish a valid mapping in map
1137  *
1138  * => *addr and sz must be a multiple of PAGE_SIZE.
1139  * => map must be unlocked.
1140  * => <uobj,uoffset> value meanings (4 cases):
1141  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1142  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1143  *	[3] <uobj,uoffset>		== normal mapping
1144  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1145  *
1146  *   case [4] is for kernel mappings where we don't know the offset until
1147  *   we've found a virtual address.   note that kernel object offsets are
1148  *   always relative to vm_map_min(kernel_map).
1149  *
1150  * => align: align vaddr, must be a power-of-2.
1151  *    Align is only a hint and will be ignored if the alignment fails.
1152  */
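/*
 * Hypothetical call sketch for case [3] above (the uobj and values are
 * made up; UVM_MAPFLAG, MAP_INHERIT_NONE and MADV_NORMAL are expected to
 * come from uvm_extern.h):
 *
 *	vaddr_t va = 0;
 *	int error;
 *
 *	error = uvm_map(kernel_map, &va, PAGE_SIZE, uobj, 0, 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
 *	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
 */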
1153 int
1154 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
1155     struct uvm_object *uobj, voff_t uoffset,
1156     vsize_t align, unsigned int flags)
1157 {
1158 	struct vm_map_entry	*first, *last, *entry, *new;
1159 	struct uvm_map_deadq	 dead;
1160 	vm_prot_t		 prot;
1161 	vm_prot_t		 maxprot;
1162 	vm_inherit_t		 inherit;
1163 	int			 advice;
1164 	int			 error;
1165 	vaddr_t			 pmap_align, pmap_offset;
1166 	vaddr_t			 hint;
1167 
1168 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1169 		splassert(IPL_NONE);
1170 	else
1171 		splassert(IPL_VM);
1172 
1173 	/*
1174 	 * We use pmap_align and pmap_offset as alignment and offset variables.
1175 	 *
1176 	 * Because the align parameter takes precedence over pmap prefer,
1177 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
1178 	 * if pmap_prefer cannot provide the required alignment.
1179 	 */
1180 	if (uoffset == UVM_UNKNOWN_OFFSET) {
1181 		pmap_align = MAX(align, PAGE_SIZE);
1182 		pmap_offset = 0;
1183 	} else {
1184 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
1185 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
1186 
1187 		if (align == 0 ||
1188 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
1189 			/* pmap_offset satisfies align, no change. */
1190 		} else {
1191 			/* Align takes precedence over pmap prefer. */
1192 			pmap_align = align;
1193 			pmap_offset = 0;
1194 		}
1195 	}
1196 
1197 	/* Decode parameters. */
1198 	prot = UVM_PROTECTION(flags);
1199 	maxprot = UVM_MAXPROTECTION(flags);
1200 	advice = UVM_ADVICE(flags);
1201 	inherit = UVM_INHERIT(flags);
1202 	error = 0;
1203 	hint = trunc_page(*addr);
1204 	TAILQ_INIT(&dead);
1205 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1206 	KASSERT((align & (align - 1)) == 0);
1207 
1208 	/* Holes are incompatible with other types of mappings. */
1209 	if (flags & UVM_FLAG_HOLE) {
1210 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1211 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1212 	}
1213 
1214 	/* Unset hint for kernel_map non-fixed allocations. */
1215 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1216 		hint = 0;
1217 
1218 	/* Check protection. */
1219 	if ((prot & maxprot) != prot)
1220 		return EACCES;
1221 
1222 	if (map == kernel_map &&
1223 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1224 		panic("uvm_map: kernel map W^X violation requested");
1225 
1226 	/*
1227 	 * Before grabbing the lock, allocate a map entry for later
1228 	 * use to ensure we don't wait for memory while holding the
1229 	 * vm_map_lock.
1230 	 */
1231 	new = uvm_mapent_alloc(map, flags);
1232 	if (new == NULL)
1233 		return ENOMEM;
1234 
1235 	if (flags & UVM_FLAG_TRYLOCK) {
1236 		if (vm_map_lock_try(map) == FALSE) {
1237 			error = EFAULT;
1238 			goto out;
1239 		}
1240 	} else {
1241 		vm_map_lock(map);
1242 	}
1243 
1244 	first = last = NULL;
1245 	if (flags & UVM_FLAG_FIXED) {
1246 		/*
1247 		 * Fixed location.
1248 		 *
1249 		 * Note: we ignore align, pmap_prefer.
1250 		 * Fill in first, last and *addr.
1251 		 */
1252 		KASSERT((*addr & PAGE_MASK) == 0);
1253 
1254 		/*
1255 		 * Grow pmap to include allocated address.
1256 		 * If the growth fails, the allocation will fail too.
1257 		 */
1258 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1259 		    uvm_maxkaddr < (*addr + sz)) {
1260 			uvm_map_kmem_grow(map, &dead,
1261 			    *addr + sz - uvm_maxkaddr, flags);
1262 		}
1263 
1264 		/* Check that the space is available. */
1265 		if (flags & UVM_FLAG_UNMAP)
1266 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1267 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1268 			error = ENOMEM;
1269 			goto unlock;
1270 		}
1271 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1272 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1273 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1274 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1275 		/*
1276 		 * Address used as hint.
1277 		 *
1278 		 * Note: we enforce the alignment restriction,
1279 		 * but ignore pmap_prefer.
1280 		 */
1281 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1282 		/* Run selection algorithm for executables. */
1283 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1284 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1285 
1286 		/* Grow kernel memory and try again. */
1287 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1288 			uvm_map_kmem_grow(map, &dead, sz, flags);
1289 
1290 			error = uvm_addr_invoke(map, map->uaddr_exe,
1291 			    &first, &last, addr, sz,
1292 			    pmap_align, pmap_offset, prot, hint);
1293 		}
1294 
1295 		if (error != 0)
1296 			goto unlock;
1297 	} else {
1298 		/* Update freelists from vmspace. */
1299 		if (map->flags & VM_MAP_ISVMSPACE)
1300 			uvm_map_vmspace_update(map, &dead, flags);
1301 
1302 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1303 		    pmap_align, pmap_offset, prot, hint);
1304 
1305 		/* Grow kernel memory and try again. */
1306 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1307 			uvm_map_kmem_grow(map, &dead, sz, flags);
1308 
1309 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1310 			    pmap_align, pmap_offset, prot, hint);
1311 		}
1312 
1313 		if (error != 0)
1314 			goto unlock;
1315 	}
1316 
1317 	/* Double-check if selected address doesn't cause overflow. */
1318 	if (*addr + sz < *addr) {
1319 		error = ENOMEM;
1320 		goto unlock;
1321 	}
1322 
1323 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1324 	    uvm_maxkaddr >= *addr + sz);
1325 
1326 	/* If we only want a query, return now. */
1327 	if (flags & UVM_FLAG_QUERY) {
1328 		error = 0;
1329 		goto unlock;
1330 	}
1331 
1332 	if (uobj == NULL)
1333 		uoffset = 0;
1334 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1335 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1336 		uoffset = *addr - vm_map_min(kernel_map);
1337 	}
1338 
1339 	/*
1340 	 * Create new entry.
1341 	 * first and last may be invalidated after this call.
1342 	 */
1343 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1344 	    new);
1345 	if (entry == NULL) {
1346 		error = ENOMEM;
1347 		goto unlock;
1348 	}
1349 	new = NULL;
1350 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1351 	entry->object.uvm_obj = uobj;
1352 	entry->offset = uoffset;
1353 	entry->protection = prot;
1354 	entry->max_protection = maxprot;
1355 	entry->inheritance = inherit;
1356 	entry->wired_count = 0;
1357 	entry->advice = advice;
1358 	if (prot & PROT_WRITE)
1359 		map->wserial++;
1360 	if (flags & UVM_FLAG_SYSCALL) {
1361 		entry->etype |= UVM_ET_SYSCALL;
1362 		map->wserial++;
1363 	}
1364 	if (flags & UVM_FLAG_STACK) {
1365 		entry->etype |= UVM_ET_STACK;
1366 		if (flags & UVM_FLAG_UNMAP)
1367 			map->sserial++;
1368 	}
1369 	if (uobj)
1370 		entry->etype |= UVM_ET_OBJ;
1371 	else if (flags & UVM_FLAG_HOLE)
1372 		entry->etype |= UVM_ET_HOLE;
1373 	if (flags & UVM_FLAG_NOFAULT)
1374 		entry->etype |= UVM_ET_NOFAULT;
1375 	if (flags & UVM_FLAG_WC)
1376 		entry->etype |= UVM_ET_WC;
1377 	if (flags & UVM_FLAG_COPYONW) {
1378 		entry->etype |= UVM_ET_COPYONWRITE;
1379 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1380 			entry->etype |= UVM_ET_NEEDSCOPY;
1381 	}
1382 	if (flags & UVM_FLAG_CONCEAL)
1383 		entry->etype |= UVM_ET_CONCEAL;
1384 	if (flags & UVM_FLAG_OVERLAY) {
1385 		entry->aref.ar_pageoff = 0;
1386 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1387 	}
1388 
1389 	/* Update map and process statistics. */
1390 	if (!(flags & UVM_FLAG_HOLE)) {
1391 		map->size += sz;
1392 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
1393 		    prot != PROT_NONE) {
1394 			((struct vmspace *)map)->vm_dused +=
1395 			    uvmspace_dused(map, *addr, *addr + sz);
1396 		}
1397 	}
1398 
1399 	/*
1400 	 * Try to merge entry.
1401 	 *
1402 	 * Userland allocations are kept separated most of the time.
1403 	 * Forgo the effort of merging what most of the time can't be merged
1404 	 * and only try the merge if it concerns a kernel entry.
1405 	 */
1406 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1407 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1408 		uvm_mapent_tryjoin(map, entry, &dead);
1409 
1410 unlock:
1411 	vm_map_unlock(map);
1412 
1413 	/*
1414 	 * Remove dead entries.
1415 	 *
1416 	 * Dead entries may be the result of merging.
1417 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1418 	 * destroy free-space entries.
1419 	 */
1420 	if (map->flags & VM_MAP_INTRSAFE)
1421 		uvm_unmap_detach_intrsafe(&dead);
1422 	else
1423 		uvm_unmap_detach(&dead, 0);
1424 out:
1425 	if (new)
1426 		uvm_mapent_free(new);
1427 	return error;
1428 }
1429 
1430 /*
1431  * True iff e1 and e2 can be joined together.
1432  */
1433 int
1434 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1435     struct vm_map_entry *e2)
1436 {
1437 	KDASSERT(e1 != NULL && e2 != NULL);
1438 
1439 	/* Must be the same entry type and not have free memory between. */
1440 	if (e1->etype != e2->etype || e1->end != e2->start)
1441 		return 0;
1442 
1443 	/* Submaps are never joined. */
1444 	if (UVM_ET_ISSUBMAP(e1))
1445 		return 0;
1446 
1447 	/* Never merge wired memory. */
1448 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1449 		return 0;
1450 
1451 	/* Protection, inheritance and advice must be equal. */
1452 	if (e1->protection != e2->protection ||
1453 	    e1->max_protection != e2->max_protection ||
1454 	    e1->inheritance != e2->inheritance ||
1455 	    e1->advice != e2->advice)
1456 		return 0;
1457 
1458 	/* If uvm_object: object itself and offsets within object must match. */
1459 	if (UVM_ET_ISOBJ(e1)) {
1460 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1461 			return 0;
1462 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1463 			return 0;
1464 	}
1465 
1466 	/*
1467 	 * Cannot join shared amaps.
1468 	 * Note: no need to lock amap to look at refs, since we don't care
1469 	 * about its exact value.
1470 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1471 	 */
1472 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1473 		return 0;
1474 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1475 		return 0;
1476 
1477 	/* Apparently, e1 and e2 match. */
1478 	return 1;
1479 }
1480 
1481 /*
1482  * Join support function.
1483  *
1484  * Returns the merged entry on success.
1485  * Returns NULL if the merge failed.
1486  */
1487 struct vm_map_entry*
1488 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1489     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1490 {
1491 	struct uvm_addr_state *free;
1492 
1493 	/*
1494 	 * Merging is not supported for map entries that
1495 	 * contain an amap in e1. This should never happen
1496 	 * anyway, because only kernel entries are merged.
1497 	 * These do not contain amaps.
1498 	 * e2 contains no real information in its amap,
1499 	 * so it can be erased immediately.
1500 	 */
1501 	KASSERT(e1->aref.ar_amap == NULL);
1502 
1503 	/*
1504 	 * Don't drop obj reference:
1505 	 * uvm_unmap_detach will do this for us.
1506 	 */
1507 	free = uvm_map_uaddr_e(map, e1);
1508 	uvm_mapent_free_remove(map, free, e1);
1509 
1510 	free = uvm_map_uaddr_e(map, e2);
1511 	uvm_mapent_free_remove(map, free, e2);
1512 	uvm_mapent_addr_remove(map, e2);
1513 	e1->end = e2->end;
1514 	e1->guard = e2->guard;
1515 	e1->fspace = e2->fspace;
1516 	uvm_mapent_free_insert(map, free, e1);
1517 
1518 	DEAD_ENTRY_PUSH(dead, e2);
1519 	return e1;
1520 }
1521 
1522 /*
1523  * Attempt forward and backward joining of entry.
1524  *
1525  * Returns entry after joins.
1526  * We are guaranteed that the amap of entry is either non-existent or
1527  * has never been used.
1528  */
1529 struct vm_map_entry*
1530 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1531     struct uvm_map_deadq *dead)
1532 {
1533 	struct vm_map_entry *other;
1534 	struct vm_map_entry *merged;
1535 
1536 	/* Merge with previous entry. */
1537 	other = RBT_PREV(uvm_map_addr, entry);
1538 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1539 		merged = uvm_mapent_merge(map, other, entry, dead);
1540 		if (merged)
1541 			entry = merged;
1542 	}
1543 
1544 	/*
1545 	 * Merge with next entry.
1546 	 *
1547 	 * Because amap can only extend forward and the next entry
1548 	 * probably contains sensible info, only perform forward merging
1549 	 * in the absence of an amap.
1550 	 */
1551 	other = RBT_NEXT(uvm_map_addr, entry);
1552 	if (other && entry->aref.ar_amap == NULL &&
1553 	    other->aref.ar_amap == NULL &&
1554 	    uvm_mapent_isjoinable(map, entry, other)) {
1555 		merged = uvm_mapent_merge(map, entry, other, dead);
1556 		if (merged)
1557 			entry = merged;
1558 	}
1559 
1560 	return entry;
1561 }
1562 
1563 /*
1564  * Kill entries that are no longer in a map.
1565  */
1566 void
1567 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1568 {
1569 	struct vm_map_entry *entry, *tmp;
1570 	int waitok = flags & UVM_PLA_WAITOK;
1571 
1572 	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1573 		/* Drop reference to amap, if we've got one. */
1574 		if (entry->aref.ar_amap)
1575 			amap_unref(entry->aref.ar_amap,
1576 			    entry->aref.ar_pageoff,
1577 			    atop(entry->end - entry->start),
1578 			    flags & AMAP_REFALL);
1579 
1580 		/* Skip entries for which we have to grab the kernel lock. */
1581 		if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
1582 			continue;
1583 
1584 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1585 		uvm_mapent_free(entry);
1586 	}
1587 
1588 	if (TAILQ_EMPTY(deadq))
1589 		return;
1590 
1591 	KERNEL_LOCK();
1592 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1593 		if (waitok)
1594 			uvm_pause();
1595 		/* Drop reference to our backing object, if we've got one. */
1596 		if (UVM_ET_ISSUBMAP(entry)) {
1597 			/* ... unlikely to happen, but play it safe */
1598 			uvm_map_deallocate(entry->object.sub_map);
1599 		} else if (UVM_ET_ISOBJ(entry) &&
1600 		    entry->object.uvm_obj->pgops->pgo_detach) {
1601 			entry->object.uvm_obj->pgops->pgo_detach(
1602 			    entry->object.uvm_obj);
1603 		}
1604 
1605 		/* Step to next. */
1606 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1607 		uvm_mapent_free(entry);
1608 	}
1609 	KERNEL_UNLOCK();
1610 }
1611 
1612 void
1613 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1614 {
1615 	struct vm_map_entry *entry;
1616 
1617 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1618 		KASSERT(entry->aref.ar_amap == NULL);
1619 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1620 		KASSERT(!UVM_ET_ISOBJ(entry));
1621 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1622 		uvm_mapent_free(entry);
1623 	}
1624 }
1625 
1626 /*
1627  * Create and insert new entry.
1628  *
1629  * Returned entry contains new addresses and is inserted properly in the tree.
1630  * first and last are (probably) no longer valid.
1631  */
1632 struct vm_map_entry*
1633 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1634     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1635     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1636 {
1637 	struct vm_map_entry *entry, *prev;
1638 	struct uvm_addr_state *free;
1639 	vaddr_t min, max;	/* free space boundaries for new entry */
1640 
1641 	KDASSERT(map != NULL);
1642 	KDASSERT(first != NULL);
1643 	KDASSERT(last != NULL);
1644 	KDASSERT(dead != NULL);
1645 	KDASSERT(sz > 0);
1646 	KDASSERT(addr + sz > addr);
1647 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1648 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1649 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1650 	uvm_tree_sanity(map, __FILE__, __LINE__);
1651 
1652 	min = addr + sz;
1653 	max = VMMAP_FREE_END(last);
1654 
1655 	/* Initialize new entry. */
1656 	if (new == NULL)
1657 		entry = uvm_mapent_alloc(map, flags);
1658 	else
1659 		entry = new;
1660 	if (entry == NULL)
1661 		return NULL;
1662 	entry->offset = 0;
1663 	entry->etype = 0;
1664 	entry->wired_count = 0;
1665 	entry->aref.ar_pageoff = 0;
1666 	entry->aref.ar_amap = NULL;
1667 
1668 	entry->start = addr;
1669 	entry->end = min;
1670 	entry->guard = 0;
1671 	entry->fspace = 0;
1672 
1673 	/* Reset free space in first. */
1674 	free = uvm_map_uaddr_e(map, first);
1675 	uvm_mapent_free_remove(map, free, first);
1676 	first->guard = 0;
1677 	first->fspace = 0;
1678 
1679 	/*
1680 	 * Remove all entries that are fully replaced.
1681 	 * We are iterating using last in reverse order.
1682 	 */
1683 	for (; first != last; last = prev) {
1684 		prev = RBT_PREV(uvm_map_addr, last);
1685 
1686 		KDASSERT(last->start == last->end);
1687 		free = uvm_map_uaddr_e(map, last);
1688 		uvm_mapent_free_remove(map, free, last);
1689 		uvm_mapent_addr_remove(map, last);
1690 		DEAD_ENTRY_PUSH(dead, last);
1691 	}
1692 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1693 	if (first->start == addr) {
1694 		uvm_mapent_addr_remove(map, first);
1695 		DEAD_ENTRY_PUSH(dead, first);
1696 	} else {
1697 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1698 		    addr, flags);
1699 	}
1700 
1701 	/* Finally, link in entry. */
1702 	uvm_mapent_addr_insert(map, entry);
1703 	uvm_map_fix_space(map, entry, min, max, flags);
1704 
1705 	uvm_tree_sanity(map, __FILE__, __LINE__);
1706 	return entry;
1707 }
1708 
1709 
1710 /*
1711  * uvm_mapent_alloc: allocate a map entry
1712  */
1713 struct vm_map_entry *
1714 uvm_mapent_alloc(struct vm_map *map, int flags)
1715 {
1716 	struct vm_map_entry *me, *ne;
1717 	int pool_flags;
1718 	int i;
1719 
1720 	pool_flags = PR_WAITOK;
1721 	if (flags & UVM_FLAG_TRYLOCK)
1722 		pool_flags = PR_NOWAIT;
1723 
1724 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1725 		mtx_enter(&uvm_kmapent_mtx);
1726 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1727 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1728 			    &kd_nowait);
1729 			if (ne == NULL)
1730 				panic("uvm_mapent_alloc: cannot allocate map "
1731 				    "entry");
1732 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1733 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1734 				    &ne[i], daddrs.addr_kentry);
1735 			}
1736 			if (ratecheck(&uvm_kmapent_last_warn_time,
1737 			    &uvm_kmapent_warn_rate))
1738 				printf("uvm_mapent_alloc: out of static "
1739 				    "map entries\n");
1740 		}
1741 		me = SLIST_FIRST(&uvm.kentry_free);
1742 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1743 		uvmexp.kmapent++;
1744 		mtx_leave(&uvm_kmapent_mtx);
1745 		me->flags = UVM_MAP_STATIC;
1746 	} else if (map == kernel_map) {
1747 		splassert(IPL_NONE);
1748 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1749 		if (me == NULL)
1750 			goto out;
1751 		me->flags = UVM_MAP_KMEM;
1752 	} else {
1753 		splassert(IPL_NONE);
1754 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1755 		if (me == NULL)
1756 			goto out;
1757 		me->flags = 0;
1758 	}
1759 
1760 	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1761 out:
1762 	return me;
1763 }
1764 
1765 /*
1766  * uvm_mapent_free: free map entry
1767  *
1768  * => XXX: static pool for kernel map?
1769  */
1770 void
1771 uvm_mapent_free(struct vm_map_entry *me)
1772 {
1773 	if (me->flags & UVM_MAP_STATIC) {
1774 		mtx_enter(&uvm_kmapent_mtx);
1775 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1776 		uvmexp.kmapent--;
1777 		mtx_leave(&uvm_kmapent_mtx);
1778 	} else if (me->flags & UVM_MAP_KMEM) {
1779 		splassert(IPL_NONE);
1780 		pool_put(&uvm_map_entry_kmem_pool, me);
1781 	} else {
1782 		splassert(IPL_NONE);
1783 		pool_put(&uvm_map_entry_pool, me);
1784 	}
1785 }
1786 
1787 /*
1788  * uvm_map_lookup_entry: find map entry at or before an address.
1789  *
1790  * => map must at least be read-locked by caller
1791  * => entry is returned in "entry"
1792  * => return value is true if address is in the returned entry
1793  * => ET_HOLE entries are considered to not contain a mapping; FALSE is
1794  *    returned for those entries.
1795  */
1796 boolean_t
1797 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1798     struct vm_map_entry **entry)
1799 {
1800 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1801 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1802 	    (*entry)->start <= address && (*entry)->end > address;
1803 }
1804 
1805 /*
1806  * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
1807  * grown -- then uvm_map_check_region_range() should not cache the entry
1808  * because growth won't be seen.
1809  */
1810 int
1811 uvm_map_inentry_sp(vm_map_entry_t entry)
1812 {
1813 	if ((entry->etype & UVM_ET_STACK) == 0) {
1814 		if (entry->protection == PROT_NONE)
1815 			return (-1);	/* don't update range */
1816 		return (0);
1817 	}
1818 	return (1);
1819 }
1820 
1821 /*
1822  * The system call must not come from a writeable entry; W^X is violated
1823  * otherwise.  (It would be nice if we could spot aliasing, which is
1824  * also kind of bad.)
1825  * The system call must come from a syscall-labeled entry (the text
1826  * regions of the main program, sigtramp, ld.so, or libc).
1827  */
1828 int
1829 uvm_map_inentry_pc(vm_map_entry_t entry)
1830 {
1831 	if (entry->protection & PROT_WRITE)
1832 		return (0);	/* not permitted */
1833 	if ((entry->etype & UVM_ET_SYSCALL) == 0)
1834 		return (0);	/* not permitted */
1835 	return (1);
1836 }
1837 
1838 int
1839 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1840 {
1841 	return (serial != ie->ie_serial || ie->ie_start == 0 ||
1842 	    addr < ie->ie_start || addr >= ie->ie_end);
1843 }
1844 
1845 /*
1846  * Inside a vm_map, find the entry containing the given address and
1847  * verify it via the supplied function.  Remember the low and high
1848  * addresses of the region if it is valid and return TRUE, else FALSE.
1849  */
1850 boolean_t
1851 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1852     int (*fn)(vm_map_entry_t), u_long serial)
1853 {
1854 	vm_map_t map = &p->p_vmspace->vm_map;
1855 	vm_map_entry_t entry;
1856 	int ret;
1857 
1858 	if (addr < map->min_offset || addr >= map->max_offset)
1859 		return (FALSE);
1860 
1861 	/* lock map */
1862 	vm_map_lock_read(map);
1863 
1864 	/* lookup */
1865 	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1866 		vm_map_unlock_read(map);
1867 		return (FALSE);
1868 	}
1869 
1870 	ret = (*fn)(entry);
1871 	if (ret == 0) {
1872 		vm_map_unlock_read(map);
1873 		return (FALSE);
1874 	} else if (ret == 1) {
1875 		ie->ie_start = entry->start;
1876 		ie->ie_end = entry->end;
1877 		ie->ie_serial = serial;
1878 	} else {
1879 		/* do not update, re-check later */
1880 	}
1881 	vm_map_unlock_read(map);
1882 	return (TRUE);
1883 }
1884 
1885 boolean_t
1886 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1887     const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
1888 {
1889 	union sigval sv;
1890 	boolean_t ok = TRUE;
1891 
1892 	if (uvm_map_inentry_recheck(serial, addr, ie)) {
1893 		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1894 		if (!ok) {
1895 			KERNEL_LOCK();
1896 			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1897 			    addr, ie->ie_start, ie->ie_end);
1898 			p->p_p->ps_acflag |= AMAP;
1899 			sv.sival_ptr = (void *)PROC_PC(p);
1900 			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1901 			KERNEL_UNLOCK();
1902 		}
1903 	}
1904 	return (ok);
1905 }
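
/*
 * Sketch of how the syscall entry path uses the check above (not
 * verbatim kernel code; the p_spinentry member, the PROC_STACK() macro
 * and the format string are assumptions from memory).  A FALSE return
 * means SIGSEGV has already been delivered to the process.
 */
#if 0
	ok = uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
	    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
	    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial);
#endif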
1906 
1907 /*
1908  * Check whether the given address range can be converted to a MAP_STACK
1909  * mapping.
1910  *
1911  * Must be called with map locked.
1912  */
1913 boolean_t
1914 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
1915 {
1916 	vaddr_t end = addr + sz;
1917 	struct vm_map_entry *first, *iter, *prev = NULL;
1918 
1919 	if (!uvm_map_lookup_entry(map, addr, &first)) {
1920 		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1921 		    addr, end, map);
1922 		return FALSE;
1923 	}
1924 
1925 	/*
1926 	 * Check that the address range exists and is contiguous.
1927 	 */
1928 	for (iter = first; iter != NULL && iter->start < end;
1929 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1930 		/*
1931 		 * Make sure that we do not have holes in the range.
1932 		 */
1933 #if 0
1934 		if (prev != NULL) {
1935 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1936 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1937 			    prev->start, prev->end, iter->start, iter->end);
1938 		}
1939 #endif
1940 
1941 		if (prev != NULL && prev->end != iter->start) {
1942 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1943 			    "hole in range\n", addr, end, map);
1944 			return FALSE;
1945 		}
1946 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1947 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1948 			    "hole in range\n", addr, end, map);
1949 			return FALSE;
1950 		}
1951 	}
1952 
1953 	return TRUE;
1954 }
1955 
1956 /*
1957  * Remap the middle pages of an existing mapping as a stack range.
1958  * If a contiguous mapping with protection PROT_READ|PROT_WRITE already
1959  * exists over the given range [addr, addr + sz), that mapping is
1960  * dropped and a new anon mapping is created and marked as a
1961  * stack.
1962  *
1963  * Must be called with map unlocked.
1964  */
1965 int
1966 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1967 {
1968 	vm_map_t map = &p->p_vmspace->vm_map;
1969 	vaddr_t start, end;
1970 	int error;
1971 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1972 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1973 	    MAP_INHERIT_COPY, MADV_NORMAL,
1974 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1975 	    UVM_FLAG_COPYONW);
1976 
1977 	start = round_page(addr);
1978 	end = trunc_page(addr + sz);
1979 #ifdef MACHINE_STACK_GROWS_UP
1980 	if (end == addr + sz)
1981 		end -= PAGE_SIZE;
1982 #else
1983 	if (start == addr)
1984 		start += PAGE_SIZE;
1985 #endif
1986 
1987 	if (start < map->min_offset || end >= map->max_offset || end < start)
1988 		return EINVAL;
1989 
1990 	error = uvm_mapanon(map, &start, end - start, 0, flags);
1991 	if (error != 0)
1992 		printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1993 
1994 	return error;
1995 }
1996 
1997 /*
1998  * uvm_map_pie: return a properly aligned random load address for a
1999  * PIE (position-independent) executable.
2000  */
2001 #ifndef VM_PIE_MAX_ADDR
2002 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
2003 #endif
2004 
2005 #ifndef VM_PIE_MIN_ADDR
2006 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
2007 #endif
2008 
2009 #ifndef VM_PIE_MIN_ALIGN
2010 #define VM_PIE_MIN_ALIGN PAGE_SIZE
2011 #endif
2012 
2013 vaddr_t
2014 uvm_map_pie(vaddr_t align)
2015 {
2016 	vaddr_t addr, space, min;
2017 
2018 	align = MAX(align, VM_PIE_MIN_ALIGN);
2019 
2020 	/* round up to next alignment */
2021 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
2022 
2023 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
2024 		return (align);
2025 
2026 	space = (VM_PIE_MAX_ADDR - min) / align;
2027 	space = MIN(space, (u_int32_t)-1);
2028 
2029 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
2030 	addr += min;
2031 
2032 	return (addr);
2033 }
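
/*
 * Worked example (illustrative numbers only, not the constants of any
 * particular platform): with align = 0x200000, VM_PIE_MIN_ADDR =
 * 0x10000 and VM_PIE_MAX_ADDR = 0x20000000, min rounds up to 0x200000,
 * space = (0x20000000 - 0x200000) / 0x200000 = 255 slots, and the
 * result is min plus a uniformly random slot index times align.
 */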
2034 
2035 void
2036 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
2037 {
2038 	struct uvm_map_deadq dead;
2039 
2040 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
2041 	    (end & (vaddr_t)PAGE_MASK) == 0);
2042 	TAILQ_INIT(&dead);
2043 	vm_map_lock(map);
2044 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
2045 	vm_map_unlock(map);
2046 
2047 	if (map->flags & VM_MAP_INTRSAFE)
2048 		uvm_unmap_detach_intrsafe(&dead);
2049 	else
2050 		uvm_unmap_detach(&dead, 0);
2051 }
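
/*
 * Example (sketch, not part of the original source): both boundaries
 * must be page-aligned, e.g. when releasing "size" bytes of kernel
 * virtual address space starting at the page-aligned address "va":
 */
#if 0
	uvm_unmap(kernel_map, va, va + round_page(size));
#endif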
2052 
2053 /*
2054  * Mark entry as free.
2055  *
2056  * entry will be put on the dead list.
2057  * The free space will be merged into the previous or a new entry,
2058  * unless markfree is false.
2059  */
2060 void
2061 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
2062     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
2063     boolean_t markfree)
2064 {
2065 	struct uvm_addr_state	*free;
2066 	struct vm_map_entry	*prev;
2067 	vaddr_t			 addr;	/* Start of freed range. */
2068 	vaddr_t			 end;	/* End of freed range. */
2069 
2070 	prev = *prev_ptr;
2071 	if (prev == entry)
2072 		*prev_ptr = prev = NULL;
2073 
2074 	if (prev == NULL ||
2075 	    VMMAP_FREE_END(prev) != entry->start)
2076 		prev = RBT_PREV(uvm_map_addr, entry);
2077 
2078 	/* Entry is describing only free memory and has nothing to drain into. */
2079 	if (prev == NULL && entry->start == entry->end && markfree) {
2080 		*prev_ptr = entry;
2081 		return;
2082 	}
2083 
2084 	addr = entry->start;
2085 	end = VMMAP_FREE_END(entry);
2086 	free = uvm_map_uaddr_e(map, entry);
2087 	uvm_mapent_free_remove(map, free, entry);
2088 	uvm_mapent_addr_remove(map, entry);
2089 	DEAD_ENTRY_PUSH(dead, entry);
2090 
2091 	if (markfree) {
2092 		if (prev) {
2093 			free = uvm_map_uaddr_e(map, prev);
2094 			uvm_mapent_free_remove(map, free, prev);
2095 		}
2096 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
2097 	}
2098 }
2099 
2100 /*
2101  * Unwire and release referenced amap and object from map entry.
2102  */
2103 void
2104 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
2105 {
2106 	/* Unwire removed map entry. */
2107 	if (VM_MAPENT_ISWIRED(entry)) {
2108 		KERNEL_LOCK();
2109 		entry->wired_count = 0;
2110 		uvm_fault_unwire_locked(map, entry->start, entry->end);
2111 		KERNEL_UNLOCK();
2112 	}
2113 
2114 	/* Entry-type specific code. */
2115 	if (UVM_ET_ISHOLE(entry)) {
2116 		/* Nothing to be done for holes. */
2117 	} else if (map->flags & VM_MAP_INTRSAFE) {
2118 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2119 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
2120 		pmap_kremove(entry->start, entry->end - entry->start);
2121 	} else if (UVM_ET_ISOBJ(entry) &&
2122 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2123 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2124 		/*
2125 		 * Note: kernel object mappings are currently used in
2126 		 * two ways:
2127 		 *  [1] "normal" mappings of pages in the kernel object
2128 		 *  [2] uvm_km_valloc'd allocations in which we
2129 		 *      pmap_enter in some non-kernel-object page
2130 		 *      (e.g. vmapbuf).
2131 		 *
2132 		 * for case [1], we need to remove the mapping from
2133 		 * the pmap and then remove the page from the kernel
2134 		 * object (because, once pages in a kernel object are
2135 		 * unmapped they are no longer needed, unlike, say,
2136 		 * a vnode where you might want the data to persist
2137 		 * until flushed out of a queue).
2138 		 *
2139 		 * for case [2], we need to remove the mapping from
2140 		 * the pmap.  there shouldn't be any pages at the
2141 		 * specified offset in the kernel object [but it
2142 		 * doesn't hurt to call uvm_km_pgremove just to be
2143 		 * safe?]
2144 		 *
2145 		 * uvm_km_pgremove currently does the following:
2146 		 *   for pages in the kernel object range:
2147 		 *     - drops the swap slot
2148 		 *     - uvm_pagefree the page
2149 		 *
2150 		 * note there is a version of uvm_km_pgremove() that
2151 		 * is used for "intrsafe" objects.
2152 		 */
2153 		/*
2154 		 * remove mappings from pmap and drop the pages
2155 		 * from the object.  offsets are always relative
2156 		 * to vm_map_min(kernel_map).
2157 		 */
2158 		pmap_remove(pmap_kernel(), entry->start, entry->end);
2159 		uvm_km_pgremove(entry->object.uvm_obj,
2160 		    entry->start - vm_map_min(kernel_map),
2161 		    entry->end - vm_map_min(kernel_map));
2162 
2163 		/*
2164 		 * null out kernel_object reference, we've just
2165 		 * dropped it
2166 		 */
2167 		entry->etype &= ~UVM_ET_OBJ;
2168 		entry->object.uvm_obj = NULL;  /* to be safe */
2169 	} else {
2170 		/* remove mappings the standard way. */
2171 		pmap_remove(map->pmap, entry->start, entry->end);
2172 	}
2173 }
2174 
2175 /*
2176  * Remove all entries from start to end.
2177  *
2178  * If remove_holes, then remove ET_HOLE entries as well.
2179  * If markfree, entry will be properly marked free, otherwise, no replacement
2180  * entry will be put in the tree (corrupting the tree).
2181  */
2182 void
2183 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2184     struct uvm_map_deadq *dead, boolean_t remove_holes,
2185     boolean_t markfree)
2186 {
2187 	struct vm_map_entry *prev_hint, *next, *entry;
2188 
2189 	start = MAX(start, map->min_offset);
2190 	end = MIN(end, map->max_offset);
2191 	if (start >= end)
2192 		return;
2193 
2194 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2195 		splassert(IPL_NONE);
2196 	else
2197 		splassert(IPL_VM);
2198 
2199 	/* Find first affected entry. */
2200 	entry = uvm_map_entrybyaddr(&map->addr, start);
2201 	KDASSERT(entry != NULL && entry->start <= start);
2202 	if (entry->end <= start && markfree)
2203 		entry = RBT_NEXT(uvm_map_addr, entry);
2204 	else
2205 		UVM_MAP_CLIP_START(map, entry, start);
2206 
2207 	/*
2208 	 * Iterate entries until we reach end address.
2209 	 * prev_hint hints where the freed space can be appended to.
2210 	 */
2211 	prev_hint = NULL;
2212 	for (; entry != NULL && entry->start < end; entry = next) {
2213 		KDASSERT(entry->start >= start);
2214 		if (entry->end > end || !markfree)
2215 			UVM_MAP_CLIP_END(map, entry, end);
2216 		KDASSERT(entry->start >= start && entry->end <= end);
2217 		next = RBT_NEXT(uvm_map_addr, entry);
2218 
2219 		/* Don't remove holes unless asked to do so. */
2220 		if (UVM_ET_ISHOLE(entry)) {
2221 			if (!remove_holes) {
2222 				prev_hint = entry;
2223 				continue;
2224 			}
2225 		}
2226 
2227 		/* A stack has been removed. */
2228 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2229 			map->sserial++;
2230 
2231 		/* Kill entry. */
2232 		uvm_unmap_kill_entry(map, entry);
2233 
2234 		/* Update space usage. */
2235 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2236 		    entry->object.uvm_obj == NULL &&
2237 		    entry->protection != PROT_NONE &&
2238 		    !UVM_ET_ISHOLE(entry)) {
2239 			((struct vmspace *)map)->vm_dused -=
2240 			    uvmspace_dused(map, entry->start, entry->end);
2241 		}
2242 		if (!UVM_ET_ISHOLE(entry))
2243 			map->size -= entry->end - entry->start;
2244 
2245 		/* Actual removal of entry. */
2246 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2247 	}
2248 
2249 	pmap_update(vm_map_pmap(map));
2250 
2251 #ifdef VMMAP_DEBUG
2252 	if (markfree) {
2253 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2254 		    entry != NULL && entry->start < end;
2255 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2256 			KDASSERT(entry->end <= start ||
2257 			    entry->start == entry->end ||
2258 			    UVM_ET_ISHOLE(entry));
2259 		}
2260 	} else {
2261 		vaddr_t a;
2262 		for (a = start; a < end; a += PAGE_SIZE)
2263 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2264 	}
2265 #endif
2266 }
2267 
2268 /*
2269  * Mark all entries from first until end (exclusive) as pageable.
2270  *
2271  * Lock must be exclusive on entry and will not be touched.
2272  */
2273 void
2274 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2275     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2276 {
2277 	struct vm_map_entry *iter;
2278 
2279 	for (iter = first; iter != end;
2280 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2281 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2282 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2283 			continue;
2284 
2285 		iter->wired_count = 0;
2286 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2287 	}
2288 }
2289 
2290 /*
2291  * Mark all entries from first until end (exclusive) as wired.
2292  *
2293  * Lockflags determines the lock state on return from this function.
2294  * Lock must be exclusive on entry.
2295  */
2296 int
2297 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2298     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2299     int lockflags)
2300 {
2301 	struct vm_map_entry *iter;
2302 #ifdef DIAGNOSTIC
2303 	unsigned int timestamp_save;
2304 #endif
2305 	int error;
2306 
2307 	/*
2308 	 * Wire pages in two passes:
2309 	 *
2310 	 * 1: holding the write lock, we create any anonymous maps that need
2311 	 *    to be created.  then we clip each map entry to the region to
2312 	 *    be wired and increment its wiring count.
2313 	 *
2314 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2315 	 *    in the pages for any newly wired area (wired_count == 1).
2316 	 *
2317 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2318 	 *    deadlock with another thread that may have faulted on one of
2319 	 *    the pages to be wired (it would mark the page busy, blocking
2320 	 *    us, then in turn block on the map lock that we hold).
2321 	 *    because we keep the read lock on the map, the copy-on-write
2322 	 *    status of the entries we modify here cannot change.
2323 	 */
2324 	for (iter = first; iter != end;
2325 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2326 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2327 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2328 		    iter->protection == PROT_NONE)
2329 			continue;
2330 
2331 		/*
2332 		 * Perform actions of vm_map_lookup that need the write lock.
2333 		 * - create an anonymous map for copy-on-write
2334 		 * - anonymous map for zero-fill
2335 		 * Skip submaps.
2336 		 */
2337 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2338 		    UVM_ET_ISNEEDSCOPY(iter) &&
2339 		    ((iter->protection & PROT_WRITE) ||
2340 		    iter->object.uvm_obj == NULL)) {
2341 			amap_copy(map, iter, M_WAITOK,
2342 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2343 			    iter->start, iter->end);
2344 		}
2345 		iter->wired_count++;
2346 	}
2347 
2348 	/*
2349 	 * Pass 2.
2350 	 */
2351 #ifdef DIAGNOSTIC
2352 	timestamp_save = map->timestamp;
2353 #endif
2354 	vm_map_busy(map);
2355 	vm_map_downgrade(map);
2356 
2357 	error = 0;
2358 	for (iter = first; error == 0 && iter != end;
2359 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2360 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2361 		    iter->protection == PROT_NONE)
2362 			continue;
2363 
2364 		error = uvm_fault_wire(map, iter->start, iter->end,
2365 		    iter->protection);
2366 	}
2367 
2368 	if (error) {
2369 		/*
2370 		 * uvm_fault_wire failure
2371 		 *
2372 		 * Reacquire lock and undo our work.
2373 		 */
2374 		vm_map_upgrade(map);
2375 		vm_map_unbusy(map);
2376 #ifdef DIAGNOSTIC
2377 		if (timestamp_save != map->timestamp)
2378 			panic("uvm_map_pageable_wire: stale map");
2379 #endif
2380 
2381 		/*
2382 		 * first is no longer needed to restart loops.
2383 		 * Use it as iterator to unwire the successfully wired entries.
2384 		 */
2385 		for (; first != iter;
2386 		    first = RBT_NEXT(uvm_map_addr, first)) {
2387 			if (UVM_ET_ISHOLE(first) ||
2388 			    first->start == first->end ||
2389 			    first->protection == PROT_NONE)
2390 				continue;
2391 
2392 			first->wired_count--;
2393 			if (!VM_MAPENT_ISWIRED(first)) {
2394 				uvm_fault_unwire_locked(map,
2395 				    first->start, first->end);
2396 			}
2397 		}
2398 
2399 		/* decrease counter in the rest of the entries */
2400 		for (; iter != end;
2401 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2402 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2403 			    iter->protection == PROT_NONE)
2404 				continue;
2405 
2406 			iter->wired_count--;
2407 		}
2408 
2409 		if ((lockflags & UVM_LK_EXIT) == 0)
2410 			vm_map_unlock(map);
2411 		return error;
2412 	}
2413 
2414 	/* We are currently holding a read lock. */
2415 	if ((lockflags & UVM_LK_EXIT) == 0) {
2416 		vm_map_unbusy(map);
2417 		vm_map_unlock_read(map);
2418 	} else {
2419 		vm_map_upgrade(map);
2420 		vm_map_unbusy(map);
2421 #ifdef DIAGNOSTIC
2422 		if (timestamp_save != map->timestamp)
2423 			panic("uvm_map_pageable_wire: stale map");
2424 #endif
2425 	}
2426 	return 0;
2427 }
2428 
2429 /*
2430  * uvm_map_pageable: set pageability of a range in a map.
2431  *
2432  * Flags:
2433  * UVM_LK_ENTER: map is already locked by caller
2434  * UVM_LK_EXIT:  don't unlock map on exit
2435  *
2436  * The full range must be in use (entries may not have fspace != 0).
2437  * UVM_ET_HOLE counts as unmapped.
2438  */
2439 int
2440 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2441     boolean_t new_pageable, int lockflags)
2442 {
2443 	struct vm_map_entry *first, *last, *tmp;
2444 	int error;
2445 
2446 	start = trunc_page(start);
2447 	end = round_page(end);
2448 
2449 	if (start > end)
2450 		return EINVAL;
2451 	if (start == end)
2452 		return 0;	/* nothing to do */
2453 	if (start < map->min_offset)
2454 		return EFAULT; /* why? see first XXX below */
2455 	if (end > map->max_offset)
2456 		return EINVAL; /* why? see second XXX below */
2457 
2458 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2459 	if ((lockflags & UVM_LK_ENTER) == 0)
2460 		vm_map_lock(map);
2461 
2462 	/*
2463 	 * Find first entry.
2464 	 *
2465 	 * Initial test on start is different, because of the different
2466 	 * error returned. Rest is tested further down.
2467 	 */
2468 	first = uvm_map_entrybyaddr(&map->addr, start);
2469 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2470 		/*
2471 		 * XXX if the first address is not mapped, it is EFAULT?
2472 		 */
2473 		error = EFAULT;
2474 		goto out;
2475 	}
2476 
2477 	/* Check that the range has no holes. */
2478 	for (last = first; last != NULL && last->start < end;
2479 	    last = RBT_NEXT(uvm_map_addr, last)) {
2480 		if (UVM_ET_ISHOLE(last) ||
2481 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2482 			/*
2483 			 * XXX unmapped memory in range, why is it EINVAL
2484 			 * instead of EFAULT?
2485 			 */
2486 			error = EINVAL;
2487 			goto out;
2488 		}
2489 	}
2490 
2491 	/*
2492 	 * Last ended at the first entry after the range.
2493 	 * Move back one step.
2494 	 *
2495 	 * Note that last may be NULL.
2496 	 */
2497 	if (last == NULL) {
2498 		last = RBT_MAX(uvm_map_addr, &map->addr);
2499 		if (last->end < end) {
2500 			error = EINVAL;
2501 			goto out;
2502 		}
2503 	} else {
2504 		KASSERT(last != first);
2505 		last = RBT_PREV(uvm_map_addr, last);
2506 	}
2507 
2508 	/* Wire/unwire pages here. */
2509 	if (new_pageable) {
2510 		/*
2511 		 * Mark pageable.
2512 		 * entries that are not wired are untouched.
2513 		 */
2514 		if (VM_MAPENT_ISWIRED(first))
2515 			UVM_MAP_CLIP_START(map, first, start);
2516 		/*
2517 		 * Split last at end.
2518 		 * Make tmp be the first entry after what is to be touched.
2519 		 * If last is not wired, don't touch it.
2520 		 */
2521 		if (VM_MAPENT_ISWIRED(last)) {
2522 			UVM_MAP_CLIP_END(map, last, end);
2523 			tmp = RBT_NEXT(uvm_map_addr, last);
2524 		} else
2525 			tmp = last;
2526 
2527 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2528 		error = 0;
2529 
2530 out:
2531 		if ((lockflags & UVM_LK_EXIT) == 0)
2532 			vm_map_unlock(map);
2533 		return error;
2534 	} else {
2535 		/*
2536 		 * Mark entries wired.
2537 		 * entries are always touched (because recovery needs this).
2538 		 */
2539 		if (!VM_MAPENT_ISWIRED(first))
2540 			UVM_MAP_CLIP_START(map, first, start);
2541 		/*
2542 		 * Split last at end.
2543 		 * Make tmp be the first entry after what is to be touched.
2544 		 * If last is not wired, don't touch it.
2545 		 */
2546 		if (!VM_MAPENT_ISWIRED(last)) {
2547 			UVM_MAP_CLIP_END(map, last, end);
2548 			tmp = RBT_NEXT(uvm_map_addr, last);
2549 		} else
2550 			tmp = last;
2551 
2552 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2553 		    lockflags);
2554 	}
2555 }
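
/*
 * Example (sketch): mlock(2)-style wiring and munlock(2)-style
 * unwiring of a page-aligned user range; "addr" and "len" are assumed
 * to be the validated syscall arguments.
 */
#if 0
	/* wire */
	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr + len,
	    FALSE, 0);
	/* unwire */
	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr + len,
	    TRUE, 0);
#endif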
2556 
2557 /*
2558  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2559  * all mapped regions.
2560  *
2561  * Map must not be locked.
2562  * If no flags are specified, all ragions are unwired.
2563  */
2564 int
2565 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2566 {
2567 	vsize_t size;
2568 	struct vm_map_entry *iter;
2569 
2570 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2571 	vm_map_lock(map);
2572 
2573 	if (flags == 0) {
2574 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2575 		    NULL, map->min_offset, map->max_offset);
2576 
2577 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2578 		vm_map_unlock(map);
2579 		return 0;
2580 	}
2581 
2582 	if (flags & MCL_FUTURE)
2583 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2584 	if (!(flags & MCL_CURRENT)) {
2585 		vm_map_unlock(map);
2586 		return 0;
2587 	}
2588 
2589 	/*
2590 	 * Count number of pages in all non-wired entries.
2591 	 * If the number exceeds the limit, abort.
2592 	 */
2593 	size = 0;
2594 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2595 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2596 			continue;
2597 
2598 		size += iter->end - iter->start;
2599 	}
2600 
2601 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2602 		vm_map_unlock(map);
2603 		return ENOMEM;
2604 	}
2605 
2606 	/* XXX non-pmap_wired_count case must be handled by caller */
2607 #ifdef pmap_wired_count
2608 	if (limit != 0 &&
2609 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2610 		vm_map_unlock(map);
2611 		return ENOMEM;
2612 	}
2613 #endif
2614 
2615 	/*
2616 	 * uvm_map_pageable_wire will release lock
2617 	 */
2618 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2619 	    NULL, map->min_offset, map->max_offset, 0);
2620 }
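
/*
 * Example (sketch): mlockall(2)-style usage, wiring current and/or
 * future mappings; the limit expression is an assumption.
 */
#if 0
	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
	    MCL_CURRENT | MCL_FUTURE, lim_cur(RLIMIT_MEMLOCK));
#endif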
2621 
2622 /*
2623  * Initialize map.
2624  *
2625  * Allocates sufficient entries to describe the free memory in the map.
2626  */
2627 void
2628 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2629     int flags)
2630 {
2631 	int i;
2632 
2633 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2634 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2635 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2636 
2637 	/*
2638 	 * Update parameters.
2639 	 *
2640 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2641 	 * properly.
2642 	 * We lose the top page if the full virtual address space is used.
2643 	 */
2644 	if (max & (vaddr_t)PAGE_MASK) {
2645 		max += 1;
2646 		if (max == 0) /* overflow */
2647 			max -= PAGE_SIZE;
2648 	}
2649 
2650 	RBT_INIT(uvm_map_addr, &map->addr);
2651 	map->uaddr_exe = NULL;
2652 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2653 		map->uaddr_any[i] = NULL;
2654 	map->uaddr_brk_stack = NULL;
2655 
2656 	map->pmap = pmap;
2657 	map->size = 0;
2658 	map->ref_count = 0;
2659 	map->min_offset = min;
2660 	map->max_offset = max;
2661 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2662 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2663 	map->flags = flags;
2664 	map->timestamp = 0;
2665 	if (flags & VM_MAP_ISVMSPACE)
2666 		rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2667 	else
2668 		rw_init(&map->lock, "kmmaplk");
2669 	mtx_init(&map->mtx, IPL_VM);
2670 	mtx_init(&map->flags_lock, IPL_VM);
2671 
2672 	/* Configure the allocators. */
2673 	if (flags & VM_MAP_ISVMSPACE)
2674 		uvm_map_setup_md(map);
2675 	else
2676 		map->uaddr_any[3] = &uaddr_kbootstrap;
2677 
2678 	/*
2679 	 * Fill map entries.
2680 	 * We do not need to write-lock the map here because only the current
2681 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2682 	 * bogus triggering of lock-not-held assertions.
2683 	 */
2684 	uvm_map_setup_entries(map);
2685 	uvm_tree_sanity(map, __FILE__, __LINE__);
2686 	map->ref_count = 1;
2687 }
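
/*
 * Example (sketch): early kernel initialization sets up the kernel map
 * roughly like this; the storage name and the bounds are assumptions,
 * not taken from this file.
 */
#if 0
	uvm_map_setup(&kernel_map_store, pmap_kernel(), vmin, vmax,
	    VM_MAP_PAGEABLE);
	kernel_map = &kernel_map_store;
#endif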
2688 
2689 /*
2690  * Destroy the map.
2691  *
2692  * This is the inverse operation to uvm_map_setup.
2693  */
2694 void
2695 uvm_map_teardown(struct vm_map *map)
2696 {
2697 	struct uvm_map_deadq	 dead_entries;
2698 	struct vm_map_entry	*entry, *tmp;
2699 #ifdef VMMAP_DEBUG
2700 	size_t			 numq, numt;
2701 #endif
2702 	int			 i;
2703 
2704 	KERNEL_ASSERT_LOCKED();
2705 	KERNEL_UNLOCK();
2706 	KERNEL_ASSERT_UNLOCKED();
2707 
2708 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2709 
2710 	/* Remove address selectors. */
2711 	uvm_addr_destroy(map->uaddr_exe);
2712 	map->uaddr_exe = NULL;
2713 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2714 		uvm_addr_destroy(map->uaddr_any[i]);
2715 		map->uaddr_any[i] = NULL;
2716 	}
2717 	uvm_addr_destroy(map->uaddr_brk_stack);
2718 	map->uaddr_brk_stack = NULL;
2719 
2720 	/*
2721 	 * Remove entries.
2722 	 *
2723 	 * The following is based on graph breadth-first search.
2724 	 *
2725 	 * In color terms:
2726 	 * - the dead_entries set contains all nodes that are reachable
2727 	 *   (i.e. both the black and the grey nodes)
2728 	 * - any entry not in dead_entries is white
2729 	 * - any entry that appears in dead_entries before entry,
2730 	 *   is black, the rest is grey.
2731 	 * The set [entry, end] is also referred to as the wavefront.
2732 	 *
2733 	 * Since the tree is always a connected graph, the breadth-first
2734 	 * search guarantees that each vm_map_entry is visited exactly once.
2735 	 * The vm_map is broken down in linear time.
2736 	 */
2737 	TAILQ_INIT(&dead_entries);
2738 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2739 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2740 	while (entry != NULL) {
2741 		sched_pause(yield);
2742 		uvm_unmap_kill_entry(map, entry);
2743 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2744 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2745 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2746 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2747 		/* Update wave-front. */
2748 		entry = TAILQ_NEXT(entry, dfree.deadq);
2749 	}
2750 
2751 #ifdef VMMAP_DEBUG
2752 	numt = numq = 0;
2753 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2754 		numt++;
2755 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2756 		numq++;
2757 	KASSERT(numt == numq);
2758 #endif
2759 	uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2760 
2761 	KERNEL_LOCK();
2762 
2763 	pmap_destroy(map->pmap);
2764 	map->pmap = NULL;
2765 }
2766 
2767 /*
2768  * Populate map with free-memory entries.
2769  *
2770  * Map must be initialized and empty.
2771  */
2772 void
2773 uvm_map_setup_entries(struct vm_map *map)
2774 {
2775 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2776 
2777 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2778 }
2779 
2780 /*
2781  * Split entry at given address.
2782  *
2783  * orig:  entry that is to be split.
2784  * next:  a newly allocated map entry that is not linked.
2785  * split: address at which the split is done.
2786  */
2787 void
2788 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2789     struct vm_map_entry *next, vaddr_t split)
2790 {
2791 	struct uvm_addr_state *free, *free_before;
2792 	vsize_t adj;
2793 
2794 	if ((split & PAGE_MASK) != 0) {
2795 		panic("uvm_map_splitentry: split address 0x%lx "
2796 		    "not on page boundary!", split);
2797 	}
2798 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2799 	uvm_tree_sanity(map, __FILE__, __LINE__);
2800 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2801 
2802 #ifdef VMMAP_DEBUG
2803 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2804 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2805 #endif /* VMMAP_DEBUG */
2806 
2807 	/*
2808 	 * Free space will change, unlink from free space tree.
2809 	 */
2810 	free = uvm_map_uaddr_e(map, orig);
2811 	uvm_mapent_free_remove(map, free, orig);
2812 
2813 	adj = split - orig->start;
2814 
2815 	uvm_mapent_copy(orig, next);
2816 	if (split >= orig->end) {
2817 		next->etype = 0;
2818 		next->offset = 0;
2819 		next->wired_count = 0;
2820 		next->start = next->end = split;
2821 		next->guard = 0;
2822 		next->fspace = VMMAP_FREE_END(orig) - split;
2823 		next->aref.ar_amap = NULL;
2824 		next->aref.ar_pageoff = 0;
2825 		orig->guard = MIN(orig->guard, split - orig->end);
2826 		orig->fspace = split - VMMAP_FREE_START(orig);
2827 	} else {
2828 		orig->fspace = 0;
2829 		orig->guard = 0;
2830 		orig->end = next->start = split;
2831 
2832 		if (next->aref.ar_amap) {
2833 			amap_splitref(&orig->aref, &next->aref, adj);
2834 		}
2835 		if (UVM_ET_ISSUBMAP(orig)) {
2836 			uvm_map_reference(next->object.sub_map);
2837 			next->offset += adj;
2838 		} else if (UVM_ET_ISOBJ(orig)) {
2839 			if (next->object.uvm_obj->pgops &&
2840 			    next->object.uvm_obj->pgops->pgo_reference) {
2841 				KERNEL_LOCK();
2842 				next->object.uvm_obj->pgops->pgo_reference(
2843 				    next->object.uvm_obj);
2844 				KERNEL_UNLOCK();
2845 			}
2846 			next->offset += adj;
2847 		}
2848 	}
2849 
2850 	/*
2851 	 * Link next into address tree.
2852 	 * Link orig and next into free-space tree.
2853 	 *
2854 	 * Don't insert 'next' into the addr tree until orig has been linked,
2855 	 * in case the free-list looks at adjacent entries in the addr tree
2856 	 * for its decisions.
2857 	 */
2858 	if (orig->fspace > 0)
2859 		free_before = free;
2860 	else
2861 		free_before = uvm_map_uaddr_e(map, orig);
2862 	uvm_mapent_free_insert(map, free_before, orig);
2863 	uvm_mapent_addr_insert(map, next);
2864 	uvm_mapent_free_insert(map, free, next);
2865 
2866 	uvm_tree_sanity(map, __FILE__, __LINE__);
2867 }
2868 
2869 
2870 #ifdef VMMAP_DEBUG
2871 
2872 void
2873 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2874     char *file, int line)
2875 {
2876 	char* map_special;
2877 
2878 	if (test)
2879 		return;
2880 
2881 	if (map == kernel_map)
2882 		map_special = " (kernel_map)";
2883 	else if (map == kmem_map)
2884 		map_special = " (kmem_map)";
2885 	else
2886 		map_special = "";
2887 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2888 	    line, test_str);
2889 }
2890 
2891 /*
2892  * Check that map is sane.
2893  */
2894 void
2895 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2896 {
2897 	struct vm_map_entry	*iter;
2898 	vaddr_t			 addr;
2899 	vaddr_t			 min, max, bound; /* Bounds checker. */
2900 	struct uvm_addr_state	*free;
2901 
2902 	addr = vm_map_min(map);
2903 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2904 		/*
2905 		 * Valid start, end.
2906 		 * Catch overflow for end+fspace.
2907 		 */
2908 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2909 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2910 
2911 		/* May not be empty. */
2912 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2913 		    file, line);
2914 
2915 		/* Addresses for entry must lie within map boundaries. */
2916 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2917 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2918 
2919 		/* Tree may not have gaps. */
2920 		UVM_ASSERT(map, iter->start == addr, file, line);
2921 		addr = VMMAP_FREE_END(iter);
2922 
2923 		/*
2924 		 * Free space may not cross boundaries, unless the same
2925 		 * free list is used on both sides of the border.
2926 		 */
2927 		min = VMMAP_FREE_START(iter);
2928 		max = VMMAP_FREE_END(iter);
2929 
2930 		while (min < max &&
2931 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2932 			UVM_ASSERT(map,
2933 			    uvm_map_uaddr(map, bound - 1) ==
2934 			    uvm_map_uaddr(map, bound),
2935 			    file, line);
2936 			min = bound;
2937 		}
2938 
2939 		free = uvm_map_uaddr_e(map, iter);
2940 		if (free) {
2941 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2942 			    file, line);
2943 		} else {
2944 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2945 			    file, line);
2946 		}
2947 	}
2948 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2949 }
2950 
2951 void
2952 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2953 {
2954 	struct vm_map_entry *iter;
2955 	vsize_t size;
2956 
2957 	size = 0;
2958 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2959 		if (!UVM_ET_ISHOLE(iter))
2960 			size += iter->end - iter->start;
2961 	}
2962 
2963 	if (map->size != size)
2964 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2965 	UVM_ASSERT(map, map->size == size, file, line);
2966 
2967 	vmspace_validate(map);
2968 }
2969 
2970 /*
2971  * This function validates the statistics on vmspace.
2972  */
2973 void
2974 vmspace_validate(struct vm_map *map)
2975 {
2976 	struct vmspace *vm;
2977 	struct vm_map_entry *iter;
2978 	vaddr_t imin, imax;
2979 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2980 	vsize_t stack, heap; /* Measured sizes. */
2981 
2982 	if (!(map->flags & VM_MAP_ISVMSPACE))
2983 		return;
2984 
2985 	vm = (struct vmspace *)map;
2986 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2987 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2988 
2989 	stack = heap = 0;
2990 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2991 		imin = imax = iter->start;
2992 
2993 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
2994 		    iter->protection == PROT_NONE)
2995 			continue;
2996 
2997 		/*
2998 		 * Update stack, heap.
2999 		 * Keep in mind that (theoretically) the entries of
3000 		 * userspace and stack may be joined.
3001 		 */
3002 		while (imin != iter->end) {
3003 			/*
3004 			 * Set imax to the first boundary crossed between
3005 			 * imin and stack addresses.
3006 			 */
3007 			imax = iter->end;
3008 			if (imin < stack_begin && imax > stack_begin)
3009 				imax = stack_begin;
3010 			else if (imin < stack_end && imax > stack_end)
3011 				imax = stack_end;
3012 
3013 			if (imin >= stack_begin && imin < stack_end)
3014 				stack += imax - imin;
3015 			else
3016 				heap += imax - imin;
3017 			imin = imax;
3018 		}
3019 	}
3020 
3021 	heap >>= PAGE_SHIFT;
3022 	if (heap != vm->vm_dused) {
3023 		printf("vmspace stack range: 0x%lx-0x%lx\n",
3024 		    stack_begin, stack_end);
3025 		panic("vmspace_validate: vmspace.vm_dused invalid, "
3026 		    "expected %ld pgs, got %ld pgs in map %p",
3027 		    heap, vm->vm_dused,
3028 		    map);
3029 	}
3030 }
3031 
3032 #endif /* VMMAP_DEBUG */
3033 
3034 /*
3035  * uvm_map_init: init mapping system at boot time.   note that we allocate
3036  * and init the static pool of structs vm_map_entry for the kernel here.
3037  */
3038 void
3039 uvm_map_init(void)
3040 {
3041 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
3042 	int lcv;
3043 
3044 	/* now set up static pool of kernel map entries ... */
3045 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
3046 	SLIST_INIT(&uvm.kentry_free);
3047 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
3048 		SLIST_INSERT_HEAD(&uvm.kentry_free,
3049 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
3050 	}
3051 
3052 	/* initialize the map-related pools. */
3053 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
3054 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
3055 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
3056 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
3057 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
3058 	    IPL_VM, 0, "vmmpekpl", NULL);
3059 	pool_sethiwat(&uvm_map_entry_pool, 8192);
3060 
3061 	uvm_addr_init();
3062 }
3063 
3064 #if defined(DDB)
3065 
3066 /*
3067  * DDB hooks
3068  */
3069 
3070 /*
3071  * uvm_map_printit: actually prints the map
3072  */
3073 void
3074 uvm_map_printit(struct vm_map *map, boolean_t full,
3075     int (*pr)(const char *, ...))
3076 {
3077 	struct vmspace			*vm;
3078 	struct vm_map_entry		*entry;
3079 	struct uvm_addr_state		*free;
3080 	int				 in_free, i;
3081 	char				 buf[8];
3082 
3083 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
3084 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
3085 	    map->b_start, map->b_end);
3086 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
3087 	    map->s_start, map->s_end);
3088 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
3089 	    map->size, map->ref_count, map->timestamp,
3090 	    map->flags);
3091 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
3092 	    pmap_resident_count(map->pmap));
3093 
3094 	/* struct vmspace handling. */
3095 	if (map->flags & VM_MAP_ISVMSPACE) {
3096 		vm = (struct vmspace *)map;
3097 
3098 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
3099 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
3100 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
3101 		    vm->vm_tsize, vm->vm_dsize);
3102 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
3103 		    vm->vm_taddr, vm->vm_daddr);
3104 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
3105 		    vm->vm_maxsaddr, vm->vm_minsaddr);
3106 	}
3107 
3108 	if (!full)
3109 		goto print_uaddr;
3110 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
3111 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3112 		    entry, entry->start, entry->end, entry->object.uvm_obj,
3113 		    (long long)entry->offset, entry->aref.ar_amap,
3114 		    entry->aref.ar_pageoff);
3115 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
3116 		    "syscall=%c, prot(max)=%d/%d, inh=%d, "
3117 		    "wc=%d, adv=%d\n",
3118 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3119 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3120 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3121 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
3122 		    (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
3123 		    entry->protection, entry->max_protection,
3124 		    entry->inheritance, entry->wired_count, entry->advice);
3125 
3126 		free = uvm_map_uaddr_e(map, entry);
3127 		in_free = (free != NULL);
3128 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
3129 		    "free=0x%lx-0x%lx\n",
3130 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
3131 		    in_free ? 'T' : 'F',
3132 		    entry->guard,
3133 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
3134 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
3135 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
3136 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
3137 		if (free) {
3138 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
3139 			    free->uaddr_minaddr, free->uaddr_maxaddr,
3140 			    free->uaddr_functions->uaddr_name);
3141 		}
3142 	}
3143 
3144 print_uaddr:
3145 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
3146 	for (i = 0; i < nitems(map->uaddr_any); i++) {
3147 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
3148 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
3149 	}
3150 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
3151 }
3152 
3153 /*
3154  * uvm_object_printit: actually prints the object
3155  */
3156 void
3157 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
3158     int (*pr)(const char *, ...))
3159 {
3160 	struct vm_page *pg;
3161 	int cnt = 0;
3162 
3163 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3164 	    uobj, uobj->pgops, uobj->uo_npages);
3165 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3166 		(*pr)("refs=<SYSTEM>\n");
3167 	else
3168 		(*pr)("refs=%d\n", uobj->uo_refs);
3169 
3170 	if (!full) {
3171 		return;
3172 	}
3173 	(*pr)("  PAGES <pg,offset>:\n  ");
3174 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3175 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3176 		if ((cnt % 3) == 2) {
3177 			(*pr)("\n  ");
3178 		}
3179 		cnt++;
3180 	}
3181 	if ((cnt % 3) != 2) {
3182 		(*pr)("\n");
3183 	}
3184 }
3185 
3186 /*
3187  * uvm_page_printit: actually print the page
3188  */
3189 static const char page_flagbits[] =
3190 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3191 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3192 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3193 
3194 void
3195 uvm_page_printit(struct vm_page *pg, boolean_t full,
3196     int (*pr)(const char *, ...))
3197 {
3198 	struct vm_page *tpg;
3199 	struct uvm_object *uobj;
3200 	struct pglist *pgl;
3201 
3202 	(*pr)("PAGE %p:\n", pg);
3203 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3204 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3205 	    (long long)pg->phys_addr);
3206 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
3207 	    pg->uobject, pg->uanon, (long long)pg->offset);
3208 #if defined(UVM_PAGE_TRKOWN)
3209 	if (pg->pg_flags & PG_BUSY)
3210 		(*pr)("  owning thread = %d, tag=%s",
3211 		    pg->owner, pg->owner_tag);
3212 	else
3213 		(*pr)("  page not busy, no owner");
3214 #else
3215 	(*pr)("  [page ownership tracking disabled]");
3216 #endif
3217 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
3218 
3219 	if (!full)
3220 		return;
3221 
3222 	/* cross-verify object/anon */
3223 	if ((pg->pg_flags & PQ_FREE) == 0) {
3224 		if (pg->pg_flags & PQ_ANON) {
3225 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3226 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3227 				(pg->uanon) ? pg->uanon->an_page : NULL);
3228 			else
3229 				(*pr)("  anon backpointer is OK\n");
3230 		} else {
3231 			uobj = pg->uobject;
3232 			if (uobj) {
3233 				(*pr)("  checking object list\n");
3234 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3235 					if (tpg == pg) {
3236 						break;
3237 					}
3238 				}
3239 				if (tpg)
3240 					(*pr)("  page found on object list\n");
3241 				else
3242 					(*pr)("  >>> PAGE NOT FOUND "
3243 					    "ON OBJECT LIST! <<<\n");
3244 			}
3245 		}
3246 	}
3247 
3248 	/* cross-verify page queue */
3249 	if (pg->pg_flags & PQ_FREE) {
3250 		if (uvm_pmr_isfree(pg))
3251 			(*pr)("  page found in uvm_pmemrange\n");
3252 		else
3253 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3254 		pgl = NULL;
3255 	} else if (pg->pg_flags & PQ_INACTIVE) {
3256 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3257 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3258 	} else if (pg->pg_flags & PQ_ACTIVE) {
3259 		pgl = &uvm.page_active;
3260  	} else {
3261 		pgl = NULL;
3262 	}
3263 
3264 	if (pgl) {
3265 		(*pr)("  checking pageq list\n");
3266 		TAILQ_FOREACH(tpg, pgl, pageq) {
3267 			if (tpg == pg) {
3268 				break;
3269 			}
3270 		}
3271 		if (tpg)
3272 			(*pr)("  page found on pageq list\n");
3273 		else
3274 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3275 	}
3276 }
3277 #endif
3278 
3279 /*
3280  * uvm_map_protect: change map protection
3281  *
3282  * => set_max means set max_protection.
3283  * => map must be unlocked.
3284  */
3285 int
3286 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3287     vm_prot_t new_prot, boolean_t set_max)
3288 {
3289 	struct vm_map_entry *first, *iter;
3290 	vm_prot_t old_prot;
3291 	vm_prot_t mask;
3292 	vsize_t dused;
3293 	int error;
3294 
3295 	if (start > end)
3296 		return EINVAL;
3297 	start = MAX(start, map->min_offset);
3298 	end = MIN(end, map->max_offset);
3299 	if (start >= end)
3300 		return 0;
3301 
3302 	dused = 0;
3303 	error = 0;
3304 	vm_map_lock(map);
3305 
3306 	/*
3307 	 * Set up first and last.
3308 	 * - first will contain first entry at or after start.
3309 	 */
3310 	first = uvm_map_entrybyaddr(&map->addr, start);
3311 	KDASSERT(first != NULL);
3312 	if (first->end <= start)
3313 		first = RBT_NEXT(uvm_map_addr, first);
3314 
3315 	/* First, check for protection violations. */
3316 	for (iter = first; iter != NULL && iter->start < end;
3317 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3318 		/* Treat memory holes as free space. */
3319 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3320 			continue;
3321 
3322 		old_prot = iter->protection;
3323 		if (old_prot == PROT_NONE && new_prot != old_prot) {
3324 			dused += uvmspace_dused(
3325 			    map, MAX(start, iter->start), MIN(end, iter->end));
3326 		}
3327 
3328 		if (UVM_ET_ISSUBMAP(iter)) {
3329 			error = EINVAL;
3330 			goto out;
3331 		}
3332 		if ((new_prot & iter->max_protection) != new_prot) {
3333 			error = EACCES;
3334 			goto out;
3335 		}
3336 		if (map == kernel_map &&
3337 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3338 			panic("uvm_map_protect: kernel map W^X violation requested");
3339 	}
3340 
3341 	/* Check limits. */
3342 	if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3343 		vsize_t limit = lim_cur(RLIMIT_DATA);
3344 		dused = ptoa(dused);
3345 		if (limit < dused ||
3346 		    limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3347 			error = ENOMEM;
3348 			goto out;
3349 		}
3350 	}
3351 
3352 	/* Fix protections.  */
3353 	for (iter = first; iter != NULL && iter->start < end;
3354 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3355 		/* Treat memory holes as free space. */
3356 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3357 			continue;
3358 
3359 		old_prot = iter->protection;
3360 
3361 		/*
3362 		 * Skip adapting protection iff old and new protection
3363 		 * are equal.
3364 		 */
3365 		if (set_max) {
3366 			if (old_prot == (new_prot & old_prot) &&
3367 			    iter->max_protection == new_prot)
3368 				continue;
3369 		} else {
3370 			if (old_prot == new_prot)
3371 				continue;
3372 		}
3373 
3374 		UVM_MAP_CLIP_START(map, iter, start);
3375 		UVM_MAP_CLIP_END(map, iter, end);
3376 
3377 		if (set_max) {
3378 			iter->max_protection = new_prot;
3379 			iter->protection &= new_prot;
3380 		} else
3381 			iter->protection = new_prot;
3382 
3383 		/*
3384 		 * update physical map if necessary.  worry about copy-on-write
3385 		 * here -- CHECK THIS XXX
3386 		 */
3387 		if (iter->protection != old_prot) {
3388 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3389 			    ~PROT_WRITE : PROT_MASK;
3390 
3391 			/* XXX should only wserial++ if no split occurs */
3392 			if (iter->protection & PROT_WRITE)
3393 				map->wserial++;
3394 
3395 			if (map->flags & VM_MAP_ISVMSPACE) {
3396 				if (old_prot == PROT_NONE) {
3397 					((struct vmspace *)map)->vm_dused +=
3398 					    uvmspace_dused(map, iter->start,
3399 					        iter->end);
3400 				}
3401 				if (iter->protection == PROT_NONE) {
3402 					((struct vmspace *)map)->vm_dused -=
3403 					    uvmspace_dused(map, iter->start,
3404 					        iter->end);
3405 				}
3406 			}
3407 
3408 			/* update pmap */
3409 			if ((iter->protection & mask) == PROT_NONE &&
3410 			    VM_MAPENT_ISWIRED(iter)) {
3411 				/*
3412 				 * TODO(ariane) this is stupid. wired_count
3413 				 * is 0 if not wired, otherwise anything
3414 				 * larger than 0 (incremented once each time
3415 				 * wire is called).
3416 				 * Mostly to be able to undo the damage on
3417 				 * failure; it is not actually a wire
3418 				 * refcounter...
3419 				 * Originally: iter->wired_count--;
3420 				 * (don't we have to unwire this in the pmap
3421 				 * as well?)
3422 				 */
3423 				iter->wired_count = 0;
3424 			}
3425 			pmap_protect(map->pmap, iter->start, iter->end,
3426 			    iter->protection & mask);
3427 		}
3428 
3429 		/*
3430 		 * If the map is configured to lock any future mappings,
3431 		 * wire this entry now if the old protection was PROT_NONE
3432 		 * and the new protection is not PROT_NONE.
3433 		 */
3434 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3435 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3436 		    old_prot == PROT_NONE &&
3437 		    new_prot != PROT_NONE) {
3438 			if (uvm_map_pageable(map, iter->start, iter->end,
3439 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3440 				/*
3441 				 * If locking the entry fails, remember the
3442 				 * error if it's the first one.  Note we
3443 				 * still continue setting the protection in
3444 				 * the map, but it will return the resource
3445 				 * storage condition regardless.
3446 				 *
3447 				 * XXX Ignore what the actual error is,
3448 				 * XXX just call it a resource shortage
3449 				 * XXX so that it doesn't get confused
3450 				 * XXX what uvm_map_protect() itself would
3451 				 * XXX normally return.
3452 				 */
3453 				error = ENOMEM;
3454 			}
3455 		}
3456 	}
3457 	pmap_update(map->pmap);
3458 
3459 out:
3460 	vm_map_unlock(map);
3461 	return error;
3462 }
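
/*
 * Example (sketch): mprotect(2)-style usage, changing the current
 * protection of a page-aligned range without touching max_protection.
 */
#if 0
	error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + len,
	    prot, FALSE);
#endif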
3463 
3464 /*
3465  * uvmspace_alloc: allocate a vmspace structure.
3466  *
3467  * - structure includes vm_map and pmap
3468  * - XXX: no locking on this structure
3469  * - refcnt set to 1, rest must be init'd by caller
3470  */
3471 struct vmspace *
3472 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3473     boolean_t remove_holes)
3474 {
3475 	struct vmspace *vm;
3476 
3477 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3478 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3479 	return (vm);
3480 }
3481 
3482 /*
3483  * uvmspace_init: initialize a vmspace structure.
3484  *
3485  * - XXX: no locking on this structure
3486  * - refcnt set to 1, rest must be init'd by caller
3487  */
3488 void
3489 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3490     boolean_t pageable, boolean_t remove_holes)
3491 {
3492 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3493 
3494 	if (pmap)
3495 		pmap_reference(pmap);
3496 	else
3497 		pmap = pmap_create();
3498 
3499 	uvm_map_setup(&vm->vm_map, pmap, min, max,
3500 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3501 
3502 	vm->vm_refcnt = 1;
3503 
3504 	if (remove_holes)
3505 		pmap_remove_holes(vm);
3506 }
3507 
3508 /*
3509  * uvmspace_share: share a vmspace between two processes
3510  *
3511  * - used for vfork
3512  */
3513 
3514 struct vmspace *
3515 uvmspace_share(struct process *pr)
3516 {
3517 	struct vmspace *vm = pr->ps_vmspace;
3518 
3519 	uvmspace_addref(vm);
3520 	return vm;
3521 }
3522 
3523 /*
3524  * uvmspace_exec: the process wants to exec a new program
3525  *
3526  * - XXX: no locking on vmspace
3527  */
3528 
3529 void
3530 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3531 {
3532 	struct process *pr = p->p_p;
3533 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3534 	struct vm_map *map = &ovm->vm_map;
3535 	struct uvm_map_deadq dead_entries;
3536 
3537 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3538 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3539 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3540 
3541 	pmap_unuse_final(p);   /* before stack addresses go away */
3542 	TAILQ_INIT(&dead_entries);
3543 
3544 	/* see if more than one process is using this vmspace...  */
3545 	if (ovm->vm_refcnt == 1) {
3546 		/*
3547 		 * If pr is the only process using its vmspace then
3548 		 * we can safely recycle that vmspace for the program
3549 		 * that is being exec'd.
3550 		 */
3551 
3552 #ifdef SYSVSHM
3553 		/*
3554 		 * SYSV SHM semantics require us to kill all segments on an exec
3555 		 */
3556 		if (ovm->vm_shm)
3557 			shmexit(ovm);
3558 #endif
3559 
3560 		/*
3561 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3562 		 * when a process execs another program image.
3563 		 */
3564 		vm_map_lock(map);
3565 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
3566 
3567 		/*
3568 		 * now unmap the old program
3569 		 *
3570 		 * Instead of attempting to keep the map valid, we simply
3571 		 * nuke all entries and ask uvm_map_setup to reinitialize
3572 		 * the map to the new boundaries.
3573 		 *
3574 		 * uvm_unmap_remove will actually nuke all entries for us
3575 		 * (as in, not replace them with free-memory entries).
3576 		 */
3577 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3578 		    &dead_entries, TRUE, FALSE);
3579 
3580 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3581 
3582 		/* Nuke statistics and boundaries. */
3583 		memset(&ovm->vm_startcopy, 0,
3584 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3585 
3586 
3587 		if (end & (vaddr_t)PAGE_MASK) {
3588 			end += 1;
3589 			if (end == 0) /* overflow */
3590 				end -= PAGE_SIZE;
3591 		}
3592 
3593 		/* Setup new boundaries and populate map with entries. */
3594 		map->min_offset = start;
3595 		map->max_offset = end;
3596 		uvm_map_setup_entries(map);
3597 		vm_map_unlock(map);
3598 
3599 		/* but keep MMU holes unavailable */
3600 		pmap_remove_holes(ovm);
3601 	} else {
3602 		/*
3603 		 * pr's vmspace is being shared, so we can't reuse
3604 		 * it for pr since it is still being used by others.
3605 		 * Allocate a new vmspace for pr.
3606 		 */
3607 		nvm = uvmspace_alloc(start, end,
3608 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3609 
3610 		/* install new vmspace and drop our ref to the old one. */
3611 		pmap_deactivate(p);
3612 		p->p_vmspace = pr->ps_vmspace = nvm;
3613 		pmap_activate(p);
3614 
3615 		uvmspace_free(ovm);
3616 	}
3617 
3618 	/* Release dead entries */
3619 	uvm_unmap_detach(&dead_entries, 0);
3620 }
3621 
3622 /*
3623  * uvmspace_addref: add a reference to a vmspace.
3624  */
3625 void
3626 uvmspace_addref(struct vmspace *vm)
3627 {
3628 	KERNEL_ASSERT_LOCKED();
3629 	KASSERT(vm->vm_refcnt > 0);
3630 
3631 	vm->vm_refcnt++;
3632 }
3633 
3634 /*
3635  * uvmspace_free: free a vmspace data structure
3636  */
3637 void
3638 uvmspace_free(struct vmspace *vm)
3639 {
3640 	KERNEL_ASSERT_LOCKED();
3641 
3642 	if (--vm->vm_refcnt == 0) {
3643 		/*
3644 		 * lock the map, to wait out all other references to it.  delete
3645 		 * all of the mappings and pages they hold, then call the pmap
3646 		 * module to reclaim anything left.
3647 		 */
3648 #ifdef SYSVSHM
3649 		/* Get rid of any SYSV shared memory segments. */
3650 		if (vm->vm_shm != NULL)
3651 			shmexit(vm);
3652 #endif
3653 
3654 		uvm_map_teardown(&vm->vm_map);
3655 		pool_put(&uvm_vmspace_pool, vm);
3656 	}
3657 }
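
/*
 * Sketch of the reference-count pairing: a hypothetical consumer that
 * wants to keep a process' vmspace alive takes a reference with
 * uvmspace_addref() and drops it again with uvmspace_free().  Both
 * expect the kernel lock to be held.
 */
#if 0	/* example only, not compiled */
	struct vmspace *vm = pr->ps_vmspace;

	KERNEL_LOCK();
	uvmspace_addref(vm);
	KERNEL_UNLOCK();

	/* ... use vm->vm_map ... */

	KERNEL_LOCK();
	uvmspace_free(vm);	/* tears down the map on the last reference */
	KERNEL_UNLOCK();
#endif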
3658 
3659 /*
3660  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3661  * srcmap to the address range [dstaddr, dstaddr + sz) in
3662  * dstmap.
3663  *
3664  * The whole address range in srcmap must be backed by an object
3665  * (no holes).
3666  *
3667  * If successful, the address ranges share memory and the destination
3668  * address range uses the protection flags in prot.
3669  *
3670  * This routine assumes that sz is a multiple of PAGE_SIZE and
3671  * that dstaddr and srcaddr are page-aligned.
3672  */
3673 int
3674 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3675     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3676 {
3677 	int ret = 0;
3678 	vaddr_t unmap_end;
3679 	vaddr_t dstva;
3680 	vsize_t s_off, len, n = sz, remain;
3681 	struct vm_map_entry *first = NULL, *last = NULL;
3682 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3683 	struct uvm_map_deadq dead;
3684 
3685 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3686 		return EINVAL;
3687 
3688 	TAILQ_INIT(&dead);
3689 	vm_map_lock(dstmap);
3690 	vm_map_lock_read(srcmap);
3691 
3692 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3693 		ret = ENOMEM;
3694 		goto exit_unlock;
3695 	}
3696 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3697 		ret = EINVAL;
3698 		goto exit_unlock;
3699 	}
3700 
3701 	dstva = dstaddr;
3702 	unmap_end = dstaddr;
3703 	for (; src_entry != NULL;
3704 	    psrc_entry = src_entry,
3705 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3706 		/* hole in address space, bail out */
3707 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3708 			break;
3709 		if (src_entry->start >= srcaddr + sz)
3710 			break;
3711 
3712 		if (UVM_ET_ISSUBMAP(src_entry))
3713 			panic("uvm_share: encountered a submap (illegal)");
3714 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3715 		    UVM_ET_ISNEEDSCOPY(src_entry))
3716 			panic("uvm_share: non-copy_on_write map entries "
3717 			    "marked needs_copy (illegal)");
3718 
3719 		/*
3720 		 * If srcaddr > map entry start, we are in the middle of an
3721 		 * entry, so calculate the offset to use in the source map.
3722 		 */
3723 		if (srcaddr > src_entry->start)
3724 			s_off = srcaddr - src_entry->start;
3725 		else if (srcaddr == src_entry->start)
3726 			s_off = 0;
3727 		else
3728 			panic("uvm_share: map entry start > srcaddr");
3729 
3730 		remain = src_entry->end - src_entry->start - s_off;
3731 
3732 		/* Determine how many bytes to share in this pass */
3733 		if (n < remain)
3734 			len = n;
3735 		else
3736 			len = remain;
3737 
3738 		if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3739 		    srcmap, src_entry, &dead) == NULL)
3740 			break;
3741 
3742 		n -= len;
3743 		dstva += len;
3744 		srcaddr += len;
3745 		unmap_end = dstva + len;
3746 		if (n == 0)
3747 			goto exit_unlock;
3748 	}
3749 
3750 	ret = EINVAL;
3751 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
3752 
3753 exit_unlock:
3754 	vm_map_unlock_read(srcmap);
3755 	vm_map_unlock(dstmap);
3756 	uvm_unmap_detach(&dead, 0);
3757 
3758 	return ret;
3759 }
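
/*
 * Sketch of a hypothetical uvm_share() call: mirror sz bytes that are
 * already mapped at srcva in src_vm into dst_vm at dstva, read/write.
 * srcva, dstva and sz are assumed page-aligned, as required above.
 */
#if 0	/* example only, not compiled */
	int error;

	error = uvm_share(&dst_vm->vm_map, dstva, PROT_READ | PROT_WRITE,
	    &src_vm->vm_map, srcva, sz);
	if (error)
		return error;
#endif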
3760 
3761 /*
3762  * Clone map entry into other map.
3763  *
3764  * Mapping will be placed at dstaddr, for the same length.
3765  * Space must be available.
3766  * Reference counters are incremented.
3767  */
3768 struct vm_map_entry *
3769 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3770     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3771     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3772     int mapent_flags, int amap_share_flags)
3773 {
3774 	struct vm_map_entry *new_entry, *first, *last;
3775 
3776 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3777 
3778 	/* Create new entry (linked in on creation). Fill in first, last. */
3779 	first = last = NULL;
3780 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3781 		panic("uvm_mapent_clone: no space in map for "
3782 		    "entry in empty map");
3783 	}
3784 	new_entry = uvm_map_mkentry(dstmap, first, last,
3785 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3786 	if (new_entry == NULL)
3787 		return NULL;
3788 	/* old_entry -> new_entry */
3789 	new_entry->object = old_entry->object;
3790 	new_entry->offset = old_entry->offset;
3791 	new_entry->aref = old_entry->aref;
3792 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3793 	new_entry->protection = prot;
3794 	new_entry->max_protection = maxprot;
3795 	new_entry->inheritance = old_entry->inheritance;
3796 	new_entry->advice = old_entry->advice;
3797 
3798 	/* gain reference to object backing the map (can't be a submap). */
3799 	if (new_entry->aref.ar_amap) {
3800 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3801 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3802 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3803 		    amap_share_flags);
3804 	}
3805 
3806 	if (UVM_ET_ISOBJ(new_entry) &&
3807 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3808 		new_entry->offset += off;
3809 		new_entry->object.uvm_obj->pgops->pgo_reference
3810 		    (new_entry->object.uvm_obj);
3811 	}
3812 
3813 	return new_entry;
3814 }
3815 
3816 struct vm_map_entry *
3817 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3818     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3819     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3820 {
3821 	/*
3822 	 * If old_entry refers to a copy-on-write region that has not yet been
3823 	 * written to (needs_copy flag is set), then we need to allocate a new
3824 	 * amap for old_entry.
3825 	 *
3826 	 * If we do not do this, and the process owning old_entry later does a
3827 	 * copy-on-write, old_entry and new_entry will refer to different memory
3828 	 * regions, and the memory between the processes is no longer shared.
3829 	 *
3830 	 * [in other words, we need to clear needs_copy]
3831 	 */
3832 
3833 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3834 		/* get our own amap, clears needs_copy */
3835 		amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3836 		/* XXXCDC: WAITOK??? */
3837 	}
3838 
3839 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3840 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3841 }
3842 
3843 /*
3844  * share the mapping: this means we want the old and
3845  * new entries to share amaps and backing objects.
3846  */
3847 struct vm_map_entry *
3848 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3849     struct vm_map *old_map,
3850     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3851 {
3852 	struct vm_map_entry *new_entry;
3853 
3854 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3855 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3856 	    old_entry->max_protection, old_map, old_entry, dead);
3857 
3858 	/*
3859 	 * pmap_copy the mappings: this routine is optional
3860 	 * but if it is there it will reduce the number of
3861 	 * page faults in the new proc.
3862 	 */
3863 	if (!UVM_ET_ISHOLE(new_entry))
3864 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3865 		    (new_entry->end - new_entry->start), new_entry->start);
3866 
3867 	return (new_entry);
3868 }
3869 
3870 /*
3871  * copy-on-write the mapping (using mmap's
3872  * MAP_PRIVATE semantics)
3873  *
3874  * allocate new_entry, adjust reference counts.
3875  * (note that new references are read-only).
3876  */
3877 struct vm_map_entry *
3878 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3879     struct vm_map *old_map,
3880     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3881 {
3882 	struct vm_map_entry	*new_entry;
3883 	boolean_t		 protect_child;
3884 
3885 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3886 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3887 	    old_entry->max_protection, old_entry, dead, 0, 0);
3888 
3889 	new_entry->etype |=
3890 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3891 
3892 	/*
3893 	 * the new entry will need an amap.  it will either
3894 	 * need to be copied from the old entry or created
3895 	 * from scratch (if the old entry does not have an
3896 	 * amap).  can we defer this process until later
3897 	 * (by setting "needs_copy") or do we need to copy
3898 	 * the amap now?
3899 	 *
3900 	 * we must copy the amap now if any of the following
3901 	 * conditions hold:
3902 	 * 1. the old entry has an amap and that amap is
3903 	 *    being shared.  this means that the old (parent)
3904 	 *    process is sharing the amap with another
3905 	 *    process.  if we do not clear needs_copy here
3906 	 *    we will end up in a situation where both the
3907 	 *    parent and child process are referring to the
3908 	 *    same amap with "needs_copy" set.  if the
3909 	 *    parent write-faults, the fault routine will
3910 	 *    clear "needs_copy" in the parent by allocating
3911 	 *    a new amap.   this is wrong because the
3912 	 *    parent is supposed to be sharing the old amap
3913 	 *    and the new amap will break that.
3914 	 *
3915 	 * 2. if the old entry has an amap and a non-zero
3916 	 *    wire count then we are going to have to call
3917 	 *    amap_cow_now to avoid page faults in the
3918 	 *    parent process.   since amap_cow_now requires
3919 	 *    "needs_copy" to be clear we might as well
3920 	 *    clear it here as well.
3921 	 *
3922 	 */
3923 	if (old_entry->aref.ar_amap != NULL &&
3924 	    ((amap_flags(old_entry->aref.ar_amap) &
3925 	    AMAP_SHARED) != 0 ||
3926 	    VM_MAPENT_ISWIRED(old_entry))) {
3927 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3928 		    0, 0);
3929 		/* XXXCDC: M_WAITOK ... ok? */
3930 	}
3931 
3932 	/*
3933 	 * if the parent's entry is wired down, then the
3934 	 * parent process does not want page faults on
3935 	 * access to that memory.  this means that we
3936 	 * cannot do copy-on-write because we can't write
3937 	 * protect the old entry.   in this case we
3938 	 * resolve all copy-on-write faults now, using
3939 	 * amap_cow_now.   note that we have already
3940 	 * allocated any needed amap (above).
3941 	 */
3942 	if (VM_MAPENT_ISWIRED(old_entry)) {
3943 		/*
3944 		 * resolve all copy-on-write faults now
3945 		 * (note that there is nothing to do if
3946 		 * the old mapping does not have an amap).
3947 		 * XXX: is it worthwhile to bother with
3948 		 * pmap_copy in this case?
3949 		 */
3950 		if (old_entry->aref.ar_amap)
3951 			amap_cow_now(new_map, new_entry);
3952 	} else {
3953 		if (old_entry->aref.ar_amap) {
3954 			/*
3955 			 * setup mappings to trigger copy-on-write faults
3956 			 * we must write-protect the parent if it has
3957 			 * an amap and it is not already "needs_copy"...
3958 			 * if it is already "needs_copy" then the parent
3959 			 * has already been write-protected by a previous
3960 			 * fork operation.
3961 			 *
3962 			 * if we do not write-protect the parent, then
3963 			 * we must be sure to write-protect the child
3964 			 * after the pmap_copy() operation.
3965 			 *
3966 			 * XXX: pmap_copy should have some way of telling
3967 			 * us that it didn't do anything so we can avoid
3968 			 * calling pmap_protect needlessly.
3969 			 */
3970 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3971 				if (old_entry->max_protection & PROT_WRITE) {
3972 					pmap_protect(old_map->pmap,
3973 					    old_entry->start,
3974 					    old_entry->end,
3975 					    old_entry->protection &
3976 					    ~PROT_WRITE);
3977 					pmap_update(old_map->pmap);
3978 				}
3979 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3980 			}
3981 
3982 	  		/* parent must now be write-protected */
3983 	  		protect_child = FALSE;
3984 		} else {
3985 			/*
3986 			 * we only need to protect the child if the
3987 			 * parent has write access.
3988 			 */
3989 			if (old_entry->max_protection & PROT_WRITE)
3990 				protect_child = TRUE;
3991 			else
3992 				protect_child = FALSE;
3993 		}
3994 		/*
3995 		 * copy the mappings
3996 		 * XXX: need a way to tell if this does anything
3997 		 */
3998 		if (!UVM_ET_ISHOLE(new_entry))
3999 			pmap_copy(new_map->pmap, old_map->pmap,
4000 			    new_entry->start,
4001 			    (old_entry->end - old_entry->start),
4002 			    old_entry->start);
4003 
4004 		/* protect the child's mappings if necessary */
4005 		if (protect_child) {
4006 			pmap_protect(new_map->pmap, new_entry->start,
4007 			    new_entry->end,
4008 			    new_entry->protection &
4009 			    ~PROT_WRITE);
4010 		}
4011 	}
4012 
4013 	return (new_entry);
4014 }
4015 
4016 /*
4017  * zero the mapping: the new entry will be zero initialized
4018  */
4019 struct vm_map_entry *
4020 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
4021     struct vm_map *old_map,
4022     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
4023 {
4024 	struct vm_map_entry *new_entry;
4025 
4026 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
4027 	    old_entry->end - old_entry->start, 0, old_entry->protection,
4028 	    old_entry->max_protection, old_entry, dead, 0, 0);
4029 
4030 	new_entry->etype |=
4031 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
4032 
4033 	if (new_entry->aref.ar_amap) {
4034 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
4035 		    atop(new_entry->end - new_entry->start), 0);
4036 		new_entry->aref.ar_amap = NULL;
4037 		new_entry->aref.ar_pageoff = 0;
4038 	}
4039 
4040 	if (UVM_ET_ISOBJ(new_entry)) {
4041 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
4042 			new_entry->object.uvm_obj->pgops->pgo_detach(
4043 			    new_entry->object.uvm_obj);
4044 		new_entry->object.uvm_obj = NULL;
4045 		new_entry->etype &= ~UVM_ET_OBJ;
4046 	}
4047 
4048 	return (new_entry);
4049 }
4050 
4051 /*
4052  * uvmspace_fork: fork a process' main map
4053  *
4054  * => create a new vmspace for child process from parent.
4055  * => parent's map must not be locked.
4056  */
4057 struct vmspace *
4058 uvmspace_fork(struct process *pr)
4059 {
4060 	struct vmspace *vm1 = pr->ps_vmspace;
4061 	struct vmspace *vm2;
4062 	struct vm_map *old_map = &vm1->vm_map;
4063 	struct vm_map *new_map;
4064 	struct vm_map_entry *old_entry, *new_entry;
4065 	struct uvm_map_deadq dead;
4066 
4067 	vm_map_lock(old_map);
4068 
4069 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
4070 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
4071 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
4072 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
4073 	vm2->vm_dused = 0; /* Statistic managed by us. */
4074 	new_map = &vm2->vm_map;
4075 	vm_map_lock(new_map);
4076 
4077 	/* go entry-by-entry */
4078 	TAILQ_INIT(&dead);
4079 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
4080 		if (old_entry->start == old_entry->end)
4081 			continue;
4082 
4083 		/* first, some sanity checks on the old entry */
4084 		if (UVM_ET_ISSUBMAP(old_entry)) {
4085 			panic("fork: encountered a submap during fork "
4086 			    "(illegal)");
4087 		}
4088 
4089 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
4090 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
4091 			panic("fork: non-copy_on_write map entry marked "
4092 			    "needs_copy (illegal)");
4093 		}
4094 
4095 		/* Apply inheritance. */
4096 		switch (old_entry->inheritance) {
4097 		case MAP_INHERIT_SHARE:
4098 			new_entry = uvm_mapent_forkshared(vm2, new_map,
4099 			    old_map, old_entry, &dead);
4100 			break;
4101 		case MAP_INHERIT_COPY:
4102 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
4103 			    old_map, old_entry, &dead);
4104 			break;
4105 		case MAP_INHERIT_ZERO:
4106 			new_entry = uvm_mapent_forkzero(vm2, new_map,
4107 			    old_map, old_entry, &dead);
4108 			break;
4109 		default:
4110 			continue;
4111 		}
4112 
4113 	 	/* Update process statistics. */
4114 		if (!UVM_ET_ISHOLE(new_entry))
4115 			new_map->size += new_entry->end - new_entry->start;
4116 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
4117 		    new_entry->protection != PROT_NONE) {
4118 			vm2->vm_dused += uvmspace_dused(
4119 			    new_map, new_entry->start, new_entry->end);
4120 		}
4121 	}
4122 
4123 	vm_map_unlock(old_map);
4124 	vm_map_unlock(new_map);
4125 
4126 	/*
4127 	 * This can actually happen if multiple entries described a
4128 	 * space in which an entry was inherited.
4129 	 */
4130 	uvm_unmap_detach(&dead, 0);
4131 
4132 #ifdef SYSVSHM
4133 	if (vm1->vm_shm)
4134 		shmfork(vm1, vm2);
4135 #endif
4136 
4137 	return vm2;
4138 }
4139 
4140 /*
4141  * uvm_map_hint: return the beginning of the best area suitable for
4142  * creating a new mapping with "prot" protection.
4143  */
4144 vaddr_t
4145 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
4146     vaddr_t maxaddr)
4147 {
4148 	vaddr_t addr;
4149 	vaddr_t spacing;
4150 
4151 #ifdef __i386__
4152 	/*
4153 	 * If executable skip first two pages, otherwise start
4154 	 * after data + heap region.
4155 	 */
4156 	if ((prot & PROT_EXEC) != 0 &&
4157 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
4158 		addr = (PAGE_SIZE*2) +
4159 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
4160 		return (round_page(addr));
4161 	}
4162 #endif
4163 
4164 #if defined (__LP64__)
4165 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4166 #else
4167 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4168 #endif
4169 
4170 	/*
4171 	 * Start malloc/mmap after the brk.
4172 	 */
4173 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4174 	addr = MAX(addr, minaddr);
4175 
4176 	if (addr < maxaddr) {
4177 		while (spacing > maxaddr - addr)
4178 			spacing >>= 1;
4179 	}
4180 	addr += arc4random() & spacing;
4181 	return (round_page(addr));
4182 }
4183 
4184 /*
4185  * uvm_map_submap: punch down part of a map into a submap
4186  *
4187  * => only the kernel_map is allowed to be submapped
4188  * => the purpose of submapping is to break up the locking granularity
4189  *	of a larger map
4190  * => the range specified must have been mapped previously with a uvm_map()
4191  *	call [with uobj==NULL] to create a blank map entry in the main map.
4192  *	[And it had better still be blank!]
4193  * => maps which contain submaps should never be copied or forked.
4194  * => to remove a submap, use uvm_unmap() on the main map
4195  *	and then uvm_map_deallocate() the submap.
4196  * => main map must be unlocked.
4197  * => submap must have been init'd and have a zero reference count.
4198  *	[need not be locked as we don't actually reference it]
4199  */
4200 int
4201 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4202     struct vm_map *submap)
4203 {
4204 	struct vm_map_entry *entry;
4205 	int result;
4206 
4207 	if (start > map->max_offset || end > map->max_offset ||
4208 	    start < map->min_offset || end < map->min_offset)
4209 		return EINVAL;
4210 
4211 	vm_map_lock(map);
4212 
4213 	if (uvm_map_lookup_entry(map, start, &entry)) {
4214 		UVM_MAP_CLIP_START(map, entry, start);
4215 		UVM_MAP_CLIP_END(map, entry, end);
4216 	} else
4217 		entry = NULL;
4218 
4219 	if (entry != NULL &&
4220 	    entry->start == start && entry->end == end &&
4221 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4222 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4223 		entry->etype |= UVM_ET_SUBMAP;
4224 		entry->object.sub_map = submap;
4225 		entry->offset = 0;
4226 		uvm_map_reference(submap);
4227 		result = 0;
4228 	} else
4229 		result = EINVAL;
4230 
4231 	vm_map_unlock(map);
4232 	return result;
4233 }
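
/*
 * Sketch of the submap protocol described above, with hypothetical
 * start/end bounds.  The range [start, end) must already be a blank
 * entry in kernel_map (mapped earlier with uobj == NULL); in-tree
 * callers typically reach this sequence through uvm_km_suballoc().
 */
#if 0	/* example only, not compiled */
	struct vm_map *submap;
	int error;

	submap = uvm_map_create(pmap_kernel(), start, end, VM_MAP_INTRSAFE);
	error = uvm_map_submap(kernel_map, start, end, submap);
	if (error)
		uvm_map_deallocate(submap);
#endif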
4234 
4235 /*
4236  * uvm_map_checkprot: check protection in map
4237  *
4238  * => must allow the specified protection in a fully allocated region.
4239  * => map must be read or write locked by caller.
4240  */
4241 boolean_t
4242 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4243     vm_prot_t protection)
4244 {
4245 	struct vm_map_entry *entry;
4246 
4247 	if (start < map->min_offset || end > map->max_offset || start > end)
4248 		return FALSE;
4249 	if (start == end)
4250 		return TRUE;
4251 
4252 	/*
4253 	 * Iterate entries.
4254 	 */
4255 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4256 	    entry != NULL && entry->start < end;
4257 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4258 		/* Fail if a hole is found. */
4259 		if (UVM_ET_ISHOLE(entry) ||
4260 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4261 			return FALSE;
4262 
4263 		/* Check protection. */
4264 		if ((entry->protection & protection) != protection)
4265 			return FALSE;
4266 	}
4267 	return TRUE;
4268 }
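
/*
 * Sketch of a uvm_map_checkprot() caller: verify that a hypothetical
 * range [va, va + len) is fully mapped and writable before touching
 * it.  The map must be read- or write-locked around the check.
 */
#if 0	/* example only, not compiled */
	boolean_t ok;

	vm_map_lock_read(map);
	ok = uvm_map_checkprot(map, va, va + len, PROT_READ | PROT_WRITE);
	vm_map_unlock_read(map);
	if (!ok)
		return EFAULT;
#endif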
4269 
4270 /*
4271  * uvm_map_create: create map
4272  */
4273 vm_map_t
4274 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4275 {
4276 	vm_map_t map;
4277 
4278 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4279 	uvm_map_setup(map, pmap, min, max, flags);
4280 	return (map);
4281 }
4282 
4283 /*
4284  * uvm_map_deallocate: drop reference to a map
4285  *
4286  * => caller must not lock map
4287  * => we will zap map if ref count goes to zero
4288  */
4289 void
4290 uvm_map_deallocate(vm_map_t map)
4291 {
4292 	int c;
4293 	struct uvm_map_deadq dead;
4294 
4295 	c = atomic_dec_int_nv(&map->ref_count);
4296 	if (c > 0) {
4297 		return;
4298 	}
4299 
4300 	/*
4301 	 * all references gone.   unmap and free.
4302 	 *
4303 	 * No lock required: we are only one to access this map.
4304 	 */
4305 	TAILQ_INIT(&dead);
4306 	uvm_tree_sanity(map, __FILE__, __LINE__);
4307 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4308 	    TRUE, FALSE);
4309 	pmap_destroy(map->pmap);
4310 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4311 	free(map, M_VMMAP, sizeof *map);
4312 
4313 	uvm_unmap_detach(&dead, 0);
4314 }
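
/*
 * Sketch of the create/deallocate pairing: uvm_map_create() returns a
 * map holding a single reference, which uvm_map_deallocate() drops;
 * on the last reference the map is emptied and freed along with its
 * pmap.  The min/max bounds are hypothetical.
 */
#if 0	/* example only, not compiled */
	struct vm_map *map;

	map = uvm_map_create(pmap_create(), min, max, VM_MAP_PAGEABLE);
	/* ... populate and use the map ... */
	uvm_map_deallocate(map);
#endif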
4315 
4316 /*
4317  * uvm_map_inherit: set inheritance code for range of addrs in map.
4318  *
4319  * => map must be unlocked
4320  * => note that the inherit code is used during a "fork".  see fork
4321  *	code for details.
4322  */
4323 int
4324 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4325     vm_inherit_t new_inheritance)
4326 {
4327 	struct vm_map_entry *entry;
4328 
4329 	switch (new_inheritance) {
4330 	case MAP_INHERIT_NONE:
4331 	case MAP_INHERIT_COPY:
4332 	case MAP_INHERIT_SHARE:
4333 	case MAP_INHERIT_ZERO:
4334 		break;
4335 	default:
4336 		return (EINVAL);
4337 	}
4338 
4339 	if (start > end)
4340 		return EINVAL;
4341 	start = MAX(start, map->min_offset);
4342 	end = MIN(end, map->max_offset);
4343 	if (start >= end)
4344 		return 0;
4345 
4346 	vm_map_lock(map);
4347 
4348 	entry = uvm_map_entrybyaddr(&map->addr, start);
4349 	if (entry->end > start)
4350 		UVM_MAP_CLIP_START(map, entry, start);
4351 	else
4352 		entry = RBT_NEXT(uvm_map_addr, entry);
4353 
4354 	while (entry != NULL && entry->start < end) {
4355 		UVM_MAP_CLIP_END(map, entry, end);
4356 		entry->inheritance = new_inheritance;
4357 		entry = RBT_NEXT(uvm_map_addr, entry);
4358 	}
4359 
4360 	vm_map_unlock(map);
4361 	return (0);
4362 }
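
/*
 * Sketch tying uvm_map_inherit() to the fork paths above: marking a
 * hypothetical range MAP_INHERIT_SHARE makes uvmspace_fork() hand it
 * to uvm_mapent_forkshared() instead of the default copy path.
 */
#if 0	/* example only, not compiled */
	int error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + len,
	    MAP_INHERIT_SHARE);
#endif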
4363 
4364 /*
4365  * uvm_map_syscall: permit system calls for range of addrs in map.
4366  *
4367  * => map must be unlocked
4368  */
4369 int
4370 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
4371 {
4372 	struct vm_map_entry *entry;
4373 
4374 	if (start > end)
4375 		return EINVAL;
4376 	start = MAX(start, map->min_offset);
4377 	end = MIN(end, map->max_offset);
4378 	if (start >= end)
4379 		return 0;
4380 	if (map->flags & VM_MAP_SYSCALL_ONCE)	/* only allowed once */
4381 		return (EPERM);
4382 
4383 	vm_map_lock(map);
4384 
4385 	entry = uvm_map_entrybyaddr(&map->addr, start);
4386 	if (entry->end > start)
4387 		UVM_MAP_CLIP_START(map, entry, start);
4388 	else
4389 		entry = RBT_NEXT(uvm_map_addr, entry);
4390 
4391 	while (entry != NULL && entry->start < end) {
4392 		UVM_MAP_CLIP_END(map, entry, end);
4393 		entry->etype |= UVM_ET_SYSCALL;
4394 		entry = RBT_NEXT(uvm_map_addr, entry);
4395 	}
4396 
4397 	map->wserial++;
4398 	map->flags |= VM_MAP_SYSCALL_ONCE;
4399 	vm_map_unlock(map);
4400 	return (0);
4401 }
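
/*
 * Sketch of a uvm_map_syscall() call, roughly what the msyscall(2)
 * path does for the libc text range: mark [addr, addr + len) as the
 * only region allowed to enter system calls.  Only the first call per
 * map succeeds; later ones fail with EPERM once VM_MAP_SYSCALL_ONCE
 * is set.
 */
#if 0	/* example only, not compiled */
	int error;

	error = uvm_map_syscall(&p->p_vmspace->vm_map, addr, addr + len);
#endif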
4402 
4403 /*
4404  * uvm_map_advice: set advice code for range of addrs in map.
4405  *
4406  * => map must be unlocked
4407  */
4408 int
4409 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4410 {
4411 	struct vm_map_entry *entry;
4412 
4413 	switch (new_advice) {
4414 	case MADV_NORMAL:
4415 	case MADV_RANDOM:
4416 	case MADV_SEQUENTIAL:
4417 		break;
4418 	default:
4419 		return (EINVAL);
4420 	}
4421 
4422 	if (start > end)
4423 		return EINVAL;
4424 	start = MAX(start, map->min_offset);
4425 	end = MIN(end, map->max_offset);
4426 	if (start >= end)
4427 		return 0;
4428 
4429 	vm_map_lock(map);
4430 
4431 	entry = uvm_map_entrybyaddr(&map->addr, start);
4432 	if (entry != NULL && entry->end > start)
4433 		UVM_MAP_CLIP_START(map, entry, start);
4434 	else if (entry != NULL)
4435 		entry = RBT_NEXT(uvm_map_addr, entry);
4436 
4437 	/*
4438 	 * XXXJRT: disallow holes?
4439 	 */
4440 	while (entry != NULL && entry->start < end) {
4441 		UVM_MAP_CLIP_END(map, entry, end);
4442 		entry->advice = new_advice;
4443 		entry = RBT_NEXT(uvm_map_addr, entry);
4444 	}
4445 
4446 	vm_map_unlock(map);
4447 	return (0);
4448 }
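
/*
 * Sketch of a uvm_map_advice() call as sys_madvise() might issue it
 * for a hypothetical range: switch the paging advice to MADV_RANDOM.
 * The map must be unlocked on entry.
 */
#if 0	/* example only, not compiled */
	int error;

	error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + len,
	    MADV_RANDOM);
#endif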
4449 
4450 /*
4451  * uvm_map_extract: extract a mapping from a map and put it somewhere
4452  * in the kernel_map, setting protection to max_prot.
4453  *
4454  * => map should be unlocked (we will write lock it and kernel_map)
4455  * => returns 0 on success, error code otherwise
4456  * => start must be page aligned
4457  * => len must be page sized
4458  * => flags:
4459  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4460  * Mappings are QREF's.
4461  */
4462 int
4463 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4464     vaddr_t *dstaddrp, int flags)
4465 {
4466 	struct uvm_map_deadq dead;
4467 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4468 	vaddr_t dstaddr;
4469 	vaddr_t end;
4470 	vaddr_t cp_start;
4471 	vsize_t cp_len, cp_off;
4472 	int error;
4473 
4474 	TAILQ_INIT(&dead);
4475 	end = start + len;
4476 
4477 	/*
4478 	 * Sanity check on the parameters.
4479 	 * Also, since the mapping may not contain gaps, error out if the
4480 	 * mapped area is not in the source map.
4481 	 */
4482 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4483 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4484 		return EINVAL;
4485 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4486 		return EINVAL;
4487 
4488 	/* Initialize dead entries. Handle len == 0 case. */
4489 	if (len == 0)
4490 		return 0;
4491 
4492 	/* Acquire lock on srcmap. */
4493 	vm_map_lock(srcmap);
4494 
4495 	/* Lock srcmap, lookup first and last entry in <start,len>. */
4496 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4497 
4498 	/* Check that the range is contiguous. */
4499 	for (entry = first; entry != NULL && entry->end < end;
4500 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4501 		if (VMMAP_FREE_END(entry) != entry->end ||
4502 		    UVM_ET_ISHOLE(entry)) {
4503 			error = EINVAL;
4504 			goto fail;
4505 		}
4506 	}
4507 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4508 		error = EINVAL;
4509 		goto fail;
4510 	}
4511 
4512 	/*
4513 	 * Handle need-copy flag.
4514 	 */
4515 	for (entry = first; entry != NULL && entry->start < end;
4516 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4517 		if (UVM_ET_ISNEEDSCOPY(entry))
4518 			amap_copy(srcmap, entry, M_NOWAIT,
4519 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4520 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4521 			/*
4522 			 * amap_copy failure
4523 			 */
4524 			error = ENOMEM;
4525 			goto fail;
4526 		}
4527 	}
4528 
4529 	/* Lock destination map (kernel_map). */
4530 	vm_map_lock(kernel_map);
4531 
4532 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4533 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4534 	    PROT_NONE, 0) != 0) {
4535 		error = ENOMEM;
4536 		goto fail2;
4537 	}
4538 	*dstaddrp = dstaddr;
4539 
4540 	/*
4541 	 * We now have srcmap and kernel_map locked.
4542 	 * dstaddr contains the destination offset in dstmap.
4543 	 */
4544 	/* step 1: start looping through map entries, performing extraction. */
4545 	for (entry = first; entry != NULL && entry->start < end;
4546 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4547 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4548 		if (UVM_ET_ISHOLE(entry))
4549 			continue;
4550 
4551 		/* Calculate uvm_mapent_clone parameters. */
4552 		cp_start = entry->start;
4553 		if (cp_start < start) {
4554 			cp_off = start - cp_start;
4555 			cp_start = start;
4556 		} else
4557 			cp_off = 0;
4558 		cp_len = MIN(entry->end, end) - cp_start;
4559 
4560 		newentry = uvm_mapent_clone(kernel_map,
4561 		    cp_start - start + dstaddr, cp_len, cp_off,
4562 		    entry->protection, entry->max_protection,
4563 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4564 		if (newentry == NULL) {
4565 			error = ENOMEM;
4566 			goto fail2_unmap;
4567 		}
4568 		kernel_map->size += cp_len;
4569 		if (flags & UVM_EXTRACT_FIXPROT)
4570 			newentry->protection = newentry->max_protection;
4571 
4572 		/*
4573 		 * Step 2: perform pmap copy.
4574 		 * (Doing this in the loop saves one RB traversal.)
4575 		 */
4576 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4577 		    cp_start - start + dstaddr, cp_len, cp_start);
4578 	}
4579 	pmap_update(kernel_map->pmap);
4580 
4581 	error = 0;
4582 
4583 	/* Unmap copied entries on failure. */
4584 fail2_unmap:
4585 	if (error) {
4586 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4587 		    FALSE, TRUE);
4588 	}
4589 
4590 	/* Release maps, release dead entries. */
4591 fail2:
4592 	vm_map_unlock(kernel_map);
4593 
4594 fail:
4595 	vm_map_unlock(srcmap);
4596 
4597 	uvm_unmap_detach(&dead, 0);
4598 
4599 	return error;
4600 }
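
/*
 * Sketch of a uvm_map_extract() call: double-map a page-aligned user
 * range into kernel_map with the protection raised to maxprot.  The
 * uvm_unmap() call is assumed as the usual cleanup counterpart for
 * the kernel mapping.
 */
#if 0	/* example only, not compiled */
	vaddr_t kva;
	int error;

	error = uvm_map_extract(&p->p_vmspace->vm_map, uva, len, &kva,
	    UVM_EXTRACT_FIXPROT);
	if (error == 0) {
		/* ... access the user data through kva ... */
		uvm_unmap(kernel_map, kva, kva + len);
	}
#endif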
4601 
4602 /*
4603  * uvm_map_clean: clean out a map range
4604  *
4605  * => valid flags:
4606  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4607  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4608  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4609  *   if (flags & PGO_FREE): any cached pages are freed after clean
4610  * => returns an error if any part of the specified range isn't mapped
4611  * => never a need to flush amap layer since the anonymous memory has
4612  *	no permanent home, but may deactivate pages there
4613  * => called from sys_msync() and sys_madvise()
4614  * => caller must not write-lock map (read OK).
4615  * => we may sleep while cleaning if SYNCIO [with map read-locked]
4616  */
4617 
4618 int
4619 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4620 {
4621 	struct vm_map_entry *first, *entry;
4622 	struct vm_amap *amap;
4623 	struct vm_anon *anon;
4624 	struct vm_page *pg;
4625 	struct uvm_object *uobj;
4626 	vaddr_t cp_start, cp_end;
4627 	int refs;
4628 	int error;
4629 	boolean_t rv;
4630 
4631 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4632 	    (PGO_FREE|PGO_DEACTIVATE));
4633 
4634 	if (start > end || start < map->min_offset || end > map->max_offset)
4635 		return EINVAL;
4636 
4637 	vm_map_lock_read(map);
4638 	first = uvm_map_entrybyaddr(&map->addr, start);
4639 
4640 	/* Make a first pass to check for holes. */
4641 	for (entry = first; entry != NULL && entry->start < end;
4642 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4643 		if (UVM_ET_ISSUBMAP(entry)) {
4644 			vm_map_unlock_read(map);
4645 			return EINVAL;
4646 		}
4647 		if (UVM_ET_ISSUBMAP(entry) ||
4648 		    UVM_ET_ISHOLE(entry) ||
4649 		    (entry->end < end &&
4650 		    VMMAP_FREE_END(entry) != entry->end)) {
4651 			vm_map_unlock_read(map);
4652 			return EFAULT;
4653 		}
4654 	}
4655 
4656 	error = 0;
4657 	for (entry = first; entry != NULL && entry->start < end;
4658 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4659 		amap = entry->aref.ar_amap;	/* top layer */
4660 		if (UVM_ET_ISOBJ(entry))
4661 			uobj = entry->object.uvm_obj;
4662 		else
4663 			uobj = NULL;
4664 
4665 		/*
4666 		 * No amap cleaning necessary if:
4667 		 *  - there's no amap
4668 		 *  - we're not deactivating or freeing pages.
4669 		 */
4670 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4671 			goto flush_object;
4672 
4673 		cp_start = MAX(entry->start, start);
4674 		cp_end = MIN(entry->end, end);
4675 
4676 		amap_lock(amap);
4677 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4678 			anon = amap_lookup(&entry->aref,
4679 			    cp_start - entry->start);
4680 			if (anon == NULL)
4681 				continue;
4682 
4683 			KASSERT(anon->an_lock == amap->am_lock);
4684 			pg = anon->an_page;
4685 			if (pg == NULL) {
4686 				continue;
4687 			}
4688 			KASSERT(pg->pg_flags & PQ_ANON);
4689 
4690 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4691 			/*
4692 			 * XXX In these first 3 cases, we always just
4693 			 * XXX deactivate the page.  We may want to
4694 			 * XXX handle the different cases more
4695 			 * XXX specifically, in the future.
4696 			 */
4697 			case PGO_CLEANIT|PGO_FREE:
4698 			case PGO_CLEANIT|PGO_DEACTIVATE:
4699 			case PGO_DEACTIVATE:
4700 deactivate_it:
4701 				/* skip the page if it's wired */
4702 				if (pg->wire_count != 0)
4703 					break;
4704 
4705 				uvm_lock_pageq();
4706 
4707 				KASSERT(pg->uanon == anon);
4708 
4709 				/* zap all mappings for the page. */
4710 				pmap_page_protect(pg, PROT_NONE);
4711 
4712 				/* ...and deactivate the page. */
4713 				uvm_pagedeactivate(pg);
4714 
4715 				uvm_unlock_pageq();
4716 				break;
4717 			case PGO_FREE:
4718 				/*
4719 				 * If there are multiple references to
4720 				 * the amap, just deactivate the page.
4721 				 */
4722 				if (amap_refs(amap) > 1)
4723 					goto deactivate_it;
4724 
4725 				/* XXX skip the page if it's wired */
4726 				if (pg->wire_count != 0) {
4727 					break;
4728 				}
4729 				amap_unadd(&entry->aref,
4730 				    cp_start - entry->start);
4731 				refs = --anon->an_ref;
4732 				if (refs == 0)
4733 					uvm_anfree(anon);
4734 				break;
4735 			default:
4736 				panic("uvm_map_clean: weird flags");
4737 			}
4738 		}
4739 		amap_unlock(amap);
4740 
4741 flush_object:
4742 		cp_start = MAX(entry->start, start);
4743 		cp_end = MIN(entry->end, end);
4744 
4745 		/*
4746 		 * flush pages if we've got a valid backing object.
4747 		 *
4748 		 * Don't PGO_FREE if we don't have write permission
4749 		 * and don't flush if this is a copy-on-write object
4750 		 * since we can't know our permissions on it.
4751 		 */
4752 		if (uobj != NULL &&
4753 		    ((flags & PGO_FREE) == 0 ||
4754 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4755 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4756 			rv = uobj->pgops->pgo_flush(uobj,
4757 			    cp_start - entry->start + entry->offset,
4758 			    cp_end - entry->start + entry->offset, flags);
4759 
4760 			if (rv == FALSE)
4761 				error = EFAULT;
4762 		}
4763 	}
4764 
4765 	vm_map_unlock_read(map);
4766 	return error;
4767 }
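
/*
 * Sketch of a uvm_map_clean() call in the style of msync(2) with
 * MS_SYNC: write back dirty pages in a hypothetical page-aligned
 * range synchronously, without freeing or deactivating them.
 */
#if 0	/* example only, not compiled */
	int error;

	error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + len,
	    PGO_CLEANIT | PGO_SYNCIO);
#endif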
4768 
4769 /*
4770  * UVM_MAP_CLIP_END implementation
4771  */
4772 void
4773 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4774 {
4775 	struct vm_map_entry *tmp;
4776 
4777 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4778 	tmp = uvm_mapent_alloc(map, 0);
4779 
4780 	/* Invoke splitentry. */
4781 	uvm_map_splitentry(map, entry, tmp, addr);
4782 }
4783 
4784 /*
4785  * UVM_MAP_CLIP_START implementation
4786  *
4787  * Clippers are required to not change the pointers to the entry they are
4788  * clipping on.
4789  * Since uvm_map_splitentry turns the original entry into the lowest
4790  * entry (address wise) we do a swap between the new entry and the original
4791  * entry, prior to calling uvm_map_splitentry.
4792  */
4793 void
4794 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4795 {
4796 	struct vm_map_entry *tmp;
4797 	struct uvm_addr_state *free;
4798 
4799 	/* Unlink original. */
4800 	free = uvm_map_uaddr_e(map, entry);
4801 	uvm_mapent_free_remove(map, free, entry);
4802 	uvm_mapent_addr_remove(map, entry);
4803 
4804 	/* Copy entry. */
4805 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4806 	tmp = uvm_mapent_alloc(map, 0);
4807 	uvm_mapent_copy(entry, tmp);
4808 
4809 	/* Put new entry in place of original entry. */
4810 	uvm_mapent_addr_insert(map, tmp);
4811 	uvm_mapent_free_insert(map, free, tmp);
4812 
4813 	/* Invoke splitentry. */
4814 	uvm_map_splitentry(map, tmp, entry, addr);
4815 }
4816 
4817 /*
4818  * Boundary fixer.
4819  */
4820 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4821 static inline vaddr_t
4822 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4823 {
4824 	return (min < bound && max > bound) ? bound : max;
4825 }
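
/*
 * Worked example of uvm_map_boundfix(): with min = 0x1000 and
 * max = 0x5000, a boundary at 0x3000 falls inside the range, so max
 * is clipped down to 0x3000; a boundary at 0x6000 falls outside, so
 * max = 0x5000 is returned unchanged.
 */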
4826 
4827 /*
4828  * Choose free list based on address at start of free space.
4829  *
4830  * The uvm_addr_state returned contains addr and is the first of:
4831  * - uaddr_exe
4832  * - uaddr_brk_stack
4833  * - uaddr_any
4834  */
4835 struct uvm_addr_state*
4836 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4837 {
4838 	struct uvm_addr_state *uaddr;
4839 	int i;
4840 
4841 	/* Special case the first page, to prevent mmap from returning 0. */
4842 	if (addr < VMMAP_MIN_ADDR)
4843 		return NULL;
4844 
4845 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4846 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4847 		if (addr >= uvm_maxkaddr)
4848 			return NULL;
4849 	}
4850 
4851 	/* Is the address inside the exe-only map? */
4852 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4853 	    addr < map->uaddr_exe->uaddr_maxaddr)
4854 		return map->uaddr_exe;
4855 
4856 	/* Check if the space falls inside brk/stack area. */
4857 	if ((addr >= map->b_start && addr < map->b_end) ||
4858 	    (addr >= map->s_start && addr < map->s_end)) {
4859 		if (map->uaddr_brk_stack != NULL &&
4860 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4861 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4862 			return map->uaddr_brk_stack;
4863 		} else
4864 			return NULL;
4865 	}
4866 
4867 	/*
4868 	 * Check the other selectors.
4869 	 *
4870 	 * These selectors are only marked as the owner if they have insert
4871 	 * functions.
4872 	 */
4873 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4874 		uaddr = map->uaddr_any[i];
4875 		if (uaddr == NULL)
4876 			continue;
4877 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4878 			continue;
4879 
4880 		if (addr >= uaddr->uaddr_minaddr &&
4881 		    addr < uaddr->uaddr_maxaddr)
4882 			return uaddr;
4883 	}
4884 
4885 	return NULL;
4886 }
4887 
4888 /*
4889  * Choose free list based on address at start of free space.
4890  *
4891  * The uvm_addr_state returned contains addr and is the first of:
4892  * - uaddr_exe
4893  * - uaddr_brk_stack
4894  * - uaddr_any
4895  */
4896 struct uvm_addr_state*
4897 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4898 {
4899 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4900 }
4901 
4902 /*
4903  * Returns the first free-memory boundary that is crossed by [min-max].
4904  */
4905 vsize_t
4906 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4907 {
4908 	struct uvm_addr_state	*uaddr;
4909 	int			 i;
4910 
4911 	/* Never return first page. */
4912 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4913 
4914 	/* Treat uvm_maxkaddr specially if the map is a kernel map. */
4915 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4916 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4917 
4918 	/* Check for exe-only boundaries. */
4919 	if (map->uaddr_exe != NULL) {
4920 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4921 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4922 	}
4923 
4924 	/* Check for brk/stack boundaries. */
4925 	if (map->uaddr_brk_stack != NULL) {
4926 		max = uvm_map_boundfix(min, max,
4927 		    map->uaddr_brk_stack->uaddr_minaddr);
4928 		max = uvm_map_boundfix(min, max,
4929 		    map->uaddr_brk_stack->uaddr_maxaddr);
4930 	}
4931 
4932 	/* Check other boundaries. */
4933 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4934 		uaddr = map->uaddr_any[i];
4935 		if (uaddr != NULL) {
4936 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4937 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4938 		}
4939 	}
4940 
4941 	/* Boundaries at stack and brk() area. */
4942 	max = uvm_map_boundfix(min, max, map->s_start);
4943 	max = uvm_map_boundfix(min, max, map->s_end);
4944 	max = uvm_map_boundfix(min, max, map->b_start);
4945 	max = uvm_map_boundfix(min, max, map->b_end);
4946 
4947 	return max;
4948 }
4949 
4950 /*
4951  * Update map allocation start and end addresses from proc vmspace.
4952  */
4953 void
4954 uvm_map_vmspace_update(struct vm_map *map,
4955     struct uvm_map_deadq *dead, int flags)
4956 {
4957 	struct vmspace *vm;
4958 	vaddr_t b_start, b_end, s_start, s_end;
4959 
4960 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4961 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4962 
4963 	/*
4964 	 * Derive actual allocation boundaries from vmspace.
4965 	 */
4966 	vm = (struct vmspace *)map;
4967 	b_start = (vaddr_t)vm->vm_daddr;
4968 	b_end   = b_start + BRKSIZ;
4969 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4970 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4971 #ifdef DIAGNOSTIC
4972 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4973 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4974 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4975 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4976 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4977 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4978 		    vm, b_start, b_end, s_start, s_end);
4979 	}
4980 #endif
4981 
4982 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4983 	    map->s_start == s_start && map->s_end == s_end))
4984 		return;
4985 
4986 	uvm_map_freelist_update(map, dead, b_start, b_end,
4987 	    s_start, s_end, flags);
4988 }
4989 
4990 /*
4991  * Grow kernel memory.
4992  *
4993  * This function is only called for kernel maps when an allocation fails.
4994  *
4995  * If the map has a gap that is large enough to accommodate alloc_sz, this
4996  * function will make sure map->free includes it.
4997  */
4998 void
4999 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
5000     vsize_t alloc_sz, int flags)
5001 {
5002 	vsize_t sz;
5003 	vaddr_t end;
5004 	struct vm_map_entry *entry;
5005 
5006 	/* Kernel memory only. */
5007 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
5008 	/* Destroy free list. */
5009 	uvm_map_freelist_update_clear(map, dead);
5010 
5011 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
5012 	if (map->flags & VM_MAP_GUARDPAGES)
5013 		alloc_sz += PAGE_SIZE;
5014 
5015 	/*
5016 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
5017 	 *
5018 	 * Don't handle the case where the multiplication overflows:
5019 	 * if that happens, the allocation is probably too big anyway.
5020 	 */
5021 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
5022 
5023 	/*
5024 	 * Walk forward until a gap large enough for alloc_sz shows up.
5025 	 *
5026 	 * We assume the kernel map has no boundaries.
5027 	 * uvm_maxkaddr may be zero.
5028 	 */
5029 	end = MAX(uvm_maxkaddr, map->min_offset);
5030 	entry = uvm_map_entrybyaddr(&map->addr, end);
5031 	while (entry && entry->fspace < alloc_sz)
5032 		entry = RBT_NEXT(uvm_map_addr, entry);
5033 	if (entry) {
5034 		end = MAX(VMMAP_FREE_START(entry), end);
5035 		end += MIN(sz, map->max_offset - end);
5036 	} else
5037 		end = map->max_offset;
5038 
5039 	/* Reserve pmap entries. */
5040 #ifdef PMAP_GROWKERNEL
5041 	uvm_maxkaddr = pmap_growkernel(end);
5042 #else
5043 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
5044 #endif
5045 
5046 	/* Rebuild free list. */
5047 	uvm_map_freelist_update_refill(map, flags);
5048 }
5049 
5050 /*
5051  * Freelist update subfunction: unlink all entries from freelists.
5052  */
5053 void
5054 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
5055 {
5056 	struct uvm_addr_state *free;
5057 	struct vm_map_entry *entry, *prev, *next;
5058 
5059 	prev = NULL;
5060 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
5061 	    entry = next) {
5062 		next = RBT_NEXT(uvm_map_addr, entry);
5063 
5064 		free = uvm_map_uaddr_e(map, entry);
5065 		uvm_mapent_free_remove(map, free, entry);
5066 
5067 		if (prev != NULL && entry->start == entry->end) {
5068 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
5069 			uvm_mapent_addr_remove(map, entry);
5070 			DEAD_ENTRY_PUSH(dead, entry);
5071 		} else
5072 			prev = entry;
5073 	}
5074 }
5075 
5076 /*
5077  * Freelist update subfunction: refill the freelists with entries.
5078  */
5079 void
5080 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
5081 {
5082 	struct vm_map_entry *entry;
5083 	vaddr_t min, max;
5084 
5085 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5086 		min = VMMAP_FREE_START(entry);
5087 		max = VMMAP_FREE_END(entry);
5088 		entry->fspace = 0;
5089 
5090 		entry = uvm_map_fix_space(map, entry, min, max, flags);
5091 	}
5092 
5093 	uvm_tree_sanity(map, __FILE__, __LINE__);
5094 }
5095 
5096 /*
5097  * Change {a,b}_{start,end} allocation ranges and associated free lists.
5098  */
5099 void
5100 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
5101     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
5102 {
5103 	KDASSERT(b_end >= b_start && s_end >= s_start);
5104 
5105 	/* Clear all free lists. */
5106 	uvm_map_freelist_update_clear(map, dead);
5107 
5108 	/* Apply new bounds. */
5109 	map->b_start = b_start;
5110 	map->b_end   = b_end;
5111 	map->s_start = s_start;
5112 	map->s_end   = s_end;
5113 
5114 	/* Refill free lists. */
5115 	uvm_map_freelist_update_refill(map, flags);
5116 }
5117 
5118 /*
5119  * Assign a uvm_addr_state to the specified pointer in vm_map.
5120  *
5121  * May sleep.
5122  */
5123 void
5124 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
5125     struct uvm_addr_state *newval)
5126 {
5127 	struct uvm_map_deadq dead;
5128 
5129 	/* Pointer which must be in this map. */
5130 	KASSERT(which != NULL);
5131 	KASSERT((void*)map <= (void*)(which) &&
5132 	    (void*)(which) < (void*)(map + 1));
5133 
5134 	vm_map_lock(map);
5135 	TAILQ_INIT(&dead);
5136 	uvm_map_freelist_update_clear(map, &dead);
5137 
5138 	uvm_addr_destroy(*which);
5139 	*which = newval;
5140 
5141 	uvm_map_freelist_update_refill(map, 0);
5142 	vm_map_unlock(map);
5143 	uvm_unmap_detach(&dead, 0);
5144 }
5145 
5146 /*
5147  * Correct space insert.
5148  *
5149  * Entry must not be on any freelist.
5150  */
5151 struct vm_map_entry*
5152 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5153     vaddr_t min, vaddr_t max, int flags)
5154 {
5155 	struct uvm_addr_state	*free, *entfree;
5156 	vaddr_t			 lmax;
5157 
5158 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5159 	KDASSERT(min <= max);
5160 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5161 	    min == map->min_offset);
5162 
5163 	/*
5164 	 * During the function, entfree will always point at the uaddr state
5165 	 * for entry.
5166 	 */
5167 	entfree = (entry == NULL ? NULL :
5168 	    uvm_map_uaddr_e(map, entry));
5169 
5170 	while (min != max) {
5171 		/* Claim guard page for entry. */
5172 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5173 		    VMMAP_FREE_END(entry) == entry->end &&
5174 		    entry->start != entry->end) {
5175 			if (max - min == 2 * PAGE_SIZE) {
5176 				/*
5177 				 * If the free-space gap is exactly 2 pages,
5178 				 * we make the guard 2 pages instead of 1.
5179 				 * Because in a guarded map, an area needs
5180 				 * at least 2 pages to allocate from:
5181 				 * one page for the allocation and one for
5182 				 * the guard.
5183 				 */
5184 				entry->guard = 2 * PAGE_SIZE;
5185 				min = max;
5186 			} else {
5187 				entry->guard = PAGE_SIZE;
5188 				min += PAGE_SIZE;
5189 			}
5190 			continue;
5191 		}
5192 
5193 		/*
5194 		 * Handle the case where entry has a 2-page guard, but the
5195 		 * space after entry is freed.
5196 		 */
5197 		if (entry != NULL && entry->fspace == 0 &&
5198 		    entry->guard > PAGE_SIZE) {
5199 			entry->guard = PAGE_SIZE;
5200 			min = VMMAP_FREE_START(entry);
5201 		}
5202 
5203 		lmax = uvm_map_boundary(map, min, max);
5204 		free = uvm_map_uaddr(map, min);
5205 
5206 		/*
5207 		 * Entries are merged if they point at the same uvm_free().
5208 		 * Exception to that rule: if min == uvm_maxkaddr, a new
5209 		 * entry is started regardless (otherwise the allocators
5210 		 * will get confused).
5211 		 */
5212 		if (entry != NULL && free == entfree &&
5213 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5214 		    min == uvm_maxkaddr)) {
5215 			KDASSERT(VMMAP_FREE_END(entry) == min);
5216 			entry->fspace += lmax - min;
5217 		} else {
5218 			/*
5219 			 * Commit entry to the free list: no more free space
5220 			 * will be added to it.
5221 			 * We'll start a new entry and add to that entry
5222 			 * instead.
5223 			 */
5224 			if (entry != NULL)
5225 				uvm_mapent_free_insert(map, entfree, entry);
5226 
5227 			/* New entry for new uaddr. */
5228 			entry = uvm_mapent_alloc(map, flags);
5229 			KDASSERT(entry != NULL);
5230 			entry->end = entry->start = min;
5231 			entry->guard = 0;
5232 			entry->fspace = lmax - min;
5233 			entry->object.uvm_obj = NULL;
5234 			entry->offset = 0;
5235 			entry->etype = 0;
5236 			entry->protection = entry->max_protection = 0;
5237 			entry->inheritance = 0;
5238 			entry->wired_count = 0;
5239 			entry->advice = 0;
5240 			entry->aref.ar_pageoff = 0;
5241 			entry->aref.ar_amap = NULL;
5242 			uvm_mapent_addr_insert(map, entry);
5243 
5244 			entfree = free;
5245 		}
5246 
5247 		min = lmax;
5248 	}
5249 	/* Finally put entry on the uaddr state. */
5250 	if (entry != NULL)
5251 		uvm_mapent_free_insert(map, entfree, entry);
5252 
5253 	return entry;
5254 }
5255 
5256 /*
5257  * MQuery style of allocation.
5258  *
5259  * This allocator searches forward until sufficient space is found to map
5260  * the given size.
5261  *
5262  * XXX: factor in offset (via pmap_prefer) and protection?
5263  */
5264 int
5265 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5266     int flags)
5267 {
5268 	struct vm_map_entry *entry, *last;
5269 	vaddr_t addr;
5270 	vaddr_t tmp, pmap_align, pmap_offset;
5271 	int error;
5272 
5273 	addr = *addr_p;
5274 	vm_map_lock_read(map);
5275 
5276 	/* Configure pmap prefer. */
5277 	if (offset != UVM_UNKNOWN_OFFSET) {
5278 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5279 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5280 	} else {
5281 		pmap_align = PAGE_SIZE;
5282 		pmap_offset = 0;
5283 	}
5284 
5285 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5286 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5287 	  	tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5288 		if (tmp < addr)
5289 			tmp += pmap_align;
5290 		addr = tmp;
5291 	}
5292 
5293 	/* First, check if the requested range is fully available. */
5294 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5295 	last = NULL;
5296 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5297 		error = 0;
5298 		goto out;
5299 	}
5300 	if (flags & UVM_FLAG_FIXED) {
5301 		error = EINVAL;
5302 		goto out;
5303 	}
5304 
5305 	error = ENOMEM; /* Default error from here. */
5306 
5307 	/*
5308 	 * At this point, the memory at <addr, sz> is not available.
5309 	 * The reasons are:
5310 	 * [1] it's outside the map,
5311 	 * [2] it starts in used memory (and therefore needs to move
5312 	 *     toward the first free page in entry),
5313 	 * [3] it starts in free memory but bumps into used memory.
5314 	 *
5315 	 * Note that for case [2], the forward moving is handled by the
5316 	 * for loop below.
5317 	 */
5318 	if (entry == NULL) {
5319 		/* [1] Outside the map. */
5320 		if (addr >= map->max_offset)
5321 			goto out;
5322 		else
5323 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5324 	} else if (VMMAP_FREE_START(entry) <= addr) {
5325 		/* [3] Bumped into used memory. */
5326 		entry = RBT_NEXT(uvm_map_addr, entry);
5327 	}
5328 
5329 	/* Test if the next entry is sufficient for the allocation. */
5330 	for (; entry != NULL;
5331 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5332 		if (entry->fspace == 0)
5333 			continue;
5334 		addr = VMMAP_FREE_START(entry);
5335 
5336 restart:	/* Restart address checks on address change. */
5337 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5338 		if (tmp < addr)
5339 			tmp += pmap_align;
5340 		addr = tmp;
5341 		if (addr >= VMMAP_FREE_END(entry))
5342 			continue;
5343 
5344 		/* Skip brk() allocation addresses. */
5345 		if (addr + sz > map->b_start && addr < map->b_end) {
5346 			if (VMMAP_FREE_END(entry) > map->b_end) {
5347 				addr = map->b_end;
5348 				goto restart;
5349 			} else
5350 				continue;
5351 		}
5352 		/* Skip stack allocation addresses. */
5353 		if (addr + sz > map->s_start && addr < map->s_end) {
5354 			if (VMMAP_FREE_END(entry) > map->s_end) {
5355 				addr = map->s_end;
5356 				goto restart;
5357 			} else
5358 				continue;
5359 		}
5360 
5361 		last = NULL;
5362 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5363 			error = 0;
5364 			goto out;
5365 		}
5366 	}
5367 
5368 out:
5369 	vm_map_unlock_read(map);
5370 	if (error == 0)
5371 		*addr_p = addr;
5372 	return error;
5373 }
5374 
5375 boolean_t
5376 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5377 {
5378 	boolean_t rv;
5379 
5380 	if (map->flags & VM_MAP_INTRSAFE) {
5381 		rv = mtx_enter_try(&map->mtx);
5382 	} else {
5383 		mtx_enter(&map->flags_lock);
5384 		if (map->flags & VM_MAP_BUSY) {
5385 			mtx_leave(&map->flags_lock);
5386 			return (FALSE);
5387 		}
5388 		mtx_leave(&map->flags_lock);
5389 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5390 		/* check if the lock is busy and back out if we won the race */
5391 		if (rv) {
5392 			mtx_enter(&map->flags_lock);
5393 			if (map->flags & VM_MAP_BUSY) {
5394 				rw_exit(&map->lock);
5395 				rv = FALSE;
5396 			}
5397 			mtx_leave(&map->flags_lock);
5398 		}
5399 	}
5400 
5401 	if (rv) {
5402 		map->timestamp++;
5403 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5404 		uvm_tree_sanity(map, file, line);
5405 		uvm_tree_size_chk(map, file, line);
5406 	}
5407 
5408 	return (rv);
5409 }
5410 
5411 void
5412 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5413 {
5414 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5415 		do {
5416 			mtx_enter(&map->flags_lock);
5417 tryagain:
5418 			while (map->flags & VM_MAP_BUSY) {
5419 				map->flags |= VM_MAP_WANTLOCK;
5420 				msleep_nsec(&map->flags, &map->flags_lock,
5421 				    PVM, vmmapbsy, INFSLP);
5422 			}
5423 			mtx_leave(&map->flags_lock);
5424 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5425 		/* if the map went busy while we raced for the lock, back out */
5426 		mtx_enter(&map->flags_lock);
5427 		if (map->flags & VM_MAP_BUSY) {
5428 			rw_exit(&map->lock);
5429 			goto tryagain;
5430 		}
5431 		mtx_leave(&map->flags_lock);
5432 	} else {
5433 		mtx_enter(&map->mtx);
5434 	}
5435 
5436 	map->timestamp++;
5437 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5438 	uvm_tree_sanity(map, file, line);
5439 	uvm_tree_size_chk(map, file, line);
5440 }
5441 
5442 void
5443 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5444 {
5445 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5446 		rw_enter_read(&map->lock);
5447 	else
5448 		mtx_enter(&map->mtx);
5449 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5450 	uvm_tree_sanity(map, file, line);
5451 	uvm_tree_size_chk(map, file, line);
5452 }
5453 
5454 void
5455 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5456 {
5457 	uvm_tree_sanity(map, file, line);
5458 	uvm_tree_size_chk(map, file, line);
5459 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5460 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5461 		rw_exit(&map->lock);
5462 	else
5463 		mtx_leave(&map->mtx);
5464 }
5465 
5466 void
5467 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5468 {
5469 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5470 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5471 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5472 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5473 		rw_exit_read(&map->lock);
5474 	else
5475 		mtx_leave(&map->mtx);
5476 }
5477 
5478 void
5479 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5480 {
5481 	uvm_tree_sanity(map, file, line);
5482 	uvm_tree_size_chk(map, file, line);
5483 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5484 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5485 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5486 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5487 		rw_enter(&map->lock, RW_DOWNGRADE);
5488 }
5489 
5490 void
5491 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5492 {
5493 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5494 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5495 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5496 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5497 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
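		/*
		 * The upgrade is not atomic: the read lock is dropped
		 * before the write lock is taken, so the map may change
		 * in between and callers have to revalidate their state.
		 */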
5498 		rw_exit_read(&map->lock);
5499 		rw_enter_write(&map->lock);
5500 	}
5501 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5502 	uvm_tree_sanity(map, file, line);
5503 }
5504 
5505 void
5506 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5507 {
5508 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5509 	mtx_enter(&map->flags_lock);
5510 	map->flags |= VM_MAP_BUSY;
5511 	mtx_leave(&map->flags_lock);
5512 }
5513 
5514 void
5515 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5516 {
5517 	int oflags;
5518 
5519 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5520 	mtx_enter(&map->flags_lock);
5521 	oflags = map->flags;
5522 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5523 	mtx_leave(&map->flags_lock);
5524 	if (oflags & VM_MAP_WANTLOCK)
5525 		wakeup(&map->flags);
5526 }
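
/*
 * A rough sketch of the busy handshake implemented by the two functions
 * above together with vm_map_lock_ln(): a thread that must release the
 * rwlock while it sleeps can mark the map busy so that would-be writers
 * block in vm_map_lock()/vm_map_lock_try() until it is done.  The
 * vm_map_lock(), vm_map_busy() and vm_map_unbusy() wrappers are assumed
 * to come from uvm_map.h; actual callers in the tree may differ.
 */
#if 0
	vm_map_lock(map);
	vm_map_busy(map);		/* VM_MAP_BUSY keeps writers out ... */
	vm_map_unlock(map);		/* ... even after the rwlock is dropped */

	/* sleep here, e.g. waiting for I/O, without holding the rwlock */

	vm_map_unbusy(map);		/* clears BUSY, wakes WANTLOCK sleepers */
#endif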
5527 
5528 #ifndef SMALL_KERNEL
5529 int
5530 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5531     size_t *lenp)
5532 {
5533 	struct vm_map_entry *entry;
5534 	vaddr_t start;
5535 	int cnt, maxcnt, error = 0;
5536 
5537 	KASSERT(*lenp > 0);
5538 	KASSERT((*lenp % sizeof(*kve)) == 0);
5539 	cnt = 0;
5540 	maxcnt = *lenp / sizeof(*kve);
5541 	KASSERT(maxcnt > 0);
5542 
5543 	/*
5544 	 * Return only entries whose address is above the given base
5545 	 * address.  This allows userland to iterate without knowing the
5546 	 * number of entries beforehand.
5547 	 */
5548 	start = (vaddr_t)kve[0].kve_start;
5549 
5550 	vm_map_lock(map);
5551 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5552 		if (cnt == maxcnt) {
5553 			error = ENOMEM;
5554 			break;
5555 		}
5556 		if (start != 0 && entry->start < start)
5557 			continue;
5558 		kve->kve_start = entry->start;
5559 		kve->kve_end = entry->end;
5560 		kve->kve_guard = entry->guard;
5561 		kve->kve_fspace = entry->fspace;
5562 		kve->kve_fspace_augment = entry->fspace_augment;
5563 		kve->kve_offset = entry->offset;
5564 		kve->kve_wired_count = entry->wired_count;
5565 		kve->kve_etype = entry->etype;
5566 		kve->kve_protection = entry->protection;
5567 		kve->kve_max_protection = entry->max_protection;
5568 		kve->kve_advice = entry->advice;
5569 		kve->kve_inheritance = entry->inheritance;
5570 		kve->kve_flags = entry->flags;
5571 		kve++;
5572 		cnt++;
5573 	}
5574 	vm_map_unlock(map);
5575 
5576 	KASSERT(cnt <= maxcnt);
5577 
5578 	*lenp = sizeof(*kve) * cnt;
5579 	return error;
5580 }
5581 #endif
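
/*
 * A minimal consumer-side sketch of the iteration contract above: prime
 * kve[0].kve_start with the last address already seen (0 on the first
 * call) and keep refilling the buffer until it no longer comes back
 * full.  fetch_vmmap_chunk() is a hypothetical stand-in for the
 * sysctl(2) plumbing that eventually calls uvm_map_fill_vmmap();
 * error handling is elided.
 */
#if 0
	struct kinfo_vmentry kve[64];
	size_t len;
	u_long next = 0;

	for (;;) {
		len = sizeof(kve);
		kve[0].kve_start = next;	/* resume point for the walk */
		(void)fetch_vmmap_chunk(kve, &len);	/* hypothetical */
		if (len == 0)
			break;			/* nothing at or above next */
		next = kve[len / sizeof(kve[0]) - 1].kve_end;
		if (len < sizeof(kve))
			break;			/* buffer not filled: done */
	}
#endif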
5582 
5583 
5584 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5585     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5586 
5587 
5588 /*
5589  * MD code: vmspace allocator setup.
5590  */
5591 
5592 #ifdef __i386__
5593 void
5594 uvm_map_setup_md(struct vm_map *map)
5595 {
5596 	vaddr_t		min, max;
5597 
5598 	min = map->min_offset;
5599 	max = map->max_offset;
5600 
5601 	/*
5602 	 * Ensure the selectors will not try to manage page 0;
5603 	 * it's too special.
5604 	 */
5605 	if (min < VMMAP_MIN_ADDR)
5606 		min = VMMAP_MIN_ADDR;
5607 
5608 #if 0	/* Preferred exe/pivot selectors, not enabled yet */
5609 	/* Executable code is special. */
5610 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5611 	/* Place normal allocations beyond executable mappings. */
5612 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5613 #else	/* Fallback: single randomized selector for now */
5614 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5615 #endif
5616 
5617 #ifndef SMALL_KERNEL
5618 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5619 #endif /* !SMALL_KERNEL */
5620 }
5621 #elif __LP64__
5622 void
5623 uvm_map_setup_md(struct vm_map *map)
5624 {
5625 	vaddr_t		min, max;
5626 
5627 	min = map->min_offset;
5628 	max = map->max_offset;
5629 
5630 	/*
5631 	 * Ensure the selectors will not try to manage page 0;
5632 	 * it's too special.
5633 	 */
5634 	if (min < VMMAP_MIN_ADDR)
5635 		min = VMMAP_MIN_ADDR;
5636 
5637 #if 0	/* Preferred pivot selector, not enabled yet */
5638 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5639 #else	/* Fallback: single randomized selector for now */
5640 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5641 #endif
5642 
5643 #ifndef SMALL_KERNEL
5644 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5645 #endif /* !SMALL_KERNEL */
5646 }
5647 #else	/* non-i386, 32 bit */
5648 void
5649 uvm_map_setup_md(struct vm_map *map)
5650 {
5651 	vaddr_t		min, max;
5652 
5653 	min = map->min_offset;
5654 	max = map->max_offset;
5655 
5656 	/*
5657 	 * Ensure the selectors will not try to manage page 0;
5658 	 * it's too special.
5659 	 */
5660 	if (min < VMMAP_MIN_ADDR)
5661 		min = VMMAP_MIN_ADDR;
5662 
5663 #if 0	/* Preferred pivot selector, not enabled yet */
5664 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5665 #else	/* Fallback: single randomized selector for now */
5666 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5667 #endif
5668 
5669 #ifndef SMALL_KERNEL
5670 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5671 #endif /* !SMALL_KERNEL */
5672 }
5673 #endif
5674