1 /*	$OpenBSD: uvm_map.c,v 1.290 2022/03/12 08:11:07 mpi Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/acct.h>
90 #include <sys/mman.h>
91 #include <sys/proc.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/sysctl.h>
95 #include <sys/signalvar.h>
96 #include <sys/syslog.h>
97 #include <sys/user.h>
98 #include <sys/tracepoint.h>
99 
100 #ifdef SYSVSHM
101 #include <sys/shm.h>
102 #endif
103 
104 #include <uvm/uvm.h>
105 
106 #ifdef DDB
107 #include <uvm/uvm_ddb.h>
108 #endif
109 
110 #include <uvm/uvm_addr.h>
111 
112 
113 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
114 int			 uvm_mapent_isjoinable(struct vm_map*,
115 			    struct vm_map_entry*, struct vm_map_entry*);
116 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
117 			    struct vm_map_entry*, struct uvm_map_deadq*);
118 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
119 			    struct vm_map_entry*, struct uvm_map_deadq*);
120 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
121 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
122 			    struct uvm_map_deadq*, struct vm_map_entry*);
123 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
124 void			 uvm_mapent_free(struct vm_map_entry*);
125 void			 uvm_unmap_kill_entry(struct vm_map*,
126 			    struct vm_map_entry*);
127 void			 uvm_unmap_kill_entry_withlock(struct vm_map *,
128 			    struct vm_map_entry *, int);
129 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
130 void			 uvm_mapent_mkfree(struct vm_map*,
131 			    struct vm_map_entry*, struct vm_map_entry**,
132 			    struct uvm_map_deadq*, boolean_t);
133 void			 uvm_map_pageable_pgon(struct vm_map*,
134 			    struct vm_map_entry*, struct vm_map_entry*,
135 			    vaddr_t, vaddr_t);
136 int			 uvm_map_pageable_wire(struct vm_map*,
137 			    struct vm_map_entry*, struct vm_map_entry*,
138 			    vaddr_t, vaddr_t, int);
139 void			 uvm_map_setup_entries(struct vm_map*);
140 void			 uvm_map_setup_md(struct vm_map*);
141 void			 uvm_map_teardown(struct vm_map*);
142 void			 uvm_map_vmspace_update(struct vm_map*,
143 			    struct uvm_map_deadq*, int);
144 void			 uvm_map_kmem_grow(struct vm_map*,
145 			    struct uvm_map_deadq*, vsize_t, int);
146 void			 uvm_map_freelist_update_clear(struct vm_map*,
147 			    struct uvm_map_deadq*);
148 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
149 void			 uvm_map_freelist_update(struct vm_map*,
150 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
151 			    vaddr_t, vaddr_t, int);
152 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
153 			    vaddr_t, vaddr_t, int);
154 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
155 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
156 			    int);
157 int			 uvm_map_findspace(struct vm_map*,
158 			    struct vm_map_entry**, struct vm_map_entry**,
159 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
160 			    vaddr_t);
161 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
162 void			 uvm_map_addr_augment(struct vm_map_entry*);
163 
164 int			 uvm_map_inentry_recheck(u_long, vaddr_t,
165 			     struct p_inentry *);
166 boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
167 			     vaddr_t, int (*)(vm_map_entry_t), u_long);
168 /*
169  * Tree management functions.
170  */
171 
172 static inline void	 uvm_mapent_copy(struct vm_map_entry*,
173 			    struct vm_map_entry*);
174 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
175 			    const struct vm_map_entry*);
176 void			 uvm_mapent_free_insert(struct vm_map*,
177 			    struct uvm_addr_state*, struct vm_map_entry*);
178 void			 uvm_mapent_free_remove(struct vm_map*,
179 			    struct uvm_addr_state*, struct vm_map_entry*);
180 void			 uvm_mapent_addr_insert(struct vm_map*,
181 			    struct vm_map_entry*);
182 void			 uvm_mapent_addr_remove(struct vm_map*,
183 			    struct vm_map_entry*);
184 void			 uvm_map_splitentry(struct vm_map*,
185 			    struct vm_map_entry*, struct vm_map_entry*,
186 			    vaddr_t);
187 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
188 
189 /*
190  * uvm_vmspace_fork helper functions.
191  */
192 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
193 			    vsize_t, vm_prot_t, vm_prot_t,
194 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
195 			    int);
196 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
197 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
198 			    struct vm_map_entry*, struct uvm_map_deadq*);
199 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
200 			    struct vm_map*, struct vm_map_entry*,
201 			    struct uvm_map_deadq*);
202 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
203 			    struct vm_map*, struct vm_map_entry*,
204 			    struct uvm_map_deadq*);
205 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
206 			    struct vm_map*, struct vm_map_entry*,
207 			    struct uvm_map_deadq*);
208 
209 /*
210  * Tree validation.
211  */
212 #ifdef VMMAP_DEBUG
213 void			 uvm_tree_assert(struct vm_map*, int, char*,
214 			    char*, int);
215 #define UVM_ASSERT(map, cond, file, line)				\
216 	uvm_tree_assert((map), (cond), #cond, (file), (line))
217 void			 uvm_tree_sanity(struct vm_map*, char*, int);
218 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
219 void			 vmspace_validate(struct vm_map*);
220 #else
221 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
222 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
223 #define vmspace_validate(_map)				do {} while (0)
224 #endif
225 
226 /*
227  * Provide no-op fallbacks so every architecture has pmap_prefer.
228  */
229 #ifndef PMAP_PREFER
230 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
231 #define PMAP_PREFER_OFFSET(off)	0
232 #define PMAP_PREFER(addr, off)	(addr)
233 #endif
234 
235 /*
236  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
237  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
238  *
239  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
240  * each time.
241  */
242 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
243 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
244 #define VM_MAP_KSIZE_ALLOCMUL	4
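/*
 * Illustrative sizes (assuming a 4KB PAGE_SIZE, which is not universal):
 * VM_MAP_KSIZE_INIT is 512 pages (2MB), VM_MAP_KSIZE_DELTA is 256 pages
 * (1MB), and each growth attempt targets VM_MAP_KSIZE_ALLOCMUL (4) times
 * the allocation that triggered it, but never less than the delta.
 */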
245 /*
246  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
247  * ahead.
248  */
249 #define FSPACE_DELTA		8
250 /*
251  * Put allocations adjacent to previous allocations when the free-space tree
252  * is larger than FSPACE_COMPACT entries.
253  *
254  * Alignment and PMAP_PREFER may still cause the entry not to be fully
255  * adjacent. Note that this strategy reduces memory fragmentation (by leaving
256  * a large space before or after the allocation).
257  */
258 #define FSPACE_COMPACT		128
259 /*
260  * Make the address selection skip at most this many bytes from the start of
261  * the free space in which the allocation takes place.
262  *
263  * The main idea behind a randomized address space is that an attacker cannot
264  * know where to target his attack. Therefore, the location of objects must be
265  * as random as possible. However, the goal is not to create the most sparse
266  * map that is possible.
267  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
268  * sizes, thereby reducing the sparseness. The biggest randomization comes
269  * from fragmentation, i.e. FSPACE_COMPACT.
270  */
271 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
272 /*
273  * Allow for small gaps in the overflow areas.
274  * Gap size is in bytes and does not have to be a multiple of page-size.
275  */
276 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
277 
278 /* auto-allocate address lower bound */
279 #define VMMAP_MIN_ADDR		PAGE_SIZE
280 
281 
282 #ifdef DEADBEEF0
283 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
284 #else
285 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
286 #endif
287 
288 #ifdef DEBUG
289 int uvm_map_printlocks = 0;
290 
291 #define LPRINTF(_args)							\
292 	do {								\
293 		if (uvm_map_printlocks)					\
294 			printf _args;					\
295 	} while (0)
296 #else
297 #define LPRINTF(_args)	do {} while (0)
298 #endif
299 
300 static struct mutex uvm_kmapent_mtx;
301 static struct timeval uvm_kmapent_last_warn_time;
302 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
303 
304 const char vmmapbsy[] = "vmmapbsy";
305 
306 /*
307  * pool for vmspace structures.
308  */
309 struct pool uvm_vmspace_pool;
310 
311 /*
312  * pool for dynamically-allocated map entries.
313  */
314 struct pool uvm_map_entry_pool;
315 struct pool uvm_map_entry_kmem_pool;
316 
317 /*
318  * This global represents the end of the kernel virtual address
319  * space. If we want to exceed this, we must grow the kernel
320  * virtual address space dynamically.
321  *
322  * Note, this variable is locked by kernel_map's lock.
323  */
324 vaddr_t uvm_maxkaddr;
325 
326 /*
327  * Locking predicate.
328  */
329 #define UVM_MAP_REQ_WRITE(_map)						\
330 	do {								\
331 		if ((_map)->ref_count > 0) {				\
332 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
333 				rw_assert_wrlock(&(_map)->lock);	\
334 			else						\
335 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
336 		}							\
337 	} while (0)
338 
339 #define	vm_map_modflags(map, set, clear)				\
340 	do {								\
341 		mtx_enter(&(map)->flags_lock);				\
342 		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
343 		mtx_leave(&(map)->flags_lock);				\
344 	} while (0)
345 
346 
347 /*
348  * Tree describing entries by address.
349  *
350  * Addresses are unique.
351  * Entries with start == end may only exist if they are the first entry
352  * (sorted by address) within a free-memory tree.
353  */
354 
355 static inline int
356 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
357     const struct vm_map_entry *e2)
358 {
359 	return e1->start < e2->start ? -1 : e1->start > e2->start;
360 }
361 
362 /*
363  * Copy mapentry.
364  */
365 static inline void
366 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
367 {
368 	caddr_t csrc, cdst;
369 	size_t sz;
370 
371 	csrc = (caddr_t)src;
372 	cdst = (caddr_t)dst;
373 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
374 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
375 
376 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
377 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
378 	memcpy(cdst, csrc, sz);
379 }
380 
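/*
 * Note on uvm_mapent_copy() above: only the byte range between the
 * uvm_map_entry_start_copy and uvm_map_entry_stop_copy markers is copied,
 * so whatever lies outside those markers in dst (such as the tree linkage)
 * is deliberately left untouched.
 */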
381 /*
382  * Handle free-list insertion.
383  */
384 void
385 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
386     struct vm_map_entry *entry)
387 {
388 	const struct uvm_addr_functions *fun;
389 #ifdef VMMAP_DEBUG
390 	vaddr_t min, max, bound;
391 #endif
392 
393 #ifdef VMMAP_DEBUG
394 	/*
395 	 * Boundary check.
396 	 * Boundaries are folded if they go on the same free list.
397 	 */
398 	min = VMMAP_FREE_START(entry);
399 	max = VMMAP_FREE_END(entry);
400 
401 	while (min < max) {
402 		bound = uvm_map_boundary(map, min, max);
403 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
404 		min = bound;
405 	}
406 #endif
407 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
408 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
409 
410 	UVM_MAP_REQ_WRITE(map);
411 
412 	/* Actual insert: forward to uaddr pointer. */
413 	if (uaddr != NULL) {
414 		fun = uaddr->uaddr_functions;
415 		KDASSERT(fun != NULL);
416 		if (fun->uaddr_free_insert != NULL)
417 			(*fun->uaddr_free_insert)(map, uaddr, entry);
418 		entry->etype |= UVM_ET_FREEMAPPED;
419 	}
420 
421 	/* Update fspace augmentation. */
422 	uvm_map_addr_augment(entry);
423 }
424 
425 /*
426  * Handle free-list removal.
427  */
428 void
429 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
430     struct vm_map_entry *entry)
431 {
432 	const struct uvm_addr_functions *fun;
433 
434 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
435 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
436 	UVM_MAP_REQ_WRITE(map);
437 
438 	if (uaddr != NULL) {
439 		fun = uaddr->uaddr_functions;
440 		if (fun->uaddr_free_remove != NULL)
441 			(*fun->uaddr_free_remove)(map, uaddr, entry);
442 		entry->etype &= ~UVM_ET_FREEMAPPED;
443 	}
444 }
445 
446 /*
447  * Handle address tree insertion.
448  */
449 void
450 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
451 {
452 	struct vm_map_entry *res;
453 
454 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
455 		panic("uvm_mapent_addr_insert: entry still in addr list");
456 	KDASSERT(entry->start <= entry->end);
457 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
458 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
459 
460 	TRACEPOINT(uvm, map_insert,
461 	    entry->start, entry->end, entry->protection, NULL);
462 
463 	UVM_MAP_REQ_WRITE(map);
464 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
465 	if (res != NULL) {
466 		panic("uvm_mapent_addr_insert: map %p entry %p "
467 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
468 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
469 		    map, entry,
470 		    entry->start, entry->end, entry->guard, entry->fspace,
471 		    res, res->start, res->end, res->guard, res->fspace);
472 	}
473 }
474 
475 /*
476  * Handle address tree removal.
477  */
478 void
479 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
480 {
481 	struct vm_map_entry *res;
482 
483 	TRACEPOINT(uvm, map_remove,
484 	    entry->start, entry->end, entry->protection, NULL);
485 
486 	UVM_MAP_REQ_WRITE(map);
487 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
488 	if (res != entry)
489 		panic("uvm_mapent_addr_remove");
490 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
491 }
492 
493 /*
494  * uvm_map_reference: add reference to a map
495  *
496  * => map need not be locked
497  */
498 void
499 uvm_map_reference(struct vm_map *map)
500 {
501 	atomic_inc_int(&map->ref_count);
502 }
503 
504 void
505 uvm_map_lock_entry(struct vm_map_entry *entry)
506 {
507 	if (entry->aref.ar_amap != NULL) {
508 		amap_lock(entry->aref.ar_amap);
509 	}
510 	if (UVM_ET_ISOBJ(entry)) {
511 		rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
512 	}
513 }
514 
515 void
516 uvm_map_unlock_entry(struct vm_map_entry *entry)
517 {
518 	if (UVM_ET_ISOBJ(entry)) {
519 		rw_exit(entry->object.uvm_obj->vmobjlock);
520 	}
521 	if (entry->aref.ar_amap != NULL) {
522 		amap_unlock(entry->aref.ar_amap);
523 	}
524 }
525 
526 /*
527  * Calculate the dused delta: pages in [min, max) outside the stack range.
528  */
529 vsize_t
530 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
531 {
532 	struct vmspace *vm;
533 	vsize_t sz;
534 	vaddr_t lmax;
535 	vaddr_t stack_begin, stack_end; /* Position of stack. */
536 
537 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
538 	vm = (struct vmspace *)map;
539 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
540 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
541 
542 	sz = 0;
543 	while (min != max) {
544 		lmax = max;
545 		if (min < stack_begin && lmax > stack_begin)
546 			lmax = stack_begin;
547 		else if (min < stack_end && lmax > stack_end)
548 			lmax = stack_end;
549 
550 		if (min >= stack_begin && min < stack_end) {
551 			/* nothing */
552 		} else
553 			sz += lmax - min;
554 		min = lmax;
555 	}
556 
557 	return sz >> PAGE_SHIFT;
558 }
559 
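/*
 * Worked example for uvmspace_dused() (illustrative addresses only):
 * with a stack range [stack_begin, stack_end) = [0x7000, 0x9000) and a
 * request for [min, max) = [0x6000, 0xa000), the loop charges
 * [0x6000, 0x7000) and [0x9000, 0xa000) but skips the stack pages,
 * returning 2 on a machine with 4KB pages.
 */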
560 /*
561  * Find the entry describing the given address.
562  */
563 struct vm_map_entry*
564 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
565 {
566 	struct vm_map_entry *iter;
567 
568 	iter = RBT_ROOT(uvm_map_addr, atree);
569 	while (iter != NULL) {
570 		if (iter->start > addr)
571 			iter = RBT_LEFT(uvm_map_addr, iter);
572 		else if (VMMAP_FREE_END(iter) <= addr)
573 			iter = RBT_RIGHT(uvm_map_addr, iter);
574 		else
575 			return iter;
576 	}
577 	return NULL;
578 }
579 
580 /*
581  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
582  *
583  * Push dead entries into a linked list.
584  * Since the linked list abuses the address tree for storage, the entry
585  * may not be linked in a map.
586  *
587  * *head must be initialized to NULL before the first call to this macro.
588  * uvm_unmap_detach(*head, 0) will remove dead entries.
589  */
590 static inline void
591 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
592 {
593 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
594 }
595 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
596 	dead_entry_push((_headptr), (_entry))
597 
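/*
 * Typical usage (sketch; it mirrors the pattern in uvm_mapanon() and
 * uvm_map() below):
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	...collect entries with DEAD_ENTRY_PUSH(&dead, entry)...
 *	uvm_unmap_detach(&dead, 0);
 */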
598 /*
599  * Helper function for free-space address selection.
600  *
601  * Given allocation constraints and pmap constraints, finds the
602  * lowest and highest address in a range that can be used for the
603  * allocation.
604  *
605  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
606  *
607  *
608  * Big chunk of math with a seasoning of dragons.
609  */
610 int
611 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
612     struct vm_map_entry *sel, vaddr_t align,
613     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
614 {
615 	vaddr_t sel_min, sel_max;
616 #ifdef PMAP_PREFER
617 	vaddr_t pmap_min, pmap_max;
618 #endif /* PMAP_PREFER */
619 #ifdef DIAGNOSTIC
620 	int bad;
621 #endif /* DIAGNOSTIC */
622 
623 	sel_min = VMMAP_FREE_START(sel);
624 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
625 
626 #ifdef PMAP_PREFER
627 
628 	/*
629 	 * There are two special cases, in which we can satisfy the align
630 	 * requirement and the pmap_prefer requirement.
631 	 * - when pmap_off == 0, we always select the largest of the two
632 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
633 	 *   satisfy the pmap_align requirement and automatically
634 	 *   satisfy the align requirement.
635 	 */
636 	if (align > PAGE_SIZE &&
637 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
638 		/*
639 		 * Simple case: only use align.
640 		 */
641 		sel_min = roundup(sel_min, align);
642 		sel_max &= ~(align - 1);
643 
644 		if (sel_min > sel_max)
645 			return ENOMEM;
646 
647 		/* Correct for bias. */
648 		if (sel_max - sel_min > FSPACE_BIASGAP) {
649 			if (bias > 0) {
650 				sel_min = sel_max - FSPACE_BIASGAP;
651 				sel_min = roundup(sel_min, align);
652 			} else if (bias < 0) {
653 				sel_max = sel_min + FSPACE_BIASGAP;
654 				sel_max &= ~(align - 1);
655 			}
656 		}
657 	} else if (pmap_align != 0) {
658 		/*
659 		 * Special case: satisfy both pmap_prefer and
660 		 * align argument.
661 		 */
662 		pmap_max = sel_max & ~(pmap_align - 1);
663 		pmap_min = sel_min;
664 		if (pmap_max < sel_min)
665 			return ENOMEM;
666 
667 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
668 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
669 			pmap_min = pmap_max - FSPACE_BIASGAP;
670 		/* Align pmap_min. */
671 		pmap_min &= ~(pmap_align - 1);
672 		if (pmap_min < sel_min)
673 			pmap_min += pmap_align;
674 		if (pmap_min > pmap_max)
675 			return ENOMEM;
676 
677 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
678 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
679 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
680 			    ~(pmap_align - 1);
681 		}
682 		if (pmap_min > pmap_max)
683 			return ENOMEM;
684 
685 		/* Apply pmap prefer offset. */
686 		pmap_max |= pmap_off;
687 		if (pmap_max > sel_max)
688 			pmap_max -= pmap_align;
689 		pmap_min |= pmap_off;
690 		if (pmap_min < sel_min)
691 			pmap_min += pmap_align;
692 
693 		/*
694 		 * Fixup: it's possible that pmap_min and pmap_max
695 		 * cross each other. In this case, try to find one
696 		 * address that is allowed.
697 		 * (This usually happens in biased case.)
698 		 */
699 		if (pmap_min > pmap_max) {
700 			if (pmap_min < sel_max)
701 				pmap_max = pmap_min;
702 			else if (pmap_max > sel_min)
703 				pmap_min = pmap_max;
704 			else
705 				return ENOMEM;
706 		}
707 
708 		/* Internal validation. */
709 		KDASSERT(pmap_min <= pmap_max);
710 
711 		sel_min = pmap_min;
712 		sel_max = pmap_max;
713 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
714 		sel_min = sel_max - FSPACE_BIASGAP;
715 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
716 		sel_max = sel_min + FSPACE_BIASGAP;
717 
718 #else
719 
720 	if (align > PAGE_SIZE) {
721 		sel_min = roundup(sel_min, align);
722 		sel_max &= ~(align - 1);
723 		if (sel_min > sel_max)
724 			return ENOMEM;
725 
726 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
727 			if (bias > 0) {
728 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
729 				    align);
730 			} else {
731 				sel_max = (sel_min + FSPACE_BIASGAP) &
732 				    ~(align - 1);
733 			}
734 		}
735 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
736 		sel_min = sel_max - FSPACE_BIASGAP;
737 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
738 		sel_max = sel_min + FSPACE_BIASGAP;
739 
740 #endif
741 
742 	if (sel_min > sel_max)
743 		return ENOMEM;
744 
745 #ifdef DIAGNOSTIC
746 	bad = 0;
747 	/* Lower boundary check. */
748 	if (sel_min < VMMAP_FREE_START(sel)) {
749 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
750 		    sel_min, VMMAP_FREE_START(sel));
751 		bad++;
752 	}
753 	/* Upper boundary check. */
754 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
755 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
756 		    sel_max,
757 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
758 		bad++;
759 	}
760 	/* Lower boundary alignment. */
761 	if (align != 0 && (sel_min & (align - 1)) != 0) {
762 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
763 		    sel_min, align);
764 		bad++;
765 	}
766 	/* Upper boundary alignment. */
767 	if (align != 0 && (sel_max & (align - 1)) != 0) {
768 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
769 		    sel_max, align);
770 		bad++;
771 	}
772 	/* Lower boundary PMAP_PREFER check. */
773 	if (pmap_align != 0 && align == 0 &&
774 	    (sel_min & (pmap_align - 1)) != pmap_off) {
775 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
776 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
777 		bad++;
778 	}
779 	/* Upper boundary PMAP_PREFER check. */
780 	if (pmap_align != 0 && align == 0 &&
781 	    (sel_max & (pmap_align - 1)) != pmap_off) {
782 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
783 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
784 		bad++;
785 	}
786 
787 	if (bad) {
788 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
789 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
790 		    "bias = %d, "
791 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
792 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
793 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
794 	}
795 #endif /* DIAGNOSTIC */
796 
797 	*min = sel_min;
798 	*max = sel_max;
799 	return 0;
800 }
801 
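/*
 * Example for the non-PMAP_PREFER branch of uvm_map_sel_limits()
 * (illustrative numbers only): given a free range [0x1000, 0x20000),
 * sz = 0x2000, a guard page and align = 0x4000, sel_min rounds up to
 * 0x4000 and sel_max rounds (0x20000 - 0x2000 - PAGE_SIZE) down to a
 * multiple of the alignment; a nonzero bias may then shrink that window
 * further to FSPACE_BIASGAP bytes.
 */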
802 /*
803  * Test if memory starting at addr with sz bytes is free.
804  *
805  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
806  * the space.
807  * If called with prefilled *start_ptr and *end_ptr, they must be correct.
808  */
809 int
810 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
811     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
812     vaddr_t addr, vsize_t sz)
813 {
814 	struct uvm_addr_state *free;
815 	struct uvm_map_addr *atree;
816 	struct vm_map_entry *i, *i_end;
817 
818 	if (addr + sz < addr)
819 		return 0;
820 
821 	/*
822 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
823 	 */
824 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
825 		if (addr + sz > uvm_maxkaddr)
826 			return 0;
827 	}
828 
829 	atree = &map->addr;
830 
831 	/*
832 	 * Fill in first, last, so they point at the entries containing the
833 	 * first and last address of the range.
834 	 * Note that if they are not NULL, we don't perform the lookup.
835 	 */
836 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
837 	if (*start_ptr == NULL) {
838 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
839 		if (*start_ptr == NULL)
840 			return 0;
841 	} else
842 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
843 	if (*end_ptr == NULL) {
844 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
845 			*end_ptr = *start_ptr;
846 		else {
847 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
848 			if (*end_ptr == NULL)
849 				return 0;
850 		}
851 	} else
852 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
853 
854 	/* Validation. */
855 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
856 	KDASSERT((*start_ptr)->start <= addr &&
857 	    VMMAP_FREE_END(*start_ptr) > addr &&
858 	    (*end_ptr)->start < addr + sz &&
859 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
860 
861 	/*
862 	 * Check that none of the entries intersects with <addr, addr+sz>.
863 	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
864 	 * considered unavailable unless called by those allocators.
865 	 */
866 	i = *start_ptr;
867 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
868 	for (; i != i_end;
869 	    i = RBT_NEXT(uvm_map_addr, i)) {
870 		if (i->start != i->end && i->end > addr)
871 			return 0;
872 
873 		/*
874 		 * uaddr_exe and uaddr_brk_stack may only be used
875 		 * by these allocators and the NULL uaddr (i.e. no
876 		 * uaddr).
877 		 * Reject if this requirement is not met.
878 		 */
879 		if (uaddr != NULL) {
880 			free = uvm_map_uaddr_e(map, i);
881 
882 			if (uaddr != free && free != NULL &&
883 			    (free == map->uaddr_exe ||
884 			     free == map->uaddr_brk_stack))
885 				return 0;
886 		}
887 	}
888 
889 	return -1;
890 }
891 
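/*
 * Note: uvm_map_isavail() returns a boolean-style result, -1 (true) when
 * the range is available and 0 when it is not, so callers simply test it
 * for truth:
 *
 *	if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz))
 *		error = ENOMEM;
 */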
892 /*
893  * Invoke each address selector until an address is found.
894  * Will not invoke uaddr_exe.
895  */
896 int
897 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
898     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
899     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
900 {
901 	struct uvm_addr_state *uaddr;
902 	int i;
903 
904 	/*
905 	 * Allocation for sz bytes at any address,
906 	 * using the addr selectors in order.
907 	 */
908 	for (i = 0; i < nitems(map->uaddr_any); i++) {
909 		uaddr = map->uaddr_any[i];
910 
911 		if (uvm_addr_invoke(map, uaddr, first, last,
912 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
913 			return 0;
914 	}
915 
916 	/* Fall back to brk() and stack() address selectors. */
917 	uaddr = map->uaddr_brk_stack;
918 	if (uvm_addr_invoke(map, uaddr, first, last,
919 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
920 		return 0;
921 
922 	return ENOMEM;
923 }
924 
925 /* Calculate entry augmentation value. */
926 vsize_t
927 uvm_map_addr_augment_get(struct vm_map_entry *entry)
928 {
929 	vsize_t			 augment;
930 	struct vm_map_entry	*left, *right;
931 
932 	augment = entry->fspace;
933 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
934 		augment = MAX(augment, left->fspace_augment);
935 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
936 		augment = MAX(augment, right->fspace_augment);
937 	return augment;
938 }
939 
940 /*
941  * Update augmentation data in entry.
942  */
943 void
944 uvm_map_addr_augment(struct vm_map_entry *entry)
945 {
946 	vsize_t			 augment;
947 
948 	while (entry != NULL) {
949 		/* Calculate value for augmentation. */
950 		augment = uvm_map_addr_augment_get(entry);
951 
952 		/*
953 		 * Descend update.
954 		 * Once we find an entry that already has the correct value,
955 		 * stop, since it means all its parents will use the correct
956 		 * value too.
957 		 */
958 		if (entry->fspace_augment == augment)
959 			return;
960 		entry->fspace_augment = augment;
961 		entry = RBT_PARENT(uvm_map_addr, entry);
962 	}
963 }
964 
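/*
 * Note: fspace_augment caches the largest fspace value found in the
 * subtree rooted at an entry, which lets free-space searches prune
 * subtrees that cannot possibly hold an allocation (e.g. a request for
 * 3 pages never needs to descend into a subtree whose augment is 2 pages).
 */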
965 /*
966  * uvm_mapanon: establish a valid mapping in map for an anon
967  *
968  * => *addr and sz must be a multiple of PAGE_SIZE.
969  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
970  * => map must be unlocked.
971  *
972  * => align: align vaddr, must be a power-of-2.
973  *    Align is only a hint and will be ignored if the alignment fails.
974  */
975 int
976 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
977     vsize_t align, unsigned int flags)
978 {
979 	struct vm_map_entry	*first, *last, *entry, *new;
980 	struct uvm_map_deadq	 dead;
981 	vm_prot_t		 prot;
982 	vm_prot_t		 maxprot;
983 	vm_inherit_t		 inherit;
984 	int			 advice;
985 	int			 error;
986 	vaddr_t			 pmap_align, pmap_offset;
987 	vaddr_t			 hint;
988 
989 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
990 	KASSERT(map != kernel_map);
991 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
992 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
993 	splassert(IPL_NONE);
994 	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
995 
996 	/*
997 	 * We use pmap_align and pmap_offset as alignment and offset variables.
998 	 *
999 	 * Because the align parameter takes precedence over pmap prefer,
1000 	 * pmap_align must be set to align (with pmap_offset = 0) whenever
1001 	 * pmap_prefer cannot satisfy the requested alignment.
1002 	 */
1003 	pmap_align = MAX(align, PAGE_SIZE);
1004 	pmap_offset = 0;
1005 
1006 	/* Decode parameters. */
1007 	prot = UVM_PROTECTION(flags);
1008 	maxprot = UVM_MAXPROTECTION(flags);
1009 	advice = UVM_ADVICE(flags);
1010 	inherit = UVM_INHERIT(flags);
1011 	error = 0;
1012 	hint = trunc_page(*addr);
1013 	TAILQ_INIT(&dead);
1014 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1015 	KASSERT((align & (align - 1)) == 0);
1016 
1017 	/* Check protection. */
1018 	if ((prot & maxprot) != prot)
1019 		return EACCES;
1020 
1021 	/*
1022 	 * Before grabbing the lock, allocate a map entry for later
1023 	 * use to ensure we don't wait for memory while holding the
1024 	 * vm_map_lock.
1025 	 */
1026 	new = uvm_mapent_alloc(map, flags);
1027 	if (new == NULL)
1028 		return ENOMEM;
1029 
1030 	vm_map_lock(map);
1031 	first = last = NULL;
1032 	if (flags & UVM_FLAG_FIXED) {
1033 		/*
1034 		 * Fixed location.
1035 		 *
1036 		 * Note: we ignore align, pmap_prefer.
1037 		 * Fill in first, last and *addr.
1038 		 */
1039 		KASSERT((*addr & PAGE_MASK) == 0);
1040 
1041 		/* Check that the space is available. */
1042 		if (flags & UVM_FLAG_UNMAP) {
1043 			if ((flags & UVM_FLAG_STACK) &&
1044 			    !uvm_map_is_stack_remappable(map, *addr, sz)) {
1045 				error = EINVAL;
1046 				goto unlock;
1047 			}
1048 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1049 		}
1050 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1051 			error = ENOMEM;
1052 			goto unlock;
1053 		}
1054 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1055 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1056 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1057 		/*
1058 		 * Address used as hint.
1059 		 *
1060 		 * Note: we enforce the alignment restriction,
1061 		 * but ignore pmap_prefer.
1062 		 */
1063 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1064 		/* Run selection algorithm for executables. */
1065 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1066 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1067 
1068 		if (error != 0)
1069 			goto unlock;
1070 	} else {
1071 		/* Update freelists from vmspace. */
1072 		uvm_map_vmspace_update(map, &dead, flags);
1073 
1074 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1075 		    pmap_align, pmap_offset, prot, hint);
1076 
1077 		if (error != 0)
1078 			goto unlock;
1079 	}
1080 
1081 	/* Double-check if selected address doesn't cause overflow. */
1082 	if (*addr + sz < *addr) {
1083 		error = ENOMEM;
1084 		goto unlock;
1085 	}
1086 
1087 	/* If we only want a query, return now. */
1088 	if (flags & UVM_FLAG_QUERY) {
1089 		error = 0;
1090 		goto unlock;
1091 	}
1092 
1093 	/*
1094 	 * Create new entry.
1095 	 * first and last may be invalidated after this call.
1096 	 */
1097 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1098 	    new);
1099 	if (entry == NULL) {
1100 		error = ENOMEM;
1101 		goto unlock;
1102 	}
1103 	new = NULL;
1104 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1105 	entry->object.uvm_obj = NULL;
1106 	entry->offset = 0;
1107 	entry->protection = prot;
1108 	entry->max_protection = maxprot;
1109 	entry->inheritance = inherit;
1110 	entry->wired_count = 0;
1111 	entry->advice = advice;
1112 	if (prot & PROT_WRITE)
1113 		map->wserial++;
1114 	if (flags & UVM_FLAG_SYSCALL) {
1115 		entry->etype |= UVM_ET_SYSCALL;
1116 		map->wserial++;
1117 	}
1118 	if (flags & UVM_FLAG_STACK) {
1119 		entry->etype |= UVM_ET_STACK;
1120 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
1121 			map->sserial++;
1122 	}
1123 	if (flags & UVM_FLAG_COPYONW) {
1124 		entry->etype |= UVM_ET_COPYONWRITE;
1125 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1126 			entry->etype |= UVM_ET_NEEDSCOPY;
1127 	}
1128 	if (flags & UVM_FLAG_CONCEAL)
1129 		entry->etype |= UVM_ET_CONCEAL;
1130 	if (flags & UVM_FLAG_OVERLAY) {
1131 		entry->aref.ar_pageoff = 0;
1132 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1133 	}
1134 
1135 	/* Update map and process statistics. */
1136 	map->size += sz;
1137 	if (prot != PROT_NONE) {
1138 		((struct vmspace *)map)->vm_dused +=
1139 		    uvmspace_dused(map, *addr, *addr + sz);
1140 	}
1141 
1142 unlock:
1143 	vm_map_unlock(map);
1144 
1145 	/*
1146 	 * Remove dead entries.
1147 	 *
1148 	 * Dead entries may be the result of merging.
1149 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1150 	 * destroy free-space entries.
1151 	 */
1152 	uvm_unmap_detach(&dead, 0);
1153 
1154 	if (new)
1155 		uvm_mapent_free(new);
1156 	return error;
1157 }
1158 
1159 /*
1160  * uvm_map: establish a valid mapping in map
1161  *
1162  * => *addr and sz must be a multiple of PAGE_SIZE.
1163  * => map must be unlocked.
1164  * => <uobj,uoffset> value meanings (4 cases):
1165  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1166  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1167  *	[3] <uobj,uoffset>		== normal mapping
1168  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1169  *
1170  *   case [4] is for kernel mappings where we don't know the offset until
1171  *   we've found a virtual address.   note that kernel object offsets are
1172  *   always relative to vm_map_min(kernel_map).
1173  *
1174  * => align: align vaddr, must be a power-of-2.
1175  *    Align is only a hint and will be ignored if the alignment fails.
1176  */
1177 int
1178 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
1179     struct uvm_object *uobj, voff_t uoffset,
1180     vsize_t align, unsigned int flags)
1181 {
1182 	struct vm_map_entry	*first, *last, *entry, *new;
1183 	struct uvm_map_deadq	 dead;
1184 	vm_prot_t		 prot;
1185 	vm_prot_t		 maxprot;
1186 	vm_inherit_t		 inherit;
1187 	int			 advice;
1188 	int			 error;
1189 	vaddr_t			 pmap_align, pmap_offset;
1190 	vaddr_t			 hint;
1191 
1192 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1193 		splassert(IPL_NONE);
1194 	else
1195 		splassert(IPL_VM);
1196 
1197 	/*
1198 	 * We use pmap_align and pmap_offset as alignment and offset variables.
1199 	 * Because the align parameter takes precedence over pmap prefer,
1200 	 * pmap_align must be set to align (with pmap_offset = 0) whenever
1201 	 * pmap_prefer cannot satisfy the requested alignment.
1202 	 * if pmap_prefer will not align.
1203 	 */
1204 	if (uoffset == UVM_UNKNOWN_OFFSET) {
1205 		pmap_align = MAX(align, PAGE_SIZE);
1206 		pmap_offset = 0;
1207 	} else {
1208 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
1209 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
1210 
1211 		if (align == 0 ||
1212 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
1213 			/* pmap_offset satisfies align, no change. */
1214 		} else {
1215 			/* Align takes precedence over pmap prefer. */
1216 			pmap_align = align;
1217 			pmap_offset = 0;
1218 		}
1219 	}
1220 
1221 	/* Decode parameters. */
1222 	prot = UVM_PROTECTION(flags);
1223 	maxprot = UVM_MAXPROTECTION(flags);
1224 	advice = UVM_ADVICE(flags);
1225 	inherit = UVM_INHERIT(flags);
1226 	error = 0;
1227 	hint = trunc_page(*addr);
1228 	TAILQ_INIT(&dead);
1229 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1230 	KASSERT((align & (align - 1)) == 0);
1231 
1232 	/* Holes are incompatible with other types of mappings. */
1233 	if (flags & UVM_FLAG_HOLE) {
1234 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1235 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1236 	}
1237 
1238 	/* Unset hint for kernel_map non-fixed allocations. */
1239 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1240 		hint = 0;
1241 
1242 	/* Check protection. */
1243 	if ((prot & maxprot) != prot)
1244 		return EACCES;
1245 
1246 	if (map == kernel_map &&
1247 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1248 		panic("uvm_map: kernel map W^X violation requested");
1249 
1250 	/*
1251 	 * Before grabbing the lock, allocate a map entry for later
1252 	 * use to ensure we don't wait for memory while holding the
1253 	 * vm_map_lock.
1254 	 */
1255 	new = uvm_mapent_alloc(map, flags);
1256 	if (new == NULL)
1257 		return ENOMEM;
1258 
1259 	if (flags & UVM_FLAG_TRYLOCK) {
1260 		if (vm_map_lock_try(map) == FALSE) {
1261 			error = EFAULT;
1262 			goto out;
1263 		}
1264 	} else {
1265 		vm_map_lock(map);
1266 	}
1267 
1268 	first = last = NULL;
1269 	if (flags & UVM_FLAG_FIXED) {
1270 		/*
1271 		 * Fixed location.
1272 		 *
1273 		 * Note: we ignore align, pmap_prefer.
1274 		 * Fill in first, last and *addr.
1275 		 */
1276 		KASSERT((*addr & PAGE_MASK) == 0);
1277 
1278 		/*
1279 		 * Grow pmap to include allocated address.
1280 		 * If the growth fails, the allocation will fail too.
1281 		 */
1282 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1283 		    uvm_maxkaddr < (*addr + sz)) {
1284 			uvm_map_kmem_grow(map, &dead,
1285 			    *addr + sz - uvm_maxkaddr, flags);
1286 		}
1287 
1288 		/* Check that the space is available. */
1289 		if (flags & UVM_FLAG_UNMAP)
1290 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1291 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1292 			error = ENOMEM;
1293 			goto unlock;
1294 		}
1295 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1296 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1297 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1298 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1299 		/*
1300 		 * Address used as hint.
1301 		 *
1302 		 * Note: we enforce the alignment restriction,
1303 		 * but ignore pmap_prefer.
1304 		 */
1305 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1306 		/* Run selection algorithm for executables. */
1307 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1308 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1309 
1310 		/* Grow kernel memory and try again. */
1311 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1312 			uvm_map_kmem_grow(map, &dead, sz, flags);
1313 
1314 			error = uvm_addr_invoke(map, map->uaddr_exe,
1315 			    &first, &last, addr, sz,
1316 			    pmap_align, pmap_offset, prot, hint);
1317 		}
1318 
1319 		if (error != 0)
1320 			goto unlock;
1321 	} else {
1322 		/* Update freelists from vmspace. */
1323 		if (map->flags & VM_MAP_ISVMSPACE)
1324 			uvm_map_vmspace_update(map, &dead, flags);
1325 
1326 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1327 		    pmap_align, pmap_offset, prot, hint);
1328 
1329 		/* Grow kernel memory and try again. */
1330 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1331 			uvm_map_kmem_grow(map, &dead, sz, flags);
1332 
1333 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1334 			    pmap_align, pmap_offset, prot, hint);
1335 		}
1336 
1337 		if (error != 0)
1338 			goto unlock;
1339 	}
1340 
1341 	/* Double-check if selected address doesn't cause overflow. */
1342 	if (*addr + sz < *addr) {
1343 		error = ENOMEM;
1344 		goto unlock;
1345 	}
1346 
1347 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1348 	    uvm_maxkaddr >= *addr + sz);
1349 
1350 	/* If we only want a query, return now. */
1351 	if (flags & UVM_FLAG_QUERY) {
1352 		error = 0;
1353 		goto unlock;
1354 	}
1355 
1356 	if (uobj == NULL)
1357 		uoffset = 0;
1358 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1359 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1360 		uoffset = *addr - vm_map_min(kernel_map);
1361 	}
1362 
1363 	/*
1364 	 * Create new entry.
1365 	 * first and last may be invalidated after this call.
1366 	 */
1367 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1368 	    new);
1369 	if (entry == NULL) {
1370 		error = ENOMEM;
1371 		goto unlock;
1372 	}
1373 	new = NULL;
1374 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1375 	entry->object.uvm_obj = uobj;
1376 	entry->offset = uoffset;
1377 	entry->protection = prot;
1378 	entry->max_protection = maxprot;
1379 	entry->inheritance = inherit;
1380 	entry->wired_count = 0;
1381 	entry->advice = advice;
1382 	if (prot & PROT_WRITE)
1383 		map->wserial++;
1384 	if (flags & UVM_FLAG_SYSCALL) {
1385 		entry->etype |= UVM_ET_SYSCALL;
1386 		map->wserial++;
1387 	}
1388 	if (flags & UVM_FLAG_STACK) {
1389 		entry->etype |= UVM_ET_STACK;
1390 		if (flags & UVM_FLAG_UNMAP)
1391 			map->sserial++;
1392 	}
1393 	if (uobj)
1394 		entry->etype |= UVM_ET_OBJ;
1395 	else if (flags & UVM_FLAG_HOLE)
1396 		entry->etype |= UVM_ET_HOLE;
1397 	if (flags & UVM_FLAG_NOFAULT)
1398 		entry->etype |= UVM_ET_NOFAULT;
1399 	if (flags & UVM_FLAG_WC)
1400 		entry->etype |= UVM_ET_WC;
1401 	if (flags & UVM_FLAG_COPYONW) {
1402 		entry->etype |= UVM_ET_COPYONWRITE;
1403 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1404 			entry->etype |= UVM_ET_NEEDSCOPY;
1405 	}
1406 	if (flags & UVM_FLAG_CONCEAL)
1407 		entry->etype |= UVM_ET_CONCEAL;
1408 	if (flags & UVM_FLAG_OVERLAY) {
1409 		entry->aref.ar_pageoff = 0;
1410 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1411 	}
1412 
1413 	/* Update map and process statistics. */
1414 	if (!(flags & UVM_FLAG_HOLE)) {
1415 		map->size += sz;
1416 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
1417 		    prot != PROT_NONE) {
1418 			((struct vmspace *)map)->vm_dused +=
1419 			    uvmspace_dused(map, *addr, *addr + sz);
1420 		}
1421 	}
1422 
1423 	/*
1424 	 * Try to merge entry.
1425 	 *
1426 	 * Userland allocations are kept separated most of the time.
1427 	 * Forego the effort of merging what most of the time can't be merged
1428 	 * and only try the merge if it concerns a kernel entry.
1429 	 */
1430 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1431 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1432 		uvm_mapent_tryjoin(map, entry, &dead);
1433 
1434 unlock:
1435 	vm_map_unlock(map);
1436 
1437 	/*
1438 	 * Remove dead entries.
1439 	 *
1440 	 * Dead entries may be the result of merging.
1441 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1442 	 * destroy free-space entries.
1443 	 */
1444 	if (map->flags & VM_MAP_INTRSAFE)
1445 		uvm_unmap_detach_intrsafe(&dead);
1446 	else
1447 		uvm_unmap_detach(&dead, 0);
1448 out:
1449 	if (new)
1450 		uvm_mapent_free(new);
1451 	return error;
1452 }
1453 
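/*
 * Example call (a sketch, not copied from any real caller): a typical
 * anonymous, pageable kernel mapping would look roughly like
 *
 *	vaddr_t va = 0;
 *
 *	error = uvm_map(kernel_map, &va, round_page(size), NULL,
 *	    UVM_UNKNOWN_OFFSET, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
 *	    PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, MADV_NORMAL, 0));
 *
 * where UVM_MAPFLAG() packs the prot/maxprot/inherit/advice/flag values
 * that are decoded by UVM_PROTECTION() and friends above.
 */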
1454 /*
1455  * True iff e1 and e2 can be joined together.
1456  */
1457 int
1458 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1459     struct vm_map_entry *e2)
1460 {
1461 	KDASSERT(e1 != NULL && e2 != NULL);
1462 
1463 	/* Must be the same entry type and have no free memory between them. */
1464 	if (e1->etype != e2->etype || e1->end != e2->start)
1465 		return 0;
1466 
1467 	/* Submaps are never joined. */
1468 	if (UVM_ET_ISSUBMAP(e1))
1469 		return 0;
1470 
1471 	/* Never merge wired memory. */
1472 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1473 		return 0;
1474 
1475 	/* Protection, inheritance and advice must be equal. */
1476 	if (e1->protection != e2->protection ||
1477 	    e1->max_protection != e2->max_protection ||
1478 	    e1->inheritance != e2->inheritance ||
1479 	    e1->advice != e2->advice)
1480 		return 0;
1481 
1482 	/* If uvm_object: object itself and offsets within object must match. */
1483 	if (UVM_ET_ISOBJ(e1)) {
1484 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1485 			return 0;
1486 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1487 			return 0;
1488 	}
1489 
1490 	/*
1491 	 * Cannot join shared amaps.
1492 	 * Note: no need to lock amap to look at refs, since we don't care
1493 	 * about its exact value.
1494 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1495 	 */
1496 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1497 		return 0;
1498 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1499 		return 0;
1500 
1501 	/* Apparently, e1 and e2 match. */
1502 	return 1;
1503 }
1504 
1505 /*
1506  * Join support function.
1507  *
1508  * Returns the merged entry on success.
1509  * Returns NULL if the merge failed.
1510  */
1511 struct vm_map_entry*
1512 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1513     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1514 {
1515 	struct uvm_addr_state *free;
1516 
1517 	/*
1518 	 * Merging is not supported for map entries that
1519 	 * contain an amap in e1. This should never happen
1520 	 * anyway, because only kernel entries are merged.
1521 	 * These do not contain amaps.
1522 	 * e2 contains no real information in its amap,
1523 	 * so it can be erased immediately.
1524 	 */
1525 	KASSERT(e1->aref.ar_amap == NULL);
1526 
1527 	/*
1528 	 * Don't drop obj reference:
1529 	 * uvm_unmap_detach will do this for us.
1530 	 */
1531 	free = uvm_map_uaddr_e(map, e1);
1532 	uvm_mapent_free_remove(map, free, e1);
1533 
1534 	free = uvm_map_uaddr_e(map, e2);
1535 	uvm_mapent_free_remove(map, free, e2);
1536 	uvm_mapent_addr_remove(map, e2);
1537 	e1->end = e2->end;
1538 	e1->guard = e2->guard;
1539 	e1->fspace = e2->fspace;
1540 	uvm_mapent_free_insert(map, free, e1);
1541 
1542 	DEAD_ENTRY_PUSH(dead, e2);
1543 	return e1;
1544 }
1545 
1546 /*
1547  * Attempt forward and backward joining of entry.
1548  *
1549  * Returns entry after joins.
1550  * We are guaranteed that the amap of entry is either non-existent or
1551  * has never been used.
1552  */
1553 struct vm_map_entry*
1554 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1555     struct uvm_map_deadq *dead)
1556 {
1557 	struct vm_map_entry *other;
1558 	struct vm_map_entry *merged;
1559 
1560 	/* Merge with previous entry. */
1561 	other = RBT_PREV(uvm_map_addr, entry);
1562 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1563 		merged = uvm_mapent_merge(map, other, entry, dead);
1564 		if (merged)
1565 			entry = merged;
1566 	}
1567 
1568 	/*
1569 	 * Merge with next entry.
1570 	 *
1571 	 * Because amap can only extend forward and the next entry
1572 	 * probably contains sensible info, only perform forward merging
1573 	 * in the absence of an amap.
1574 	 */
1575 	other = RBT_NEXT(uvm_map_addr, entry);
1576 	if (other && entry->aref.ar_amap == NULL &&
1577 	    other->aref.ar_amap == NULL &&
1578 	    uvm_mapent_isjoinable(map, entry, other)) {
1579 		merged = uvm_mapent_merge(map, entry, other, dead);
1580 		if (merged)
1581 			entry = merged;
1582 	}
1583 
1584 	return entry;
1585 }
1586 
1587 /*
1588  * Kill entries that are no longer in a map.
1589  */
1590 void
1591 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1592 {
1593 	struct vm_map_entry *entry, *tmp;
1594 	int waitok = flags & UVM_PLA_WAITOK;
1595 
1596 	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1597 		/* Drop reference to amap, if we've got one. */
1598 		if (entry->aref.ar_amap)
1599 			amap_unref(entry->aref.ar_amap,
1600 			    entry->aref.ar_pageoff,
1601 			    atop(entry->end - entry->start),
1602 			    flags & AMAP_REFALL);
1603 
1604 		/* Skip entries for which we have to grab the kernel lock. */
1605 		if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
1606 			continue;
1607 
1608 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1609 		uvm_mapent_free(entry);
1610 	}
1611 
1612 	if (TAILQ_EMPTY(deadq))
1613 		return;
1614 
1615 	KERNEL_LOCK();
1616 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1617 		if (waitok)
1618 			uvm_pause();
1619 		/* Drop reference to our backing object, if we've got one. */
1620 		if (UVM_ET_ISSUBMAP(entry)) {
1621 			/* ... unlikely to happen, but play it safe */
1622 			uvm_map_deallocate(entry->object.sub_map);
1623 		} else if (UVM_ET_ISOBJ(entry) &&
1624 		    entry->object.uvm_obj->pgops->pgo_detach) {
1625 			entry->object.uvm_obj->pgops->pgo_detach(
1626 			    entry->object.uvm_obj);
1627 		}
1628 
1629 		/* Step to next. */
1630 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1631 		uvm_mapent_free(entry);
1632 	}
1633 	KERNEL_UNLOCK();
1634 }
1635 
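/*
 * Note on uvm_unmap_detach() above: the first loop frees the entries that
 * need no kernel lock, and KERNEL_LOCK() is taken only for the leftovers
 * that still have to drop a submap or uvm_object reference.
 */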
1636 void
1637 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1638 {
1639 	struct vm_map_entry *entry;
1640 
1641 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1642 		KASSERT(entry->aref.ar_amap == NULL);
1643 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1644 		KASSERT(!UVM_ET_ISOBJ(entry));
1645 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1646 		uvm_mapent_free(entry);
1647 	}
1648 }
1649 
1650 /*
1651  * Create and insert new entry.
1652  *
1653  * Returned entry contains new addresses and is inserted properly in the tree.
1654  * first and last are (probably) no longer valid.
1655  */
1656 struct vm_map_entry*
1657 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1658     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1659     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1660 {
1661 	struct vm_map_entry *entry, *prev;
1662 	struct uvm_addr_state *free;
1663 	vaddr_t min, max;	/* free space boundaries for new entry */
1664 
1665 	KDASSERT(map != NULL);
1666 	KDASSERT(first != NULL);
1667 	KDASSERT(last != NULL);
1668 	KDASSERT(dead != NULL);
1669 	KDASSERT(sz > 0);
1670 	KDASSERT(addr + sz > addr);
1671 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1672 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1673 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1674 	uvm_tree_sanity(map, __FILE__, __LINE__);
1675 
1676 	min = addr + sz;
1677 	max = VMMAP_FREE_END(last);
1678 
1679 	/* Initialize new entry. */
1680 	if (new == NULL)
1681 		entry = uvm_mapent_alloc(map, flags);
1682 	else
1683 		entry = new;
1684 	if (entry == NULL)
1685 		return NULL;
1686 	entry->offset = 0;
1687 	entry->etype = 0;
1688 	entry->wired_count = 0;
1689 	entry->aref.ar_pageoff = 0;
1690 	entry->aref.ar_amap = NULL;
1691 
1692 	entry->start = addr;
1693 	entry->end = min;
1694 	entry->guard = 0;
1695 	entry->fspace = 0;
1696 
1697 	/* Reset free space in first. */
1698 	free = uvm_map_uaddr_e(map, first);
1699 	uvm_mapent_free_remove(map, free, first);
1700 	first->guard = 0;
1701 	first->fspace = 0;
1702 
1703 	/*
1704 	 * Remove all entries that are fully replaced.
1705 	 * We are iterating using last in reverse order.
1706 	 */
1707 	for (; first != last; last = prev) {
1708 		prev = RBT_PREV(uvm_map_addr, last);
1709 
1710 		KDASSERT(last->start == last->end);
1711 		free = uvm_map_uaddr_e(map, last);
1712 		uvm_mapent_free_remove(map, free, last);
1713 		uvm_mapent_addr_remove(map, last);
1714 		DEAD_ENTRY_PUSH(dead, last);
1715 	}
1716 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1717 	if (first->start == addr) {
1718 		uvm_mapent_addr_remove(map, first);
1719 		DEAD_ENTRY_PUSH(dead, first);
1720 	} else {
1721 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1722 		    addr, flags);
1723 	}
1724 
1725 	/* Finally, link in entry. */
1726 	uvm_mapent_addr_insert(map, entry);
1727 	uvm_map_fix_space(map, entry, min, max, flags);
1728 
1729 	uvm_tree_sanity(map, __FILE__, __LINE__);
1730 	return entry;
1731 }
1732 
1733 
1734 /*
1735  * uvm_mapent_alloc: allocate a map entry
1736  */
1737 struct vm_map_entry *
1738 uvm_mapent_alloc(struct vm_map *map, int flags)
1739 {
1740 	struct vm_map_entry *me, *ne;
1741 	int pool_flags;
1742 	int i;
1743 
1744 	pool_flags = PR_WAITOK;
1745 	if (flags & UVM_FLAG_TRYLOCK)
1746 		pool_flags = PR_NOWAIT;
1747 
1748 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1749 		mtx_enter(&uvm_kmapent_mtx);
1750 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1751 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1752 			    &kd_nowait);
1753 			if (ne == NULL)
1754 				panic("uvm_mapent_alloc: cannot allocate map "
1755 				    "entry");
1756 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1757 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1758 				    &ne[i], daddrs.addr_kentry);
1759 			}
1760 			if (ratecheck(&uvm_kmapent_last_warn_time,
1761 			    &uvm_kmapent_warn_rate))
1762 				printf("uvm_mapent_alloc: out of static "
1763 				    "map entries\n");
1764 		}
1765 		me = SLIST_FIRST(&uvm.kentry_free);
1766 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1767 		uvmexp.kmapent++;
1768 		mtx_leave(&uvm_kmapent_mtx);
1769 		me->flags = UVM_MAP_STATIC;
1770 	} else if (map == kernel_map) {
1771 		splassert(IPL_NONE);
1772 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1773 		if (me == NULL)
1774 			goto out;
1775 		me->flags = UVM_MAP_KMEM;
1776 	} else {
1777 		splassert(IPL_NONE);
1778 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1779 		if (me == NULL)
1780 			goto out;
1781 		me->flags = 0;
1782 	}
1783 
1784 	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1785 out:
1786 	return me;
1787 }
1788 
1789 /*
1790  * uvm_mapent_free: free map entry
1791  *
1792  * => XXX: static pool for kernel map?
1793  */
1794 void
1795 uvm_mapent_free(struct vm_map_entry *me)
1796 {
1797 	if (me->flags & UVM_MAP_STATIC) {
1798 		mtx_enter(&uvm_kmapent_mtx);
1799 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1800 		uvmexp.kmapent--;
1801 		mtx_leave(&uvm_kmapent_mtx);
1802 	} else if (me->flags & UVM_MAP_KMEM) {
1803 		splassert(IPL_NONE);
1804 		pool_put(&uvm_map_entry_kmem_pool, me);
1805 	} else {
1806 		splassert(IPL_NONE);
1807 		pool_put(&uvm_map_entry_pool, me);
1808 	}
1809 }
1810 
1811 /*
1812  * uvm_map_lookup_entry: find map entry at or before an address.
1813  *
1814  * => map must at least be read-locked by caller
1815  * => entry is returned in "entry"
1816  * => return value is true if address is in the returned entry
1817  * => ET_HOLE entries are considered not to contain a mapping; FALSE is
1818  *    returned for them.
1819  */
1820 boolean_t
1821 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1822     struct vm_map_entry **entry)
1823 {
1824 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1825 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1826 	    (*entry)->start <= address && (*entry)->end > address;
1827 }
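
/*
 * Usage sketch (illustrative only, mirrors uvm_map_inentry_fix() below):
 * a caller that merely wants to know whether an address is currently
 * mapped could do
 *
 *	struct vm_map_entry *entry;
 *	boolean_t mapped;
 *
 *	vm_map_lock_read(map);
 *	mapped = uvm_map_lookup_entry(map, trunc_page(addr), &entry);
 *	vm_map_unlock_read(map);
 *
 * satisfying the read-lock requirement above; on TRUE, addr lies within
 * [entry->start, entry->end).
 */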
1828 
1829 /*
1830  * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
1831  * grown -- then uvm_map_check_region_range() should not cache the entry
1832  * because growth won't be seen.
1833  */
1834 int
1835 uvm_map_inentry_sp(vm_map_entry_t entry)
1836 {
1837 	if ((entry->etype & UVM_ET_STACK) == 0) {
1838 		if (entry->protection == PROT_NONE)
1839 			return (-1);	/* don't update range */
1840 		return (0);
1841 	}
1842 	return (1);
1843 }
1844 
1845 /*
1846  * The system call must not come from a writeable entry; that would violate
1847  * W^X.  (It would be nice if we could spot aliasing too, which is also bad.)
1848  *
1849  * The system call must come from a syscall-labeled entry (the text regions
1850  * of the main program, sigtramp, ld.so, or libc).
1851  */
1852 int
1853 uvm_map_inentry_pc(vm_map_entry_t entry)
1854 {
1855 	if (entry->protection & PROT_WRITE)
1856 		return (0);	/* not permitted */
1857 	if ((entry->etype & UVM_ET_SYSCALL) == 0)
1858 		return (0);	/* not permitted */
1859 	return (1);
1860 }
1861 
1862 int
1863 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1864 {
1865 	return (serial != ie->ie_serial || ie->ie_start == 0 ||
1866 	    addr < ie->ie_start || addr >= ie->ie_end);
1867 }
1868 
1869 /*
1870  * Inside a vm_map, find the entry containing the address and verify it
1871  * with the given function.  Remember the low and high addresses of the
1872  * region if it is valid and return TRUE, else return FALSE.
1873  */
1874 boolean_t
1875 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1876     int (*fn)(vm_map_entry_t), u_long serial)
1877 {
1878 	vm_map_t map = &p->p_vmspace->vm_map;
1879 	vm_map_entry_t entry;
1880 	int ret;
1881 
1882 	if (addr < map->min_offset || addr >= map->max_offset)
1883 		return (FALSE);
1884 
1885 	/* lock map */
1886 	vm_map_lock_read(map);
1887 
1888 	/* lookup */
1889 	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1890 		vm_map_unlock_read(map);
1891 		return (FALSE);
1892 	}
1893 
1894 	ret = (*fn)(entry);
1895 	if (ret == 0) {
1896 		vm_map_unlock_read(map);
1897 		return (FALSE);
1898 	} else if (ret == 1) {
1899 		ie->ie_start = entry->start;
1900 		ie->ie_end = entry->end;
1901 		ie->ie_serial = serial;
1902 	} else {
1903 		/* do not update, re-check later */
1904 	}
1905 	vm_map_unlock_read(map);
1906 	return (TRUE);
1907 }
1908 
1909 boolean_t
1910 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1911     const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
1912 {
1913 	union sigval sv;
1914 	boolean_t ok = TRUE;
1915 
1916 	if (uvm_map_inentry_recheck(serial, addr, ie)) {
1917 		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1918 		if (!ok) {
1919 			KERNEL_LOCK();
1920 			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1921 			    addr, ie->ie_start, ie->ie_end-1);
1922 			p->p_p->ps_acflag |= AMAP;
1923 			sv.sival_ptr = (void *)PROC_PC(p);
1924 			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1925 			KERNEL_UNLOCK();
1926 		}
1927 	}
1928 	return (ok);
1929 }
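
/*
 * Usage sketch (hypothetical; the real call sites live in MD trap code
 * and may differ in detail): on kernel entry the stack pointer check is
 * typically issued as
 *
 *	if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
 *	    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
 *	    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
 *		return;
 *
 * and the program-counter check passes uvm_map_inentry_pc together with
 * the map's wserial instead.
 */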
1930 
1931 /*
1932  * Check whether the given address range can be converted to a MAP_STACK
1933  * mapping.
1934  *
1935  * Must be called with map locked.
1936  */
1937 boolean_t
1938 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
1939 {
1940 	vaddr_t end = addr + sz;
1941 	struct vm_map_entry *first, *iter, *prev = NULL;
1942 
1943 	if (!uvm_map_lookup_entry(map, addr, &first)) {
1944 		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1945 		    addr, end, map);
1946 		return FALSE;
1947 	}
1948 
1949 	/*
1950 	 * Check that the address range exists and is contiguous.
1951 	 */
1952 	for (iter = first; iter != NULL && iter->start < end;
1953 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1954 		/*
1955 		 * Make sure that we do not have holes in the range.
1956 		 */
1957 #if 0
1958 		if (prev != NULL) {
1959 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1960 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1961 			    prev->start, prev->end, iter->start, iter->end);
1962 		}
1963 #endif
1964 
1965 		if (prev != NULL && prev->end != iter->start) {
1966 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1967 			    "hole in range\n", addr, end, map);
1968 			return FALSE;
1969 		}
1970 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1971 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1972 			    "hole in range\n", addr, end, map);
1973 			return FALSE;
1974 		}
1975 	}
1976 
1977 	return TRUE;
1978 }
1979 
1980 /*
1981  * Remap the middle-pages of an existing mapping as a stack range.
1982  * Remap the middle pages of an existing mapping as a stack range.
1983  * If there is an existing contiguous mapping covering the given range
1984  * [addr, addr + sz) with protection PROT_READ|PROT_WRITE, that mapping
1985  * is dropped and a new anon mapping is created and marked as
1986  * a stack.
1987  * Must be called with map unlocked.
1988  */
1989 int
1990 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1991 {
1992 	vm_map_t map = &p->p_vmspace->vm_map;
1993 	vaddr_t start, end;
1994 	int error;
1995 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1996 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1997 	    MAP_INHERIT_COPY, MADV_NORMAL,
1998 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1999 	    UVM_FLAG_COPYONW);
2000 
2001 	start = round_page(addr);
2002 	end = trunc_page(addr + sz);
2003 #ifdef MACHINE_STACK_GROWS_UP
2004 	if (end == addr + sz)
2005 		end -= PAGE_SIZE;
2006 #else
2007 	if (start == addr)
2008 		start += PAGE_SIZE;
2009 #endif
2010 
2011 	if (start < map->min_offset || end >= map->max_offset || end < start)
2012 		return EINVAL;
2013 
2014 	error = uvm_mapanon(map, &start, end - start, 0, flags);
2015 	if (error != 0)
2016 		printf("map stack for pid %d failed\n", p->p_p->ps_pid);
2017 
2018 	return error;
2019 }
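
/*
 * Example of the rounding above (illustrative numbers, 4 KB pages,
 * !MACHINE_STACK_GROWS_UP): for addr = 0x1000 and sz = 0x4000, start is
 * bumped from 0x1000 to 0x2000 while end stays at 0x5000, so the page at
 * addr keeps its original mapping and only the remaining pages
 * 0x2000-0x5000 are remapped as stack.
 */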
2020 
2021 /*
2022  * uvm_map_pie: return a random load address for a PIE executable
2023  * properly aligned.
2024  */
2025 #ifndef VM_PIE_MAX_ADDR
2026 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
2027 #endif
2028 
2029 #ifndef VM_PIE_MIN_ADDR
2030 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
2031 #endif
2032 
2033 #ifndef VM_PIE_MIN_ALIGN
2034 #define VM_PIE_MIN_ALIGN PAGE_SIZE
2035 #endif
2036 
2037 vaddr_t
2038 uvm_map_pie(vaddr_t align)
2039 {
2040 	vaddr_t addr, space, min;
2041 
2042 	align = MAX(align, VM_PIE_MIN_ALIGN);
2043 
2044 	/* round up to next alignment */
2045 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
2046 
2047 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
2048 		return (align);
2049 
2050 	space = (VM_PIE_MAX_ADDR - min) / align;
2051 	space = MIN(space, (u_int32_t)-1);
2052 
2053 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
2054 	addr += min;
2055 
2056 	return (addr);
2057 }
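
/*
 * Worked example (illustrative numbers; the actual VM_PIE_* values are
 * machine-dependent): with align = 0x200000 and VM_PIE_MIN_ADDR = 0x1000,
 * the round-up yields min = 0x200000.  If VM_PIE_MAX_ADDR were
 * 0x20000000, then space = (0x20000000 - 0x200000) / 0x200000 = 255
 * slots, and the result is min + arc4random_uniform(255) * 0x200000,
 * i.e. a uniformly chosen, properly aligned slot below VM_PIE_MAX_ADDR.
 */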
2058 
2059 void
2060 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
2061 {
2062 	struct uvm_map_deadq dead;
2063 
2064 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
2065 	    (end & (vaddr_t)PAGE_MASK) == 0);
2066 	TAILQ_INIT(&dead);
2067 	vm_map_lock(map);
2068 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
2069 	vm_map_unlock(map);
2070 
2071 	if (map->flags & VM_MAP_INTRSAFE)
2072 		uvm_unmap_detach_intrsafe(&dead);
2073 	else
2074 		uvm_unmap_detach(&dead, 0);
2075 }
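
/*
 * Usage sketch (illustrative): tearing down a page-aligned kernel
 * virtual address range would look like
 *
 *	uvm_unmap(kernel_map, kva, kva + size);
 *
 * with kva and size page-aligned, matching the KASSERT above.  The map
 * lock is taken internally, so the caller must not hold it.
 */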
2076 
2077 /*
2078  * Mark entry as free.
2079  *
2080  * entry will be put on the dead list.
2081  * The free space will be merged into the previous or a new entry,
2082  * unless markfree is false.
2083  */
2084 void
2085 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
2086     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
2087     boolean_t markfree)
2088 {
2089 	struct uvm_addr_state	*free;
2090 	struct vm_map_entry	*prev;
2091 	vaddr_t			 addr;	/* Start of freed range. */
2092 	vaddr_t			 end;	/* End of freed range. */
2093 
2094 	prev = *prev_ptr;
2095 	if (prev == entry)
2096 		*prev_ptr = prev = NULL;
2097 
2098 	if (prev == NULL ||
2099 	    VMMAP_FREE_END(prev) != entry->start)
2100 		prev = RBT_PREV(uvm_map_addr, entry);
2101 
2102 	/* Entry is describing only free memory and has nothing to drain into. */
2103 	if (prev == NULL && entry->start == entry->end && markfree) {
2104 		*prev_ptr = entry;
2105 		return;
2106 	}
2107 
2108 	addr = entry->start;
2109 	end = VMMAP_FREE_END(entry);
2110 	free = uvm_map_uaddr_e(map, entry);
2111 	uvm_mapent_free_remove(map, free, entry);
2112 	uvm_mapent_addr_remove(map, entry);
2113 	DEAD_ENTRY_PUSH(dead, entry);
2114 
2115 	if (markfree) {
2116 		if (prev) {
2117 			free = uvm_map_uaddr_e(map, prev);
2118 			uvm_mapent_free_remove(map, free, prev);
2119 		}
2120 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
2121 	}
2122 }
2123 
2124 /*
2125  * Unwire and release referenced amap and object from map entry.
2126  */
2127 void
2128 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
2129     int needlock)
2130 {
2131 	/* Unwire removed map entry. */
2132 	if (VM_MAPENT_ISWIRED(entry)) {
2133 		KERNEL_LOCK();
2134 		entry->wired_count = 0;
2135 		uvm_fault_unwire_locked(map, entry->start, entry->end);
2136 		KERNEL_UNLOCK();
2137 	}
2138 
2139 	if (needlock)
2140 		uvm_map_lock_entry(entry);
2141 
2142 	/* Entry-type specific code. */
2143 	if (UVM_ET_ISHOLE(entry)) {
2144 		/* Nothing to be done for holes. */
2145 	} else if (map->flags & VM_MAP_INTRSAFE) {
2146 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2147 
2148 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
2149 	} else if (UVM_ET_ISOBJ(entry) &&
2150 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2151 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2152 		/*
2153 		 * Note: kernel object mappings are currently used in
2154 		 * two ways:
2155 		 *  [1] "normal" mappings of pages in the kernel object
2156 		 *  [2] uvm_km_valloc'd allocations in which we
2157 		 *      pmap_enter in some non-kernel-object page
2158 		 *      (e.g. vmapbuf).
2159 		 *
2160 		 * for case [1], we need to remove the mapping from
2161 		 * the pmap and then remove the page from the kernel
2162 		 * object (because, once pages in a kernel object are
2163 		 * unmapped they are no longer needed, unlike, say,
2164 		 * a vnode where you might want the data to persist
2165 		 * until flushed out of a queue).
2166 		 *
2167 		 * for case [2], we need to remove the mapping from
2168 		 * the pmap.  there shouldn't be any pages at the
2169 		 * specified offset in the kernel object [but it
2170 		 * doesn't hurt to call uvm_km_pgremove just to be
2171 		 * safe?]
2172 		 *
2173 		 * uvm_km_pgremove currently does the following:
2174 		 *   for pages in the kernel object range:
2175 		 *     - drops the swap slot
2176 		 *     - uvm_pagefree the page
2177 		 *
2178 		 * note there is a version of uvm_km_pgremove() that
2179 		 * is used for "intrsafe" objects.
2180 		 */
2181 		/*
2182 		 * remove mappings from pmap and drop the pages
2183 		 * from the object.  offsets are always relative
2184 		 * to vm_map_min(kernel_map).
2185 		 */
2186 		uvm_km_pgremove(entry->object.uvm_obj, entry->start,
2187 		    entry->end);
2188 	} else {
2189 		/* remove mappings the standard way. */
2190 		pmap_remove(map->pmap, entry->start, entry->end);
2191 	}
2192 
2193 	if (needlock)
2194 		uvm_map_unlock_entry(entry);
2195 }
2196 
2197 void
2198 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
2199 {
2200 	uvm_unmap_kill_entry_withlock(map, entry, 0);
2201 }
2202 
2203 /*
2204  * Remove all entries from start to end.
2205  *
2206  * If remove_holes, then remove ET_HOLE entries as well.
2207  * If markfree, the entry will be properly marked free; otherwise, no
2208  * replacement entry will be put in the tree (corrupting the tree).
2209  */
2210 void
2211 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2212     struct uvm_map_deadq *dead, boolean_t remove_holes,
2213     boolean_t markfree)
2214 {
2215 	struct vm_map_entry *prev_hint, *next, *entry;
2216 
2217 	start = MAX(start, map->min_offset);
2218 	end = MIN(end, map->max_offset);
2219 	if (start >= end)
2220 		return;
2221 
2222 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2223 		splassert(IPL_NONE);
2224 	else
2225 		splassert(IPL_VM);
2226 
2227 	/* Find first affected entry. */
2228 	entry = uvm_map_entrybyaddr(&map->addr, start);
2229 	KDASSERT(entry != NULL && entry->start <= start);
2230 	if (entry->end <= start && markfree)
2231 		entry = RBT_NEXT(uvm_map_addr, entry);
2232 	else
2233 		UVM_MAP_CLIP_START(map, entry, start);
2234 
2235 	/*
2236 	 * Iterate entries until we reach end address.
2237 	 * prev_hint hints where the freed space can be appended to.
2238 	 */
2239 	prev_hint = NULL;
2240 	for (; entry != NULL && entry->start < end; entry = next) {
2241 		KDASSERT(entry->start >= start);
2242 		if (entry->end > end || !markfree)
2243 			UVM_MAP_CLIP_END(map, entry, end);
2244 		KDASSERT(entry->start >= start && entry->end <= end);
2245 		next = RBT_NEXT(uvm_map_addr, entry);
2246 
2247 		/* Don't remove holes unless asked to do so. */
2248 		if (UVM_ET_ISHOLE(entry)) {
2249 			if (!remove_holes) {
2250 				prev_hint = entry;
2251 				continue;
2252 			}
2253 		}
2254 
2255 		/* A stack has been removed. */
2256 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2257 			map->sserial++;
2258 
2259 		/* Kill entry. */
2260 		uvm_unmap_kill_entry_withlock(map, entry, 1);
2261 
2262 		/* Update space usage. */
2263 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2264 		    entry->object.uvm_obj == NULL &&
2265 		    entry->protection != PROT_NONE &&
2266 		    !UVM_ET_ISHOLE(entry)) {
2267 			((struct vmspace *)map)->vm_dused -=
2268 			    uvmspace_dused(map, entry->start, entry->end);
2269 		}
2270 		if (!UVM_ET_ISHOLE(entry))
2271 			map->size -= entry->end - entry->start;
2272 
2273 		/* Actual removal of entry. */
2274 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2275 	}
2276 
2277 	pmap_update(vm_map_pmap(map));
2278 
2279 #ifdef VMMAP_DEBUG
2280 	if (markfree) {
2281 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2282 		    entry != NULL && entry->start < end;
2283 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2284 			KDASSERT(entry->end <= start ||
2285 			    entry->start == entry->end ||
2286 			    UVM_ET_ISHOLE(entry));
2287 		}
2288 	} else {
2289 		vaddr_t a;
2290 		for (a = start; a < end; a += PAGE_SIZE)
2291 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2292 	}
2293 #endif
2294 }
2295 
2296 /*
2297  * Mark all entries from first until end (exclusive) as pageable.
2298  *
2299  * Lock must be exclusive on entry and will not be touched.
2300  */
2301 void
2302 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2303     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2304 {
2305 	struct vm_map_entry *iter;
2306 
2307 	for (iter = first; iter != end;
2308 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2309 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2310 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2311 			continue;
2312 
2313 		iter->wired_count = 0;
2314 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2315 	}
2316 }
2317 
2318 /*
2319  * Mark all entries from first until end (exclusive) as wired.
2320  *
2321  * Lockflags determines the lock state on return from this function.
2322  * Lock must be exclusive on entry.
2323  */
2324 int
2325 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2326     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2327     int lockflags)
2328 {
2329 	struct vm_map_entry *iter;
2330 #ifdef DIAGNOSTIC
2331 	unsigned int timestamp_save;
2332 #endif
2333 	int error;
2334 
2335 	/*
2336 	 * Wire pages in two passes:
2337 	 *
2338 	 * 1: holding the write lock, we create any anonymous maps that need
2339 	 *    to be created.  then we clip each map entry to the region to
2340 	 *    be wired and increment its wiring count.
2341 	 *
2342 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2343 	 *    in the pages for any newly wired area (wired_count == 1).
2344 	 *
2345 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2346 	 *    deadlock with another thread that may have faulted on one of
2347 	 *    the pages to be wired (it would mark the page busy, blocking
2348 	 *    us, then in turn block on the map lock that we hold).
2349 	 *    because we keep the read lock on the map, the copy-on-write
2350 	 *    status of the entries we modify here cannot change.
2351 	 */
2352 	for (iter = first; iter != end;
2353 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2354 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2355 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2356 		    iter->protection == PROT_NONE)
2357 			continue;
2358 
2359 		/*
2360 		 * Perform actions of vm_map_lookup that need the write lock.
2361 		 * - create an anonymous map for copy-on-write
2362 		 * - anonymous map for zero-fill
2363 		 * Skip submaps.
2364 		 */
2365 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2366 		    UVM_ET_ISNEEDSCOPY(iter) &&
2367 		    ((iter->protection & PROT_WRITE) ||
2368 		    iter->object.uvm_obj == NULL)) {
2369 			amap_copy(map, iter, M_WAITOK,
2370 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2371 			    iter->start, iter->end);
2372 		}
2373 		iter->wired_count++;
2374 	}
2375 
2376 	/*
2377 	 * Pass 2.
2378 	 */
2379 #ifdef DIAGNOSTIC
2380 	timestamp_save = map->timestamp;
2381 #endif
2382 	vm_map_busy(map);
2383 	vm_map_downgrade(map);
2384 
2385 	error = 0;
2386 	for (iter = first; error == 0 && iter != end;
2387 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2388 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2389 		    iter->protection == PROT_NONE)
2390 			continue;
2391 
2392 		error = uvm_fault_wire(map, iter->start, iter->end,
2393 		    iter->protection);
2394 	}
2395 
2396 	if (error) {
2397 		/*
2398 		 * uvm_fault_wire failure
2399 		 *
2400 		 * Reacquire lock and undo our work.
2401 		 */
2402 		vm_map_upgrade(map);
2403 		vm_map_unbusy(map);
2404 #ifdef DIAGNOSTIC
2405 		if (timestamp_save != map->timestamp)
2406 			panic("uvm_map_pageable_wire: stale map");
2407 #endif
2408 
2409 		/*
2410 		 * first is no longer needed to restart loops.
2411 		 * Use it as iterator to unwire the entries that were wired above.
2412 		 */
2413 		for (; first != iter;
2414 		    first = RBT_NEXT(uvm_map_addr, first)) {
2415 			if (UVM_ET_ISHOLE(first) ||
2416 			    first->start == first->end ||
2417 			    first->protection == PROT_NONE)
2418 				continue;
2419 
2420 			first->wired_count--;
2421 			if (!VM_MAPENT_ISWIRED(first)) {
2422 				uvm_fault_unwire_locked(map,
2423 				    first->start, first->end);
2424 			}
2425 		}
2426 
2427 		/* decrease counter in the rest of the entries */
2428 		for (; iter != end;
2429 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2430 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2431 			    iter->protection == PROT_NONE)
2432 				continue;
2433 
2434 			iter->wired_count--;
2435 		}
2436 
2437 		if ((lockflags & UVM_LK_EXIT) == 0)
2438 			vm_map_unlock(map);
2439 		return error;
2440 	}
2441 
2442 	/* We are currently holding a read lock. */
2443 	if ((lockflags & UVM_LK_EXIT) == 0) {
2444 		vm_map_unbusy(map);
2445 		vm_map_unlock_read(map);
2446 	} else {
2447 		vm_map_upgrade(map);
2448 		vm_map_unbusy(map);
2449 #ifdef DIAGNOSTIC
2450 		if (timestamp_save != map->timestamp)
2451 			panic("uvm_map_pageable_wire: stale map");
2452 #endif
2453 	}
2454 	return 0;
2455 }
2456 
2457 /*
2458  * uvm_map_pageable: set pageability of a range in a map.
2459  *
2460  * Flags:
2461  * UVM_LK_ENTER: map is already locked by caller
2462  * UVM_LK_EXIT:  don't unlock map on exit
2463  *
2464  * The full range must be in use (entries may not have fspace != 0).
2465  * UVM_ET_HOLE counts as unmapped.
2466  */
2467 int
2468 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2469     boolean_t new_pageable, int lockflags)
2470 {
2471 	struct vm_map_entry *first, *last, *tmp;
2472 	int error;
2473 
2474 	start = trunc_page(start);
2475 	end = round_page(end);
2476 
2477 	if (start > end)
2478 		return EINVAL;
2479 	if (start == end)
2480 		return 0;	/* nothing to do */
2481 	if (start < map->min_offset)
2482 		return EFAULT; /* why? see first XXX below */
2483 	if (end > map->max_offset)
2484 		return EINVAL; /* why? see second XXX below */
2485 
2486 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2487 	if ((lockflags & UVM_LK_ENTER) == 0)
2488 		vm_map_lock(map);
2489 
2490 	/*
2491 	 * Find first entry.
2492 	 *
2493 	 * Initial test on start is different, because of the different
2494 	 * error returned. Rest is tested further down.
2495 	 */
2496 	first = uvm_map_entrybyaddr(&map->addr, start);
2497 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2498 		/*
2499 		 * XXX if the first address is not mapped, it is EFAULT?
2500 		 */
2501 		error = EFAULT;
2502 		goto out;
2503 	}
2504 
2505 	/* Check that the range has no holes. */
2506 	for (last = first; last != NULL && last->start < end;
2507 	    last = RBT_NEXT(uvm_map_addr, last)) {
2508 		if (UVM_ET_ISHOLE(last) ||
2509 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2510 			/*
2511 			 * XXX unmapped memory in range, why is it EINVAL
2512 			 * instead of EFAULT?
2513 			 */
2514 			error = EINVAL;
2515 			goto out;
2516 		}
2517 	}
2518 
2519 	/*
2520 	 * Last ended at the first entry after the range.
2521 	 * Move back one step.
2522 	 *
2523 	 * Note that last may be NULL.
2524 	 */
2525 	if (last == NULL) {
2526 		last = RBT_MAX(uvm_map_addr, &map->addr);
2527 		if (last->end < end) {
2528 			error = EINVAL;
2529 			goto out;
2530 		}
2531 	} else {
2532 		KASSERT(last != first);
2533 		last = RBT_PREV(uvm_map_addr, last);
2534 	}
2535 
2536 	/* Wire/unwire pages here. */
2537 	if (new_pageable) {
2538 		/*
2539 		 * Mark pageable.
2540 		 * entries that are not wired are untouched.
2541 		 */
2542 		if (VM_MAPENT_ISWIRED(first))
2543 			UVM_MAP_CLIP_START(map, first, start);
2544 		/*
2545 		 * Split last at end.
2546 		 * Make tmp be the first entry after what is to be touched.
2547 		 * If last is not wired, don't touch it.
2548 		 */
2549 		if (VM_MAPENT_ISWIRED(last)) {
2550 			UVM_MAP_CLIP_END(map, last, end);
2551 			tmp = RBT_NEXT(uvm_map_addr, last);
2552 		} else
2553 			tmp = last;
2554 
2555 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2556 		error = 0;
2557 
2558 out:
2559 		if ((lockflags & UVM_LK_EXIT) == 0)
2560 			vm_map_unlock(map);
2561 		return error;
2562 	} else {
2563 		/*
2564 		 * Mark entries wired.
2565 		 * entries are always touched (because recovery needs this).
2566 		 */
2567 		if (!VM_MAPENT_ISWIRED(first))
2568 			UVM_MAP_CLIP_START(map, first, start);
2569 		/*
2570 		 * Split last at end.
2571 		 * Make tmp be the first entry after what is to be touched.
2572 		 * If last is not wired, don't touch it.
2573 		 */
2574 		if (!VM_MAPENT_ISWIRED(last)) {
2575 			UVM_MAP_CLIP_END(map, last, end);
2576 			tmp = RBT_NEXT(uvm_map_addr, last);
2577 		} else
2578 			tmp = last;
2579 
2580 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2581 		    lockflags);
2582 	}
2583 }
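
/*
 * Usage sketch (hypothetical caller): wiring and later unwiring a range,
 * mlock(2)/munlock(2) style, with the map initially unlocked:
 *
 *	error = uvm_map_pageable(map, start, end, FALSE, 0);	(wire)
 *	...
 *	error = uvm_map_pageable(map, start, end, TRUE, 0);	(unwire)
 *
 * With lockflags == 0 the function takes and drops the map lock itself;
 * UVM_LK_ENTER and UVM_LK_EXIT adjust that as documented above.
 */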
2584 
2585 /*
2586  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2587  * all mapped regions.
2588  *
2589  * Map must not be locked.
2590  * If no flags are specified, all regions are unwired.
2591  */
2592 int
2593 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2594 {
2595 	vsize_t size;
2596 	struct vm_map_entry *iter;
2597 
2598 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2599 	vm_map_lock(map);
2600 
2601 	if (flags == 0) {
2602 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2603 		    NULL, map->min_offset, map->max_offset);
2604 
2605 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2606 		vm_map_unlock(map);
2607 		return 0;
2608 	}
2609 
2610 	if (flags & MCL_FUTURE)
2611 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2612 	if (!(flags & MCL_CURRENT)) {
2613 		vm_map_unlock(map);
2614 		return 0;
2615 	}
2616 
2617 	/*
2618 	 * Count number of pages in all non-wired entries.
2619 	 * If the number exceeds the limit, abort.
2620 	 */
2621 	size = 0;
2622 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2623 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2624 			continue;
2625 
2626 		size += iter->end - iter->start;
2627 	}
2628 
2629 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2630 		vm_map_unlock(map);
2631 		return ENOMEM;
2632 	}
2633 
2634 	/* XXX non-pmap_wired_count case must be handled by caller */
2635 #ifdef pmap_wired_count
2636 	if (limit != 0 &&
2637 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2638 		vm_map_unlock(map);
2639 		return ENOMEM;
2640 	}
2641 #endif
2642 
2643 	/*
2644 	 * uvm_map_pageable_wire will release lock
2645 	 */
2646 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2647 	    NULL, map->min_offset, map->max_offset, 0);
2648 }
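
/*
 * Usage sketch (hypothetical): an mlockall(2)-style caller wiring both
 * current and future mappings could issue
 *
 *	error = uvm_map_pageable_all(&p->p_vmspace->vm_map,
 *	    MCL_CURRENT | MCL_FUTURE, lim_cur(RLIMIT_MEMLOCK));
 *
 * The map must be unlocked on entry; the wiring pass drops the lock on
 * the way out.
 */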
2649 
2650 /*
2651  * Initialize map.
2652  *
2653  * Allocates sufficient entries to describe the free memory in the map.
2654  */
2655 void
2656 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2657     int flags)
2658 {
2659 	int i;
2660 
2661 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2662 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2663 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2664 
2665 	/*
2666 	 * Update parameters.
2667 	 *
2668 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2669 	 * properly.
2670 	 * We lose the top page if the full virtual address space is used.
2671 	 */
2672 	if (max & (vaddr_t)PAGE_MASK) {
2673 		max += 1;
2674 		if (max == 0) /* overflow */
2675 			max -= PAGE_SIZE;
2676 	}
2677 
2678 	RBT_INIT(uvm_map_addr, &map->addr);
2679 	map->uaddr_exe = NULL;
2680 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2681 		map->uaddr_any[i] = NULL;
2682 	map->uaddr_brk_stack = NULL;
2683 
2684 	map->pmap = pmap;
2685 	map->size = 0;
2686 	map->ref_count = 0;
2687 	map->min_offset = min;
2688 	map->max_offset = max;
2689 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2690 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2691 	map->flags = flags;
2692 	map->timestamp = 0;
2693 	if (flags & VM_MAP_ISVMSPACE)
2694 		rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2695 	else
2696 		rw_init(&map->lock, "kmmaplk");
2697 	mtx_init(&map->mtx, IPL_VM);
2698 	mtx_init(&map->flags_lock, IPL_VM);
2699 
2700 	/* Configure the allocators. */
2701 	if (flags & VM_MAP_ISVMSPACE)
2702 		uvm_map_setup_md(map);
2703 	else
2704 		map->uaddr_any[3] = &uaddr_kbootstrap;
2705 
2706 	/*
2707 	 * Fill map entries.
2708 	 * We do not need to write-lock the map here because only the current
2709 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2710 	 * bogus triggering of lock-not-held assertions.
2711 	 */
2712 	uvm_map_setup_entries(map);
2713 	uvm_tree_sanity(map, __FILE__, __LINE__);
2714 	map->ref_count = 1;
2715 }
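
/*
 * Usage sketch (hypothetical): a kernel submap covering [vmin, vmax)
 * could be prepared with
 *
 *	uvm_map_setup(&submap, pmap_kernel(), vmin, vmax, VM_MAP_PAGEABLE);
 *
 * after which the address tree holds only free-space entries spanning
 * the whole range (see uvm_map_setup_entries() below).
 */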
2716 
2717 /*
2718  * Destroy the map.
2719  *
2720  * This is the inverse operation to uvm_map_setup.
2721  */
2722 void
2723 uvm_map_teardown(struct vm_map *map)
2724 {
2725 	struct uvm_map_deadq	 dead_entries;
2726 	struct vm_map_entry	*entry, *tmp;
2727 #ifdef VMMAP_DEBUG
2728 	size_t			 numq, numt;
2729 #endif
2730 	int			 i;
2731 
2732 	KERNEL_ASSERT_LOCKED();
2733 	KERNEL_UNLOCK();
2734 	KERNEL_ASSERT_UNLOCKED();
2735 
2736 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2737 
2738 	/* Remove address selectors. */
2739 	uvm_addr_destroy(map->uaddr_exe);
2740 	map->uaddr_exe = NULL;
2741 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2742 		uvm_addr_destroy(map->uaddr_any[i]);
2743 		map->uaddr_any[i] = NULL;
2744 	}
2745 	uvm_addr_destroy(map->uaddr_brk_stack);
2746 	map->uaddr_brk_stack = NULL;
2747 
2748 	/*
2749 	 * Remove entries.
2750 	 *
2751 	 * The following is based on graph breadth-first search.
2752 	 *
2753 	 * In color terms:
2754 	 * - the dead_entries set contains all nodes that are reachable
2755 	 *   (i.e. both the black and the grey nodes)
2756 	 * - any entry not in dead_entries is white
2757 	 * - any entry that appears in dead_entries before entry,
2758 	 *   is black, the rest is grey.
2759 	 * The set [entry, end] is also referred to as the wavefront.
2760 	 *
2761 	 * Since the tree is always a fully connected graph, the breadth-first
2762 	 * search guarantees that each vmmap_entry is visited exactly once.
2763 	 * The vm_map is broken down in linear time.
2764 	 */
2765 	TAILQ_INIT(&dead_entries);
2766 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2767 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2768 	while (entry != NULL) {
2769 		sched_pause(yield);
2770 		uvm_unmap_kill_entry(map, entry);
2771 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2772 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2773 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2774 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2775 		/* Update wave-front. */
2776 		entry = TAILQ_NEXT(entry, dfree.deadq);
2777 	}
2778 
2779 #ifdef VMMAP_DEBUG
2780 	numt = numq = 0;
2781 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2782 		numt++;
2783 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2784 		numq++;
2785 	KASSERT(numt == numq);
2786 #endif
2787 	uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2788 
2789 	KERNEL_LOCK();
2790 
2791 	pmap_destroy(map->pmap);
2792 	map->pmap = NULL;
2793 }
2794 
2795 /*
2796  * Populate map with free-memory entries.
2797  *
2798  * Map must be initialized and empty.
2799  */
2800 void
2801 uvm_map_setup_entries(struct vm_map *map)
2802 {
2803 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2804 
2805 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2806 }
2807 
2808 /*
2809  * Split entry at given address.
2810  *
2811  * orig:  entry that is to be split.
2812  * next:  a newly allocated map entry that is not linked.
2813  * split: address at which the split is done.
2814  */
2815 void
2816 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2817     struct vm_map_entry *next, vaddr_t split)
2818 {
2819 	struct uvm_addr_state *free, *free_before;
2820 	vsize_t adj;
2821 
2822 	if ((split & PAGE_MASK) != 0) {
2823 		panic("uvm_map_splitentry: split address 0x%lx "
2824 		    "not on page boundary!", split);
2825 	}
2826 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2827 	uvm_tree_sanity(map, __FILE__, __LINE__);
2828 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2829 
2830 #ifdef VMMAP_DEBUG
2831 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2832 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2833 #endif /* VMMAP_DEBUG */
2834 
2835 	/*
2836 	 * Free space will change, unlink from free space tree.
2837 	 */
2838 	free = uvm_map_uaddr_e(map, orig);
2839 	uvm_mapent_free_remove(map, free, orig);
2840 
2841 	adj = split - orig->start;
2842 
2843 	uvm_mapent_copy(orig, next);
2844 	if (split >= orig->end) {
2845 		next->etype = 0;
2846 		next->offset = 0;
2847 		next->wired_count = 0;
2848 		next->start = next->end = split;
2849 		next->guard = 0;
2850 		next->fspace = VMMAP_FREE_END(orig) - split;
2851 		next->aref.ar_amap = NULL;
2852 		next->aref.ar_pageoff = 0;
2853 		orig->guard = MIN(orig->guard, split - orig->end);
2854 		orig->fspace = split - VMMAP_FREE_START(orig);
2855 	} else {
2856 		orig->fspace = 0;
2857 		orig->guard = 0;
2858 		orig->end = next->start = split;
2859 
2860 		if (next->aref.ar_amap) {
2861 			amap_splitref(&orig->aref, &next->aref, adj);
2862 		}
2863 		if (UVM_ET_ISSUBMAP(orig)) {
2864 			uvm_map_reference(next->object.sub_map);
2865 			next->offset += adj;
2866 		} else if (UVM_ET_ISOBJ(orig)) {
2867 			if (next->object.uvm_obj->pgops &&
2868 			    next->object.uvm_obj->pgops->pgo_reference) {
2869 				KERNEL_LOCK();
2870 				next->object.uvm_obj->pgops->pgo_reference(
2871 				    next->object.uvm_obj);
2872 				KERNEL_UNLOCK();
2873 			}
2874 			next->offset += adj;
2875 		}
2876 	}
2877 
2878 	/*
2879 	 * Link next into address tree.
2880 	 * Link orig and next into free-space tree.
2881 	 *
2882 	 * Don't insert 'next' into the addr tree until orig has been linked,
2883 	 * in case the free-list looks at adjacent entries in the addr tree
2884 	 * for its decisions.
2885 	 */
2886 	if (orig->fspace > 0)
2887 		free_before = free;
2888 	else
2889 		free_before = uvm_map_uaddr_e(map, orig);
2890 	uvm_mapent_free_insert(map, free_before, orig);
2891 	uvm_mapent_addr_insert(map, next);
2892 	uvm_mapent_free_insert(map, free, next);
2893 
2894 	uvm_tree_sanity(map, __FILE__, __LINE__);
2895 }
2896 
2897 
2898 #ifdef VMMAP_DEBUG
2899 
2900 void
2901 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2902     char *file, int line)
2903 {
2904 	char *map_special;
2905 
2906 	if (test)
2907 		return;
2908 
2909 	if (map == kernel_map)
2910 		map_special = " (kernel_map)";
2911 	else if (map == kmem_map)
2912 		map_special = " (kmem_map)";
2913 	else
2914 		map_special = "";
2915 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2916 	    line, test_str);
2917 }
2918 
2919 /*
2920  * Check that map is sane.
2921  */
2922 void
2923 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2924 {
2925 	struct vm_map_entry	*iter;
2926 	vaddr_t			 addr;
2927 	vaddr_t			 min, max, bound; /* Bounds checker. */
2928 	struct uvm_addr_state	*free;
2929 
2930 	addr = vm_map_min(map);
2931 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2932 		/*
2933 		 * Valid start, end.
2934 		 * Catch overflow for end+fspace.
2935 		 */
2936 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2937 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2938 
2939 		/* May not be empty. */
2940 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2941 		    file, line);
2942 
2943 		/* Addresses for entry must lie within map boundaries. */
2944 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2945 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2946 
2947 		/* Tree may not have gaps. */
2948 		UVM_ASSERT(map, iter->start == addr, file, line);
2949 		addr = VMMAP_FREE_END(iter);
2950 
2951 		/*
2952 		 * Free space may not cross boundaries, unless the same
2953 		 * free list is used on both sides of the border.
2954 		 */
2955 		min = VMMAP_FREE_START(iter);
2956 		max = VMMAP_FREE_END(iter);
2957 
2958 		while (min < max &&
2959 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2960 			UVM_ASSERT(map,
2961 			    uvm_map_uaddr(map, bound - 1) ==
2962 			    uvm_map_uaddr(map, bound),
2963 			    file, line);
2964 			min = bound;
2965 		}
2966 
2967 		free = uvm_map_uaddr_e(map, iter);
2968 		if (free) {
2969 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2970 			    file, line);
2971 		} else {
2972 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2973 			    file, line);
2974 		}
2975 	}
2976 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2977 }
2978 
2979 void
2980 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2981 {
2982 	struct vm_map_entry *iter;
2983 	vsize_t size;
2984 
2985 	size = 0;
2986 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2987 		if (!UVM_ET_ISHOLE(iter))
2988 			size += iter->end - iter->start;
2989 	}
2990 
2991 	if (map->size != size)
2992 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2993 	UVM_ASSERT(map, map->size == size, file, line);
2994 
2995 	vmspace_validate(map);
2996 }
2997 
2998 /*
2999  * This function validates the statistics on vmspace.
3000  */
3001 void
3002 vmspace_validate(struct vm_map *map)
3003 {
3004 	struct vmspace *vm;
3005 	struct vm_map_entry *iter;
3006 	vaddr_t imin, imax;
3007 	vaddr_t stack_begin, stack_end; /* Position of stack. */
3008 	vsize_t stack, heap; /* Measured sizes. */
3009 
3010 	if (!(map->flags & VM_MAP_ISVMSPACE))
3011 		return;
3012 
3013 	vm = (struct vmspace *)map;
3014 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
3015 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
3016 
3017 	stack = heap = 0;
3018 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
3019 		imin = imax = iter->start;
3020 
3021 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
3022 		    iter->prot != PROT_NONE)
3023 			continue;
3024 
3025 		/*
3026 		 * Update stack, heap.
3027 		 * Keep in mind that (theoretically) the entries of
3028 		 * userspace and stack may be joined.
3029 		 */
3030 		while (imin != iter->end) {
3031 			/*
3032 			 * Set imax to the first boundary crossed between
3033 			 * imin and stack addresses.
3034 			 */
3035 			imax = iter->end;
3036 			if (imin < stack_begin && imax > stack_begin)
3037 				imax = stack_begin;
3038 			else if (imin < stack_end && imax > stack_end)
3039 				imax = stack_end;
3040 
3041 			if (imin >= stack_begin && imin < stack_end)
3042 				stack += imax - imin;
3043 			else
3044 				heap += imax - imin;
3045 			imin = imax;
3046 		}
3047 	}
3048 
3049 	heap >>= PAGE_SHIFT;
3050 	if (heap != vm->vm_dused) {
3051 		printf("vmspace stack range: 0x%lx-0x%lx\n",
3052 		    stack_begin, stack_end);
3053 		panic("vmspace_validate: vmspace.vm_dused invalid, "
3054 		    "expected %ld pgs, got %ld pgs in map %p",
3055 		    heap, vm->vm_dused,
3056 		    map);
3057 	}
3058 }
3059 
3060 #endif /* VMMAP_DEBUG */
3061 
3062 /*
3063  * uvm_map_init: init mapping system at boot time.  Note that we allocate
3064  * and init the static pool of struct vm_map_entry for the kernel here.
3065  */
3066 void
3067 uvm_map_init(void)
3068 {
3069 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
3070 	int lcv;
3071 
3072 	/* now set up static pool of kernel map entries ... */
3073 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
3074 	SLIST_INIT(&uvm.kentry_free);
3075 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
3076 		SLIST_INSERT_HEAD(&uvm.kentry_free,
3077 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
3078 	}
3079 
3080 	/* initialize the map-related pools. */
3081 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
3082 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
3083 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
3084 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
3085 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
3086 	    IPL_VM, 0, "vmmpekpl", NULL);
3087 	pool_sethiwat(&uvm_map_entry_pool, 8192);
3088 
3089 	uvm_addr_init();
3090 }
3091 
3092 #if defined(DDB)
3093 
3094 /*
3095  * DDB hooks
3096  */
3097 
3098 /*
3099  * uvm_map_printit: actually prints the map
3100  */
3101 void
3102 uvm_map_printit(struct vm_map *map, boolean_t full,
3103     int (*pr)(const char *, ...))
3104 {
3105 	struct vmspace			*vm;
3106 	struct vm_map_entry		*entry;
3107 	struct uvm_addr_state		*free;
3108 	int				 in_free, i;
3109 	char				 buf[8];
3110 
3111 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
3112 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
3113 	    map->b_start, map->b_end);
3114 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
3115 	    map->s_start, map->s_end);
3116 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
3117 	    map->size, map->ref_count, map->timestamp,
3118 	    map->flags);
3119 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
3120 	    pmap_resident_count(map->pmap));
3121 
3122 	/* struct vmspace handling. */
3123 	if (map->flags & VM_MAP_ISVMSPACE) {
3124 		vm = (struct vmspace *)map;
3125 
3126 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
3127 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
3128 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
3129 		    vm->vm_tsize, vm->vm_dsize);
3130 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
3131 		    vm->vm_taddr, vm->vm_daddr);
3132 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
3133 		    vm->vm_maxsaddr, vm->vm_minsaddr);
3134 	}
3135 
3136 	if (!full)
3137 		goto print_uaddr;
3138 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
3139 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3140 		    entry, entry->start, entry->end, entry->object.uvm_obj,
3141 		    (long long)entry->offset, entry->aref.ar_amap,
3142 		    entry->aref.ar_pageoff);
3143 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
3144 		    "syscall=%c, prot(max)=%d/%d, inh=%d, "
3145 		    "wc=%d, adv=%d\n",
3146 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3147 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3148 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3149 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
3150 		    (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
3151 		    entry->protection, entry->max_protection,
3152 		    entry->inheritance, entry->wired_count, entry->advice);
3153 
3154 		free = uvm_map_uaddr_e(map, entry);
3155 		in_free = (free != NULL);
3156 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
3157 		    "free=0x%lx-0x%lx\n",
3158 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
3159 		    in_free ? 'T' : 'F',
3160 		    entry->guard,
3161 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
3162 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
3163 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
3164 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
3165 		if (free) {
3166 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
3167 			    free->uaddr_minaddr, free->uaddr_maxaddr,
3168 			    free->uaddr_functions->uaddr_name);
3169 		}
3170 	}
3171 
3172 print_uaddr:
3173 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
3174 	for (i = 0; i < nitems(map->uaddr_any); i++) {
3175 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
3176 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
3177 	}
3178 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
3179 }
3180 
3181 /*
3182  * uvm_object_printit: actually prints the object
3183  */
3184 void
3185 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
3186     int (*pr)(const char *, ...))
3187 {
3188 	struct vm_page *pg;
3189 	int cnt = 0;
3190 
3191 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3192 	    uobj, uobj->pgops, uobj->uo_npages);
3193 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3194 		(*pr)("refs=<SYSTEM>\n");
3195 	else
3196 		(*pr)("refs=%d\n", uobj->uo_refs);
3197 
3198 	if (!full) {
3199 		return;
3200 	}
3201 	(*pr)("  PAGES <pg,offset>:\n  ");
3202 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3203 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3204 		if ((cnt % 3) == 2) {
3205 			(*pr)("\n  ");
3206 		}
3207 		cnt++;
3208 	}
3209 	if ((cnt % 3) != 2) {
3210 		(*pr)("\n");
3211 	}
3212 }
3213 
3214 /*
3215  * uvm_page_printit: actually print the page
3216  */
3217 static const char page_flagbits[] =
3218 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3219 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3220 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3221 
3222 void
3223 uvm_page_printit(struct vm_page *pg, boolean_t full,
3224     int (*pr)(const char *, ...))
3225 {
3226 	struct vm_page *tpg;
3227 	struct uvm_object *uobj;
3228 	struct pglist *pgl;
3229 
3230 	(*pr)("PAGE %p:\n", pg);
3231 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3232 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3233 	    (long long)pg->phys_addr);
3234 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
3235 	    pg->uobject, pg->uanon, (long long)pg->offset);
3236 #if defined(UVM_PAGE_TRKOWN)
3237 	if (pg->pg_flags & PG_BUSY)
3238 		(*pr)("  owning thread = %d, tag=%s",
3239 		    pg->owner, pg->owner_tag);
3240 	else
3241 		(*pr)("  page not busy, no owner");
3242 #else
3243 	(*pr)("  [page ownership tracking disabled]");
3244 #endif
3245 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
3246 
3247 	if (!full)
3248 		return;
3249 
3250 	/* cross-verify object/anon */
3251 	if ((pg->pg_flags & PQ_FREE) == 0) {
3252 		if (pg->pg_flags & PQ_ANON) {
3253 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3254 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3255 				(pg->uanon) ? pg->uanon->an_page : NULL);
3256 			else
3257 				(*pr)("  anon backpointer is OK\n");
3258 		} else {
3259 			uobj = pg->uobject;
3260 			if (uobj) {
3261 				(*pr)("  checking object list\n");
3262 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3263 					if (tpg == pg) {
3264 						break;
3265 					}
3266 				}
3267 				if (tpg)
3268 					(*pr)("  page found on object list\n");
3269 				else
3270 					(*pr)("  >>> PAGE NOT FOUND "
3271 					    "ON OBJECT LIST! <<<\n");
3272 			}
3273 		}
3274 	}
3275 
3276 	/* cross-verify page queue */
3277 	if (pg->pg_flags & PQ_FREE) {
3278 		if (uvm_pmr_isfree(pg))
3279 			(*pr)("  page found in uvm_pmemrange\n");
3280 		else
3281 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3282 		pgl = NULL;
3283 	} else if (pg->pg_flags & PQ_INACTIVE) {
3284 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3285 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3286 	} else if (pg->pg_flags & PQ_ACTIVE) {
3287 		pgl = &uvm.page_active;
3288 	} else {
3289 		pgl = NULL;
3290 	}
3291 
3292 	if (pgl) {
3293 		(*pr)("  checking pageq list\n");
3294 		TAILQ_FOREACH(tpg, pgl, pageq) {
3295 			if (tpg == pg) {
3296 				break;
3297 			}
3298 		}
3299 		if (tpg)
3300 			(*pr)("  page found on pageq list\n");
3301 		else
3302 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3303 	}
3304 }
3305 #endif
3306 
3307 /*
3308  * uvm_map_protect: change map protection
3309  *
3310  * => set_max means set max_protection.
3311  * => map must be unlocked.
3312  */
3313 int
3314 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3315     vm_prot_t new_prot, boolean_t set_max)
3316 {
3317 	struct vm_map_entry *first, *iter;
3318 	vm_prot_t old_prot;
3319 	vm_prot_t mask;
3320 	vsize_t dused;
3321 	int error;
3322 
3323 	if (start > end)
3324 		return EINVAL;
3325 	start = MAX(start, map->min_offset);
3326 	end = MIN(end, map->max_offset);
3327 	if (start >= end)
3328 		return 0;
3329 
3330 	dused = 0;
3331 	error = 0;
3332 	vm_map_lock(map);
3333 
3334 	/*
3335 	 * Set up first and last.
3336 	 * - first will contain first entry at or after start.
3337 	 */
3338 	first = uvm_map_entrybyaddr(&map->addr, start);
3339 	KDASSERT(first != NULL);
3340 	if (first->end <= start)
3341 		first = RBT_NEXT(uvm_map_addr, first);
3342 
3343 	/* First, check for protection violations. */
3344 	for (iter = first; iter != NULL && iter->start < end;
3345 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3346 		/* Treat memory holes as free space. */
3347 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3348 			continue;
3349 
3350 		old_prot = iter->protection;
3351 		if (old_prot == PROT_NONE && new_prot != old_prot) {
3352 			dused += uvmspace_dused(
3353 			    map, MAX(start, iter->start), MIN(end, iter->end));
3354 		}
3355 
3356 		if (UVM_ET_ISSUBMAP(iter)) {
3357 			error = EINVAL;
3358 			goto out;
3359 		}
3360 		if ((new_prot & iter->max_protection) != new_prot) {
3361 			error = EACCES;
3362 			goto out;
3363 		}
3364 		if (map == kernel_map &&
3365 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3366 			panic("uvm_map_protect: kernel map W^X violation requested");
3367 	}
3368 
3369 	/* Check limits. */
3370 	if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3371 		vsize_t limit = lim_cur(RLIMIT_DATA);
3372 		dused = ptoa(dused);
3373 		if (limit < dused ||
3374 		    limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3375 			error = ENOMEM;
3376 			goto out;
3377 		}
3378 	}
3379 
3380 	/* Fix protections.  */
3381 	for (iter = first; iter != NULL && iter->start < end;
3382 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3383 		/* Treat memory holes as free space. */
3384 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3385 			continue;
3386 
3387 		old_prot = iter->protection;
3388 
3389 		/*
3390 		 * Skip adapting protection iff old and new protection
3391 		 * are equal.
3392 		 */
3393 		if (set_max) {
3394 			if (old_prot == (new_prot & old_prot) &&
3395 			    iter->max_protection == new_prot)
3396 				continue;
3397 		} else {
3398 			if (old_prot == new_prot)
3399 				continue;
3400 		}
3401 
3402 		UVM_MAP_CLIP_START(map, iter, start);
3403 		UVM_MAP_CLIP_END(map, iter, end);
3404 
3405 		if (set_max) {
3406 			iter->max_protection = new_prot;
3407 			iter->protection &= new_prot;
3408 		} else
3409 			iter->protection = new_prot;
3410 
3411 		/*
3412 		 * update physical map if necessary.  worry about copy-on-write
3413 		 * here -- CHECK THIS XXX
3414 		 */
3415 		if (iter->protection != old_prot) {
3416 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3417 			    ~PROT_WRITE : PROT_MASK;
3418 
3419 			/* XXX should only wserial++ if no split occurs */
3420 			if (iter->protection & PROT_WRITE)
3421 				map->wserial++;
3422 
3423 			if (map->flags & VM_MAP_ISVMSPACE) {
3424 				if (old_prot == PROT_NONE) {
3425 					((struct vmspace *)map)->vm_dused +=
3426 					    uvmspace_dused(map, iter->start,
3427 					        iter->end);
3428 				}
3429 				if (iter->protection == PROT_NONE) {
3430 					((struct vmspace *)map)->vm_dused -=
3431 					    uvmspace_dused(map, iter->start,
3432 					        iter->end);
3433 				}
3434 			}
3435 
3436 			/* update pmap */
3437 			if ((iter->protection & mask) == PROT_NONE &&
3438 			    VM_MAPENT_ISWIRED(iter)) {
3439 				/*
3440 				 * TODO(ariane) this is stupid. wired_count
3441 				 * is 0 if not wired, otherwise anything
3442 				 * larger than 0 (incremented once each time
3443 				 * wire is called).
3444 				 * Mostly to be able to undo the damage on
3445 				 * failure, not to actually be a wired
3446 				 * refcounter...
3447 				 * Originally: iter->wired_count--;
3448 				 * (don't we have to unwire this in the pmap
3449 				 * as well?)
3450 				 */
3451 				iter->wired_count = 0;
3452 			}
3453 			uvm_map_lock_entry(iter);
3454 			pmap_protect(map->pmap, iter->start, iter->end,
3455 			    iter->protection & mask);
3456 			uvm_map_unlock_entry(iter);
3457 		}
3458 
3459 		/*
3460 		 * If the map is configured to lock any future mappings,
3461 		 * wire this entry now if the old protection was PROT_NONE
3462 		 * and the new protection is not PROT_NONE.
3463 		 */
3464 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3465 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3466 		    old_prot == PROT_NONE &&
3467 		    new_prot != PROT_NONE) {
3468 			if (uvm_map_pageable(map, iter->start, iter->end,
3469 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3470 				/*
3471 				 * If locking the entry fails, remember the
3472 				 * error if it's the first one.  Note we
3473 				 * still continue setting the protection in
3474 				 * the map, but it will return the resource
3475 				 * shortage condition regardless.
3476 				 *
3477 				 * XXX Ignore what the actual error is,
3478 				 * XXX just call it a resource shortage
3479 				 * XXX so that it doesn't get confused
3480 				 * XXX what uvm_map_protect() itself would
3481 				 * XXX normally return.
3482 				 */
3483 				error = ENOMEM;
3484 			}
3485 		}
3486 	}
3487 	pmap_update(map->pmap);
3488 
3489 out:
3490 	vm_map_unlock(map);
3491 	return error;
3492 }
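
/*
 * Usage sketch (hypothetical): an mprotect(2)-style caller changes only
 * the current protection, leaving max_protection alone:
 *
 *	error = uvm_map_protect(&p->p_vmspace->vm_map,
 *	    trunc_page(addr), round_page(addr + len), prot, FALSE);
 *
 * Passing set_max = TRUE instead tightens max_protection and clips the
 * current protection to it, as the set_max branch above does.
 */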
3493 
3494 /*
3495  * uvmspace_alloc: allocate a vmspace structure.
3496  *
3497  * - structure includes vm_map and pmap
3498  * - XXX: no locking on this structure
3499  * - refcnt set to 1, rest must be init'd by caller
3500  */
3501 struct vmspace *
3502 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3503     boolean_t remove_holes)
3504 {
3505 	struct vmspace *vm;
3506 
3507 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3508 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3509 	return (vm);
3510 }
3511 
3512 /*
3513  * uvmspace_init: initialize a vmspace structure.
3514  *
3515  * - XXX: no locking on this structure
3516  * - refcnt set to 1, rest must be init'd by caller
3517  */
3518 void
3519 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3520     boolean_t pageable, boolean_t remove_holes)
3521 {
3522 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3523 
3524 	if (pmap)
3525 		pmap_reference(pmap);
3526 	else
3527 		pmap = pmap_create();
3528 
3529 	uvm_map_setup(&vm->vm_map, pmap, min, max,
3530 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3531 
3532 	vm->vm_refcnt = 1;
3533 
3534 	if (remove_holes)
3535 		pmap_remove_holes(vm);
3536 }
3537 
3538 /*
3539  * uvmspace_share: share a vmspace between two processes
3540  *
3541  * - used for vfork
3542  */
3543 
3544 struct vmspace *
3545 uvmspace_share(struct process *pr)
3546 {
3547 	struct vmspace *vm = pr->ps_vmspace;
3548 
3549 	uvmspace_addref(vm);
3550 	return vm;
3551 }
3552 
3553 /*
3554  * uvmspace_exec: the process wants to exec a new program
3555  *
3556  * - XXX: no locking on vmspace
3557  */
3558 
3559 void
3560 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3561 {
3562 	struct process *pr = p->p_p;
3563 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3564 	struct vm_map *map = &ovm->vm_map;
3565 	struct uvm_map_deadq dead_entries;
3566 
3567 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3568 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3569 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3570 
3571 	pmap_unuse_final(p);   /* before stack addresses go away */
3572 	TAILQ_INIT(&dead_entries);
3573 
3574 	/* see if more than one process is using this vmspace...  */
3575 	if (ovm->vm_refcnt == 1) {
3576 		/*
3577 		 * If pr is the only process using its vmspace then
3578 		 * we can safely recycle that vmspace for the program
3579 		 * that is being exec'd.
3580 		 */
3581 
3582 #ifdef SYSVSHM
3583 		/*
3584 		 * SYSV SHM semantics require us to kill all segments on an exec
3585 		 */
3586 		if (ovm->vm_shm)
3587 			shmexit(ovm);
3588 #endif
3589 
3590 		/*
3591 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3592 		 * when a process execs another program image.
3593 		 */
3594 		vm_map_lock(map);
3595 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
3596 
3597 		/*
3598 		 * now unmap the old program
3599 		 *
3600 		 * Instead of attempting to keep the map valid, we simply
3601 		 * nuke all entries and ask uvm_map_setup to reinitialize
3602 		 * the map to the new boundaries.
3603 		 *
3604 		 * uvm_unmap_remove will actually nuke all entries for us
3605 		 * (as in, not replace them with free-memory entries).
3606 		 */
3607 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3608 		    &dead_entries, TRUE, FALSE);
3609 
3610 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3611 
3612 		/* Nuke statistics and boundaries. */
3613 		memset(&ovm->vm_startcopy, 0,
3614 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3615 
3616 
3617 		if (end & (vaddr_t)PAGE_MASK) {
3618 			end += 1;
3619 			if (end == 0) /* overflow */
3620 				end -= PAGE_SIZE;
3621 		}
3622 
3623 		/* Setup new boundaries and populate map with entries. */
3624 		map->min_offset = start;
3625 		map->max_offset = end;
3626 		uvm_map_setup_entries(map);
3627 		vm_map_unlock(map);
3628 
3629 		/* but keep MMU holes unavailable */
3630 		pmap_remove_holes(ovm);
3631 	} else {
3632 		/*
3633 		 * pr's vmspace is being shared, so we can't reuse
3634 		 * it for pr since it is still being used for others.
3635 		 * allocate a new vmspace for pr
3636 		 */
3637 		nvm = uvmspace_alloc(start, end,
3638 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3639 
3640 		/* install new vmspace and drop our ref to the old one. */
3641 		pmap_deactivate(p);
3642 		p->p_vmspace = pr->ps_vmspace = nvm;
3643 		pmap_activate(p);
3644 
3645 		uvmspace_free(ovm);
3646 	}
3647 
3648 	/* Release dead entries */
3649 	uvm_unmap_detach(&dead_entries, 0);
3650 }
3651 
3652 /*
3653  * uvmspace_addref: add a reference to a vmspace.
3654  */
3655 void
3656 uvmspace_addref(struct vmspace *vm)
3657 {
3658 	KERNEL_ASSERT_LOCKED();
3659 	KASSERT(vm->vm_refcnt > 0);
3660 
3661 	vm->vm_refcnt++;
3662 }
3663 
3664 /*
3665  * uvmspace_free: free a vmspace data structure
3666  */
3667 void
3668 uvmspace_free(struct vmspace *vm)
3669 {
3670 	KERNEL_ASSERT_LOCKED();
3671 
3672 	if (--vm->vm_refcnt == 0) {
3673 		/*
3674 		 * lock the map, to wait out all other references to it.  delete
3675 		 * all of the mappings and pages they hold, then call the pmap
3676 		 * module to reclaim anything left.
3677 		 */
3678 #ifdef SYSVSHM
3679 		/* Get rid of any SYSV shared memory segments. */
3680 		if (vm->vm_shm != NULL)
3681 			shmexit(vm);
3682 #endif
3683 
3684 		uvm_map_teardown(&vm->vm_map);
3685 		pool_put(&uvm_vmspace_pool, vm);
3686 	}
3687 }
3688 
3689 /*
3690  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3691  * srcmap to the address range [dstaddr, dstaddr + sz) in
3692  * dstmap.
3693  *
3694  * The whole address range in srcmap must be backed by an object
3695  * (no holes).
3696  *
3697  * If successful, the address ranges share memory and the destination
3698  * address range uses the protection flags in prot.
3699  *
3700  * This routine assumes that sz is a multiple of PAGE_SIZE and
3701  * that dstaddr and srcaddr are page-aligned.
3702  */
3703 int
3704 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3705     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3706 {
3707 	int ret = 0;
3708 	vaddr_t unmap_end;
3709 	vaddr_t dstva;
3710 	vsize_t s_off, len, n = sz, remain;
3711 	struct vm_map_entry *first = NULL, *last = NULL;
3712 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3713 	struct uvm_map_deadq dead;
3714 
3715 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3716 		return EINVAL;
3717 
3718 	TAILQ_INIT(&dead);
3719 	vm_map_lock(dstmap);
3720 	vm_map_lock_read(srcmap);
3721 
3722 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3723 		ret = ENOMEM;
3724 		goto exit_unlock;
3725 	}
3726 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3727 		ret = EINVAL;
3728 		goto exit_unlock;
3729 	}
3730 
3731 	dstva = dstaddr;
3732 	unmap_end = dstaddr;
3733 	for (; src_entry != NULL;
3734 	    psrc_entry = src_entry,
3735 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3736 		/* hole in address space, bail out */
3737 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3738 			break;
3739 		if (src_entry->start >= srcaddr + sz)
3740 			break;
3741 
3742 		if (UVM_ET_ISSUBMAP(src_entry))
3743 			panic("uvm_share: encountered a submap (illegal)");
3744 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3745 		    UVM_ET_ISNEEDSCOPY(src_entry))
3746 			panic("uvm_share: non-copy_on_write map entries "
3747 			    "marked needs_copy (illegal)");
3748 
3749 		/*
3750 		 * srcaddr > map entry start? means we are in the middle of a
3751 		 * map, so we calculate the offset to use in the source map.
3752 		 */
3753 		if (srcaddr > src_entry->start)
3754 			s_off = srcaddr - src_entry->start;
3755 		else if (srcaddr == src_entry->start)
3756 			s_off = 0;
3757 		else
3758 			panic("uvm_share: map entry start > srcaddr");
3759 
3760 		remain = src_entry->end - src_entry->start - s_off;
3761 
3762 		/* Determine how many bytes to share in this pass */
3763 		if (n < remain)
3764 			len = n;
3765 		else
3766 			len = remain;
3767 
3768 		if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3769 		    srcmap, src_entry, &dead) == NULL)
3770 			break;
3771 
3772 		n -= len;
3773 		dstva += len;
3774 		srcaddr += len;
3775 		unmap_end = dstva + len;
3776 		if (n == 0)
3777 			goto exit_unlock;
3778 	}
3779 
3780 	ret = EINVAL;
3781 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
3782 
3783 exit_unlock:
3784 	vm_map_unlock_read(srcmap);
3785 	vm_map_unlock(dstmap);
3786 	uvm_unmap_detach(&dead, 0);
3787 
3788 	return ret;
3789 }
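
/*
 * Illustrative usage sketch (not part of the build): share sz bytes of
 * a fully backed, page-aligned source range into a destination map,
 * read-only.  On failure, any partially established destination
 * mappings are torn down before returning.
 *
 *	error = uvm_share(dstmap, dstaddr, PROT_READ,
 *	    srcmap, srcaddr, sz);
 */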
3790 
3791 /*
3792  * Clone map entry into other map.
3793  *
3794  * Mapping will be placed at dstaddr, for the same length.
3795  * Space must be available.
3796  * Reference counters are incremented.
3797  */
3798 struct vm_map_entry *
3799 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3800     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3801     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3802     int mapent_flags, int amap_share_flags)
3803 {
3804 	struct vm_map_entry *new_entry, *first, *last;
3805 
3806 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3807 
3808 	/* Create new entry (linked in on creation). Fill in first, last. */
3809 	first = last = NULL;
3810 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3811 		panic("uvm_mapent_clone: no space in map for "
3812 		    "entry in empty map");
3813 	}
3814 	new_entry = uvm_map_mkentry(dstmap, first, last,
3815 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3816 	if (new_entry == NULL)
3817 		return NULL;
3818 	/* old_entry -> new_entry */
3819 	new_entry->object = old_entry->object;
3820 	new_entry->offset = old_entry->offset;
3821 	new_entry->aref = old_entry->aref;
3822 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3823 	new_entry->protection = prot;
3824 	new_entry->max_protection = maxprot;
3825 	new_entry->inheritance = old_entry->inheritance;
3826 	new_entry->advice = old_entry->advice;
3827 
3828 	/* gain reference to object backing the map (can't be a submap). */
3829 	if (new_entry->aref.ar_amap) {
3830 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3831 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3832 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3833 		    amap_share_flags);
3834 	}
3835 
3836 	if (UVM_ET_ISOBJ(new_entry) &&
3837 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3838 		new_entry->offset += off;
3839 		new_entry->object.uvm_obj->pgops->pgo_reference
3840 		    (new_entry->object.uvm_obj);
3841 	}
3842 
3843 	return new_entry;
3844 }
3845 
3846 struct vm_map_entry *
3847 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3848     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3849     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3850 {
3851 	/*
3852 	 * If old_entry refers to a copy-on-write region that has not yet been
3853 	 * written to (needs_copy flag is set), then we need to allocate a new
3854 	 * amap for old_entry.
3855 	 *
3856 	 * If we do not do this, and the process owning old_entry does a copy-on
3857 	 * write later, old_entry and new_entry will refer to different memory
3858 	 * regions, and the memory between the processes is no longer shared.
3859 	 *
3860 	 * [in other words, we need to clear needs_copy]
3861 	 */
3862 
3863 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3864 		/* get our own amap, clears needs_copy */
3865 		amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3866 		/* XXXCDC: WAITOK??? */
3867 	}
3868 
3869 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3870 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3871 }
3872 
3873 /*
3874  * share the mapping: this means we want the old and
3875  * new entries to share amaps and backing objects.
3876  */
3877 struct vm_map_entry *
3878 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3879     struct vm_map *old_map,
3880     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3881 {
3882 	struct vm_map_entry *new_entry;
3883 
3884 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3885 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3886 	    old_entry->max_protection, old_map, old_entry, dead);
3887 
3888 	/*
3889 	 * pmap_copy the mappings: this routine is optional
3890 	 * but if it is there it will reduce the number of
3891 	 * page faults in the new proc.
3892 	 */
3893 	if (!UVM_ET_ISHOLE(new_entry))
3894 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3895 		    (new_entry->end - new_entry->start), new_entry->start);
3896 
3897 	return (new_entry);
3898 }
3899 
3900 /*
3901  * copy-on-write the mapping (using mmap's
3902  * MAP_PRIVATE semantics)
3903  *
3904  * allocate new_entry, adjust reference counts.
3905  * (note that new references are read-only).
3906  */
3907 struct vm_map_entry *
3908 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3909     struct vm_map *old_map,
3910     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3911 {
3912 	struct vm_map_entry	*new_entry;
3913 	boolean_t		 protect_child;
3914 
3915 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3916 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3917 	    old_entry->max_protection, old_entry, dead, 0, 0);
3918 
3919 	new_entry->etype |=
3920 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3921 
3922 	/*
3923 	 * the new entry will need an amap.  it will either
3924 	 * need to be copied from the old entry or created
3925 	 * from scratch (if the old entry does not have an
3926 	 * amap).  can we defer this process until later
3927 	 * (by setting "needs_copy") or do we need to copy
3928 	 * the amap now?
3929 	 *
3930 	 * we must copy the amap now if any of the following
3931 	 * conditions hold:
3932 	 * 1. the old entry has an amap and that amap is
3933 	 *    being shared.  this means that the old (parent)
3934 	 *    process is sharing the amap with another
3935 	 *    process.  if we do not clear needs_copy here
3936 	 *    we will end up in a situation where both the
3937 	 *    parent and child process are referring to the
3938 	 *    same amap with "needs_copy" set.  if the
3939 	 *    parent write-faults, the fault routine will
3940 	 *    clear "needs_copy" in the parent by allocating
3941 	 *    a new amap.   this is wrong because the
3942 	 *    parent is supposed to be sharing the old amap
3943 	 *    and the new amap will break that.
3944 	 *
3945 	 * 2. if the old entry has an amap and a non-zero
3946 	 *    wire count then we are going to have to call
3947 	 *    amap_cow_now to avoid page faults in the
3948 	 *    parent process.   since amap_cow_now requires
3949 	 *    "needs_copy" to be clear we might as well
3950 	 *    clear it here as well.
3951 	 *
3952 	 */
3953 	if (old_entry->aref.ar_amap != NULL &&
3954 	    ((amap_flags(old_entry->aref.ar_amap) &
3955 	    AMAP_SHARED) != 0 ||
3956 	    VM_MAPENT_ISWIRED(old_entry))) {
3957 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3958 		    0, 0);
3959 		/* XXXCDC: M_WAITOK ... ok? */
3960 	}
3961 
3962 	/*
3963 	 * if the parent's entry is wired down, then the
3964 	 * parent process does not want page faults on
3965 	 * access to that memory.  this means that we
3966 	 * cannot do copy-on-write because we can't write
3967 	 * protect the old entry.   in this case we
3968 	 * resolve all copy-on-write faults now, using
3969 	 * amap_cow_now.   note that we have already
3970 	 * allocated any needed amap (above).
3971 	 */
3972 	if (VM_MAPENT_ISWIRED(old_entry)) {
3973 		/*
3974 		 * resolve all copy-on-write faults now
3975 		 * (note that there is nothing to do if
3976 		 * the old mapping does not have an amap).
3977 		 * XXX: is it worthwhile to bother with
3978 		 * pmap_copy in this case?
3979 		 */
3980 		if (old_entry->aref.ar_amap)
3981 			amap_cow_now(new_map, new_entry);
3982 	} else {
3983 		if (old_entry->aref.ar_amap) {
3984 			/*
3985 			 * setup mappings to trigger copy-on-write faults
3986 			 * we must write-protect the parent if it has
3987 			 * an amap and it is not already "needs_copy"...
3988 			 * if it is already "needs_copy" then the parent
3989 			 * has already been write-protected by a previous
3990 			 * fork operation.
3991 			 *
3992 			 * if we do not write-protect the parent, then
3993 			 * we must be sure to write-protect the child
3994 			 * after the pmap_copy() operation.
3995 			 *
3996 			 * XXX: pmap_copy should have some way of telling
3997 			 * us that it didn't do anything so we can avoid
3998 			 * calling pmap_protect needlessly.
3999 			 */
4000 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
4001 				if (old_entry->max_protection & PROT_WRITE) {
4002 					uvm_map_lock_entry(old_entry);
4003 					pmap_protect(old_map->pmap,
4004 					    old_entry->start,
4005 					    old_entry->end,
4006 					    old_entry->protection &
4007 					    ~PROT_WRITE);
4008 					uvm_map_unlock_entry(old_entry);
4009 					pmap_update(old_map->pmap);
4010 				}
4011 				old_entry->etype |= UVM_ET_NEEDSCOPY;
4012 			}
4013 
4014 			/* parent must now be write-protected */
4015 			protect_child = FALSE;
4016 		} else {
4017 			/*
4018 			 * we only need to protect the child if the
4019 			 * parent has write access.
4020 			 */
4021 			if (old_entry->max_protection & PROT_WRITE)
4022 				protect_child = TRUE;
4023 			else
4024 				protect_child = FALSE;
4025 		}
4026 		/*
4027 		 * copy the mappings
4028 		 * XXX: need a way to tell if this does anything
4029 		 */
4030 		if (!UVM_ET_ISHOLE(new_entry))
4031 			pmap_copy(new_map->pmap, old_map->pmap,
4032 			    new_entry->start,
4033 			    (old_entry->end - old_entry->start),
4034 			    old_entry->start);
4035 
4036 		/* protect the child's mappings if necessary */
4037 		if (protect_child) {
4038 			pmap_protect(new_map->pmap, new_entry->start,
4039 			    new_entry->end,
4040 			    new_entry->protection &
4041 			    ~PROT_WRITE);
4042 		}
4043 	}
4044 
4045 	return (new_entry);
4046 }
4047 
4048 /*
4049  * zero the mapping: the new entry will be zero initialized
4050  */
4051 struct vm_map_entry *
4052 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
4053     struct vm_map *old_map,
4054     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
4055 {
4056 	struct vm_map_entry *new_entry;
4057 
4058 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
4059 	    old_entry->end - old_entry->start, 0, old_entry->protection,
4060 	    old_entry->max_protection, old_entry, dead, 0, 0);
4061 
4062 	new_entry->etype |=
4063 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
4064 
4065 	if (new_entry->aref.ar_amap) {
4066 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
4067 		    atop(new_entry->end - new_entry->start), 0);
4068 		new_entry->aref.ar_amap = NULL;
4069 		new_entry->aref.ar_pageoff = 0;
4070 	}
4071 
4072 	if (UVM_ET_ISOBJ(new_entry)) {
4073 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
4074 			new_entry->object.uvm_obj->pgops->pgo_detach(
4075 			    new_entry->object.uvm_obj);
4076 		new_entry->object.uvm_obj = NULL;
4077 		new_entry->etype &= ~UVM_ET_OBJ;
4078 	}
4079 
4080 	return (new_entry);
4081 }
4082 
4083 /*
4084  * uvmspace_fork: fork a process' main map
4085  *
4086  * => create a new vmspace for child process from parent.
4087  * => parent's map must not be locked.
4088  */
4089 struct vmspace *
4090 uvmspace_fork(struct process *pr)
4091 {
4092 	struct vmspace *vm1 = pr->ps_vmspace;
4093 	struct vmspace *vm2;
4094 	struct vm_map *old_map = &vm1->vm_map;
4095 	struct vm_map *new_map;
4096 	struct vm_map_entry *old_entry, *new_entry;
4097 	struct uvm_map_deadq dead;
4098 
4099 	vm_map_lock(old_map);
4100 
4101 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
4102 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
4103 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
4104 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
4105 	vm2->vm_dused = 0; /* Statistic managed by us. */
4106 	new_map = &vm2->vm_map;
4107 	vm_map_lock(new_map);
4108 
4109 	/* go entry-by-entry */
4110 	TAILQ_INIT(&dead);
4111 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
4112 		if (old_entry->start == old_entry->end)
4113 			continue;
4114 
4115 		/* first, some sanity checks on the old entry */
4116 		if (UVM_ET_ISSUBMAP(old_entry)) {
4117 			panic("fork: encountered a submap during fork "
4118 			    "(illegal)");
4119 		}
4120 
4121 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
4122 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
4123 			panic("fork: non-copy_on_write map entry marked "
4124 			    "needs_copy (illegal)");
4125 		}
4126 
4127 		/* Apply inheritance. */
4128 		switch (old_entry->inheritance) {
4129 		case MAP_INHERIT_SHARE:
4130 			new_entry = uvm_mapent_forkshared(vm2, new_map,
4131 			    old_map, old_entry, &dead);
4132 			break;
4133 		case MAP_INHERIT_COPY:
4134 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
4135 			    old_map, old_entry, &dead);
4136 			break;
4137 		case MAP_INHERIT_ZERO:
4138 			new_entry = uvm_mapent_forkzero(vm2, new_map,
4139 			    old_map, old_entry, &dead);
4140 			break;
4141 		default:
4142 			continue;
4143 		}
4144 
4145 	 	/* Update process statistics. */
4146 		if (!UVM_ET_ISHOLE(new_entry))
4147 			new_map->size += new_entry->end - new_entry->start;
4148 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
4149 		    new_entry->protection != PROT_NONE) {
4150 			vm2->vm_dused += uvmspace_dused(
4151 			    new_map, new_entry->start, new_entry->end);
4152 		}
4153 	}
4154 
4155 	vm_map_unlock(old_map);
4156 	vm_map_unlock(new_map);
4157 
4158 	/*
4159 	 * This can actually happen if multiple entries described a
4160 	 * space in which an entry was inherited.
4161 	 */
4162 	uvm_unmap_detach(&dead, 0);
4163 
4164 #ifdef SYSVSHM
4165 	if (vm1->vm_shm)
4166 		shmfork(vm1, vm2);
4167 #endif
4168 
4169 	return vm2;
4170 }
4171 
4172 /*
4173  * uvm_map_hint: return the beginning of the best area suitable for
4174  * creating a new mapping with "prot" protection.
4175  */
4176 vaddr_t
4177 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
4178     vaddr_t maxaddr)
4179 {
4180 	vaddr_t addr;
4181 	vaddr_t spacing;
4182 
4183 #ifdef __i386__
4184 	/*
4185 	 * If executable skip first two pages, otherwise start
4186 	 * after data + heap region.
4187 	 */
4188 	if ((prot & PROT_EXEC) != 0 &&
4189 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
4190 		addr = (PAGE_SIZE*2) +
4191 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
4192 		return (round_page(addr));
4193 	}
4194 #endif
4195 
4196 #if defined (__LP64__)
4197 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4198 #else
4199 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4200 #endif
4201 
4202 	/*
4203 	 * Start malloc/mmap after the brk.
4204 	 */
4205 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4206 	addr = MAX(addr, minaddr);
4207 
4208 	if (addr < maxaddr) {
4209 		while (spacing > maxaddr - addr)
4210 			spacing >>= 1;
4211 	}
4212 	addr += arc4random() & spacing;
4213 	return (round_page(addr));
4214 }
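
/*
 * Worked example (illustrative): on an LP64 machine where MAXDSIZ
 * exceeds 4GB, spacing is 4GB - 1.  The hint starts at vm_daddr +
 * BRKSIZ, is raised to at least minaddr, the spacing mask is halved
 * until addr + spacing fits below maxaddr, a random offset masked by
 * spacing is added, and the result is rounded up to a page boundary.
 */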
4215 
4216 /*
4217  * uvm_map_submap: punch down part of a map into a submap
4218  *
4219  * => only the kernel_map is allowed to be submapped
4220  * => the purpose of submapping is to break up the locking granularity
4221  *	of a larger map
4222  * => the range specified must have been mapped previously with a uvm_map()
4223  *	call [with uobj==NULL] to create a blank map entry in the main map.
4224  *	[And it had better still be blank!]
4225  * => maps which contain submaps should never be copied or forked.
4226  * => to remove a submap, use uvm_unmap() on the main map
4227  *	and then uvm_map_deallocate() the submap.
4228  * => main map must be unlocked.
4229  * => submap must have been init'd and have a zero reference count.
4230  *	[need not be locked as we don't actually reference it]
4231  */
4232 int
4233 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4234     struct vm_map *submap)
4235 {
4236 	struct vm_map_entry *entry;
4237 	int result;
4238 
4239 	if (start > map->max_offset || end > map->max_offset ||
4240 	    start < map->min_offset || end < map->min_offset)
4241 		return EINVAL;
4242 
4243 	vm_map_lock(map);
4244 
4245 	if (uvm_map_lookup_entry(map, start, &entry)) {
4246 		UVM_MAP_CLIP_START(map, entry, start);
4247 		UVM_MAP_CLIP_END(map, entry, end);
4248 	} else
4249 		entry = NULL;
4250 
4251 	if (entry != NULL &&
4252 	    entry->start == start && entry->end == end &&
4253 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4254 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4255 		entry->etype |= UVM_ET_SUBMAP;
4256 		entry->object.sub_map = submap;
4257 		entry->offset = 0;
4258 		uvm_map_reference(submap);
4259 		result = 0;
4260 	} else
4261 		result = EINVAL;
4262 
4263 	vm_map_unlock(map);
4264 	return result;
4265 }
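
/*
 * Illustrative sketch (not part of the build): carve a previously
 * reserved, blank region of kernel_map into its own submap.  In
 * practice this is wrapped by a helper such as uvm_km_suballoc(); the
 * bare call is shown only to make the contract above concrete.  The
 * submap is assumed to have been set up beforehand with matching
 * [start, end) bounds.
 *
 *	if (uvm_map_submap(kernel_map, start, end, submap) != 0)
 *		panic("could not install submap");
 */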
4266 
4267 /*
4268  * uvm_map_checkprot: check protection in map
4269  *
4270  * => must allow specified protection in a fully allocated region.
4271  * => map must be read or write locked by caller.
4272  */
4273 boolean_t
4274 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4275     vm_prot_t protection)
4276 {
4277 	struct vm_map_entry *entry;
4278 
4279 	if (start < map->min_offset || end > map->max_offset || start > end)
4280 		return FALSE;
4281 	if (start == end)
4282 		return TRUE;
4283 
4284 	/*
4285 	 * Iterate entries.
4286 	 */
4287 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4288 	    entry != NULL && entry->start < end;
4289 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4290 		/* Fail if a hole is found. */
4291 		if (UVM_ET_ISHOLE(entry) ||
4292 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4293 			return FALSE;
4294 
4295 		/* Check protection. */
4296 		if ((entry->protection & protection) != protection)
4297 			return FALSE;
4298 	}
4299 	return TRUE;
4300 }
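
/*
 * Illustrative sketch (not part of the build): verify that a buffer
 * range is fully mapped readable and writable before touching it.  The
 * map must already be read- or write-locked, per the note above.
 *
 *	vm_map_lock_read(map);
 *	if (!uvm_map_checkprot(map, start, start + len,
 *	    PROT_READ | PROT_WRITE))
 *		error = EFAULT;
 *	vm_map_unlock_read(map);
 */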
4301 
4302 /*
4303  * uvm_map_create: create map
4304  */
4305 vm_map_t
4306 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4307 {
4308 	vm_map_t map;
4309 
4310 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4311 	uvm_map_setup(map, pmap, min, max, flags);
4312 	return (map);
4313 }
4314 
4315 /*
4316  * uvm_map_deallocate: drop reference to a map
4317  *
4318  * => caller must not lock map
4319  * => we will zap map if ref count goes to zero
4320  */
4321 void
4322 uvm_map_deallocate(vm_map_t map)
4323 {
4324 	int c;
4325 	struct uvm_map_deadq dead;
4326 
4327 	c = atomic_dec_int_nv(&map->ref_count);
4328 	if (c > 0) {
4329 		return;
4330 	}
4331 
4332 	/*
4333 	 * all references gone.   unmap and free.
4334 	 *
4335 	 * No lock required: we are only one to access this map.
4336 	 */
4337 	TAILQ_INIT(&dead);
4338 	uvm_tree_sanity(map, __FILE__, __LINE__);
4339 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4340 	    TRUE, FALSE);
4341 	pmap_destroy(map->pmap);
4342 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4343 	free(map, M_VMMAP, sizeof *map);
4344 
4345 	uvm_unmap_detach(&dead, 0);
4346 }
4347 
4348 /*
4349  * uvm_map_inherit: set inheritance code for range of addrs in map.
4350  *
4351  * => map must be unlocked
4352  * => note that the inherit code is used during a "fork".  see fork
4353  *	code for details.
4354  */
4355 int
4356 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4357     vm_inherit_t new_inheritance)
4358 {
4359 	struct vm_map_entry *entry;
4360 
4361 	switch (new_inheritance) {
4362 	case MAP_INHERIT_NONE:
4363 	case MAP_INHERIT_COPY:
4364 	case MAP_INHERIT_SHARE:
4365 	case MAP_INHERIT_ZERO:
4366 		break;
4367 	default:
4368 		return (EINVAL);
4369 	}
4370 
4371 	if (start > end)
4372 		return EINVAL;
4373 	start = MAX(start, map->min_offset);
4374 	end = MIN(end, map->max_offset);
4375 	if (start >= end)
4376 		return 0;
4377 
4378 	vm_map_lock(map);
4379 
4380 	entry = uvm_map_entrybyaddr(&map->addr, start);
4381 	if (entry->end > start)
4382 		UVM_MAP_CLIP_START(map, entry, start);
4383 	else
4384 		entry = RBT_NEXT(uvm_map_addr, entry);
4385 
4386 	while (entry != NULL && entry->start < end) {
4387 		UVM_MAP_CLIP_END(map, entry, end);
4388 		entry->inheritance = new_inheritance;
4389 		entry = RBT_NEXT(uvm_map_addr, entry);
4390 	}
4391 
4392 	vm_map_unlock(map);
4393 	return (0);
4394 }
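
/*
 * Illustrative sketch (not part of the build): mark a range so that a
 * future fork() shares it with the child instead of copying it, which
 * is roughly what the minherit(2) system call arranges.
 *
 *	error = uvm_map_inherit(&p->p_vmspace->vm_map,
 *	    addr, addr + len, MAP_INHERIT_SHARE);
 */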
4395 
4396 /*
4397  * uvm_map_syscall: permit system calls for range of addrs in map.
4398  *
4399  * => map must be unlocked
4400  */
4401 int
4402 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
4403 {
4404 	struct vm_map_entry *entry;
4405 
4406 	if (start > end)
4407 		return EINVAL;
4408 	start = MAX(start, map->min_offset);
4409 	end = MIN(end, map->max_offset);
4410 	if (start >= end)
4411 		return 0;
4412 	if (map->flags & VM_MAP_SYSCALL_ONCE)	/* only allowed once */
4413 		return (EPERM);
4414 
4415 	vm_map_lock(map);
4416 
4417 	entry = uvm_map_entrybyaddr(&map->addr, start);
4418 	if (entry->end > start)
4419 		UVM_MAP_CLIP_START(map, entry, start);
4420 	else
4421 		entry = RBT_NEXT(uvm_map_addr, entry);
4422 
4423 	while (entry != NULL && entry->start < end) {
4424 		UVM_MAP_CLIP_END(map, entry, end);
4425 		entry->etype |= UVM_ET_SYSCALL;
4426 		entry = RBT_NEXT(uvm_map_addr, entry);
4427 	}
4428 
4429 	map->wserial++;
4430 	map->flags |= VM_MAP_SYSCALL_ONCE;
4431 	vm_map_unlock(map);
4432 	return (0);
4433 }
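
/*
 * Illustrative sketch (not part of the build): register the only
 * region allowed to issue system calls.  A second call on the same map
 * fails with EPERM because VM_MAP_SYSCALL_ONCE is set by the first.
 *
 *	error = uvm_map_syscall(&p->p_vmspace->vm_map, va, va + len);
 */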
4434 
4435 /*
4436  * uvm_map_advice: set advice code for range of addrs in map.
4437  *
4438  * => map must be unlocked
4439  */
4440 int
4441 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4442 {
4443 	struct vm_map_entry *entry;
4444 
4445 	switch (new_advice) {
4446 	case MADV_NORMAL:
4447 	case MADV_RANDOM:
4448 	case MADV_SEQUENTIAL:
4449 		break;
4450 	default:
4451 		return (EINVAL);
4452 	}
4453 
4454 	if (start > end)
4455 		return EINVAL;
4456 	start = MAX(start, map->min_offset);
4457 	end = MIN(end, map->max_offset);
4458 	if (start >= end)
4459 		return 0;
4460 
4461 	vm_map_lock(map);
4462 
4463 	entry = uvm_map_entrybyaddr(&map->addr, start);
4464 	if (entry != NULL && entry->end > start)
4465 		UVM_MAP_CLIP_START(map, entry, start);
4466 	else if (entry != NULL)
4467 		entry = RBT_NEXT(uvm_map_addr, entry);
4468 
4469 	/*
4470 	 * XXXJRT: disallow holes?
4471 	 */
4472 	while (entry != NULL && entry->start < end) {
4473 		UVM_MAP_CLIP_END(map, entry, end);
4474 		entry->advice = new_advice;
4475 		entry = RBT_NEXT(uvm_map_addr, entry);
4476 	}
4477 
4478 	vm_map_unlock(map);
4479 	return (0);
4480 }
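
/*
 * Illustrative sketch (not part of the build): hint that a mapping
 * will be accessed sequentially, as madvise(2) with MADV_SEQUENTIAL
 * would arrange for the calling process.
 *
 *	error = uvm_map_advice(&p->p_vmspace->vm_map,
 *	    addr, addr + len, MADV_SEQUENTIAL);
 */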
4481 
4482 /*
4483  * uvm_map_extract: extract a mapping from a map and put it somewhere
4484  * in the kernel_map, setting protection to max_prot.
4485  *
4486  * => map should be unlocked (we will write lock it and kernel_map)
4487  * => returns 0 on success, error code otherwise
4488  * => start must be page aligned
4489  * => len must be page sized
4490  * => flags:
4491  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4492  * Mappings are QREF's.
4493  */
4494 int
4495 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4496     vaddr_t *dstaddrp, int flags)
4497 {
4498 	struct uvm_map_deadq dead;
4499 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4500 	vaddr_t dstaddr;
4501 	vaddr_t end;
4502 	vaddr_t cp_start;
4503 	vsize_t cp_len, cp_off;
4504 	int error;
4505 
4506 	TAILQ_INIT(&dead);
4507 	end = start + len;
4508 
4509 	/*
4510 	 * Sanity check on the parameters.
4511 	 * Also, since the mapping may not contain gaps, error out if the
4512 	 * mapped area is not in source map.
4513 	 */
4514 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4515 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4516 		return EINVAL;
4517 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4518 		return EINVAL;
4519 
4520 	/* Initialize dead entries. Handle len == 0 case. */
4521 	if (len == 0)
4522 		return 0;
4523 
4524 	/* Acquire lock on srcmap. */
4525 	vm_map_lock(srcmap);
4526 
4527 	/* Lock srcmap, lookup first and last entry in <start,len>. */
4528 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4529 
4530 	/* Check that the range is contiguous. */
4531 	for (entry = first; entry != NULL && entry->end < end;
4532 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4533 		if (VMMAP_FREE_END(entry) != entry->end ||
4534 		    UVM_ET_ISHOLE(entry)) {
4535 			error = EINVAL;
4536 			goto fail;
4537 		}
4538 	}
4539 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4540 		error = EINVAL;
4541 		goto fail;
4542 	}
4543 
4544 	/*
4545 	 * Handle need-copy flag.
4546 	 */
4547 	for (entry = first; entry != NULL && entry->start < end;
4548 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4549 		if (UVM_ET_ISNEEDSCOPY(entry))
4550 			amap_copy(srcmap, entry, M_NOWAIT,
4551 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4552 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4553 			/*
4554 			 * amap_copy failure
4555 			 */
4556 			error = ENOMEM;
4557 			goto fail;
4558 		}
4559 	}
4560 
4561 	/* Lock destination map (kernel_map). */
4562 	vm_map_lock(kernel_map);
4563 
4564 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4565 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4566 	    PROT_NONE, 0) != 0) {
4567 		error = ENOMEM;
4568 		goto fail2;
4569 	}
4570 	*dstaddrp = dstaddr;
4571 
4572 	/*
4573 	 * We now have srcmap and kernel_map locked.
4574 	 * dstaddr contains the destination offset in dstmap.
4575 	 */
4576 	/* step 1: start looping through map entries, performing extraction. */
4577 	for (entry = first; entry != NULL && entry->start < end;
4578 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4579 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4580 		if (UVM_ET_ISHOLE(entry))
4581 			continue;
4582 
4583 		/* Calculate uvm_mapent_clone parameters. */
4584 		cp_start = entry->start;
4585 		if (cp_start < start) {
4586 			cp_off = start - cp_start;
4587 			cp_start = start;
4588 		} else
4589 			cp_off = 0;
4590 		cp_len = MIN(entry->end, end) - cp_start;
4591 
4592 		newentry = uvm_mapent_clone(kernel_map,
4593 		    cp_start - start + dstaddr, cp_len, cp_off,
4594 		    entry->protection, entry->max_protection,
4595 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4596 		if (newentry == NULL) {
4597 			error = ENOMEM;
4598 			goto fail2_unmap;
4599 		}
4600 		kernel_map->size += cp_len;
4601 		if (flags & UVM_EXTRACT_FIXPROT)
4602 			newentry->protection = newentry->max_protection;
4603 
4604 		/*
4605 		 * Step 2: perform pmap copy.
4606 		 * (Doing this in the loop saves one RB traversal.)
4607 		 */
4608 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4609 		    cp_start - start + dstaddr, cp_len, cp_start);
4610 	}
4611 	pmap_update(kernel_map->pmap);
4612 
4613 	error = 0;
4614 
4615 	/* Unmap copied entries on failure. */
4616 fail2_unmap:
4617 	if (error) {
4618 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4619 		    FALSE, TRUE);
4620 	}
4621 
4622 	/* Release maps, release dead entries. */
4623 fail2:
4624 	vm_map_unlock(kernel_map);
4625 
4626 fail:
4627 	vm_map_unlock(srcmap);
4628 
4629 	uvm_unmap_detach(&dead, 0);
4630 
4631 	return error;
4632 }
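
/*
 * Illustrative sketch (not part of the build): double-map a
 * page-aligned range from a user map into kernel_map, use it, then
 * drop the alias again.  uva and len are assumed page-aligned.
 *
 *	vaddr_t kva;
 *
 *	error = uvm_map_extract(&p->p_vmspace->vm_map, uva, len,
 *	    &kva, UVM_EXTRACT_FIXPROT);
 *	if (error == 0) {
 *		...use the data at kva...
 *		uvm_unmap(kernel_map, kva, kva + len);
 *	}
 */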
4633 
4634 /*
4635  * uvm_map_clean: clean out a map range
4636  *
4637  * => valid flags:
4638  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4639  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4640  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4641  *   if (flags & PGO_FREE): any cached pages are freed after clean
4642  * => returns an error if any part of the specified range isn't mapped
4643  * => never a need to flush amap layer since the anonymous memory has
4644  *	no permanent home, but may deactivate pages there
4645  * => called from sys_msync() and sys_madvise()
4646  * => caller must not write-lock map (read OK).
4647  * => we may sleep while cleaning if SYNCIO [with map read-locked]
4648  */
4649 
4650 int
4651 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4652 {
4653 	struct vm_map_entry *first, *entry;
4654 	struct vm_amap *amap;
4655 	struct vm_anon *anon;
4656 	struct vm_page *pg;
4657 	struct uvm_object *uobj;
4658 	vaddr_t cp_start, cp_end;
4659 	int refs;
4660 	int error;
4661 	boolean_t rv;
4662 
4663 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4664 	    (PGO_FREE|PGO_DEACTIVATE));
4665 
4666 	if (start > end || start < map->min_offset || end > map->max_offset)
4667 		return EINVAL;
4668 
4669 	vm_map_lock_read(map);
4670 	first = uvm_map_entrybyaddr(&map->addr, start);
4671 
4672 	/* Make a first pass to check for holes. */
4673 	for (entry = first; entry != NULL && entry->start < end;
4674 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4675 		if (UVM_ET_ISSUBMAP(entry)) {
4676 			vm_map_unlock_read(map);
4677 			return EINVAL;
4678 		}
4679 		if (UVM_ET_ISSUBMAP(entry) ||
4680 		    UVM_ET_ISHOLE(entry) ||
4681 		    (entry->end < end &&
4682 		    VMMAP_FREE_END(entry) != entry->end)) {
4683 			vm_map_unlock_read(map);
4684 			return EFAULT;
4685 		}
4686 	}
4687 
4688 	error = 0;
4689 	for (entry = first; entry != NULL && entry->start < end;
4690 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4691 		amap = entry->aref.ar_amap;	/* top layer */
4692 		if (UVM_ET_ISOBJ(entry))
4693 			uobj = entry->object.uvm_obj;
4694 		else
4695 			uobj = NULL;
4696 
4697 		/*
4698 		 * No amap cleaning necessary if:
4699 		 *  - there's no amap
4700 		 *  - we're not deactivating or freeing pages.
4701 		 */
4702 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4703 			goto flush_object;
4704 
4705 		cp_start = MAX(entry->start, start);
4706 		cp_end = MIN(entry->end, end);
4707 
4708 		amap_lock(amap);
4709 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4710 			anon = amap_lookup(&entry->aref,
4711 			    cp_start - entry->start);
4712 			if (anon == NULL)
4713 				continue;
4714 
4715 			KASSERT(anon->an_lock == amap->am_lock);
4716 			pg = anon->an_page;
4717 			if (pg == NULL) {
4718 				continue;
4719 			}
4720 			KASSERT(pg->pg_flags & PQ_ANON);
4721 
4722 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4723 			/*
4724 			 * XXX In these first 3 cases, we always just
4725 			 * XXX deactivate the page.  We may want to
4726 			 * XXX handle the different cases more
4727 			 * XXX specifically, in the future.
4728 			 */
4729 			case PGO_CLEANIT|PGO_FREE:
4730 			case PGO_CLEANIT|PGO_DEACTIVATE:
4731 			case PGO_DEACTIVATE:
4732 deactivate_it:
4733 				/* skip the page if it's wired */
4734 				if (pg->wire_count != 0)
4735 					break;
4736 
4737 				uvm_lock_pageq();
4738 
4739 				KASSERT(pg->uanon == anon);
4740 
4741 				/* zap all mappings for the page. */
4742 				pmap_page_protect(pg, PROT_NONE);
4743 
4744 				/* ...and deactivate the page. */
4745 				uvm_pagedeactivate(pg);
4746 
4747 				uvm_unlock_pageq();
4748 				break;
4749 			case PGO_FREE:
4750 				/*
4751 				 * If there are multiple references to
4752 				 * the amap, just deactivate the page.
4753 				 */
4754 				if (amap_refs(amap) > 1)
4755 					goto deactivate_it;
4756 
4757 				/* XXX skip the page if it's wired */
4758 				if (pg->wire_count != 0) {
4759 					break;
4760 				}
4761 				amap_unadd(&entry->aref,
4762 				    cp_start - entry->start);
4763 				refs = --anon->an_ref;
4764 				if (refs == 0)
4765 					uvm_anfree(anon);
4766 				break;
4767 			default:
4768 				panic("uvm_map_clean: weird flags");
4769 			}
4770 		}
4771 		amap_unlock(amap);
4772 
4773 flush_object:
4774 		cp_start = MAX(entry->start, start);
4775 		cp_end = MIN(entry->end, end);
4776 
4777 		/*
4778 		 * flush pages if we've got a valid backing object.
4779 		 *
4780 		 * Don't PGO_FREE if we don't have write permission
4781 		 * and don't flush if this is a copy-on-write object
4782 		 * since we can't know our permissions on it.
4783 		 */
4784 		if (uobj != NULL &&
4785 		    ((flags & PGO_FREE) == 0 ||
4786 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4787 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4788 			rw_enter(uobj->vmobjlock, RW_WRITE);
4789 			rv = uobj->pgops->pgo_flush(uobj,
4790 			    cp_start - entry->start + entry->offset,
4791 			    cp_end - entry->start + entry->offset, flags);
4792 			rw_exit(uobj->vmobjlock);
4793 
4794 			if (rv == FALSE)
4795 				error = EFAULT;
4796 		}
4797 	}
4798 
4799 	vm_map_unlock_read(map);
4800 	return error;
4801 }
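
/*
 * Illustrative sketch (not part of the build): an msync(2)-style
 * caller cleans and synchronously writes back dirty pages in a range
 * without freeing them; a madvise(2)-style caller would pass
 * PGO_DEACTIVATE or PGO_FREE instead.
 *
 *	error = uvm_map_clean(&p->p_vmspace->vm_map, start, end,
 *	    PGO_CLEANIT | PGO_SYNCIO);
 */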
4802 
4803 /*
4804  * UVM_MAP_CLIP_END implementation
4805  */
4806 void
4807 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4808 {
4809 	struct vm_map_entry *tmp;
4810 
4811 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4812 	tmp = uvm_mapent_alloc(map, 0);
4813 
4814 	/* Invoke splitentry. */
4815 	uvm_map_splitentry(map, entry, tmp, addr);
4816 }
4817 
4818 /*
4819  * UVM_MAP_CLIP_START implementation
4820  *
4821  * Clippers are required to not change the pointers to the entry they are
4822  * clipping on.
4823  * Since uvm_map_splitentry turns the original entry into the lowest
4824  * entry (address wise) we do a swap between the new entry and the original
4825  * entry, prior to calling uvm_map_splitentry.
4826  */
4827 void
4828 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4829 {
4830 	struct vm_map_entry *tmp;
4831 	struct uvm_addr_state *free;
4832 
4833 	/* Unlink original. */
4834 	free = uvm_map_uaddr_e(map, entry);
4835 	uvm_mapent_free_remove(map, free, entry);
4836 	uvm_mapent_addr_remove(map, entry);
4837 
4838 	/* Copy entry. */
4839 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4840 	tmp = uvm_mapent_alloc(map, 0);
4841 	uvm_mapent_copy(entry, tmp);
4842 
4843 	/* Put new entry in place of original entry. */
4844 	uvm_mapent_addr_insert(map, tmp);
4845 	uvm_mapent_free_insert(map, free, tmp);
4846 
4847 	/* Invoke splitentry. */
4848 	uvm_map_splitentry(map, tmp, entry, addr);
4849 }
4850 
4851 /*
4852  * Boundary fixer.
4853  */
4854 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4855 static inline vaddr_t
4856 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4857 {
4858 	return (min < bound && max > bound) ? bound : max;
4859 }
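
/*
 * Worked example: with min = 0x1000 and max = 0x5000, a boundary at
 * 0x3000 lies strictly inside the range, so 0x3000 is returned; a
 * boundary at 0x8000 (or at max itself) is not crossed, so max is
 * returned unchanged.
 */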
4860 
4861 /*
4862  * Choose free list based on address at start of free space.
4863  *
4864  * The uvm_addr_state returned contains addr and is the first of:
4865  * - uaddr_exe
4866  * - uaddr_brk_stack
4867  * - uaddr_any
4868  */
4869 struct uvm_addr_state*
4870 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4871 {
4872 	struct uvm_addr_state *uaddr;
4873 	int i;
4874 
4875 	/* Special case the first page, to prevent mmap from returning 0. */
4876 	if (addr < VMMAP_MIN_ADDR)
4877 		return NULL;
4878 
4879 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4880 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4881 		if (addr >= uvm_maxkaddr)
4882 			return NULL;
4883 	}
4884 
4885 	/* Is the address inside the exe-only map? */
4886 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4887 	    addr < map->uaddr_exe->uaddr_maxaddr)
4888 		return map->uaddr_exe;
4889 
4890 	/* Check if the space falls inside brk/stack area. */
4891 	if ((addr >= map->b_start && addr < map->b_end) ||
4892 	    (addr >= map->s_start && addr < map->s_end)) {
4893 		if (map->uaddr_brk_stack != NULL &&
4894 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4895 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4896 			return map->uaddr_brk_stack;
4897 		} else
4898 			return NULL;
4899 	}
4900 
4901 	/*
4902 	 * Check the other selectors.
4903 	 *
4904 	 * These selectors are only marked as the owner, if they have insert
4905 	 * functions.
4906 	 */
4907 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4908 		uaddr = map->uaddr_any[i];
4909 		if (uaddr == NULL)
4910 			continue;
4911 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4912 			continue;
4913 
4914 		if (addr >= uaddr->uaddr_minaddr &&
4915 		    addr < uaddr->uaddr_maxaddr)
4916 			return uaddr;
4917 	}
4918 
4919 	return NULL;
4920 }
4921 
4922 /*
4923  * Choose free list based on address at start of free space.
4924  *
4925  * The uvm_addr_state returned contains addr and is the first of:
4926  * - uaddr_exe
4927  * - uaddr_brk_stack
4928  * - uaddr_any
4929  */
4930 struct uvm_addr_state*
4931 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4932 {
4933 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4934 }
4935 
4936 /*
4937  * Returns the first free-memory boundary that is crossed by [min-max].
4938  */
4939 vsize_t
4940 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4941 {
4942 	struct uvm_addr_state	*uaddr;
4943 	int			 i;
4944 
4945 	/* Never return first page. */
4946 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4947 
4948 	/* Treat the maxkaddr special, if the map is a kernel_map. */
4949 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4950 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4951 
4952 	/* Check for exe-only boundaries. */
4953 	if (map->uaddr_exe != NULL) {
4954 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4955 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4956 	}
4957 
4958 	/* Check for brk/stack boundaries. */
4959 	if (map->uaddr_brk_stack != NULL) {
4960 		max = uvm_map_boundfix(min, max,
4961 		    map->uaddr_brk_stack->uaddr_minaddr);
4962 		max = uvm_map_boundfix(min, max,
4963 		    map->uaddr_brk_stack->uaddr_maxaddr);
4964 	}
4965 
4966 	/* Check other boundaries. */
4967 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4968 		uaddr = map->uaddr_any[i];
4969 		if (uaddr != NULL) {
4970 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4971 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4972 		}
4973 	}
4974 
4975 	/* Boundaries at stack and brk() area. */
4976 	max = uvm_map_boundfix(min, max, map->s_start);
4977 	max = uvm_map_boundfix(min, max, map->s_end);
4978 	max = uvm_map_boundfix(min, max, map->b_start);
4979 	max = uvm_map_boundfix(min, max, map->b_end);
4980 
4981 	return max;
4982 }
4983 
4984 /*
4985  * Update map allocation start and end addresses from proc vmspace.
4986  */
4987 void
4988 uvm_map_vmspace_update(struct vm_map *map,
4989     struct uvm_map_deadq *dead, int flags)
4990 {
4991 	struct vmspace *vm;
4992 	vaddr_t b_start, b_end, s_start, s_end;
4993 
4994 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4995 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4996 
4997 	/*
4998 	 * Derive actual allocation boundaries from vmspace.
4999 	 */
5000 	vm = (struct vmspace *)map;
5001 	b_start = (vaddr_t)vm->vm_daddr;
5002 	b_end   = b_start + BRKSIZ;
5003 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
5004 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
5005 #ifdef DIAGNOSTIC
5006 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
5007 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
5008 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
5009 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
5010 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
5011 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
5012 		    vm, b_start, b_end, s_start, s_end);
5013 	}
5014 #endif
5015 
5016 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
5017 	    map->s_start == s_start && map->s_end == s_end))
5018 		return;
5019 
5020 	uvm_map_freelist_update(map, dead, b_start, b_end,
5021 	    s_start, s_end, flags);
5022 }
5023 
5024 /*
5025  * Grow kernel memory.
5026  *
5027  * This function is only called for kernel maps when an allocation fails.
5028  *
5029  * If the map has a gap that is large enough to accommodate alloc_sz, this
5030  * function will make sure map->free will include it.
5031  */
5032 void
5033 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
5034     vsize_t alloc_sz, int flags)
5035 {
5036 	vsize_t sz;
5037 	vaddr_t end;
5038 	struct vm_map_entry *entry;
5039 
5040 	/* Kernel memory only. */
5041 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
5042 	/* Destroy free list. */
5043 	uvm_map_freelist_update_clear(map, dead);
5044 
5045 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
5046 	if (map->flags & VM_MAP_GUARDPAGES)
5047 		alloc_sz += PAGE_SIZE;
5048 
5049 	/*
5050 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
5051 	 *
5052 	 * Don't handle the case where the multiplication overflows:
5053 	 * if that happens, the allocation is probably too big anyway.
5054 	 */
5055 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
5056 
5057 	/*
5058 	 * Walk forward until a gap large enough for alloc_sz shows up.
5059 	 *
5060 	 * We assume the kernel map has no boundaries.
5061 	 * uvm_maxkaddr may be zero.
5062 	 */
5063 	end = MAX(uvm_maxkaddr, map->min_offset);
5064 	entry = uvm_map_entrybyaddr(&map->addr, end);
5065 	while (entry && entry->fspace < alloc_sz)
5066 		entry = RBT_NEXT(uvm_map_addr, entry);
5067 	if (entry) {
5068 		end = MAX(VMMAP_FREE_START(entry), end);
5069 		end += MIN(sz, map->max_offset - end);
5070 	} else
5071 		end = map->max_offset;
5072 
5073 	/* Reserve pmap entries. */
5074 #ifdef PMAP_GROWKERNEL
5075 	uvm_maxkaddr = pmap_growkernel(end);
5076 #else
5077 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
5078 #endif
5079 
5080 	/* Rebuild free list. */
5081 	uvm_map_freelist_update_refill(map, flags);
5082 }
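
/*
 * Worked example under assumed constants (the real values are defined
 * near the top of this file): if VM_MAP_KSIZE_ALLOCMUL were 4 and
 * VM_MAP_KSIZE_DELTA were 256 pages, a failing 2-page allocation would
 * grow the map by 256 pages (the delta dominates), while a failing
 * 128-page allocation would grow it by 512 pages.  Either way the
 * growth stops at map->max_offset.
 */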
5083 
5084 /*
5085  * Freelist update subfunction: unlink all entries from freelists.
5086  */
5087 void
5088 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
5089 {
5090 	struct uvm_addr_state *free;
5091 	struct vm_map_entry *entry, *prev, *next;
5092 
5093 	prev = NULL;
5094 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
5095 	    entry = next) {
5096 		next = RBT_NEXT(uvm_map_addr, entry);
5097 
5098 		free = uvm_map_uaddr_e(map, entry);
5099 		uvm_mapent_free_remove(map, free, entry);
5100 
5101 		if (prev != NULL && entry->start == entry->end) {
5102 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
5103 			uvm_mapent_addr_remove(map, entry);
5104 			DEAD_ENTRY_PUSH(dead, entry);
5105 		} else
5106 			prev = entry;
5107 	}
5108 }
5109 
5110 /*
5111  * Freelist update subfunction: refill the freelists with entries.
5112  */
5113 void
5114 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
5115 {
5116 	struct vm_map_entry *entry;
5117 	vaddr_t min, max;
5118 
5119 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5120 		min = VMMAP_FREE_START(entry);
5121 		max = VMMAP_FREE_END(entry);
5122 		entry->fspace = 0;
5123 
5124 		entry = uvm_map_fix_space(map, entry, min, max, flags);
5125 	}
5126 
5127 	uvm_tree_sanity(map, __FILE__, __LINE__);
5128 }
5129 
5130 /*
5131  * Change {a,b}_{start,end} allocation ranges and associated free lists.
5132  */
5133 void
5134 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
5135     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
5136 {
5137 	KDASSERT(b_end >= b_start && s_end >= s_start);
5138 
5139 	/* Clear all free lists. */
5140 	uvm_map_freelist_update_clear(map, dead);
5141 
5142 	/* Apply new bounds. */
5143 	map->b_start = b_start;
5144 	map->b_end   = b_end;
5145 	map->s_start = s_start;
5146 	map->s_end   = s_end;
5147 
5148 	/* Refill free lists. */
5149 	uvm_map_freelist_update_refill(map, flags);
5150 }
5151 
5152 /*
5153  * Assign a uvm_addr_state to the specified pointer in vm_map.
5154  *
5155  * May sleep.
5156  */
5157 void
5158 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
5159     struct uvm_addr_state *newval)
5160 {
5161 	struct uvm_map_deadq dead;
5162 
5163 	/* Pointer which must be in this map. */
5164 	KASSERT(which != NULL);
5165 	KASSERT((void*)map <= (void*)(which) &&
5166 	    (void*)(which) < (void*)(map + 1));
5167 
5168 	vm_map_lock(map);
5169 	TAILQ_INIT(&dead);
5170 	uvm_map_freelist_update_clear(map, &dead);
5171 
5172 	uvm_addr_destroy(*which);
5173 	*which = newval;
5174 
5175 	uvm_map_freelist_update_refill(map, 0);
5176 	vm_map_unlock(map);
5177 	uvm_unmap_detach(&dead, 0);
5178 }
5179 
5180 /*
5181  * Correct space insert.
5182  *
5183  * Entry must not be on any freelist.
5184  */
5185 struct vm_map_entry*
5186 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5187     vaddr_t min, vaddr_t max, int flags)
5188 {
5189 	struct uvm_addr_state	*free, *entfree;
5190 	vaddr_t			 lmax;
5191 
5192 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5193 	KDASSERT(min <= max);
5194 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5195 	    min == map->min_offset);
5196 
5197 	/*
5198 	 * During the function, entfree will always point at the uaddr state
5199 	 * for entry.
5200 	 */
5201 	entfree = (entry == NULL ? NULL :
5202 	    uvm_map_uaddr_e(map, entry));
5203 
5204 	while (min != max) {
5205 		/* Claim guard page for entry. */
5206 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5207 		    VMMAP_FREE_END(entry) == entry->end &&
5208 		    entry->start != entry->end) {
5209 			if (max - min == 2 * PAGE_SIZE) {
5210 				/*
5211 				 * If the free-space gap is exactly 2 pages,
5212 				 * we make the guard 2 pages instead of 1.
5213 				 * Because in a guarded map, an area needs
5214 				 * at least 2 pages to allocate from:
5215 				 * one page for the allocation and one for
5216 				 * the guard.
5217 				 */
5218 				entry->guard = 2 * PAGE_SIZE;
5219 				min = max;
5220 			} else {
5221 				entry->guard = PAGE_SIZE;
5222 				min += PAGE_SIZE;
5223 			}
5224 			continue;
5225 		}
5226 
5227 		/*
5228 		 * Handle the case where entry has a 2-page guard, but the
5229 		 * space after entry is freed.
5230 		 */
5231 		if (entry != NULL && entry->fspace == 0 &&
5232 		    entry->guard > PAGE_SIZE) {
5233 			entry->guard = PAGE_SIZE;
5234 			min = VMMAP_FREE_START(entry);
5235 		}
5236 
5237 		lmax = uvm_map_boundary(map, min, max);
5238 		free = uvm_map_uaddr(map, min);
5239 
5240 		/*
5241 		 * Entries are merged if they point at the same uvm_free().
5242 		 * Exception to that rule: if min == uvm_maxkaddr, a new
5243 		 * entry is started regardless (otherwise the allocators
5244 		 * will get confused).
5245 		 */
5246 		if (entry != NULL && free == entfree &&
5247 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5248 		    min == uvm_maxkaddr)) {
5249 			KDASSERT(VMMAP_FREE_END(entry) == min);
5250 			entry->fspace += lmax - min;
5251 		} else {
5252 			/*
5253 			 * Commit entry to the free list: no more free space
5254 			 * will be added to it.
5255 			 * We'll start a new entry and accumulate free space
5256 			 * there instead.
5257 			 */
5258 			if (entry != NULL)
5259 				uvm_mapent_free_insert(map, entfree, entry);
5260 
5261 			/* New entry for new uaddr. */
5262 			entry = uvm_mapent_alloc(map, flags);
5263 			KDASSERT(entry != NULL);
5264 			entry->end = entry->start = min;
5265 			entry->guard = 0;
5266 			entry->fspace = lmax - min;
5267 			entry->object.uvm_obj = NULL;
5268 			entry->offset = 0;
5269 			entry->etype = 0;
5270 			entry->protection = entry->max_protection = 0;
5271 			entry->inheritance = 0;
5272 			entry->wired_count = 0;
5273 			entry->advice = 0;
5274 			entry->aref.ar_pageoff = 0;
5275 			entry->aref.ar_amap = NULL;
5276 			uvm_mapent_addr_insert(map, entry);
5277 
5278 			entfree = free;
5279 		}
5280 
5281 		min = lmax;
5282 	}
5283 	/* Finally put entry on the uaddr state. */
5284 	if (entry != NULL)
5285 		uvm_mapent_free_insert(map, entfree, entry);
5286 
5287 	return entry;
5288 }
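
/*
 * Guard-page example (illustrative): in a map with VM_MAP_GUARDPAGES
 * set, a free gap of exactly 2 pages after an allocated entry becomes
 * guard in its entirety, because a 1-page guard would leave a single
 * free page that no allocation (which needs its own guard) could use.
 * Any larger gap gives up only its first page as guard.
 */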
5289 
5290 /*
5291  * MQuery style of allocation.
5292  *
5293  * This allocator searches forward until sufficient space is found to map
5294  * the given size.
5295  *
5296  * XXX: factor in offset (via pmap_prefer) and protection?
5297  */
5298 int
5299 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5300     int flags)
5301 {
5302 	struct vm_map_entry *entry, *last;
5303 	vaddr_t addr;
5304 	vaddr_t tmp, pmap_align, pmap_offset;
5305 	int error;
5306 
5307 	addr = *addr_p;
5308 	vm_map_lock_read(map);
5309 
5310 	/* Configure pmap prefer. */
5311 	if (offset != UVM_UNKNOWN_OFFSET) {
5312 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5313 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5314 	} else {
5315 		pmap_align = PAGE_SIZE;
5316 		pmap_offset = 0;
5317 	}
5318 
5319 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5320 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5321 	  	tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5322 		if (tmp < addr)
5323 			tmp += pmap_align;
5324 		addr = tmp;
5325 	}
5326 
5327 	/* First, check if the requested range is fully available. */
5328 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5329 	last = NULL;
5330 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5331 		error = 0;
5332 		goto out;
5333 	}
5334 	if (flags & UVM_FLAG_FIXED) {
5335 		error = EINVAL;
5336 		goto out;
5337 	}
5338 
5339 	error = ENOMEM; /* Default error from here. */
5340 
5341 	/*
5342 	 * At this point, the memory at <addr, sz> is not available.
5343 	 * The reasons are:
5344 	 * [1] it's outside the map,
5345 	 * [2] it starts in used memory (and therefore needs to move
5346 	 *     toward the first free page in entry),
5347 	 * [3] it starts in free memory but bumps into used memory.
5348 	 *
5349 	 * Note that for case [2], the forward moving is handled by the
5350 	 * for loop below.
5351 	 */
5352 	if (entry == NULL) {
5353 		/* [1] Outside the map. */
5354 		if (addr >= map->max_offset)
5355 			goto out;
5356 		else
5357 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5358 	} else if (VMMAP_FREE_START(entry) <= addr) {
5359 		/* [3] Bumped into used memory. */
5360 		entry = RBT_NEXT(uvm_map_addr, entry);
5361 	}
5362 
5363 	/* Test if the next entry is sufficient for the allocation. */
5364 	for (; entry != NULL;
5365 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5366 		if (entry->fspace == 0)
5367 			continue;
5368 		addr = VMMAP_FREE_START(entry);
5369 
5370 restart:	/* Restart address checks on address change. */
5371 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5372 		if (tmp < addr)
5373 			tmp += pmap_align;
5374 		addr = tmp;
5375 		if (addr >= VMMAP_FREE_END(entry))
5376 			continue;
5377 
5378 		/* Skip brk() allocation addresses. */
5379 		if (addr + sz > map->b_start && addr < map->b_end) {
5380 			if (VMMAP_FREE_END(entry) > map->b_end) {
5381 				addr = map->b_end;
5382 				goto restart;
5383 			} else
5384 				continue;
5385 		}
5386 		/* Skip stack allocation addresses. */
5387 		if (addr + sz > map->s_start && addr < map->s_end) {
5388 			if (VMMAP_FREE_END(entry) > map->s_end) {
5389 				addr = map->s_end;
5390 				goto restart;
5391 			} else
5392 				continue;
5393 		}
5394 
5395 		last = NULL;
5396 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5397 			error = 0;
5398 			goto out;
5399 		}
5400 	}
5401 
5402 out:
5403 	vm_map_unlock_read(map);
5404 	if (error == 0)
5405 		*addr_p = addr;
5406 	return error;
5407 }
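
/*
 * Illustrative sketch (not part of the build): probe for the first
 * address at or above a hint where sz bytes could be mapped, roughly
 * what the mquery(2) system call does.  With UVM_FLAG_FIXED the probe
 * fails with EINVAL instead of sliding forward.
 *
 *	vaddr_t va = hint;
 *
 *	error = uvm_map_mquery(&p->p_vmspace->vm_map, &va, sz,
 *	    UVM_UNKNOWN_OFFSET, 0);
 *	if (error == 0)
 *		...va now holds a usable address at or above hint...
 */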
5408 
5409 boolean_t
5410 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5411 {
5412 	boolean_t rv;
5413 
5414 	if (map->flags & VM_MAP_INTRSAFE) {
5415 		rv = mtx_enter_try(&map->mtx);
5416 	} else {
5417 		mtx_enter(&map->flags_lock);
5418 		if (map->flags & VM_MAP_BUSY) {
5419 			mtx_leave(&map->flags_lock);
5420 			return (FALSE);
5421 		}
5422 		mtx_leave(&map->flags_lock);
5423 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5424 		/* check if the lock is busy and back out if we won the race */
5425 		if (rv) {
5426 			mtx_enter(&map->flags_lock);
5427 			if (map->flags & VM_MAP_BUSY) {
5428 				rw_exit(&map->lock);
5429 				rv = FALSE;
5430 			}
5431 			mtx_leave(&map->flags_lock);
5432 		}
5433 	}
5434 
5435 	if (rv) {
5436 		map->timestamp++;
5437 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5438 		uvm_tree_sanity(map, file, line);
5439 		uvm_tree_size_chk(map, file, line);
5440 	}
5441 
5442 	return (rv);
5443 }
5444 
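/*
 * Take the map lock exclusively, sleeping while the map is marked
 * busy.  Once the rwlock is held the busy flag is checked again;
 * losing that race releases the lock and starts over.  INTRSAFE maps
 * simply take their mutex.
 */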
5445 void
5446 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5447 {
5448 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5449 		do {
5450 			mtx_enter(&map->flags_lock);
5451 tryagain:
5452 			while (map->flags & VM_MAP_BUSY) {
5453 				map->flags |= VM_MAP_WANTLOCK;
5454 				msleep_nsec(&map->flags, &map->flags_lock,
5455 				    PVM, vmmapbsy, INFSLP);
5456 			}
5457 			mtx_leave(&map->flags_lock);
5458 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5459 		/* back out and retry if the map was marked busy meanwhile */
5460 		mtx_enter(&map->flags_lock);
5461 		if (map->flags & VM_MAP_BUSY) {
5462 			rw_exit(&map->lock);
5463 			goto tryagain;
5464 		}
5465 		mtx_leave(&map->flags_lock);
5466 	} else {
5467 		mtx_enter(&map->mtx);
5468 	}
5469 
5470 	map->timestamp++;
5471 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5472 	uvm_tree_sanity(map, file, line);
5473 	uvm_tree_size_chk(map, file, line);
5474 }
5475 
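/*
 * Take the map lock shared (read).  INTRSAFE maps use their mutex
 * instead of the rwlock.
 */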
5476 void
5477 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5478 {
5479 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5480 		rw_enter_read(&map->lock);
5481 	else
5482 		mtx_enter(&map->mtx);
5483 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5484 	uvm_tree_sanity(map, file, line);
5485 	uvm_tree_size_chk(map, file, line);
5486 }
5487 
5488 void
5489 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5490 {
5491 	uvm_tree_sanity(map, file, line);
5492 	uvm_tree_size_chk(map, file, line);
5493 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5494 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5495 		rw_exit(&map->lock);
5496 	else
5497 		mtx_leave(&map->mtx);
5498 }
5499 
5500 void
5501 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5502 {
5503 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5504 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5505 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5506 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5507 		rw_exit_read(&map->lock);
5508 	else
5509 		mtx_leave(&map->mtx);
5510 }
5511 
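/*
 * Convert an exclusive hold on the map lock into a shared one without
 * releasing it.  Only meaningful for non-INTRSAFE maps.
 */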
5512 void
5513 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5514 {
5515 	uvm_tree_sanity(map, file, line);
5516 	uvm_tree_size_chk(map, file, line);
5517 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5518 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5519 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5520 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5521 		rw_enter(&map->lock, RW_DOWNGRADE);
5522 }
5523 
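/*
 * Convert a shared hold back into an exclusive one.  Note that this is
 * not atomic: the read lock is dropped before the write lock is taken,
 * so the map may change in between and callers must re-validate any
 * cached state.
 */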
5524 void
5525 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5526 {
5527 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5528 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5529 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5530 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5531 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5532 		rw_exit_read(&map->lock);
5533 		rw_enter_write(&map->lock);
5534 	}
5535 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5536 	uvm_tree_sanity(map, file, line);
5537 }
5538 
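/*
 * Mark the map busy.  While VM_MAP_BUSY is set vm_map_lock() sleeps and
 * vm_map_lock_try() fails, which is typically used to let the owner
 * drop and retake the lock without other threads modifying the map in
 * the meantime.  vm_map_unbusy() clears the flag and wakes any waiter
 * that set VM_MAP_WANTLOCK.
 */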
5539 void
5540 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5541 {
5542 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5543 	mtx_enter(&map->flags_lock);
5544 	map->flags |= VM_MAP_BUSY;
5545 	mtx_leave(&map->flags_lock);
5546 }
5547 
5548 void
5549 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5550 {
5551 	int oflags;
5552 
5553 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5554 	mtx_enter(&map->flags_lock);
5555 	oflags = map->flags;
5556 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5557 	mtx_leave(&map->flags_lock);
5558 	if (oflags & VM_MAP_WANTLOCK)
5559 		wakeup(&map->flags);
5560 }
5561 
5562 #ifndef SMALL_KERNEL
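/*
 * Fill the given buffer with kinfo_vmentry records describing the map.
 * On entry *lenp is the buffer size in bytes and kve[0].kve_start holds
 * the address to resume from; on return *lenp is the number of bytes
 * actually filled in.  ENOMEM means the buffer was too small for the
 * remaining entries.  (In-tree this backs a sysctl interface; the exact
 * consumer lives outside this file.)
 */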
5563 int
5564 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5565     size_t *lenp)
5566 {
5567 	struct vm_map_entry *entry;
5568 	vaddr_t start;
5569 	int cnt, maxcnt, error = 0;
5570 
5571 	KASSERT(*lenp > 0);
5572 	KASSERT((*lenp % sizeof(*kve)) == 0);
5573 	cnt = 0;
5574 	maxcnt = *lenp / sizeof(*kve);
5575 	KASSERT(maxcnt > 0);
5576 
5577 	/*
5578 	 * Return only entries whose start address is at or above the given
5579 	 * base address.  This allows userland to iterate without knowing the
5580 	 * number of entries beforehand.
5581 	 */
5582 	start = (vaddr_t)kve[0].kve_start;
5583 
5584 	vm_map_lock(map);
5585 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5586 		if (cnt == maxcnt) {
5587 			error = ENOMEM;
5588 			break;
5589 		}
5590 		if (start != 0 && entry->start < start)
5591 			continue;
5592 		kve->kve_start = entry->start;
5593 		kve->kve_end = entry->end;
5594 		kve->kve_guard = entry->guard;
5595 		kve->kve_fspace = entry->fspace;
5596 		kve->kve_fspace_augment = entry->fspace_augment;
5597 		kve->kve_offset = entry->offset;
5598 		kve->kve_wired_count = entry->wired_count;
5599 		kve->kve_etype = entry->etype;
5600 		kve->kve_protection = entry->protection;
5601 		kve->kve_max_protection = entry->max_protection;
5602 		kve->kve_advice = entry->advice;
5603 		kve->kve_inheritance = entry->inheritance;
5604 		kve->kve_flags = entry->flags;
5605 		kve++;
5606 		cnt++;
5607 	}
5608 	vm_map_unlock(map);
5609 
5610 	KASSERT(cnt <= maxcnt);
5611 
5612 	*lenp = sizeof(*kve) * cnt;
5613 	return error;
5614 }
5615 #endif
5616 
5617 
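/*
 * Generate the augmented red-black tree operations for the address
 * tree.  The augment callback keeps the per-subtree free-space summary
 * (fspace_augment) up to date so address selectors can skip subtrees
 * without enough free space.
 */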
5618 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5619     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5620 
5621 
5622 /*
5623  * MD code: vmspace allocator setup.
5624  */
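/*
 * Three flavours follow: i386, other LP64 platforms, and the remaining
 * 32-bit platforms.  For now all of them install a single randomized
 * allocator over the usable range; the preferred layouts (a separate
 * executable region on i386, pivot allocators elsewhere) are kept under
 * "#if 0" until enabled.  Non-SMALL_KERNEL kernels also get a dedicated
 * brk/stack allocator.
 */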
5625 
5626 #ifdef __i386__
5627 void
5628 uvm_map_setup_md(struct vm_map *map)
5629 {
5630 	vaddr_t		min, max;
5631 
5632 	min = map->min_offset;
5633 	max = map->max_offset;
5634 
5635 	/*
5636 	 * Ensure the selectors will not try to manage page 0;
5637 	 * it's too special.
5638 	 */
5639 	if (min < VMMAP_MIN_ADDR)
5640 		min = VMMAP_MIN_ADDR;
5641 
5642 #if 0	/* Cool stuff, not yet */
5643 	/* Executable code is special. */
5644 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5645 	/* Place normal allocations beyond executable mappings. */
5646 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5647 #else	/* Crappy stuff, for now */
5648 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5649 #endif
5650 
5651 #ifndef SMALL_KERNEL
5652 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5653 #endif /* !SMALL_KERNEL */
5654 }
5655 #elif __LP64__
5656 void
5657 uvm_map_setup_md(struct vm_map *map)
5658 {
5659 	vaddr_t		min, max;
5660 
5661 	min = map->min_offset;
5662 	max = map->max_offset;
5663 
5664 	/*
5665 	 * Ensure the selectors will not try to manage page 0;
5666 	 * it's too special.
5667 	 */
5668 	if (min < VMMAP_MIN_ADDR)
5669 		min = VMMAP_MIN_ADDR;
5670 
5671 #if 0	/* Cool stuff, not yet */
5672 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5673 #else	/* Crappy stuff, for now */
5674 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5675 #endif
5676 
5677 #ifndef SMALL_KERNEL
5678 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5679 #endif /* !SMALL_KERNEL */
5680 }
5681 #else	/* non-i386, 32 bit */
5682 void
5683 uvm_map_setup_md(struct vm_map *map)
5684 {
5685 	vaddr_t		min, max;
5686 
5687 	min = map->min_offset;
5688 	max = map->max_offset;
5689 
5690 	/*
5691 	 * Ensure the selectors will not try to manage page 0;
5692 	 * it's too special.
5693 	 */
5694 	if (min < VMMAP_MIN_ADDR)
5695 		min = VMMAP_MIN_ADDR;
5696 
5697 #if 0	/* Cool stuff, not yet */
5698 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5699 #else	/* Crappy stuff, for now */
5700 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5701 #endif
5702 
5703 #ifndef SMALL_KERNEL
5704 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5705 #endif /* !SMALL_KERNEL */
5706 }
5707 #endif
5708