xref: /openbsd-src/sys/uvm/uvm_map.c (revision c020cf82e0cc147236f01a8dca7052034cf9d30d)
1 /*	$OpenBSD: uvm_map.c,v 1.264 2020/03/25 14:55:14 mpi Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/acct.h>
90 #include <sys/mman.h>
91 #include <sys/proc.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/sysctl.h>
95 #include <sys/signalvar.h>
96 #include <sys/syslog.h>
97 #include <sys/user.h>
98 
99 #ifdef SYSVSHM
100 #include <sys/shm.h>
101 #endif
102 
103 #include <uvm/uvm.h>
104 
105 #ifdef DDB
106 #include <uvm/uvm_ddb.h>
107 #endif
108 
109 #include <uvm/uvm_addr.h>
110 
111 
112 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
113 int			 uvm_mapent_isjoinable(struct vm_map*,
114 			    struct vm_map_entry*, struct vm_map_entry*);
115 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
116 			    struct vm_map_entry*, struct uvm_map_deadq*);
117 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
118 			    struct vm_map_entry*, struct uvm_map_deadq*);
119 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
120 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
121 			    struct uvm_map_deadq*, struct vm_map_entry*);
122 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
123 void			 uvm_mapent_free(struct vm_map_entry*);
124 void			 uvm_unmap_kill_entry(struct vm_map*,
125 			    struct vm_map_entry*);
126 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
127 void			 uvm_mapent_mkfree(struct vm_map*,
128 			    struct vm_map_entry*, struct vm_map_entry**,
129 			    struct uvm_map_deadq*, boolean_t);
130 void			 uvm_map_pageable_pgon(struct vm_map*,
131 			    struct vm_map_entry*, struct vm_map_entry*,
132 			    vaddr_t, vaddr_t);
133 int			 uvm_map_pageable_wire(struct vm_map*,
134 			    struct vm_map_entry*, struct vm_map_entry*,
135 			    vaddr_t, vaddr_t, int);
136 void			 uvm_map_setup_entries(struct vm_map*);
137 void			 uvm_map_setup_md(struct vm_map*);
138 void			 uvm_map_teardown(struct vm_map*);
139 void			 uvm_map_vmspace_update(struct vm_map*,
140 			    struct uvm_map_deadq*, int);
141 void			 uvm_map_kmem_grow(struct vm_map*,
142 			    struct uvm_map_deadq*, vsize_t, int);
143 void			 uvm_map_freelist_update_clear(struct vm_map*,
144 			    struct uvm_map_deadq*);
145 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
146 void			 uvm_map_freelist_update(struct vm_map*,
147 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
148 			    vaddr_t, vaddr_t, int);
149 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
150 			    vaddr_t, vaddr_t, int);
151 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
152 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
153 			    int);
154 int			 uvm_map_findspace(struct vm_map*,
155 			    struct vm_map_entry**, struct vm_map_entry**,
156 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
157 			    vaddr_t);
158 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
159 void			 uvm_map_addr_augment(struct vm_map_entry*);
160 
161 int			 uvm_map_inentry_recheck(u_long, vaddr_t,
162 			     struct p_inentry *);
163 boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
164 			     vaddr_t, int (*)(vm_map_entry_t), u_long);
165 /*
166  * Tree management functions.
167  */
168 
169 static __inline void	 uvm_mapent_copy(struct vm_map_entry*,
170 			    struct vm_map_entry*);
171 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
172 			    const struct vm_map_entry*);
173 void			 uvm_mapent_free_insert(struct vm_map*,
174 			    struct uvm_addr_state*, struct vm_map_entry*);
175 void			 uvm_mapent_free_remove(struct vm_map*,
176 			    struct uvm_addr_state*, struct vm_map_entry*);
177 void			 uvm_mapent_addr_insert(struct vm_map*,
178 			    struct vm_map_entry*);
179 void			 uvm_mapent_addr_remove(struct vm_map*,
180 			    struct vm_map_entry*);
181 void			 uvm_map_splitentry(struct vm_map*,
182 			    struct vm_map_entry*, struct vm_map_entry*,
183 			    vaddr_t);
184 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
185 int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
186 
187 /*
188  * uvm_vmspace_fork helper functions.
189  */
190 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
191 			    vsize_t, vm_prot_t, vm_prot_t,
192 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
193 			    int);
194 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
195 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
196 			    struct vm_map_entry*, struct uvm_map_deadq*);
197 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
198 			    struct vm_map*, struct vm_map_entry*,
199 			    struct uvm_map_deadq*);
200 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
201 			    struct vm_map*, struct vm_map_entry*,
202 			    struct uvm_map_deadq*);
203 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
204 			    struct vm_map*, struct vm_map_entry*,
205 			    struct uvm_map_deadq*);
206 
207 /*
208  * Tree validation.
209  */
210 #ifdef VMMAP_DEBUG
211 void			 uvm_tree_assert(struct vm_map*, int, char*,
212 			    char*, int);
213 #define UVM_ASSERT(map, cond, file, line)				\
214 	uvm_tree_assert((map), (cond), #cond, (file), (line))
215 void			 uvm_tree_sanity(struct vm_map*, char*, int);
216 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
217 void			 vmspace_validate(struct vm_map*);
218 #else
219 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
220 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
221 #define vmspace_validate(_map)				do {} while (0)
222 #endif
223 
224 /*
225  * All architectures will have pmap_prefer; provide defaults for those that lack it.
226  */
227 #ifndef PMAP_PREFER
228 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
229 #define PMAP_PREFER_OFFSET(off)	0
230 #define PMAP_PREFER(addr, off)	(addr)
231 #endif
232 
233 /*
234  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
235  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
236  *
237  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
238  * each time.
239  */
240 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
241 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
242 #define VM_MAP_KSIZE_ALLOCMUL	4
243 /*
244  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
245  * ahead.
246  */
247 #define FSPACE_DELTA		8
248 /*
249  * Put allocations adjacent to previous allocations when the free-space tree
250  * is larger than FSPACE_COMPACT entries.
251  *
252  * Alignment and PMAP_PREFER may still cause the entry to not be fully
253  * adjacent. Note that this strategy reduces memory fragmentation (by leaving
254  * a large space before or after the allocation).
255  */
256 #define FSPACE_COMPACT		128
257 /*
258  * Make the address selection skip at most this many bytes from the start of
259  * the free space in which the allocation takes place.
260  *
261  * The main idea behind a randomized address space is that an attacker cannot
262  * know where to target his attack. Therefore, the location of objects must be
263  * as random as possible. However, the goal is not to create the most sparse
264  * map that is possible.
265  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
266  * sizes, thereby reducing the sparseness. The biggest randomization comes
267  * from fragmentation, i.e. FSPACE_COMPACT.
268  */
269 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
270 /*
271  * Allow for small gaps in the overflow areas.
272  * Gap size is in bytes and does not have to be a multiple of page-size.
273  */
274 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
275 
276 /* auto-allocate address lower bound */
277 #define VMMAP_MIN_ADDR		PAGE_SIZE
278 
279 
280 #ifdef DEADBEEF0
281 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
282 #else
283 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
284 #endif
285 
286 #ifdef DEBUG
287 int uvm_map_printlocks = 0;
288 
289 #define LPRINTF(_args)							\
290 	do {								\
291 		if (uvm_map_printlocks)					\
292 			printf _args;					\
293 	} while (0)
294 #else
295 #define LPRINTF(_args)	do {} while (0)
296 #endif
297 
298 static struct mutex uvm_kmapent_mtx;
299 static struct timeval uvm_kmapent_last_warn_time;
300 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
301 
302 const char vmmapbsy[] = "vmmapbsy";
303 
304 /*
305  * pool for vmspace structures.
306  */
307 struct pool uvm_vmspace_pool;
308 
309 /*
310  * pool for dynamically-allocated map entries.
311  */
312 struct pool uvm_map_entry_pool;
313 struct pool uvm_map_entry_kmem_pool;
314 
315 /*
316  * This global represents the end of the kernel virtual address
317  * space. If we want to exceed this, we must grow the kernel
318  * virtual address space dynamically.
319  *
320  * Note, this variable is locked by kernel_map's lock.
321  */
322 vaddr_t uvm_maxkaddr;
323 
324 /*
325  * Locking predicate.
326  */
327 #define UVM_MAP_REQ_WRITE(_map)						\
328 	do {								\
329 		if ((_map)->ref_count > 0) {				\
330 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
331 				rw_assert_wrlock(&(_map)->lock);	\
332 			else						\
333 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
334 		}							\
335 	} while (0)
336 
337 #define	vm_map_modflags(map, set, clear)				\
338 	do {								\
339 		mtx_enter(&(map)->flags_lock);				\
340 		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
341 		mtx_leave(&(map)->flags_lock);				\
342 	} while (0)
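
/*
 * Example (illustrative sketch): toggling a map flag under flags_lock,
 * assuming a flag bit such as VM_MAP_BUSY from <uvm/uvm_map.h>:
 *
 *	vm_map_modflags(map, VM_MAP_BUSY, 0);	(set the busy bit)
 *	...
 *	vm_map_modflags(map, 0, VM_MAP_BUSY);	(clear it again)
 */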
343 
344 
345 /*
346  * Tree describing entries by address.
347  *
348  * Addresses are unique.
349  * Entries with start == end may only exist if they are the first entry
350  * (sorted by address) within a free-memory tree.
351  */
352 
353 static inline int
354 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
355     const struct vm_map_entry *e2)
356 {
357 	return e1->start < e2->start ? -1 : e1->start > e2->start;
358 }
359 
360 /*
361  * Copy mapentry.
362  */
363 static __inline void
364 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
365 {
366 	caddr_t csrc, cdst;
367 	size_t sz;
368 
369 	csrc = (caddr_t)src;
370 	cdst = (caddr_t)dst;
371 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
372 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
373 
374 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
375 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
376 	memcpy(cdst, csrc, sz);
377 }
378 
379 /*
380  * Handle free-list insertion.
381  */
382 void
383 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
384     struct vm_map_entry *entry)
385 {
386 	const struct uvm_addr_functions *fun;
387 #ifdef VMMAP_DEBUG
388 	vaddr_t min, max, bound;
389 #endif
390 
391 #ifdef VMMAP_DEBUG
392 	/*
393 	 * Boundary check.
394 	 * Boundaries are folded if they go on the same free list.
395 	 */
396 	min = VMMAP_FREE_START(entry);
397 	max = VMMAP_FREE_END(entry);
398 
399 	while (min < max) {
400 		bound = uvm_map_boundary(map, min, max);
401 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
402 		min = bound;
403 	}
404 #endif
405 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
406 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
407 
408 	UVM_MAP_REQ_WRITE(map);
409 
410 	/* Actual insert: forward to uaddr pointer. */
411 	if (uaddr != NULL) {
412 		fun = uaddr->uaddr_functions;
413 		KDASSERT(fun != NULL);
414 		if (fun->uaddr_free_insert != NULL)
415 			(*fun->uaddr_free_insert)(map, uaddr, entry);
416 		entry->etype |= UVM_ET_FREEMAPPED;
417 	}
418 
419 	/* Update fspace augmentation. */
420 	uvm_map_addr_augment(entry);
421 }
422 
423 /*
424  * Handle free-list removal.
425  */
426 void
427 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
428     struct vm_map_entry *entry)
429 {
430 	const struct uvm_addr_functions *fun;
431 
432 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
433 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
434 	UVM_MAP_REQ_WRITE(map);
435 
436 	if (uaddr != NULL) {
437 		fun = uaddr->uaddr_functions;
438 		if (fun->uaddr_free_remove != NULL)
439 			(*fun->uaddr_free_remove)(map, uaddr, entry);
440 		entry->etype &= ~UVM_ET_FREEMAPPED;
441 	}
442 }
443 
444 /*
445  * Handle address tree insertion.
446  */
447 void
448 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
449 {
450 	struct vm_map_entry *res;
451 
452 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
453 		panic("uvm_mapent_addr_insert: entry still in addr list");
454 	KDASSERT(entry->start <= entry->end);
455 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
456 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
457 
458 	UVM_MAP_REQ_WRITE(map);
459 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
460 	if (res != NULL) {
461 		panic("uvm_mapent_addr_insert: map %p entry %p "
462 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
463 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
464 		    map, entry,
465 		    entry->start, entry->end, entry->guard, entry->fspace,
466 		    res, res->start, res->end, res->guard, res->fspace);
467 	}
468 }
469 
470 /*
471  * Handle address tree removal.
472  */
473 void
474 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
475 {
476 	struct vm_map_entry *res;
477 
478 	UVM_MAP_REQ_WRITE(map);
479 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
480 	if (res != entry)
481 		panic("uvm_mapent_addr_remove");
482 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
483 }
484 
485 /*
486  * uvm_map_reference: add reference to a map
487  *
488  * XXX check map reference counter lock
489  */
490 #define uvm_map_reference(_map)						\
491 	do {								\
492 		(_map)->ref_count++;				\
493 	} while (0)
494 
495 /*
496  * Calculate the dused delta.
497  */
498 vsize_t
499 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
500 {
501 	struct vmspace *vm;
502 	vsize_t sz;
503 	vaddr_t lmax;
504 	vaddr_t stack_begin, stack_end; /* Position of stack. */
505 
506 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
507 	vm = (struct vmspace *)map;
508 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
509 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
510 
511 	sz = 0;
512 	while (min != max) {
513 		lmax = max;
514 		if (min < stack_begin && lmax > stack_begin)
515 			lmax = stack_begin;
516 		else if (min < stack_end && lmax > stack_end)
517 			lmax = stack_end;
518 
519 		if (min >= stack_begin && min < stack_end) {
520 			/* nothing */
521 		} else
522 			sz += lmax - min;
523 		min = lmax;
524 	}
525 
526 	return sz >> PAGE_SHIFT;
527 }
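
/*
 * Worked example (assumed values, 4K pages): with min = 0x1000,
 * max = 0x8000 and the stack occupying [0x4000, 0x6000), the loop
 * counts [0x1000, 0x4000) and [0x6000, 0x8000) and skips the stack:
 *
 *	sz = 0x3000 + 0x2000 = 0x5000, so 5 pages are returned.
 */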
528 
529 /*
530  * Find the entry describing the given address.
531  */
532 struct vm_map_entry*
533 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
534 {
535 	struct vm_map_entry *iter;
536 
537 	iter = RBT_ROOT(uvm_map_addr, atree);
538 	while (iter != NULL) {
539 		if (iter->start > addr)
540 			iter = RBT_LEFT(uvm_map_addr, iter);
541 		else if (VMMAP_FREE_END(iter) <= addr)
542 			iter = RBT_RIGHT(uvm_map_addr, iter);
543 		else
544 			return iter;
545 	}
546 	return NULL;
547 }
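
/*
 * Example (illustrative sketch, assuming 4K pages): an entry mapping
 * [0x1000, 0x3000) with two pages of free space behind it
 * (VMMAP_FREE_END == 0x5000) is returned for any lookup in
 * [0x1000, 0x5000), i.e. for the mapped range and its trailing gap:
 *
 *	entry = uvm_map_entrybyaddr(&map->addr, 0x4000);
 *	entry->start == 0x1000 && entry->end == 0x3000
 */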
548 
549 /*
550  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
551  *
552  * Push dead entries into a linked list.
553  * Since the linked list abuses the address tree for storage, the entry
554  * must not be linked in a map.
555  *
556  * The deadq must be initialized with TAILQ_INIT() before the first call to
557  * this macro.  uvm_unmap_detach(deadq, 0) will remove the dead entries.
558  */
559 static __inline void
560 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
561 {
562 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
563 }
564 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
565 	dead_entry_push((_headptr), (_entry))
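
/*
 * Example (sketch of the pattern used by the callers in this file):
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	...				(collect entries under the map lock)
 *	DEAD_ENTRY_PUSH(&dead, entry);
 *	...
 *	uvm_unmap_detach(&dead, 0);	(reap them after unlocking)
 */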
566 
567 /*
568  * Helper function for uvm_map_findspace_tree.
569  *
570  * Given allocation constraints and pmap constraints, finds the
571  * lowest and highest address in a range that can be used for the
572  * allocation.
573  *
574  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
575  *
576  *
577  * Big chunk of math with a seasoning of dragons.
578  */
579 int
580 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
581     struct vm_map_entry *sel, vaddr_t align,
582     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
583 {
584 	vaddr_t sel_min, sel_max;
585 #ifdef PMAP_PREFER
586 	vaddr_t pmap_min, pmap_max;
587 #endif /* PMAP_PREFER */
588 #ifdef DIAGNOSTIC
589 	int bad;
590 #endif /* DIAGNOSTIC */
591 
592 	sel_min = VMMAP_FREE_START(sel);
593 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
594 
595 #ifdef PMAP_PREFER
596 
597 	/*
598 	 * There are two special cases in which we can satisfy both the align
599 	 * requirement and the pmap_prefer requirement:
600 	 * - when pmap_off == 0, we always select the larger of the two alignments
601 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
602 	 *   satisfy the pmap_align requirement and automatically
603 	 *   satisfy the align requirement.
604 	 */
605 	if (align > PAGE_SIZE &&
606 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
607 		/*
608 		 * Simple case: only use align.
609 		 */
610 		sel_min = roundup(sel_min, align);
611 		sel_max &= ~(align - 1);
612 
613 		if (sel_min > sel_max)
614 			return ENOMEM;
615 
616 		/* Correct for bias. */
617 		if (sel_max - sel_min > FSPACE_BIASGAP) {
618 			if (bias > 0) {
619 				sel_min = sel_max - FSPACE_BIASGAP;
620 				sel_min = roundup(sel_min, align);
621 			} else if (bias < 0) {
622 				sel_max = sel_min + FSPACE_BIASGAP;
623 				sel_max &= ~(align - 1);
624 			}
625 		}
626 	} else if (pmap_align != 0) {
627 		/*
628 		 * Special case: satisfy both pmap_prefer and
629 		 * align argument.
630 		 */
631 		pmap_max = sel_max & ~(pmap_align - 1);
632 		pmap_min = sel_min;
633 		if (pmap_max < sel_min)
634 			return ENOMEM;
635 
636 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
637 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
638 			pmap_min = pmap_max - FSPACE_BIASGAP;
639 		/* Align pmap_min. */
640 		pmap_min &= ~(pmap_align - 1);
641 		if (pmap_min < sel_min)
642 			pmap_min += pmap_align;
643 		if (pmap_min > pmap_max)
644 			return ENOMEM;
645 
646 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
647 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
648 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
649 			    ~(pmap_align - 1);
650 		}
651 		if (pmap_min > pmap_max)
652 			return ENOMEM;
653 
654 		/* Apply pmap prefer offset. */
655 		pmap_max |= pmap_off;
656 		if (pmap_max > sel_max)
657 			pmap_max -= pmap_align;
658 		pmap_min |= pmap_off;
659 		if (pmap_min < sel_min)
660 			pmap_min += pmap_align;
661 
662 		/*
663 		 * Fixup: it's possible that pmap_min and pmap_max
664 		 * cross each other. In this case, try to find one
665 		 * address that is allowed.
666 		 * (This usually happens in biased case.)
667 		 */
668 		if (pmap_min > pmap_max) {
669 			if (pmap_min < sel_max)
670 				pmap_max = pmap_min;
671 			else if (pmap_max > sel_min)
672 				pmap_min = pmap_max;
673 			else
674 				return ENOMEM;
675 		}
676 
677 		/* Internal validation. */
678 		KDASSERT(pmap_min <= pmap_max);
679 
680 		sel_min = pmap_min;
681 		sel_max = pmap_max;
682 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
683 		sel_min = sel_max - FSPACE_BIASGAP;
684 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
685 		sel_max = sel_min + FSPACE_BIASGAP;
686 
687 #else
688 
689 	if (align > PAGE_SIZE) {
690 		sel_min = roundup(sel_min, align);
691 		sel_max &= ~(align - 1);
692 		if (sel_min > sel_max)
693 			return ENOMEM;
694 
695 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
696 			if (bias > 0) {
697 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
698 				    align);
699 			} else {
700 				sel_max = (sel_min + FSPACE_BIASGAP) &
701 				    ~(align - 1);
702 			}
703 		}
704 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
705 		sel_min = sel_max - FSPACE_BIASGAP;
706 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
707 		sel_max = sel_min + FSPACE_BIASGAP;
708 
709 #endif
710 
711 	if (sel_min > sel_max)
712 		return ENOMEM;
713 
714 #ifdef DIAGNOSTIC
715 	bad = 0;
716 	/* Lower boundary check. */
717 	if (sel_min < VMMAP_FREE_START(sel)) {
718 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
719 		    sel_min, VMMAP_FREE_START(sel));
720 		bad++;
721 	}
722 	/* Upper boundary check. */
723 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
724 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
725 		    sel_max,
726 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
727 		bad++;
728 	}
729 	/* Lower boundary alignment. */
730 	if (align != 0 && (sel_min & (align - 1)) != 0) {
731 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
732 		    sel_min, align);
733 		bad++;
734 	}
735 	/* Upper boundary alignment. */
736 	if (align != 0 && (sel_max & (align - 1)) != 0) {
737 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
738 		    sel_max, align);
739 		bad++;
740 	}
741 	/* Lower boundary PMAP_PREFER check. */
742 	if (pmap_align != 0 && align == 0 &&
743 	    (sel_min & (pmap_align - 1)) != pmap_off) {
744 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
745 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
746 		bad++;
747 	}
748 	/* Upper boundary PMAP_PREFER check. */
749 	if (pmap_align != 0 && align == 0 &&
750 	    (sel_max & (pmap_align - 1)) != pmap_off) {
751 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
752 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
753 		bad++;
754 	}
755 
756 	if (bad) {
757 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
758 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
759 		    "bias = %d, "
760 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
761 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
762 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
763 	}
764 #endif /* DIAGNOSTIC */
765 
766 	*min = sel_min;
767 	*max = sel_max;
768 	return 0;
769 }
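
/*
 * Worked example (assumed values, 4K pages, align only): selecting from
 * free space [0x1000, 0x9000) with sz = 0x2000, guardpg set and
 * align = 0x4000:
 *
 *	sel_min = roundup(0x1000, 0x4000)               = 0x4000
 *	sel_max = (0x9000 - 0x2000 - 0x1000) & ~0x3fff  = 0x4000
 *
 * leaving 0x4000 as the only candidate start address; the mapping
 * occupies [0x4000, 0x6000) with its guard page at [0x6000, 0x7000).
 */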
770 
771 /*
772  * Test if memory starting at addr with sz bytes is free.
773  *
774  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
775  * the space.
776  * If called with prefilled *start_ptr and *end_ptr, they must be correct.
777  */
778 int
779 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
780     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
781     vaddr_t addr, vsize_t sz)
782 {
783 	struct uvm_addr_state *free;
784 	struct uvm_map_addr *atree;
785 	struct vm_map_entry *i, *i_end;
786 
787 	if (addr + sz < addr)
788 		return 0;
789 
790 	/*
791 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
792 	 */
793 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
794 		if (addr + sz > uvm_maxkaddr)
795 			return 0;
796 	}
797 
798 	atree = &map->addr;
799 
800 	/*
801 	 * Fill in first, last, so they point at the entries containing the
802 	 * first and last address of the range.
803 	 * Note that if they are not NULL, we don't perform the lookup.
804 	 */
805 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
806 	if (*start_ptr == NULL) {
807 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
808 		if (*start_ptr == NULL)
809 			return 0;
810 	} else
811 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
812 	if (*end_ptr == NULL) {
813 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
814 			*end_ptr = *start_ptr;
815 		else {
816 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
817 			if (*end_ptr == NULL)
818 				return 0;
819 		}
820 	} else
821 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
822 
823 	/* Validation. */
824 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
825 	KDASSERT((*start_ptr)->start <= addr &&
826 	    VMMAP_FREE_END(*start_ptr) > addr &&
827 	    (*end_ptr)->start < addr + sz &&
828 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
829 
830 	/*
831 	 * Check that none of the entries intersects with <addr, addr+sz>.
832 	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
833 	 * considered unavailable unless called by those allocators.
834 	 */
835 	i = *start_ptr;
836 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
837 	for (; i != i_end;
838 	    i = RBT_NEXT(uvm_map_addr, i)) {
839 		if (i->start != i->end && i->end > addr)
840 			return 0;
841 
842 		/*
843 		 * uaddr_exe and uaddr_brk_stack may only be used
844 		 * by these allocators and the NULL uaddr (i.e. no
845 		 * uaddr).
846 		 * Reject if this requirement is not met.
847 		 */
848 		if (uaddr != NULL) {
849 			free = uvm_map_uaddr_e(map, i);
850 
851 			if (uaddr != free && free != NULL &&
852 			    (free == map->uaddr_exe ||
853 			     free == map->uaddr_brk_stack))
854 				return 0;
855 		}
856 	}
857 
858 	return -1;
859 }
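
/*
 * Example (sketch of how the callers in this file use it): with first
 * and last preset to NULL, the entry lookups are performed internally:
 *
 *	first = last = NULL;
 *	if (!uvm_map_isavail(map, NULL, &first, &last, addr, sz))
 *		return ENOMEM;		(range is at least partially in use)
 */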
860 
861 /*
862  * Invoke each address selector until an address is found.
863  * Will not invoke uaddr_exe.
864  */
865 int
866 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
867     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
868     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
869 {
870 	struct uvm_addr_state *uaddr;
871 	int i;
872 
873 	/*
874 	 * Allocation for sz bytes at any address,
875 	 * using the addr selectors in order.
876 	 */
877 	for (i = 0; i < nitems(map->uaddr_any); i++) {
878 		uaddr = map->uaddr_any[i];
879 
880 		if (uvm_addr_invoke(map, uaddr, first, last,
881 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
882 			return 0;
883 	}
884 
885 	/* Fall back to brk() and stack() address selectors. */
886 	uaddr = map->uaddr_brk_stack;
887 	if (uvm_addr_invoke(map, uaddr, first, last,
888 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
889 		return 0;
890 
891 	return ENOMEM;
892 }
893 
894 /* Calculate entry augmentation value. */
895 vsize_t
896 uvm_map_addr_augment_get(struct vm_map_entry *entry)
897 {
898 	vsize_t			 augment;
899 	struct vm_map_entry	*left, *right;
900 
901 	augment = entry->fspace;
902 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
903 		augment = MAX(augment, left->fspace_augment);
904 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
905 		augment = MAX(augment, right->fspace_augment);
906 	return augment;
907 }
908 
909 /*
910  * Update augmentation data in entry.
911  */
912 void
913 uvm_map_addr_augment(struct vm_map_entry *entry)
914 {
915 	vsize_t			 augment;
916 
917 	while (entry != NULL) {
918 		/* Calculate value for augmentation. */
919 		augment = uvm_map_addr_augment_get(entry);
920 
921 		/*
922 		 * Descend update.
923 		 * Once we find an entry that already has the correct value,
924 		 * stop, since it means all its parents will use the correct
925 		 * value too.
926 		 */
927 		if (entry->fspace_augment == augment)
928 			return;
929 		entry->fspace_augment = augment;
930 		entry = RBT_PARENT(uvm_map_addr, entry);
931 	}
932 }
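
/*
 * Example: fspace_augment is the largest fspace in the subtree rooted at
 * the entry.  With entry->fspace = 2 pages, a left subtree whose augment
 * is 5 pages and a right subtree whose augment is 1 page:
 *
 *	augment = MAX(2, MAX(5, 1)) pages = 5 pages
 *
 * which lets the address selectors skip subtrees that cannot hold a gap
 * of the requested size.
 */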
933 
934 /*
935  * uvm_mapanon: establish a valid mapping in map for an anon
936  *
937  * => *addr and sz must be a multiple of PAGE_SIZE.
938  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
939  * => map must be unlocked.
940  *
941  * => align: align vaddr, must be a power-of-2.
942  *    Align is only a hint and will be ignored if the alignment fails.
943  */
944 int
945 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
946     vsize_t align, unsigned int flags)
947 {
948 	struct vm_map_entry	*first, *last, *entry, *new;
949 	struct uvm_map_deadq	 dead;
950 	vm_prot_t		 prot;
951 	vm_prot_t		 maxprot;
952 	vm_inherit_t		 inherit;
953 	int			 advice;
954 	int			 error;
955 	vaddr_t			 pmap_align, pmap_offset;
956 	vaddr_t			 hint;
957 
958 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
959 	KASSERT(map != kernel_map);
960 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
961 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
962 	splassert(IPL_NONE);
963 	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
964 
965 	/*
966 	 * We use pmap_align and pmap_offset as alignment and offset variables.
967 	 *
968 	 * Because the align parameter takes precedence over pmap prefer,
969 	 * pmap_align is set to align (with pmap_offset = 0) whenever
970 	 * pmap_prefer cannot provide the requested alignment.
971 	 */
972 	pmap_align = MAX(align, PAGE_SIZE);
973 	pmap_offset = 0;
974 
975 	/* Decode parameters. */
976 	prot = UVM_PROTECTION(flags);
977 	maxprot = UVM_MAXPROTECTION(flags);
978 	advice = UVM_ADVICE(flags);
979 	inherit = UVM_INHERIT(flags);
980 	error = 0;
981 	hint = trunc_page(*addr);
982 	TAILQ_INIT(&dead);
983 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
984 	KASSERT((align & (align - 1)) == 0);
985 
986 	/* Check protection. */
987 	if ((prot & maxprot) != prot)
988 		return EACCES;
989 
990 	/*
991 	 * Before grabbing the lock, allocate a map entry for later
992 	 * use to ensure we don't wait for memory while holding the
993 	 * vm_map_lock.
994 	 */
995 	new = uvm_mapent_alloc(map, flags);
996 	if (new == NULL)
997 		return(ENOMEM);
998 
999 	vm_map_lock(map);
1000 	first = last = NULL;
1001 	if (flags & UVM_FLAG_FIXED) {
1002 		/*
1003 		 * Fixed location.
1004 		 *
1005 		 * Note: we ignore align, pmap_prefer.
1006 		 * Fill in first, last and *addr.
1007 		 */
1008 		KASSERT((*addr & PAGE_MASK) == 0);
1009 
1010 		/* Check that the space is available. */
1011 		if (flags & UVM_FLAG_UNMAP) {
1012 			if ((flags & UVM_FLAG_STACK) &&
1013 			    !uvm_map_is_stack_remappable(map, *addr, sz)) {
1014 				error = EINVAL;
1015 				goto unlock;
1016 			}
1017 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1018 		}
1019 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1020 			error = ENOMEM;
1021 			goto unlock;
1022 		}
1023 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1024 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1025 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1026 		/*
1027 		 * Address used as hint.
1028 		 *
1029 		 * Note: we enforce the alignment restriction,
1030 		 * but ignore pmap_prefer.
1031 		 */
1032 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1033 		/* Run selection algorithm for executables. */
1034 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1035 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1036 
1037 		if (error != 0)
1038 			goto unlock;
1039 	} else {
1040 		/* Update freelists from vmspace. */
1041 		uvm_map_vmspace_update(map, &dead, flags);
1042 
1043 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1044 		    pmap_align, pmap_offset, prot, hint);
1045 
1046 		if (error != 0)
1047 			goto unlock;
1048 	}
1049 
1050 	/* Double-check that the selected address range doesn't overflow. */
1051 	if (*addr + sz < *addr) {
1052 		error = ENOMEM;
1053 		goto unlock;
1054 	}
1055 
1056 	/* If we only want a query, return now. */
1057 	if (flags & UVM_FLAG_QUERY) {
1058 		error = 0;
1059 		goto unlock;
1060 	}
1061 
1062 	/*
1063 	 * Create new entry.
1064 	 * first and last may be invalidated after this call.
1065 	 */
1066 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1067 	    new);
1068 	if (entry == NULL) {
1069 		error = ENOMEM;
1070 		goto unlock;
1071 	}
1072 	new = NULL;
1073 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1074 	entry->object.uvm_obj = NULL;
1075 	entry->offset = 0;
1076 	entry->protection = prot;
1077 	entry->max_protection = maxprot;
1078 	entry->inheritance = inherit;
1079 	entry->wired_count = 0;
1080 	entry->advice = advice;
1081 	if (prot & PROT_WRITE)
1082 		map->wserial++;
1083 	if (flags & UVM_FLAG_SYSCALL) {
1084 		entry->etype |= UVM_ET_SYSCALL;
1085 		map->wserial++;
1086 	}
1087 	if (flags & UVM_FLAG_STACK) {
1088 		entry->etype |= UVM_ET_STACK;
1089 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
1090 			map->sserial++;
1091 	}
1092 	if (flags & UVM_FLAG_COPYONW) {
1093 		entry->etype |= UVM_ET_COPYONWRITE;
1094 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1095 			entry->etype |= UVM_ET_NEEDSCOPY;
1096 	}
1097 	if (flags & UVM_FLAG_CONCEAL)
1098 		entry->etype |= UVM_ET_CONCEAL;
1099 	if (flags & UVM_FLAG_OVERLAY) {
1100 		KERNEL_LOCK();
1101 		entry->aref.ar_pageoff = 0;
1102 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1103 		KERNEL_UNLOCK();
1104 	}
1105 
1106 	/* Update map and process statistics. */
1107 	map->size += sz;
1108 	if (prot != PROT_NONE) {
1109 		((struct vmspace *)map)->vm_dused +=
1110 		    uvmspace_dused(map, *addr, *addr + sz);
1111 	}
1112 
1113 unlock:
1114 	vm_map_unlock(map);
1115 
1116 	/*
1117 	 * Remove dead entries.
1118 	 *
1119 	 * Dead entries may be the result of merging.
1120 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1121 	 * destroy free-space entries.
1122 	 */
1123 	uvm_unmap_detach(&dead, 0);
1124 
1125 	if (new)
1126 		uvm_mapent_free(new);
1127 	return error;
1128 }
1129 
1130 /*
1131  * uvm_map: establish a valid mapping in map
1132  *
1133  * => *addr and sz must be a multiple of PAGE_SIZE.
1134  * => map must be unlocked.
1135  * => <uobj,uoffset> value meanings (4 cases):
1136  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1137  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1138  *	[3] <uobj,uoffset>		== normal mapping
1139  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1140  *
1141  *   case [4] is for kernel mappings where we don't know the offset until
1142  *   we've found a virtual address.   note that kernel object offsets are
1143  *   always relative to vm_map_min(kernel_map).
1144  *
1145  * => align: align vaddr, must be a power-of-2.
1146  *    Align is only a hint and will be ignored if the alignment fails.
1147  */
1148 int
1149 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
1150     struct uvm_object *uobj, voff_t uoffset,
1151     vsize_t align, unsigned int flags)
1152 {
1153 	struct vm_map_entry	*first, *last, *entry, *new;
1154 	struct uvm_map_deadq	 dead;
1155 	vm_prot_t		 prot;
1156 	vm_prot_t		 maxprot;
1157 	vm_inherit_t		 inherit;
1158 	int			 advice;
1159 	int			 error;
1160 	vaddr_t			 pmap_align, pmap_offset;
1161 	vaddr_t			 hint;
1162 
1163 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1164 		splassert(IPL_NONE);
1165 	else
1166 		splassert(IPL_VM);
1167 
1168 	/*
1169 	 * We use pmap_align and pmap_offset as alignment and offset variables.
1170 	 *
1171 	 * Because the align parameter takes precedence over pmap prefer,
1172 	 * pmap_align is set to align (with pmap_offset = 0) whenever
1173 	 * pmap_prefer cannot provide the requested alignment.
1174 	 */
1175 	if (uoffset == UVM_UNKNOWN_OFFSET) {
1176 		pmap_align = MAX(align, PAGE_SIZE);
1177 		pmap_offset = 0;
1178 	} else {
1179 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
1180 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
1181 
1182 		if (align == 0 ||
1183 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
1184 			/* pmap_offset satisfies align, no change. */
1185 		} else {
1186 			/* Align takes precedence over pmap prefer. */
1187 			pmap_align = align;
1188 			pmap_offset = 0;
1189 		}
1190 	}
1191 
1192 	/* Decode parameters. */
1193 	prot = UVM_PROTECTION(flags);
1194 	maxprot = UVM_MAXPROTECTION(flags);
1195 	advice = UVM_ADVICE(flags);
1196 	inherit = UVM_INHERIT(flags);
1197 	error = 0;
1198 	hint = trunc_page(*addr);
1199 	TAILQ_INIT(&dead);
1200 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1201 	KASSERT((align & (align - 1)) == 0);
1202 
1203 	/* Holes are incompatible with other types of mappings. */
1204 	if (flags & UVM_FLAG_HOLE) {
1205 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1206 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1207 	}
1208 
1209 	/* Unset hint for kernel_map non-fixed allocations. */
1210 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1211 		hint = 0;
1212 
1213 	/* Check protection. */
1214 	if ((prot & maxprot) != prot)
1215 		return EACCES;
1216 
1217 	if (map == kernel_map &&
1218 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1219 		panic("uvm_map: kernel map W^X violation requested");
1220 
1221 	/*
1222 	 * Before grabbing the lock, allocate a map entry for later
1223 	 * use to ensure we don't wait for memory while holding the
1224 	 * vm_map_lock.
1225 	 */
1226 	new = uvm_mapent_alloc(map, flags);
1227 	if (new == NULL)
1228 		return(ENOMEM);
1229 
1230 	if (flags & UVM_FLAG_TRYLOCK) {
1231 		if (vm_map_lock_try(map) == FALSE) {
1232 			error = EFAULT;
1233 			goto out;
1234 		}
1235 	} else {
1236 		vm_map_lock(map);
1237 	}
1238 
1239 	first = last = NULL;
1240 	if (flags & UVM_FLAG_FIXED) {
1241 		/*
1242 		 * Fixed location.
1243 		 *
1244 		 * Note: we ignore align, pmap_prefer.
1245 		 * Fill in first, last and *addr.
1246 		 */
1247 		KASSERT((*addr & PAGE_MASK) == 0);
1248 
1249 		/*
1250 		 * Grow pmap to include allocated address.
1251 		 * If the growth fails, the allocation will fail too.
1252 		 */
1253 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1254 		    uvm_maxkaddr < (*addr + sz)) {
1255 			uvm_map_kmem_grow(map, &dead,
1256 			    *addr + sz - uvm_maxkaddr, flags);
1257 		}
1258 
1259 		/* Check that the space is available. */
1260 		if (flags & UVM_FLAG_UNMAP)
1261 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1262 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1263 			error = ENOMEM;
1264 			goto unlock;
1265 		}
1266 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1267 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1268 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1269 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1270 		/*
1271 		 * Address used as hint.
1272 		 *
1273 		 * Note: we enforce the alignment restriction,
1274 		 * but ignore pmap_prefer.
1275 		 */
1276 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1277 		/* Run selection algorithm for executables. */
1278 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1279 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1280 
1281 		/* Grow kernel memory and try again. */
1282 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1283 			uvm_map_kmem_grow(map, &dead, sz, flags);
1284 
1285 			error = uvm_addr_invoke(map, map->uaddr_exe,
1286 			    &first, &last, addr, sz,
1287 			    pmap_align, pmap_offset, prot, hint);
1288 		}
1289 
1290 		if (error != 0)
1291 			goto unlock;
1292 	} else {
1293 		/* Update freelists from vmspace. */
1294 		if (map->flags & VM_MAP_ISVMSPACE)
1295 			uvm_map_vmspace_update(map, &dead, flags);
1296 
1297 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1298 		    pmap_align, pmap_offset, prot, hint);
1299 
1300 		/* Grow kernel memory and try again. */
1301 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1302 			uvm_map_kmem_grow(map, &dead, sz, flags);
1303 
1304 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1305 			    pmap_align, pmap_offset, prot, hint);
1306 		}
1307 
1308 		if (error != 0)
1309 			goto unlock;
1310 	}
1311 
1312 	/* Double-check that the selected address range doesn't overflow. */
1313 	if (*addr + sz < *addr) {
1314 		error = ENOMEM;
1315 		goto unlock;
1316 	}
1317 
1318 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1319 	    uvm_maxkaddr >= *addr + sz);
1320 
1321 	/* If we only want a query, return now. */
1322 	if (flags & UVM_FLAG_QUERY) {
1323 		error = 0;
1324 		goto unlock;
1325 	}
1326 
1327 	if (uobj == NULL)
1328 		uoffset = 0;
1329 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1330 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1331 		uoffset = *addr - vm_map_min(kernel_map);
1332 	}
1333 
1334 	/*
1335 	 * Create new entry.
1336 	 * first and last may be invalidated after this call.
1337 	 */
1338 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1339 	    new);
1340 	if (entry == NULL) {
1341 		error = ENOMEM;
1342 		goto unlock;
1343 	}
1344 	new = NULL;
1345 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1346 	entry->object.uvm_obj = uobj;
1347 	entry->offset = uoffset;
1348 	entry->protection = prot;
1349 	entry->max_protection = maxprot;
1350 	entry->inheritance = inherit;
1351 	entry->wired_count = 0;
1352 	entry->advice = advice;
1353 	if (prot & PROT_WRITE)
1354 		map->wserial++;
1355 	if (flags & UVM_FLAG_SYSCALL) {
1356 		entry->etype |= UVM_ET_SYSCALL;
1357 		map->wserial++;
1358 	}
1359 	if (flags & UVM_FLAG_STACK) {
1360 		entry->etype |= UVM_ET_STACK;
1361 		if (flags & UVM_FLAG_UNMAP)
1362 			map->sserial++;
1363 	}
1364 	if (uobj)
1365 		entry->etype |= UVM_ET_OBJ;
1366 	else if (flags & UVM_FLAG_HOLE)
1367 		entry->etype |= UVM_ET_HOLE;
1368 	if (flags & UVM_FLAG_NOFAULT)
1369 		entry->etype |= UVM_ET_NOFAULT;
1370 	if (flags & UVM_FLAG_WC)
1371 		entry->etype |= UVM_ET_WC;
1372 	if (flags & UVM_FLAG_COPYONW) {
1373 		entry->etype |= UVM_ET_COPYONWRITE;
1374 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1375 			entry->etype |= UVM_ET_NEEDSCOPY;
1376 	}
1377 	if (flags & UVM_FLAG_CONCEAL)
1378 		entry->etype |= UVM_ET_CONCEAL;
1379 	if (flags & UVM_FLAG_OVERLAY) {
1380 		entry->aref.ar_pageoff = 0;
1381 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1382 	}
1383 
1384 	/* Update map and process statistics. */
1385 	if (!(flags & UVM_FLAG_HOLE)) {
1386 		map->size += sz;
1387 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
1388 		    prot != PROT_NONE) {
1389 			((struct vmspace *)map)->vm_dused +=
1390 			    uvmspace_dused(map, *addr, *addr + sz);
1391 		}
1392 	}
1393 
1394 	/*
1395 	 * Try to merge entry.
1396 	 *
1397 	 * Userland allocations are kept separate most of the time.
1398 	 * Forgo the effort of merging what usually cannot be merged and
1399 	 * only attempt the merge if it concerns a kernel entry.
1400 	 */
1401 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1402 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1403 		uvm_mapent_tryjoin(map, entry, &dead);
1404 
1405 unlock:
1406 	vm_map_unlock(map);
1407 
1408 	/*
1409 	 * Remove dead entries.
1410 	 *
1411 	 * Dead entries may be the result of merging.
1412 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1413 	 * destroy free-space entries.
1414 	 */
1415 	if (map->flags & VM_MAP_INTRSAFE)
1416 		uvm_unmap_detach_intrsafe(&dead);
1417 	else
1418 		uvm_unmap_detach(&dead, 0);
1419 out:
1420 	if (new)
1421 		uvm_mapent_free(new);
1422 	return error;
1423 }
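
/*
 * Example (illustrative sketch, assuming the UVM_MAPFLAG() encoding and
 * uvm.kernel_object from the rest of uvm): a kernel-object mapping using
 * case [4] above, where the offset is derived from the chosen address:
 *
 *	vaddr_t va = 0;
 *
 *	error = uvm_map(kernel_map, &va, PAGE_SIZE, uvm.kernel_object,
 *	    UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
 *	    MAP_INHERIT_NONE, MADV_RANDOM, 0));
 */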
1424 
1425 /*
1426  * True iff e1 and e2 can be joined together.
1427  */
1428 int
1429 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1430     struct vm_map_entry *e2)
1431 {
1432 	KDASSERT(e1 != NULL && e2 != NULL);
1433 
1434 	/* Must be the same entry type and not have free memory between. */
1435 	if (e1->etype != e2->etype || e1->end != e2->start)
1436 		return 0;
1437 
1438 	/* Submaps are never joined. */
1439 	if (UVM_ET_ISSUBMAP(e1))
1440 		return 0;
1441 
1442 	/* Never merge wired memory. */
1443 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1444 		return 0;
1445 
1446 	/* Protection, inheritance and advice must be equal. */
1447 	if (e1->protection != e2->protection ||
1448 	    e1->max_protection != e2->max_protection ||
1449 	    e1->inheritance != e2->inheritance ||
1450 	    e1->advice != e2->advice)
1451 		return 0;
1452 
1453 	/* If uvm_object: object itself and offsets within object must match. */
1454 	if (UVM_ET_ISOBJ(e1)) {
1455 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1456 			return 0;
1457 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1458 			return 0;
1459 	}
1460 
1461 	/*
1462 	 * Cannot join shared amaps.
1463 	 * Note: no need to lock amap to look at refs, since we don't care
1464 	 * about its exact value.
1465 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1466 	 */
1467 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1468 		return 0;
1469 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1470 		return 0;
1471 
1472 	/* Apparently, e1 and e2 match. */
1473 	return 1;
1474 }
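
/*
 * Example: two object-backed entries [A, B) and [B, C) are only joinable
 * if they map the same object back to back, i.e.
 *
 *	e1->offset + (B - A) == e2->offset
 *
 * so that the merged entry [A, C) still maps a contiguous range of the
 * object starting at e1->offset.
 */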
1475 
1476 /*
1477  * Join support function.
1478  *
1479  * Returns the merged entry on success.
1480  * Returns NULL if the merge failed.
1481  */
1482 struct vm_map_entry*
1483 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1484     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1485 {
1486 	struct uvm_addr_state *free;
1487 
1488 	/*
1489 	 * Merging is not supported for map entries that
1490 	 * contain an amap in e1. This should never happen
1491 	 * anyway, because only kernel entries are merged.
1492 	 * These do not contain amaps.
1493 	 * e2 contains no real information in its amap,
1494 	 * so it can be erased immediately.
1495 	 */
1496 	KASSERT(e1->aref.ar_amap == NULL);
1497 
1498 	/*
1499 	 * Don't drop obj reference:
1500 	 * uvm_unmap_detach will do this for us.
1501 	 */
1502 	free = uvm_map_uaddr_e(map, e1);
1503 	uvm_mapent_free_remove(map, free, e1);
1504 
1505 	free = uvm_map_uaddr_e(map, e2);
1506 	uvm_mapent_free_remove(map, free, e2);
1507 	uvm_mapent_addr_remove(map, e2);
1508 	e1->end = e2->end;
1509 	e1->guard = e2->guard;
1510 	e1->fspace = e2->fspace;
1511 	uvm_mapent_free_insert(map, free, e1);
1512 
1513 	DEAD_ENTRY_PUSH(dead, e2);
1514 	return e1;
1515 }
1516 
1517 /*
1518  * Attempt forward and backward joining of entry.
1519  *
1520  * Returns entry after joins.
1521  * We are guaranteed that the amap of entry is either non-existent or
1522  * has never been used.
1523  */
1524 struct vm_map_entry*
1525 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1526     struct uvm_map_deadq *dead)
1527 {
1528 	struct vm_map_entry *other;
1529 	struct vm_map_entry *merged;
1530 
1531 	/* Merge with previous entry. */
1532 	other = RBT_PREV(uvm_map_addr, entry);
1533 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1534 		merged = uvm_mapent_merge(map, other, entry, dead);
1535 		if (merged)
1536 			entry = merged;
1537 	}
1538 
1539 	/*
1540 	 * Merge with next entry.
1541 	 *
1542 	 * Because amap can only extend forward and the next entry
1543 	 * probably contains sensible info, only perform forward merging
1544 	 * in the absence of an amap.
1545 	 */
1546 	other = RBT_NEXT(uvm_map_addr, entry);
1547 	if (other && entry->aref.ar_amap == NULL &&
1548 	    other->aref.ar_amap == NULL &&
1549 	    uvm_mapent_isjoinable(map, entry, other)) {
1550 		merged = uvm_mapent_merge(map, entry, other, dead);
1551 		if (merged)
1552 			entry = merged;
1553 	}
1554 
1555 	return entry;
1556 }
1557 
1558 /*
1559  * Kill entries that are no longer in a map.
1560  */
1561 void
1562 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1563 {
1564 	struct vm_map_entry *entry, *tmp;
1565 	int waitok = flags & UVM_PLA_WAITOK;
1566 
1567 	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1568 		/* Skip entries for which we have to grab the kernel lock. */
1569 		if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) ||
1570 		    UVM_ET_ISOBJ(entry))
1571 			continue;
1572 
1573 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1574 		uvm_mapent_free(entry);
1575 	}
1576 
1577 	if (TAILQ_EMPTY(deadq))
1578 		return;
1579 
1580 	KERNEL_LOCK();
1581 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1582 		if (waitok)
1583 			uvm_pause();
1584 		/* Drop reference to amap, if we've got one. */
1585 		if (entry->aref.ar_amap)
1586 			amap_unref(entry->aref.ar_amap,
1587 			    entry->aref.ar_pageoff,
1588 			    atop(entry->end - entry->start),
1589 			    flags & AMAP_REFALL);
1590 
1591 		/* Drop reference to our backing object, if we've got one. */
1592 		if (UVM_ET_ISSUBMAP(entry)) {
1593 			/* ... unlikely to happen, but play it safe */
1594 			uvm_map_deallocate(entry->object.sub_map);
1595 		} else if (UVM_ET_ISOBJ(entry) &&
1596 		    entry->object.uvm_obj->pgops->pgo_detach) {
1597 			entry->object.uvm_obj->pgops->pgo_detach(
1598 			    entry->object.uvm_obj);
1599 		}
1600 
1601 		/* Step to next. */
1602 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1603 		uvm_mapent_free(entry);
1604 	}
1605 	KERNEL_UNLOCK();
1606 }
1607 
1608 void
1609 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1610 {
1611 	struct vm_map_entry *entry;
1612 
1613 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1614 		KASSERT(entry->aref.ar_amap == NULL);
1615 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1616 		KASSERT(!UVM_ET_ISOBJ(entry));
1617 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1618 		uvm_mapent_free(entry);
1619 	}
1620 }
1621 
1622 /*
1623  * Create and insert new entry.
1624  *
1625  * Returned entry contains new addresses and is inserted properly in the tree.
1626  * first and last are (probably) no longer valid.
1627  */
1628 struct vm_map_entry*
1629 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1630     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1631     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1632 {
1633 	struct vm_map_entry *entry, *prev;
1634 	struct uvm_addr_state *free;
1635 	vaddr_t min, max;	/* free space boundaries for new entry */
1636 
1637 	KDASSERT(map != NULL);
1638 	KDASSERT(first != NULL);
1639 	KDASSERT(last != NULL);
1640 	KDASSERT(dead != NULL);
1641 	KDASSERT(sz > 0);
1642 	KDASSERT(addr + sz > addr);
1643 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1644 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1645 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1646 	uvm_tree_sanity(map, __FILE__, __LINE__);
1647 
1648 	min = addr + sz;
1649 	max = VMMAP_FREE_END(last);
1650 
1651 	/* Initialize new entry. */
1652 	if (new == NULL)
1653 		entry = uvm_mapent_alloc(map, flags);
1654 	else
1655 		entry = new;
1656 	if (entry == NULL)
1657 		return NULL;
1658 	entry->offset = 0;
1659 	entry->etype = 0;
1660 	entry->wired_count = 0;
1661 	entry->aref.ar_pageoff = 0;
1662 	entry->aref.ar_amap = NULL;
1663 
1664 	entry->start = addr;
1665 	entry->end = min;
1666 	entry->guard = 0;
1667 	entry->fspace = 0;
1668 
1669 	/* Reset free space in first. */
1670 	free = uvm_map_uaddr_e(map, first);
1671 	uvm_mapent_free_remove(map, free, first);
1672 	first->guard = 0;
1673 	first->fspace = 0;
1674 
1675 	/*
1676 	 * Remove all entries that are fully replaced.
1677 	 * We are iterating using last in reverse order.
1678 	 */
1679 	for (; first != last; last = prev) {
1680 		prev = RBT_PREV(uvm_map_addr, last);
1681 
1682 		KDASSERT(last->start == last->end);
1683 		free = uvm_map_uaddr_e(map, last);
1684 		uvm_mapent_free_remove(map, free, last);
1685 		uvm_mapent_addr_remove(map, last);
1686 		DEAD_ENTRY_PUSH(dead, last);
1687 	}
1688 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1689 	if (first->start == addr) {
1690 		uvm_mapent_addr_remove(map, first);
1691 		DEAD_ENTRY_PUSH(dead, first);
1692 	} else {
1693 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1694 		    addr, flags);
1695 	}
1696 
1697 	/* Finally, link in entry. */
1698 	uvm_mapent_addr_insert(map, entry);
1699 	uvm_map_fix_space(map, entry, min, max, flags);
1700 
1701 	uvm_tree_sanity(map, __FILE__, __LINE__);
1702 	return entry;
1703 }
1704 
1705 
1706 /*
1707  * uvm_mapent_alloc: allocate a map entry
1708  */
1709 struct vm_map_entry *
1710 uvm_mapent_alloc(struct vm_map *map, int flags)
1711 {
1712 	struct vm_map_entry *me, *ne;
1713 	int pool_flags;
1714 	int i;
1715 
1716 	pool_flags = PR_WAITOK;
1717 	if (flags & UVM_FLAG_TRYLOCK)
1718 		pool_flags = PR_NOWAIT;
1719 
1720 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1721 		mtx_enter(&uvm_kmapent_mtx);
1722 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1723 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1724 			    &kd_nowait);
1725 			if (ne == NULL)
1726 				panic("uvm_mapent_alloc: cannot allocate map "
1727 				    "entry");
1728 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1729 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1730 				    &ne[i], daddrs.addr_kentry);
1731 			}
1732 			if (ratecheck(&uvm_kmapent_last_warn_time,
1733 			    &uvm_kmapent_warn_rate))
1734 				printf("uvm_mapent_alloc: out of static "
1735 				    "map entries\n");
1736 		}
1737 		me = SLIST_FIRST(&uvm.kentry_free);
1738 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1739 		uvmexp.kmapent++;
1740 		mtx_leave(&uvm_kmapent_mtx);
1741 		me->flags = UVM_MAP_STATIC;
1742 	} else if (map == kernel_map) {
1743 		splassert(IPL_NONE);
1744 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1745 		if (me == NULL)
1746 			goto out;
1747 		me->flags = UVM_MAP_KMEM;
1748 	} else {
1749 		splassert(IPL_NONE);
1750 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1751 		if (me == NULL)
1752 			goto out;
1753 		me->flags = 0;
1754 	}
1755 
1756 	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1757 out:
1758 	return(me);
1759 }
1760 
1761 /*
1762  * uvm_mapent_free: free map entry
1763  *
1764  * => XXX: static pool for kernel map?
1765  */
1766 void
1767 uvm_mapent_free(struct vm_map_entry *me)
1768 {
1769 	if (me->flags & UVM_MAP_STATIC) {
1770 		mtx_enter(&uvm_kmapent_mtx);
1771 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1772 		uvmexp.kmapent--;
1773 		mtx_leave(&uvm_kmapent_mtx);
1774 	} else if (me->flags & UVM_MAP_KMEM) {
1775 		splassert(IPL_NONE);
1776 		pool_put(&uvm_map_entry_kmem_pool, me);
1777 	} else {
1778 		splassert(IPL_NONE);
1779 		pool_put(&uvm_map_entry_pool, me);
1780 	}
1781 }
1782 
1783 /*
1784  * uvm_map_lookup_entry: find map entry at or before an address.
1785  *
1786  * => map must at least be read-locked by caller
1787  * => entry is returned in "entry"
1788  * => return value is true if address is in the returned entry
1789  * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1790  * returned for those mappings.
1791  */
1792 boolean_t
1793 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1794     struct vm_map_entry **entry)
1795 {
1796 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1797 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1798 	    (*entry)->start <= address && (*entry)->end > address;
1799 }
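
/*
 * Example (illustrative sketch, not compiled; the example_* helper is
 * hypothetical): a typical caller of uvm_map_lookup_entry() holds at
 * least a read lock on the map around the lookup and any use of the
 * returned entry.
 */
#if 0
boolean_t
example_addr_is_mapped(struct vm_map *map, vaddr_t va)
{
	struct vm_map_entry *entry;
	boolean_t mapped;

	vm_map_lock_read(map);
	/* TRUE only when va falls inside a non-hole entry. */
	mapped = uvm_map_lookup_entry(map, trunc_page(va), &entry);
	vm_map_unlock_read(map);
	return mapped;
}
#endif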
1800 
1801 /*
1802  * The stack must be in a MAP_STACK entry.  PROT_NONE indicates that the
1803  * stack has not yet grown; in that case uvm_map_check_region_range() should
1804  * not cache the entry, because the growth won't be seen.
1805  */
1806 int
1807 uvm_map_inentry_sp(vm_map_entry_t entry)
1808 {
1809 	if ((entry->etype & UVM_ET_STACK) == 0) {
1810 		if (entry->protection == PROT_NONE)
1811 			return (-1);	/* don't update range */
1812 		return (0);
1813 	}
1814 	return (1);
1815 }
1816 
1817 /*
1818  * The system call must not come from a writeable entry; that would violate
1819  * W^X.  (It would be nice if we could spot aliasing, which is also bad.)
1820  *
1821  * The system call must come from an syscall-labeled entry (which are
1822  * the text regions of the main program, sigtramp, ld.so, or libc).
1823  */
1824 int
1825 uvm_map_inentry_pc(vm_map_entry_t entry)
1826 {
1827 	if (entry->protection & PROT_WRITE)
1828 		return (0);	/* not permitted */
1829 	if ((entry->etype & UVM_ET_SYSCALL) == 0)
1830 		return (0);	/* not permitted */
1831 	return (1);
1832 }
1833 
1834 int
1835 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1836 {
1837 	return (serial != ie->ie_serial || ie->ie_start == 0 ||
1838 	    addr < ie->ie_start || addr >= ie->ie_end);
1839 }
1840 
1841 /*
1842  * Inside a vm_map, look up the entry containing the given address and
1843  * verify it via the supplied function.  Remember the low and high addresses
1844  * of the region if it is valid and return TRUE, else return FALSE.
1845  */
1846 boolean_t
1847 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1848     int (*fn)(vm_map_entry_t), u_long serial)
1849 {
1850 	vm_map_t map = &p->p_vmspace->vm_map;
1851 	vm_map_entry_t entry;
1852 	int ret;
1853 
1854 	if (addr < map->min_offset || addr >= map->max_offset)
1855 		return (FALSE);
1856 
1857 	/* lock map */
1858 	vm_map_lock_read(map);
1859 
1860 	/* lookup */
1861 	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1862 		vm_map_unlock_read(map);
1863 		return (FALSE);
1864 	}
1865 
1866 	ret = (*fn)(entry);
1867 	if (ret == 0) {
1868 		vm_map_unlock_read(map);
1869 		return (FALSE);
1870 	} else if (ret == 1) {
1871 		ie->ie_start = entry->start;
1872 		ie->ie_end = entry->end;
1873 		ie->ie_serial = serial;
1874 	} else {
1875 		/* do not update, re-check later */
1876 	}
1877 	vm_map_unlock_read(map);
1878 	return (TRUE);
1879 }
1880 
1881 boolean_t
1882 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1883     const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
1884 {
1885 	union sigval sv;
1886 	boolean_t ok = TRUE;
1887 
1888 	if (uvm_map_inentry_recheck(serial, addr, ie)) {
1889 		KERNEL_LOCK();
1890 		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1891 		if (!ok) {
1892 			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1893 			    addr, ie->ie_start, ie->ie_end);
1894 			p->p_p->ps_acflag |= AMAP;
1895 			sv.sival_ptr = (void *)PROC_PC(p);
1896 			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1897 		}
1898 		KERNEL_UNLOCK();
1899 	}
1900 	return (ok);
1901 }
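
/*
 * Example (illustrative sketch, not compiled; example_check_sp is
 * hypothetical): how a syscall-entry path might use the cached range.
 * uvm_map_inentry_recheck() is the cheap test; on a miss,
 * uvm_map_inentry_fix() refreshes the cache and uvm_map_inentry()
 * delivers SIGSEGV when the check fails.  The format string mirrors
 * the ones used by the MD callers.
 */
#if 0
int
example_check_sp(struct proc *p, vaddr_t sp)
{
	if (!uvm_map_inentry(p, &p->p_spinentry, sp,
	    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
	    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
		return EPERM;	/* SIGSEGV already posted */
	return 0;
}
#endif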
1902 
1903 /*
1904  * Check whether the given address range can be converted to a MAP_STACK
1905  * mapping.
1906  *
1907  * Must be called with map locked.
1908  */
1909 boolean_t
1910 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
1911 {
1912 	vaddr_t end = addr + sz;
1913 	struct vm_map_entry *first, *iter, *prev = NULL;
1914 
1915 	if (!uvm_map_lookup_entry(map, addr, &first)) {
1916 		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1917 		    addr, end, map);
1918 		return FALSE;
1919 	}
1920 
1921 	/*
1922 	 * Check that the address range exists and is contiguous.
1923 	 */
1924 	for (iter = first; iter != NULL && iter->start < end;
1925 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1926 		/*
1927 		 * Make sure that we do not have holes in the range.
1928 		 */
1929 #if 0
1930 		if (prev != NULL) {
1931 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1932 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1933 			    prev->start, prev->end, iter->start, iter->end);
1934 		}
1935 #endif
1936 
1937 		if (prev != NULL && prev->end != iter->start) {
1938 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1939 			    "hole in range\n", addr, end, map);
1940 			return FALSE;
1941 		}
1942 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1943 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1944 			    "hole in range\n", addr, end, map);
1945 			return FALSE;
1946 		}
1947 	}
1948 
1949 	return TRUE;
1950 }
1951 
1952 /*
1953  * Remap the middle-pages of an existing mapping as a stack range.
1954  * If there exists a previous contiguous mapping with the given range
1955  * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1956  * mapping is dropped, and a new anon mapping is created and marked as
1957  * a stack.
1958  *
1959  * Must be called with map unlocked.
1960  */
1961 int
1962 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1963 {
1964 	vm_map_t map = &p->p_vmspace->vm_map;
1965 	vaddr_t start, end;
1966 	int error;
1967 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1968 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1969 	    MAP_INHERIT_COPY, MADV_NORMAL,
1970 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1971 	    UVM_FLAG_COPYONW);
1972 
1973 	start = round_page(addr);
1974 	end = trunc_page(addr + sz);
1975 #ifdef MACHINE_STACK_GROWS_UP
1976 	if (end == addr + sz)
1977 		end -= PAGE_SIZE;
1978 #else
1979 	if (start == addr)
1980 		start += PAGE_SIZE;
1981 #endif
1982 
1983 	if (start < map->min_offset || end >= map->max_offset || end < start)
1984 		return EINVAL;
1985 
1986 	error = uvm_mapanon(map, &start, end - start, 0, flags);
1987 	if (error != 0)
1988 		printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1989 
1990 	return error;
1991 }
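
/*
 * Worked example (illustrative, assuming 4KB pages and
 * MACHINE_STACK_GROWS_UP not defined): for addr = 0x1000 and
 * sz = 0x4000, start = round_page(0x1000) = 0x1000 equals addr and is
 * bumped to 0x2000, end = trunc_page(0x5000) = 0x5000, so the pages at
 * [0x2000, 0x5000) are remapped as MAP_STACK and the edge page at
 * 0x1000 is left untouched.
 */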
1992 
1993 /*
1994  * uvm_map_pie: return a random load address for a PIE executable
1995  * properly aligned.
1996  */
1997 #ifndef VM_PIE_MAX_ADDR
1998 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1999 #endif
2000 
2001 #ifndef VM_PIE_MIN_ADDR
2002 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
2003 #endif
2004 
2005 #ifndef VM_PIE_MIN_ALIGN
2006 #define VM_PIE_MIN_ALIGN PAGE_SIZE
2007 #endif
2008 
2009 vaddr_t
2010 uvm_map_pie(vaddr_t align)
2011 {
2012 	vaddr_t addr, space, min;
2013 
2014 	align = MAX(align, VM_PIE_MIN_ALIGN);
2015 
2016 	/* round up to next alignment */
2017 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
2018 
2019 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
2020 		return (align);
2021 
2022 	space = (VM_PIE_MAX_ADDR - min) / align;
2023 	space = MIN(space, (u_int32_t)-1);
2024 
2025 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
2026 	addr += min;
2027 
2028 	return (addr);
2029 }
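
/*
 * Example (illustrative sketch, not compiled; example_pie_alignment is
 * hypothetical): the address returned by uvm_map_pie() is a multiple of
 * the requested (power-of-two) alignment and, in the normal case, lies
 * below VM_PIE_MAX_ADDR.
 */
#if 0
void
example_pie_alignment(void)
{
	vaddr_t align = 1UL << 16;	/* 64KB request, illustrative */
	vaddr_t addr = uvm_map_pie(align);

	/* The result honours the alignment. */
	KASSERT((addr & (align - 1)) == 0);
}
#endif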
2030 
2031 void
2032 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
2033 {
2034 	struct uvm_map_deadq dead;
2035 
2036 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
2037 	    (end & (vaddr_t)PAGE_MASK) == 0);
2038 	TAILQ_INIT(&dead);
2039 	vm_map_lock(map);
2040 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
2041 	vm_map_unlock(map);
2042 
2043 	if (map->flags & VM_MAP_INTRSAFE)
2044 		uvm_unmap_detach_intrsafe(&dead);
2045 	else
2046 		uvm_unmap_detach(&dead, 0);
2047 }
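
/*
 * Example (illustrative sketch, not compiled; example_release_range is
 * hypothetical): releasing a page-aligned kernel VA range.  uvm_unmap()
 * takes and drops the map lock itself and detaches the dead entries
 * when done, so no further cleanup is needed by the caller.
 */
#if 0
void
example_release_range(vaddr_t va, vsize_t len)
{
	/* Start and end must be page aligned, as the KASSERT above enforces. */
	uvm_unmap(kernel_map, trunc_page(va), round_page(va + len));
}
#endif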
2048 
2049 /*
2050  * Mark entry as free.
2051  *
2052  * entry will be put on the dead list.
2053  * The free space will be merged into the previous or a new entry,
2054  * unless markfree is false.
2055  */
2056 void
2057 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
2058     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
2059     boolean_t markfree)
2060 {
2061 	struct uvm_addr_state	*free;
2062 	struct vm_map_entry	*prev;
2063 	vaddr_t			 addr;	/* Start of freed range. */
2064 	vaddr_t			 end;	/* End of freed range. */
2065 
2066 	prev = *prev_ptr;
2067 	if (prev == entry)
2068 		*prev_ptr = prev = NULL;
2069 
2070 	if (prev == NULL ||
2071 	    VMMAP_FREE_END(prev) != entry->start)
2072 		prev = RBT_PREV(uvm_map_addr, entry);
2073 
2074 	/* Entry is describing only free memory and has nothing to drain into. */
2075 	if (prev == NULL && entry->start == entry->end && markfree) {
2076 		*prev_ptr = entry;
2077 		return;
2078 	}
2079 
2080 	addr = entry->start;
2081 	end = VMMAP_FREE_END(entry);
2082 	free = uvm_map_uaddr_e(map, entry);
2083 	uvm_mapent_free_remove(map, free, entry);
2084 	uvm_mapent_addr_remove(map, entry);
2085 	DEAD_ENTRY_PUSH(dead, entry);
2086 
2087 	if (markfree) {
2088 		if (prev) {
2089 			free = uvm_map_uaddr_e(map, prev);
2090 			uvm_mapent_free_remove(map, free, prev);
2091 		}
2092 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
2093 	}
2094 }
2095 
2096 /*
2097  * Unwire and release referenced amap and object from map entry.
2098  */
2099 void
2100 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
2101 {
2102 	/* Unwire removed map entry. */
2103 	if (VM_MAPENT_ISWIRED(entry)) {
2104 		KERNEL_LOCK();
2105 		entry->wired_count = 0;
2106 		uvm_fault_unwire_locked(map, entry->start, entry->end);
2107 		KERNEL_UNLOCK();
2108 	}
2109 
2110 	/* Entry-type specific code. */
2111 	if (UVM_ET_ISHOLE(entry)) {
2112 		/* Nothing to be done for holes. */
2113 	} else if (map->flags & VM_MAP_INTRSAFE) {
2114 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2115 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
2116 		pmap_kremove(entry->start, entry->end - entry->start);
2117 	} else if (UVM_ET_ISOBJ(entry) &&
2118 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2119 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2120 		/*
2121 		 * Note: kernel object mappings are currently used in
2122 		 * two ways:
2123 		 *  [1] "normal" mappings of pages in the kernel object
2124 		 *  [2] uvm_km_valloc'd allocations in which we
2125 		 *      pmap_enter in some non-kernel-object page
2126 		 *      (e.g. vmapbuf).
2127 		 *
2128 		 * for case [1], we need to remove the mapping from
2129 		 * the pmap and then remove the page from the kernel
2130 		 * object (because, once pages in a kernel object are
2131 		 * unmapped they are no longer needed, unlike, say,
2132 		 * a vnode where you might want the data to persist
2133 		 * until flushed out of a queue).
2134 		 *
2135 		 * for case [2], we need to remove the mapping from
2136 		 * the pmap.  there shouldn't be any pages at the
2137 		 * specified offset in the kernel object [but it
2138 		 * doesn't hurt to call uvm_km_pgremove just to be
2139 		 * safe?]
2140 		 *
2141 		 * uvm_km_pgremove currently does the following:
2142 		 *   for pages in the kernel object range:
2143 		 *     - drops the swap slot
2144 		 *     - uvm_pagefree the page
2145 		 *
2146 		 * note there is a version of uvm_km_pgremove() that
2147 		 * is used for "intrsafe" objects.
2148 		 */
2149 		/*
2150 		 * remove mappings from pmap and drop the pages
2151 		 * from the object.  offsets are always relative
2152 		 * to vm_map_min(kernel_map).
2153 		 */
2154 		pmap_remove(pmap_kernel(), entry->start, entry->end);
2155 		uvm_km_pgremove(entry->object.uvm_obj,
2156 		    entry->start - vm_map_min(kernel_map),
2157 		    entry->end - vm_map_min(kernel_map));
2158 
2159 		/*
2160 		 * null out kernel_object reference, we've just
2161 		 * dropped it
2162 		 */
2163 		entry->etype &= ~UVM_ET_OBJ;
2164 		entry->object.uvm_obj = NULL;  /* to be safe */
2165 	} else {
2166 		/* remove mappings the standard way. */
2167 		pmap_remove(map->pmap, entry->start, entry->end);
2168 	}
2169 }
2170 
2171 /*
2172  * Remove all entries from start to end.
2173  *
2174  * If remove_holes, then remove ET_HOLE entries as well.
2175  * If markfree, entry will be properly marked free, otherwise, no replacement
2176  * entry will be put in the tree (corrupting the tree).
2177  */
2178 void
2179 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2180     struct uvm_map_deadq *dead, boolean_t remove_holes,
2181     boolean_t markfree)
2182 {
2183 	struct vm_map_entry *prev_hint, *next, *entry;
2184 
2185 	start = MAX(start, map->min_offset);
2186 	end = MIN(end, map->max_offset);
2187 	if (start >= end)
2188 		return;
2189 
2190 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2191 		splassert(IPL_NONE);
2192 	else
2193 		splassert(IPL_VM);
2194 
2195 	/* Find first affected entry. */
2196 	entry = uvm_map_entrybyaddr(&map->addr, start);
2197 	KDASSERT(entry != NULL && entry->start <= start);
2198 	if (entry->end <= start && markfree)
2199 		entry = RBT_NEXT(uvm_map_addr, entry);
2200 	else
2201 		UVM_MAP_CLIP_START(map, entry, start);
2202 
2203 	/*
2204 	 * Iterate entries until we reach end address.
2205 	 * prev_hint hints where the freed space can be appended to.
2206 	 */
2207 	prev_hint = NULL;
2208 	for (; entry != NULL && entry->start < end; entry = next) {
2209 		KDASSERT(entry->start >= start);
2210 		if (entry->end > end || !markfree)
2211 			UVM_MAP_CLIP_END(map, entry, end);
2212 		KDASSERT(entry->start >= start && entry->end <= end);
2213 		next = RBT_NEXT(uvm_map_addr, entry);
2214 
2215 		/* Don't remove holes unless asked to do so. */
2216 		if (UVM_ET_ISHOLE(entry)) {
2217 			if (!remove_holes) {
2218 				prev_hint = entry;
2219 				continue;
2220 			}
2221 		}
2222 
2223 		/* A stack has been removed. */
2224 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2225 			map->sserial++;
2226 
2227 		/* Kill entry. */
2228 		uvm_unmap_kill_entry(map, entry);
2229 
2230 		/* Update space usage. */
2231 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2232 		    entry->object.uvm_obj == NULL &&
2233 		    entry->protection != PROT_NONE &&
2234 		    !UVM_ET_ISHOLE(entry)) {
2235 			((struct vmspace *)map)->vm_dused -=
2236 			    uvmspace_dused(map, entry->start, entry->end);
2237 		}
2238 		if (!UVM_ET_ISHOLE(entry))
2239 			map->size -= entry->end - entry->start;
2240 
2241 		/* Actual removal of entry. */
2242 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2243 	}
2244 
2245 	pmap_update(vm_map_pmap(map));
2246 
2247 #ifdef VMMAP_DEBUG
2248 	if (markfree) {
2249 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2250 		    entry != NULL && entry->start < end;
2251 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2252 			KDASSERT(entry->end <= start ||
2253 			    entry->start == entry->end ||
2254 			    UVM_ET_ISHOLE(entry));
2255 		}
2256 	} else {
2257 		vaddr_t a;
2258 		for (a = start; a < end; a += PAGE_SIZE)
2259 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2260 	}
2261 #endif
2262 }
2263 
2264 /*
2265  * Mark all entries from first until end (exclusive) as pageable.
2266  *
2267  * Lock must be exclusive on entry and will not be touched.
2268  */
2269 void
2270 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2271     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2272 {
2273 	struct vm_map_entry *iter;
2274 
2275 	for (iter = first; iter != end;
2276 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2277 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2278 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2279 			continue;
2280 
2281 		iter->wired_count = 0;
2282 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2283 	}
2284 }
2285 
2286 /*
2287  * Mark all entries from first until end (exclusive) as wired.
2288  *
2289  * Lockflags determines the lock state on return from this function.
2290  * Lock must be exclusive on entry.
2291  */
2292 int
2293 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2294     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2295     int lockflags)
2296 {
2297 	struct vm_map_entry *iter;
2298 #ifdef DIAGNOSTIC
2299 	unsigned int timestamp_save;
2300 #endif
2301 	int error;
2302 
2303 	/*
2304 	 * Wire pages in two passes:
2305 	 *
2306 	 * 1: holding the write lock, we create any anonymous maps that need
2307 	 *    to be created.  then we clip each map entry to the region to
2308 	 *    be wired and increment its wiring count.
2309 	 *
2310 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2311 	 *    in the pages for any newly wired area (wired_count == 1).
2312 	 *
2313 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2314 	 *    deadlock with another thread that may have faulted on one of
2315 	 *    the pages to be wired (it would mark the page busy, blocking
2316 	 *    us, then in turn block on the map lock that we hold).
2317 	 *    because we keep the read lock on the map, the copy-on-write
2318 	 *    status of the entries we modify here cannot change.
2319 	 */
2320 	for (iter = first; iter != end;
2321 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2322 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2323 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2324 		    iter->protection == PROT_NONE)
2325 			continue;
2326 
2327 		/*
2328 		 * Perform actions of vm_map_lookup that need the write lock.
2329 		 * - create an anonymous map for copy-on-write
2330 		 * - anonymous map for zero-fill
2331 		 * Skip submaps.
2332 		 */
2333 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2334 		    UVM_ET_ISNEEDSCOPY(iter) &&
2335 		    ((iter->protection & PROT_WRITE) ||
2336 		    iter->object.uvm_obj == NULL)) {
2337 			amap_copy(map, iter, M_WAITOK,
2338 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2339 			    iter->start, iter->end);
2340 		}
2341 		iter->wired_count++;
2342 	}
2343 
2344 	/*
2345 	 * Pass 2.
2346 	 */
2347 #ifdef DIAGNOSTIC
2348 	timestamp_save = map->timestamp;
2349 #endif
2350 	vm_map_busy(map);
2351 	vm_map_downgrade(map);
2352 
2353 	error = 0;
2354 	for (iter = first; error == 0 && iter != end;
2355 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2356 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2357 		    iter->protection == PROT_NONE)
2358 			continue;
2359 
2360 		error = uvm_fault_wire(map, iter->start, iter->end,
2361 		    iter->protection);
2362 	}
2363 
2364 	if (error) {
2365 		/*
2366 		 * uvm_fault_wire failure
2367 		 *
2368 		 * Reacquire lock and undo our work.
2369 		 */
2370 		vm_map_upgrade(map);
2371 		vm_map_unbusy(map);
2372 #ifdef DIAGNOSTIC
2373 		if (timestamp_save != map->timestamp)
2374 			panic("uvm_map_pageable_wire: stale map");
2375 #endif
2376 
2377 		/*
2378 		 * first is no longer needed to restart loops.
2379 		 * Use it as iterator to unwire the entries wired so far.
2380 		 */
2381 		for (; first != iter;
2382 		    first = RBT_NEXT(uvm_map_addr, first)) {
2383 			if (UVM_ET_ISHOLE(first) ||
2384 			    first->start == first->end ||
2385 			    first->protection == PROT_NONE)
2386 				continue;
2387 
2388 			first->wired_count--;
2389 			if (!VM_MAPENT_ISWIRED(first)) {
2390 				uvm_fault_unwire_locked(map,
2391 				    first->start, first->end);
2392 			}
2393 		}
2394 
2395 		/* decrease counter in the rest of the entries */
2396 		for (; iter != end;
2397 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2398 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2399 			    iter->protection == PROT_NONE)
2400 				continue;
2401 
2402 			iter->wired_count--;
2403 		}
2404 
2405 		if ((lockflags & UVM_LK_EXIT) == 0)
2406 			vm_map_unlock(map);
2407 		return error;
2408 	}
2409 
2410 	/* We are currently holding a read lock. */
2411 	if ((lockflags & UVM_LK_EXIT) == 0) {
2412 		vm_map_unbusy(map);
2413 		vm_map_unlock_read(map);
2414 	} else {
2415 		vm_map_upgrade(map);
2416 		vm_map_unbusy(map);
2417 #ifdef DIAGNOSTIC
2418 		if (timestamp_save != map->timestamp)
2419 			panic("uvm_map_pageable_wire: stale map");
2420 #endif
2421 	}
2422 	return 0;
2423 }
2424 
2425 /*
2426  * uvm_map_pageable: set pageability of a range in a map.
2427  *
2428  * Flags:
2429  * UVM_LK_ENTER: map is already locked by caller
2430  * UVM_LK_EXIT:  don't unlock map on exit
2431  *
2432  * The full range must be in use (entries may not have fspace != 0).
2433  * UVM_ET_HOLE counts as unmapped.
2434  */
2435 int
2436 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2437     boolean_t new_pageable, int lockflags)
2438 {
2439 	struct vm_map_entry *first, *last, *tmp;
2440 	int error;
2441 
2442 	start = trunc_page(start);
2443 	end = round_page(end);
2444 
2445 	if (start > end)
2446 		return EINVAL;
2447 	if (start == end)
2448 		return 0;	/* nothing to do */
2449 	if (start < map->min_offset)
2450 		return EFAULT; /* why? see first XXX below */
2451 	if (end > map->max_offset)
2452 		return EINVAL; /* why? see second XXX below */
2453 
2454 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2455 	if ((lockflags & UVM_LK_ENTER) == 0)
2456 		vm_map_lock(map);
2457 
2458 	/*
2459 	 * Find first entry.
2460 	 *
2461 	 * Initial test on start is different, because of the different
2462 	 * error returned. Rest is tested further down.
2463 	 */
2464 	first = uvm_map_entrybyaddr(&map->addr, start);
2465 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2466 		/*
2467 		 * XXX if the first address is not mapped, it is EFAULT?
2468 		 */
2469 		error = EFAULT;
2470 		goto out;
2471 	}
2472 
2473 	/* Check that the range has no holes. */
2474 	for (last = first; last != NULL && last->start < end;
2475 	    last = RBT_NEXT(uvm_map_addr, last)) {
2476 		if (UVM_ET_ISHOLE(last) ||
2477 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2478 			/*
2479 			 * XXX unmapped memory in range, why is it EINVAL
2480 			 * instead of EFAULT?
2481 			 */
2482 			error = EINVAL;
2483 			goto out;
2484 		}
2485 	}
2486 
2487 	/*
2488 	 * Last ended at the first entry after the range.
2489 	 * Move back one step.
2490 	 *
2491 	 * Note that last may be NULL.
2492 	 */
2493 	if (last == NULL) {
2494 		last = RBT_MAX(uvm_map_addr, &map->addr);
2495 		if (last->end < end) {
2496 			error = EINVAL;
2497 			goto out;
2498 		}
2499 	} else {
2500 		KASSERT(last != first);
2501 		last = RBT_PREV(uvm_map_addr, last);
2502 	}
2503 
2504 	/* Wire/unwire pages here. */
2505 	if (new_pageable) {
2506 		/*
2507 		 * Mark pageable.
2508 		 * entries that are not wired are untouched.
2509 		 */
2510 		if (VM_MAPENT_ISWIRED(first))
2511 			UVM_MAP_CLIP_START(map, first, start);
2512 		/*
2513 		 * Split last at end.
2514 		 * Make tmp be the first entry after what is to be touched.
2515 		 * If last is not wired, don't touch it.
2516 		 */
2517 		if (VM_MAPENT_ISWIRED(last)) {
2518 			UVM_MAP_CLIP_END(map, last, end);
2519 			tmp = RBT_NEXT(uvm_map_addr, last);
2520 		} else
2521 			tmp = last;
2522 
2523 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2524 		error = 0;
2525 
2526 out:
2527 		if ((lockflags & UVM_LK_EXIT) == 0)
2528 			vm_map_unlock(map);
2529 		return error;
2530 	} else {
2531 		/*
2532 		 * Mark entries wired.
2533 		 * entries are always touched (because recovery needs this).
2534 		 */
2535 		if (!VM_MAPENT_ISWIRED(first))
2536 			UVM_MAP_CLIP_START(map, first, start);
2537 		/*
2538 		 * Split last at end.
2539 		 * Make tmp be the first entry after what is to be touched.
2540 		 * If last is not wired, don't touch it.
2541 		 */
2542 		if (!VM_MAPENT_ISWIRED(last)) {
2543 			UVM_MAP_CLIP_END(map, last, end);
2544 			tmp = RBT_NEXT(uvm_map_addr, last);
2545 		} else
2546 			tmp = last;
2547 
2548 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2549 		    lockflags);
2550 	}
2551 }
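
/*
 * Example (illustrative sketch, not compiled; example_wire_then_unwire
 * is hypothetical): mlock(2)/munlock(2)-style use of uvm_map_pageable().
 * new_pageable == FALSE wires the range, TRUE unwires it again.
 */
#if 0
int
example_wire_then_unwire(struct proc *p, vaddr_t addr, vsize_t size)
{
	struct vm_map *map = &p->p_vmspace->vm_map;
	vaddr_t start = trunc_page(addr);
	vaddr_t end = round_page(addr + size);
	int error;

	error = uvm_map_pageable(map, start, end, FALSE, 0);	/* wire */
	if (error)
		return error;
	return uvm_map_pageable(map, start, end, TRUE, 0);	/* unwire */
}
#endif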
2552 
2553 /*
2554  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2555  * all mapped regions.
2556  *
2557  * Map must not be locked.
2558  * If no flags are specified, all regions are unwired.
2559  */
2560 int
2561 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2562 {
2563 	vsize_t size;
2564 	struct vm_map_entry *iter;
2565 
2566 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2567 	vm_map_lock(map);
2568 
2569 	if (flags == 0) {
2570 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2571 		    NULL, map->min_offset, map->max_offset);
2572 
2573 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2574 		vm_map_unlock(map);
2575 		return 0;
2576 	}
2577 
2578 	if (flags & MCL_FUTURE)
2579 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2580 	if (!(flags & MCL_CURRENT)) {
2581 		vm_map_unlock(map);
2582 		return 0;
2583 	}
2584 
2585 	/*
2586 	 * Count number of pages in all non-wired entries.
2587 	 * If the number exceeds the limit, abort.
2588 	 */
2589 	size = 0;
2590 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2591 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2592 			continue;
2593 
2594 		size += iter->end - iter->start;
2595 	}
2596 
2597 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2598 		vm_map_unlock(map);
2599 		return ENOMEM;
2600 	}
2601 
2602 	/* XXX non-pmap_wired_count case must be handled by caller */
2603 #ifdef pmap_wired_count
2604 	if (limit != 0 &&
2605 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2606 		vm_map_unlock(map);
2607 		return ENOMEM;
2608 	}
2609 #endif
2610 
2611 	/*
2612 	 * uvm_map_pageable_wire will release the lock
2613 	 */
2614 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2615 	    NULL, map->min_offset, map->max_offset, 0);
2616 }
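
/*
 * Example (illustrative sketch, not compiled; example_lock_all is
 * hypothetical): mlockall(2)-style use.  MCL_CURRENT wires everything
 * currently mapped, MCL_FUTURE only sets VM_MAP_WIREFUTURE so that
 * later mappings are wired as they are entered; the limit argument
 * caps the wired total accounted against the pmap.
 */
#if 0
int
example_lock_all(struct proc *p, int flags)
{
	return uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
	    lim_cur(RLIMIT_MEMLOCK));
}
#endif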
2617 
2618 /*
2619  * Initialize map.
2620  *
2621  * Allocates sufficient entries to describe the free memory in the map.
2622  */
2623 void
2624 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2625     int flags)
2626 {
2627 	int i;
2628 
2629 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2630 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2631 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2632 
2633 	/*
2634 	 * Update parameters.
2635 	 *
2636 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2637 	 * properly.
2638 	 * We lose the top page if the full virtual address space is used.
2639 	 */
2640 	if (max & (vaddr_t)PAGE_MASK) {
2641 		max += 1;
2642 		if (max == 0) /* overflow */
2643 			max -= PAGE_SIZE;
2644 	}
2645 
2646 	RBT_INIT(uvm_map_addr, &map->addr);
2647 	map->uaddr_exe = NULL;
2648 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2649 		map->uaddr_any[i] = NULL;
2650 	map->uaddr_brk_stack = NULL;
2651 
2652 	map->pmap = pmap;
2653 	map->size = 0;
2654 	map->ref_count = 0;
2655 	map->min_offset = min;
2656 	map->max_offset = max;
2657 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2658 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2659 	map->flags = flags;
2660 	map->timestamp = 0;
2661 	if (flags & VM_MAP_ISVMSPACE)
2662 		rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2663 	else
2664 		rw_init(&map->lock, "kmmaplk");
2665 	mtx_init(&map->mtx, IPL_VM);
2666 	mtx_init(&map->flags_lock, IPL_VM);
2667 
2668 	/* Configure the allocators. */
2669 	if (flags & VM_MAP_ISVMSPACE)
2670 		uvm_map_setup_md(map);
2671 	else
2672 		map->uaddr_any[3] = &uaddr_kbootstrap;
2673 
2674 	/*
2675 	 * Fill map entries.
2676 	 * We do not need to write-lock the map here because only the current
2677 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2678 	 * bogus triggering of lock-not-held assertions.
2679 	 */
2680 	uvm_map_setup_entries(map);
2681 	uvm_tree_sanity(map, __FILE__, __LINE__);
2682 	map->ref_count = 1;
2683 }
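
/*
 * Example (illustrative sketch, not compiled; example_setup_kernel_map
 * and its bounds are hypothetical): initializing a kernel map over a
 * page-aligned VA window, roughly what the kernel bootstrap does for
 * kernel_map.  uvm_map_setup() creates the free-space entries covering
 * [base, end).
 */
#if 0
void
example_setup_kernel_map(struct vm_map *map, vaddr_t base, vaddr_t end)
{
	uvm_map_setup(map, pmap_kernel(), base, end, VM_MAP_PAGEABLE);
}
#endif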
2684 
2685 /*
2686  * Destroy the map.
2687  *
2688  * This is the inverse operation to uvm_map_setup.
2689  */
2690 void
2691 uvm_map_teardown(struct vm_map *map)
2692 {
2693 	struct uvm_map_deadq	 dead_entries;
2694 	struct vm_map_entry	*entry, *tmp;
2695 #ifdef VMMAP_DEBUG
2696 	size_t			 numq, numt;
2697 #endif
2698 	int			 i;
2699 
2700 	KERNEL_ASSERT_LOCKED();
2701 	KERNEL_UNLOCK();
2702 	KERNEL_ASSERT_UNLOCKED();
2703 
2704 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2705 
2706 	/* Remove address selectors. */
2707 	uvm_addr_destroy(map->uaddr_exe);
2708 	map->uaddr_exe = NULL;
2709 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2710 		uvm_addr_destroy(map->uaddr_any[i]);
2711 		map->uaddr_any[i] = NULL;
2712 	}
2713 	uvm_addr_destroy(map->uaddr_brk_stack);
2714 	map->uaddr_brk_stack = NULL;
2715 
2716 	/*
2717 	 * Remove entries.
2718 	 *
2719 	 * The following is based on graph breadth-first search.
2720 	 *
2721 	 * In color terms:
2722 	 * - the dead_entries set contains all nodes that are reachable
2723 	 *   (i.e. both the black and the grey nodes)
2724 	 * - any entry not in dead_entries is white
2725 	 * - any entry that appears in dead_entries before entry,
2726 	 *   is black, the rest is grey.
2727 	 * The set [entry, end] is also referred to as the wavefront.
2728 	 *
2729 	 * Since the tree is always a fully connected graph, the breadth-first
2730 	 * search guarantees that each vmmap_entry is visited exactly once.
2731 	 * The vm_map is broken down in linear time.
2732 	 */
2733 	TAILQ_INIT(&dead_entries);
2734 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2735 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2736 	while (entry != NULL) {
2737 		sched_pause(yield);
2738 		uvm_unmap_kill_entry(map, entry);
2739 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2740 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2741 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2742 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2743 		/* Update wave-front. */
2744 		entry = TAILQ_NEXT(entry, dfree.deadq);
2745 	}
2746 
2747 #ifdef VMMAP_DEBUG
2748 	numt = numq = 0;
2749 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2750 		numt++;
2751 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2752 		numq++;
2753 	KASSERT(numt == numq);
2754 #endif
2755 	uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2756 
2757 	KERNEL_LOCK();
2758 
2759 	pmap_destroy(map->pmap);
2760 	map->pmap = NULL;
2761 }
2762 
2763 /*
2764  * Populate map with free-memory entries.
2765  *
2766  * Map must be initialized and empty.
2767  */
2768 void
2769 uvm_map_setup_entries(struct vm_map *map)
2770 {
2771 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2772 
2773 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2774 }
2775 
2776 /*
2777  * Split entry at given address.
2778  *
2779  * orig:  entry that is to be split.
2780  * next:  a newly allocated map entry that is not linked.
2781  * split: address at which the split is done.
2782  */
2783 void
2784 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2785     struct vm_map_entry *next, vaddr_t split)
2786 {
2787 	struct uvm_addr_state *free, *free_before;
2788 	vsize_t adj;
2789 
2790 	if ((split & PAGE_MASK) != 0) {
2791 		panic("uvm_map_splitentry: split address 0x%lx "
2792 		    "not on page boundary!", split);
2793 	}
2794 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2795 	uvm_tree_sanity(map, __FILE__, __LINE__);
2796 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2797 
2798 #ifdef VMMAP_DEBUG
2799 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2800 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2801 #endif /* VMMAP_DEBUG */
2802 
2803 	/*
2804 	 * Free space will change, unlink from free space tree.
2805 	 */
2806 	free = uvm_map_uaddr_e(map, orig);
2807 	uvm_mapent_free_remove(map, free, orig);
2808 
2809 	adj = split - orig->start;
2810 
2811 	uvm_mapent_copy(orig, next);
2812 	if (split >= orig->end) {
2813 		next->etype = 0;
2814 		next->offset = 0;
2815 		next->wired_count = 0;
2816 		next->start = next->end = split;
2817 		next->guard = 0;
2818 		next->fspace = VMMAP_FREE_END(orig) - split;
2819 		next->aref.ar_amap = NULL;
2820 		next->aref.ar_pageoff = 0;
2821 		orig->guard = MIN(orig->guard, split - orig->end);
2822 		orig->fspace = split - VMMAP_FREE_START(orig);
2823 	} else {
2824 		orig->fspace = 0;
2825 		orig->guard = 0;
2826 		orig->end = next->start = split;
2827 
2828 		if (next->aref.ar_amap) {
2829 			KERNEL_LOCK();
2830 			amap_splitref(&orig->aref, &next->aref, adj);
2831 			KERNEL_UNLOCK();
2832 		}
2833 		if (UVM_ET_ISSUBMAP(orig)) {
2834 			uvm_map_reference(next->object.sub_map);
2835 			next->offset += adj;
2836 		} else if (UVM_ET_ISOBJ(orig)) {
2837 			if (next->object.uvm_obj->pgops &&
2838 			    next->object.uvm_obj->pgops->pgo_reference) {
2839 				KERNEL_LOCK();
2840 				next->object.uvm_obj->pgops->pgo_reference(
2841 				    next->object.uvm_obj);
2842 				KERNEL_UNLOCK();
2843 			}
2844 			next->offset += adj;
2845 		}
2846 	}
2847 
2848 	/*
2849 	 * Link next into address tree.
2850 	 * Link orig and next into free-space tree.
2851 	 *
2852 	 * Don't insert 'next' into the addr tree until orig has been linked,
2853 	 * in case the free-list looks at adjacent entries in the addr tree
2854 	 * for its decisions.
2855 	 */
2856 	if (orig->fspace > 0)
2857 		free_before = free;
2858 	else
2859 		free_before = uvm_map_uaddr_e(map, orig);
2860 	uvm_mapent_free_insert(map, free_before, orig);
2861 	uvm_mapent_addr_insert(map, next);
2862 	uvm_mapent_free_insert(map, free, next);
2863 
2864 	uvm_tree_sanity(map, __FILE__, __LINE__);
2865 }
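
/*
 * Example (illustrative sketch, not compiled; example_clip_at is
 * hypothetical): uvm_map_splitentry() is normally reached through the
 * clip macros, which allocate the new entry and only split when a
 * boundary is actually missing.
 */
#if 0
void
example_clip_at(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
{
	/* No-op when the entry already starts at addr. */
	UVM_MAP_CLIP_START(map, entry, addr);
}
#endif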
2866 
2867 
2868 #ifdef VMMAP_DEBUG
2869 
2870 void
2871 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2872     char *file, int line)
2873 {
2874 	char* map_special;
2875 
2876 	if (test)
2877 		return;
2878 
2879 	if (map == kernel_map)
2880 		map_special = " (kernel_map)";
2881 	else if (map == kmem_map)
2882 		map_special = " (kmem_map)";
2883 	else
2884 		map_special = "";
2885 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2886 	    line, test_str);
2887 }
2888 
2889 /*
2890  * Check that map is sane.
2891  */
2892 void
2893 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2894 {
2895 	struct vm_map_entry	*iter;
2896 	vaddr_t			 addr;
2897 	vaddr_t			 min, max, bound; /* Bounds checker. */
2898 	struct uvm_addr_state	*free;
2899 
2900 	addr = vm_map_min(map);
2901 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2902 		/*
2903 		 * Valid start, end.
2904 		 * Catch overflow for end+fspace.
2905 		 */
2906 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2907 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2908 
2909 		/* May not be empty. */
2910 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2911 		    file, line);
2912 
2913 		/* Addresses for entry must lie within map boundaries. */
2914 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2915 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2916 
2917 		/* Tree may not have gaps. */
2918 		UVM_ASSERT(map, iter->start == addr, file, line);
2919 		addr = VMMAP_FREE_END(iter);
2920 
2921 		/*
2922 		 * Free space may not cross boundaries, unless the same
2923 		 * free list is used on both sides of the border.
2924 		 */
2925 		min = VMMAP_FREE_START(iter);
2926 		max = VMMAP_FREE_END(iter);
2927 
2928 		while (min < max &&
2929 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2930 			UVM_ASSERT(map,
2931 			    uvm_map_uaddr(map, bound - 1) ==
2932 			    uvm_map_uaddr(map, bound),
2933 			    file, line);
2934 			min = bound;
2935 		}
2936 
2937 		free = uvm_map_uaddr_e(map, iter);
2938 		if (free) {
2939 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2940 			    file, line);
2941 		} else {
2942 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2943 			    file, line);
2944 		}
2945 	}
2946 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2947 }
2948 
2949 void
2950 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2951 {
2952 	struct vm_map_entry *iter;
2953 	vsize_t size;
2954 
2955 	size = 0;
2956 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2957 		if (!UVM_ET_ISHOLE(iter))
2958 			size += iter->end - iter->start;
2959 	}
2960 
2961 	if (map->size != size)
2962 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2963 	UVM_ASSERT(map, map->size == size, file, line);
2964 
2965 	vmspace_validate(map);
2966 }
2967 
2968 /*
2969  * This function validates the statistics on vmspace.
2970  */
2971 void
2972 vmspace_validate(struct vm_map *map)
2973 {
2974 	struct vmspace *vm;
2975 	struct vm_map_entry *iter;
2976 	vaddr_t imin, imax;
2977 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2978 	vsize_t stack, heap; /* Measured sizes. */
2979 
2980 	if (!(map->flags & VM_MAP_ISVMSPACE))
2981 		return;
2982 
2983 	vm = (struct vmspace *)map;
2984 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2985 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2986 
2987 	stack = heap = 0;
2988 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2989 		imin = imax = iter->start;
2990 
2991 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
2992 		    iter->protection == PROT_NONE)
2993 			continue;
2994 
2995 		/*
2996 		 * Update stack, heap.
2997 		 * Keep in mind that (theoretically) the entries of
2998 		 * userspace and stack may be joined.
2999 		 */
3000 		while (imin != iter->end) {
3001 			/*
3002 			 * Set imax to the first boundary crossed between
3003 			 * imin and stack addresses.
3004 			 */
3005 			imax = iter->end;
3006 			if (imin < stack_begin && imax > stack_begin)
3007 				imax = stack_begin;
3008 			else if (imin < stack_end && imax > stack_end)
3009 				imax = stack_end;
3010 
3011 			if (imin >= stack_begin && imin < stack_end)
3012 				stack += imax - imin;
3013 			else
3014 				heap += imax - imin;
3015 			imin = imax;
3016 		}
3017 	}
3018 
3019 	heap >>= PAGE_SHIFT;
3020 	if (heap != vm->vm_dused) {
3021 		printf("vmspace stack range: 0x%lx-0x%lx\n",
3022 		    stack_begin, stack_end);
3023 		panic("vmspace_validate: vmspace.vm_dused invalid, "
3024 		    "expected %ld pgs, got %ld pgs in map %p",
3025 		    heap, vm->vm_dused,
3026 		    map);
3027 	}
3028 }
3029 
3030 #endif /* VMMAP_DEBUG */
3031 
3032 /*
3033  * uvm_map_init: init mapping system at boot time.   note that we allocate
3034  * and init the static pool of structs vm_map_entry for the kernel here.
3035  */
3036 void
3037 uvm_map_init(void)
3038 {
3039 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
3040 	int lcv;
3041 
3042 	/* now set up static pool of kernel map entries ... */
3043 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
3044 	SLIST_INIT(&uvm.kentry_free);
3045 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
3046 		SLIST_INSERT_HEAD(&uvm.kentry_free,
3047 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
3048 	}
3049 
3050 	/* initialize the map-related pools. */
3051 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
3052 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
3053 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
3054 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
3055 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
3056 	    IPL_VM, 0, "vmmpekpl", NULL);
3057 	pool_sethiwat(&uvm_map_entry_pool, 8192);
3058 
3059 	uvm_addr_init();
3060 }
3061 
3062 #if defined(DDB)
3063 
3064 /*
3065  * DDB hooks
3066  */
3067 
3068 /*
3069  * uvm_map_printit: actually prints the map
3070  */
3071 void
3072 uvm_map_printit(struct vm_map *map, boolean_t full,
3073     int (*pr)(const char *, ...))
3074 {
3075 	struct vmspace			*vm;
3076 	struct vm_map_entry		*entry;
3077 	struct uvm_addr_state		*free;
3078 	int				 in_free, i;
3079 	char				 buf[8];
3080 
3081 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
3082 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
3083 	    map->b_start, map->b_end);
3084 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
3085 	    map->s_start, map->s_end);
3086 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
3087 	    map->size, map->ref_count, map->timestamp,
3088 	    map->flags);
3089 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
3090 	    pmap_resident_count(map->pmap));
3091 
3092 	/* struct vmspace handling. */
3093 	if (map->flags & VM_MAP_ISVMSPACE) {
3094 		vm = (struct vmspace *)map;
3095 
3096 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
3097 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
3098 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
3099 		    vm->vm_tsize, vm->vm_dsize);
3100 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
3101 		    vm->vm_taddr, vm->vm_daddr);
3102 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
3103 		    vm->vm_maxsaddr, vm->vm_minsaddr);
3104 	}
3105 
3106 	if (!full)
3107 		goto print_uaddr;
3108 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
3109 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3110 		    entry, entry->start, entry->end, entry->object.uvm_obj,
3111 		    (long long)entry->offset, entry->aref.ar_amap,
3112 		    entry->aref.ar_pageoff);
3113 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
3114 		    "syscall=%c, prot(max)=%d/%d, inh=%d, "
3115 		    "wc=%d, adv=%d\n",
3116 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3117 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3118 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3119 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
3120 		    (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
3121 		    entry->protection, entry->max_protection,
3122 		    entry->inheritance, entry->wired_count, entry->advice);
3123 
3124 		free = uvm_map_uaddr_e(map, entry);
3125 		in_free = (free != NULL);
3126 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
3127 		    "free=0x%lx-0x%lx\n",
3128 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
3129 		    in_free ? 'T' : 'F',
3130 		    entry->guard,
3131 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
3132 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
3133 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
3134 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
3135 		if (free) {
3136 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
3137 			    free->uaddr_minaddr, free->uaddr_maxaddr,
3138 			    free->uaddr_functions->uaddr_name);
3139 		}
3140 	}
3141 
3142 print_uaddr:
3143 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
3144 	for (i = 0; i < nitems(map->uaddr_any); i++) {
3145 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
3146 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
3147 	}
3148 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
3149 }
3150 
3151 /*
3152  * uvm_object_printit: actually prints the object
3153  */
3154 void
3155 uvm_object_printit(uobj, full, pr)
3156 	struct uvm_object *uobj;
3157 	boolean_t full;
3158 	int (*pr)(const char *, ...);
3159 {
3160 	struct vm_page *pg;
3161 	int cnt = 0;
3162 
3163 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3164 	    uobj, uobj->pgops, uobj->uo_npages);
3165 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3166 		(*pr)("refs=<SYSTEM>\n");
3167 	else
3168 		(*pr)("refs=%d\n", uobj->uo_refs);
3169 
3170 	if (!full) {
3171 		return;
3172 	}
3173 	(*pr)("  PAGES <pg,offset>:\n  ");
3174 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3175 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3176 		if ((cnt % 3) == 2) {
3177 			(*pr)("\n  ");
3178 		}
3179 		cnt++;
3180 	}
3181 	if ((cnt % 3) != 2) {
3182 		(*pr)("\n");
3183 	}
3184 }
3185 
3186 /*
3187  * uvm_page_printit: actually print the page
3188  */
3189 static const char page_flagbits[] =
3190 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3191 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3192 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3193 
3194 void
3195 uvm_page_printit(pg, full, pr)
3196 	struct vm_page *pg;
3197 	boolean_t full;
3198 	int (*pr)(const char *, ...);
3199 {
3200 	struct vm_page *tpg;
3201 	struct uvm_object *uobj;
3202 	struct pglist *pgl;
3203 
3204 	(*pr)("PAGE %p:\n", pg);
3205 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3206 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3207 	    (long long)pg->phys_addr);
3208 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
3209 	    pg->uobject, pg->uanon, (long long)pg->offset);
3210 #if defined(UVM_PAGE_TRKOWN)
3211 	if (pg->pg_flags & PG_BUSY)
3212 		(*pr)("  owning thread = %d, tag=%s",
3213 		    pg->owner, pg->owner_tag);
3214 	else
3215 		(*pr)("  page not busy, no owner");
3216 #else
3217 	(*pr)("  [page ownership tracking disabled]");
3218 #endif
3219 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
3220 
3221 	if (!full)
3222 		return;
3223 
3224 	/* cross-verify object/anon */
3225 	if ((pg->pg_flags & PQ_FREE) == 0) {
3226 		if (pg->pg_flags & PQ_ANON) {
3227 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3228 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3229 				(pg->uanon) ? pg->uanon->an_page : NULL);
3230 			else
3231 				(*pr)("  anon backpointer is OK\n");
3232 		} else {
3233 			uobj = pg->uobject;
3234 			if (uobj) {
3235 				(*pr)("  checking object list\n");
3236 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3237 					if (tpg == pg) {
3238 						break;
3239 					}
3240 				}
3241 				if (tpg)
3242 					(*pr)("  page found on object list\n");
3243 				else
3244 					(*pr)("  >>> PAGE NOT FOUND "
3245 					    "ON OBJECT LIST! <<<\n");
3246 			}
3247 		}
3248 	}
3249 
3250 	/* cross-verify page queue */
3251 	if (pg->pg_flags & PQ_FREE) {
3252 		if (uvm_pmr_isfree(pg))
3253 			(*pr)("  page found in uvm_pmemrange\n");
3254 		else
3255 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3256 		pgl = NULL;
3257 	} else if (pg->pg_flags & PQ_INACTIVE) {
3258 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3259 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3260 	} else if (pg->pg_flags & PQ_ACTIVE) {
3261 		pgl = &uvm.page_active;
3262  	} else {
3263 		pgl = NULL;
3264 	}
3265 
3266 	if (pgl) {
3267 		(*pr)("  checking pageq list\n");
3268 		TAILQ_FOREACH(tpg, pgl, pageq) {
3269 			if (tpg == pg) {
3270 				break;
3271 			}
3272 		}
3273 		if (tpg)
3274 			(*pr)("  page found on pageq list\n");
3275 		else
3276 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3277 	}
3278 }
3279 #endif
3280 
3281 /*
3282  * uvm_map_protect: change map protection
3283  *
3284  * => set_max means set max_protection.
3285  * => map must be unlocked.
3286  */
3287 int
3288 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3289     vm_prot_t new_prot, boolean_t set_max)
3290 {
3291 	struct vm_map_entry *first, *iter;
3292 	vm_prot_t old_prot;
3293 	vm_prot_t mask;
3294 	vsize_t dused;
3295 	int error;
3296 
3297 	if (start > end)
3298 		return EINVAL;
3299 	start = MAX(start, map->min_offset);
3300 	end = MIN(end, map->max_offset);
3301 	if (start >= end)
3302 		return 0;
3303 
3304 	dused = 0;
3305 	error = 0;
3306 	vm_map_lock(map);
3307 
3308 	/*
3309 	 * Set up first.
3310 	 * - first will contain the first entry at or after start.
3311 	 */
3312 	first = uvm_map_entrybyaddr(&map->addr, start);
3313 	KDASSERT(first != NULL);
3314 	if (first->end <= start)
3315 		first = RBT_NEXT(uvm_map_addr, first);
3316 
3317 	/* First, check for protection violations. */
3318 	for (iter = first; iter != NULL && iter->start < end;
3319 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3320 		/* Treat memory holes as free space. */
3321 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3322 			continue;
3323 
3324 		old_prot = iter->protection;
3325 		if (old_prot == PROT_NONE && new_prot != old_prot) {
3326 			dused += uvmspace_dused(
3327 			    map, MAX(start, iter->start), MIN(end, iter->end));
3328 		}
3329 
3330 		if (UVM_ET_ISSUBMAP(iter)) {
3331 			error = EINVAL;
3332 			goto out;
3333 		}
3334 		if ((new_prot & iter->max_protection) != new_prot) {
3335 			error = EACCES;
3336 			goto out;
3337 		}
3338 		if (map == kernel_map &&
3339 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3340 			panic("uvm_map_protect: kernel map W^X violation requested");
3341 	}
3342 
3343 	/* Check limits. */
3344 	if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3345 		vsize_t limit = lim_cur(RLIMIT_DATA);
3346 		dused = ptoa(dused);
3347 		if (limit < dused ||
3348 		    limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3349 			error = ENOMEM;
3350 			goto out;
3351 		}
3352 	}
3353 
3354 	/* Fix protections.  */
3355 	for (iter = first; iter != NULL && iter->start < end;
3356 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3357 		/* Treat memory holes as free space. */
3358 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3359 			continue;
3360 
3361 		old_prot = iter->protection;
3362 
3363 		/*
3364 		 * Skip adapting protection iff old and new protection
3365 		 * are equal.
3366 		 */
3367 		if (set_max) {
3368 			if (old_prot == (new_prot & old_prot) &&
3369 			    iter->max_protection == new_prot)
3370 				continue;
3371 		} else {
3372 			if (old_prot == new_prot)
3373 				continue;
3374 		}
3375 
3376 		UVM_MAP_CLIP_START(map, iter, start);
3377 		UVM_MAP_CLIP_END(map, iter, end);
3378 
3379 		if (set_max) {
3380 			iter->max_protection = new_prot;
3381 			iter->protection &= new_prot;
3382 		} else
3383 			iter->protection = new_prot;
3384 
3385 		/*
3386 		 * update physical map if necessary.  worry about copy-on-write
3387 		 * here -- CHECK THIS XXX
3388 		 */
3389 		if (iter->protection != old_prot) {
3390 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3391 			    ~PROT_WRITE : PROT_MASK;
3392 
3393 			/* XXX should only wserial++ if no split occurs */
3394 			if (iter->protection & PROT_WRITE)
3395 				map->wserial++;
3396 
3397 			if (map->flags & VM_MAP_ISVMSPACE) {
3398 				if (old_prot == PROT_NONE) {
3399 					((struct vmspace *)map)->vm_dused +=
3400 					    uvmspace_dused(map, iter->start,
3401 					        iter->end);
3402 				}
3403 				if (iter->protection == PROT_NONE) {
3404 					((struct vmspace *)map)->vm_dused -=
3405 					    uvmspace_dused(map, iter->start,
3406 					        iter->end);
3407 				}
3408 			}
3409 
3410 			/* update pmap */
3411 			if ((iter->protection & mask) == PROT_NONE &&
3412 			    VM_MAPENT_ISWIRED(iter)) {
3413 				/*
3414 				 * TODO(ariane) this is stupid. wired_count
3415 				 * is 0 if not wired, otherwise anything
3416 				 * larger than 0 (incremented once each time
3417 				 * wire is called).
3418 				 * Mostly to be able to undo the damage on
3419 				 * failure, not to actually be a wired
3420 				 * refcounter...
3421 				 * Originally: iter->wired_count--;
3422 				 * (don't we have to unwire this in the pmap
3423 				 * as well?)
3424 				 */
3425 				iter->wired_count = 0;
3426 			}
3427 			pmap_protect(map->pmap, iter->start, iter->end,
3428 			    iter->protection & mask);
3429 		}
3430 
3431 		/*
3432 		 * If the map is configured to lock any future mappings,
3433 		 * wire this entry now if the old protection was PROT_NONE
3434 		 * and the new protection is not PROT_NONE.
3435 		 */
3436 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3437 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3438 		    old_prot == PROT_NONE &&
3439 		    new_prot != PROT_NONE) {
3440 			if (uvm_map_pageable(map, iter->start, iter->end,
3441 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3442 				/*
3443 				 * If locking the entry fails, remember the
3444 				 * error if it's the first one.  Note we
3445 				 * still continue setting the protection in
3446 				 * the map, but it will return the resource
3447 				 * shortage condition regardless.
3448 				 *
3449 				 * XXX Ignore what the actual error is,
3450 				 * XXX just call it a resource shortage
3451 				 * XXX so that it doesn't get confused
3452 				 * XXX what uvm_map_protect() itself would
3453 				 * XXX normally return.
3454 				 */
3455 				error = ENOMEM;
3456 			}
3457 		}
3458 	}
3459 	pmap_update(map->pmap);
3460 
3461 out:
3462 	vm_map_unlock(map);
3463 	return error;
3464 }
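
/*
 * Example (illustrative sketch, not compiled; example_mprotect is
 * hypothetical): mprotect(2)-style use.  set_max == FALSE changes only
 * the current protection, which must stay within each entry's
 * max_protection (EACCES otherwise).
 */
#if 0
int
example_mprotect(struct proc *p, vaddr_t addr, vsize_t len, vm_prot_t prot)
{
	return uvm_map_protect(&p->p_vmspace->vm_map, trunc_page(addr),
	    round_page(addr + len), prot, FALSE);
}
#endif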
3465 
3466 /*
3467  * uvmspace_alloc: allocate a vmspace structure.
3468  *
3469  * - structure includes vm_map and pmap
3470  * - XXX: no locking on this structure
3471  * - refcnt set to 1, rest must be init'd by caller
3472  */
3473 struct vmspace *
3474 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3475     boolean_t remove_holes)
3476 {
3477 	struct vmspace *vm;
3478 
3479 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3480 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3481 	return (vm);
3482 }
3483 
3484 /*
3485  * uvmspace_init: initialize a vmspace structure.
3486  *
3487  * - XXX: no locking on this structure
3488  * - refcnt set to 1, rest must be init'd by caller
3489  */
3490 void
3491 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3492     boolean_t pageable, boolean_t remove_holes)
3493 {
3494 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3495 
3496 	if (pmap)
3497 		pmap_reference(pmap);
3498 	else
3499 		pmap = pmap_create();
3500 
3501 	uvm_map_setup(&vm->vm_map, pmap, min, max,
3502 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3503 
3504 	vm->vm_refcnt = 1;
3505 
3506 	if (remove_holes)
3507 		pmap_remove_holes(vm);
3508 }
3509 
3510 /*
3511  * uvmspace_share: share a vmspace between two processes
3512  *
3513  * - XXX: no locking on vmspace
3514  * - used for vfork
3515  */
3516 
3517 struct vmspace *
3518 uvmspace_share(struct process *pr)
3519 {
3520 	struct vmspace *vm = pr->ps_vmspace;
3521 
3522 	vm->vm_refcnt++;
3523 	return vm;
3524 }
3525 
3526 /*
3527  * uvmspace_exec: the process wants to exec a new program
3528  *
3529  * - XXX: no locking on vmspace
3530  */
3531 
3532 void
3533 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3534 {
3535 	struct process *pr = p->p_p;
3536 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3537 	struct vm_map *map = &ovm->vm_map;
3538 	struct uvm_map_deadq dead_entries;
3539 
3540 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3541 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3542 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3543 
3544 	pmap_unuse_final(p);   /* before stack addresses go away */
3545 	TAILQ_INIT(&dead_entries);
3546 
3547 	/* see if more than one process is using this vmspace...  */
3548 	if (ovm->vm_refcnt == 1) {
3549 		/*
3550 		 * If pr is the only process using its vmspace then
3551 		 * we can safely recycle that vmspace for the program
3552 		 * that is being exec'd.
3553 		 */
3554 
3555 #ifdef SYSVSHM
3556 		/*
3557 		 * SYSV SHM semantics require us to kill all segments on an exec
3558 		 */
3559 		if (ovm->vm_shm)
3560 			shmexit(ovm);
3561 #endif
3562 
3563 		/*
3564 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3565 		 * when a process execs another program image.
3566 		 */
3567 		vm_map_lock(map);
3568 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
3569 
3570 		/*
3571 		 * now unmap the old program
3572 		 *
3573 		 * Instead of attempting to keep the map valid, we simply
3574 		 * nuke all entries and ask uvm_map_setup to reinitialize
3575 		 * the map to the new boundaries.
3576 		 *
3577 		 * uvm_unmap_remove will actually nuke all entries for us
3578 		 * (as in, not replace them with free-memory entries).
3579 		 */
3580 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3581 		    &dead_entries, TRUE, FALSE);
3582 
3583 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3584 
3585 		/* Nuke statistics and boundaries. */
3586 		memset(&ovm->vm_startcopy, 0,
3587 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3588 
3589 
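		/*
		 * Per the KASSERT above, a non page-aligned end has all of
		 * PAGE_MASK set; adding 1 rounds it up to the next page
		 * boundary and the overflow check keeps it in range.
		 */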
3590 		if (end & (vaddr_t)PAGE_MASK) {
3591 			end += 1;
3592 			if (end == 0) /* overflow */
3593 				end -= PAGE_SIZE;
3594 		}
3595 
3596 		/* Setup new boundaries and populate map with entries. */
3597 		map->min_offset = start;
3598 		map->max_offset = end;
3599 		uvm_map_setup_entries(map);
3600 		vm_map_unlock(map);
3601 
3602 		/* but keep MMU holes unavailable */
3603 		pmap_remove_holes(ovm);
3604 	} else {
3605 		/*
3606 		 * pr's vmspace is being shared, so we can't reuse
3607 		 * it for pr since it is still being used for others.
3608 		 * allocate a new vmspace for pr
3609 		 */
3610 		nvm = uvmspace_alloc(start, end,
3611 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3612 
3613 		/* install new vmspace and drop our ref to the old one. */
3614 		pmap_deactivate(p);
3615 		p->p_vmspace = pr->ps_vmspace = nvm;
3616 		pmap_activate(p);
3617 
3618 		uvmspace_free(ovm);
3619 	}
3620 
3621 	/* Release dead entries */
3622 	uvm_unmap_detach(&dead_entries, 0);
3623 }
3624 
3625 /*
3626  * uvmspace_free: free a vmspace data structure
3627  *
3628  * - XXX: no locking on vmspace
3629  */
3630 void
3631 uvmspace_free(struct vmspace *vm)
3632 {
3633 	if (--vm->vm_refcnt == 0) {
3634 		/*
3635 		 * lock the map, to wait out all other references to it.  delete
3636 		 * all of the mappings and pages they hold, then call the pmap
3637 		 * module to reclaim anything left.
3638 		 */
3639 #ifdef SYSVSHM
3640 		/* Get rid of any SYSV shared memory segments. */
3641 		if (vm->vm_shm != NULL)
3642 			shmexit(vm);
3643 #endif
3644 
3645 		uvm_map_teardown(&vm->vm_map);
3646 		pool_put(&uvm_vmspace_pool, vm);
3647 	}
3648 }
3649 
3650 /*
3651  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3652  * srcmap to the address range [dstaddr, dstaddr + sz) in
3653  * dstmap.
3654  *
3655  * The whole address range in srcmap must be backed by an object
3656  * (no holes).
3657  *
3658  * If successful, the address ranges share memory and the destination
3659  * address range uses the protection flags in prot.
3660  *
3661  * This routine assumes that sz is a multiple of PAGE_SIZE and
3662  * that dstaddr and srcaddr are page-aligned.
3663  */
3664 int
3665 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3666     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3667 {
3668 	int ret = 0;
3669 	vaddr_t unmap_end;
3670 	vaddr_t dstva;
3671 	vsize_t s_off, len, n = sz, remain;
3672 	struct vm_map_entry *first = NULL, *last = NULL;
3673 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3674 	struct uvm_map_deadq dead;
3675 
3676 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3677 		return EINVAL;
3678 
3679 	TAILQ_INIT(&dead);
3680 	vm_map_lock(dstmap);
3681 	vm_map_lock_read(srcmap);
3682 
3683 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3684 		ret = ENOMEM;
3685 		goto exit_unlock;
3686 	}
3687 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3688 		ret = EINVAL;
3689 		goto exit_unlock;
3690 	}
3691 
3692 	dstva = dstaddr;
3693 	unmap_end = dstaddr;
3694 	for (; src_entry != NULL;
3695 	    psrc_entry = src_entry,
3696 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3697 		/* hole in address space, bail out */
3698 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3699 			break;
3700 		if (src_entry->start >= srcaddr + sz)
3701 			break;
3702 
3703 		if (UVM_ET_ISSUBMAP(src_entry))
3704 			panic("uvm_share: encountered a submap (illegal)");
3705 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3706 		    UVM_ET_ISNEEDSCOPY(src_entry))
3707 			panic("uvm_share: non-copy_on_write map entries "
3708 			    "marked needs_copy (illegal)");
3709 
3710 		/*
3711 	 * srcaddr > map entry start? That means we are in the middle of a
3712 	 * map entry, so we calculate the offset to use in the source map.
3713 		 */
3714 		if (srcaddr > src_entry->start)
3715 			s_off = srcaddr - src_entry->start;
3716 		else if (srcaddr == src_entry->start)
3717 			s_off = 0;
3718 		else
3719 			panic("uvm_share: map entry start > srcaddr");
3720 
3721 		remain = src_entry->end - src_entry->start - s_off;
3722 
3723 		/* Determine how many bytes to share in this pass */
3724 		if (n < remain)
3725 			len = n;
3726 		else
3727 			len = remain;
3728 
3729 		if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3730 		    srcmap, src_entry, &dead) == NULL)
3731 			break;
3732 
3733 		n -= len;
3734 		dstva += len;
3735 		srcaddr += len;
3736 		unmap_end = dstva + len;
3737 		if (n == 0)
3738 			goto exit_unlock;
3739 	}
3740 
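	/*
	 * Falling out of the loop means there was a hole in the source
	 * range or uvm_mapent_share failed; undo the partial mappings
	 * already created in dstmap.
	 */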
3741 	ret = EINVAL;
3742 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
3743 
3744 exit_unlock:
3745 	vm_map_unlock_read(srcmap);
3746 	vm_map_unlock(dstmap);
3747 	uvm_unmap_detach(&dead, 0);
3748 
3749 	return ret;
3750 }
3751 
3752 /*
3753  * Clone map entry into other map.
3754  *
3755  * Mapping will be placed at dstaddr, for the same length.
3756  * Space must be available.
3757  * Reference counters are incremented.
3758  */
3759 struct vm_map_entry *
3760 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3761     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3762     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3763     int mapent_flags, int amap_share_flags)
3764 {
3765 	struct vm_map_entry *new_entry, *first, *last;
3766 
3767 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3768 
3769 	/* Create new entry (linked in on creation). Fill in first, last. */
3770 	first = last = NULL;
3771 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3772 		panic("uvm_mapent_clone: no space in map for "
3773 		    "entry in empty map");
3774 	}
3775 	new_entry = uvm_map_mkentry(dstmap, first, last,
3776 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3777 	if (new_entry == NULL)
3778 		return NULL;
3779 	/* old_entry -> new_entry */
3780 	new_entry->object = old_entry->object;
3781 	new_entry->offset = old_entry->offset;
3782 	new_entry->aref = old_entry->aref;
3783 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
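	/*
	 * UVM_ET_FREEMAPPED is masked out on purpose: it describes the
	 * source entry's free-list state, not a property of the mapping.
	 */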
3784 	new_entry->protection = prot;
3785 	new_entry->max_protection = maxprot;
3786 	new_entry->inheritance = old_entry->inheritance;
3787 	new_entry->advice = old_entry->advice;
3788 
3789 	/* gain reference to object backing the map (can't be a submap). */
3790 	if (new_entry->aref.ar_amap) {
3791 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3792 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3793 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3794 		    amap_share_flags);
3795 	}
3796 
3797 	if (UVM_ET_ISOBJ(new_entry) &&
3798 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3799 		new_entry->offset += off;
3800 		new_entry->object.uvm_obj->pgops->pgo_reference
3801 		    (new_entry->object.uvm_obj);
3802 	}
3803 
3804 	return new_entry;
3805 }
3806 
3807 struct vm_map_entry *
3808 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3809     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3810     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3811 {
3812 	/*
3813 	 * If old_entry refers to a copy-on-write region that has not yet been
3814 	 * written to (needs_copy flag is set), then we need to allocate a new
3815 	 * amap for old_entry.
3816 	 *
3817 	 * If we do not do this, and the process owning old_entry later does a
3818 	 * copy-on-write, old_entry and new_entry will refer to different memory
3819 	 * regions, and the memory between the processes is no longer shared.
3820 	 *
3821 	 * [in other words, we need to clear needs_copy]
3822 	 */
3823 
3824 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3825 		/* get our own amap, clears needs_copy */
3826 		amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3827 		/* XXXCDC: WAITOK??? */
3828 	}
3829 
3830 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3831 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3832 }
3833 
3834 /*
3835  * share the mapping: this means we want the old and
3836  * new entries to share amaps and backing objects.
3837  */
3838 struct vm_map_entry *
3839 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3840     struct vm_map *old_map,
3841     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3842 {
3843 	struct vm_map_entry *new_entry;
3844 
3845 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3846 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3847 	    old_entry->max_protection, old_map, old_entry, dead);
3848 
3849 	/*
3850 	 * pmap_copy the mappings: this routine is optional
3851 	 * but if it is there it will reduce the number of
3852 	 * page faults in the new proc.
3853 	 */
3854 	if (!UVM_ET_ISHOLE(new_entry))
3855 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3856 		    (new_entry->end - new_entry->start), new_entry->start);
3857 
3858 	return (new_entry);
3859 }
3860 
3861 /*
3862  * copy-on-write the mapping (using mmap's
3863  * MAP_PRIVATE semantics)
3864  *
3865  * allocate new_entry, adjust reference counts.
3866  * (note that new references are read-only).
3867  */
3868 struct vm_map_entry *
3869 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3870     struct vm_map *old_map,
3871     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3872 {
3873 	struct vm_map_entry	*new_entry;
3874 	boolean_t		 protect_child;
3875 
3876 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3877 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3878 	    old_entry->max_protection, old_entry, dead, 0, 0);
3879 
3880 	new_entry->etype |=
3881 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
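	/*
	 * Marking the child entry copy-on-write + needs_copy defers the
	 * actual amap copy to the first write fault, unless one of the
	 * cases handled below forces the copy right away.
	 */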
3882 
3883 	/*
3884 	 * the new entry will need an amap.  it will either
3885 	 * need to be copied from the old entry or created
3886 	 * from scratch (if the old entry does not have an
3887 	 * amap).  can we defer this process until later
3888 	 * (by setting "needs_copy") or do we need to copy
3889 	 * the amap now?
3890 	 *
3891 	 * we must copy the amap now if any of the following
3892 	 * conditions hold:
3893 	 * 1. the old entry has an amap and that amap is
3894 	 *    being shared.  this means that the old (parent)
3895 	 *    process is sharing the amap with another
3896 	 *    process.  if we do not clear needs_copy here
3897 	 *    we will end up in a situation where both the
3898 	 *    parent and child process are referring to the
3899 	 *    same amap with "needs_copy" set.  if the
3900 	 *    parent write-faults, the fault routine will
3901 	 *    clear "needs_copy" in the parent by allocating
3902 	 *    a new amap.   this is wrong because the
3903 	 *    parent is supposed to be sharing the old amap
3904 	 *    and the new amap will break that.
3905 	 *
3906 	 * 2. if the old entry has an amap and a non-zero
3907 	 *    wire count then we are going to have to call
3908 	 *    amap_cow_now to avoid page faults in the
3909 	 *    parent process.   since amap_cow_now requires
3910 	 *    "needs_copy" to be clear we might as well
3911 	 *    clear it here as well.
3912 	 *
3913 	 */
3914 	if (old_entry->aref.ar_amap != NULL &&
3915 	    ((amap_flags(old_entry->aref.ar_amap) &
3916 	    AMAP_SHARED) != 0 ||
3917 	    VM_MAPENT_ISWIRED(old_entry))) {
3918 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3919 		    0, 0);
3920 		/* XXXCDC: M_WAITOK ... ok? */
3921 	}
3922 
3923 	/*
3924 	 * if the parent's entry is wired down, then the
3925 	 * parent process does not want page faults on
3926 	 * access to that memory.  this means that we
3927 	 * cannot do copy-on-write because we can't write
3928 	 * protect the old entry.   in this case we
3929 	 * resolve all copy-on-write faults now, using
3930 	 * amap_cow_now.   note that we have already
3931 	 * allocated any needed amap (above).
3932 	 */
3933 	if (VM_MAPENT_ISWIRED(old_entry)) {
3934 		/*
3935 		 * resolve all copy-on-write faults now
3936 		 * (note that there is nothing to do if
3937 		 * the old mapping does not have an amap).
3938 		 * XXX: is it worthwhile to bother with
3939 		 * pmap_copy in this case?
3940 		 */
3941 		if (old_entry->aref.ar_amap)
3942 			amap_cow_now(new_map, new_entry);
3943 	} else {
3944 		if (old_entry->aref.ar_amap) {
3945 			/*
3946 			 * setup mappings to trigger copy-on-write faults
3947 			 * we must write-protect the parent if it has
3948 			 * an amap and it is not already "needs_copy"...
3949 			 * if it is already "needs_copy" then the parent
3950 			 * has already been write-protected by a previous
3951 			 * fork operation.
3952 			 *
3953 			 * if we do not write-protect the parent, then
3954 			 * we must be sure to write-protect the child
3955 			 * after the pmap_copy() operation.
3956 			 *
3957 			 * XXX: pmap_copy should have some way of telling
3958 			 * us that it didn't do anything so we can avoid
3959 			 * calling pmap_protect needlessly.
3960 			 */
3961 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3962 				if (old_entry->max_protection & PROT_WRITE) {
3963 					pmap_protect(old_map->pmap,
3964 					    old_entry->start,
3965 					    old_entry->end,
3966 					    old_entry->protection &
3967 					    ~PROT_WRITE);
3968 					pmap_update(old_map->pmap);
3969 				}
3970 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3971 			}
3972 
3973 	  		/* parent is now write-protected, so the child need not be */
3974 	  		protect_child = FALSE;
3975 		} else {
3976 			/*
3977 			 * we only need to protect the child if the
3978 			 * parent has write access.
3979 			 */
3980 			if (old_entry->max_protection & PROT_WRITE)
3981 				protect_child = TRUE;
3982 			else
3983 				protect_child = FALSE;
3984 		}
3985 		/*
3986 		 * copy the mappings
3987 		 * XXX: need a way to tell if this does anything
3988 		 */
3989 		if (!UVM_ET_ISHOLE(new_entry))
3990 			pmap_copy(new_map->pmap, old_map->pmap,
3991 			    new_entry->start,
3992 			    (old_entry->end - old_entry->start),
3993 			    old_entry->start);
3994 
3995 		/* protect the child's mappings if necessary */
3996 		if (protect_child) {
3997 			pmap_protect(new_map->pmap, new_entry->start,
3998 			    new_entry->end,
3999 			    new_entry->protection &
4000 			    ~PROT_WRITE);
4001 		}
4002 	}
4003 
4004 	return (new_entry);
4005 }
4006 
4007 /*
4008  * zero the mapping: the new entry will be zero initialized
4009  */
4010 struct vm_map_entry *
4011 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
4012     struct vm_map *old_map,
4013     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
4014 {
4015 	struct vm_map_entry *new_entry;
4016 
4017 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
4018 	    old_entry->end - old_entry->start, 0, old_entry->protection,
4019 	    old_entry->max_protection, old_entry, dead, 0, 0);
4020 
4021 	new_entry->etype |=
4022 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
4023 
4024 	if (new_entry->aref.ar_amap) {
4025 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
4026 		    atop(new_entry->end - new_entry->start), 0);
4027 		new_entry->aref.ar_amap = NULL;
4028 		new_entry->aref.ar_pageoff = 0;
4029 	}
4030 
4031 	if (UVM_ET_ISOBJ(new_entry)) {
4032 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
4033 			new_entry->object.uvm_obj->pgops->pgo_detach(
4034 			    new_entry->object.uvm_obj);
4035 		new_entry->object.uvm_obj = NULL;
4036 		new_entry->etype &= ~UVM_ET_OBJ;
4037 	}
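	/*
	 * With the amap and object references dropped, the child's range
	 * is backed purely by zero-fill anonymous memory.
	 */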
4038 
4039 	return (new_entry);
4040 }
4041 
4042 /*
4043  * uvmspace_fork: fork a process' main map
4044  *
4045  * => create a new vmspace for child process from parent.
4046  * => parent's map must not be locked.
4047  */
4048 struct vmspace *
4049 uvmspace_fork(struct process *pr)
4050 {
4051 	struct vmspace *vm1 = pr->ps_vmspace;
4052 	struct vmspace *vm2;
4053 	struct vm_map *old_map = &vm1->vm_map;
4054 	struct vm_map *new_map;
4055 	struct vm_map_entry *old_entry, *new_entry;
4056 	struct uvm_map_deadq dead;
4057 
4058 	vm_map_lock(old_map);
4059 
4060 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
4061 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
4062 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
4063 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
4064 	vm2->vm_dused = 0; /* Statistic managed by us. */
4065 	new_map = &vm2->vm_map;
4066 	vm_map_lock(new_map);
4067 
4068 	/* go entry-by-entry */
4069 	TAILQ_INIT(&dead);
4070 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
4071 		if (old_entry->start == old_entry->end)
4072 			continue;
4073 
4074 		/* first, some sanity checks on the old entry */
4075 		if (UVM_ET_ISSUBMAP(old_entry)) {
4076 			panic("fork: encountered a submap during fork "
4077 			    "(illegal)");
4078 		}
4079 
4080 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
4081 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
4082 			panic("fork: non-copy_on_write map entry marked "
4083 			    "needs_copy (illegal)");
4084 		}
4085 
4086 		/* Apply inheritance. */
4087 		switch (old_entry->inheritance) {
4088 		case MAP_INHERIT_SHARE:
4089 			new_entry = uvm_mapent_forkshared(vm2, new_map,
4090 			    old_map, old_entry, &dead);
4091 			break;
4092 		case MAP_INHERIT_COPY:
4093 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
4094 			    old_map, old_entry, &dead);
4095 			break;
4096 		case MAP_INHERIT_ZERO:
4097 			new_entry = uvm_mapent_forkzero(vm2, new_map,
4098 			    old_map, old_entry, &dead);
4099 			break;
4100 		default:
4101 			continue;
4102 		}
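		/*
		 * MAP_INHERIT_NONE (and any unrecognized code) takes the
		 * default case above: the entry is simply not copied into
		 * the child.
		 */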
4103 
4104 	 	/* Update process statistics. */
4105 		if (!UVM_ET_ISHOLE(new_entry))
4106 			new_map->size += new_entry->end - new_entry->start;
4107 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
4108 		    new_entry->protection != PROT_NONE) {
4109 			vm2->vm_dused += uvmspace_dused(
4110 			    new_map, new_entry->start, new_entry->end);
4111 		}
4112 	}
4113 
4114 	vm_map_unlock(old_map);
4115 	vm_map_unlock(new_map);
4116 
4117 	/*
4118 	 * The dead queue can actually be non-empty here, if multiple
4119 	 * entries described a space in which an entry was inherited.
4120 	 */
4121 	uvm_unmap_detach(&dead, 0);
4122 
4123 #ifdef SYSVSHM
4124 	if (vm1->vm_shm)
4125 		shmfork(vm1, vm2);
4126 #endif
4127 
4128 	return vm2;
4129 }
4130 
4131 /*
4132  * uvm_map_hint: return the beginning of the best area suitable for
4133  * creating a new mapping with "prot" protection.
4134  */
4135 vaddr_t
4136 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
4137     vaddr_t maxaddr)
4138 {
4139 	vaddr_t addr;
4140 	vaddr_t spacing;
4141 
4142 #ifdef __i386__
4143 	/*
4144 	 * If executable, skip the first two pages; otherwise start
4145 	 * after the data + heap region.
4146 	 */
4147 	if ((prot & PROT_EXEC) != 0 &&
4148 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
4149 		addr = (PAGE_SIZE*2) +
4150 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
4151 		return (round_page(addr));
4152 	}
4153 #endif
4154 
4155 #if defined (__LP64__)
4156 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4157 #else
4158 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4159 #endif
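	/*
	 * spacing is a power-of-two minus one: the randomization window
	 * applied after the brk below, halved until it fits below maxaddr.
	 */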
4160 
4161 	/*
4162 	 * Start malloc/mmap after the brk.
4163 	 */
4164 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4165 	addr = MAX(addr, minaddr);
4166 
4167 	if (addr < maxaddr) {
4168 		while (spacing > maxaddr - addr)
4169 			spacing >>= 1;
4170 	}
4171 	addr += arc4random() & spacing;
4172 	return (round_page(addr));
4173 }
4174 
4175 /*
4176  * uvm_map_submap: punch down part of a map into a submap
4177  *
4178  * => only the kernel_map is allowed to be submapped
4179  * => the purpose of submapping is to break up the locking granularity
4180  *	of a larger map
4181  * => the range specified must have been mapped previously with a uvm_map()
4182  *	call [with uobj==NULL] to create a blank map entry in the main map.
4183  *	[And it had better still be blank!]
4184  * => maps which contain submaps should never be copied or forked.
4185  * => to remove a submap, use uvm_unmap() on the main map
4186  *	and then uvm_map_deallocate() the submap.
4187  * => main map must be unlocked.
4188  * => submap must have been init'd and have a zero reference count.
4189  *	[need not be locked as we don't actually reference it]
4190  */
4191 int
4192 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4193     struct vm_map *submap)
4194 {
4195 	struct vm_map_entry *entry;
4196 	int result;
4197 
4198 	if (start > map->max_offset || end > map->max_offset ||
4199 	    start < map->min_offset || end < map->min_offset)
4200 		return EINVAL;
4201 
4202 	vm_map_lock(map);
4203 
4204 	if (uvm_map_lookup_entry(map, start, &entry)) {
4205 		UVM_MAP_CLIP_START(map, entry, start);
4206 		UVM_MAP_CLIP_END(map, entry, end);
4207 	} else
4208 		entry = NULL;
4209 
4210 	if (entry != NULL &&
4211 	    entry->start == start && entry->end == end &&
4212 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4213 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4214 		entry->etype |= UVM_ET_SUBMAP;
4215 		entry->object.sub_map = submap;
4216 		entry->offset = 0;
4217 		uvm_map_reference(submap);
4218 		result = 0;
4219 	} else
4220 		result = EINVAL;
4221 
4222 	vm_map_unlock(map);
4223 	return(result);
4224 }
4225 
4226 /*
4227  * uvm_map_checkprot: check protection in map
4228  *
4229  * => must allow specific protection in a fully allocated region.
4230  * => map must be read or write locked by caller.
4231  */
4232 boolean_t
4233 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4234     vm_prot_t protection)
4235 {
4236 	struct vm_map_entry *entry;
4237 
4238 	if (start < map->min_offset || end > map->max_offset || start > end)
4239 		return FALSE;
4240 	if (start == end)
4241 		return TRUE;
4242 
4243 	/*
4244 	 * Iterate entries.
4245 	 */
4246 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4247 	    entry != NULL && entry->start < end;
4248 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4249 		/* Fail if a hole is found. */
4250 		if (UVM_ET_ISHOLE(entry) ||
4251 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4252 			return FALSE;
4253 
4254 		/* Check protection. */
4255 		if ((entry->protection & protection) != protection)
4256 			return FALSE;
4257 	}
4258 	return TRUE;
4259 }
4260 
4261 /*
4262  * uvm_map_create: create map
4263  */
4264 vm_map_t
4265 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4266 {
4267 	vm_map_t map;
4268 
4269 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4270 	uvm_map_setup(map, pmap, min, max, flags);
4271 	return (map);
4272 }
4273 
4274 /*
4275  * uvm_map_deallocate: drop reference to a map
4276  *
4277  * => caller must not lock map
4278  * => we will zap map if ref count goes to zero
4279  */
4280 void
4281 uvm_map_deallocate(vm_map_t map)
4282 {
4283 	int c;
4284 	struct uvm_map_deadq dead;
4285 
4286 	c = --map->ref_count;
4287 	if (c > 0) {
4288 		return;
4289 	}
4290 
4291 	/*
4292 	 * all references gone.   unmap and free.
4293 	 *
4294 	 * No lock required: we are only one to access this map.
4295 	 * No lock required: we are the only one accessing this map.
4296 	TAILQ_INIT(&dead);
4297 	uvm_tree_sanity(map, __FILE__, __LINE__);
4298 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4299 	    TRUE, FALSE);
4300 	pmap_destroy(map->pmap);
4301 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4302 	free(map, M_VMMAP, sizeof *map);
4303 
4304 	uvm_unmap_detach(&dead, 0);
4305 }
4306 
4307 /*
4308  * uvm_map_inherit: set inheritance code for range of addrs in map.
4309  *
4310  * => map must be unlocked
4311  * => note that the inherit code is used during a "fork".  see fork
4312  *	code for details.
4313  */
4314 int
4315 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4316     vm_inherit_t new_inheritance)
4317 {
4318 	struct vm_map_entry *entry;
4319 
4320 	switch (new_inheritance) {
4321 	case MAP_INHERIT_NONE:
4322 	case MAP_INHERIT_COPY:
4323 	case MAP_INHERIT_SHARE:
4324 	case MAP_INHERIT_ZERO:
4325 		break;
4326 	default:
4327 		return (EINVAL);
4328 	}
4329 
4330 	if (start > end)
4331 		return EINVAL;
4332 	start = MAX(start, map->min_offset);
4333 	end = MIN(end, map->max_offset);
4334 	if (start >= end)
4335 		return 0;
4336 
4337 	vm_map_lock(map);
4338 
4339 	entry = uvm_map_entrybyaddr(&map->addr, start);
4340 	if (entry->end > start)
4341 		UVM_MAP_CLIP_START(map, entry, start);
4342 	else
4343 		entry = RBT_NEXT(uvm_map_addr, entry);
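	/*
	 * Either clip the entry that contains start, or, if start lies in
	 * the free space after the returned entry, begin at the next entry.
	 */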
4344 
4345 	while (entry != NULL && entry->start < end) {
4346 		UVM_MAP_CLIP_END(map, entry, end);
4347 		entry->inheritance = new_inheritance;
4348 		entry = RBT_NEXT(uvm_map_addr, entry);
4349 	}
4350 
4351 	vm_map_unlock(map);
4352 	return (0);
4353 }
4354 
4355 /*
4356  * uvm_map_syscall: permit system calls for range of addrs in map.
4357  *
4358  * => map must be unlocked
4359  */
4360 int
4361 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
4362 {
4363 	struct vm_map_entry *entry;
4364 
4365 	if (start > end)
4366 		return EINVAL;
4367 	start = MAX(start, map->min_offset);
4368 	end = MIN(end, map->max_offset);
4369 	if (start >= end)
4370 		return 0;
4371 	if (map->flags & VM_MAP_SYSCALL_ONCE)	/* only allowed once */
4372 		return (EPERM);
4373 
4374 	vm_map_lock(map);
4375 
4376 	entry = uvm_map_entrybyaddr(&map->addr, start);
4377 	if (entry->end > start)
4378 		UVM_MAP_CLIP_START(map, entry, start);
4379 	else
4380 		entry = RBT_NEXT(uvm_map_addr, entry);
4381 
4382 	while (entry != NULL && entry->start < end) {
4383 		UVM_MAP_CLIP_END(map, entry, end);
4384 		entry->etype |= UVM_ET_SYSCALL;
4385 		entry = RBT_NEXT(uvm_map_addr, entry);
4386 	}
4387 
4388 	map->wserial++;
4389 	map->flags |= VM_MAP_SYSCALL_ONCE;
4390 	vm_map_unlock(map);
4391 	return (0);
4392 }
4393 
4394 /*
4395  * uvm_map_advice: set advice code for range of addrs in map.
4396  *
4397  * => map must be unlocked
4398  */
4399 int
4400 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4401 {
4402 	struct vm_map_entry *entry;
4403 
4404 	switch (new_advice) {
4405 	case MADV_NORMAL:
4406 	case MADV_RANDOM:
4407 	case MADV_SEQUENTIAL:
4408 		break;
4409 	default:
4410 		return (EINVAL);
4411 	}
4412 
4413 	if (start > end)
4414 		return EINVAL;
4415 	start = MAX(start, map->min_offset);
4416 	end = MIN(end, map->max_offset);
4417 	if (start >= end)
4418 		return 0;
4419 
4420 	vm_map_lock(map);
4421 
4422 	entry = uvm_map_entrybyaddr(&map->addr, start);
4423 	if (entry != NULL && entry->end > start)
4424 		UVM_MAP_CLIP_START(map, entry, start);
4425 	else if (entry != NULL)
4426 		entry = RBT_NEXT(uvm_map_addr, entry);
4427 
4428 	/*
4429 	 * XXXJRT: disallow holes?
4430 	 */
4431 	while (entry != NULL && entry->start < end) {
4432 		UVM_MAP_CLIP_END(map, entry, end);
4433 		entry->advice = new_advice;
4434 		entry = RBT_NEXT(uvm_map_addr, entry);
4435 	}
4436 
4437 	vm_map_unlock(map);
4438 	return (0);
4439 }
4440 
4441 /*
4442  * uvm_map_extract: extract a mapping from a map and put it somewhere
4443  * in the kernel_map, setting protection to max_prot.
4444  *
4445  * => map should be unlocked (we will write lock it and kernel_map)
4446  * => returns 0 on success, error code otherwise
4447  * => start must be page aligned
4448  * => len must be a multiple of PAGE_SIZE
4449  * => flags:
4450  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4451  * Mappings are QREF's.
4452  */
4453 int
4454 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4455     vaddr_t *dstaddrp, int flags)
4456 {
4457 	struct uvm_map_deadq dead;
4458 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4459 	vaddr_t dstaddr;
4460 	vaddr_t end;
4461 	vaddr_t cp_start;
4462 	vsize_t cp_len, cp_off;
4463 	int error;
4464 
4465 	TAILQ_INIT(&dead);
4466 	end = start + len;
4467 
4468 	/*
4469 	 * Sanity check on the parameters.
4470 	 * Also, since the mapping may not contain gaps, error out if the
4471 	 * mapped area is not in the source map.
4472 	 */
4473 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4474 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4475 		return EINVAL;
4476 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4477 		return EINVAL;
4478 
4479 	/* The dead-entry queue was initialized above; handle the len == 0 case. */
4480 	if (len == 0)
4481 		return 0;
4482 
4483 	/* Acquire lock on srcmap. */
4484 	vm_map_lock(srcmap);
4485 
4486 	/* With srcmap locked, look up the first entry covering <start,len>. */
4487 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4488 
4489 	/* Check that the range is contiguous. */
4490 	for (entry = first; entry != NULL && entry->end < end;
4491 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4492 		if (VMMAP_FREE_END(entry) != entry->end ||
4493 		    UVM_ET_ISHOLE(entry)) {
4494 			error = EINVAL;
4495 			goto fail;
4496 		}
4497 	}
4498 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4499 		error = EINVAL;
4500 		goto fail;
4501 	}
4502 
4503 	/*
4504 	 * Handle need-copy flag.
4505 	 */
4506 	for (entry = first; entry != NULL && entry->start < end;
4507 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4508 		if (UVM_ET_ISNEEDSCOPY(entry))
4509 			amap_copy(srcmap, entry, M_NOWAIT,
4510 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4511 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4512 			/*
4513 			 * amap_copy failure
4514 			 */
4515 			error = ENOMEM;
4516 			goto fail;
4517 		}
4518 	}
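	/*
	 * Every amap in the range now has its own copy (needs_copy is
	 * clear), so the entries below can be cloned with shared amap
	 * references.
	 */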
4519 
4520 	/* Lock destination map (kernel_map). */
4521 	vm_map_lock(kernel_map);
4522 
4523 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4524 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4525 	    PROT_NONE, 0) != 0) {
4526 		error = ENOMEM;
4527 		goto fail2;
4528 	}
4529 	*dstaddrp = dstaddr;
4530 
4531 	/*
4532 	 * We now have srcmap and kernel_map locked.
4533 	 * dstaddr contains the destination offset in dstmap.
4534 	 */
4535 	/* step 1: start looping through map entries, performing extraction. */
4536 	for (entry = first; entry != NULL && entry->start < end;
4537 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4538 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4539 		if (UVM_ET_ISHOLE(entry))
4540 			continue;
4541 
4542 		/* Calculate uvm_mapent_clone parameters. */
4543 		cp_start = entry->start;
4544 		if (cp_start < start) {
4545 			cp_off = start - cp_start;
4546 			cp_start = start;
4547 		} else
4548 			cp_off = 0;
4549 		cp_len = MIN(entry->end, end) - cp_start;
4550 
4551 		newentry = uvm_mapent_clone(kernel_map,
4552 		    cp_start - start + dstaddr, cp_len, cp_off,
4553 		    entry->protection, entry->max_protection,
4554 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4555 		if (newentry == NULL) {
4556 			error = ENOMEM;
4557 			goto fail2_unmap;
4558 		}
4559 		kernel_map->size += cp_len;
4560 		if (flags & UVM_EXTRACT_FIXPROT)
4561 			newentry->protection = newentry->max_protection;
4562 
4563 		/*
4564 		 * Step 2: perform pmap copy.
4565 		 * (Doing this in the loop saves one RB traversal.)
4566 		 */
4567 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4568 		    cp_start - start + dstaddr, cp_len, cp_start);
4569 	}
4570 	pmap_update(kernel_map->pmap);
4571 
4572 	error = 0;
4573 
4574 	/* Unmap copied entries on failure. */
4575 fail2_unmap:
4576 	if (error) {
4577 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4578 		    FALSE, TRUE);
4579 	}
4580 
4581 	/* Release maps, release dead entries. */
4582 fail2:
4583 	vm_map_unlock(kernel_map);
4584 
4585 fail:
4586 	vm_map_unlock(srcmap);
4587 
4588 	uvm_unmap_detach(&dead, 0);
4589 
4590 	return error;
4591 }
4592 
4593 /*
4594  * uvm_map_clean: clean out a map range
4595  *
4596  * => valid flags:
4597  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4598  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4599  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4600  *   if (flags & PGO_FREE): any cached pages are freed after clean
4601  * => returns an error if any part of the specified range isn't mapped
4602  * => never a need to flush the amap layer since anonymous memory has
4603  *	no permanent home, but we may deactivate pages there
4604  * => called from sys_msync() and sys_madvise()
4605  * => caller must not write-lock map (read OK).
4606  * => we may sleep while cleaning if SYNCIO [with map read-locked]
4607  */
4608 
4609 int
4610 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4611 {
4612 	struct vm_map_entry *first, *entry;
4613 	struct vm_amap *amap;
4614 	struct vm_anon *anon;
4615 	struct vm_page *pg;
4616 	struct uvm_object *uobj;
4617 	vaddr_t cp_start, cp_end;
4618 	int refs;
4619 	int error;
4620 	boolean_t rv;
4621 
4622 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4623 	    (PGO_FREE|PGO_DEACTIVATE));
4624 
4625 	if (start > end || start < map->min_offset || end > map->max_offset)
4626 		return EINVAL;
4627 
4628 	vm_map_lock_read(map);
4629 	first = uvm_map_entrybyaddr(&map->addr, start);
4630 
4631 	/* Make a first pass to check for holes. */
4632 	for (entry = first; entry != NULL && entry->start < end;
4633 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4634 		if (UVM_ET_ISSUBMAP(entry)) {
4635 			vm_map_unlock_read(map);
4636 			return EINVAL;
4637 		}
4638 		if (UVM_ET_ISSUBMAP(entry) ||
4639 		    UVM_ET_ISHOLE(entry) ||
4640 		    (entry->end < end &&
4641 		    VMMAP_FREE_END(entry) != entry->end)) {
4642 			vm_map_unlock_read(map);
4643 			return EFAULT;
4644 		}
4645 	}
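	/*
	 * Second pass: for each entry, deactivate or free the anon pages
	 * as requested, then flush the backing object (if any).
	 */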
4646 
4647 	error = 0;
4648 	for (entry = first; entry != NULL && entry->start < end;
4649 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4650 		amap = entry->aref.ar_amap;	/* top layer */
4651 		if (UVM_ET_ISOBJ(entry))
4652 			uobj = entry->object.uvm_obj;
4653 		else
4654 			uobj = NULL;
4655 
4656 		/*
4657 		 * No amap cleaning necessary if:
4658 		 *  - there's no amap
4659 		 *  - we're not deactivating or freeing pages.
4660 		 */
4661 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4662 			goto flush_object;
4663 
4664 		cp_start = MAX(entry->start, start);
4665 		cp_end = MIN(entry->end, end);
4666 
4667 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4668 			anon = amap_lookup(&entry->aref,
4669 			    cp_start - entry->start);
4670 			if (anon == NULL)
4671 				continue;
4672 
4673 			pg = anon->an_page;
4674 			if (pg == NULL) {
4675 				continue;
4676 			}
4677 			KASSERT(pg->pg_flags & PQ_ANON);
4678 
4679 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4680 			/*
4681 			 * XXX In these first 3 cases, we always just
4682 			 * XXX deactivate the page.  We may want to
4683 			 * XXX handle the different cases more
4684 			 * XXX specifically, in the future.
4685 			 */
4686 			case PGO_CLEANIT|PGO_FREE:
4687 			case PGO_CLEANIT|PGO_DEACTIVATE:
4688 			case PGO_DEACTIVATE:
4689 deactivate_it:
4690 				/* skip the page if it's wired */
4691 				if (pg->wire_count != 0)
4692 					break;
4693 
4694 				uvm_lock_pageq();
4695 
4696 				KASSERT(pg->uanon == anon);
4697 
4698 				/* zap all mappings for the page. */
4699 				pmap_page_protect(pg, PROT_NONE);
4700 
4701 				/* ...and deactivate the page. */
4702 				uvm_pagedeactivate(pg);
4703 
4704 				uvm_unlock_pageq();
4705 				break;
4706 			case PGO_FREE:
4707 				/*
4708 				 * If there are multiple references to
4709 				 * the amap, just deactivate the page.
4710 				 */
4711 				if (amap_refs(amap) > 1)
4712 					goto deactivate_it;
4713 
4714 				/* XXX skip the page if it's wired */
4715 				if (pg->wire_count != 0) {
4716 					break;
4717 				}
4718 				amap_unadd(&entry->aref,
4719 				    cp_start - entry->start);
4720 				refs = --anon->an_ref;
4721 				if (refs == 0)
4722 					uvm_anfree(anon);
4723 				break;
4724 			default:
4725 				panic("uvm_map_clean: weird flags");
4726 			}
4727 		}
4728 
4729 flush_object:
4730 		cp_start = MAX(entry->start, start);
4731 		cp_end = MIN(entry->end, end);
4732 
4733 		/*
4734 		 * flush pages if we've got a valid backing object.
4735 		 *
4736 		 * Don't PGO_FREE if we don't have write permission
4737 		 * and don't flush if this is a copy-on-write object
4738 		 * since we can't know our permissions on it.
4739 		 */
4740 		if (uobj != NULL &&
4741 		    ((flags & PGO_FREE) == 0 ||
4742 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4743 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4744 			rv = uobj->pgops->pgo_flush(uobj,
4745 			    cp_start - entry->start + entry->offset,
4746 			    cp_end - entry->start + entry->offset, flags);
4747 
4748 			if (rv == FALSE)
4749 				error = EFAULT;
4750 		}
4751 	}
4752 
4753 	vm_map_unlock_read(map);
4754 	return error;
4755 }
4756 
4757 /*
4758  * UVM_MAP_CLIP_END implementation
4759  */
4760 void
4761 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4762 {
4763 	struct vm_map_entry *tmp;
4764 
4765 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4766 	tmp = uvm_mapent_alloc(map, 0);
4767 
4768 	/* Invoke splitentry. */
4769 	uvm_map_splitentry(map, entry, tmp, addr);
4770 }
4771 
4772 /*
4773  * UVM_MAP_CLIP_START implementation
4774  *
4775  * Clippers are required to not change the pointers to the entry they are
4776  * clipping on.
4777  * Since uvm_map_splitentry turns the original entry into the lowest
4778  * entry (address wise) we do a swap between the new entry and the original
4779  * entry, prior to calling uvm_map_splitentry.
4780  */
4781 void
4782 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4783 {
4784 	struct vm_map_entry *tmp;
4785 	struct uvm_addr_state *free;
4786 
4787 	/* Unlink original. */
4788 	free = uvm_map_uaddr_e(map, entry);
4789 	uvm_mapent_free_remove(map, free, entry);
4790 	uvm_mapent_addr_remove(map, entry);
4791 
4792 	/* Copy entry. */
4793 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4794 	tmp = uvm_mapent_alloc(map, 0);
4795 	uvm_mapent_copy(entry, tmp);
4796 
4797 	/* Put new entry in place of original entry. */
4798 	uvm_mapent_addr_insert(map, tmp);
4799 	uvm_mapent_free_insert(map, free, tmp);
4800 
4801 	/* Invoke splitentry. */
4802 	uvm_map_splitentry(map, tmp, entry, addr);
4803 }
4804 
4805 /*
4806  * Boundary fixer.
4807  */
4808 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4809 static __inline vaddr_t
4810 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4811 {
4812 	return (min < bound && max > bound) ? bound : max;
4813 }
4814 
4815 /*
4816  * Choose free list based on address at start of free space.
4817  *
4818  * The uvm_addr_state returned contains addr and is the first of:
4819  * - uaddr_exe
4820  * - uaddr_brk_stack
4821  * - uaddr_any
4822  */
4823 struct uvm_addr_state*
4824 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4825 {
4826 	struct uvm_addr_state *uaddr;
4827 	int i;
4828 
4829 	/* Special case the first page, to prevent mmap from returning 0. */
4830 	if (addr < VMMAP_MIN_ADDR)
4831 		return NULL;
4832 
4833 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4834 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4835 		if (addr >= uvm_maxkaddr)
4836 			return NULL;
4837 	}
4838 
4839 	/* Is the address inside the exe-only map? */
4840 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4841 	    addr < map->uaddr_exe->uaddr_maxaddr)
4842 		return map->uaddr_exe;
4843 
4844 	/* Check if the space falls inside brk/stack area. */
4845 	if ((addr >= map->b_start && addr < map->b_end) ||
4846 	    (addr >= map->s_start && addr < map->s_end)) {
4847 		if (map->uaddr_brk_stack != NULL &&
4848 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4849 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4850 			return map->uaddr_brk_stack;
4851 		} else
4852 			return NULL;
4853 	}
4854 
4855 	/*
4856 	 * Check the other selectors.
4857 	 *
4858 	 * These selectors are only marked as the owner if they have insert
4859 	 * functions.
4860 	 */
4861 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4862 		uaddr = map->uaddr_any[i];
4863 		if (uaddr == NULL)
4864 			continue;
4865 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4866 			continue;
4867 
4868 		if (addr >= uaddr->uaddr_minaddr &&
4869 		    addr < uaddr->uaddr_maxaddr)
4870 			return uaddr;
4871 	}
4872 
4873 	return NULL;
4874 }
4875 
4876 /*
4877  * Choose free list based on address at start of free space.
4878  *
4879  * The uvm_addr_state returned contains addr and is the first of:
4880  * - uaddr_exe
4881  * - uaddr_brk_stack
4882  * - uaddr_any
4883  */
4884 struct uvm_addr_state*
4885 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4886 {
4887 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4888 }
4889 
4890 /*
4891  * Returns the first free-memory boundary that is crossed by [min-max].
4892  */
4893 vsize_t
4894 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4895 {
4896 	struct uvm_addr_state	*uaddr;
4897 	int			 i;
4898 
4899 	/* Never return first page. */
4900 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4901 
4902 	/* Treat uvm_maxkaddr specially if the map is a kernel map. */
4903 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4904 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4905 
4906 	/* Check for exe-only boundaries. */
4907 	if (map->uaddr_exe != NULL) {
4908 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4909 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4910 	}
4911 
4912 	/* Check for brk/stack boundaries. */
4913 	if (map->uaddr_brk_stack != NULL) {
4914 		max = uvm_map_boundfix(min, max,
4915 		    map->uaddr_brk_stack->uaddr_minaddr);
4916 		max = uvm_map_boundfix(min, max,
4917 		    map->uaddr_brk_stack->uaddr_maxaddr);
4918 	}
4919 
4920 	/* Check other boundaries. */
4921 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4922 		uaddr = map->uaddr_any[i];
4923 		if (uaddr != NULL) {
4924 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4925 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4926 		}
4927 	}
4928 
4929 	/* Boundaries at stack and brk() area. */
4930 	max = uvm_map_boundfix(min, max, map->s_start);
4931 	max = uvm_map_boundfix(min, max, map->s_end);
4932 	max = uvm_map_boundfix(min, max, map->b_start);
4933 	max = uvm_map_boundfix(min, max, map->b_end);
4934 
4935 	return max;
4936 }
4937 
4938 /*
4939  * Update map allocation start and end addresses from proc vmspace.
4940  */
4941 void
4942 uvm_map_vmspace_update(struct vm_map *map,
4943     struct uvm_map_deadq *dead, int flags)
4944 {
4945 	struct vmspace *vm;
4946 	vaddr_t b_start, b_end, s_start, s_end;
4947 
4948 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4949 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4950 
4951 	/*
4952 	 * Derive actual allocation boundaries from vmspace.
4953 	 */
4954 	vm = (struct vmspace *)map;
4955 	b_start = (vaddr_t)vm->vm_daddr;
4956 	b_end   = b_start + BRKSIZ;
4957 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4958 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4959 #ifdef DIAGNOSTIC
4960 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4961 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4962 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4963 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4964 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4965 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4966 		    vm, b_start, b_end, s_start, s_end);
4967 	}
4968 #endif
4969 
4970 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4971 	    map->s_start == s_start && map->s_end == s_end))
4972 		return;
4973 
4974 	uvm_map_freelist_update(map, dead, b_start, b_end,
4975 	    s_start, s_end, flags);
4976 }
4977 
4978 /*
4979  * Grow kernel memory.
4980  *
4981  * This function is only called for kernel maps when an allocation fails.
4982  *
4983  * If the map has a gap that is large enough to accommodate alloc_sz, this
4984  * function will make sure map->free will include it.
4985  */
4986 void
4987 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4988     vsize_t alloc_sz, int flags)
4989 {
4990 	vsize_t sz;
4991 	vaddr_t end;
4992 	struct vm_map_entry *entry;
4993 
4994 	/* Kernel memory only. */
4995 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4996 	/* Destroy free list. */
4997 	uvm_map_freelist_update_clear(map, dead);
4998 
4999 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
5000 	if (map->flags & VM_MAP_GUARDPAGES)
5001 		alloc_sz += PAGE_SIZE;
5002 
5003 	/*
5004 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
5005 	 *
5006 	 * Don't handle the case where the multiplication overflows:
5007 	 * if that happens, the allocation is probably too big anyway.
5008 	 */
5009 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
5010 
5011 	/*
5012 	 * Walk forward until a gap large enough for alloc_sz shows up.
5013 	 *
5014 	 * We assume the kernel map has no boundaries.
5015 	 * uvm_maxkaddr may be zero.
5016 	 */
5017 	end = MAX(uvm_maxkaddr, map->min_offset);
5018 	entry = uvm_map_entrybyaddr(&map->addr, end);
5019 	while (entry && entry->fspace < alloc_sz)
5020 		entry = RBT_NEXT(uvm_map_addr, entry);
5021 	if (entry) {
5022 		end = MAX(VMMAP_FREE_START(entry), end);
5023 		end += MIN(sz, map->max_offset - end);
5024 	} else
5025 		end = map->max_offset;
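	/*
	 * end is now the kernel VA up to which the kernel map is grown so
	 * that the failed allocation of alloc_sz bytes has room.
	 */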
5026 
5027 	/* Reserve pmap entries. */
5028 #ifdef PMAP_GROWKERNEL
5029 	uvm_maxkaddr = pmap_growkernel(end);
5030 #else
5031 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
5032 #endif
5033 
5034 	/* Rebuild free list. */
5035 	uvm_map_freelist_update_refill(map, flags);
5036 }
5037 
5038 /*
5039  * Freelist update subfunction: unlink all entries from freelists.
5040  */
5041 void
5042 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
5043 {
5044 	struct uvm_addr_state *free;
5045 	struct vm_map_entry *entry, *prev, *next;
5046 
5047 	prev = NULL;
5048 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
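	/*
	 * Walk every entry: unlink it from its free list and fold the free
	 * space of zero-sized entries into the preceding entry, pushing
	 * the now-empty entries onto the dead queue.
	 */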
5049 	    entry = next) {
5050 		next = RBT_NEXT(uvm_map_addr, entry);
5051 
5052 		free = uvm_map_uaddr_e(map, entry);
5053 		uvm_mapent_free_remove(map, free, entry);
5054 
5055 		if (prev != NULL && entry->start == entry->end) {
5056 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
5057 			uvm_mapent_addr_remove(map, entry);
5058 			DEAD_ENTRY_PUSH(dead, entry);
5059 		} else
5060 			prev = entry;
5061 	}
5062 }
5063 
5064 /*
5065  * Freelist update subfunction: refill the freelists with entries.
5066  */
5067 void
5068 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
5069 {
5070 	struct vm_map_entry *entry;
5071 	vaddr_t min, max;
5072 
5073 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5074 		min = VMMAP_FREE_START(entry);
5075 		max = VMMAP_FREE_END(entry);
5076 		entry->fspace = 0;
5077 
5078 		entry = uvm_map_fix_space(map, entry, min, max, flags);
5079 	}
5080 
5081 	uvm_tree_sanity(map, __FILE__, __LINE__);
5082 }
5083 
5084 /*
5085  * Change {a,b}_{start,end} allocation ranges and associated free lists.
5086  */
5087 void
5088 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
5089     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
5090 {
5091 	KDASSERT(b_end >= b_start && s_end >= s_start);
5092 
5093 	/* Clear all free lists. */
5094 	uvm_map_freelist_update_clear(map, dead);
5095 
5096 	/* Apply new bounds. */
5097 	map->b_start = b_start;
5098 	map->b_end   = b_end;
5099 	map->s_start = s_start;
5100 	map->s_end   = s_end;
5101 
5102 	/* Refill free lists. */
5103 	uvm_map_freelist_update_refill(map, flags);
5104 }
5105 
5106 /*
5107  * Assign a uvm_addr_state to the specified pointer in vm_map.
5108  *
5109  * May sleep.
5110  */
5111 void
5112 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
5113     struct uvm_addr_state *newval)
5114 {
5115 	struct uvm_map_deadq dead;
5116 
5117 	/* Pointer which must be in this map. */
5118 	KASSERT(which != NULL);
5119 	KASSERT((void*)map <= (void*)(which) &&
5120 	    (void*)(which) < (void*)(map + 1));
5121 
5122 	vm_map_lock(map);
5123 	TAILQ_INIT(&dead);
5124 	uvm_map_freelist_update_clear(map, &dead);
5125 
5126 	uvm_addr_destroy(*which);
5127 	*which = newval;
5128 
5129 	uvm_map_freelist_update_refill(map, 0);
5130 	vm_map_unlock(map);
5131 	uvm_unmap_detach(&dead, 0);
5132 }
5133 
5134 /*
5135  * Correct space insert.
5136  *
5137  * Entry must not be on any freelist.
5138  */
5139 struct vm_map_entry*
5140 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5141     vaddr_t min, vaddr_t max, int flags)
5142 {
5143 	struct uvm_addr_state	*free, *entfree;
5144 	vaddr_t			 lmax;
5145 
5146 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5147 	KDASSERT(min <= max);
5148 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5149 	    min == map->min_offset);
5150 
5151 	/*
5152 	 * During the function, entfree will always point at the uaddr state
5153 	 * for entry.
5154 	 */
5155 	entfree = (entry == NULL ? NULL :
5156 	    uvm_map_uaddr_e(map, entry));
5157 
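	/*
	 * Walk [min, max) one boundary at a time, growing the current
	 * entry's free space or starting a new entry whenever the owning
	 * free list (or a guard page) forces a split.
	 */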
5158 	while (min != max) {
5159 		/* Claim guard page for entry. */
5160 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5161 		    VMMAP_FREE_END(entry) == entry->end &&
5162 		    entry->start != entry->end) {
5163 			if (max - min == 2 * PAGE_SIZE) {
5164 				/*
5165 				 * If the free-space gap is exactly 2 pages,
5166 				 * we make the guard 2 pages instead of 1.
5167 				 * Because in a guarded map, an area needs
5168 				 * at least 2 pages to allocate from:
5169 				 * one page for the allocation and one for
5170 				 * the guard.
5171 				 */
5172 				entry->guard = 2 * PAGE_SIZE;
5173 				min = max;
5174 			} else {
5175 				entry->guard = PAGE_SIZE;
5176 				min += PAGE_SIZE;
5177 			}
5178 			continue;
5179 		}
5180 
5181 		/*
5182 		 * Handle the case where entry has a 2-page guard, but the
5183 		 * space after entry is freed.
5184 		 */
5185 		if (entry != NULL && entry->fspace == 0 &&
5186 		    entry->guard > PAGE_SIZE) {
5187 			entry->guard = PAGE_SIZE;
5188 			min = VMMAP_FREE_START(entry);
5189 		}
5190 
5191 		lmax = uvm_map_boundary(map, min, max);
5192 		free = uvm_map_uaddr(map, min);
5193 
5194 		/*
5195 		 * Entries are merged if they point at the same free list.
5196 		 * Exception to that rule: if min == uvm_maxkaddr, a new
5197 		 * entry is started regardless (otherwise the allocators
5198 		 * will get confused).
5199 		 */
5200 		if (entry != NULL && free == entfree &&
5201 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5202 		    min == uvm_maxkaddr)) {
5203 			KDASSERT(VMMAP_FREE_END(entry) == min);
5204 			entry->fspace += lmax - min;
5205 		} else {
5206 			/*
5207 			 * Commit entry to the free list: no more free space
5208 			 * will be added to it.
5209 			 * We'll start a new entry and add further free space
5210 			 * to that entry instead.
5211 			 */
5212 			if (entry != NULL)
5213 				uvm_mapent_free_insert(map, entfree, entry);
5214 
5215 			/* New entry for new uaddr. */
5216 			entry = uvm_mapent_alloc(map, flags);
5217 			KDASSERT(entry != NULL);
5218 			entry->end = entry->start = min;
5219 			entry->guard = 0;
5220 			entry->fspace = lmax - min;
5221 			entry->object.uvm_obj = NULL;
5222 			entry->offset = 0;
5223 			entry->etype = 0;
5224 			entry->protection = entry->max_protection = 0;
5225 			entry->inheritance = 0;
5226 			entry->wired_count = 0;
5227 			entry->advice = 0;
5228 			entry->aref.ar_pageoff = 0;
5229 			entry->aref.ar_amap = NULL;
5230 			uvm_mapent_addr_insert(map, entry);
5231 
5232 			entfree = free;
5233 		}
5234 
5235 		min = lmax;
5236 	}
5237 	/* Finally put entry on the uaddr state. */
5238 	if (entry != NULL)
5239 		uvm_mapent_free_insert(map, entfree, entry);
5240 
5241 	return entry;
5242 }
5243 
5244 /*
5245  * MQuery style of allocation.
5246  *
5247  * This allocator searches forward until sufficient space is found to map
5248  * the given size.
5249  *
5250  * XXX: factor in offset (via pmap_prefer) and protection?
5251  */
5252 int
5253 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5254     int flags)
5255 {
5256 	struct vm_map_entry *entry, *last;
5257 	vaddr_t addr;
5258 	vaddr_t tmp, pmap_align, pmap_offset;
5259 	int error;
5260 
5261 	addr = *addr_p;
5262 	vm_map_lock_read(map);
5263 
5264 	/* Configure pmap prefer. */
5265 	if (offset != UVM_UNKNOWN_OFFSET) {
5266 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5267 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5268 	} else {
5269 		pmap_align = PAGE_SIZE;
5270 		pmap_offset = 0;
5271 	}
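	/*
	 * pmap_align is a power of two and pmap_offset the preferred
	 * offset within that alignment; without a known offset both
	 * degenerate to plain page alignment.
	 */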
5272 
5273 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5274 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5275 	  	tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5276 		if (tmp < addr)
5277 			tmp += pmap_align;
5278 		addr = tmp;
5279 	}
5280 
5281 	/* First, check if the requested range is fully available. */
5282 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5283 	last = NULL;
5284 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5285 		error = 0;
5286 		goto out;
5287 	}
5288 	if (flags & UVM_FLAG_FIXED) {
5289 		error = EINVAL;
5290 		goto out;
5291 	}
5292 
5293 	error = ENOMEM; /* Default error from here. */
5294 
5295 	/*
5296 	 * At this point, the memory at <addr, sz> is not available.
5297 	 * The reasons are:
5298 	 * [1] it's outside the map,
5299 	 * [2] it starts in used memory (and therefore needs to move
5300 	 *     toward the first free page in entry),
5301 	 * [3] it starts in free memory but bumps into used memory.
5302 	 *
5303 	 * Note that for case [2], the forward moving is handled by the
5304 	 * for loop below.
5305 	 */
5306 	if (entry == NULL) {
5307 		/* [1] Outside the map. */
5308 		if (addr >= map->max_offset)
5309 			goto out;
5310 		else
5311 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5312 	} else if (VMMAP_FREE_START(entry) <= addr) {
5313 		/* [3] Bumped into used memory. */
5314 		entry = RBT_NEXT(uvm_map_addr, entry);
5315 	}
5316 
5317 	/* Test if the next entry is sufficient for the allocation. */
5318 	for (; entry != NULL;
5319 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5320 		if (entry->fspace == 0)
5321 			continue;
5322 		addr = VMMAP_FREE_START(entry);
5323 
5324 restart:	/* Restart address checks on address change. */
5325 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5326 		if (tmp < addr)
5327 			tmp += pmap_align;
5328 		addr = tmp;
5329 		if (addr >= VMMAP_FREE_END(entry))
5330 			continue;
5331 
5332 		/* Skip brk() allocation addresses. */
5333 		if (addr + sz > map->b_start && addr < map->b_end) {
5334 			if (VMMAP_FREE_END(entry) > map->b_end) {
5335 				addr = map->b_end;
5336 				goto restart;
5337 			} else
5338 				continue;
5339 		}
5340 		/* Skip stack allocation addresses. */
5341 		if (addr + sz > map->s_start && addr < map->s_end) {
5342 			if (VMMAP_FREE_END(entry) > map->s_end) {
5343 				addr = map->s_end;
5344 				goto restart;
5345 			} else
5346 				continue;
5347 		}
5348 
5349 		last = NULL;
5350 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5351 			error = 0;
5352 			goto out;
5353 		}
5354 	}
5355 
5356 out:
5357 	vm_map_unlock_read(map);
5358 	if (error == 0)
5359 		*addr_p = addr;
5360 	return error;
5361 }
5362 
5363 /*
5364  * Determine allocation bias.
5365  *
5366  * Returns 1 if we should bias to high addresses, -1 for a bias towards low
5367  * addresses, or 0 for no bias.
5368  * The bias mechanism is intended to avoid clashing with brk() and stack
5369  * areas.
5370  */
5371 int
5372 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
5373 {
5374 	vaddr_t start, end;
5375 
5376 	start = VMMAP_FREE_START(entry);
5377 	end = VMMAP_FREE_END(entry);
5378 
5379 	/* Stay at the top of brk() area. */
5380 	if (end >= map->b_start && start < map->b_end)
5381 		return 1;
5382 	/* Stay at the far end of the stack area. */
5383 	if (end >= map->s_start && start < map->s_end) {
5384 #ifdef MACHINE_STACK_GROWS_UP
5385 		return 1;
5386 #else
5387 		return -1;
5388 #endif
5389 	}
5390 
5391 	/* No bias, this area is meant for us. */
5392 	return 0;
5393 }
5394 
5395 
5396 boolean_t
5397 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5398 {
5399 	boolean_t rv;
5400 
5401 	if (map->flags & VM_MAP_INTRSAFE) {
5402 		rv = mtx_enter_try(&map->mtx);
5403 	} else {
5404 		mtx_enter(&map->flags_lock);
5405 		if (map->flags & VM_MAP_BUSY) {
5406 			mtx_leave(&map->flags_lock);
5407 			return (FALSE);
5408 		}
5409 		mtx_leave(&map->flags_lock);
5410 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5411 		/* check if the lock is busy and back out if we won the race */
5412 		if (rv) {
5413 			mtx_enter(&map->flags_lock);
5414 			if (map->flags & VM_MAP_BUSY) {
5415 				rw_exit(&map->lock);
5416 				rv = FALSE;
5417 			}
5418 			mtx_leave(&map->flags_lock);
5419 		}
5420 	}
5421 
5422 	if (rv) {
5423 		map->timestamp++;
5424 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5425 		uvm_tree_sanity(map, file, line);
5426 		uvm_tree_size_chk(map, file, line);
5427 	}
5428 
5429 	return (rv);
5430 }
5431 
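/*
 * vm_map_lock_ln: blocking counterpart of the above.  For non-INTRSAFE
 * maps it first waits for VM_MAP_BUSY to clear (setting VM_MAP_WANTLOCK so
 * vm_map_unbusy() knows to wake us), then takes the rwlock with
 * RW_SLEEPFAIL so that sleeping inside rw_enter() restarts the whole
 * sequence.  Because VM_MAP_BUSY may be set again between dropping
 * flags_lock and winning the rwlock, the flag is rechecked afterwards and
 * the lock dropped if the map went busy in the meantime.
 */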
5432 void
5433 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5434 {
5435 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5436 		do {
5437 			mtx_enter(&map->flags_lock);
5438 tryagain:
5439 			while (map->flags & VM_MAP_BUSY) {
5440 				map->flags |= VM_MAP_WANTLOCK;
5441 				msleep_nsec(&map->flags, &map->flags_lock,
5442 				    PVM, vmmapbsy, INFSLP);
5443 			}
5444 			mtx_leave(&map->flags_lock);
5445 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5446 		/* back out if the map went busy while we were taking the lock */
5447 		mtx_enter(&map->flags_lock);
5448 		if (map->flags & VM_MAP_BUSY) {
5449 			rw_exit(&map->lock);
5450 			goto tryagain;
5451 		}
5452 		mtx_leave(&map->flags_lock);
5453 	} else {
5454 		mtx_enter(&map->mtx);
5455 	}
5456 
5457 	map->timestamp++;
5458 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5459 	uvm_tree_sanity(map, file, line);
5460 	uvm_tree_size_chk(map, file, line);
5461 }
5462 
5463 void
5464 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5465 {
5466 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5467 		rw_enter_read(&map->lock);
5468 	else
5469 		mtx_enter(&map->mtx);
5470 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5471 	uvm_tree_sanity(map, file, line);
5472 	uvm_tree_size_chk(map, file, line);
5473 }
5474 
5475 void
5476 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5477 {
5478 	uvm_tree_sanity(map, file, line);
5479 	uvm_tree_size_chk(map, file, line);
5480 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5481 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5482 		rw_exit(&map->lock);
5483 	else
5484 		mtx_leave(&map->mtx);
5485 }
5486 
5487 void
5488 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5489 {
5490 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5491 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5492 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5493 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5494 		rw_exit_read(&map->lock);
5495 	else
5496 		mtx_leave(&map->mtx);
5497 }
5498 
5499 void
5500 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5501 {
5502 	uvm_tree_sanity(map, file, line);
5503 	uvm_tree_size_chk(map, file, line);
5504 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5505 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5506 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5507 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5508 		rw_enter(&map->lock, RW_DOWNGRADE);
5509 }
5510 
5511 void
5512 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5513 {
5514 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5515 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5516 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5517 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5518 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5519 		rw_exit_read(&map->lock);
5520 		rw_enter_write(&map->lock);
5521 	}
5522 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5523 	uvm_tree_sanity(map, file, line);
5524 }
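/*
 * Note on the downgrade/upgrade pair above: the downgrade is atomic
 * (RW_DOWNGRADE converts the held write lock into a read lock), but the
 * upgrade is not; it drops the read lock before taking the write lock, so
 * another writer can slip in.  A hypothetical caller can detect that by
 * comparing map->timestamp, which every write lock increments:
 *
 *	ts = map->timestamp;		(still under the read lock)
 *	vm_map_upgrade(map);		(assumed wrapper for the _ln version)
 *	if (map->timestamp != ts)
 *		(another writer intervened; revalidate cached entries)
 */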
5525 
5526 void
5527 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5528 {
5529 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5530 	mtx_enter(&map->flags_lock);
5531 	map->flags |= VM_MAP_BUSY;
5532 	mtx_leave(&map->flags_lock);
5533 }
5534 
5535 void
5536 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5537 {
5538 	int oflags;
5539 
5540 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5541 	mtx_enter(&map->flags_lock);
5542 	oflags = map->flags;
5543 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5544 	mtx_leave(&map->flags_lock);
5545 	if (oflags & VM_MAP_WANTLOCK)
5546 		wakeup(&map->flags);
5547 }
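/*
 * One plausible way to use the busy/unbusy pair above (sketch only, not
 * quoted from a specific caller; the vm_map_busy()/vm_map_unbusy() and
 * downgrade/upgrade wrappers are assumed to map onto the _ln functions):
 * mark the map busy so new writers block in vm_map_lock(), keep only a
 * read lock while doing work that may sleep, then regain the write lock
 * and clear the flag.  The busy holder must not call vm_map_lock() itself
 * while the flag is set, since it would wait on its own flag; it relies on
 * downgrade/upgrade, which do not check VM_MAP_BUSY.
 *
 *	vm_map_lock(map);
 *	vm_map_busy(map);
 *	vm_map_downgrade(map);
 *	... sleeping work, e.g. faulting pages in ...
 *	vm_map_upgrade(map);
 *	vm_map_unbusy(map);
 *	vm_map_unlock(map);
 */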
5548 
5549 #ifndef SMALL_KERNEL
5550 int
5551 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5552     size_t *lenp)
5553 {
5554 	struct vm_map_entry *entry;
5555 	vaddr_t start;
5556 	int cnt, maxcnt, error = 0;
5557 
5558 	KASSERT(*lenp > 0);
5559 	KASSERT((*lenp % sizeof(*kve)) == 0);
5560 	cnt = 0;
5561 	maxcnt = *lenp / sizeof(*kve);
5562 	KASSERT(maxcnt > 0);
5563 
5564 	/*
5565 	 * Return only entries whose start address is at or above the given
5566 	 * base address.  This allows userland to iterate without knowing
5567 	 * the number of entries beforehand.
5568 	 */
5569 	start = (vaddr_t)kve[0].kve_start;
5570 
5571 	vm_map_lock(map);
5572 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5573 		if (cnt == maxcnt) {
5574 			error = ENOMEM;
5575 			break;
5576 		}
5577 		if (start != 0 && entry->start < start)
5578 			continue;
5579 		kve->kve_start = entry->start;
5580 		kve->kve_end = entry->end;
5581 		kve->kve_guard = entry->guard;
5582 		kve->kve_fspace = entry->fspace;
5583 		kve->kve_fspace_augment = entry->fspace_augment;
5584 		kve->kve_offset = entry->offset;
5585 		kve->kve_wired_count = entry->wired_count;
5586 		kve->kve_etype = entry->etype;
5587 		kve->kve_protection = entry->protection;
5588 		kve->kve_max_protection = entry->max_protection;
5589 		kve->kve_advice = entry->advice;
5590 		kve->kve_inheritance = entry->inheritance;
5591 		kve->kve_flags = entry->flags;
5592 		kve++;
5593 		cnt++;
5594 	}
5595 	vm_map_unlock(map);
5596 
5597 	KASSERT(cnt <= maxcnt);
5598 
5599 	*lenp = sizeof(*kve) * cnt;
5600 	return error;
5601 }
5602 #endif
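/*
 * Userland consumption of uvm_map_fill_vmmap() above, for reference: the
 * entries are exported as struct kinfo_vmentry records via sysctl(2).  The
 * caller primes kve_start of the first record with the resume address
 * (0 on the first call) and continues from the last kve_end until fewer
 * records than requested come back.  Minimal sketch under stated
 * assumptions: the mib layout { CTL_KERN, KERN_PROC_VMMAP, pid } and the
 * idea that a full buffer may surface as ENOMEM are assumptions to check
 * against <sys/sysctl.h> and the sysctl glue, not guarantees.
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	struct kinfo_vmentry kve[64];
 *	int mib[3] = { CTL_KERN, KERN_PROC_VMMAP, getpid() };
 *	size_t len, i, n;
 *	u_long next = 0;
 *
 *	for (;;) {
 *		kve[0].kve_start = next;	(resume address, see above)
 *		len = sizeof(kve);
 *		if (sysctl(mib, 3, kve, &len, NULL, 0) == -1 &&
 *		    errno != ENOMEM)		(assumed: ENOMEM means full)
 *			break;
 *		n = len / sizeof(kve[0]);
 *		for (i = 0; i < n; i++)
 *			printf("%lx-%lx\n", kve[i].kve_start, kve[i].kve_end);
 *		if (n < 64)			(short return: no more entries)
 *			break;
 *		next = kve[n - 1].kve_end;
 *	}
 */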
5603 
5604 
5605 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5606     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5607 
5608 
5609 /*
5610  * MD code: vmspace allocator setup.
5611  */
5612 
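/*
 * The three per-architecture variants below share the same shape: clamp
 * the low end to VMMAP_MIN_ADDR so page 0 is never handed out, install a
 * single randomized selector over the whole range for now (the pivot-based
 * layouts remain under "#if 0"), and add the dedicated brk/stack selector
 * unless SMALL_KERNEL is defined.
 */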
5613 #ifdef __i386__
5614 void
5615 uvm_map_setup_md(struct vm_map *map)
5616 {
5617 	vaddr_t		min, max;
5618 
5619 	min = map->min_offset;
5620 	max = map->max_offset;
5621 
5622 	/*
5623 	 * Ensure the selectors will not try to manage page 0;
5624 	 * it's too special.
5625 	 */
5626 	if (min < VMMAP_MIN_ADDR)
5627 		min = VMMAP_MIN_ADDR;
5628 
5629 #if 0	/* Cool stuff, not yet */
5630 	/* Executable code is special. */
5631 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5632 	/* Place normal allocations beyond executable mappings. */
5633 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5634 #else	/* Crappy stuff, for now */
5635 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5636 #endif
5637 
5638 #ifndef SMALL_KERNEL
5639 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5640 #endif /* !SMALL_KERNEL */
5641 }
5642 #elif __LP64__
5643 void
5644 uvm_map_setup_md(struct vm_map *map)
5645 {
5646 	vaddr_t		min, max;
5647 
5648 	min = map->min_offset;
5649 	max = map->max_offset;
5650 
5651 	/*
5652 	 * Ensure the selectors will not try to manage page 0;
5653 	 * it's too special.
5654 	 */
5655 	if (min < VMMAP_MIN_ADDR)
5656 		min = VMMAP_MIN_ADDR;
5657 
5658 #if 0	/* Cool stuff, not yet */
5659 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5660 #else	/* Crappy stuff, for now */
5661 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5662 #endif
5663 
5664 #ifndef SMALL_KERNEL
5665 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5666 #endif /* !SMALL_KERNEL */
5667 }
5668 #else	/* non-i386, 32 bit */
5669 void
5670 uvm_map_setup_md(struct vm_map *map)
5671 {
5672 	vaddr_t		min, max;
5673 
5674 	min = map->min_offset;
5675 	max = map->max_offset;
5676 
5677 	/*
5678 	 * Ensure the selectors will not try to manage page 0;
5679 	 * it's too special.
5680 	 */
5681 	if (min < VMMAP_MIN_ADDR)
5682 		min = VMMAP_MIN_ADDR;
5683 
5684 #if 0	/* Cool stuff, not yet */
5685 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5686 #else	/* Crappy stuff, for now */
5687 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5688 #endif
5689 
5690 #ifndef SMALL_KERNEL
5691 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5692 #endif /* !SMALL_KERNEL */
5693 }
5694 #endif
5695