xref: /openbsd-src/sys/uvm/uvm_map.c (revision 6396a31b28c13abcc71f05292f11b42abbafd7d3)
1 /*	$OpenBSD: uvm_map.c,v 1.245 2019/06/01 22:42:20 deraadt Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/mman.h>
90 #include <sys/proc.h>
91 #include <sys/malloc.h>
92 #include <sys/pool.h>
93 #include <sys/sysctl.h>
94 #include <sys/signalvar.h>
95 #include <sys/syslog.h>
96 #include <sys/user.h>
97 
98 #ifdef SYSVSHM
99 #include <sys/shm.h>
100 #endif
101 
102 #include <uvm/uvm.h>
103 
104 #ifdef DDB
105 #include <uvm/uvm_ddb.h>
106 #endif
107 
108 #include <uvm/uvm_addr.h>
109 
110 
111 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
112 int			 uvm_mapent_isjoinable(struct vm_map*,
113 			    struct vm_map_entry*, struct vm_map_entry*);
114 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
115 			    struct vm_map_entry*, struct uvm_map_deadq*);
116 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
117 			    struct vm_map_entry*, struct uvm_map_deadq*);
118 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
119 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
120 			    struct uvm_map_deadq*, struct vm_map_entry*);
121 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
122 void			 uvm_mapent_free(struct vm_map_entry*);
123 void			 uvm_unmap_kill_entry(struct vm_map*,
124 			    struct vm_map_entry*);
125 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
126 void			 uvm_mapent_mkfree(struct vm_map*,
127 			    struct vm_map_entry*, struct vm_map_entry**,
128 			    struct uvm_map_deadq*, boolean_t);
129 void			 uvm_map_pageable_pgon(struct vm_map*,
130 			    struct vm_map_entry*, struct vm_map_entry*,
131 			    vaddr_t, vaddr_t);
132 int			 uvm_map_pageable_wire(struct vm_map*,
133 			    struct vm_map_entry*, struct vm_map_entry*,
134 			    vaddr_t, vaddr_t, int);
135 void			 uvm_map_setup_entries(struct vm_map*);
136 void			 uvm_map_setup_md(struct vm_map*);
137 void			 uvm_map_teardown(struct vm_map*);
138 void			 uvm_map_vmspace_update(struct vm_map*,
139 			    struct uvm_map_deadq*, int);
140 void			 uvm_map_kmem_grow(struct vm_map*,
141 			    struct uvm_map_deadq*, vsize_t, int);
142 void			 uvm_map_freelist_update_clear(struct vm_map*,
143 			    struct uvm_map_deadq*);
144 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
145 void			 uvm_map_freelist_update(struct vm_map*,
146 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
147 			    vaddr_t, vaddr_t, int);
148 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
149 			    vaddr_t, vaddr_t, int);
150 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
151 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
152 			    int);
153 int			 uvm_map_findspace(struct vm_map*,
154 			    struct vm_map_entry**, struct vm_map_entry**,
155 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
156 			    vaddr_t);
157 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
158 void			 uvm_map_addr_augment(struct vm_map_entry*);
159 
160 /*
161  * Tree management functions.
162  */
163 
164 static __inline void	 uvm_mapent_copy(struct vm_map_entry*,
165 			    struct vm_map_entry*);
166 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
167 			    const struct vm_map_entry*);
168 void			 uvm_mapent_free_insert(struct vm_map*,
169 			    struct uvm_addr_state*, struct vm_map_entry*);
170 void			 uvm_mapent_free_remove(struct vm_map*,
171 			    struct uvm_addr_state*, struct vm_map_entry*);
172 void			 uvm_mapent_addr_insert(struct vm_map*,
173 			    struct vm_map_entry*);
174 void			 uvm_mapent_addr_remove(struct vm_map*,
175 			    struct vm_map_entry*);
176 void			 uvm_map_splitentry(struct vm_map*,
177 			    struct vm_map_entry*, struct vm_map_entry*,
178 			    vaddr_t);
179 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
180 int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
181 
182 /*
183  * uvm_vmspace_fork helper functions.
184  */
185 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
186 			    vsize_t, vm_prot_t, vm_prot_t,
187 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
188 			    int);
189 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
190 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
191 			    struct vm_map_entry*, struct uvm_map_deadq*);
192 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
193 			    struct vm_map*, struct vm_map_entry*,
194 			    struct uvm_map_deadq*);
195 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
196 			    struct vm_map*, struct vm_map_entry*,
197 			    struct uvm_map_deadq*);
198 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
199 			    struct vm_map*, struct vm_map_entry*,
200 			    struct uvm_map_deadq*);
201 
202 /*
203  * Tree validation.
204  */
205 #ifdef VMMAP_DEBUG
206 void			 uvm_tree_assert(struct vm_map*, int, char*,
207 			    char*, int);
208 #define UVM_ASSERT(map, cond, file, line)				\
209 	uvm_tree_assert((map), (cond), #cond, (file), (line))
210 void			 uvm_tree_sanity(struct vm_map*, char*, int);
211 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
212 void			 vmspace_validate(struct vm_map*);
213 #else
214 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
215 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
216 #define vmspace_validate(_map)				do {} while (0)
217 #endif
218 
219 /*
220  * All architectures will have pmap_prefer.
221  */
222 #ifndef PMAP_PREFER
223 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
224 #define PMAP_PREFER_OFFSET(off)	0
225 #define PMAP_PREFER(addr, off)	(addr)
226 #endif
227 
228 
229 /*
230  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
231  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
232  *
233  * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size
234  * each time.
235  */
236 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
237 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
238 #define VM_MAP_KSIZE_ALLOCMUL	4
239 /*
240  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
241  * ahead.
242  */
243 #define FSPACE_DELTA		8
244 /*
245  * Put allocations adjecent to previous allocations when the free-space tree
246  * is larger than FSPACE_COMPACT entries.
247  *
248  * Alignment and PMAP_PREFER may still cause the entry to not be fully
249  * adjecent. Note that this strategy reduces memory fragmentation (by leaving
250  * a large space before or after the allocation).
251  */
252 #define FSPACE_COMPACT		128
253 /*
254  * Make the address selection skip at most this many bytes from the start of
255  * the free space in which the allocation takes place.
256  *
257  * The main idea behind a randomized address space is that an attacker cannot
258  * know where to target his attack. Therefore, the location of objects must be
259  * as random as possible. However, the goal is not to create the most sparse
260  * map that is possible.
261  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
262  * sizes, thereby reducing the sparseness. The biggest randomization comes
263  * from fragmentation, i.e. FSPACE_COMPACT.
264  */
265 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
266 /*
267  * Allow for small gaps in the overflow areas.
268  * Gap size is in bytes and does not have to be a multiple of page-size.
269  */
270 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
271 
272 /* auto-allocate address lower bound */
273 #define VMMAP_MIN_ADDR		PAGE_SIZE
274 
275 
276 #ifdef DEADBEEF0
277 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
278 #else
279 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
280 #endif
281 
282 #ifdef DEBUG
283 int uvm_map_printlocks = 0;
284 
285 #define LPRINTF(_args)							\
286 	do {								\
287 		if (uvm_map_printlocks)					\
288 			printf _args;					\
289 	} while (0)
290 #else
291 #define LPRINTF(_args)	do {} while (0)
292 #endif
293 
294 static struct mutex uvm_kmapent_mtx;
295 static struct timeval uvm_kmapent_last_warn_time;
296 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
297 
298 const char vmmapbsy[] = "vmmapbsy";
299 
300 /*
301  * pool for vmspace structures.
302  */
303 struct pool uvm_vmspace_pool;
304 
305 /*
306  * pool for dynamically-allocated map entries.
307  */
308 struct pool uvm_map_entry_pool;
309 struct pool uvm_map_entry_kmem_pool;
310 
311 /*
312  * This global represents the end of the kernel virtual address
313  * space. If we want to exceed this, we must grow the kernel
314  * virtual address space dynamically.
315  *
316  * Note, this variable is locked by kernel_map's lock.
317  */
318 vaddr_t uvm_maxkaddr;
319 
320 /*
321  * Locking predicate.
322  */
323 #define UVM_MAP_REQ_WRITE(_map)						\
324 	do {								\
325 		if ((_map)->ref_count > 0) {				\
326 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
327 				rw_assert_wrlock(&(_map)->lock);	\
328 			else						\
329 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
330 		}							\
331 	} while (0)
332 
333 /*
334  * Tree describing entries by address.
335  *
336  * Addresses are unique.
337  * Entries with start == end may only exist if they are the first entry
338  * (sorted by address) within a free-memory tree.
339  */
340 
341 static inline int
342 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
343     const struct vm_map_entry *e2)
344 {
345 	return e1->start < e2->start ? -1 : e1->start > e2->start;
346 }
347 
348 /*
349  * Copy mapentry.
350  */
351 static __inline void
352 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
353 {
354 	caddr_t csrc, cdst;
355 	size_t sz;
356 
357 	csrc = (caddr_t)src;
358 	cdst = (caddr_t)dst;
359 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
360 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
361 
362 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
363 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
364 	memcpy(cdst, csrc, sz);
365 }
366 
367 /*
368  * Handle free-list insertion.
369  */
370 void
371 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
372     struct vm_map_entry *entry)
373 {
374 	const struct uvm_addr_functions *fun;
375 #ifdef VMMAP_DEBUG
376 	vaddr_t min, max, bound;
377 #endif
378 
379 #ifdef VMMAP_DEBUG
380 	/*
381 	 * Boundary check.
382 	 * Boundaries are folded if they go on the same free list.
383 	 */
384 	min = VMMAP_FREE_START(entry);
385 	max = VMMAP_FREE_END(entry);
386 
387 	while (min < max) {
388 		bound = uvm_map_boundary(map, min, max);
389 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
390 		min = bound;
391 	}
392 #endif
393 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
394 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
395 
396 	UVM_MAP_REQ_WRITE(map);
397 
398 	/* Actual insert: forward to uaddr pointer. */
399 	if (uaddr != NULL) {
400 		fun = uaddr->uaddr_functions;
401 		KDASSERT(fun != NULL);
402 		if (fun->uaddr_free_insert != NULL)
403 			(*fun->uaddr_free_insert)(map, uaddr, entry);
404 		entry->etype |= UVM_ET_FREEMAPPED;
405 	}
406 
407 	/* Update fspace augmentation. */
408 	uvm_map_addr_augment(entry);
409 }
410 
411 /*
412  * Handle free-list removal.
413  */
414 void
415 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
416     struct vm_map_entry *entry)
417 {
418 	const struct uvm_addr_functions *fun;
419 
420 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
421 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
422 	UVM_MAP_REQ_WRITE(map);
423 
424 	if (uaddr != NULL) {
425 		fun = uaddr->uaddr_functions;
426 		if (fun->uaddr_free_remove != NULL)
427 			(*fun->uaddr_free_remove)(map, uaddr, entry);
428 		entry->etype &= ~UVM_ET_FREEMAPPED;
429 	}
430 }
431 
432 /*
433  * Handle address tree insertion.
434  */
435 void
436 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
437 {
438 	struct vm_map_entry *res;
439 
440 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
441 		panic("uvm_mapent_addr_insert: entry still in addr list");
442 	KDASSERT(entry->start <= entry->end);
443 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
444 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
445 
446 	UVM_MAP_REQ_WRITE(map);
447 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
448 	if (res != NULL) {
449 		panic("uvm_mapent_addr_insert: map %p entry %p "
450 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
451 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
452 		    map, entry,
453 		    entry->start, entry->end, entry->guard, entry->fspace,
454 		    res, res->start, res->end, res->guard, res->fspace);
455 	}
456 }
457 
458 /*
459  * Handle address tree removal.
460  */
461 void
462 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
463 {
464 	struct vm_map_entry *res;
465 
466 	UVM_MAP_REQ_WRITE(map);
467 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
468 	if (res != entry)
469 		panic("uvm_mapent_addr_remove");
470 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
471 }
472 
473 /*
474  * uvm_map_reference: add reference to a map
475  *
476  * XXX check map reference counter lock
477  */
478 #define uvm_map_reference(_map)						\
479 	do {								\
480 		map->ref_count++;					\
481 	} while (0)
482 
483 /*
484  * Calculate the dused delta.
485  */
486 vsize_t
487 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
488 {
489 	struct vmspace *vm;
490 	vsize_t sz;
491 	vaddr_t lmax;
492 	vaddr_t stack_begin, stack_end; /* Position of stack. */
493 
494 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
495 	vm = (struct vmspace *)map;
496 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
497 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
498 
499 	sz = 0;
500 	while (min != max) {
501 		lmax = max;
502 		if (min < stack_begin && lmax > stack_begin)
503 			lmax = stack_begin;
504 		else if (min < stack_end && lmax > stack_end)
505 			lmax = stack_end;
506 
507 		if (min >= stack_begin && min < stack_end) {
508 			/* nothing */
509 		} else
510 			sz += lmax - min;
511 		min = lmax;
512 	}
513 
514 	return sz >> PAGE_SHIFT;
515 }
516 
517 /*
518  * Find the entry describing the given address.
519  */
520 struct vm_map_entry*
521 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
522 {
523 	struct vm_map_entry *iter;
524 
525 	iter = RBT_ROOT(uvm_map_addr, atree);
526 	while (iter != NULL) {
527 		if (iter->start > addr)
528 			iter = RBT_LEFT(uvm_map_addr, iter);
529 		else if (VMMAP_FREE_END(iter) <= addr)
530 			iter = RBT_RIGHT(uvm_map_addr, iter);
531 		else
532 			return iter;
533 	}
534 	return NULL;
535 }
536 
537 /*
538  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
539  *
540  * Push dead entries into a linked list.
541  * Since the linked list abuses the address tree for storage, the entry
542  * may not be linked in a map.
543  *
544  * *head must be initialized to NULL before the first call to this macro.
545  * uvm_unmap_detach(*head, 0) will remove dead entries.
546  */
547 static __inline void
548 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
549 {
550 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
551 }
552 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
553 	dead_entry_push((_headptr), (_entry))
554 
555 /*
556  * Helper function for uvm_map_findspace_tree.
557  *
558  * Given allocation constraints and pmap constraints, finds the
559  * lowest and highest address in a range that can be used for the
560  * allocation.
561  *
562  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
563  *
564  *
565  * Big chunk of math with a seasoning of dragons.
566  */
567 int
568 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
569     struct vm_map_entry *sel, vaddr_t align,
570     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
571 {
572 	vaddr_t sel_min, sel_max;
573 #ifdef PMAP_PREFER
574 	vaddr_t pmap_min, pmap_max;
575 #endif /* PMAP_PREFER */
576 #ifdef DIAGNOSTIC
577 	int bad;
578 #endif /* DIAGNOSTIC */
579 
580 	sel_min = VMMAP_FREE_START(sel);
581 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
582 
583 #ifdef PMAP_PREFER
584 
585 	/*
586 	 * There are two special cases, in which we can satisfy the align
587 	 * requirement and the pmap_prefer requirement.
588 	 * - when pmap_off == 0, we always select the largest of the two
589 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
590 	 *   satisfy the pmap_align requirement and automatically
591 	 *   satisfy the align requirement.
592 	 */
593 	if (align > PAGE_SIZE &&
594 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
595 		/*
596 		 * Simple case: only use align.
597 		 */
598 		sel_min = roundup(sel_min, align);
599 		sel_max &= ~(align - 1);
600 
601 		if (sel_min > sel_max)
602 			return ENOMEM;
603 
604 		/* Correct for bias. */
605 		if (sel_max - sel_min > FSPACE_BIASGAP) {
606 			if (bias > 0) {
607 				sel_min = sel_max - FSPACE_BIASGAP;
608 				sel_min = roundup(sel_min, align);
609 			} else if (bias < 0) {
610 				sel_max = sel_min + FSPACE_BIASGAP;
611 				sel_max &= ~(align - 1);
612 			}
613 		}
614 	} else if (pmap_align != 0) {
615 		/*
616 		 * Special case: satisfy both pmap_prefer and
617 		 * align argument.
618 		 */
619 		pmap_max = sel_max & ~(pmap_align - 1);
620 		pmap_min = sel_min;
621 		if (pmap_max < sel_min)
622 			return ENOMEM;
623 
624 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
625 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
626 			pmap_min = pmap_max - FSPACE_BIASGAP;
627 		/* Align pmap_min. */
628 		pmap_min &= ~(pmap_align - 1);
629 		if (pmap_min < sel_min)
630 			pmap_min += pmap_align;
631 		if (pmap_min > pmap_max)
632 			return ENOMEM;
633 
634 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
635 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
636 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
637 			    ~(pmap_align - 1);
638 		}
639 		if (pmap_min > pmap_max)
640 			return ENOMEM;
641 
642 		/* Apply pmap prefer offset. */
643 		pmap_max |= pmap_off;
644 		if (pmap_max > sel_max)
645 			pmap_max -= pmap_align;
646 		pmap_min |= pmap_off;
647 		if (pmap_min < sel_min)
648 			pmap_min += pmap_align;
649 
650 		/*
651 		 * Fixup: it's possible that pmap_min and pmap_max
652 		 * cross eachother. In this case, try to find one
653 		 * address that is allowed.
654 		 * (This usually happens in biased case.)
655 		 */
656 		if (pmap_min > pmap_max) {
657 			if (pmap_min < sel_max)
658 				pmap_max = pmap_min;
659 			else if (pmap_max > sel_min)
660 				pmap_min = pmap_max;
661 			else
662 				return ENOMEM;
663 		}
664 
665 		/* Internal validation. */
666 		KDASSERT(pmap_min <= pmap_max);
667 
668 		sel_min = pmap_min;
669 		sel_max = pmap_max;
670 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
671 		sel_min = sel_max - FSPACE_BIASGAP;
672 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
673 		sel_max = sel_min + FSPACE_BIASGAP;
674 
675 #else
676 
677 	if (align > PAGE_SIZE) {
678 		sel_min = roundup(sel_min, align);
679 		sel_max &= ~(align - 1);
680 		if (sel_min > sel_max)
681 			return ENOMEM;
682 
683 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
684 			if (bias > 0) {
685 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
686 				    align);
687 			} else {
688 				sel_max = (sel_min + FSPACE_BIASGAP) &
689 				    ~(align - 1);
690 			}
691 		}
692 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
693 		sel_min = sel_max - FSPACE_BIASGAP;
694 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
695 		sel_max = sel_min + FSPACE_BIASGAP;
696 
697 #endif
698 
699 	if (sel_min > sel_max)
700 		return ENOMEM;
701 
702 #ifdef DIAGNOSTIC
703 	bad = 0;
704 	/* Lower boundary check. */
705 	if (sel_min < VMMAP_FREE_START(sel)) {
706 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
707 		    sel_min, VMMAP_FREE_START(sel));
708 		bad++;
709 	}
710 	/* Upper boundary check. */
711 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
712 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
713 		    sel_max,
714 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
715 		bad++;
716 	}
717 	/* Lower boundary alignment. */
718 	if (align != 0 && (sel_min & (align - 1)) != 0) {
719 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
720 		    sel_min, align);
721 		bad++;
722 	}
723 	/* Upper boundary alignment. */
724 	if (align != 0 && (sel_max & (align - 1)) != 0) {
725 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
726 		    sel_max, align);
727 		bad++;
728 	}
729 	/* Lower boundary PMAP_PREFER check. */
730 	if (pmap_align != 0 && align == 0 &&
731 	    (sel_min & (pmap_align - 1)) != pmap_off) {
732 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
733 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
734 		bad++;
735 	}
736 	/* Upper boundary PMAP_PREFER check. */
737 	if (pmap_align != 0 && align == 0 &&
738 	    (sel_max & (pmap_align - 1)) != pmap_off) {
739 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
740 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
741 		bad++;
742 	}
743 
744 	if (bad) {
745 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
746 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
747 		    "bias = %d, "
748 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
749 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
750 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
751 	}
752 #endif /* DIAGNOSTIC */
753 
754 	*min = sel_min;
755 	*max = sel_max;
756 	return 0;
757 }
758 
759 /*
760  * Test if memory starting at addr with sz bytes is free.
761  *
762  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
763  * the space.
764  * If called with prefilled *start_ptr and *end_ptr, they are to be correct.
765  */
766 int
767 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
768     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
769     vaddr_t addr, vsize_t sz)
770 {
771 	struct uvm_addr_state *free;
772 	struct uvm_map_addr *atree;
773 	struct vm_map_entry *i, *i_end;
774 
775 	if (addr + sz < addr)
776 		return 0;
777 
778 	/*
779 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
780 	 */
781 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
782 		if (addr + sz > uvm_maxkaddr)
783 			return 0;
784 	}
785 
786 	atree = &map->addr;
787 
788 	/*
789 	 * Fill in first, last, so they point at the entries containing the
790 	 * first and last address of the range.
791 	 * Note that if they are not NULL, we don't perform the lookup.
792 	 */
793 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
794 	if (*start_ptr == NULL) {
795 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
796 		if (*start_ptr == NULL)
797 			return 0;
798 	} else
799 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
800 	if (*end_ptr == NULL) {
801 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
802 			*end_ptr = *start_ptr;
803 		else {
804 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
805 			if (*end_ptr == NULL)
806 				return 0;
807 		}
808 	} else
809 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
810 
811 	/* Validation. */
812 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
813 	KDASSERT((*start_ptr)->start <= addr &&
814 	    VMMAP_FREE_END(*start_ptr) > addr &&
815 	    (*end_ptr)->start < addr + sz &&
816 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
817 
818 	/*
819 	 * Check the none of the entries intersects with <addr, addr+sz>.
820 	 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is
821 	 * considered unavailable unless called by those allocators.
822 	 */
823 	i = *start_ptr;
824 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
825 	for (; i != i_end;
826 	    i = RBT_NEXT(uvm_map_addr, i)) {
827 		if (i->start != i->end && i->end > addr)
828 			return 0;
829 
830 		/*
831 		 * uaddr_exe and uaddr_brk_stack may only be used
832 		 * by these allocators and the NULL uaddr (i.e. no
833 		 * uaddr).
834 		 * Reject if this requirement is not met.
835 		 */
836 		if (uaddr != NULL) {
837 			free = uvm_map_uaddr_e(map, i);
838 
839 			if (uaddr != free && free != NULL &&
840 			    (free == map->uaddr_exe ||
841 			     free == map->uaddr_brk_stack))
842 				return 0;
843 		}
844 	}
845 
846 	return -1;
847 }
848 
849 /*
850  * Invoke each address selector until an address is found.
851  * Will not invoke uaddr_exe.
852  */
853 int
854 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
855     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
856     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
857 {
858 	struct uvm_addr_state *uaddr;
859 	int i;
860 
861 	/*
862 	 * Allocation for sz bytes at any address,
863 	 * using the addr selectors in order.
864 	 */
865 	for (i = 0; i < nitems(map->uaddr_any); i++) {
866 		uaddr = map->uaddr_any[i];
867 
868 		if (uvm_addr_invoke(map, uaddr, first, last,
869 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
870 			return 0;
871 	}
872 
873 	/* Fall back to brk() and stack() address selectors. */
874 	uaddr = map->uaddr_brk_stack;
875 	if (uvm_addr_invoke(map, uaddr, first, last,
876 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
877 		return 0;
878 
879 	return ENOMEM;
880 }
881 
882 /* Calculate entry augmentation value. */
883 vsize_t
884 uvm_map_addr_augment_get(struct vm_map_entry *entry)
885 {
886 	vsize_t			 augment;
887 	struct vm_map_entry	*left, *right;
888 
889 	augment = entry->fspace;
890 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
891 		augment = MAX(augment, left->fspace_augment);
892 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
893 		augment = MAX(augment, right->fspace_augment);
894 	return augment;
895 }
896 
897 /*
898  * Update augmentation data in entry.
899  */
900 void
901 uvm_map_addr_augment(struct vm_map_entry *entry)
902 {
903 	vsize_t			 augment;
904 
905 	while (entry != NULL) {
906 		/* Calculate value for augmentation. */
907 		augment = uvm_map_addr_augment_get(entry);
908 
909 		/*
910 		 * Descend update.
911 		 * Once we find an entry that already has the correct value,
912 		 * stop, since it means all its parents will use the correct
913 		 * value too.
914 		 */
915 		if (entry->fspace_augment == augment)
916 			return;
917 		entry->fspace_augment = augment;
918 		entry = RBT_PARENT(uvm_map_addr, entry);
919 	}
920 }
921 
922 /*
923  * uvm_mapanon: establish a valid mapping in map for an anon
924  *
925  * => *addr and sz must be a multiple of PAGE_SIZE.
926  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
927  * => map must be unlocked.
928  *
929  * => align: align vaddr, must be a power-of-2.
930  *    Align is only a hint and will be ignored if the alignment fails.
931  */
932 int
933 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
934     vsize_t align, unsigned int flags)
935 {
936 	struct vm_map_entry	*first, *last, *entry, *new;
937 	struct uvm_map_deadq	 dead;
938 	vm_prot_t		 prot;
939 	vm_prot_t		 maxprot;
940 	vm_inherit_t		 inherit;
941 	int			 advice;
942 	int			 error;
943 	vaddr_t			 pmap_align, pmap_offset;
944 	vaddr_t			 hint;
945 
946 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
947 	KASSERT(map != kernel_map);
948 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
949 
950 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
951 	splassert(IPL_NONE);
952 
953 	/*
954 	 * We use pmap_align and pmap_offset as alignment and offset variables.
955 	 *
956 	 * Because the align parameter takes precedence over pmap prefer,
957 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
958 	 * if pmap_prefer will not align.
959 	 */
960 	pmap_align = MAX(align, PAGE_SIZE);
961 	pmap_offset = 0;
962 
963 	/* Decode parameters. */
964 	prot = UVM_PROTECTION(flags);
965 	maxprot = UVM_MAXPROTECTION(flags);
966 	advice = UVM_ADVICE(flags);
967 	inherit = UVM_INHERIT(flags);
968 	error = 0;
969 	hint = trunc_page(*addr);
970 	TAILQ_INIT(&dead);
971 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
972 	KASSERT((align & (align - 1)) == 0);
973 
974 	/* Check protection. */
975 	if ((prot & maxprot) != prot)
976 		return EACCES;
977 
978 	/*
979 	 * Before grabbing the lock, allocate a map entry for later
980 	 * use to ensure we don't wait for memory while holding the
981 	 * vm_map_lock.
982 	 */
983 	new = uvm_mapent_alloc(map, flags);
984 	if (new == NULL)
985 		return(ENOMEM);
986 
987 	if (flags & UVM_FLAG_TRYLOCK) {
988 		if (vm_map_lock_try(map) == FALSE) {
989 			error = EFAULT;
990 			goto out;
991 		}
992 	} else
993 		vm_map_lock(map);
994 
995 	first = last = NULL;
996 	if (flags & UVM_FLAG_FIXED) {
997 		/*
998 		 * Fixed location.
999 		 *
1000 		 * Note: we ignore align, pmap_prefer.
1001 		 * Fill in first, last and *addr.
1002 		 */
1003 		KASSERT((*addr & PAGE_MASK) == 0);
1004 
1005 		/* Check that the space is available. */
1006 		if (flags & UVM_FLAG_UNMAP) {
1007 			if ((flags & UVM_FLAG_STACK) &&
1008 			    !uvm_map_is_stack_remappable(map, *addr, sz)) {
1009 				error = EINVAL;
1010 				goto unlock;
1011 			}
1012 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1013 		}
1014 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1015 			error = ENOMEM;
1016 			goto unlock;
1017 		}
1018 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1019 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1020 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1021 		/*
1022 		 * Address used as hint.
1023 		 *
1024 		 * Note: we enforce the alignment restriction,
1025 		 * but ignore pmap_prefer.
1026 		 */
1027 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1028 		/* Run selection algorithm for executables. */
1029 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1030 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1031 
1032 		if (error != 0)
1033 			goto unlock;
1034 	} else {
1035 		/* Update freelists from vmspace. */
1036 		uvm_map_vmspace_update(map, &dead, flags);
1037 
1038 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1039 		    pmap_align, pmap_offset, prot, hint);
1040 
1041 		if (error != 0)
1042 			goto unlock;
1043 	}
1044 
1045 	/* Double-check if selected address doesn't cause overflow. */
1046 	if (*addr + sz < *addr) {
1047 		error = ENOMEM;
1048 		goto unlock;
1049 	}
1050 
1051 	/* If we only want a query, return now. */
1052 	if (flags & UVM_FLAG_QUERY) {
1053 		error = 0;
1054 		goto unlock;
1055 	}
1056 
1057 	/*
1058 	 * Create new entry.
1059 	 * first and last may be invalidated after this call.
1060 	 */
1061 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1062 	    new);
1063 	if (entry == NULL) {
1064 		error = ENOMEM;
1065 		goto unlock;
1066 	}
1067 	new = NULL;
1068 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1069 	entry->object.uvm_obj = NULL;
1070 	entry->offset = 0;
1071 	entry->protection = prot;
1072 	entry->max_protection = maxprot;
1073 	entry->inheritance = inherit;
1074 	entry->wired_count = 0;
1075 	entry->advice = advice;
1076 	if (prot & PROT_WRITE)
1077 		map->wserial++;
1078 	if (flags & UVM_FLAG_STACK) {
1079 		entry->etype |= UVM_ET_STACK;
1080 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
1081 			map->sserial++;
1082 	}
1083 	if (flags & UVM_FLAG_COPYONW) {
1084 		entry->etype |= UVM_ET_COPYONWRITE;
1085 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1086 			entry->etype |= UVM_ET_NEEDSCOPY;
1087 	}
1088 	if (flags & UVM_FLAG_CONCEAL)
1089 		entry->etype |= UVM_ET_CONCEAL;
1090 	if (flags & UVM_FLAG_OVERLAY) {
1091 		KERNEL_LOCK();
1092 		entry->aref.ar_pageoff = 0;
1093 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1094 		KERNEL_UNLOCK();
1095 	}
1096 
1097 	/* Update map and process statistics. */
1098 	map->size += sz;
1099 	((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz);
1100 
1101 unlock:
1102 	vm_map_unlock(map);
1103 
1104 	/*
1105 	 * Remove dead entries.
1106 	 *
1107 	 * Dead entries may be the result of merging.
1108 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1109 	 * destroy free-space entries.
1110 	 */
1111 	uvm_unmap_detach(&dead, 0);
1112 out:
1113 	if (new)
1114 		uvm_mapent_free(new);
1115 	return error;
1116 }
1117 
1118 /*
1119  * uvm_map: establish a valid mapping in map
1120  *
1121  * => *addr and sz must be a multiple of PAGE_SIZE.
1122  * => map must be unlocked.
1123  * => <uobj,uoffset> value meanings (4 cases):
1124  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1125  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1126  *	[3] <uobj,uoffset>		== normal mapping
1127  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1128  *
1129  *   case [4] is for kernel mappings where we don't know the offset until
1130  *   we've found a virtual address.   note that kernel object offsets are
1131  *   always relative to vm_map_min(kernel_map).
1132  *
1133  * => align: align vaddr, must be a power-of-2.
1134  *    Align is only a hint and will be ignored if the alignment fails.
1135  */
1136 int
1137 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
1138     struct uvm_object *uobj, voff_t uoffset,
1139     vsize_t align, unsigned int flags)
1140 {
1141 	struct vm_map_entry	*first, *last, *entry, *new;
1142 	struct uvm_map_deadq	 dead;
1143 	vm_prot_t		 prot;
1144 	vm_prot_t		 maxprot;
1145 	vm_inherit_t		 inherit;
1146 	int			 advice;
1147 	int			 error;
1148 	vaddr_t			 pmap_align, pmap_offset;
1149 	vaddr_t			 hint;
1150 
1151 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1152 		splassert(IPL_NONE);
1153 	else
1154 		splassert(IPL_VM);
1155 
1156 	/*
1157 	 * We use pmap_align and pmap_offset as alignment and offset variables.
1158 	 *
1159 	 * Because the align parameter takes precedence over pmap prefer,
1160 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
1161 	 * if pmap_prefer will not align.
1162 	 */
1163 	if (uoffset == UVM_UNKNOWN_OFFSET) {
1164 		pmap_align = MAX(align, PAGE_SIZE);
1165 		pmap_offset = 0;
1166 	} else {
1167 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
1168 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
1169 
1170 		if (align == 0 ||
1171 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
1172 			/* pmap_offset satisfies align, no change. */
1173 		} else {
1174 			/* Align takes precedence over pmap prefer. */
1175 			pmap_align = align;
1176 			pmap_offset = 0;
1177 		}
1178 	}
1179 
1180 	/* Decode parameters. */
1181 	prot = UVM_PROTECTION(flags);
1182 	maxprot = UVM_MAXPROTECTION(flags);
1183 	advice = UVM_ADVICE(flags);
1184 	inherit = UVM_INHERIT(flags);
1185 	error = 0;
1186 	hint = trunc_page(*addr);
1187 	TAILQ_INIT(&dead);
1188 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1189 	KASSERT((align & (align - 1)) == 0);
1190 
1191 	/* Holes are incompatible with other types of mappings. */
1192 	if (flags & UVM_FLAG_HOLE) {
1193 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1194 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1195 	}
1196 
1197 	/* Unset hint for kernel_map non-fixed allocations. */
1198 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1199 		hint = 0;
1200 
1201 	/* Check protection. */
1202 	if ((prot & maxprot) != prot)
1203 		return EACCES;
1204 
1205 	if (map == kernel_map &&
1206 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1207 		panic("uvm_map: kernel map W^X violation requested");
1208 
1209 	/*
1210 	 * Before grabbing the lock, allocate a map entry for later
1211 	 * use to ensure we don't wait for memory while holding the
1212 	 * vm_map_lock.
1213 	 */
1214 	new = uvm_mapent_alloc(map, flags);
1215 	if (new == NULL)
1216 		return(ENOMEM);
1217 
1218 	if (flags & UVM_FLAG_TRYLOCK) {
1219 		if (vm_map_lock_try(map) == FALSE) {
1220 			error = EFAULT;
1221 			goto out;
1222 		}
1223 	} else {
1224 		vm_map_lock(map);
1225 	}
1226 
1227 	first = last = NULL;
1228 	if (flags & UVM_FLAG_FIXED) {
1229 		/*
1230 		 * Fixed location.
1231 		 *
1232 		 * Note: we ignore align, pmap_prefer.
1233 		 * Fill in first, last and *addr.
1234 		 */
1235 		KASSERT((*addr & PAGE_MASK) == 0);
1236 
1237 		/*
1238 		 * Grow pmap to include allocated address.
1239 		 * If the growth fails, the allocation will fail too.
1240 		 */
1241 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1242 		    uvm_maxkaddr < (*addr + sz)) {
1243 			uvm_map_kmem_grow(map, &dead,
1244 			    *addr + sz - uvm_maxkaddr, flags);
1245 		}
1246 
1247 		/* Check that the space is available. */
1248 		if (flags & UVM_FLAG_UNMAP)
1249 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1250 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1251 			error = ENOMEM;
1252 			goto unlock;
1253 		}
1254 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1255 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1256 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1257 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1258 		/*
1259 		 * Address used as hint.
1260 		 *
1261 		 * Note: we enforce the alignment restriction,
1262 		 * but ignore pmap_prefer.
1263 		 */
1264 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1265 		/* Run selection algorithm for executables. */
1266 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1267 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1268 
1269 		/* Grow kernel memory and try again. */
1270 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1271 			uvm_map_kmem_grow(map, &dead, sz, flags);
1272 
1273 			error = uvm_addr_invoke(map, map->uaddr_exe,
1274 			    &first, &last, addr, sz,
1275 			    pmap_align, pmap_offset, prot, hint);
1276 		}
1277 
1278 		if (error != 0)
1279 			goto unlock;
1280 	} else {
1281 		/* Update freelists from vmspace. */
1282 		if (map->flags & VM_MAP_ISVMSPACE)
1283 			uvm_map_vmspace_update(map, &dead, flags);
1284 
1285 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1286 		    pmap_align, pmap_offset, prot, hint);
1287 
1288 		/* Grow kernel memory and try again. */
1289 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1290 			uvm_map_kmem_grow(map, &dead, sz, flags);
1291 
1292 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1293 			    pmap_align, pmap_offset, prot, hint);
1294 		}
1295 
1296 		if (error != 0)
1297 			goto unlock;
1298 	}
1299 
1300 	/* Double-check if selected address doesn't cause overflow. */
1301 	if (*addr + sz < *addr) {
1302 		error = ENOMEM;
1303 		goto unlock;
1304 	}
1305 
1306 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1307 	    uvm_maxkaddr >= *addr + sz);
1308 
1309 	/* If we only want a query, return now. */
1310 	if (flags & UVM_FLAG_QUERY) {
1311 		error = 0;
1312 		goto unlock;
1313 	}
1314 
1315 	if (uobj == NULL)
1316 		uoffset = 0;
1317 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1318 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1319 		uoffset = *addr - vm_map_min(kernel_map);
1320 	}
1321 
1322 	/*
1323 	 * Create new entry.
1324 	 * first and last may be invalidated after this call.
1325 	 */
1326 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1327 	    new);
1328 	if (entry == NULL) {
1329 		error = ENOMEM;
1330 		goto unlock;
1331 	}
1332 	new = NULL;
1333 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1334 	entry->object.uvm_obj = uobj;
1335 	entry->offset = uoffset;
1336 	entry->protection = prot;
1337 	entry->max_protection = maxprot;
1338 	entry->inheritance = inherit;
1339 	entry->wired_count = 0;
1340 	entry->advice = advice;
1341 	if (prot & PROT_WRITE)
1342 		map->wserial++;
1343 	if (flags & UVM_FLAG_STACK) {
1344 		entry->etype |= UVM_ET_STACK;
1345 		if (flags & UVM_FLAG_UNMAP)
1346 			map->sserial++;
1347 	}
1348 	if (uobj)
1349 		entry->etype |= UVM_ET_OBJ;
1350 	else if (flags & UVM_FLAG_HOLE)
1351 		entry->etype |= UVM_ET_HOLE;
1352 	if (flags & UVM_FLAG_NOFAULT)
1353 		entry->etype |= UVM_ET_NOFAULT;
1354 	if (flags & UVM_FLAG_WC)
1355 		entry->etype |= UVM_ET_WC;
1356 	if (flags & UVM_FLAG_COPYONW) {
1357 		entry->etype |= UVM_ET_COPYONWRITE;
1358 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1359 			entry->etype |= UVM_ET_NEEDSCOPY;
1360 	}
1361 	if (flags & UVM_FLAG_CONCEAL)
1362 		entry->etype |= UVM_ET_CONCEAL;
1363 	if (flags & UVM_FLAG_OVERLAY) {
1364 		entry->aref.ar_pageoff = 0;
1365 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1366 	}
1367 
1368 	/* Update map and process statistics. */
1369 	if (!(flags & UVM_FLAG_HOLE)) {
1370 		map->size += sz;
1371 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) {
1372 			((struct vmspace *)map)->vm_dused +=
1373 			    uvmspace_dused(map, *addr, *addr + sz);
1374 		}
1375 	}
1376 
1377 	/*
1378 	 * Try to merge entry.
1379 	 *
1380 	 * Userland allocations are kept separated most of the time.
1381 	 * Forego the effort of merging what most of the time can't be merged
1382 	 * and only try the merge if it concerns a kernel entry.
1383 	 */
1384 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1385 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1386 		uvm_mapent_tryjoin(map, entry, &dead);
1387 
1388 unlock:
1389 	vm_map_unlock(map);
1390 
1391 	/*
1392 	 * Remove dead entries.
1393 	 *
1394 	 * Dead entries may be the result of merging.
1395 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1396 	 * destroy free-space entries.
1397 	 */
1398 	if (map->flags & VM_MAP_INTRSAFE)
1399 		uvm_unmap_detach_intrsafe(&dead);
1400 	else
1401 		uvm_unmap_detach(&dead, 0);
1402 out:
1403 	if (new)
1404 		uvm_mapent_free(new);
1405 	return error;
1406 }
1407 
1408 /*
1409  * True iff e1 and e2 can be joined together.
1410  */
1411 int
1412 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1413     struct vm_map_entry *e2)
1414 {
1415 	KDASSERT(e1 != NULL && e2 != NULL);
1416 
1417 	/* Must be the same entry type and not have free memory between. */
1418 	if (e1->etype != e2->etype || e1->end != e2->start)
1419 		return 0;
1420 
1421 	/* Submaps are never joined. */
1422 	if (UVM_ET_ISSUBMAP(e1))
1423 		return 0;
1424 
1425 	/* Never merge wired memory. */
1426 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1427 		return 0;
1428 
1429 	/* Protection, inheritance and advice must be equal. */
1430 	if (e1->protection != e2->protection ||
1431 	    e1->max_protection != e2->max_protection ||
1432 	    e1->inheritance != e2->inheritance ||
1433 	    e1->advice != e2->advice)
1434 		return 0;
1435 
1436 	/* If uvm_object: object itself and offsets within object must match. */
1437 	if (UVM_ET_ISOBJ(e1)) {
1438 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1439 			return 0;
1440 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1441 			return 0;
1442 	}
1443 
1444 	/*
1445 	 * Cannot join shared amaps.
1446 	 * Note: no need to lock amap to look at refs, since we don't care
1447 	 * about its exact value.
1448 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1449 	 */
1450 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1451 		return 0;
1452 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1453 		return 0;
1454 
1455 	/* Apprently, e1 and e2 match. */
1456 	return 1;
1457 }
1458 
1459 /*
1460  * Join support function.
1461  *
1462  * Returns the merged entry on succes.
1463  * Returns NULL if the merge failed.
1464  */
1465 struct vm_map_entry*
1466 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1467     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1468 {
1469 	struct uvm_addr_state *free;
1470 
1471 	/*
1472 	 * Merging is not supported for map entries that
1473 	 * contain an amap in e1. This should never happen
1474 	 * anyway, because only kernel entries are merged.
1475 	 * These do not contain amaps.
1476 	 * e2 contains no real information in its amap,
1477 	 * so it can be erased immediately.
1478 	 */
1479 	KASSERT(e1->aref.ar_amap == NULL);
1480 
1481 	/*
1482 	 * Don't drop obj reference:
1483 	 * uvm_unmap_detach will do this for us.
1484 	 */
1485 	free = uvm_map_uaddr_e(map, e1);
1486 	uvm_mapent_free_remove(map, free, e1);
1487 
1488 	free = uvm_map_uaddr_e(map, e2);
1489 	uvm_mapent_free_remove(map, free, e2);
1490 	uvm_mapent_addr_remove(map, e2);
1491 	e1->end = e2->end;
1492 	e1->guard = e2->guard;
1493 	e1->fspace = e2->fspace;
1494 	uvm_mapent_free_insert(map, free, e1);
1495 
1496 	DEAD_ENTRY_PUSH(dead, e2);
1497 	return e1;
1498 }
1499 
1500 /*
1501  * Attempt forward and backward joining of entry.
1502  *
1503  * Returns entry after joins.
1504  * We are guaranteed that the amap of entry is either non-existent or
1505  * has never been used.
1506  */
1507 struct vm_map_entry*
1508 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1509     struct uvm_map_deadq *dead)
1510 {
1511 	struct vm_map_entry *other;
1512 	struct vm_map_entry *merged;
1513 
1514 	/* Merge with previous entry. */
1515 	other = RBT_PREV(uvm_map_addr, entry);
1516 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1517 		merged = uvm_mapent_merge(map, other, entry, dead);
1518 		if (merged)
1519 			entry = merged;
1520 	}
1521 
1522 	/*
1523 	 * Merge with next entry.
1524 	 *
1525 	 * Because amap can only extend forward and the next entry
1526 	 * probably contains sensible info, only perform forward merging
1527 	 * in the absence of an amap.
1528 	 */
1529 	other = RBT_NEXT(uvm_map_addr, entry);
1530 	if (other && entry->aref.ar_amap == NULL &&
1531 	    other->aref.ar_amap == NULL &&
1532 	    uvm_mapent_isjoinable(map, entry, other)) {
1533 		merged = uvm_mapent_merge(map, entry, other, dead);
1534 		if (merged)
1535 			entry = merged;
1536 	}
1537 
1538 	return entry;
1539 }
1540 
1541 /*
1542  * Kill entries that are no longer in a map.
1543  */
1544 void
1545 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1546 {
1547 	struct vm_map_entry *entry, *tmp;
1548 	int waitok = flags & UVM_PLA_WAITOK;
1549 
1550 	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1551 		/* Skip entries for which we have to grab the kernel lock. */
1552 		if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) ||
1553 		    UVM_ET_ISOBJ(entry))
1554 			continue;
1555 
1556 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1557 		uvm_mapent_free(entry);
1558 	}
1559 
1560 	if (TAILQ_EMPTY(deadq))
1561 		return;
1562 
1563 	KERNEL_LOCK();
1564 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1565 		if (waitok)
1566 			uvm_pause();
1567 		/* Drop reference to amap, if we've got one. */
1568 		if (entry->aref.ar_amap)
1569 			amap_unref(entry->aref.ar_amap,
1570 			    entry->aref.ar_pageoff,
1571 			    atop(entry->end - entry->start),
1572 			    flags & AMAP_REFALL);
1573 
1574 		/* Drop reference to our backing object, if we've got one. */
1575 		if (UVM_ET_ISSUBMAP(entry)) {
1576 			/* ... unlikely to happen, but play it safe */
1577 			uvm_map_deallocate(entry->object.sub_map);
1578 		} else if (UVM_ET_ISOBJ(entry) &&
1579 		    entry->object.uvm_obj->pgops->pgo_detach) {
1580 			entry->object.uvm_obj->pgops->pgo_detach(
1581 			    entry->object.uvm_obj);
1582 		}
1583 
1584 		/* Step to next. */
1585 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1586 		uvm_mapent_free(entry);
1587 	}
1588 	KERNEL_UNLOCK();
1589 }
1590 
1591 void
1592 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1593 {
1594 	struct vm_map_entry *entry;
1595 
1596 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1597 		KASSERT(entry->aref.ar_amap == NULL);
1598 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1599 		KASSERT(!UVM_ET_ISOBJ(entry));
1600 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1601 		uvm_mapent_free(entry);
1602 	}
1603 }
1604 
1605 /*
1606  * Create and insert new entry.
1607  *
1608  * Returned entry contains new addresses and is inserted properly in the tree.
1609  * first and last are (probably) no longer valid.
1610  */
1611 struct vm_map_entry*
1612 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1613     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1614     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1615 {
1616 	struct vm_map_entry *entry, *prev;
1617 	struct uvm_addr_state *free;
1618 	vaddr_t min, max;	/* free space boundaries for new entry */
1619 
1620 	KDASSERT(map != NULL);
1621 	KDASSERT(first != NULL);
1622 	KDASSERT(last != NULL);
1623 	KDASSERT(dead != NULL);
1624 	KDASSERT(sz > 0);
1625 	KDASSERT(addr + sz > addr);
1626 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1627 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1628 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1629 	uvm_tree_sanity(map, __FILE__, __LINE__);
1630 
1631 	min = addr + sz;
1632 	max = VMMAP_FREE_END(last);
1633 
1634 	/* Initialize new entry. */
1635 	if (new == NULL)
1636 		entry = uvm_mapent_alloc(map, flags);
1637 	else
1638 		entry = new;
1639 	if (entry == NULL)
1640 		return NULL;
1641 	entry->offset = 0;
1642 	entry->etype = 0;
1643 	entry->wired_count = 0;
1644 	entry->aref.ar_pageoff = 0;
1645 	entry->aref.ar_amap = NULL;
1646 
1647 	entry->start = addr;
1648 	entry->end = min;
1649 	entry->guard = 0;
1650 	entry->fspace = 0;
1651 
1652 	/* Reset free space in first. */
1653 	free = uvm_map_uaddr_e(map, first);
1654 	uvm_mapent_free_remove(map, free, first);
1655 	first->guard = 0;
1656 	first->fspace = 0;
1657 
1658 	/*
1659 	 * Remove all entries that are fully replaced.
1660 	 * We are iterating using last in reverse order.
1661 	 */
1662 	for (; first != last; last = prev) {
1663 		prev = RBT_PREV(uvm_map_addr, last);
1664 
1665 		KDASSERT(last->start == last->end);
1666 		free = uvm_map_uaddr_e(map, last);
1667 		uvm_mapent_free_remove(map, free, last);
1668 		uvm_mapent_addr_remove(map, last);
1669 		DEAD_ENTRY_PUSH(dead, last);
1670 	}
1671 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1672 	if (first->start == addr) {
1673 		uvm_mapent_addr_remove(map, first);
1674 		DEAD_ENTRY_PUSH(dead, first);
1675 	} else {
1676 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1677 		    addr, flags);
1678 	}
1679 
1680 	/* Finally, link in entry. */
1681 	uvm_mapent_addr_insert(map, entry);
1682 	uvm_map_fix_space(map, entry, min, max, flags);
1683 
1684 	uvm_tree_sanity(map, __FILE__, __LINE__);
1685 	return entry;
1686 }
1687 
1688 
1689 /*
1690  * uvm_mapent_alloc: allocate a map entry
1691  */
1692 struct vm_map_entry *
1693 uvm_mapent_alloc(struct vm_map *map, int flags)
1694 {
1695 	struct vm_map_entry *me, *ne;
1696 	int pool_flags;
1697 	int i;
1698 
1699 	pool_flags = PR_WAITOK;
1700 	if (flags & UVM_FLAG_TRYLOCK)
1701 		pool_flags = PR_NOWAIT;
1702 
1703 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1704 		mtx_enter(&uvm_kmapent_mtx);
1705 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1706 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1707 			    &kd_nowait);
1708 			if (ne == NULL)
1709 				panic("uvm_mapent_alloc: cannot allocate map "
1710 				    "entry");
1711 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1712 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1713 				    &ne[i], daddrs.addr_kentry);
1714 			}
1715 			if (ratecheck(&uvm_kmapent_last_warn_time,
1716 			    &uvm_kmapent_warn_rate))
1717 				printf("uvm_mapent_alloc: out of static "
1718 				    "map entries\n");
1719 		}
1720 		me = SLIST_FIRST(&uvm.kentry_free);
1721 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1722 		uvmexp.kmapent++;
1723 		mtx_leave(&uvm_kmapent_mtx);
1724 		me->flags = UVM_MAP_STATIC;
1725 	} else if (map == kernel_map) {
1726 		splassert(IPL_NONE);
1727 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1728 		if (me == NULL)
1729 			goto out;
1730 		me->flags = UVM_MAP_KMEM;
1731 	} else {
1732 		splassert(IPL_NONE);
1733 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1734 		if (me == NULL)
1735 			goto out;
1736 		me->flags = 0;
1737 	}
1738 
1739 	if (me != NULL) {
1740 		RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1741 	}
1742 
1743 out:
1744 	return(me);
1745 }
1746 
1747 /*
1748  * uvm_mapent_free: free map entry
1749  *
1750  * => XXX: static pool for kernel map?
1751  */
1752 void
1753 uvm_mapent_free(struct vm_map_entry *me)
1754 {
1755 	if (me->flags & UVM_MAP_STATIC) {
1756 		mtx_enter(&uvm_kmapent_mtx);
1757 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1758 		uvmexp.kmapent--;
1759 		mtx_leave(&uvm_kmapent_mtx);
1760 	} else if (me->flags & UVM_MAP_KMEM) {
1761 		splassert(IPL_NONE);
1762 		pool_put(&uvm_map_entry_kmem_pool, me);
1763 	} else {
1764 		splassert(IPL_NONE);
1765 		pool_put(&uvm_map_entry_pool, me);
1766 	}
1767 }
1768 
1769 /*
1770  * uvm_map_lookup_entry: find map entry at or before an address.
1771  *
1772  * => map must at least be read-locked by caller
1773  * => entry is returned in "entry"
1774  * => return value is true if address is in the returned entry
1775  * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1776  * returned for those mappings.
1777  */
1778 boolean_t
1779 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1780     struct vm_map_entry **entry)
1781 {
1782 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1783 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1784 	    (*entry)->start <= address && (*entry)->end > address;
1785 }
1786 
1787 /*
1788  * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
1789  * grown -- then uvm_map_check_region_range() should not cache the entry
1790  * because growth won't be seen.
1791  */
1792 int
1793 uvm_map_inentry_sp(vm_map_entry_t entry)
1794 {
1795 	if ((entry->etype & UVM_ET_STACK) == 0) {
1796 		if (entry->protection == PROT_NONE)
1797 			return (-1);	/* don't update range */
1798 		return (0);
1799 	}
1800 	return (1);
1801 }
1802 
1803 /*
1804  * If a syscall comes from a writeable entry, W^X is violated.
1805  * (Would be nice if we can spot aliasing, which is also kind of bad)
1806  */
1807 int
1808 uvm_map_inentry_pc(vm_map_entry_t entry)
1809 {
1810 	if (entry->protection & PROT_WRITE)
1811 		return (0);	/* not permitted */
1812 	return (1);
1813 }
1814 
1815 int
1816 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1817 {
1818 	return (serial != ie->ie_serial || ie->ie_start == 0 ||
1819 	    addr < ie->ie_start || addr >= ie->ie_end);
1820 }
1821 
1822 /*
1823  * Inside a vm_map find the reg address and verify it via function.
1824  * Remember low and high addresses of region if valid and return TRUE,
1825  * else return FALSE.
1826  */
1827 boolean_t
1828 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1829     int (*fn)(vm_map_entry_t), u_long serial)
1830 {
1831 	vm_map_t map = &p->p_vmspace->vm_map;
1832 	vm_map_entry_t entry;
1833 	int ret;
1834 
1835 	if (addr < map->min_offset || addr >= map->max_offset)
1836 		return (FALSE);
1837 
1838 	/* lock map */
1839 	vm_map_lock_read(map);
1840 
1841 	/* lookup */
1842 	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1843 		vm_map_unlock_read(map);
1844 		return (FALSE);
1845 	}
1846 
1847 	ret = (*fn)(entry);
1848 	if (ret == 0) {
1849 		vm_map_unlock_read(map);
1850 		return (FALSE);
1851 	} else if (ret == 1) {
1852 		ie->ie_start = entry->start;
1853 		ie->ie_end = entry->end;
1854 		ie->ie_serial = serial;
1855 	} else {
1856 		/* do not update, re-check later */
1857 	}
1858 	vm_map_unlock_read(map);
1859 	return (TRUE);
1860 }
1861 
1862 boolean_t
1863 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr, char *name,
1864     int (*fn)(vm_map_entry_t), u_long serial)
1865 {
1866 	union sigval sv;
1867 	boolean_t ok = TRUE;
1868 
1869 	if (uvm_map_inentry_recheck(serial, addr, ie)) {
1870 		KERNEL_LOCK();
1871 		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1872 		if (!ok) {
1873 			printf("[%s]%d/%d %s %lx not inside %lx-%lx\n",
1874 			    p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1875 			    name, addr, ie->ie_start, ie->ie_end);
1876 			sv.sival_ptr = (void *)PROC_PC(p);
1877 			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1878 		}
1879 		KERNEL_UNLOCK();
1880 	}
1881 	return (ok);
1882 }
1883 
1884 /*
1885  * Check whether the given address range can be converted to a MAP_STACK
1886  * mapping.
1887  *
1888  * Must be called with map locked.
1889  */
1890 boolean_t
1891 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
1892 {
1893 	vaddr_t end = addr + sz;
1894 	struct vm_map_entry *first, *iter, *prev = NULL;
1895 
1896 	if (!uvm_map_lookup_entry(map, addr, &first)) {
1897 		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1898 		    addr, end, map);
1899 		return FALSE;
1900 	}
1901 
1902 	/*
1903 	 * Check that the address range exists and is contiguous.
1904 	 */
1905 	for (iter = first; iter != NULL && iter->start < end;
1906 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1907 		/*
1908 		 * Make sure that we do not have holes in the range.
1909 		 */
1910 #if 0
1911 		if (prev != NULL) {
1912 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1913 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1914 			    prev->start, prev->end, iter->start, iter->end);
1915 		}
1916 #endif
1917 
1918 		if (prev != NULL && prev->end != iter->start) {
1919 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1920 			    "hole in range\n", addr, end, map);
1921 			return FALSE;
1922 		}
1923 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1924 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1925 			    "hole in range\n", addr, end, map);
1926 			return FALSE;
1927 		}
1928 	}
1929 
1930 	return TRUE;
1931 }
1932 
1933 /*
1934  * Remap the middle-pages of an existing mapping as a stack range.
1935  * If there exists a previous contiguous mapping with the given range
1936  * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1937  * mapping is dropped, and a new anon mapping is created and marked as
1938  * a stack.
1939  *
1940  * Must be called with map unlocked.
1941  */
1942 int
1943 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1944 {
1945 	vm_map_t map = &p->p_vmspace->vm_map;
1946 	vaddr_t start, end;
1947 	int error;
1948 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1949 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1950 	    MAP_INHERIT_COPY, MADV_NORMAL,
1951 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1952 	    UVM_FLAG_COPYONW);
1953 
1954 	start = round_page(addr);
1955 	end = trunc_page(addr + sz);
1956 #ifdef MACHINE_STACK_GROWS_UP
1957 	if (end == addr + sz)
1958 		end -= PAGE_SIZE;
1959 #else
1960 	if (start == addr)
1961 		start += PAGE_SIZE;
1962 #endif
1963 
1964 	if (start < map->min_offset || end >= map->max_offset || end < start)
1965 		return EINVAL;
1966 
1967 	error = uvm_mapanon(map, &start, end - start, 0, flags);
1968 	if (error != 0)
1969 		printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1970 
1971 	return error;
1972 }
1973 
1974 /*
1975  * uvm_map_pie: return a random load address for a PIE executable
1976  * properly aligned.
1977  */
1978 #ifndef VM_PIE_MAX_ADDR
1979 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1980 #endif
1981 
1982 #ifndef VM_PIE_MIN_ADDR
1983 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1984 #endif
1985 
1986 #ifndef VM_PIE_MIN_ALIGN
1987 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1988 #endif
1989 
1990 vaddr_t
1991 uvm_map_pie(vaddr_t align)
1992 {
1993 	vaddr_t addr, space, min;
1994 
1995 	align = MAX(align, VM_PIE_MIN_ALIGN);
1996 
1997 	/* round up to next alignment */
1998 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1999 
2000 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
2001 		return (align);
2002 
2003 	space = (VM_PIE_MAX_ADDR - min) / align;
2004 	space = MIN(space, (u_int32_t)-1);
2005 
2006 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
2007 	addr += min;
2008 
2009 	return (addr);
2010 }
2011 
2012 void
2013 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
2014 {
2015 	struct uvm_map_deadq dead;
2016 
2017 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
2018 	    (end & (vaddr_t)PAGE_MASK) == 0);
2019 	TAILQ_INIT(&dead);
2020 	vm_map_lock(map);
2021 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
2022 	vm_map_unlock(map);
2023 
2024 	if (map->flags & VM_MAP_INTRSAFE)
2025 		uvm_unmap_detach_intrsafe(&dead);
2026 	else
2027 		uvm_unmap_detach(&dead, 0);
2028 }
2029 
2030 /*
2031  * Mark entry as free.
2032  *
2033  * entry will be put on the dead list.
2034  * The free space will be merged into the previous or a new entry,
2035  * unless markfree is false.
2036  */
2037 void
2038 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
2039     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
2040     boolean_t markfree)
2041 {
2042 	struct uvm_addr_state	*free;
2043 	struct vm_map_entry	*prev;
2044 	vaddr_t			 addr;	/* Start of freed range. */
2045 	vaddr_t			 end;	/* End of freed range. */
2046 
2047 	prev = *prev_ptr;
2048 	if (prev == entry)
2049 		*prev_ptr = prev = NULL;
2050 
2051 	if (prev == NULL ||
2052 	    VMMAP_FREE_END(prev) != entry->start)
2053 		prev = RBT_PREV(uvm_map_addr, entry);
2054 
2055 	/* Entry is describing only free memory and has nothing to drain into. */
2056 	if (prev == NULL && entry->start == entry->end && markfree) {
2057 		*prev_ptr = entry;
2058 		return;
2059 	}
2060 
2061 	addr = entry->start;
2062 	end = VMMAP_FREE_END(entry);
2063 	free = uvm_map_uaddr_e(map, entry);
2064 	uvm_mapent_free_remove(map, free, entry);
2065 	uvm_mapent_addr_remove(map, entry);
2066 	DEAD_ENTRY_PUSH(dead, entry);
2067 
2068 	if (markfree) {
2069 		if (prev) {
2070 			free = uvm_map_uaddr_e(map, prev);
2071 			uvm_mapent_free_remove(map, free, prev);
2072 		}
2073 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
2074 	}
2075 }
2076 
2077 /*
2078  * Unwire and release referenced amap and object from map entry.
2079  */
2080 void
2081 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
2082 {
2083 	/* Unwire removed map entry. */
2084 	if (VM_MAPENT_ISWIRED(entry)) {
2085 		KERNEL_LOCK();
2086 		entry->wired_count = 0;
2087 		uvm_fault_unwire_locked(map, entry->start, entry->end);
2088 		KERNEL_UNLOCK();
2089 	}
2090 
2091 	/* Entry-type specific code. */
2092 	if (UVM_ET_ISHOLE(entry)) {
2093 		/* Nothing to be done for holes. */
2094 	} else if (map->flags & VM_MAP_INTRSAFE) {
2095 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2096 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
2097 		pmap_kremove(entry->start, entry->end - entry->start);
2098 	} else if (UVM_ET_ISOBJ(entry) &&
2099 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2100 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2101 		/*
2102 		 * Note: kernel object mappings are currently used in
2103 		 * two ways:
2104 		 *  [1] "normal" mappings of pages in the kernel object
2105 		 *  [2] uvm_km_valloc'd allocations in which we
2106 		 *      pmap_enter in some non-kernel-object page
2107 		 *      (e.g. vmapbuf).
2108 		 *
2109 		 * for case [1], we need to remove the mapping from
2110 		 * the pmap and then remove the page from the kernel
2111 		 * object (because, once pages in a kernel object are
2112 		 * unmapped they are no longer needed, unlike, say,
2113 		 * a vnode where you might want the data to persist
2114 		 * until flushed out of a queue).
2115 		 *
2116 		 * for case [2], we need to remove the mapping from
2117 		 * the pmap.  there shouldn't be any pages at the
2118 		 * specified offset in the kernel object [but it
2119 		 * doesn't hurt to call uvm_km_pgremove just to be
2120 		 * safe?]
2121 		 *
2122 		 * uvm_km_pgremove currently does the following:
2123 		 *   for pages in the kernel object range:
2124 		 *     - drops the swap slot
2125 		 *     - uvm_pagefree the page
2126 		 *
2127 		 * note there is version of uvm_km_pgremove() that
2128 		 * is used for "intrsafe" objects.
2129 		 */
2130 		/*
2131 		 * remove mappings from pmap and drop the pages
2132 		 * from the object.  offsets are always relative
2133 		 * to vm_map_min(kernel_map).
2134 		 */
2135 		pmap_remove(pmap_kernel(), entry->start, entry->end);
2136 		uvm_km_pgremove(entry->object.uvm_obj,
2137 		    entry->start - vm_map_min(kernel_map),
2138 		    entry->end - vm_map_min(kernel_map));
2139 
2140 		/*
2141 		 * null out kernel_object reference, we've just
2142 		 * dropped it
2143 		 */
2144 		entry->etype &= ~UVM_ET_OBJ;
2145 		entry->object.uvm_obj = NULL;  /* to be safe */
2146 	} else {
2147 		/* remove mappings the standard way. */
2148 		pmap_remove(map->pmap, entry->start, entry->end);
2149 	}
2150 }
2151 
2152 /*
2153  * Remove all entries from start to end.
2154  *
2155  * If remove_holes, then remove ET_HOLE entries as well.
2156  * If markfree, entry will be properly marked free, otherwise, no replacement
2157  * entry will be put in the tree (corrupting the tree).
2158  */
2159 void
2160 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2161     struct uvm_map_deadq *dead, boolean_t remove_holes,
2162     boolean_t markfree)
2163 {
2164 	struct vm_map_entry *prev_hint, *next, *entry;
2165 
2166 	start = MAX(start, map->min_offset);
2167 	end = MIN(end, map->max_offset);
2168 	if (start >= end)
2169 		return;
2170 
2171 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2172 		splassert(IPL_NONE);
2173 	else
2174 		splassert(IPL_VM);
2175 
2176 	/* Find first affected entry. */
2177 	entry = uvm_map_entrybyaddr(&map->addr, start);
2178 	KDASSERT(entry != NULL && entry->start <= start);
2179 	if (entry->end <= start && markfree)
2180 		entry = RBT_NEXT(uvm_map_addr, entry);
2181 	else
2182 		UVM_MAP_CLIP_START(map, entry, start);
2183 
2184 	/*
2185 	 * Iterate entries until we reach end address.
2186 	 * prev_hint hints where the freed space can be appended to.
2187 	 */
2188 	prev_hint = NULL;
2189 	for (; entry != NULL && entry->start < end; entry = next) {
2190 		KDASSERT(entry->start >= start);
2191 		if (entry->end > end || !markfree)
2192 			UVM_MAP_CLIP_END(map, entry, end);
2193 		KDASSERT(entry->start >= start && entry->end <= end);
2194 		next = RBT_NEXT(uvm_map_addr, entry);
2195 
2196 		/* Don't remove holes unless asked to do so. */
2197 		if (UVM_ET_ISHOLE(entry)) {
2198 			if (!remove_holes) {
2199 				prev_hint = entry;
2200 				continue;
2201 			}
2202 		}
2203 
2204 		/* A stack has been removed.. */
2205 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2206 			map->sserial++;
2207 
2208 		/* Kill entry. */
2209 		uvm_unmap_kill_entry(map, entry);
2210 
2211 		/* Update space usage. */
2212 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2213 		    entry->object.uvm_obj == NULL &&
2214 		    !UVM_ET_ISHOLE(entry)) {
2215 			((struct vmspace *)map)->vm_dused -=
2216 			    uvmspace_dused(map, entry->start, entry->end);
2217 		}
2218 		if (!UVM_ET_ISHOLE(entry))
2219 			map->size -= entry->end - entry->start;
2220 
2221 		/* Actual removal of entry. */
2222 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2223 	}
2224 
2225 	pmap_update(vm_map_pmap(map));
2226 
2227 #ifdef VMMAP_DEBUG
2228 	if (markfree) {
2229 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2230 		    entry != NULL && entry->start < end;
2231 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2232 			KDASSERT(entry->end <= start ||
2233 			    entry->start == entry->end ||
2234 			    UVM_ET_ISHOLE(entry));
2235 		}
2236 	} else {
2237 		vaddr_t a;
2238 		for (a = start; a < end; a += PAGE_SIZE)
2239 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2240 	}
2241 #endif
2242 }
2243 
2244 /*
2245  * Mark all entries from first until end (exclusive) as pageable.
2246  *
2247  * Lock must be exclusive on entry and will not be touched.
2248  */
2249 void
2250 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2251     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2252 {
2253 	struct vm_map_entry *iter;
2254 
2255 	for (iter = first; iter != end;
2256 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2257 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2258 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2259 			continue;
2260 
2261 		iter->wired_count = 0;
2262 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2263 	}
2264 }
2265 
2266 /*
2267  * Mark all entries from first until end (exclusive) as wired.
2268  *
2269  * Lockflags determines the lock state on return from this function.
2270  * Lock must be exclusive on entry.
2271  */
2272 int
2273 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2274     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2275     int lockflags)
2276 {
2277 	struct vm_map_entry *iter;
2278 #ifdef DIAGNOSTIC
2279 	unsigned int timestamp_save;
2280 #endif
2281 	int error;
2282 
2283 	/*
2284 	 * Wire pages in two passes:
2285 	 *
2286 	 * 1: holding the write lock, we create any anonymous maps that need
2287 	 *    to be created.  then we clip each map entry to the region to
2288 	 *    be wired and increment its wiring count.
2289 	 *
2290 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2291 	 *    in the pages for any newly wired area (wired_count == 1).
2292 	 *
2293 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2294 	 *    deadlock with another thread that may have faulted on one of
2295 	 *    the pages to be wired (it would mark the page busy, blocking
2296 	 *    us, then in turn block on the map lock that we hold).
2297 	 *    because we keep the read lock on the map, the copy-on-write
2298 	 *    status of the entries we modify here cannot change.
2299 	 */
2300 	for (iter = first; iter != end;
2301 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2302 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2303 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2304 		    iter->protection == PROT_NONE)
2305 			continue;
2306 
2307 		/*
2308 		 * Perform actions of vm_map_lookup that need the write lock.
2309 		 * - create an anonymous map for copy-on-write
2310 		 * - anonymous map for zero-fill
2311 		 * Skip submaps.
2312 		 */
2313 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2314 		    UVM_ET_ISNEEDSCOPY(iter) &&
2315 		    ((iter->protection & PROT_WRITE) ||
2316 		    iter->object.uvm_obj == NULL)) {
2317 			amap_copy(map, iter, M_WAITOK,
2318 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2319 			    iter->start, iter->end);
2320 		}
2321 		iter->wired_count++;
2322 	}
2323 
2324 	/*
2325 	 * Pass 2.
2326 	 */
2327 #ifdef DIAGNOSTIC
2328 	timestamp_save = map->timestamp;
2329 #endif
2330 	vm_map_busy(map);
2331 	vm_map_downgrade(map);
2332 
2333 	error = 0;
2334 	for (iter = first; error == 0 && iter != end;
2335 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2336 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2337 		    iter->protection == PROT_NONE)
2338 			continue;
2339 
2340 		error = uvm_fault_wire(map, iter->start, iter->end,
2341 		    iter->protection);
2342 	}
2343 
2344 	if (error) {
2345 		/*
2346 		 * uvm_fault_wire failure
2347 		 *
2348 		 * Reacquire lock and undo our work.
2349 		 */
2350 		vm_map_upgrade(map);
2351 		vm_map_unbusy(map);
2352 #ifdef DIAGNOSTIC
2353 		if (timestamp_save != map->timestamp)
2354 			panic("uvm_map_pageable_wire: stale map");
2355 #endif
2356 
2357 		/*
2358 		 * first is no longer needed to restart loops.
2359 		 * Use it as iterator to unmap successful mappings.
2360 		 */
2361 		for (; first != iter;
2362 		    first = RBT_NEXT(uvm_map_addr, first)) {
2363 			if (UVM_ET_ISHOLE(first) ||
2364 			    first->start == first->end ||
2365 			    first->protection == PROT_NONE)
2366 				continue;
2367 
2368 			first->wired_count--;
2369 			if (!VM_MAPENT_ISWIRED(first)) {
2370 				uvm_fault_unwire_locked(map,
2371 				    iter->start, iter->end);
2372 			}
2373 		}
2374 
2375 		/* decrease counter in the rest of the entries */
2376 		for (; iter != end;
2377 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2378 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2379 			    iter->protection == PROT_NONE)
2380 				continue;
2381 
2382 			iter->wired_count--;
2383 		}
2384 
2385 		if ((lockflags & UVM_LK_EXIT) == 0)
2386 			vm_map_unlock(map);
2387 		return error;
2388 	}
2389 
2390 	/* We are currently holding a read lock. */
2391 	if ((lockflags & UVM_LK_EXIT) == 0) {
2392 		vm_map_unbusy(map);
2393 		vm_map_unlock_read(map);
2394 	} else {
2395 		vm_map_upgrade(map);
2396 		vm_map_unbusy(map);
2397 #ifdef DIAGNOSTIC
2398 		if (timestamp_save != map->timestamp)
2399 			panic("uvm_map_pageable_wire: stale map");
2400 #endif
2401 	}
2402 	return 0;
2403 }
2404 
2405 /*
2406  * uvm_map_pageable: set pageability of a range in a map.
2407  *
2408  * Flags:
2409  * UVM_LK_ENTER: map is already locked by caller
2410  * UVM_LK_EXIT:  don't unlock map on exit
2411  *
2412  * The full range must be in use (entries may not have fspace != 0).
2413  * UVM_ET_HOLE counts as unmapped.
2414  */
2415 int
2416 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2417     boolean_t new_pageable, int lockflags)
2418 {
2419 	struct vm_map_entry *first, *last, *tmp;
2420 	int error;
2421 
2422 	start = trunc_page(start);
2423 	end = round_page(end);
2424 
2425 	if (start > end)
2426 		return EINVAL;
2427 	if (start == end)
2428 		return 0;	/* nothing to do */
2429 	if (start < map->min_offset)
2430 		return EFAULT; /* why? see first XXX below */
2431 	if (end > map->max_offset)
2432 		return EINVAL; /* why? see second XXX below */
2433 
2434 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2435 	if ((lockflags & UVM_LK_ENTER) == 0)
2436 		vm_map_lock(map);
2437 
2438 	/*
2439 	 * Find first entry.
2440 	 *
2441 	 * Initial test on start is different, because of the different
2442 	 * error returned. Rest is tested further down.
2443 	 */
2444 	first = uvm_map_entrybyaddr(&map->addr, start);
2445 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2446 		/*
2447 		 * XXX if the first address is not mapped, it is EFAULT?
2448 		 */
2449 		error = EFAULT;
2450 		goto out;
2451 	}
2452 
2453 	/* Check that the range has no holes. */
2454 	for (last = first; last != NULL && last->start < end;
2455 	    last = RBT_NEXT(uvm_map_addr, last)) {
2456 		if (UVM_ET_ISHOLE(last) ||
2457 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2458 			/*
2459 			 * XXX unmapped memory in range, why is it EINVAL
2460 			 * instead of EFAULT?
2461 			 */
2462 			error = EINVAL;
2463 			goto out;
2464 		}
2465 	}
2466 
2467 	/*
2468 	 * Last ended at the first entry after the range.
2469 	 * Move back one step.
2470 	 *
2471 	 * Note that last may be NULL.
2472 	 */
2473 	if (last == NULL) {
2474 		last = RBT_MAX(uvm_map_addr, &map->addr);
2475 		if (last->end < end) {
2476 			error = EINVAL;
2477 			goto out;
2478 		}
2479 	} else {
2480 		KASSERT(last != first);
2481 		last = RBT_PREV(uvm_map_addr, last);
2482 	}
2483 
2484 	/* Wire/unwire pages here. */
2485 	if (new_pageable) {
2486 		/*
2487 		 * Mark pageable.
2488 		 * entries that are not wired are untouched.
2489 		 */
2490 		if (VM_MAPENT_ISWIRED(first))
2491 			UVM_MAP_CLIP_START(map, first, start);
2492 		/*
2493 		 * Split last at end.
2494 		 * Make tmp be the first entry after what is to be touched.
2495 		 * If last is not wired, don't touch it.
2496 		 */
2497 		if (VM_MAPENT_ISWIRED(last)) {
2498 			UVM_MAP_CLIP_END(map, last, end);
2499 			tmp = RBT_NEXT(uvm_map_addr, last);
2500 		} else
2501 			tmp = last;
2502 
2503 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2504 		error = 0;
2505 
2506 out:
2507 		if ((lockflags & UVM_LK_EXIT) == 0)
2508 			vm_map_unlock(map);
2509 		return error;
2510 	} else {
2511 		/*
2512 		 * Mark entries wired.
2513 		 * entries are always touched (because recovery needs this).
2514 		 */
2515 		if (!VM_MAPENT_ISWIRED(first))
2516 			UVM_MAP_CLIP_START(map, first, start);
2517 		/*
2518 		 * Split last at end.
2519 		 * Make tmp be the first entry after what is to be touched.
2520 		 * If last is not wired, don't touch it.
2521 		 */
2522 		if (!VM_MAPENT_ISWIRED(last)) {
2523 			UVM_MAP_CLIP_END(map, last, end);
2524 			tmp = RBT_NEXT(uvm_map_addr, last);
2525 		} else
2526 			tmp = last;
2527 
2528 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2529 		    lockflags);
2530 	}
2531 }
2532 
2533 /*
2534  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2535  * all mapped regions.
2536  *
2537  * Map must not be locked.
2538  * If no flags are specified, all ragions are unwired.
2539  */
2540 int
2541 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2542 {
2543 	vsize_t size;
2544 	struct vm_map_entry *iter;
2545 
2546 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2547 	vm_map_lock(map);
2548 
2549 	if (flags == 0) {
2550 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2551 		    NULL, map->min_offset, map->max_offset);
2552 
2553 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2554 		vm_map_unlock(map);
2555 		return 0;
2556 	}
2557 
2558 	if (flags & MCL_FUTURE)
2559 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2560 	if (!(flags & MCL_CURRENT)) {
2561 		vm_map_unlock(map);
2562 		return 0;
2563 	}
2564 
2565 	/*
2566 	 * Count number of pages in all non-wired entries.
2567 	 * If the number exceeds the limit, abort.
2568 	 */
2569 	size = 0;
2570 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2571 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2572 			continue;
2573 
2574 		size += iter->end - iter->start;
2575 	}
2576 
2577 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2578 		vm_map_unlock(map);
2579 		return ENOMEM;
2580 	}
2581 
2582 	/* XXX non-pmap_wired_count case must be handled by caller */
2583 #ifdef pmap_wired_count
2584 	if (limit != 0 &&
2585 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2586 		vm_map_unlock(map);
2587 		return ENOMEM;
2588 	}
2589 #endif
2590 
2591 	/*
2592 	 * uvm_map_pageable_wire will release lcok
2593 	 */
2594 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2595 	    NULL, map->min_offset, map->max_offset, 0);
2596 }
2597 
2598 /*
2599  * Initialize map.
2600  *
2601  * Allocates sufficient entries to describe the free memory in the map.
2602  */
2603 void
2604 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags)
2605 {
2606 	int i;
2607 
2608 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2609 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2610 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2611 
2612 	/*
2613 	 * Update parameters.
2614 	 *
2615 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2616 	 * properly.
2617 	 * We lose the top page if the full virtual address space is used.
2618 	 */
2619 	if (max & (vaddr_t)PAGE_MASK) {
2620 		max += 1;
2621 		if (max == 0) /* overflow */
2622 			max -= PAGE_SIZE;
2623 	}
2624 
2625 	RBT_INIT(uvm_map_addr, &map->addr);
2626 	map->uaddr_exe = NULL;
2627 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2628 		map->uaddr_any[i] = NULL;
2629 	map->uaddr_brk_stack = NULL;
2630 
2631 	map->size = 0;
2632 	map->ref_count = 0;
2633 	map->min_offset = min;
2634 	map->max_offset = max;
2635 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2636 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2637 	map->flags = flags;
2638 	map->timestamp = 0;
2639 	rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2640 	mtx_init(&map->mtx, IPL_VM);
2641 	mtx_init(&map->flags_lock, IPL_VM);
2642 
2643 	/* Configure the allocators. */
2644 	if (flags & VM_MAP_ISVMSPACE)
2645 		uvm_map_setup_md(map);
2646 	else
2647 		map->uaddr_any[3] = &uaddr_kbootstrap;
2648 
2649 	/*
2650 	 * Fill map entries.
2651 	 * We do not need to write-lock the map here because only the current
2652 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2653 	 * bogus triggering of lock-not-held assertions.
2654 	 */
2655 	uvm_map_setup_entries(map);
2656 	uvm_tree_sanity(map, __FILE__, __LINE__);
2657 	map->ref_count = 1;
2658 }
2659 
2660 /*
2661  * Destroy the map.
2662  *
2663  * This is the inverse operation to uvm_map_setup.
2664  */
2665 void
2666 uvm_map_teardown(struct vm_map *map)
2667 {
2668 	struct uvm_map_deadq	 dead_entries;
2669 	struct vm_map_entry	*entry, *tmp;
2670 #ifdef VMMAP_DEBUG
2671 	size_t			 numq, numt;
2672 #endif
2673 	int			 i;
2674 
2675 	KERNEL_ASSERT_LOCKED();
2676 	KERNEL_UNLOCK();
2677 	KERNEL_ASSERT_UNLOCKED();
2678 
2679 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2680 
2681 	/* Remove address selectors. */
2682 	uvm_addr_destroy(map->uaddr_exe);
2683 	map->uaddr_exe = NULL;
2684 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2685 		uvm_addr_destroy(map->uaddr_any[i]);
2686 		map->uaddr_any[i] = NULL;
2687 	}
2688 	uvm_addr_destroy(map->uaddr_brk_stack);
2689 	map->uaddr_brk_stack = NULL;
2690 
2691 	/*
2692 	 * Remove entries.
2693 	 *
2694 	 * The following is based on graph breadth-first search.
2695 	 *
2696 	 * In color terms:
2697 	 * - the dead_entries set contains all nodes that are reachable
2698 	 *   (i.e. both the black and the grey nodes)
2699 	 * - any entry not in dead_entries is white
2700 	 * - any entry that appears in dead_entries before entry,
2701 	 *   is black, the rest is grey.
2702 	 * The set [entry, end] is also referred to as the wavefront.
2703 	 *
2704 	 * Since the tree is always a fully connected graph, the breadth-first
2705 	 * search guarantees that each vmmap_entry is visited exactly once.
2706 	 * The vm_map is broken down in linear time.
2707 	 */
2708 	TAILQ_INIT(&dead_entries);
2709 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2710 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2711 	while (entry != NULL) {
2712 		sched_pause(yield);
2713 		uvm_unmap_kill_entry(map, entry);
2714 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2715 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2716 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2717 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2718 		/* Update wave-front. */
2719 		entry = TAILQ_NEXT(entry, dfree.deadq);
2720 	}
2721 
2722 #ifdef VMMAP_DEBUG
2723 	numt = numq = 0;
2724 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2725 		numt++;
2726 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2727 		numq++;
2728 	KASSERT(numt == numq);
2729 #endif
2730 	uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2731 
2732 	KERNEL_LOCK();
2733 
2734 	pmap_destroy(map->pmap);
2735 	map->pmap = NULL;
2736 }
2737 
2738 /*
2739  * Populate map with free-memory entries.
2740  *
2741  * Map must be initialized and empty.
2742  */
2743 void
2744 uvm_map_setup_entries(struct vm_map *map)
2745 {
2746 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2747 
2748 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2749 }
2750 
2751 /*
2752  * Split entry at given address.
2753  *
2754  * orig:  entry that is to be split.
2755  * next:  a newly allocated map entry that is not linked.
2756  * split: address at which the split is done.
2757  */
2758 void
2759 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2760     struct vm_map_entry *next, vaddr_t split)
2761 {
2762 	struct uvm_addr_state *free, *free_before;
2763 	vsize_t adj;
2764 
2765 	if ((split & PAGE_MASK) != 0) {
2766 		panic("uvm_map_splitentry: split address 0x%lx "
2767 		    "not on page boundary!", split);
2768 	}
2769 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2770 	uvm_tree_sanity(map, __FILE__, __LINE__);
2771 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2772 
2773 #ifdef VMMAP_DEBUG
2774 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2775 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2776 #endif /* VMMAP_DEBUG */
2777 
2778 	/*
2779 	 * Free space will change, unlink from free space tree.
2780 	 */
2781 	free = uvm_map_uaddr_e(map, orig);
2782 	uvm_mapent_free_remove(map, free, orig);
2783 
2784 	adj = split - orig->start;
2785 
2786 	uvm_mapent_copy(orig, next);
2787 	if (split >= orig->end) {
2788 		next->etype = 0;
2789 		next->offset = 0;
2790 		next->wired_count = 0;
2791 		next->start = next->end = split;
2792 		next->guard = 0;
2793 		next->fspace = VMMAP_FREE_END(orig) - split;
2794 		next->aref.ar_amap = NULL;
2795 		next->aref.ar_pageoff = 0;
2796 		orig->guard = MIN(orig->guard, split - orig->end);
2797 		orig->fspace = split - VMMAP_FREE_START(orig);
2798 	} else {
2799 		orig->fspace = 0;
2800 		orig->guard = 0;
2801 		orig->end = next->start = split;
2802 
2803 		if (next->aref.ar_amap) {
2804 			KERNEL_LOCK();
2805 			amap_splitref(&orig->aref, &next->aref, adj);
2806 			KERNEL_UNLOCK();
2807 		}
2808 		if (UVM_ET_ISSUBMAP(orig)) {
2809 			uvm_map_reference(next->object.sub_map);
2810 			next->offset += adj;
2811 		} else if (UVM_ET_ISOBJ(orig)) {
2812 			if (next->object.uvm_obj->pgops &&
2813 			    next->object.uvm_obj->pgops->pgo_reference) {
2814 				KERNEL_LOCK();
2815 				next->object.uvm_obj->pgops->pgo_reference(
2816 				    next->object.uvm_obj);
2817 				KERNEL_UNLOCK();
2818 			}
2819 			next->offset += adj;
2820 		}
2821 	}
2822 
2823 	/*
2824 	 * Link next into address tree.
2825 	 * Link orig and next into free-space tree.
2826 	 *
2827 	 * Don't insert 'next' into the addr tree until orig has been linked,
2828 	 * in case the free-list looks at adjecent entries in the addr tree
2829 	 * for its decisions.
2830 	 */
2831 	if (orig->fspace > 0)
2832 		free_before = free;
2833 	else
2834 		free_before = uvm_map_uaddr_e(map, orig);
2835 	uvm_mapent_free_insert(map, free_before, orig);
2836 	uvm_mapent_addr_insert(map, next);
2837 	uvm_mapent_free_insert(map, free, next);
2838 
2839 	uvm_tree_sanity(map, __FILE__, __LINE__);
2840 }
2841 
2842 
2843 #ifdef VMMAP_DEBUG
2844 
2845 void
2846 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2847     char *file, int line)
2848 {
2849 	char* map_special;
2850 
2851 	if (test)
2852 		return;
2853 
2854 	if (map == kernel_map)
2855 		map_special = " (kernel_map)";
2856 	else if (map == kmem_map)
2857 		map_special = " (kmem_map)";
2858 	else
2859 		map_special = "";
2860 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2861 	    line, test_str);
2862 }
2863 
2864 /*
2865  * Check that map is sane.
2866  */
2867 void
2868 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2869 {
2870 	struct vm_map_entry	*iter;
2871 	vaddr_t			 addr;
2872 	vaddr_t			 min, max, bound; /* Bounds checker. */
2873 	struct uvm_addr_state	*free;
2874 
2875 	addr = vm_map_min(map);
2876 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2877 		/*
2878 		 * Valid start, end.
2879 		 * Catch overflow for end+fspace.
2880 		 */
2881 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2882 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2883 
2884 		/* May not be empty. */
2885 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2886 		    file, line);
2887 
2888 		/* Addresses for entry must lie within map boundaries. */
2889 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2890 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2891 
2892 		/* Tree may not have gaps. */
2893 		UVM_ASSERT(map, iter->start == addr, file, line);
2894 		addr = VMMAP_FREE_END(iter);
2895 
2896 		/*
2897 		 * Free space may not cross boundaries, unless the same
2898 		 * free list is used on both sides of the border.
2899 		 */
2900 		min = VMMAP_FREE_START(iter);
2901 		max = VMMAP_FREE_END(iter);
2902 
2903 		while (min < max &&
2904 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2905 			UVM_ASSERT(map,
2906 			    uvm_map_uaddr(map, bound - 1) ==
2907 			    uvm_map_uaddr(map, bound),
2908 			    file, line);
2909 			min = bound;
2910 		}
2911 
2912 		free = uvm_map_uaddr_e(map, iter);
2913 		if (free) {
2914 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2915 			    file, line);
2916 		} else {
2917 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2918 			    file, line);
2919 		}
2920 	}
2921 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2922 }
2923 
2924 void
2925 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2926 {
2927 	struct vm_map_entry *iter;
2928 	vsize_t size;
2929 
2930 	size = 0;
2931 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2932 		if (!UVM_ET_ISHOLE(iter))
2933 			size += iter->end - iter->start;
2934 	}
2935 
2936 	if (map->size != size)
2937 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2938 	UVM_ASSERT(map, map->size == size, file, line);
2939 
2940 	vmspace_validate(map);
2941 }
2942 
2943 /*
2944  * This function validates the statistics on vmspace.
2945  */
2946 void
2947 vmspace_validate(struct vm_map *map)
2948 {
2949 	struct vmspace *vm;
2950 	struct vm_map_entry *iter;
2951 	vaddr_t imin, imax;
2952 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2953 	vsize_t stack, heap; /* Measured sizes. */
2954 
2955 	if (!(map->flags & VM_MAP_ISVMSPACE))
2956 		return;
2957 
2958 	vm = (struct vmspace *)map;
2959 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2960 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2961 
2962 	stack = heap = 0;
2963 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2964 		imin = imax = iter->start;
2965 
2966 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL)
2967 			continue;
2968 
2969 		/*
2970 		 * Update stack, heap.
2971 		 * Keep in mind that (theoretically) the entries of
2972 		 * userspace and stack may be joined.
2973 		 */
2974 		while (imin != iter->end) {
2975 			/*
2976 			 * Set imax to the first boundary crossed between
2977 			 * imin and stack addresses.
2978 			 */
2979 			imax = iter->end;
2980 			if (imin < stack_begin && imax > stack_begin)
2981 				imax = stack_begin;
2982 			else if (imin < stack_end && imax > stack_end)
2983 				imax = stack_end;
2984 
2985 			if (imin >= stack_begin && imin < stack_end)
2986 				stack += imax - imin;
2987 			else
2988 				heap += imax - imin;
2989 			imin = imax;
2990 		}
2991 	}
2992 
2993 	heap >>= PAGE_SHIFT;
2994 	if (heap != vm->vm_dused) {
2995 		printf("vmspace stack range: 0x%lx-0x%lx\n",
2996 		    stack_begin, stack_end);
2997 		panic("vmspace_validate: vmspace.vm_dused invalid, "
2998 		    "expected %ld pgs, got %ld pgs in map %p",
2999 		    heap, vm->vm_dused,
3000 		    map);
3001 	}
3002 }
3003 
3004 #endif /* VMMAP_DEBUG */
3005 
3006 /*
3007  * uvm_map_init: init mapping system at boot time.   note that we allocate
3008  * and init the static pool of structs vm_map_entry for the kernel here.
3009  */
3010 void
3011 uvm_map_init(void)
3012 {
3013 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
3014 	int lcv;
3015 
3016 	/* now set up static pool of kernel map entries ... */
3017 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
3018 	SLIST_INIT(&uvm.kentry_free);
3019 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
3020 		SLIST_INSERT_HEAD(&uvm.kentry_free,
3021 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
3022 	}
3023 
3024 	/* initialize the map-related pools. */
3025 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
3026 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
3027 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
3028 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
3029 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
3030 	    IPL_VM, 0, "vmmpekpl", NULL);
3031 	pool_sethiwat(&uvm_map_entry_pool, 8192);
3032 
3033 	uvm_addr_init();
3034 }
3035 
3036 #if defined(DDB)
3037 
3038 /*
3039  * DDB hooks
3040  */
3041 
3042 /*
3043  * uvm_map_printit: actually prints the map
3044  */
3045 void
3046 uvm_map_printit(struct vm_map *map, boolean_t full,
3047     int (*pr)(const char *, ...))
3048 {
3049 	struct vmspace			*vm;
3050 	struct vm_map_entry		*entry;
3051 	struct uvm_addr_state		*free;
3052 	int				 in_free, i;
3053 	char				 buf[8];
3054 
3055 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
3056 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
3057 	    map->b_start, map->b_end);
3058 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
3059 	    map->s_start, map->s_end);
3060 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
3061 	    map->size, map->ref_count, map->timestamp,
3062 	    map->flags);
3063 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
3064 	    pmap_resident_count(map->pmap));
3065 
3066 	/* struct vmspace handling. */
3067 	if (map->flags & VM_MAP_ISVMSPACE) {
3068 		vm = (struct vmspace *)map;
3069 
3070 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
3071 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
3072 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
3073 		    vm->vm_tsize, vm->vm_dsize);
3074 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
3075 		    vm->vm_taddr, vm->vm_daddr);
3076 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
3077 		    vm->vm_maxsaddr, vm->vm_minsaddr);
3078 	}
3079 
3080 	if (!full)
3081 		goto print_uaddr;
3082 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
3083 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3084 		    entry, entry->start, entry->end, entry->object.uvm_obj,
3085 		    (long long)entry->offset, entry->aref.ar_amap,
3086 		    entry->aref.ar_pageoff);
3087 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, "
3088 		    "wc=%d, adv=%d\n",
3089 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3090 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3091 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3092 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
3093 		    entry->protection, entry->max_protection,
3094 		    entry->inheritance, entry->wired_count, entry->advice);
3095 
3096 		free = uvm_map_uaddr_e(map, entry);
3097 		in_free = (free != NULL);
3098 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
3099 		    "free=0x%lx-0x%lx\n",
3100 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
3101 		    in_free ? 'T' : 'F',
3102 		    entry->guard,
3103 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
3104 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
3105 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
3106 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
3107 		if (free) {
3108 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
3109 			    free->uaddr_minaddr, free->uaddr_maxaddr,
3110 			    free->uaddr_functions->uaddr_name);
3111 		}
3112 	}
3113 
3114 print_uaddr:
3115 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
3116 	for (i = 0; i < nitems(map->uaddr_any); i++) {
3117 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
3118 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
3119 	}
3120 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
3121 }
3122 
3123 /*
3124  * uvm_object_printit: actually prints the object
3125  */
3126 void
3127 uvm_object_printit(uobj, full, pr)
3128 	struct uvm_object *uobj;
3129 	boolean_t full;
3130 	int (*pr)(const char *, ...);
3131 {
3132 	struct vm_page *pg;
3133 	int cnt = 0;
3134 
3135 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3136 	    uobj, uobj->pgops, uobj->uo_npages);
3137 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3138 		(*pr)("refs=<SYSTEM>\n");
3139 	else
3140 		(*pr)("refs=%d\n", uobj->uo_refs);
3141 
3142 	if (!full) {
3143 		return;
3144 	}
3145 	(*pr)("  PAGES <pg,offset>:\n  ");
3146 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3147 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3148 		if ((cnt % 3) == 2) {
3149 			(*pr)("\n  ");
3150 		}
3151 		cnt++;
3152 	}
3153 	if ((cnt % 3) != 2) {
3154 		(*pr)("\n");
3155 	}
3156 }
3157 
3158 /*
3159  * uvm_page_printit: actually print the page
3160  */
3161 static const char page_flagbits[] =
3162 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3163 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3164 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3165 
3166 void
3167 uvm_page_printit(pg, full, pr)
3168 	struct vm_page *pg;
3169 	boolean_t full;
3170 	int (*pr)(const char *, ...);
3171 {
3172 	struct vm_page *tpg;
3173 	struct uvm_object *uobj;
3174 	struct pglist *pgl;
3175 
3176 	(*pr)("PAGE %p:\n", pg);
3177 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3178 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3179 	    (long long)pg->phys_addr);
3180 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
3181 	    pg->uobject, pg->uanon, (long long)pg->offset);
3182 #if defined(UVM_PAGE_TRKOWN)
3183 	if (pg->pg_flags & PG_BUSY)
3184 		(*pr)("  owning thread = %d, tag=%s",
3185 		    pg->owner, pg->owner_tag);
3186 	else
3187 		(*pr)("  page not busy, no owner");
3188 #else
3189 	(*pr)("  [page ownership tracking disabled]");
3190 #endif
3191 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
3192 
3193 	if (!full)
3194 		return;
3195 
3196 	/* cross-verify object/anon */
3197 	if ((pg->pg_flags & PQ_FREE) == 0) {
3198 		if (pg->pg_flags & PQ_ANON) {
3199 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3200 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3201 				(pg->uanon) ? pg->uanon->an_page : NULL);
3202 			else
3203 				(*pr)("  anon backpointer is OK\n");
3204 		} else {
3205 			uobj = pg->uobject;
3206 			if (uobj) {
3207 				(*pr)("  checking object list\n");
3208 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3209 					if (tpg == pg) {
3210 						break;
3211 					}
3212 				}
3213 				if (tpg)
3214 					(*pr)("  page found on object list\n");
3215 				else
3216 					(*pr)("  >>> PAGE NOT FOUND "
3217 					    "ON OBJECT LIST! <<<\n");
3218 			}
3219 		}
3220 	}
3221 
3222 	/* cross-verify page queue */
3223 	if (pg->pg_flags & PQ_FREE) {
3224 		if (uvm_pmr_isfree(pg))
3225 			(*pr)("  page found in uvm_pmemrange\n");
3226 		else
3227 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3228 		pgl = NULL;
3229 	} else if (pg->pg_flags & PQ_INACTIVE) {
3230 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3231 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3232 	} else if (pg->pg_flags & PQ_ACTIVE) {
3233 		pgl = &uvm.page_active;
3234  	} else {
3235 		pgl = NULL;
3236 	}
3237 
3238 	if (pgl) {
3239 		(*pr)("  checking pageq list\n");
3240 		TAILQ_FOREACH(tpg, pgl, pageq) {
3241 			if (tpg == pg) {
3242 				break;
3243 			}
3244 		}
3245 		if (tpg)
3246 			(*pr)("  page found on pageq list\n");
3247 		else
3248 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3249 	}
3250 }
3251 #endif
3252 
3253 /*
3254  * uvm_map_protect: change map protection
3255  *
3256  * => set_max means set max_protection.
3257  * => map must be unlocked.
3258  */
3259 int
3260 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3261     vm_prot_t new_prot, boolean_t set_max)
3262 {
3263 	struct vm_map_entry *first, *iter;
3264 	vm_prot_t old_prot;
3265 	vm_prot_t mask;
3266 	int error;
3267 
3268 	if (start > end)
3269 		return EINVAL;
3270 	start = MAX(start, map->min_offset);
3271 	end = MIN(end, map->max_offset);
3272 	if (start >= end)
3273 		return 0;
3274 
3275 	error = 0;
3276 	vm_map_lock(map);
3277 
3278 	/*
3279 	 * Set up first and last.
3280 	 * - first will contain first entry at or after start.
3281 	 */
3282 	first = uvm_map_entrybyaddr(&map->addr, start);
3283 	KDASSERT(first != NULL);
3284 	if (first->end <= start)
3285 		first = RBT_NEXT(uvm_map_addr, first);
3286 
3287 	/* First, check for protection violations. */
3288 	for (iter = first; iter != NULL && iter->start < end;
3289 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3290 		/* Treat memory holes as free space. */
3291 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3292 			continue;
3293 
3294 		if (UVM_ET_ISSUBMAP(iter)) {
3295 			error = EINVAL;
3296 			goto out;
3297 		}
3298 		if ((new_prot & iter->max_protection) != new_prot) {
3299 			error = EACCES;
3300 			goto out;
3301 		}
3302 		if (map == kernel_map &&
3303 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3304 			panic("uvm_map_protect: kernel map W^X violation requested");
3305 	}
3306 
3307 	/* Fix protections.  */
3308 	for (iter = first; iter != NULL && iter->start < end;
3309 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3310 		/* Treat memory holes as free space. */
3311 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3312 			continue;
3313 
3314 		old_prot = iter->protection;
3315 
3316 		/*
3317 		 * Skip adapting protection iff old and new protection
3318 		 * are equal.
3319 		 */
3320 		if (set_max) {
3321 			if (old_prot == (new_prot & old_prot) &&
3322 			    iter->max_protection == new_prot)
3323 				continue;
3324 		} else {
3325 			if (old_prot == new_prot)
3326 				continue;
3327 		}
3328 
3329 		UVM_MAP_CLIP_START(map, iter, start);
3330 		UVM_MAP_CLIP_END(map, iter, end);
3331 
3332 		if (set_max) {
3333 			iter->max_protection = new_prot;
3334 			iter->protection &= new_prot;
3335 		} else
3336 			iter->protection = new_prot;
3337 
3338 		/*
3339 		 * update physical map if necessary.  worry about copy-on-write
3340 		 * here -- CHECK THIS XXX
3341 		 */
3342 		if (iter->protection != old_prot) {
3343 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3344 			    ~PROT_WRITE : PROT_MASK;
3345 
3346 			/* XXX should only wserial++ if no split occurs */
3347 			if (iter->protection & PROT_WRITE)
3348 				map->wserial++;
3349 
3350 			/* update pmap */
3351 			if ((iter->protection & mask) == PROT_NONE &&
3352 			    VM_MAPENT_ISWIRED(iter)) {
3353 				/*
3354 				 * TODO(ariane) this is stupid. wired_count
3355 				 * is 0 if not wired, otherwise anything
3356 				 * larger than 0 (incremented once each time
3357 				 * wire is called).
3358 				 * Mostly to be able to undo the damage on
3359 				 * failure. Not the actually be a wired
3360 				 * refcounter...
3361 				 * Originally: iter->wired_count--;
3362 				 * (don't we have to unwire this in the pmap
3363 				 * as well?)
3364 				 */
3365 				iter->wired_count = 0;
3366 			}
3367 			pmap_protect(map->pmap, iter->start, iter->end,
3368 			    iter->protection & mask);
3369 		}
3370 
3371 		/*
3372 		 * If the map is configured to lock any future mappings,
3373 		 * wire this entry now if the old protection was PROT_NONE
3374 		 * and the new protection is not PROT_NONE.
3375 		 */
3376 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3377 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3378 		    old_prot == PROT_NONE &&
3379 		    new_prot != PROT_NONE) {
3380 			if (uvm_map_pageable(map, iter->start, iter->end,
3381 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3382 				/*
3383 				 * If locking the entry fails, remember the
3384 				 * error if it's the first one.  Note we
3385 				 * still continue setting the protection in
3386 				 * the map, but it will return the resource
3387 				 * storage condition regardless.
3388 				 *
3389 				 * XXX Ignore what the actual error is,
3390 				 * XXX just call it a resource shortage
3391 				 * XXX so that it doesn't get confused
3392 				 * XXX what uvm_map_protect() itself would
3393 				 * XXX normally return.
3394 				 */
3395 				error = ENOMEM;
3396 			}
3397 		}
3398 	}
3399 	pmap_update(map->pmap);
3400 
3401 out:
3402 	vm_map_unlock(map);
3403 	return error;
3404 }
3405 
3406 /*
3407  * uvmspace_alloc: allocate a vmspace structure.
3408  *
3409  * - structure includes vm_map and pmap
3410  * - XXX: no locking on this structure
3411  * - refcnt set to 1, rest must be init'd by caller
3412  */
3413 struct vmspace *
3414 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3415     boolean_t remove_holes)
3416 {
3417 	struct vmspace *vm;
3418 
3419 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3420 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3421 	return (vm);
3422 }
3423 
3424 /*
3425  * uvmspace_init: initialize a vmspace structure.
3426  *
3427  * - XXX: no locking on this structure
3428  * - refcnt set to 1, rest must be init'd by caller
3429  */
3430 void
3431 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3432     boolean_t pageable, boolean_t remove_holes)
3433 {
3434 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3435 
3436 	if (pmap)
3437 		pmap_reference(pmap);
3438 	else
3439 		pmap = pmap_create();
3440 	vm->vm_map.pmap = pmap;
3441 
3442 	uvm_map_setup(&vm->vm_map, min, max,
3443 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3444 
3445 	vm->vm_refcnt = 1;
3446 
3447 	if (remove_holes)
3448 		pmap_remove_holes(vm);
3449 }
3450 
3451 /*
3452  * uvmspace_share: share a vmspace between two processes
3453  *
3454  * - XXX: no locking on vmspace
3455  * - used for vfork
3456  */
3457 
3458 struct vmspace *
3459 uvmspace_share(struct process *pr)
3460 {
3461 	struct vmspace *vm = pr->ps_vmspace;
3462 
3463 	vm->vm_refcnt++;
3464 	return vm;
3465 }
3466 
3467 /*
3468  * uvmspace_exec: the process wants to exec a new program
3469  *
3470  * - XXX: no locking on vmspace
3471  */
3472 
3473 void
3474 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3475 {
3476 	struct process *pr = p->p_p;
3477 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3478 	struct vm_map *map = &ovm->vm_map;
3479 	struct uvm_map_deadq dead_entries;
3480 
3481 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3482 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3483 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3484 
3485 	pmap_unuse_final(p);   /* before stack addresses go away */
3486 	TAILQ_INIT(&dead_entries);
3487 
3488 	/* see if more than one process is using this vmspace...  */
3489 	if (ovm->vm_refcnt == 1) {
3490 		/*
3491 		 * If pr is the only process using its vmspace then
3492 		 * we can safely recycle that vmspace for the program
3493 		 * that is being exec'd.
3494 		 */
3495 
3496 #ifdef SYSVSHM
3497 		/*
3498 		 * SYSV SHM semantics require us to kill all segments on an exec
3499 		 */
3500 		if (ovm->vm_shm)
3501 			shmexit(ovm);
3502 #endif
3503 
3504 		/*
3505 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3506 		 * when a process execs another program image.
3507 		 */
3508 		vm_map_lock(map);
3509 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3510 
3511 		/*
3512 		 * now unmap the old program
3513 		 *
3514 		 * Instead of attempting to keep the map valid, we simply
3515 		 * nuke all entries and ask uvm_map_setup to reinitialize
3516 		 * the map to the new boundaries.
3517 		 *
3518 		 * uvm_unmap_remove will actually nuke all entries for us
3519 		 * (as in, not replace them with free-memory entries).
3520 		 */
3521 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3522 		    &dead_entries, TRUE, FALSE);
3523 
3524 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3525 
3526 		/* Nuke statistics and boundaries. */
3527 		memset(&ovm->vm_startcopy, 0,
3528 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3529 
3530 
3531 		if (end & (vaddr_t)PAGE_MASK) {
3532 			end += 1;
3533 			if (end == 0) /* overflow */
3534 				end -= PAGE_SIZE;
3535 		}
3536 
3537 		/* Setup new boundaries and populate map with entries. */
3538 		map->min_offset = start;
3539 		map->max_offset = end;
3540 		uvm_map_setup_entries(map);
3541 		vm_map_unlock(map);
3542 
3543 		/* but keep MMU holes unavailable */
3544 		pmap_remove_holes(ovm);
3545 	} else {
3546 		/*
3547 		 * pr's vmspace is being shared, so we can't reuse
3548 		 * it for pr since it is still being used for others.
3549 		 * allocate a new vmspace for pr
3550 		 */
3551 		nvm = uvmspace_alloc(start, end,
3552 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3553 
3554 		/* install new vmspace and drop our ref to the old one. */
3555 		pmap_deactivate(p);
3556 		p->p_vmspace = pr->ps_vmspace = nvm;
3557 		pmap_activate(p);
3558 
3559 		uvmspace_free(ovm);
3560 	}
3561 
3562 	/* Release dead entries */
3563 	uvm_unmap_detach(&dead_entries, 0);
3564 }
3565 
3566 /*
3567  * uvmspace_free: free a vmspace data structure
3568  *
3569  * - XXX: no locking on vmspace
3570  */
3571 void
3572 uvmspace_free(struct vmspace *vm)
3573 {
3574 	if (--vm->vm_refcnt == 0) {
3575 		/*
3576 		 * lock the map, to wait out all other references to it.  delete
3577 		 * all of the mappings and pages they hold, then call the pmap
3578 		 * module to reclaim anything left.
3579 		 */
3580 #ifdef SYSVSHM
3581 		/* Get rid of any SYSV shared memory segments. */
3582 		if (vm->vm_shm != NULL)
3583 			shmexit(vm);
3584 #endif
3585 
3586 		uvm_map_teardown(&vm->vm_map);
3587 		pool_put(&uvm_vmspace_pool, vm);
3588 	}
3589 }
3590 
3591 /*
3592  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3593  * srcmap to the address range [dstaddr, dstaddr + sz) in
3594  * dstmap.
3595  *
3596  * The whole address range in srcmap must be backed by an object
3597  * (no holes).
3598  *
3599  * If successful, the address ranges share memory and the destination
3600  * address range uses the protection flags in prot.
3601  *
3602  * This routine assumes that sz is a multiple of PAGE_SIZE and
3603  * that dstaddr and srcaddr are page-aligned.
3604  */
3605 int
3606 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3607     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3608 {
3609 	int ret = 0;
3610 	vaddr_t unmap_end;
3611 	vaddr_t dstva;
3612 	vsize_t off, len, n = sz;
3613 	struct vm_map_entry *first = NULL, *last = NULL;
3614 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3615 	struct uvm_map_deadq dead;
3616 
3617 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3618 		return EINVAL;
3619 
3620 	TAILQ_INIT(&dead);
3621 	vm_map_lock(dstmap);
3622 	vm_map_lock_read(srcmap);
3623 
3624 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3625 		ret = ENOMEM;
3626 		goto exit_unlock;
3627 	}
3628 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3629 		ret = EINVAL;
3630 		goto exit_unlock;
3631 	}
3632 
3633 	unmap_end = dstaddr;
3634 	for (; src_entry != NULL;
3635 	    psrc_entry = src_entry,
3636 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3637 		/* hole in address space, bail out */
3638 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3639 			break;
3640 		if (src_entry->start >= srcaddr + sz)
3641 			break;
3642 
3643 		if (UVM_ET_ISSUBMAP(src_entry))
3644 			panic("uvm_share: encountered a submap (illegal)");
3645 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3646 		    UVM_ET_ISNEEDSCOPY(src_entry))
3647 			panic("uvm_share: non-copy_on_write map entries "
3648 			    "marked needs_copy (illegal)");
3649 
3650 		dstva = dstaddr;
3651 		if (src_entry->start > srcaddr) {
3652 			dstva += src_entry->start - srcaddr;
3653 			off = 0;
3654 		} else
3655 			off = srcaddr - src_entry->start;
3656 
3657 		if (n < src_entry->end - src_entry->start)
3658 			len = n;
3659 		else
3660 			len = src_entry->end - src_entry->start;
3661 		n -= len;
3662 
3663 		if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot,
3664 		    srcmap, src_entry, &dead) == NULL)
3665 			break;
3666 
3667 		unmap_end = dstva + len;
3668 		if (n == 0)
3669 			goto exit_unlock;
3670 	}
3671 
3672 	ret = EINVAL;
3673 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
3674 
3675 exit_unlock:
3676 	vm_map_unlock_read(srcmap);
3677 	vm_map_unlock(dstmap);
3678 	uvm_unmap_detach(&dead, 0);
3679 
3680 	return ret;
3681 }
3682 
3683 /*
3684  * Clone map entry into other map.
3685  *
3686  * Mapping will be placed at dstaddr, for the same length.
3687  * Space must be available.
3688  * Reference counters are incremented.
3689  */
3690 struct vm_map_entry *
3691 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3692     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3693     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3694     int mapent_flags, int amap_share_flags)
3695 {
3696 	struct vm_map_entry *new_entry, *first, *last;
3697 
3698 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3699 
3700 	/* Create new entry (linked in on creation). Fill in first, last. */
3701 	first = last = NULL;
3702 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3703 		panic("uvmspace_fork: no space in map for "
3704 		    "entry in empty map");
3705 	}
3706 	new_entry = uvm_map_mkentry(dstmap, first, last,
3707 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3708 	if (new_entry == NULL)
3709 		return NULL;
3710 	/* old_entry -> new_entry */
3711 	new_entry->object = old_entry->object;
3712 	new_entry->offset = old_entry->offset;
3713 	new_entry->aref = old_entry->aref;
3714 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3715 	new_entry->protection = prot;
3716 	new_entry->max_protection = maxprot;
3717 	new_entry->inheritance = old_entry->inheritance;
3718 	new_entry->advice = old_entry->advice;
3719 
3720 	/* gain reference to object backing the map (can't be a submap). */
3721 	if (new_entry->aref.ar_amap) {
3722 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3723 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3724 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3725 		    amap_share_flags);
3726 	}
3727 
3728 	if (UVM_ET_ISOBJ(new_entry) &&
3729 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3730 		new_entry->offset += off;
3731 		new_entry->object.uvm_obj->pgops->pgo_reference
3732 		    (new_entry->object.uvm_obj);
3733 	}
3734 
3735 	return new_entry;
3736 }
3737 
3738 struct vm_map_entry *
3739 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3740     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3741     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3742 {
3743 	/*
3744 	 * If old_entry refers to a copy-on-write region that has not yet been
3745 	 * written to (needs_copy flag is set), then we need to allocate a new
3746 	 * amap for old_entry.
3747 	 *
3748 	 * If we do not do this, and the process owning old_entry does a copy-on
3749 	 * write later, old_entry and new_entry will refer to different memory
3750 	 * regions, and the memory between the processes is no longer shared.
3751 	 *
3752 	 * [in other words, we need to clear needs_copy]
3753 	 */
3754 
3755 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3756 		/* get our own amap, clears needs_copy */
3757 		amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3758 		    0, 0);
3759 		/* XXXCDC: WAITOK??? */
3760 	}
3761 
3762 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3763 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3764 }
3765 
3766 /*
3767  * share the mapping: this means we want the old and
3768  * new entries to share amaps and backing objects.
3769  */
3770 struct vm_map_entry *
3771 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3772     struct vm_map *old_map,
3773     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3774 {
3775 	struct vm_map_entry *new_entry;
3776 
3777 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3778 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3779 	    old_entry->max_protection, old_map, old_entry, dead);
3780 
3781 	/*
3782 	 * pmap_copy the mappings: this routine is optional
3783 	 * but if it is there it will reduce the number of
3784 	 * page faults in the new proc.
3785 	 */
3786 	if (!UVM_ET_ISHOLE(new_entry))
3787 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3788 		    (new_entry->end - new_entry->start), new_entry->start);
3789 
3790 	return (new_entry);
3791 }
3792 
3793 /*
3794  * copy-on-write the mapping (using mmap's
3795  * MAP_PRIVATE semantics)
3796  *
3797  * allocate new_entry, adjust reference counts.
3798  * (note that new references are read-only).
3799  */
3800 struct vm_map_entry *
3801 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3802     struct vm_map *old_map,
3803     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3804 {
3805 	struct vm_map_entry	*new_entry;
3806 	boolean_t		 protect_child;
3807 
3808 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3809 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3810 	    old_entry->max_protection, old_entry, dead, 0, 0);
3811 
3812 	new_entry->etype |=
3813 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3814 
3815 	/*
3816 	 * the new entry will need an amap.  it will either
3817 	 * need to be copied from the old entry or created
3818 	 * from scratch (if the old entry does not have an
3819 	 * amap).  can we defer this process until later
3820 	 * (by setting "needs_copy") or do we need to copy
3821 	 * the amap now?
3822 	 *
3823 	 * we must copy the amap now if any of the following
3824 	 * conditions hold:
3825 	 * 1. the old entry has an amap and that amap is
3826 	 *    being shared.  this means that the old (parent)
3827 	 *    process is sharing the amap with another
3828 	 *    process.  if we do not clear needs_copy here
3829 	 *    we will end up in a situation where both the
3830 	 *    parent and child process are referring to the
3831 	 *    same amap with "needs_copy" set.  if the
3832 	 *    parent write-faults, the fault routine will
3833 	 *    clear "needs_copy" in the parent by allocating
3834 	 *    a new amap.   this is wrong because the
3835 	 *    parent is supposed to be sharing the old amap
3836 	 *    and the new amap will break that.
3837 	 *
3838 	 * 2. if the old entry has an amap and a non-zero
3839 	 *    wire count then we are going to have to call
3840 	 *    amap_cow_now to avoid page faults in the
3841 	 *    parent process.   since amap_cow_now requires
3842 	 *    "needs_copy" to be clear we might as well
3843 	 *    clear it here as well.
3844 	 *
3845 	 */
3846 	if (old_entry->aref.ar_amap != NULL &&
3847 	    ((amap_flags(old_entry->aref.ar_amap) &
3848 	    AMAP_SHARED) != 0 ||
3849 	    VM_MAPENT_ISWIRED(old_entry))) {
3850 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3851 		    0, 0);
3852 		/* XXXCDC: M_WAITOK ... ok? */
3853 	}
3854 
3855 	/*
3856 	 * if the parent's entry is wired down, then the
3857 	 * parent process does not want page faults on
3858 	 * access to that memory.  this means that we
3859 	 * cannot do copy-on-write because we can't write
3860 	 * protect the old entry.   in this case we
3861 	 * resolve all copy-on-write faults now, using
3862 	 * amap_cow_now.   note that we have already
3863 	 * allocated any needed amap (above).
3864 	 */
3865 	if (VM_MAPENT_ISWIRED(old_entry)) {
3866 		/*
3867 		 * resolve all copy-on-write faults now
3868 		 * (note that there is nothing to do if
3869 		 * the old mapping does not have an amap).
3870 		 * XXX: is it worthwhile to bother with
3871 		 * pmap_copy in this case?
3872 		 */
3873 		if (old_entry->aref.ar_amap)
3874 			amap_cow_now(new_map, new_entry);
3875 	} else {
3876 		if (old_entry->aref.ar_amap) {
3877 			/*
3878 			 * setup mappings to trigger copy-on-write faults
3879 			 * we must write-protect the parent if it has
3880 			 * an amap and it is not already "needs_copy"...
3881 			 * if it is already "needs_copy" then the parent
3882 			 * has already been write-protected by a previous
3883 			 * fork operation.
3884 			 *
3885 			 * if we do not write-protect the parent, then
3886 			 * we must be sure to write-protect the child
3887 			 * after the pmap_copy() operation.
3888 			 *
3889 			 * XXX: pmap_copy should have some way of telling
3890 			 * us that it didn't do anything so we can avoid
3891 			 * calling pmap_protect needlessly.
3892 			 */
3893 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3894 				if (old_entry->max_protection & PROT_WRITE) {
3895 					pmap_protect(old_map->pmap,
3896 					    old_entry->start,
3897 					    old_entry->end,
3898 					    old_entry->protection &
3899 					    ~PROT_WRITE);
3900 					pmap_update(old_map->pmap);
3901 				}
3902 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3903 			}
3904 
3905 	  		/* parent must now be write-protected */
3906 	  		protect_child = FALSE;
3907 		} else {
3908 			/*
3909 			 * we only need to protect the child if the
3910 			 * parent has write access.
3911 			 */
3912 			if (old_entry->max_protection & PROT_WRITE)
3913 				protect_child = TRUE;
3914 			else
3915 				protect_child = FALSE;
3916 		}
3917 		/*
3918 		 * copy the mappings
3919 		 * XXX: need a way to tell if this does anything
3920 		 */
3921 		if (!UVM_ET_ISHOLE(new_entry))
3922 			pmap_copy(new_map->pmap, old_map->pmap,
3923 			    new_entry->start,
3924 			    (old_entry->end - old_entry->start),
3925 			    old_entry->start);
3926 
3927 		/* protect the child's mappings if necessary */
3928 		if (protect_child) {
3929 			pmap_protect(new_map->pmap, new_entry->start,
3930 			    new_entry->end,
3931 			    new_entry->protection &
3932 			    ~PROT_WRITE);
3933 		}
3934 	}
3935 
3936 	return (new_entry);
3937 }
3938 
3939 /*
3940  * zero the mapping: the new entry will be zero initialized
3941  */
3942 struct vm_map_entry *
3943 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3944     struct vm_map *old_map,
3945     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3946 {
3947 	struct vm_map_entry *new_entry;
3948 
3949 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3950 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3951 	    old_entry->max_protection, old_entry, dead, 0, 0);
3952 
3953 	new_entry->etype |=
3954 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3955 
3956 	if (new_entry->aref.ar_amap) {
3957 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3958 		    atop(new_entry->end - new_entry->start), 0);
3959 		new_entry->aref.ar_amap = NULL;
3960 		new_entry->aref.ar_pageoff = 0;
3961 	}
3962 
3963 	if (UVM_ET_ISOBJ(new_entry)) {
3964 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
3965 			new_entry->object.uvm_obj->pgops->pgo_detach(
3966 			    new_entry->object.uvm_obj);
3967 		new_entry->object.uvm_obj = NULL;
3968 		new_entry->etype &= ~UVM_ET_OBJ;
3969 	}
3970 
3971 	return (new_entry);
3972 }
3973 
3974 /*
3975  * uvmspace_fork: fork a process' main map
3976  *
3977  * => create a new vmspace for child process from parent.
3978  * => parent's map must not be locked.
3979  */
3980 struct vmspace *
3981 uvmspace_fork(struct process *pr)
3982 {
3983 	struct vmspace *vm1 = pr->ps_vmspace;
3984 	struct vmspace *vm2;
3985 	struct vm_map *old_map = &vm1->vm_map;
3986 	struct vm_map *new_map;
3987 	struct vm_map_entry *old_entry, *new_entry;
3988 	struct uvm_map_deadq dead;
3989 
3990 	vm_map_lock(old_map);
3991 
3992 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3993 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3994 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3995 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3996 	vm2->vm_dused = 0; /* Statistic managed by us. */
3997 	new_map = &vm2->vm_map;
3998 	vm_map_lock(new_map);
3999 
4000 	/* go entry-by-entry */
4001 	TAILQ_INIT(&dead);
4002 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
4003 		if (old_entry->start == old_entry->end)
4004 			continue;
4005 
4006 		/* first, some sanity checks on the old entry */
4007 		if (UVM_ET_ISSUBMAP(old_entry)) {
4008 			panic("fork: encountered a submap during fork "
4009 			    "(illegal)");
4010 		}
4011 
4012 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
4013 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
4014 			panic("fork: non-copy_on_write map entry marked "
4015 			    "needs_copy (illegal)");
4016 		}
4017 
4018 		/* Apply inheritance. */
4019 		switch (old_entry->inheritance) {
4020 		case MAP_INHERIT_SHARE:
4021 			new_entry = uvm_mapent_forkshared(vm2, new_map,
4022 			    old_map, old_entry, &dead);
4023 			break;
4024 		case MAP_INHERIT_COPY:
4025 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
4026 			    old_map, old_entry, &dead);
4027 			break;
4028 		case MAP_INHERIT_ZERO:
4029 			new_entry = uvm_mapent_forkzero(vm2, new_map,
4030 			    old_map, old_entry, &dead);
4031 			break;
4032 		default:
4033 			continue;
4034 		}
4035 
4036 	 	/* Update process statistics. */
4037 		if (!UVM_ET_ISHOLE(new_entry))
4038 			new_map->size += new_entry->end - new_entry->start;
4039 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
4040 			vm2->vm_dused += uvmspace_dused(
4041 			    new_map, new_entry->start, new_entry->end);
4042 		}
4043 	}
4044 
4045 	vm_map_unlock(old_map);
4046 	vm_map_unlock(new_map);
4047 
4048 	/*
4049 	 * This can actually happen, if multiple entries described a
4050 	 * space in which an entry was inherited.
4051 	 */
4052 	uvm_unmap_detach(&dead, 0);
4053 
4054 #ifdef SYSVSHM
4055 	if (vm1->vm_shm)
4056 		shmfork(vm1, vm2);
4057 #endif
4058 
4059 	return vm2;
4060 }
4061 
4062 /*
4063  * uvm_map_hint: return the beginning of the best area suitable for
4064  * creating a new mapping with "prot" protection.
4065  */
4066 vaddr_t
4067 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
4068     vaddr_t maxaddr)
4069 {
4070 	vaddr_t addr;
4071 	vaddr_t spacing;
4072 
4073 #ifdef __i386__
4074 	/*
4075 	 * If executable skip first two pages, otherwise start
4076 	 * after data + heap region.
4077 	 */
4078 	if ((prot & PROT_EXEC) != 0 &&
4079 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
4080 		addr = (PAGE_SIZE*2) +
4081 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
4082 		return (round_page(addr));
4083 	}
4084 #endif
4085 
4086 #if defined (__LP64__)
4087 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4088 #else
4089 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4090 #endif
4091 
4092 	/*
4093 	 * Start malloc/mmap after the brk.
4094 	 */
4095 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4096 	addr = MAX(addr, minaddr);
4097 
4098 	if (addr < maxaddr) {
4099 		while (spacing > maxaddr - addr)
4100 			spacing >>= 1;
4101 	}
4102 	addr += arc4random() & spacing;
4103 	return (round_page(addr));
4104 }
4105 
4106 /*
4107  * uvm_map_submap: punch down part of a map into a submap
4108  *
4109  * => only the kernel_map is allowed to be submapped
4110  * => the purpose of submapping is to break up the locking granularity
4111  *	of a larger map
4112  * => the range specified must have been mapped previously with a uvm_map()
4113  *	call [with uobj==NULL] to create a blank map entry in the main map.
4114  *	[And it had better still be blank!]
4115  * => maps which contain submaps should never be copied or forked.
4116  * => to remove a submap, use uvm_unmap() on the main map
4117  *	and then uvm_map_deallocate() the submap.
4118  * => main map must be unlocked.
4119  * => submap must have been init'd and have a zero reference count.
4120  *	[need not be locked as we don't actually reference it]
4121  */
4122 int
4123 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4124     struct vm_map *submap)
4125 {
4126 	struct vm_map_entry *entry;
4127 	int result;
4128 
4129 	if (start > map->max_offset || end > map->max_offset ||
4130 	    start < map->min_offset || end < map->min_offset)
4131 		return EINVAL;
4132 
4133 	vm_map_lock(map);
4134 
4135 	if (uvm_map_lookup_entry(map, start, &entry)) {
4136 		UVM_MAP_CLIP_START(map, entry, start);
4137 		UVM_MAP_CLIP_END(map, entry, end);
4138 	} else
4139 		entry = NULL;
4140 
4141 	if (entry != NULL &&
4142 	    entry->start == start && entry->end == end &&
4143 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4144 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4145 		entry->etype |= UVM_ET_SUBMAP;
4146 		entry->object.sub_map = submap;
4147 		entry->offset = 0;
4148 		uvm_map_reference(submap);
4149 		result = 0;
4150 	} else
4151 		result = EINVAL;
4152 
4153 	vm_map_unlock(map);
4154 	return(result);
4155 }
4156 
4157 /*
4158  * uvm_map_checkprot: check protection in map
4159  *
4160  * => must allow specific protection in a fully allocated region.
4161  * => map mut be read or write locked by caller.
4162  */
4163 boolean_t
4164 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4165     vm_prot_t protection)
4166 {
4167 	struct vm_map_entry *entry;
4168 
4169 	if (start < map->min_offset || end > map->max_offset || start > end)
4170 		return FALSE;
4171 	if (start == end)
4172 		return TRUE;
4173 
4174 	/*
4175 	 * Iterate entries.
4176 	 */
4177 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4178 	    entry != NULL && entry->start < end;
4179 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4180 		/* Fail if a hole is found. */
4181 		if (UVM_ET_ISHOLE(entry) ||
4182 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4183 			return FALSE;
4184 
4185 		/* Check protection. */
4186 		if ((entry->protection & protection) != protection)
4187 			return FALSE;
4188 	}
4189 	return TRUE;
4190 }
4191 
4192 /*
4193  * uvm_map_create: create map
4194  */
4195 vm_map_t
4196 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4197 {
4198 	vm_map_t map;
4199 
4200 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4201 	map->pmap = pmap;
4202 	uvm_map_setup(map, min, max, flags);
4203 	return (map);
4204 }
4205 
4206 /*
4207  * uvm_map_deallocate: drop reference to a map
4208  *
4209  * => caller must not lock map
4210  * => we will zap map if ref count goes to zero
4211  */
4212 void
4213 uvm_map_deallocate(vm_map_t map)
4214 {
4215 	int c;
4216 	struct uvm_map_deadq dead;
4217 
4218 	c = --map->ref_count;
4219 	if (c > 0) {
4220 		return;
4221 	}
4222 
4223 	/*
4224 	 * all references gone.   unmap and free.
4225 	 *
4226 	 * No lock required: we are only one to access this map.
4227 	 */
4228 	TAILQ_INIT(&dead);
4229 	uvm_tree_sanity(map, __FILE__, __LINE__);
4230 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4231 	    TRUE, FALSE);
4232 	pmap_destroy(map->pmap);
4233 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4234 	free(map, M_VMMAP, sizeof *map);
4235 
4236 	uvm_unmap_detach(&dead, 0);
4237 }
4238 
4239 /*
4240  * uvm_map_inherit: set inheritance code for range of addrs in map.
4241  *
4242  * => map must be unlocked
4243  * => note that the inherit code is used during a "fork".  see fork
4244  *	code for details.
4245  */
4246 int
4247 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4248     vm_inherit_t new_inheritance)
4249 {
4250 	struct vm_map_entry *entry;
4251 
4252 	switch (new_inheritance) {
4253 	case MAP_INHERIT_NONE:
4254 	case MAP_INHERIT_COPY:
4255 	case MAP_INHERIT_SHARE:
4256 	case MAP_INHERIT_ZERO:
4257 		break;
4258 	default:
4259 		return (EINVAL);
4260 	}
4261 
4262 	if (start > end)
4263 		return EINVAL;
4264 	start = MAX(start, map->min_offset);
4265 	end = MIN(end, map->max_offset);
4266 	if (start >= end)
4267 		return 0;
4268 
4269 	vm_map_lock(map);
4270 
4271 	entry = uvm_map_entrybyaddr(&map->addr, start);
4272 	if (entry->end > start)
4273 		UVM_MAP_CLIP_START(map, entry, start);
4274 	else
4275 		entry = RBT_NEXT(uvm_map_addr, entry);
4276 
4277 	while (entry != NULL && entry->start < end) {
4278 		UVM_MAP_CLIP_END(map, entry, end);
4279 		entry->inheritance = new_inheritance;
4280 		entry = RBT_NEXT(uvm_map_addr, entry);
4281 	}
4282 
4283 	vm_map_unlock(map);
4284 	return (0);
4285 }
4286 
4287 /*
4288  * uvm_map_advice: set advice code for range of addrs in map.
4289  *
4290  * => map must be unlocked
4291  */
4292 int
4293 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4294 {
4295 	struct vm_map_entry *entry;
4296 
4297 	switch (new_advice) {
4298 	case MADV_NORMAL:
4299 	case MADV_RANDOM:
4300 	case MADV_SEQUENTIAL:
4301 		break;
4302 	default:
4303 		return (EINVAL);
4304 	}
4305 
4306 	if (start > end)
4307 		return EINVAL;
4308 	start = MAX(start, map->min_offset);
4309 	end = MIN(end, map->max_offset);
4310 	if (start >= end)
4311 		return 0;
4312 
4313 	vm_map_lock(map);
4314 
4315 	entry = uvm_map_entrybyaddr(&map->addr, start);
4316 	if (entry != NULL && entry->end > start)
4317 		UVM_MAP_CLIP_START(map, entry, start);
4318 	else if (entry!= NULL)
4319 		entry = RBT_NEXT(uvm_map_addr, entry);
4320 
4321 	/*
4322 	 * XXXJRT: disallow holes?
4323 	 */
4324 	while (entry != NULL && entry->start < end) {
4325 		UVM_MAP_CLIP_END(map, entry, end);
4326 		entry->advice = new_advice;
4327 		entry = RBT_NEXT(uvm_map_addr, entry);
4328 	}
4329 
4330 	vm_map_unlock(map);
4331 	return (0);
4332 }
4333 
4334 /*
4335  * uvm_map_extract: extract a mapping from a map and put it somewhere
4336  * in the kernel_map, setting protection to max_prot.
4337  *
4338  * => map should be unlocked (we will write lock it and kernel_map)
4339  * => returns 0 on success, error code otherwise
4340  * => start must be page aligned
4341  * => len must be page sized
4342  * => flags:
4343  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4344  * Mappings are QREF's.
4345  */
4346 int
4347 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4348     vaddr_t *dstaddrp, int flags)
4349 {
4350 	struct uvm_map_deadq dead;
4351 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4352 	vaddr_t dstaddr;
4353 	vaddr_t end;
4354 	vaddr_t cp_start;
4355 	vsize_t cp_len, cp_off;
4356 	int error;
4357 
4358 	TAILQ_INIT(&dead);
4359 	end = start + len;
4360 
4361 	/*
4362 	 * Sanity check on the parameters.
4363 	 * Also, since the mapping may not contain gaps, error out if the
4364 	 * mapped area is not in source map.
4365 	 */
4366 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4367 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4368 		return EINVAL;
4369 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4370 		return EINVAL;
4371 
4372 	/* Initialize dead entries. Handle len == 0 case. */
4373 	if (len == 0)
4374 		return 0;
4375 
4376 	/* Acquire lock on srcmap. */
4377 	vm_map_lock(srcmap);
4378 
4379 	/* Lock srcmap, lookup first and last entry in <start,len>. */
4380 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4381 
4382 	/* Check that the range is contiguous. */
4383 	for (entry = first; entry != NULL && entry->end < end;
4384 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4385 		if (VMMAP_FREE_END(entry) != entry->end ||
4386 		    UVM_ET_ISHOLE(entry)) {
4387 			error = EINVAL;
4388 			goto fail;
4389 		}
4390 	}
4391 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4392 		error = EINVAL;
4393 		goto fail;
4394 	}
4395 
4396 	/*
4397 	 * Handle need-copy flag.
4398 	 */
4399 	for (entry = first; entry != NULL && entry->start < end;
4400 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4401 		if (UVM_ET_ISNEEDSCOPY(entry))
4402 			amap_copy(srcmap, entry, M_NOWAIT,
4403 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4404 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4405 			/*
4406 			 * amap_copy failure
4407 			 */
4408 			error = ENOMEM;
4409 			goto fail;
4410 		}
4411 	}
4412 
4413 	/* Lock destination map (kernel_map). */
4414 	vm_map_lock(kernel_map);
4415 
4416 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4417 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4418 	    PROT_NONE, 0) != 0) {
4419 		error = ENOMEM;
4420 		goto fail2;
4421 	}
4422 	*dstaddrp = dstaddr;
4423 
4424 	/*
4425 	 * We now have srcmap and kernel_map locked.
4426 	 * dstaddr contains the destination offset in dstmap.
4427 	 */
4428 	/* step 1: start looping through map entries, performing extraction. */
4429 	for (entry = first; entry != NULL && entry->start < end;
4430 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4431 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4432 		if (UVM_ET_ISHOLE(entry))
4433 			continue;
4434 
4435 		/* Calculate uvm_mapent_clone parameters. */
4436 		cp_start = entry->start;
4437 		if (cp_start < start) {
4438 			cp_off = start - cp_start;
4439 			cp_start = start;
4440 		} else
4441 			cp_off = 0;
4442 		cp_len = MIN(entry->end, end) - cp_start;
4443 
4444 		newentry = uvm_mapent_clone(kernel_map,
4445 		    cp_start - start + dstaddr, cp_len, cp_off,
4446 		    entry->protection, entry->max_protection,
4447 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4448 		if (newentry == NULL) {
4449 			error = ENOMEM;
4450 			goto fail2_unmap;
4451 		}
4452 		kernel_map->size += cp_len;
4453 		if (flags & UVM_EXTRACT_FIXPROT)
4454 			newentry->protection = newentry->max_protection;
4455 
4456 		/*
4457 		 * Step 2: perform pmap copy.
4458 		 * (Doing this in the loop saves one RB traversal.)
4459 		 */
4460 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4461 		    cp_start - start + dstaddr, cp_len, cp_start);
4462 	}
4463 	pmap_update(kernel_map->pmap);
4464 
4465 	error = 0;
4466 
4467 	/* Unmap copied entries on failure. */
4468 fail2_unmap:
4469 	if (error) {
4470 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4471 		    FALSE, TRUE);
4472 	}
4473 
4474 	/* Release maps, release dead entries. */
4475 fail2:
4476 	vm_map_unlock(kernel_map);
4477 
4478 fail:
4479 	vm_map_unlock(srcmap);
4480 
4481 	uvm_unmap_detach(&dead, 0);
4482 
4483 	return error;
4484 }
4485 
4486 /*
4487  * uvm_map_clean: clean out a map range
4488  *
4489  * => valid flags:
4490  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4491  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4492  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4493  *   if (flags & PGO_FREE): any cached pages are freed after clean
4494  * => returns an error if any part of the specified range isn't mapped
4495  * => never a need to flush amap layer since the anonymous memory has
4496  *	no permanent home, but may deactivate pages there
4497  * => called from sys_msync() and sys_madvise()
4498  * => caller must not write-lock map (read OK).
4499  * => we may sleep while cleaning if SYNCIO [with map read-locked]
4500  */
4501 
4502 int
4503 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4504 {
4505 	struct vm_map_entry *first, *entry;
4506 	struct vm_amap *amap;
4507 	struct vm_anon *anon;
4508 	struct vm_page *pg;
4509 	struct uvm_object *uobj;
4510 	vaddr_t cp_start, cp_end;
4511 	int refs;
4512 	int error;
4513 	boolean_t rv;
4514 
4515 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4516 	    (PGO_FREE|PGO_DEACTIVATE));
4517 
4518 	if (start > end || start < map->min_offset || end > map->max_offset)
4519 		return EINVAL;
4520 
4521 	vm_map_lock_read(map);
4522 	first = uvm_map_entrybyaddr(&map->addr, start);
4523 
4524 	/* Make a first pass to check for holes. */
4525 	for (entry = first; entry != NULL && entry->start < end;
4526 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4527 		if (UVM_ET_ISSUBMAP(entry)) {
4528 			vm_map_unlock_read(map);
4529 			return EINVAL;
4530 		}
4531 		if (UVM_ET_ISSUBMAP(entry) ||
4532 		    UVM_ET_ISHOLE(entry) ||
4533 		    (entry->end < end &&
4534 		    VMMAP_FREE_END(entry) != entry->end)) {
4535 			vm_map_unlock_read(map);
4536 			return EFAULT;
4537 		}
4538 	}
4539 
4540 	error = 0;
4541 	for (entry = first; entry != NULL && entry->start < end;
4542 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4543 		amap = entry->aref.ar_amap;	/* top layer */
4544 		if (UVM_ET_ISOBJ(entry))
4545 			uobj = entry->object.uvm_obj;
4546 		else
4547 			uobj = NULL;
4548 
4549 		/*
4550 		 * No amap cleaning necessary if:
4551 		 *  - there's no amap
4552 		 *  - we're not deactivating or freeing pages.
4553 		 */
4554 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4555 			goto flush_object;
4556 
4557 		cp_start = MAX(entry->start, start);
4558 		cp_end = MIN(entry->end, end);
4559 
4560 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4561 			anon = amap_lookup(&entry->aref,
4562 			    cp_start - entry->start);
4563 			if (anon == NULL)
4564 				continue;
4565 
4566 			pg = anon->an_page;
4567 			if (pg == NULL) {
4568 				continue;
4569 			}
4570 			KASSERT(pg->pg_flags & PQ_ANON);
4571 
4572 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4573 			/*
4574 			 * XXX In these first 3 cases, we always just
4575 			 * XXX deactivate the page.  We may want to
4576 			 * XXX handle the different cases more
4577 			 * XXX specifically, in the future.
4578 			 */
4579 			case PGO_CLEANIT|PGO_FREE:
4580 			case PGO_CLEANIT|PGO_DEACTIVATE:
4581 			case PGO_DEACTIVATE:
4582 deactivate_it:
4583 				/* skip the page if it's wired */
4584 				if (pg->wire_count != 0)
4585 					break;
4586 
4587 				uvm_lock_pageq();
4588 
4589 				KASSERT(pg->uanon == anon);
4590 
4591 				/* zap all mappings for the page. */
4592 				pmap_page_protect(pg, PROT_NONE);
4593 
4594 				/* ...and deactivate the page. */
4595 				uvm_pagedeactivate(pg);
4596 
4597 				uvm_unlock_pageq();
4598 				break;
4599 			case PGO_FREE:
4600 				/*
4601 				 * If there are multiple references to
4602 				 * the amap, just deactivate the page.
4603 				 */
4604 				if (amap_refs(amap) > 1)
4605 					goto deactivate_it;
4606 
4607 				/* XXX skip the page if it's wired */
4608 				if (pg->wire_count != 0) {
4609 					break;
4610 				}
4611 				amap_unadd(&entry->aref,
4612 				    cp_start - entry->start);
4613 				refs = --anon->an_ref;
4614 				if (refs == 0)
4615 					uvm_anfree(anon);
4616 				break;
4617 			default:
4618 				panic("uvm_map_clean: weird flags");
4619 			}
4620 		}
4621 
4622 flush_object:
4623 		cp_start = MAX(entry->start, start);
4624 		cp_end = MIN(entry->end, end);
4625 
4626 		/*
4627 		 * flush pages if we've got a valid backing object.
4628 		 *
4629 		 * Don't PGO_FREE if we don't have write permission
4630 		 * and don't flush if this is a copy-on-write object
4631 		 * since we can't know our permissions on it.
4632 		 */
4633 		if (uobj != NULL &&
4634 		    ((flags & PGO_FREE) == 0 ||
4635 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4636 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4637 			rv = uobj->pgops->pgo_flush(uobj,
4638 			    cp_start - entry->start + entry->offset,
4639 			    cp_end - entry->start + entry->offset, flags);
4640 
4641 			if (rv == FALSE)
4642 				error = EFAULT;
4643 		}
4644 	}
4645 
4646 	vm_map_unlock_read(map);
4647 	return error;
4648 }
4649 
4650 /*
4651  * UVM_MAP_CLIP_END implementation
4652  */
4653 void
4654 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4655 {
4656 	struct vm_map_entry *tmp;
4657 
4658 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4659 	tmp = uvm_mapent_alloc(map, 0);
4660 
4661 	/* Invoke splitentry. */
4662 	uvm_map_splitentry(map, entry, tmp, addr);
4663 }
4664 
4665 /*
4666  * UVM_MAP_CLIP_START implementation
4667  *
4668  * Clippers are required to not change the pointers to the entry they are
4669  * clipping on.
4670  * Since uvm_map_splitentry turns the original entry into the lowest
4671  * entry (address wise) we do a swap between the new entry and the original
4672  * entry, prior to calling uvm_map_splitentry.
4673  */
4674 void
4675 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4676 {
4677 	struct vm_map_entry *tmp;
4678 	struct uvm_addr_state *free;
4679 
4680 	/* Unlink original. */
4681 	free = uvm_map_uaddr_e(map, entry);
4682 	uvm_mapent_free_remove(map, free, entry);
4683 	uvm_mapent_addr_remove(map, entry);
4684 
4685 	/* Copy entry. */
4686 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4687 	tmp = uvm_mapent_alloc(map, 0);
4688 	uvm_mapent_copy(entry, tmp);
4689 
4690 	/* Put new entry in place of original entry. */
4691 	uvm_mapent_addr_insert(map, tmp);
4692 	uvm_mapent_free_insert(map, free, tmp);
4693 
4694 	/* Invoke splitentry. */
4695 	uvm_map_splitentry(map, tmp, entry, addr);
4696 }
4697 
4698 /*
4699  * Boundary fixer.
4700  */
4701 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4702 static __inline vaddr_t
4703 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4704 {
4705 	return (min < bound && max > bound) ? bound : max;
4706 }
4707 
4708 /*
4709  * Choose free list based on address at start of free space.
4710  *
4711  * The uvm_addr_state returned contains addr and is the first of:
4712  * - uaddr_exe
4713  * - uaddr_brk_stack
4714  * - uaddr_any
4715  */
4716 struct uvm_addr_state*
4717 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4718 {
4719 	struct uvm_addr_state *uaddr;
4720 	int i;
4721 
4722 	/* Special case the first page, to prevent mmap from returning 0. */
4723 	if (addr < VMMAP_MIN_ADDR)
4724 		return NULL;
4725 
4726 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4727 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4728 		if (addr >= uvm_maxkaddr)
4729 			return NULL;
4730 	}
4731 
4732 	/* Is the address inside the exe-only map? */
4733 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4734 	    addr < map->uaddr_exe->uaddr_maxaddr)
4735 		return map->uaddr_exe;
4736 
4737 	/* Check if the space falls inside brk/stack area. */
4738 	if ((addr >= map->b_start && addr < map->b_end) ||
4739 	    (addr >= map->s_start && addr < map->s_end)) {
4740 		if (map->uaddr_brk_stack != NULL &&
4741 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4742 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4743 			return map->uaddr_brk_stack;
4744 		} else
4745 			return NULL;
4746 	}
4747 
4748 	/*
4749 	 * Check the other selectors.
4750 	 *
4751 	 * These selectors are only marked as the owner, if they have insert
4752 	 * functions.
4753 	 */
4754 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4755 		uaddr = map->uaddr_any[i];
4756 		if (uaddr == NULL)
4757 			continue;
4758 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4759 			continue;
4760 
4761 		if (addr >= uaddr->uaddr_minaddr &&
4762 		    addr < uaddr->uaddr_maxaddr)
4763 			return uaddr;
4764 	}
4765 
4766 	return NULL;
4767 }
4768 
4769 /*
4770  * Choose free list based on address at start of free space.
4771  *
4772  * The uvm_addr_state returned contains addr and is the first of:
4773  * - uaddr_exe
4774  * - uaddr_brk_stack
4775  * - uaddr_any
4776  */
4777 struct uvm_addr_state*
4778 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4779 {
4780 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4781 }
4782 
4783 /*
4784  * Returns the first free-memory boundary that is crossed by [min-max].
4785  */
4786 vsize_t
4787 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4788 {
4789 	struct uvm_addr_state	*uaddr;
4790 	int			 i;
4791 
4792 	/* Never return first page. */
4793 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4794 
4795 	/* Treat the maxkaddr special, if the map is a kernel_map. */
4796 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4797 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4798 
4799 	/* Check for exe-only boundaries. */
4800 	if (map->uaddr_exe != NULL) {
4801 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4802 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4803 	}
4804 
4805 	/* Check for exe-only boundaries. */
4806 	if (map->uaddr_brk_stack != NULL) {
4807 		max = uvm_map_boundfix(min, max,
4808 		    map->uaddr_brk_stack->uaddr_minaddr);
4809 		max = uvm_map_boundfix(min, max,
4810 		    map->uaddr_brk_stack->uaddr_maxaddr);
4811 	}
4812 
4813 	/* Check other boundaries. */
4814 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4815 		uaddr = map->uaddr_any[i];
4816 		if (uaddr != NULL) {
4817 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4818 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4819 		}
4820 	}
4821 
4822 	/* Boundaries at stack and brk() area. */
4823 	max = uvm_map_boundfix(min, max, map->s_start);
4824 	max = uvm_map_boundfix(min, max, map->s_end);
4825 	max = uvm_map_boundfix(min, max, map->b_start);
4826 	max = uvm_map_boundfix(min, max, map->b_end);
4827 
4828 	return max;
4829 }
4830 
4831 /*
4832  * Update map allocation start and end addresses from proc vmspace.
4833  */
4834 void
4835 uvm_map_vmspace_update(struct vm_map *map,
4836     struct uvm_map_deadq *dead, int flags)
4837 {
4838 	struct vmspace *vm;
4839 	vaddr_t b_start, b_end, s_start, s_end;
4840 
4841 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4842 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4843 
4844 	/*
4845 	 * Derive actual allocation boundaries from vmspace.
4846 	 */
4847 	vm = (struct vmspace *)map;
4848 	b_start = (vaddr_t)vm->vm_daddr;
4849 	b_end   = b_start + BRKSIZ;
4850 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4851 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4852 #ifdef DIAGNOSTIC
4853 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4854 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4855 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4856 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4857 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4858 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4859 		    vm, b_start, b_end, s_start, s_end);
4860 	}
4861 #endif
4862 
4863 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4864 	    map->s_start == s_start && map->s_end == s_end))
4865 		return;
4866 
4867 	uvm_map_freelist_update(map, dead, b_start, b_end,
4868 	    s_start, s_end, flags);
4869 }
4870 
4871 /*
4872  * Grow kernel memory.
4873  *
4874  * This function is only called for kernel maps when an allocation fails.
4875  *
4876  * If the map has a gap that is large enough to accommodate alloc_sz, this
4877  * function will make sure map->free will include it.
4878  */
4879 void
4880 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4881     vsize_t alloc_sz, int flags)
4882 {
4883 	vsize_t sz;
4884 	vaddr_t end;
4885 	struct vm_map_entry *entry;
4886 
4887 	/* Kernel memory only. */
4888 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4889 	/* Destroy free list. */
4890 	uvm_map_freelist_update_clear(map, dead);
4891 
4892 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
4893 	if (map->flags & VM_MAP_GUARDPAGES)
4894 		alloc_sz += PAGE_SIZE;
4895 
4896 	/*
4897 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4898 	 *
4899 	 * Don't handle the case where the multiplication overflows:
4900 	 * if that happens, the allocation is probably too big anyway.
4901 	 */
4902 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4903 
4904 	/*
4905 	 * Walk forward until a gap large enough for alloc_sz shows up.
4906 	 *
4907 	 * We assume the kernel map has no boundaries.
4908 	 * uvm_maxkaddr may be zero.
4909 	 */
4910 	end = MAX(uvm_maxkaddr, map->min_offset);
4911 	entry = uvm_map_entrybyaddr(&map->addr, end);
4912 	while (entry && entry->fspace < alloc_sz)
4913 		entry = RBT_NEXT(uvm_map_addr, entry);
4914 	if (entry) {
4915 		end = MAX(VMMAP_FREE_START(entry), end);
4916 		end += MIN(sz, map->max_offset - end);
4917 	} else
4918 		end = map->max_offset;
4919 
4920 	/* Reserve pmap entries. */
4921 #ifdef PMAP_GROWKERNEL
4922 	uvm_maxkaddr = pmap_growkernel(end);
4923 #else
4924 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4925 #endif
4926 
4927 	/* Rebuild free list. */
4928 	uvm_map_freelist_update_refill(map, flags);
4929 }
4930 
4931 /*
4932  * Freelist update subfunction: unlink all entries from freelists.
4933  */
4934 void
4935 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4936 {
4937 	struct uvm_addr_state *free;
4938 	struct vm_map_entry *entry, *prev, *next;
4939 
4940 	prev = NULL;
4941 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
4942 	    entry = next) {
4943 		next = RBT_NEXT(uvm_map_addr, entry);
4944 
4945 		free = uvm_map_uaddr_e(map, entry);
4946 		uvm_mapent_free_remove(map, free, entry);
4947 
4948 		if (prev != NULL && entry->start == entry->end) {
4949 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4950 			uvm_mapent_addr_remove(map, entry);
4951 			DEAD_ENTRY_PUSH(dead, entry);
4952 		} else
4953 			prev = entry;
4954 	}
4955 }
4956 
4957 /*
4958  * Freelist update subfunction: refill the freelists with entries.
4959  */
4960 void
4961 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4962 {
4963 	struct vm_map_entry *entry;
4964 	vaddr_t min, max;
4965 
4966 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
4967 		min = VMMAP_FREE_START(entry);
4968 		max = VMMAP_FREE_END(entry);
4969 		entry->fspace = 0;
4970 
4971 		entry = uvm_map_fix_space(map, entry, min, max, flags);
4972 	}
4973 
4974 	uvm_tree_sanity(map, __FILE__, __LINE__);
4975 }
4976 
4977 /*
4978  * Change {a,b}_{start,end} allocation ranges and associated free lists.
4979  */
4980 void
4981 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4982     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4983 {
4984 	KDASSERT(b_end >= b_start && s_end >= s_start);
4985 
4986 	/* Clear all free lists. */
4987 	uvm_map_freelist_update_clear(map, dead);
4988 
4989 	/* Apply new bounds. */
4990 	map->b_start = b_start;
4991 	map->b_end   = b_end;
4992 	map->s_start = s_start;
4993 	map->s_end   = s_end;
4994 
4995 	/* Refill free lists. */
4996 	uvm_map_freelist_update_refill(map, flags);
4997 }
4998 
4999 /*
5000  * Assign a uvm_addr_state to the specified pointer in vm_map.
5001  *
5002  * May sleep.
5003  */
5004 void
5005 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
5006     struct uvm_addr_state *newval)
5007 {
5008 	struct uvm_map_deadq dead;
5009 
5010 	/* Pointer which must be in this map. */
5011 	KASSERT(which != NULL);
5012 	KASSERT((void*)map <= (void*)(which) &&
5013 	    (void*)(which) < (void*)(map + 1));
5014 
5015 	vm_map_lock(map);
5016 	TAILQ_INIT(&dead);
5017 	uvm_map_freelist_update_clear(map, &dead);
5018 
5019 	uvm_addr_destroy(*which);
5020 	*which = newval;
5021 
5022 	uvm_map_freelist_update_refill(map, 0);
5023 	vm_map_unlock(map);
5024 	uvm_unmap_detach(&dead, 0);
5025 }
5026 
5027 /*
5028  * Correct space insert.
5029  *
5030  * Entry must not be on any freelist.
5031  */
5032 struct vm_map_entry*
5033 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5034     vaddr_t min, vaddr_t max, int flags)
5035 {
5036 	struct uvm_addr_state	*free, *entfree;
5037 	vaddr_t			 lmax;
5038 
5039 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5040 	KDASSERT(min <= max);
5041 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5042 	    min == map->min_offset);
5043 
5044 	/*
5045 	 * During the function, entfree will always point at the uaddr state
5046 	 * for entry.
5047 	 */
5048 	entfree = (entry == NULL ? NULL :
5049 	    uvm_map_uaddr_e(map, entry));
5050 
5051 	while (min != max) {
5052 		/* Claim guard page for entry. */
5053 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5054 		    VMMAP_FREE_END(entry) == entry->end &&
5055 		    entry->start != entry->end) {
5056 			if (max - min == 2 * PAGE_SIZE) {
5057 				/*
5058 				 * If the free-space gap is exactly 2 pages,
5059 				 * we make the guard 2 pages instead of 1.
5060 				 * Because in a guarded map, an area needs
5061 				 * at least 2 pages to allocate from:
5062 				 * one page for the allocation and one for
5063 				 * the guard.
5064 				 */
5065 				entry->guard = 2 * PAGE_SIZE;
5066 				min = max;
5067 			} else {
5068 				entry->guard = PAGE_SIZE;
5069 				min += PAGE_SIZE;
5070 			}
5071 			continue;
5072 		}
5073 
5074 		/*
5075 		 * Handle the case where entry has a 2-page guard, but the
5076 		 * space after entry is freed.
5077 		 */
5078 		if (entry != NULL && entry->fspace == 0 &&
5079 		    entry->guard > PAGE_SIZE) {
5080 			entry->guard = PAGE_SIZE;
5081 			min = VMMAP_FREE_START(entry);
5082 		}
5083 
5084 		lmax = uvm_map_boundary(map, min, max);
5085 		free = uvm_map_uaddr(map, min);
5086 
5087 		/*
5088 		 * Entries are merged if they point at the same uvm_free().
5089 		 * Exception to that rule: if min == uvm_maxkaddr, a new
5090 		 * entry is started regardless (otherwise the allocators
5091 		 * will get confused).
5092 		 */
5093 		if (entry != NULL && free == entfree &&
5094 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5095 		    min == uvm_maxkaddr)) {
5096 			KDASSERT(VMMAP_FREE_END(entry) == min);
5097 			entry->fspace += lmax - min;
5098 		} else {
5099 			/*
5100 			 * Commit entry to free list: it'll not be added to
5101 			 * anymore.
5102 			 * We'll start a new entry and add to that entry
5103 			 * instead.
5104 			 */
5105 			if (entry != NULL)
5106 				uvm_mapent_free_insert(map, entfree, entry);
5107 
5108 			/* New entry for new uaddr. */
5109 			entry = uvm_mapent_alloc(map, flags);
5110 			KDASSERT(entry != NULL);
5111 			entry->end = entry->start = min;
5112 			entry->guard = 0;
5113 			entry->fspace = lmax - min;
5114 			entry->object.uvm_obj = NULL;
5115 			entry->offset = 0;
5116 			entry->etype = 0;
5117 			entry->protection = entry->max_protection = 0;
5118 			entry->inheritance = 0;
5119 			entry->wired_count = 0;
5120 			entry->advice = 0;
5121 			entry->aref.ar_pageoff = 0;
5122 			entry->aref.ar_amap = NULL;
5123 			uvm_mapent_addr_insert(map, entry);
5124 
5125 			entfree = free;
5126 		}
5127 
5128 		min = lmax;
5129 	}
5130 	/* Finally put entry on the uaddr state. */
5131 	if (entry != NULL)
5132 		uvm_mapent_free_insert(map, entfree, entry);
5133 
5134 	return entry;
5135 }
5136 
5137 /*
5138  * MQuery style of allocation.
5139  *
5140  * This allocator searches forward until sufficient space is found to map
5141  * the given size.
5142  *
5143  * XXX: factor in offset (via pmap_prefer) and protection?
5144  */
5145 int
5146 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5147     int flags)
5148 {
5149 	struct vm_map_entry *entry, *last;
5150 	vaddr_t addr;
5151 	vaddr_t tmp, pmap_align, pmap_offset;
5152 	int error;
5153 
5154 	addr = *addr_p;
5155 	vm_map_lock_read(map);
5156 
5157 	/* Configure pmap prefer. */
5158 	if (offset != UVM_UNKNOWN_OFFSET) {
5159 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5160 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5161 	} else {
5162 		pmap_align = PAGE_SIZE;
5163 		pmap_offset = 0;
5164 	}
5165 
5166 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5167 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5168 	  	tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5169 		if (tmp < addr)
5170 			tmp += pmap_align;
5171 		addr = tmp;
5172 	}
5173 
5174 	/* First, check if the requested range is fully available. */
5175 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5176 	last = NULL;
5177 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5178 		error = 0;
5179 		goto out;
5180 	}
5181 	if (flags & UVM_FLAG_FIXED) {
5182 		error = EINVAL;
5183 		goto out;
5184 	}
5185 
5186 	error = ENOMEM; /* Default error from here. */
5187 
5188 	/*
5189 	 * At this point, the memory at <addr, sz> is not available.
5190 	 * The reasons are:
5191 	 * [1] it's outside the map,
5192 	 * [2] it starts in used memory (and therefore needs to move
5193 	 *     toward the first free page in entry),
5194 	 * [3] it starts in free memory but bumps into used memory.
5195 	 *
5196 	 * Note that for case [2], the forward moving is handled by the
5197 	 * for loop below.
5198 	 */
5199 	if (entry == NULL) {
5200 		/* [1] Outside the map. */
5201 		if (addr >= map->max_offset)
5202 			goto out;
5203 		else
5204 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5205 	} else if (VMMAP_FREE_START(entry) <= addr) {
5206 		/* [3] Bumped into used memory. */
5207 		entry = RBT_NEXT(uvm_map_addr, entry);
5208 	}
5209 
5210 	/* Test if the next entry is sufficient for the allocation. */
5211 	for (; entry != NULL;
5212 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5213 		if (entry->fspace == 0)
5214 			continue;
5215 		addr = VMMAP_FREE_START(entry);
5216 
5217 restart:	/* Restart address checks on address change. */
5218 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5219 		if (tmp < addr)
5220 			tmp += pmap_align;
5221 		addr = tmp;
5222 		if (addr >= VMMAP_FREE_END(entry))
5223 			continue;
5224 
5225 		/* Skip brk() allocation addresses. */
5226 		if (addr + sz > map->b_start && addr < map->b_end) {
5227 			if (VMMAP_FREE_END(entry) > map->b_end) {
5228 				addr = map->b_end;
5229 				goto restart;
5230 			} else
5231 				continue;
5232 		}
5233 		/* Skip stack allocation addresses. */
5234 		if (addr + sz > map->s_start && addr < map->s_end) {
5235 			if (VMMAP_FREE_END(entry) > map->s_end) {
5236 				addr = map->s_end;
5237 				goto restart;
5238 			} else
5239 				continue;
5240 		}
5241 
5242 		last = NULL;
5243 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5244 			error = 0;
5245 			goto out;
5246 		}
5247 	}
5248 
5249 out:
5250 	vm_map_unlock_read(map);
5251 	if (error == 0)
5252 		*addr_p = addr;
5253 	return error;
5254 }
5255 
5256 /*
5257  * Determine allocation bias.
5258  *
5259  * Returns 1 if we should bias to high addresses, -1 for a bias towards low
5260  * addresses, or 0 for no bias.
5261  * The bias mechanism is intended to avoid clashing with brk() and stack
5262  * areas.
5263  */
5264 int
5265 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
5266 {
5267 	vaddr_t start, end;
5268 
5269 	start = VMMAP_FREE_START(entry);
5270 	end = VMMAP_FREE_END(entry);
5271 
5272 	/* Stay at the top of brk() area. */
5273 	if (end >= map->b_start && start < map->b_end)
5274 		return 1;
5275 	/* Stay at the far end of the stack area. */
5276 	if (end >= map->s_start && start < map->s_end) {
5277 #ifdef MACHINE_STACK_GROWS_UP
5278 		return 1;
5279 #else
5280 		return -1;
5281 #endif
5282 	}
5283 
5284 	/* No bias, this area is meant for us. */
5285 	return 0;
5286 }
5287 
5288 
5289 boolean_t
5290 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5291 {
5292 	boolean_t rv;
5293 
5294 	if (map->flags & VM_MAP_INTRSAFE) {
5295 		rv = mtx_enter_try(&map->mtx);
5296 	} else {
5297 		mtx_enter(&map->flags_lock);
5298 		if (map->flags & VM_MAP_BUSY) {
5299 			mtx_leave(&map->flags_lock);
5300 			return (FALSE);
5301 		}
5302 		mtx_leave(&map->flags_lock);
5303 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5304 		/* check if the lock is busy and back out if we won the race */
5305 		if (rv) {
5306 			mtx_enter(&map->flags_lock);
5307 			if (map->flags & VM_MAP_BUSY) {
5308 				rw_exit(&map->lock);
5309 				rv = FALSE;
5310 			}
5311 			mtx_leave(&map->flags_lock);
5312 		}
5313 	}
5314 
5315 	if (rv) {
5316 		map->timestamp++;
5317 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5318 		uvm_tree_sanity(map, file, line);
5319 		uvm_tree_size_chk(map, file, line);
5320 	}
5321 
5322 	return (rv);
5323 }
5324 
5325 void
5326 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5327 {
5328 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5329 		do {
5330 			mtx_enter(&map->flags_lock);
5331 tryagain:
5332 			while (map->flags & VM_MAP_BUSY) {
5333 				map->flags |= VM_MAP_WANTLOCK;
5334 				msleep(&map->flags, &map->flags_lock,
5335 				    PVM, vmmapbsy, 0);
5336 			}
5337 			mtx_leave(&map->flags_lock);
5338 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5339 		/* check if the lock is busy and back out if we won the race */
5340 		mtx_enter(&map->flags_lock);
5341 		if (map->flags & VM_MAP_BUSY) {
5342 			rw_exit(&map->lock);
5343 			goto tryagain;
5344 		}
5345 		mtx_leave(&map->flags_lock);
5346 	} else {
5347 		mtx_enter(&map->mtx);
5348 	}
5349 
5350 	map->timestamp++;
5351 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5352 	uvm_tree_sanity(map, file, line);
5353 	uvm_tree_size_chk(map, file, line);
5354 }
5355 
5356 void
5357 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5358 {
5359 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5360 		rw_enter_read(&map->lock);
5361 	else
5362 		mtx_enter(&map->mtx);
5363 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5364 	uvm_tree_sanity(map, file, line);
5365 	uvm_tree_size_chk(map, file, line);
5366 }
5367 
5368 void
5369 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5370 {
5371 	uvm_tree_sanity(map, file, line);
5372 	uvm_tree_size_chk(map, file, line);
5373 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5374 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5375 		rw_exit(&map->lock);
5376 	else
5377 		mtx_leave(&map->mtx);
5378 }
5379 
5380 void
5381 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5382 {
5383 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5384 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5385 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5386 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5387 		rw_exit_read(&map->lock);
5388 	else
5389 		mtx_leave(&map->mtx);
5390 }
5391 
5392 void
5393 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5394 {
5395 	uvm_tree_sanity(map, file, line);
5396 	uvm_tree_size_chk(map, file, line);
5397 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5398 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5399 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5400 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5401 		rw_enter(&map->lock, RW_DOWNGRADE);
5402 }
5403 
5404 void
5405 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5406 {
5407 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5408 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5409 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5410 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5411 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5412 		rw_exit_read(&map->lock);
5413 		rw_enter_write(&map->lock);
5414 	}
5415 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5416 	uvm_tree_sanity(map, file, line);
5417 }
5418 
5419 void
5420 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5421 {
5422 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5423 	mtx_enter(&map->flags_lock);
5424 	map->flags |= VM_MAP_BUSY;
5425 	mtx_leave(&map->flags_lock);
5426 }
5427 
5428 void
5429 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5430 {
5431 	int oflags;
5432 
5433 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5434 	mtx_enter(&map->flags_lock);
5435 	oflags = map->flags;
5436 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5437 	mtx_leave(&map->flags_lock);
5438 	if (oflags & VM_MAP_WANTLOCK)
5439 		wakeup(&map->flags);
5440 }
5441 
5442 #ifndef SMALL_KERNEL
5443 int
5444 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5445     size_t *lenp)
5446 {
5447 	struct vm_map_entry *entry;
5448 	vaddr_t start;
5449 	int cnt, maxcnt, error = 0;
5450 
5451 	KASSERT(*lenp > 0);
5452 	KASSERT((*lenp % sizeof(*kve)) == 0);
5453 	cnt = 0;
5454 	maxcnt = *lenp / sizeof(*kve);
5455 	KASSERT(maxcnt > 0);
5456 
5457 	/*
5458 	 * Return only entries whose address is above the given base
5459 	 * address.  This allows userland to iterate without knowing the
5460 	 * number of entries beforehand.
5461 	 */
5462 	start = (vaddr_t)kve[0].kve_start;
5463 
5464 	vm_map_lock(map);
5465 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5466 		if (cnt == maxcnt) {
5467 			error = ENOMEM;
5468 			break;
5469 		}
5470 		if (start != 0 && entry->start < start)
5471 			continue;
5472 		kve->kve_start = entry->start;
5473 		kve->kve_end = entry->end;
5474 		kve->kve_guard = entry->guard;
5475 		kve->kve_fspace = entry->fspace;
5476 		kve->kve_fspace_augment = entry->fspace_augment;
5477 		kve->kve_offset = entry->offset;
5478 		kve->kve_wired_count = entry->wired_count;
5479 		kve->kve_etype = entry->etype;
5480 		kve->kve_protection = entry->protection;
5481 		kve->kve_max_protection = entry->max_protection;
5482 		kve->kve_advice = entry->advice;
5483 		kve->kve_inheritance = entry->inheritance;
5484 		kve->kve_flags = entry->flags;
5485 		kve++;
5486 		cnt++;
5487 	}
5488 	vm_map_unlock(map);
5489 
5490 	KASSERT(cnt <= maxcnt);
5491 
5492 	*lenp = sizeof(*kve) * cnt;
5493 	return error;
5494 }
5495 #endif
5496 
5497 
5498 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5499     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5500 
5501 
5502 /*
5503  * MD code: vmspace allocator setup.
5504  */
5505 
5506 #ifdef __i386__
5507 void
5508 uvm_map_setup_md(struct vm_map *map)
5509 {
5510 	vaddr_t		min, max;
5511 
5512 	min = map->min_offset;
5513 	max = map->max_offset;
5514 
5515 	/*
5516 	 * Ensure the selectors will not try to manage page 0;
5517 	 * it's too special.
5518 	 */
5519 	if (min < VMMAP_MIN_ADDR)
5520 		min = VMMAP_MIN_ADDR;
5521 
5522 #if 0	/* Cool stuff, not yet */
5523 	/* Executable code is special. */
5524 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5525 	/* Place normal allocations beyond executable mappings. */
5526 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5527 #else	/* Crappy stuff, for now */
5528 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5529 #endif
5530 
5531 #ifndef SMALL_KERNEL
5532 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5533 #endif /* !SMALL_KERNEL */
5534 }
5535 #elif __LP64__
5536 void
5537 uvm_map_setup_md(struct vm_map *map)
5538 {
5539 	vaddr_t		min, max;
5540 
5541 	min = map->min_offset;
5542 	max = map->max_offset;
5543 
5544 	/*
5545 	 * Ensure the selectors will not try to manage page 0;
5546 	 * it's too special.
5547 	 */
5548 	if (min < VMMAP_MIN_ADDR)
5549 		min = VMMAP_MIN_ADDR;
5550 
5551 #if 0	/* Cool stuff, not yet */
5552 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5553 #else	/* Crappy stuff, for now */
5554 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5555 #endif
5556 
5557 #ifndef SMALL_KERNEL
5558 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5559 #endif /* !SMALL_KERNEL */
5560 }
5561 #else	/* non-i386, 32 bit */
5562 void
5563 uvm_map_setup_md(struct vm_map *map)
5564 {
5565 	vaddr_t		min, max;
5566 
5567 	min = map->min_offset;
5568 	max = map->max_offset;
5569 
5570 	/*
5571 	 * Ensure the selectors will not try to manage page 0;
5572 	 * it's too special.
5573 	 */
5574 	if (min < VMMAP_MIN_ADDR)
5575 		min = VMMAP_MIN_ADDR;
5576 
5577 #if 0	/* Cool stuff, not yet */
5578 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5579 #else	/* Crappy stuff, for now */
5580 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5581 #endif
5582 
5583 #ifndef SMALL_KERNEL
5584 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5585 #endif /* !SMALL_KERNEL */
5586 }
5587 #endif
5588