xref: /openbsd-src/sys/uvm/uvm_map.c (revision 9f11ffb7133c203312a01e4b986886bc88c7d74b)
1 /*	$OpenBSD: uvm_map.c,v 1.240 2019/02/10 16:42:35 phessler Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/mman.h>
90 #include <sys/proc.h>
91 #include <sys/malloc.h>
92 #include <sys/pool.h>
93 #include <sys/sysctl.h>
94 #include <sys/syslog.h>
95 
96 #ifdef SYSVSHM
97 #include <sys/shm.h>
98 #endif
99 
100 #include <uvm/uvm.h>
101 
102 #ifdef DDB
103 #include <uvm/uvm_ddb.h>
104 #endif
105 
106 #include <uvm/uvm_addr.h>
107 
108 
109 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
110 int			 uvm_mapent_isjoinable(struct vm_map*,
111 			    struct vm_map_entry*, struct vm_map_entry*);
112 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
113 			    struct vm_map_entry*, struct uvm_map_deadq*);
114 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
115 			    struct vm_map_entry*, struct uvm_map_deadq*);
116 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
117 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
118 			    struct uvm_map_deadq*, struct vm_map_entry*);
119 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
120 void			 uvm_mapent_free(struct vm_map_entry*);
121 void			 uvm_unmap_kill_entry(struct vm_map*,
122 			    struct vm_map_entry*);
123 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
124 void			 uvm_mapent_mkfree(struct vm_map*,
125 			    struct vm_map_entry*, struct vm_map_entry**,
126 			    struct uvm_map_deadq*, boolean_t);
127 void			 uvm_map_pageable_pgon(struct vm_map*,
128 			    struct vm_map_entry*, struct vm_map_entry*,
129 			    vaddr_t, vaddr_t);
130 int			 uvm_map_pageable_wire(struct vm_map*,
131 			    struct vm_map_entry*, struct vm_map_entry*,
132 			    vaddr_t, vaddr_t, int);
133 void			 uvm_map_setup_entries(struct vm_map*);
134 void			 uvm_map_setup_md(struct vm_map*);
135 void			 uvm_map_teardown(struct vm_map*);
136 void			 uvm_map_vmspace_update(struct vm_map*,
137 			    struct uvm_map_deadq*, int);
138 void			 uvm_map_kmem_grow(struct vm_map*,
139 			    struct uvm_map_deadq*, vsize_t, int);
140 void			 uvm_map_freelist_update_clear(struct vm_map*,
141 			    struct uvm_map_deadq*);
142 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
143 void			 uvm_map_freelist_update(struct vm_map*,
144 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
145 			    vaddr_t, vaddr_t, int);
146 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
147 			    vaddr_t, vaddr_t, int);
148 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
149 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
150 			    int);
151 int			 uvm_map_findspace(struct vm_map*,
152 			    struct vm_map_entry**, struct vm_map_entry**,
153 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
154 			    vaddr_t);
155 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
156 void			 uvm_map_addr_augment(struct vm_map_entry*);
157 
158 /*
159  * Tree management functions.
160  */
161 
162 static __inline void	 uvm_mapent_copy(struct vm_map_entry*,
163 			    struct vm_map_entry*);
164 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
165 			    const struct vm_map_entry*);
166 void			 uvm_mapent_free_insert(struct vm_map*,
167 			    struct uvm_addr_state*, struct vm_map_entry*);
168 void			 uvm_mapent_free_remove(struct vm_map*,
169 			    struct uvm_addr_state*, struct vm_map_entry*);
170 void			 uvm_mapent_addr_insert(struct vm_map*,
171 			    struct vm_map_entry*);
172 void			 uvm_mapent_addr_remove(struct vm_map*,
173 			    struct vm_map_entry*);
174 void			 uvm_map_splitentry(struct vm_map*,
175 			    struct vm_map_entry*, struct vm_map_entry*,
176 			    vaddr_t);
177 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
178 int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
179 
180 /*
181  * uvm_vmspace_fork helper functions.
182  */
183 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
184 			    vsize_t, vm_prot_t, vm_prot_t,
185 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
186 			    int);
187 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
188 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
189 			    struct vm_map_entry*, struct uvm_map_deadq*);
190 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
191 			    struct vm_map*, struct vm_map_entry*,
192 			    struct uvm_map_deadq*);
193 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
194 			    struct vm_map*, struct vm_map_entry*,
195 			    struct uvm_map_deadq*);
196 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
197 			    struct vm_map*, struct vm_map_entry*,
198 			    struct uvm_map_deadq*);
199 
200 /*
201  * Tree validation.
202  */
203 #ifdef VMMAP_DEBUG
204 void			 uvm_tree_assert(struct vm_map*, int, char*,
205 			    char*, int);
206 #define UVM_ASSERT(map, cond, file, line)				\
207 	uvm_tree_assert((map), (cond), #cond, (file), (line))
208 void			 uvm_tree_sanity(struct vm_map*, char*, int);
209 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
210 void			 vmspace_validate(struct vm_map*);
211 #else
212 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
213 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
214 #define vmspace_validate(_map)				do {} while (0)
215 #endif
216 
217 /*
218  * All architectures will have pmap_prefer.
219  */
220 #ifndef PMAP_PREFER
221 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
222 #define PMAP_PREFER_OFFSET(off)	0
223 #define PMAP_PREFER(addr, off)	(addr)
224 #endif
225 
226 
227 /*
228  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
229  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
230  *
231  * We attempt to grow by UVM_MAP_KSIZE_ALLOCMUL times the allocation size
232  * each time.
233  */
234 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
235 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
236 #define VM_MAP_KSIZE_ALLOCMUL	4
237 /*
238  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
239  * ahead.
240  */
241 #define FSPACE_DELTA		8
242 /*
243  * Put allocations adjecent to previous allocations when the free-space tree
244  * is larger than FSPACE_COMPACT entries.
245  *
246  * Alignment and PMAP_PREFER may still cause the entry to not be fully
247  * adjecent. Note that this strategy reduces memory fragmentation (by leaving
248  * a large space before or after the allocation).
249  */
250 #define FSPACE_COMPACT		128
251 /*
252  * Make the address selection skip at most this many bytes from the start of
253  * the free space in which the allocation takes place.
254  *
255  * The main idea behind a randomized address space is that an attacker cannot
256  * know where to target his attack. Therefore, the location of objects must be
257  * as random as possible. However, the goal is not to create the most sparse
258  * map that is possible.
259  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
260  * sizes, thereby reducing the sparseness. The biggest randomization comes
261  * from fragmentation, i.e. FSPACE_COMPACT.
262  */
263 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
264 /*
265  * Allow for small gaps in the overflow areas.
266  * Gap size is in bytes and does not have to be a multiple of page-size.
267  */
268 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
269 
270 /* auto-allocate address lower bound */
271 #define VMMAP_MIN_ADDR		PAGE_SIZE
272 
273 
274 #ifdef DEADBEEF0
275 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
276 #else
277 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
278 #endif
279 
280 #ifdef DEBUG
281 int uvm_map_printlocks = 0;
282 
283 #define LPRINTF(_args)							\
284 	do {								\
285 		if (uvm_map_printlocks)					\
286 			printf _args;					\
287 	} while (0)
288 #else
289 #define LPRINTF(_args)	do {} while (0)
290 #endif
291 
292 static struct mutex uvm_kmapent_mtx;
293 static struct timeval uvm_kmapent_last_warn_time;
294 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
295 
296 const char vmmapbsy[] = "vmmapbsy";
297 
298 /*
299  * pool for vmspace structures.
300  */
301 struct pool uvm_vmspace_pool;
302 
303 /*
304  * pool for dynamically-allocated map entries.
305  */
306 struct pool uvm_map_entry_pool;
307 struct pool uvm_map_entry_kmem_pool;
308 
309 /*
310  * This global represents the end of the kernel virtual address
311  * space. If we want to exceed this, we must grow the kernel
312  * virtual address space dynamically.
313  *
314  * Note, this variable is locked by kernel_map's lock.
315  */
316 vaddr_t uvm_maxkaddr;
317 
318 /*
319  * Locking predicate.
320  */
321 #define UVM_MAP_REQ_WRITE(_map)						\
322 	do {								\
323 		if ((_map)->ref_count > 0) {				\
324 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
325 				rw_assert_wrlock(&(_map)->lock);	\
326 			else						\
327 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
328 		}							\
329 	} while (0)
330 
331 /*
332  * Tree describing entries by address.
333  *
334  * Addresses are unique.
335  * Entries with start == end may only exist if they are the first entry
336  * (sorted by address) within a free-memory tree.
337  */
338 
339 static inline int
340 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
341     const struct vm_map_entry *e2)
342 {
343 	return e1->start < e2->start ? -1 : e1->start > e2->start;
344 }
345 
346 /*
347  * Copy mapentry.
348  */
349 static __inline void
350 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
351 {
352 	caddr_t csrc, cdst;
353 	size_t sz;
354 
355 	csrc = (caddr_t)src;
356 	cdst = (caddr_t)dst;
357 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
358 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
359 
360 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
361 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
362 	memcpy(cdst, csrc, sz);
363 }
364 
365 /*
366  * Handle free-list insertion.
367  */
368 void
369 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
370     struct vm_map_entry *entry)
371 {
372 	const struct uvm_addr_functions *fun;
373 #ifdef VMMAP_DEBUG
374 	vaddr_t min, max, bound;
375 #endif
376 
377 #ifdef VMMAP_DEBUG
378 	/*
379 	 * Boundary check.
380 	 * Boundaries are folded if they go on the same free list.
381 	 */
382 	min = VMMAP_FREE_START(entry);
383 	max = VMMAP_FREE_END(entry);
384 
385 	while (min < max) {
386 		bound = uvm_map_boundary(map, min, max);
387 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
388 		min = bound;
389 	}
390 #endif
391 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
392 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
393 
394 	UVM_MAP_REQ_WRITE(map);
395 
396 	/* Actual insert: forward to uaddr pointer. */
397 	if (uaddr != NULL) {
398 		fun = uaddr->uaddr_functions;
399 		KDASSERT(fun != NULL);
400 		if (fun->uaddr_free_insert != NULL)
401 			(*fun->uaddr_free_insert)(map, uaddr, entry);
402 		entry->etype |= UVM_ET_FREEMAPPED;
403 	}
404 
405 	/* Update fspace augmentation. */
406 	uvm_map_addr_augment(entry);
407 }
408 
409 /*
410  * Handle free-list removal.
411  */
412 void
413 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
414     struct vm_map_entry *entry)
415 {
416 	const struct uvm_addr_functions *fun;
417 
418 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
419 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
420 	UVM_MAP_REQ_WRITE(map);
421 
422 	if (uaddr != NULL) {
423 		fun = uaddr->uaddr_functions;
424 		if (fun->uaddr_free_remove != NULL)
425 			(*fun->uaddr_free_remove)(map, uaddr, entry);
426 		entry->etype &= ~UVM_ET_FREEMAPPED;
427 	}
428 }
429 
430 /*
431  * Handle address tree insertion.
432  */
433 void
434 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
435 {
436 	struct vm_map_entry *res;
437 
438 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
439 		panic("uvm_mapent_addr_insert: entry still in addr list");
440 	KDASSERT(entry->start <= entry->end);
441 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
442 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
443 
444 	UVM_MAP_REQ_WRITE(map);
445 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
446 	if (res != NULL) {
447 		panic("uvm_mapent_addr_insert: map %p entry %p "
448 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
449 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
450 		    map, entry,
451 		    entry->start, entry->end, entry->guard, entry->fspace,
452 		    res, res->start, res->end, res->guard, res->fspace);
453 	}
454 }
455 
456 /*
457  * Handle address tree removal.
458  */
459 void
460 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
461 {
462 	struct vm_map_entry *res;
463 
464 	UVM_MAP_REQ_WRITE(map);
465 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
466 	if (res != entry)
467 		panic("uvm_mapent_addr_remove");
468 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
469 }
470 
471 /*
472  * uvm_map_reference: add reference to a map
473  *
474  * XXX check map reference counter lock
475  */
476 #define uvm_map_reference(_map)						\
477 	do {								\
478 		map->ref_count++;					\
479 	} while (0)
480 
481 /*
482  * Calculate the dused delta.
483  */
484 vsize_t
485 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
486 {
487 	struct vmspace *vm;
488 	vsize_t sz;
489 	vaddr_t lmax;
490 	vaddr_t stack_begin, stack_end; /* Position of stack. */
491 
492 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
493 	vm = (struct vmspace *)map;
494 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
495 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
496 
497 	sz = 0;
498 	while (min != max) {
499 		lmax = max;
500 		if (min < stack_begin && lmax > stack_begin)
501 			lmax = stack_begin;
502 		else if (min < stack_end && lmax > stack_end)
503 			lmax = stack_end;
504 
505 		if (min >= stack_begin && min < stack_end) {
506 			/* nothing */
507 		} else
508 			sz += lmax - min;
509 		min = lmax;
510 	}
511 
512 	return sz >> PAGE_SHIFT;
513 }
514 
515 /*
516  * Find the entry describing the given address.
517  */
518 struct vm_map_entry*
519 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
520 {
521 	struct vm_map_entry *iter;
522 
523 	iter = RBT_ROOT(uvm_map_addr, atree);
524 	while (iter != NULL) {
525 		if (iter->start > addr)
526 			iter = RBT_LEFT(uvm_map_addr, iter);
527 		else if (VMMAP_FREE_END(iter) <= addr)
528 			iter = RBT_RIGHT(uvm_map_addr, iter);
529 		else
530 			return iter;
531 	}
532 	return NULL;
533 }
534 
535 /*
536  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
537  *
538  * Push dead entries into a linked list.
539  * Since the linked list abuses the address tree for storage, the entry
540  * may not be linked in a map.
541  *
542  * *head must be initialized to NULL before the first call to this macro.
543  * uvm_unmap_detach(*head, 0) will remove dead entries.
544  */
545 static __inline void
546 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
547 {
548 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
549 }
550 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
551 	dead_entry_push((_headptr), (_entry))
552 
553 /*
554  * Helper function for uvm_map_findspace_tree.
555  *
556  * Given allocation constraints and pmap constraints, finds the
557  * lowest and highest address in a range that can be used for the
558  * allocation.
559  *
560  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
561  *
562  *
563  * Big chunk of math with a seasoning of dragons.
564  */
565 int
566 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
567     struct vm_map_entry *sel, vaddr_t align,
568     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
569 {
570 	vaddr_t sel_min, sel_max;
571 #ifdef PMAP_PREFER
572 	vaddr_t pmap_min, pmap_max;
573 #endif /* PMAP_PREFER */
574 #ifdef DIAGNOSTIC
575 	int bad;
576 #endif /* DIAGNOSTIC */
577 
578 	sel_min = VMMAP_FREE_START(sel);
579 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
580 
581 #ifdef PMAP_PREFER
582 
583 	/*
584 	 * There are two special cases, in which we can satisfy the align
585 	 * requirement and the pmap_prefer requirement.
586 	 * - when pmap_off == 0, we always select the largest of the two
587 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
588 	 *   satisfy the pmap_align requirement and automatically
589 	 *   satisfy the align requirement.
590 	 */
591 	if (align > PAGE_SIZE &&
592 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
593 		/*
594 		 * Simple case: only use align.
595 		 */
596 		sel_min = roundup(sel_min, align);
597 		sel_max &= ~(align - 1);
598 
599 		if (sel_min > sel_max)
600 			return ENOMEM;
601 
602 		/* Correct for bias. */
603 		if (sel_max - sel_min > FSPACE_BIASGAP) {
604 			if (bias > 0) {
605 				sel_min = sel_max - FSPACE_BIASGAP;
606 				sel_min = roundup(sel_min, align);
607 			} else if (bias < 0) {
608 				sel_max = sel_min + FSPACE_BIASGAP;
609 				sel_max &= ~(align - 1);
610 			}
611 		}
612 	} else if (pmap_align != 0) {
613 		/*
614 		 * Special case: satisfy both pmap_prefer and
615 		 * align argument.
616 		 */
617 		pmap_max = sel_max & ~(pmap_align - 1);
618 		pmap_min = sel_min;
619 		if (pmap_max < sel_min)
620 			return ENOMEM;
621 
622 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
623 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
624 			pmap_min = pmap_max - FSPACE_BIASGAP;
625 		/* Align pmap_min. */
626 		pmap_min &= ~(pmap_align - 1);
627 		if (pmap_min < sel_min)
628 			pmap_min += pmap_align;
629 		if (pmap_min > pmap_max)
630 			return ENOMEM;
631 
632 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
633 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
634 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
635 			    ~(pmap_align - 1);
636 		}
637 		if (pmap_min > pmap_max)
638 			return ENOMEM;
639 
640 		/* Apply pmap prefer offset. */
641 		pmap_max |= pmap_off;
642 		if (pmap_max > sel_max)
643 			pmap_max -= pmap_align;
644 		pmap_min |= pmap_off;
645 		if (pmap_min < sel_min)
646 			pmap_min += pmap_align;
647 
648 		/*
649 		 * Fixup: it's possible that pmap_min and pmap_max
650 		 * cross eachother. In this case, try to find one
651 		 * address that is allowed.
652 		 * (This usually happens in biased case.)
653 		 */
654 		if (pmap_min > pmap_max) {
655 			if (pmap_min < sel_max)
656 				pmap_max = pmap_min;
657 			else if (pmap_max > sel_min)
658 				pmap_min = pmap_max;
659 			else
660 				return ENOMEM;
661 		}
662 
663 		/* Internal validation. */
664 		KDASSERT(pmap_min <= pmap_max);
665 
666 		sel_min = pmap_min;
667 		sel_max = pmap_max;
668 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
669 		sel_min = sel_max - FSPACE_BIASGAP;
670 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
671 		sel_max = sel_min + FSPACE_BIASGAP;
672 
673 #else
674 
675 	if (align > PAGE_SIZE) {
676 		sel_min = roundup(sel_min, align);
677 		sel_max &= ~(align - 1);
678 		if (sel_min > sel_max)
679 			return ENOMEM;
680 
681 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
682 			if (bias > 0) {
683 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
684 				    align);
685 			} else {
686 				sel_max = (sel_min + FSPACE_BIASGAP) &
687 				    ~(align - 1);
688 			}
689 		}
690 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
691 		sel_min = sel_max - FSPACE_BIASGAP;
692 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
693 		sel_max = sel_min + FSPACE_BIASGAP;
694 
695 #endif
696 
697 	if (sel_min > sel_max)
698 		return ENOMEM;
699 
700 #ifdef DIAGNOSTIC
701 	bad = 0;
702 	/* Lower boundary check. */
703 	if (sel_min < VMMAP_FREE_START(sel)) {
704 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
705 		    sel_min, VMMAP_FREE_START(sel));
706 		bad++;
707 	}
708 	/* Upper boundary check. */
709 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
710 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
711 		    sel_max,
712 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
713 		bad++;
714 	}
715 	/* Lower boundary alignment. */
716 	if (align != 0 && (sel_min & (align - 1)) != 0) {
717 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
718 		    sel_min, align);
719 		bad++;
720 	}
721 	/* Upper boundary alignment. */
722 	if (align != 0 && (sel_max & (align - 1)) != 0) {
723 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
724 		    sel_max, align);
725 		bad++;
726 	}
727 	/* Lower boundary PMAP_PREFER check. */
728 	if (pmap_align != 0 && align == 0 &&
729 	    (sel_min & (pmap_align - 1)) != pmap_off) {
730 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
731 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
732 		bad++;
733 	}
734 	/* Upper boundary PMAP_PREFER check. */
735 	if (pmap_align != 0 && align == 0 &&
736 	    (sel_max & (pmap_align - 1)) != pmap_off) {
737 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
738 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
739 		bad++;
740 	}
741 
742 	if (bad) {
743 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
744 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
745 		    "bias = %d, "
746 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
747 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
748 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
749 	}
750 #endif /* DIAGNOSTIC */
751 
752 	*min = sel_min;
753 	*max = sel_max;
754 	return 0;
755 }
756 
757 /*
758  * Test if memory starting at addr with sz bytes is free.
759  *
760  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
761  * the space.
762  * If called with prefilled *start_ptr and *end_ptr, they are to be correct.
763  */
764 int
765 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
766     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
767     vaddr_t addr, vsize_t sz)
768 {
769 	struct uvm_addr_state *free;
770 	struct uvm_map_addr *atree;
771 	struct vm_map_entry *i, *i_end;
772 
773 	if (addr + sz < addr)
774 		return 0;
775 
776 	/*
777 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
778 	 */
779 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
780 		if (addr + sz > uvm_maxkaddr)
781 			return 0;
782 	}
783 
784 	atree = &map->addr;
785 
786 	/*
787 	 * Fill in first, last, so they point at the entries containing the
788 	 * first and last address of the range.
789 	 * Note that if they are not NULL, we don't perform the lookup.
790 	 */
791 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
792 	if (*start_ptr == NULL) {
793 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
794 		if (*start_ptr == NULL)
795 			return 0;
796 	} else
797 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
798 	if (*end_ptr == NULL) {
799 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
800 			*end_ptr = *start_ptr;
801 		else {
802 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
803 			if (*end_ptr == NULL)
804 				return 0;
805 		}
806 	} else
807 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
808 
809 	/* Validation. */
810 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
811 	KDASSERT((*start_ptr)->start <= addr &&
812 	    VMMAP_FREE_END(*start_ptr) > addr &&
813 	    (*end_ptr)->start < addr + sz &&
814 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
815 
816 	/*
817 	 * Check the none of the entries intersects with <addr, addr+sz>.
818 	 * Also, if the entry belong to uaddr_exe or uaddr_brk_stack, it is
819 	 * considered unavailable unless called by those allocators.
820 	 */
821 	i = *start_ptr;
822 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
823 	for (; i != i_end;
824 	    i = RBT_NEXT(uvm_map_addr, i)) {
825 		if (i->start != i->end && i->end > addr)
826 			return 0;
827 
828 		/*
829 		 * uaddr_exe and uaddr_brk_stack may only be used
830 		 * by these allocators and the NULL uaddr (i.e. no
831 		 * uaddr).
832 		 * Reject if this requirement is not met.
833 		 */
834 		if (uaddr != NULL) {
835 			free = uvm_map_uaddr_e(map, i);
836 
837 			if (uaddr != free && free != NULL &&
838 			    (free == map->uaddr_exe ||
839 			     free == map->uaddr_brk_stack))
840 				return 0;
841 		}
842 	}
843 
844 	return -1;
845 }
846 
847 /*
848  * Invoke each address selector until an address is found.
849  * Will not invoke uaddr_exe.
850  */
851 int
852 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
853     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
854     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
855 {
856 	struct uvm_addr_state *uaddr;
857 	int i;
858 
859 	/*
860 	 * Allocation for sz bytes at any address,
861 	 * using the addr selectors in order.
862 	 */
863 	for (i = 0; i < nitems(map->uaddr_any); i++) {
864 		uaddr = map->uaddr_any[i];
865 
866 		if (uvm_addr_invoke(map, uaddr, first, last,
867 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
868 			return 0;
869 	}
870 
871 	/* Fall back to brk() and stack() address selectors. */
872 	uaddr = map->uaddr_brk_stack;
873 	if (uvm_addr_invoke(map, uaddr, first, last,
874 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
875 		return 0;
876 
877 	return ENOMEM;
878 }
879 
880 /* Calculate entry augmentation value. */
881 vsize_t
882 uvm_map_addr_augment_get(struct vm_map_entry *entry)
883 {
884 	vsize_t			 augment;
885 	struct vm_map_entry	*left, *right;
886 
887 	augment = entry->fspace;
888 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
889 		augment = MAX(augment, left->fspace_augment);
890 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
891 		augment = MAX(augment, right->fspace_augment);
892 	return augment;
893 }
894 
895 /*
896  * Update augmentation data in entry.
897  */
898 void
899 uvm_map_addr_augment(struct vm_map_entry *entry)
900 {
901 	vsize_t			 augment;
902 
903 	while (entry != NULL) {
904 		/* Calculate value for augmentation. */
905 		augment = uvm_map_addr_augment_get(entry);
906 
907 		/*
908 		 * Descend update.
909 		 * Once we find an entry that already has the correct value,
910 		 * stop, since it means all its parents will use the correct
911 		 * value too.
912 		 */
913 		if (entry->fspace_augment == augment)
914 			return;
915 		entry->fspace_augment = augment;
916 		entry = RBT_PARENT(uvm_map_addr, entry);
917 	}
918 }
919 
920 /*
921  * uvm_mapanon: establish a valid mapping in map for an anon
922  *
923  * => *addr and sz must be a multiple of PAGE_SIZE.
924  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
925  * => map must be unlocked.
926  *
927  * => align: align vaddr, must be a power-of-2.
928  *    Align is only a hint and will be ignored if the alignment fails.
929  */
930 int
931 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
932     vsize_t align, unsigned int flags)
933 {
934 	struct vm_map_entry	*first, *last, *entry, *new;
935 	struct uvm_map_deadq	 dead;
936 	vm_prot_t		 prot;
937 	vm_prot_t		 maxprot;
938 	vm_inherit_t		 inherit;
939 	int			 advice;
940 	int			 error;
941 	vaddr_t			 pmap_align, pmap_offset;
942 	vaddr_t			 hint;
943 
944 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
945 	KASSERT(map != kernel_map);
946 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
947 
948 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
949 	splassert(IPL_NONE);
950 
951 	/*
952 	 * We use pmap_align and pmap_offset as alignment and offset variables.
953 	 *
954 	 * Because the align parameter takes precedence over pmap prefer,
955 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
956 	 * if pmap_prefer will not align.
957 	 */
958 	pmap_align = MAX(align, PAGE_SIZE);
959 	pmap_offset = 0;
960 
961 	/* Decode parameters. */
962 	prot = UVM_PROTECTION(flags);
963 	maxprot = UVM_MAXPROTECTION(flags);
964 	advice = UVM_ADVICE(flags);
965 	inherit = UVM_INHERIT(flags);
966 	error = 0;
967 	hint = trunc_page(*addr);
968 	TAILQ_INIT(&dead);
969 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
970 	KASSERT((align & (align - 1)) == 0);
971 
972 	/* Check protection. */
973 	if ((prot & maxprot) != prot)
974 		return EACCES;
975 
976 	/*
977 	 * Before grabbing the lock, allocate a map entry for later
978 	 * use to ensure we don't wait for memory while holding the
979 	 * vm_map_lock.
980 	 */
981 	new = uvm_mapent_alloc(map, flags);
982 	if (new == NULL)
983 		return(ENOMEM);
984 
985 	if (flags & UVM_FLAG_TRYLOCK) {
986 		if (vm_map_lock_try(map) == FALSE) {
987 			error = EFAULT;
988 			goto out;
989 		}
990 	} else
991 		vm_map_lock(map);
992 
993 	first = last = NULL;
994 	if (flags & UVM_FLAG_FIXED) {
995 		/*
996 		 * Fixed location.
997 		 *
998 		 * Note: we ignore align, pmap_prefer.
999 		 * Fill in first, last and *addr.
1000 		 */
1001 		KASSERT((*addr & PAGE_MASK) == 0);
1002 
1003 		/* Check that the space is available. */
1004 		if (flags & UVM_FLAG_UNMAP) {
1005 			if ((flags & UVM_FLAG_STACK) &&
1006 			    !uvm_map_is_stack_remappable(map, *addr, sz)) {
1007 				error = EINVAL;
1008 				goto unlock;
1009 			}
1010 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1011 		}
1012 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1013 			error = ENOMEM;
1014 			goto unlock;
1015 		}
1016 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1017 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1018 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1019 		/*
1020 		 * Address used as hint.
1021 		 *
1022 		 * Note: we enforce the alignment restriction,
1023 		 * but ignore pmap_prefer.
1024 		 */
1025 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1026 		/* Run selection algorithm for executables. */
1027 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1028 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1029 
1030 		if (error != 0)
1031 			goto unlock;
1032 	} else {
1033 		/* Update freelists from vmspace. */
1034 		uvm_map_vmspace_update(map, &dead, flags);
1035 
1036 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1037 		    pmap_align, pmap_offset, prot, hint);
1038 
1039 		if (error != 0)
1040 			goto unlock;
1041 	}
1042 
1043 	/* Double-check if selected address doesn't cause overflow. */
1044 	if (*addr + sz < *addr) {
1045 		error = ENOMEM;
1046 		goto unlock;
1047 	}
1048 
1049 	/* If we only want a query, return now. */
1050 	if (flags & UVM_FLAG_QUERY) {
1051 		error = 0;
1052 		goto unlock;
1053 	}
1054 
1055 	/*
1056 	 * Create new entry.
1057 	 * first and last may be invalidated after this call.
1058 	 */
1059 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1060 	    new);
1061 	if (entry == NULL) {
1062 		error = ENOMEM;
1063 		goto unlock;
1064 	}
1065 	new = NULL;
1066 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1067 	entry->object.uvm_obj = NULL;
1068 	entry->offset = 0;
1069 	entry->protection = prot;
1070 	entry->max_protection = maxprot;
1071 	entry->inheritance = inherit;
1072 	entry->wired_count = 0;
1073 	entry->advice = advice;
1074 	if (flags & UVM_FLAG_STACK) {
1075 		entry->etype |= UVM_ET_STACK;
1076 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
1077 			map->serial++;
1078 	}
1079 	if (flags & UVM_FLAG_COPYONW) {
1080 		entry->etype |= UVM_ET_COPYONWRITE;
1081 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1082 			entry->etype |= UVM_ET_NEEDSCOPY;
1083 	}
1084 	if (flags & UVM_FLAG_OVERLAY) {
1085 		KERNEL_LOCK();
1086 		entry->aref.ar_pageoff = 0;
1087 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1088 		KERNEL_UNLOCK();
1089 	}
1090 
1091 	/* Update map and process statistics. */
1092 	map->size += sz;
1093 	((struct vmspace *)map)->vm_dused += uvmspace_dused(map, *addr, *addr + sz);
1094 
1095 unlock:
1096 	vm_map_unlock(map);
1097 
1098 	/*
1099 	 * Remove dead entries.
1100 	 *
1101 	 * Dead entries may be the result of merging.
1102 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1103 	 * destroy free-space entries.
1104 	 */
1105 	uvm_unmap_detach(&dead, 0);
1106 out:
1107 	if (new)
1108 		uvm_mapent_free(new);
1109 	return error;
1110 }
1111 
1112 /*
1113  * uvm_map: establish a valid mapping in map
1114  *
1115  * => *addr and sz must be a multiple of PAGE_SIZE.
1116  * => map must be unlocked.
1117  * => <uobj,uoffset> value meanings (4 cases):
1118  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1119  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1120  *	[3] <uobj,uoffset>		== normal mapping
1121  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1122  *
1123  *   case [4] is for kernel mappings where we don't know the offset until
1124  *   we've found a virtual address.   note that kernel object offsets are
1125  *   always relative to vm_map_min(kernel_map).
1126  *
1127  * => align: align vaddr, must be a power-of-2.
1128  *    Align is only a hint and will be ignored if the alignment fails.
1129  */
1130 int
1131 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
1132     struct uvm_object *uobj, voff_t uoffset,
1133     vsize_t align, unsigned int flags)
1134 {
1135 	struct vm_map_entry	*first, *last, *entry, *new;
1136 	struct uvm_map_deadq	 dead;
1137 	vm_prot_t		 prot;
1138 	vm_prot_t		 maxprot;
1139 	vm_inherit_t		 inherit;
1140 	int			 advice;
1141 	int			 error;
1142 	vaddr_t			 pmap_align, pmap_offset;
1143 	vaddr_t			 hint;
1144 
1145 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1146 		splassert(IPL_NONE);
1147 	else
1148 		splassert(IPL_VM);
1149 
1150 	/*
1151 	 * We use pmap_align and pmap_offset as alignment and offset variables.
1152 	 *
1153 	 * Because the align parameter takes precedence over pmap prefer,
1154 	 * the pmap_align will need to be set to align, with pmap_offset = 0,
1155 	 * if pmap_prefer will not align.
1156 	 */
1157 	if (uoffset == UVM_UNKNOWN_OFFSET) {
1158 		pmap_align = MAX(align, PAGE_SIZE);
1159 		pmap_offset = 0;
1160 	} else {
1161 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
1162 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
1163 
1164 		if (align == 0 ||
1165 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
1166 			/* pmap_offset satisfies align, no change. */
1167 		} else {
1168 			/* Align takes precedence over pmap prefer. */
1169 			pmap_align = align;
1170 			pmap_offset = 0;
1171 		}
1172 	}
1173 
1174 	/* Decode parameters. */
1175 	prot = UVM_PROTECTION(flags);
1176 	maxprot = UVM_MAXPROTECTION(flags);
1177 	advice = UVM_ADVICE(flags);
1178 	inherit = UVM_INHERIT(flags);
1179 	error = 0;
1180 	hint = trunc_page(*addr);
1181 	TAILQ_INIT(&dead);
1182 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1183 	KASSERT((align & (align - 1)) == 0);
1184 
1185 	/* Holes are incompatible with other types of mappings. */
1186 	if (flags & UVM_FLAG_HOLE) {
1187 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1188 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1189 	}
1190 
1191 	/* Unset hint for kernel_map non-fixed allocations. */
1192 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1193 		hint = 0;
1194 
1195 	/* Check protection. */
1196 	if ((prot & maxprot) != prot)
1197 		return EACCES;
1198 
1199 	if (map == kernel_map &&
1200 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1201 		panic("uvm_map: kernel map W^X violation requested");
1202 
1203 	/*
1204 	 * Before grabbing the lock, allocate a map entry for later
1205 	 * use to ensure we don't wait for memory while holding the
1206 	 * vm_map_lock.
1207 	 */
1208 	new = uvm_mapent_alloc(map, flags);
1209 	if (new == NULL)
1210 		return(ENOMEM);
1211 
1212 	if (flags & UVM_FLAG_TRYLOCK) {
1213 		if (vm_map_lock_try(map) == FALSE) {
1214 			error = EFAULT;
1215 			goto out;
1216 		}
1217 	} else {
1218 		vm_map_lock(map);
1219 	}
1220 
1221 	first = last = NULL;
1222 	if (flags & UVM_FLAG_FIXED) {
1223 		/*
1224 		 * Fixed location.
1225 		 *
1226 		 * Note: we ignore align, pmap_prefer.
1227 		 * Fill in first, last and *addr.
1228 		 */
1229 		KASSERT((*addr & PAGE_MASK) == 0);
1230 
1231 		/*
1232 		 * Grow pmap to include allocated address.
1233 		 * If the growth fails, the allocation will fail too.
1234 		 */
1235 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1236 		    uvm_maxkaddr < (*addr + sz)) {
1237 			uvm_map_kmem_grow(map, &dead,
1238 			    *addr + sz - uvm_maxkaddr, flags);
1239 		}
1240 
1241 		/* Check that the space is available. */
1242 		if (flags & UVM_FLAG_UNMAP)
1243 			uvm_unmap_remove(map, *addr, *addr + sz, &dead, FALSE, TRUE);
1244 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1245 			error = ENOMEM;
1246 			goto unlock;
1247 		}
1248 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1249 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1250 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1251 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1252 		/*
1253 		 * Address used as hint.
1254 		 *
1255 		 * Note: we enforce the alignment restriction,
1256 		 * but ignore pmap_prefer.
1257 		 */
1258 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1259 		/* Run selection algorithm for executables. */
1260 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1261 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1262 
1263 		/* Grow kernel memory and try again. */
1264 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1265 			uvm_map_kmem_grow(map, &dead, sz, flags);
1266 
1267 			error = uvm_addr_invoke(map, map->uaddr_exe,
1268 			    &first, &last, addr, sz,
1269 			    pmap_align, pmap_offset, prot, hint);
1270 		}
1271 
1272 		if (error != 0)
1273 			goto unlock;
1274 	} else {
1275 		/* Update freelists from vmspace. */
1276 		if (map->flags & VM_MAP_ISVMSPACE)
1277 			uvm_map_vmspace_update(map, &dead, flags);
1278 
1279 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1280 		    pmap_align, pmap_offset, prot, hint);
1281 
1282 		/* Grow kernel memory and try again. */
1283 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1284 			uvm_map_kmem_grow(map, &dead, sz, flags);
1285 
1286 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1287 			    pmap_align, pmap_offset, prot, hint);
1288 		}
1289 
1290 		if (error != 0)
1291 			goto unlock;
1292 	}
1293 
1294 	/* Double-check if selected address doesn't cause overflow. */
1295 	if (*addr + sz < *addr) {
1296 		error = ENOMEM;
1297 		goto unlock;
1298 	}
1299 
1300 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1301 	    uvm_maxkaddr >= *addr + sz);
1302 
1303 	/* If we only want a query, return now. */
1304 	if (flags & UVM_FLAG_QUERY) {
1305 		error = 0;
1306 		goto unlock;
1307 	}
1308 
1309 	if (uobj == NULL)
1310 		uoffset = 0;
1311 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1312 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1313 		uoffset = *addr - vm_map_min(kernel_map);
1314 	}
1315 
1316 	/*
1317 	 * Create new entry.
1318 	 * first and last may be invalidated after this call.
1319 	 */
1320 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1321 	    new);
1322 	if (entry == NULL) {
1323 		error = ENOMEM;
1324 		goto unlock;
1325 	}
1326 	new = NULL;
1327 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1328 	entry->object.uvm_obj = uobj;
1329 	entry->offset = uoffset;
1330 	entry->protection = prot;
1331 	entry->max_protection = maxprot;
1332 	entry->inheritance = inherit;
1333 	entry->wired_count = 0;
1334 	entry->advice = advice;
1335 	if (flags & UVM_FLAG_STACK) {
1336 		entry->etype |= UVM_ET_STACK;
1337 		if (flags & UVM_FLAG_UNMAP)
1338 			map->serial++;
1339 	}
1340 	if (uobj)
1341 		entry->etype |= UVM_ET_OBJ;
1342 	else if (flags & UVM_FLAG_HOLE)
1343 		entry->etype |= UVM_ET_HOLE;
1344 	if (flags & UVM_FLAG_NOFAULT)
1345 		entry->etype |= UVM_ET_NOFAULT;
1346 	if (flags & UVM_FLAG_WC)
1347 		entry->etype |= UVM_ET_WC;
1348 	if (flags & UVM_FLAG_COPYONW) {
1349 		entry->etype |= UVM_ET_COPYONWRITE;
1350 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1351 			entry->etype |= UVM_ET_NEEDSCOPY;
1352 	}
1353 	if (flags & UVM_FLAG_OVERLAY) {
1354 		entry->aref.ar_pageoff = 0;
1355 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1356 	}
1357 
1358 	/* Update map and process statistics. */
1359 	if (!(flags & UVM_FLAG_HOLE)) {
1360 		map->size += sz;
1361 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) {
1362 			((struct vmspace *)map)->vm_dused +=
1363 			    uvmspace_dused(map, *addr, *addr + sz);
1364 		}
1365 	}
1366 
1367 	/*
1368 	 * Try to merge entry.
1369 	 *
1370 	 * Userland allocations are kept separated most of the time.
1371 	 * Forego the effort of merging what most of the time can't be merged
1372 	 * and only try the merge if it concerns a kernel entry.
1373 	 */
1374 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1375 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1376 		uvm_mapent_tryjoin(map, entry, &dead);
1377 
1378 unlock:
1379 	vm_map_unlock(map);
1380 
1381 	/*
1382 	 * Remove dead entries.
1383 	 *
1384 	 * Dead entries may be the result of merging.
1385 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1386 	 * destroy free-space entries.
1387 	 */
1388 	if (map->flags & VM_MAP_INTRSAFE)
1389 		uvm_unmap_detach_intrsafe(&dead);
1390 	else
1391 		uvm_unmap_detach(&dead, 0);
1392 out:
1393 	if (new)
1394 		uvm_mapent_free(new);
1395 	return error;
1396 }
1397 
1398 /*
1399  * True iff e1 and e2 can be joined together.
1400  */
1401 int
1402 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1403     struct vm_map_entry *e2)
1404 {
1405 	KDASSERT(e1 != NULL && e2 != NULL);
1406 
1407 	/* Must be the same entry type and not have free memory between. */
1408 	if (e1->etype != e2->etype || e1->end != e2->start)
1409 		return 0;
1410 
1411 	/* Submaps are never joined. */
1412 	if (UVM_ET_ISSUBMAP(e1))
1413 		return 0;
1414 
1415 	/* Never merge wired memory. */
1416 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1417 		return 0;
1418 
1419 	/* Protection, inheritance and advice must be equal. */
1420 	if (e1->protection != e2->protection ||
1421 	    e1->max_protection != e2->max_protection ||
1422 	    e1->inheritance != e2->inheritance ||
1423 	    e1->advice != e2->advice)
1424 		return 0;
1425 
1426 	/* If uvm_object: object itself and offsets within object must match. */
1427 	if (UVM_ET_ISOBJ(e1)) {
1428 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1429 			return 0;
1430 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1431 			return 0;
1432 	}
1433 
1434 	/*
1435 	 * Cannot join shared amaps.
1436 	 * Note: no need to lock amap to look at refs, since we don't care
1437 	 * about its exact value.
1438 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1439 	 */
1440 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1441 		return 0;
1442 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1443 		return 0;
1444 
1445 	/* Apprently, e1 and e2 match. */
1446 	return 1;
1447 }
1448 
1449 /*
1450  * Join support function.
1451  *
1452  * Returns the merged entry on succes.
1453  * Returns NULL if the merge failed.
1454  */
1455 struct vm_map_entry*
1456 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1457     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1458 {
1459 	struct uvm_addr_state *free;
1460 
1461 	/*
1462 	 * Merging is not supported for map entries that
1463 	 * contain an amap in e1. This should never happen
1464 	 * anyway, because only kernel entries are merged.
1465 	 * These do not contain amaps.
1466 	 * e2 contains no real information in its amap,
1467 	 * so it can be erased immediately.
1468 	 */
1469 	KASSERT(e1->aref.ar_amap == NULL);
1470 
1471 	/*
1472 	 * Don't drop obj reference:
1473 	 * uvm_unmap_detach will do this for us.
1474 	 */
1475 	free = uvm_map_uaddr_e(map, e1);
1476 	uvm_mapent_free_remove(map, free, e1);
1477 
1478 	free = uvm_map_uaddr_e(map, e2);
1479 	uvm_mapent_free_remove(map, free, e2);
1480 	uvm_mapent_addr_remove(map, e2);
1481 	e1->end = e2->end;
1482 	e1->guard = e2->guard;
1483 	e1->fspace = e2->fspace;
1484 	uvm_mapent_free_insert(map, free, e1);
1485 
1486 	DEAD_ENTRY_PUSH(dead, e2);
1487 	return e1;
1488 }
1489 
1490 /*
1491  * Attempt forward and backward joining of entry.
1492  *
1493  * Returns entry after joins.
1494  * We are guaranteed that the amap of entry is either non-existent or
1495  * has never been used.
1496  */
1497 struct vm_map_entry*
1498 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1499     struct uvm_map_deadq *dead)
1500 {
1501 	struct vm_map_entry *other;
1502 	struct vm_map_entry *merged;
1503 
1504 	/* Merge with previous entry. */
1505 	other = RBT_PREV(uvm_map_addr, entry);
1506 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1507 		merged = uvm_mapent_merge(map, other, entry, dead);
1508 		if (merged)
1509 			entry = merged;
1510 	}
1511 
1512 	/*
1513 	 * Merge with next entry.
1514 	 *
1515 	 * Because amap can only extend forward and the next entry
1516 	 * probably contains sensible info, only perform forward merging
1517 	 * in the absence of an amap.
1518 	 */
1519 	other = RBT_NEXT(uvm_map_addr, entry);
1520 	if (other && entry->aref.ar_amap == NULL &&
1521 	    other->aref.ar_amap == NULL &&
1522 	    uvm_mapent_isjoinable(map, entry, other)) {
1523 		merged = uvm_mapent_merge(map, entry, other, dead);
1524 		if (merged)
1525 			entry = merged;
1526 	}
1527 
1528 	return entry;
1529 }
1530 
1531 /*
1532  * Kill entries that are no longer in a map.
1533  */
1534 void
1535 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1536 {
1537 	struct vm_map_entry *entry;
1538 	int waitok = flags & UVM_PLA_WAITOK;
1539 
1540 	if (TAILQ_EMPTY(deadq))
1541 		return;
1542 
1543 	KERNEL_LOCK();
1544 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1545 		if (waitok)
1546 			uvm_pause();
1547 		/* Drop reference to amap, if we've got one. */
1548 		if (entry->aref.ar_amap)
1549 			amap_unref(entry->aref.ar_amap,
1550 			    entry->aref.ar_pageoff,
1551 			    atop(entry->end - entry->start),
1552 			    flags & AMAP_REFALL);
1553 
1554 		/* Drop reference to our backing object, if we've got one. */
1555 		if (UVM_ET_ISSUBMAP(entry)) {
1556 			/* ... unlikely to happen, but play it safe */
1557 			uvm_map_deallocate(entry->object.sub_map);
1558 		} else if (UVM_ET_ISOBJ(entry) &&
1559 		    entry->object.uvm_obj->pgops->pgo_detach) {
1560 			entry->object.uvm_obj->pgops->pgo_detach(
1561 			    entry->object.uvm_obj);
1562 		}
1563 
1564 		/* Step to next. */
1565 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1566 		uvm_mapent_free(entry);
1567 	}
1568 	KERNEL_UNLOCK();
1569 }
1570 
1571 void
1572 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1573 {
1574 	struct vm_map_entry *entry;
1575 
1576 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1577 		KASSERT(entry->aref.ar_amap == NULL);
1578 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1579 		KASSERT(!UVM_ET_ISOBJ(entry));
1580 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1581 		uvm_mapent_free(entry);
1582 	}
1583 }
1584 
1585 /*
1586  * Create and insert new entry.
1587  *
1588  * Returned entry contains new addresses and is inserted properly in the tree.
1589  * first and last are (probably) no longer valid.
1590  */
1591 struct vm_map_entry*
1592 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1593     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1594     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1595 {
1596 	struct vm_map_entry *entry, *prev;
1597 	struct uvm_addr_state *free;
1598 	vaddr_t min, max;	/* free space boundaries for new entry */
1599 
1600 	KDASSERT(map != NULL);
1601 	KDASSERT(first != NULL);
1602 	KDASSERT(last != NULL);
1603 	KDASSERT(dead != NULL);
1604 	KDASSERT(sz > 0);
1605 	KDASSERT(addr + sz > addr);
1606 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1607 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1608 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1609 	uvm_tree_sanity(map, __FILE__, __LINE__);
1610 
1611 	min = addr + sz;
1612 	max = VMMAP_FREE_END(last);
1613 
1614 	/* Initialize new entry. */
1615 	if (new == NULL)
1616 		entry = uvm_mapent_alloc(map, flags);
1617 	else
1618 		entry = new;
1619 	if (entry == NULL)
1620 		return NULL;
1621 	entry->offset = 0;
1622 	entry->etype = 0;
1623 	entry->wired_count = 0;
1624 	entry->aref.ar_pageoff = 0;
1625 	entry->aref.ar_amap = NULL;
1626 
1627 	entry->start = addr;
1628 	entry->end = min;
1629 	entry->guard = 0;
1630 	entry->fspace = 0;
1631 
1632 	/* Reset free space in first. */
1633 	free = uvm_map_uaddr_e(map, first);
1634 	uvm_mapent_free_remove(map, free, first);
1635 	first->guard = 0;
1636 	first->fspace = 0;
1637 
1638 	/*
1639 	 * Remove all entries that are fully replaced.
1640 	 * We are iterating using last in reverse order.
1641 	 */
1642 	for (; first != last; last = prev) {
1643 		prev = RBT_PREV(uvm_map_addr, last);
1644 
1645 		KDASSERT(last->start == last->end);
1646 		free = uvm_map_uaddr_e(map, last);
1647 		uvm_mapent_free_remove(map, free, last);
1648 		uvm_mapent_addr_remove(map, last);
1649 		DEAD_ENTRY_PUSH(dead, last);
1650 	}
1651 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1652 	if (first->start == addr) {
1653 		uvm_mapent_addr_remove(map, first);
1654 		DEAD_ENTRY_PUSH(dead, first);
1655 	} else {
1656 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1657 		    addr, flags);
1658 	}
1659 
1660 	/* Finally, link in entry. */
1661 	uvm_mapent_addr_insert(map, entry);
1662 	uvm_map_fix_space(map, entry, min, max, flags);
1663 
1664 	uvm_tree_sanity(map, __FILE__, __LINE__);
1665 	return entry;
1666 }
1667 
1668 
1669 /*
1670  * uvm_mapent_alloc: allocate a map entry
1671  */
1672 struct vm_map_entry *
1673 uvm_mapent_alloc(struct vm_map *map, int flags)
1674 {
1675 	struct vm_map_entry *me, *ne;
1676 	int pool_flags;
1677 	int i;
1678 
1679 	pool_flags = PR_WAITOK;
1680 	if (flags & UVM_FLAG_TRYLOCK)
1681 		pool_flags = PR_NOWAIT;
1682 
1683 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1684 		mtx_enter(&uvm_kmapent_mtx);
1685 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1686 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1687 			    &kd_nowait);
1688 			if (ne == NULL)
1689 				panic("uvm_mapent_alloc: cannot allocate map "
1690 				    "entry");
1691 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1692 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1693 				    &ne[i], daddrs.addr_kentry);
1694 			}
1695 			if (ratecheck(&uvm_kmapent_last_warn_time,
1696 			    &uvm_kmapent_warn_rate))
1697 				printf("uvm_mapent_alloc: out of static "
1698 				    "map entries\n");
1699 		}
1700 		me = SLIST_FIRST(&uvm.kentry_free);
1701 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1702 		uvmexp.kmapent++;
1703 		mtx_leave(&uvm_kmapent_mtx);
1704 		me->flags = UVM_MAP_STATIC;
1705 	} else if (map == kernel_map) {
1706 		splassert(IPL_NONE);
1707 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1708 		if (me == NULL)
1709 			goto out;
1710 		me->flags = UVM_MAP_KMEM;
1711 	} else {
1712 		splassert(IPL_NONE);
1713 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1714 		if (me == NULL)
1715 			goto out;
1716 		me->flags = 0;
1717 	}
1718 
1719 	if (me != NULL) {
1720 		RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1721 	}
1722 
1723 out:
1724 	return(me);
1725 }
1726 
1727 /*
1728  * uvm_mapent_free: free map entry
1729  *
1730  * => XXX: static pool for kernel map?
1731  */
1732 void
1733 uvm_mapent_free(struct vm_map_entry *me)
1734 {
1735 	if (me->flags & UVM_MAP_STATIC) {
1736 		mtx_enter(&uvm_kmapent_mtx);
1737 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1738 		uvmexp.kmapent--;
1739 		mtx_leave(&uvm_kmapent_mtx);
1740 	} else if (me->flags & UVM_MAP_KMEM) {
1741 		splassert(IPL_NONE);
1742 		pool_put(&uvm_map_entry_kmem_pool, me);
1743 	} else {
1744 		splassert(IPL_NONE);
1745 		pool_put(&uvm_map_entry_pool, me);
1746 	}
1747 }
1748 
1749 /*
1750  * uvm_map_lookup_entry: find map entry at or before an address.
1751  *
1752  * => map must at least be read-locked by caller
1753  * => entry is returned in "entry"
1754  * => return value is true if address is in the returned entry
1755  * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1756  * returned for those mappings.
1757  */
1758 boolean_t
1759 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1760     struct vm_map_entry **entry)
1761 {
1762 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1763 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1764 	    (*entry)->start <= address && (*entry)->end > address;
1765 }
1766 
1767 /*
1768  * Inside a vm_map find the sp address and verify MAP_STACK, and also
1769  * remember low and high regions of that of region  which is marked
1770  * with MAP_STACK.  Return TRUE.
1771  * If sp isn't in a MAP_STACK region return FALSE.
1772  */
1773 boolean_t
1774 uvm_map_check_stack_range(struct proc *p, vaddr_t sp)
1775 {
1776 	vm_map_t map = &p->p_vmspace->vm_map;
1777 	vm_map_entry_t entry;
1778 
1779 	if (sp < map->min_offset || sp >= map->max_offset)
1780 		return(FALSE);
1781 
1782 	/* lock map */
1783 	vm_map_lock_read(map);
1784 
1785 	/* lookup */
1786 	if (!uvm_map_lookup_entry(map, trunc_page(sp), &entry)) {
1787 		vm_map_unlock_read(map);
1788 		return(FALSE);
1789 	}
1790 
1791 	if ((entry->etype & UVM_ET_STACK) == 0) {
1792 		vm_map_unlock_read(map);
1793 		return (FALSE);
1794 	}
1795 	p->p_spstart = entry->start;
1796 	p->p_spend = entry->end;
1797 	p->p_spserial = map->serial;
1798 	vm_map_unlock_read(map);
1799 	return(TRUE);
1800 }
1801 
1802 /*
1803  * Check whether the given address range can be converted to a MAP_STACK
1804  * mapping.
1805  *
1806  * Must be called with map locked.
1807  */
1808 boolean_t
1809 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz)
1810 {
1811 	vaddr_t end = addr + sz;
1812 	struct vm_map_entry *first, *iter, *prev = NULL;
1813 
1814 	if (!uvm_map_lookup_entry(map, addr, &first)) {
1815 		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1816 		    addr, end, map);
1817 		return FALSE;
1818 	}
1819 
1820 	/*
1821 	 * Check that the address range exists and is contiguous.
1822 	 */
1823 	for (iter = first; iter != NULL && iter->start < end;
1824 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1825 		/*
1826 		 * Make sure that we do not have holes in the range.
1827 		 */
1828 #if 0
1829 		if (prev != NULL) {
1830 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1831 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1832 			    prev->start, prev->end, iter->start, iter->end);
1833 		}
1834 #endif
1835 
1836 		if (prev != NULL && prev->end != iter->start) {
1837 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1838 			    "hole in range\n", addr, end, map);
1839 			return FALSE;
1840 		}
1841 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1842 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1843 			    "hole in range\n", addr, end, map);
1844 			return FALSE;
1845 		}
1846 	}
1847 
1848 	return TRUE;
1849 }
1850 
1851 /*
1852  * Remap the middle-pages of an existing mapping as a stack range.
1853  * If there exists a previous contiguous mapping with the given range
1854  * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1855  * mapping is dropped, and a new anon mapping is created and marked as
1856  * a stack.
1857  *
1858  * Must be called with map unlocked.
1859  */
1860 int
1861 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1862 {
1863 	vm_map_t map = &p->p_vmspace->vm_map;
1864 	vaddr_t start, end;
1865 	int error;
1866 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1867 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1868 	    MAP_INHERIT_COPY, MADV_NORMAL,
1869 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1870 	    UVM_FLAG_COPYONW);
1871 
1872 	start = round_page(addr);
1873 	end = trunc_page(addr + sz);
1874 #ifdef MACHINE_STACK_GROWS_UP
1875 	if (end == addr + sz)
1876 		end -= PAGE_SIZE;
1877 #else
1878 	if (start == addr)
1879 		start += PAGE_SIZE;
1880 #endif
1881 
1882 	if (start < map->min_offset || end >= map->max_offset || end < start)
1883 		return EINVAL;
1884 
1885 	error = uvm_mapanon(map, &start, end - start, 0, flags);
1886 	if (error != 0)
1887 		printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1888 
1889 	return error;
1890 }
1891 
1892 /*
1893  * uvm_map_pie: return a random load address for a PIE executable
1894  * properly aligned.
1895  */
1896 #ifndef VM_PIE_MAX_ADDR
1897 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1898 #endif
1899 
1900 #ifndef VM_PIE_MIN_ADDR
1901 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1902 #endif
1903 
1904 #ifndef VM_PIE_MIN_ALIGN
1905 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1906 #endif
1907 
1908 vaddr_t
1909 uvm_map_pie(vaddr_t align)
1910 {
1911 	vaddr_t addr, space, min;
1912 
1913 	align = MAX(align, VM_PIE_MIN_ALIGN);
1914 
1915 	/* round up to next alignment */
1916 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1917 
1918 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1919 		return (align);
1920 
1921 	space = (VM_PIE_MAX_ADDR - min) / align;
1922 	space = MIN(space, (u_int32_t)-1);
1923 
1924 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1925 	addr += min;
1926 
1927 	return (addr);
1928 }
1929 
1930 void
1931 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1932 {
1933 	struct uvm_map_deadq dead;
1934 
1935 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1936 	    (end & (vaddr_t)PAGE_MASK) == 0);
1937 	TAILQ_INIT(&dead);
1938 	vm_map_lock(map);
1939 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
1940 	vm_map_unlock(map);
1941 
1942 	if (map->flags & VM_MAP_INTRSAFE)
1943 		uvm_unmap_detach_intrsafe(&dead);
1944 	else
1945 		uvm_unmap_detach(&dead, 0);
1946 }
1947 
1948 /*
1949  * Mark entry as free.
1950  *
1951  * entry will be put on the dead list.
1952  * The free space will be merged into the previous or a new entry,
1953  * unless markfree is false.
1954  */
1955 void
1956 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1957     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1958     boolean_t markfree)
1959 {
1960 	struct uvm_addr_state	*free;
1961 	struct vm_map_entry	*prev;
1962 	vaddr_t			 addr;	/* Start of freed range. */
1963 	vaddr_t			 end;	/* End of freed range. */
1964 
1965 	prev = *prev_ptr;
1966 	if (prev == entry)
1967 		*prev_ptr = prev = NULL;
1968 
1969 	if (prev == NULL ||
1970 	    VMMAP_FREE_END(prev) != entry->start)
1971 		prev = RBT_PREV(uvm_map_addr, entry);
1972 
1973 	/* Entry is describing only free memory and has nothing to drain into. */
1974 	if (prev == NULL && entry->start == entry->end && markfree) {
1975 		*prev_ptr = entry;
1976 		return;
1977 	}
1978 
1979 	addr = entry->start;
1980 	end = VMMAP_FREE_END(entry);
1981 	free = uvm_map_uaddr_e(map, entry);
1982 	uvm_mapent_free_remove(map, free, entry);
1983 	uvm_mapent_addr_remove(map, entry);
1984 	DEAD_ENTRY_PUSH(dead, entry);
1985 
1986 	if (markfree) {
1987 		if (prev) {
1988 			free = uvm_map_uaddr_e(map, prev);
1989 			uvm_mapent_free_remove(map, free, prev);
1990 		}
1991 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1992 	}
1993 }
1994 
1995 /*
1996  * Unwire and release referenced amap and object from map entry.
1997  */
1998 void
1999 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
2000 {
2001 	/* Unwire removed map entry. */
2002 	if (VM_MAPENT_ISWIRED(entry)) {
2003 		KERNEL_LOCK();
2004 		entry->wired_count = 0;
2005 		uvm_fault_unwire_locked(map, entry->start, entry->end);
2006 		KERNEL_UNLOCK();
2007 	}
2008 
2009 	/* Entry-type specific code. */
2010 	if (UVM_ET_ISHOLE(entry)) {
2011 		/* Nothing to be done for holes. */
2012 	} else if (map->flags & VM_MAP_INTRSAFE) {
2013 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2014 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
2015 		pmap_kremove(entry->start, entry->end - entry->start);
2016 	} else if (UVM_ET_ISOBJ(entry) &&
2017 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2018 		KASSERT(vm_map_pmap(map) == pmap_kernel());
2019 		/*
2020 		 * Note: kernel object mappings are currently used in
2021 		 * two ways:
2022 		 *  [1] "normal" mappings of pages in the kernel object
2023 		 *  [2] uvm_km_valloc'd allocations in which we
2024 		 *      pmap_enter in some non-kernel-object page
2025 		 *      (e.g. vmapbuf).
2026 		 *
2027 		 * for case [1], we need to remove the mapping from
2028 		 * the pmap and then remove the page from the kernel
2029 		 * object (because, once pages in a kernel object are
2030 		 * unmapped they are no longer needed, unlike, say,
2031 		 * a vnode where you might want the data to persist
2032 		 * until flushed out of a queue).
2033 		 *
2034 		 * for case [2], we need to remove the mapping from
2035 		 * the pmap.  there shouldn't be any pages at the
2036 		 * specified offset in the kernel object [but it
2037 		 * doesn't hurt to call uvm_km_pgremove just to be
2038 		 * safe?]
2039 		 *
2040 		 * uvm_km_pgremove currently does the following:
2041 		 *   for pages in the kernel object range:
2042 		 *     - drops the swap slot
2043 		 *     - uvm_pagefree the page
2044 		 *
2045 		 * note there is version of uvm_km_pgremove() that
2046 		 * is used for "intrsafe" objects.
2047 		 */
2048 		/*
2049 		 * remove mappings from pmap and drop the pages
2050 		 * from the object.  offsets are always relative
2051 		 * to vm_map_min(kernel_map).
2052 		 */
2053 		pmap_remove(pmap_kernel(), entry->start, entry->end);
2054 		uvm_km_pgremove(entry->object.uvm_obj,
2055 		    entry->start - vm_map_min(kernel_map),
2056 		    entry->end - vm_map_min(kernel_map));
2057 
2058 		/*
2059 		 * null out kernel_object reference, we've just
2060 		 * dropped it
2061 		 */
2062 		entry->etype &= ~UVM_ET_OBJ;
2063 		entry->object.uvm_obj = NULL;  /* to be safe */
2064 	} else {
2065 		/* remove mappings the standard way. */
2066 		pmap_remove(map->pmap, entry->start, entry->end);
2067 	}
2068 }
2069 
2070 /*
2071  * Remove all entries from start to end.
2072  *
2073  * If remove_holes, then remove ET_HOLE entries as well.
2074  * If markfree, entry will be properly marked free, otherwise, no replacement
2075  * entry will be put in the tree (corrupting the tree).
2076  */
2077 void
2078 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2079     struct uvm_map_deadq *dead, boolean_t remove_holes,
2080     boolean_t markfree)
2081 {
2082 	struct vm_map_entry *prev_hint, *next, *entry;
2083 
2084 	start = MAX(start, map->min_offset);
2085 	end = MIN(end, map->max_offset);
2086 	if (start >= end)
2087 		return;
2088 
2089 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2090 		splassert(IPL_NONE);
2091 	else
2092 		splassert(IPL_VM);
2093 
2094 	/* Find first affected entry. */
2095 	entry = uvm_map_entrybyaddr(&map->addr, start);
2096 	KDASSERT(entry != NULL && entry->start <= start);
2097 	if (entry->end <= start && markfree)
2098 		entry = RBT_NEXT(uvm_map_addr, entry);
2099 	else
2100 		UVM_MAP_CLIP_START(map, entry, start);
2101 
2102 	/*
2103 	 * Iterate entries until we reach end address.
2104 	 * prev_hint hints where the freed space can be appended to.
2105 	 */
2106 	prev_hint = NULL;
2107 	for (; entry != NULL && entry->start < end; entry = next) {
2108 		KDASSERT(entry->start >= start);
2109 		if (entry->end > end || !markfree)
2110 			UVM_MAP_CLIP_END(map, entry, end);
2111 		KDASSERT(entry->start >= start && entry->end <= end);
2112 		next = RBT_NEXT(uvm_map_addr, entry);
2113 
2114 		/* Don't remove holes unless asked to do so. */
2115 		if (UVM_ET_ISHOLE(entry)) {
2116 			if (!remove_holes) {
2117 				prev_hint = entry;
2118 				continue;
2119 			}
2120 		}
2121 
2122 		/* A stack has been removed.. */
2123 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2124 			map->serial++;
2125 
2126 		/* Kill entry. */
2127 		uvm_unmap_kill_entry(map, entry);
2128 
2129 		/* Update space usage. */
2130 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2131 		    entry->object.uvm_obj == NULL &&
2132 		    !UVM_ET_ISHOLE(entry)) {
2133 			((struct vmspace *)map)->vm_dused -=
2134 			    uvmspace_dused(map, entry->start, entry->end);
2135 		}
2136 		if (!UVM_ET_ISHOLE(entry))
2137 			map->size -= entry->end - entry->start;
2138 
2139 		/* Actual removal of entry. */
2140 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2141 	}
2142 
2143 	pmap_update(vm_map_pmap(map));
2144 
2145 #ifdef VMMAP_DEBUG
2146 	if (markfree) {
2147 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2148 		    entry != NULL && entry->start < end;
2149 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2150 			KDASSERT(entry->end <= start ||
2151 			    entry->start == entry->end ||
2152 			    UVM_ET_ISHOLE(entry));
2153 		}
2154 	} else {
2155 		vaddr_t a;
2156 		for (a = start; a < end; a += PAGE_SIZE)
2157 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2158 	}
2159 #endif
2160 }
2161 
2162 /*
2163  * Mark all entries from first until end (exclusive) as pageable.
2164  *
2165  * Lock must be exclusive on entry and will not be touched.
2166  */
2167 void
2168 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2169     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2170 {
2171 	struct vm_map_entry *iter;
2172 
2173 	for (iter = first; iter != end;
2174 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2175 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2176 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2177 			continue;
2178 
2179 		iter->wired_count = 0;
2180 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2181 	}
2182 }
2183 
2184 /*
2185  * Mark all entries from first until end (exclusive) as wired.
2186  *
2187  * Lockflags determines the lock state on return from this function.
2188  * Lock must be exclusive on entry.
2189  */
2190 int
2191 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2192     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2193     int lockflags)
2194 {
2195 	struct vm_map_entry *iter;
2196 #ifdef DIAGNOSTIC
2197 	unsigned int timestamp_save;
2198 #endif
2199 	int error;
2200 
2201 	/*
2202 	 * Wire pages in two passes:
2203 	 *
2204 	 * 1: holding the write lock, we create any anonymous maps that need
2205 	 *    to be created.  then we clip each map entry to the region to
2206 	 *    be wired and increment its wiring count.
2207 	 *
2208 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2209 	 *    in the pages for any newly wired area (wired_count == 1).
2210 	 *
2211 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2212 	 *    deadlock with another thread that may have faulted on one of
2213 	 *    the pages to be wired (it would mark the page busy, blocking
2214 	 *    us, then in turn block on the map lock that we hold).
2215 	 *    because we keep the read lock on the map, the copy-on-write
2216 	 *    status of the entries we modify here cannot change.
2217 	 */
2218 	for (iter = first; iter != end;
2219 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2220 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2221 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2222 		    iter->protection == PROT_NONE)
2223 			continue;
2224 
2225 		/*
2226 		 * Perform actions of vm_map_lookup that need the write lock.
2227 		 * - create an anonymous map for copy-on-write
2228 		 * - anonymous map for zero-fill
2229 		 * Skip submaps.
2230 		 */
2231 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2232 		    UVM_ET_ISNEEDSCOPY(iter) &&
2233 		    ((iter->protection & PROT_WRITE) ||
2234 		    iter->object.uvm_obj == NULL)) {
2235 			amap_copy(map, iter, M_WAITOK,
2236 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2237 			    iter->start, iter->end);
2238 		}
2239 		iter->wired_count++;
2240 	}
2241 
2242 	/*
2243 	 * Pass 2.
2244 	 */
2245 #ifdef DIAGNOSTIC
2246 	timestamp_save = map->timestamp;
2247 #endif
2248 	vm_map_busy(map);
2249 	vm_map_downgrade(map);
2250 
2251 	error = 0;
2252 	for (iter = first; error == 0 && iter != end;
2253 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2254 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2255 		    iter->protection == PROT_NONE)
2256 			continue;
2257 
2258 		error = uvm_fault_wire(map, iter->start, iter->end,
2259 		    iter->protection);
2260 	}
2261 
2262 	if (error) {
2263 		/*
2264 		 * uvm_fault_wire failure
2265 		 *
2266 		 * Reacquire lock and undo our work.
2267 		 */
2268 		vm_map_upgrade(map);
2269 		vm_map_unbusy(map);
2270 #ifdef DIAGNOSTIC
2271 		if (timestamp_save != map->timestamp)
2272 			panic("uvm_map_pageable_wire: stale map");
2273 #endif
2274 
2275 		/*
2276 		 * first is no longer needed to restart loops.
2277 		 * Use it as iterator to unmap successful mappings.
2278 		 */
2279 		for (; first != iter;
2280 		    first = RBT_NEXT(uvm_map_addr, first)) {
2281 			if (UVM_ET_ISHOLE(first) ||
2282 			    first->start == first->end ||
2283 			    first->protection == PROT_NONE)
2284 				continue;
2285 
2286 			first->wired_count--;
2287 			if (!VM_MAPENT_ISWIRED(first)) {
2288 				uvm_fault_unwire_locked(map,
2289 				    iter->start, iter->end);
2290 			}
2291 		}
2292 
2293 		/* decrease counter in the rest of the entries */
2294 		for (; iter != end;
2295 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2296 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2297 			    iter->protection == PROT_NONE)
2298 				continue;
2299 
2300 			iter->wired_count--;
2301 		}
2302 
2303 		if ((lockflags & UVM_LK_EXIT) == 0)
2304 			vm_map_unlock(map);
2305 		return error;
2306 	}
2307 
2308 	/* We are currently holding a read lock. */
2309 	if ((lockflags & UVM_LK_EXIT) == 0) {
2310 		vm_map_unbusy(map);
2311 		vm_map_unlock_read(map);
2312 	} else {
2313 		vm_map_upgrade(map);
2314 		vm_map_unbusy(map);
2315 #ifdef DIAGNOSTIC
2316 		if (timestamp_save != map->timestamp)
2317 			panic("uvm_map_pageable_wire: stale map");
2318 #endif
2319 	}
2320 	return 0;
2321 }
2322 
2323 /*
2324  * uvm_map_pageable: set pageability of a range in a map.
2325  *
2326  * Flags:
2327  * UVM_LK_ENTER: map is already locked by caller
2328  * UVM_LK_EXIT:  don't unlock map on exit
2329  *
2330  * The full range must be in use (entries may not have fspace != 0).
2331  * UVM_ET_HOLE counts as unmapped.
2332  */
2333 int
2334 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2335     boolean_t new_pageable, int lockflags)
2336 {
2337 	struct vm_map_entry *first, *last, *tmp;
2338 	int error;
2339 
2340 	start = trunc_page(start);
2341 	end = round_page(end);
2342 
2343 	if (start > end)
2344 		return EINVAL;
2345 	if (start == end)
2346 		return 0;	/* nothing to do */
2347 	if (start < map->min_offset)
2348 		return EFAULT; /* why? see first XXX below */
2349 	if (end > map->max_offset)
2350 		return EINVAL; /* why? see second XXX below */
2351 
2352 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2353 	if ((lockflags & UVM_LK_ENTER) == 0)
2354 		vm_map_lock(map);
2355 
2356 	/*
2357 	 * Find first entry.
2358 	 *
2359 	 * Initial test on start is different, because of the different
2360 	 * error returned. Rest is tested further down.
2361 	 */
2362 	first = uvm_map_entrybyaddr(&map->addr, start);
2363 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2364 		/*
2365 		 * XXX if the first address is not mapped, it is EFAULT?
2366 		 */
2367 		error = EFAULT;
2368 		goto out;
2369 	}
2370 
2371 	/* Check that the range has no holes. */
2372 	for (last = first; last != NULL && last->start < end;
2373 	    last = RBT_NEXT(uvm_map_addr, last)) {
2374 		if (UVM_ET_ISHOLE(last) ||
2375 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2376 			/*
2377 			 * XXX unmapped memory in range, why is it EINVAL
2378 			 * instead of EFAULT?
2379 			 */
2380 			error = EINVAL;
2381 			goto out;
2382 		}
2383 	}
2384 
2385 	/*
2386 	 * Last ended at the first entry after the range.
2387 	 * Move back one step.
2388 	 *
2389 	 * Note that last may be NULL.
2390 	 */
2391 	if (last == NULL) {
2392 		last = RBT_MAX(uvm_map_addr, &map->addr);
2393 		if (last->end < end) {
2394 			error = EINVAL;
2395 			goto out;
2396 		}
2397 	} else {
2398 		KASSERT(last != first);
2399 		last = RBT_PREV(uvm_map_addr, last);
2400 	}
2401 
2402 	/* Wire/unwire pages here. */
2403 	if (new_pageable) {
2404 		/*
2405 		 * Mark pageable.
2406 		 * entries that are not wired are untouched.
2407 		 */
2408 		if (VM_MAPENT_ISWIRED(first))
2409 			UVM_MAP_CLIP_START(map, first, start);
2410 		/*
2411 		 * Split last at end.
2412 		 * Make tmp be the first entry after what is to be touched.
2413 		 * If last is not wired, don't touch it.
2414 		 */
2415 		if (VM_MAPENT_ISWIRED(last)) {
2416 			UVM_MAP_CLIP_END(map, last, end);
2417 			tmp = RBT_NEXT(uvm_map_addr, last);
2418 		} else
2419 			tmp = last;
2420 
2421 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2422 		error = 0;
2423 
2424 out:
2425 		if ((lockflags & UVM_LK_EXIT) == 0)
2426 			vm_map_unlock(map);
2427 		return error;
2428 	} else {
2429 		/*
2430 		 * Mark entries wired.
2431 		 * entries are always touched (because recovery needs this).
2432 		 */
2433 		if (!VM_MAPENT_ISWIRED(first))
2434 			UVM_MAP_CLIP_START(map, first, start);
2435 		/*
2436 		 * Split last at end.
2437 		 * Make tmp be the first entry after what is to be touched.
2438 		 * If last is not wired, don't touch it.
2439 		 */
2440 		if (!VM_MAPENT_ISWIRED(last)) {
2441 			UVM_MAP_CLIP_END(map, last, end);
2442 			tmp = RBT_NEXT(uvm_map_addr, last);
2443 		} else
2444 			tmp = last;
2445 
2446 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2447 		    lockflags);
2448 	}
2449 }
2450 
2451 /*
2452  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2453  * all mapped regions.
2454  *
2455  * Map must not be locked.
2456  * If no flags are specified, all ragions are unwired.
2457  */
2458 int
2459 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2460 {
2461 	vsize_t size;
2462 	struct vm_map_entry *iter;
2463 
2464 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2465 	vm_map_lock(map);
2466 
2467 	if (flags == 0) {
2468 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2469 		    NULL, map->min_offset, map->max_offset);
2470 
2471 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2472 		vm_map_unlock(map);
2473 		return 0;
2474 	}
2475 
2476 	if (flags & MCL_FUTURE)
2477 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2478 	if (!(flags & MCL_CURRENT)) {
2479 		vm_map_unlock(map);
2480 		return 0;
2481 	}
2482 
2483 	/*
2484 	 * Count number of pages in all non-wired entries.
2485 	 * If the number exceeds the limit, abort.
2486 	 */
2487 	size = 0;
2488 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2489 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2490 			continue;
2491 
2492 		size += iter->end - iter->start;
2493 	}
2494 
2495 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2496 		vm_map_unlock(map);
2497 		return ENOMEM;
2498 	}
2499 
2500 	/* XXX non-pmap_wired_count case must be handled by caller */
2501 #ifdef pmap_wired_count
2502 	if (limit != 0 &&
2503 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2504 		vm_map_unlock(map);
2505 		return ENOMEM;
2506 	}
2507 #endif
2508 
2509 	/*
2510 	 * uvm_map_pageable_wire will release lcok
2511 	 */
2512 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2513 	    NULL, map->min_offset, map->max_offset, 0);
2514 }
2515 
2516 /*
2517  * Initialize map.
2518  *
2519  * Allocates sufficient entries to describe the free memory in the map.
2520  */
2521 void
2522 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags)
2523 {
2524 	int i;
2525 
2526 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2527 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2528 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2529 
2530 	/*
2531 	 * Update parameters.
2532 	 *
2533 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2534 	 * properly.
2535 	 * We lose the top page if the full virtual address space is used.
2536 	 */
2537 	if (max & (vaddr_t)PAGE_MASK) {
2538 		max += 1;
2539 		if (max == 0) /* overflow */
2540 			max -= PAGE_SIZE;
2541 	}
2542 
2543 	RBT_INIT(uvm_map_addr, &map->addr);
2544 	map->uaddr_exe = NULL;
2545 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2546 		map->uaddr_any[i] = NULL;
2547 	map->uaddr_brk_stack = NULL;
2548 
2549 	map->size = 0;
2550 	map->ref_count = 0;
2551 	map->min_offset = min;
2552 	map->max_offset = max;
2553 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2554 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2555 	map->flags = flags;
2556 	map->timestamp = 0;
2557 	rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2558 	mtx_init(&map->mtx, IPL_VM);
2559 	mtx_init(&map->flags_lock, IPL_VM);
2560 
2561 	/* Configure the allocators. */
2562 	if (flags & VM_MAP_ISVMSPACE)
2563 		uvm_map_setup_md(map);
2564 	else
2565 		map->uaddr_any[3] = &uaddr_kbootstrap;
2566 
2567 	/*
2568 	 * Fill map entries.
2569 	 * We do not need to write-lock the map here because only the current
2570 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2571 	 * bogus triggering of lock-not-held assertions.
2572 	 */
2573 	uvm_map_setup_entries(map);
2574 	uvm_tree_sanity(map, __FILE__, __LINE__);
2575 	map->ref_count = 1;
2576 }
2577 
2578 /*
2579  * Destroy the map.
2580  *
2581  * This is the inverse operation to uvm_map_setup.
2582  */
2583 void
2584 uvm_map_teardown(struct vm_map *map)
2585 {
2586 	struct uvm_map_deadq	 dead_entries;
2587 	struct vm_map_entry	*entry, *tmp;
2588 #ifdef VMMAP_DEBUG
2589 	size_t			 numq, numt;
2590 #endif
2591 	int			 i;
2592 
2593 	KERNEL_ASSERT_LOCKED();
2594 	KERNEL_UNLOCK();
2595 	KERNEL_ASSERT_UNLOCKED();
2596 
2597 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2598 
2599 	/* Remove address selectors. */
2600 	uvm_addr_destroy(map->uaddr_exe);
2601 	map->uaddr_exe = NULL;
2602 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2603 		uvm_addr_destroy(map->uaddr_any[i]);
2604 		map->uaddr_any[i] = NULL;
2605 	}
2606 	uvm_addr_destroy(map->uaddr_brk_stack);
2607 	map->uaddr_brk_stack = NULL;
2608 
2609 	/*
2610 	 * Remove entries.
2611 	 *
2612 	 * The following is based on graph breadth-first search.
2613 	 *
2614 	 * In color terms:
2615 	 * - the dead_entries set contains all nodes that are reachable
2616 	 *   (i.e. both the black and the grey nodes)
2617 	 * - any entry not in dead_entries is white
2618 	 * - any entry that appears in dead_entries before entry,
2619 	 *   is black, the rest is grey.
2620 	 * The set [entry, end] is also referred to as the wavefront.
2621 	 *
2622 	 * Since the tree is always a fully connected graph, the breadth-first
2623 	 * search guarantees that each vmmap_entry is visited exactly once.
2624 	 * The vm_map is broken down in linear time.
2625 	 */
2626 	TAILQ_INIT(&dead_entries);
2627 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2628 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2629 	while (entry != NULL) {
2630 		sched_pause(yield);
2631 		uvm_unmap_kill_entry(map, entry);
2632 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2633 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2634 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2635 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2636 		/* Update wave-front. */
2637 		entry = TAILQ_NEXT(entry, dfree.deadq);
2638 	}
2639 
2640 #ifdef VMMAP_DEBUG
2641 	numt = numq = 0;
2642 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2643 		numt++;
2644 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2645 		numq++;
2646 	KASSERT(numt == numq);
2647 #endif
2648 	uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2649 
2650 	KERNEL_LOCK();
2651 
2652 	pmap_destroy(map->pmap);
2653 	map->pmap = NULL;
2654 }
2655 
2656 /*
2657  * Populate map with free-memory entries.
2658  *
2659  * Map must be initialized and empty.
2660  */
2661 void
2662 uvm_map_setup_entries(struct vm_map *map)
2663 {
2664 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2665 
2666 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2667 }
2668 
2669 /*
2670  * Split entry at given address.
2671  *
2672  * orig:  entry that is to be split.
2673  * next:  a newly allocated map entry that is not linked.
2674  * split: address at which the split is done.
2675  */
2676 void
2677 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2678     struct vm_map_entry *next, vaddr_t split)
2679 {
2680 	struct uvm_addr_state *free, *free_before;
2681 	vsize_t adj;
2682 
2683 	if ((split & PAGE_MASK) != 0) {
2684 		panic("uvm_map_splitentry: split address 0x%lx "
2685 		    "not on page boundary!", split);
2686 	}
2687 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2688 	uvm_tree_sanity(map, __FILE__, __LINE__);
2689 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2690 
2691 #ifdef VMMAP_DEBUG
2692 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2693 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2694 #endif /* VMMAP_DEBUG */
2695 
2696 	/*
2697 	 * Free space will change, unlink from free space tree.
2698 	 */
2699 	free = uvm_map_uaddr_e(map, orig);
2700 	uvm_mapent_free_remove(map, free, orig);
2701 
2702 	adj = split - orig->start;
2703 
2704 	uvm_mapent_copy(orig, next);
2705 	if (split >= orig->end) {
2706 		next->etype = 0;
2707 		next->offset = 0;
2708 		next->wired_count = 0;
2709 		next->start = next->end = split;
2710 		next->guard = 0;
2711 		next->fspace = VMMAP_FREE_END(orig) - split;
2712 		next->aref.ar_amap = NULL;
2713 		next->aref.ar_pageoff = 0;
2714 		orig->guard = MIN(orig->guard, split - orig->end);
2715 		orig->fspace = split - VMMAP_FREE_START(orig);
2716 	} else {
2717 		orig->fspace = 0;
2718 		orig->guard = 0;
2719 		orig->end = next->start = split;
2720 
2721 		if (next->aref.ar_amap) {
2722 			KERNEL_LOCK();
2723 			amap_splitref(&orig->aref, &next->aref, adj);
2724 			KERNEL_UNLOCK();
2725 		}
2726 		if (UVM_ET_ISSUBMAP(orig)) {
2727 			uvm_map_reference(next->object.sub_map);
2728 			next->offset += adj;
2729 		} else if (UVM_ET_ISOBJ(orig)) {
2730 			if (next->object.uvm_obj->pgops &&
2731 			    next->object.uvm_obj->pgops->pgo_reference) {
2732 				KERNEL_LOCK();
2733 				next->object.uvm_obj->pgops->pgo_reference(
2734 				    next->object.uvm_obj);
2735 				KERNEL_UNLOCK();
2736 			}
2737 			next->offset += adj;
2738 		}
2739 	}
2740 
2741 	/*
2742 	 * Link next into address tree.
2743 	 * Link orig and next into free-space tree.
2744 	 *
2745 	 * Don't insert 'next' into the addr tree until orig has been linked,
2746 	 * in case the free-list looks at adjecent entries in the addr tree
2747 	 * for its decisions.
2748 	 */
2749 	if (orig->fspace > 0)
2750 		free_before = free;
2751 	else
2752 		free_before = uvm_map_uaddr_e(map, orig);
2753 	uvm_mapent_free_insert(map, free_before, orig);
2754 	uvm_mapent_addr_insert(map, next);
2755 	uvm_mapent_free_insert(map, free, next);
2756 
2757 	uvm_tree_sanity(map, __FILE__, __LINE__);
2758 }
2759 
2760 
2761 #ifdef VMMAP_DEBUG
2762 
2763 void
2764 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2765     char *file, int line)
2766 {
2767 	char* map_special;
2768 
2769 	if (test)
2770 		return;
2771 
2772 	if (map == kernel_map)
2773 		map_special = " (kernel_map)";
2774 	else if (map == kmem_map)
2775 		map_special = " (kmem_map)";
2776 	else
2777 		map_special = "";
2778 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2779 	    line, test_str);
2780 }
2781 
2782 /*
2783  * Check that map is sane.
2784  */
2785 void
2786 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2787 {
2788 	struct vm_map_entry	*iter;
2789 	vaddr_t			 addr;
2790 	vaddr_t			 min, max, bound; /* Bounds checker. */
2791 	struct uvm_addr_state	*free;
2792 
2793 	addr = vm_map_min(map);
2794 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2795 		/*
2796 		 * Valid start, end.
2797 		 * Catch overflow for end+fspace.
2798 		 */
2799 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2800 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2801 
2802 		/* May not be empty. */
2803 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2804 		    file, line);
2805 
2806 		/* Addresses for entry must lie within map boundaries. */
2807 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2808 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2809 
2810 		/* Tree may not have gaps. */
2811 		UVM_ASSERT(map, iter->start == addr, file, line);
2812 		addr = VMMAP_FREE_END(iter);
2813 
2814 		/*
2815 		 * Free space may not cross boundaries, unless the same
2816 		 * free list is used on both sides of the border.
2817 		 */
2818 		min = VMMAP_FREE_START(iter);
2819 		max = VMMAP_FREE_END(iter);
2820 
2821 		while (min < max &&
2822 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2823 			UVM_ASSERT(map,
2824 			    uvm_map_uaddr(map, bound - 1) ==
2825 			    uvm_map_uaddr(map, bound),
2826 			    file, line);
2827 			min = bound;
2828 		}
2829 
2830 		free = uvm_map_uaddr_e(map, iter);
2831 		if (free) {
2832 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2833 			    file, line);
2834 		} else {
2835 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2836 			    file, line);
2837 		}
2838 	}
2839 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2840 }
2841 
2842 void
2843 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2844 {
2845 	struct vm_map_entry *iter;
2846 	vsize_t size;
2847 
2848 	size = 0;
2849 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2850 		if (!UVM_ET_ISHOLE(iter))
2851 			size += iter->end - iter->start;
2852 	}
2853 
2854 	if (map->size != size)
2855 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2856 	UVM_ASSERT(map, map->size == size, file, line);
2857 
2858 	vmspace_validate(map);
2859 }
2860 
2861 /*
2862  * This function validates the statistics on vmspace.
2863  */
2864 void
2865 vmspace_validate(struct vm_map *map)
2866 {
2867 	struct vmspace *vm;
2868 	struct vm_map_entry *iter;
2869 	vaddr_t imin, imax;
2870 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2871 	vsize_t stack, heap; /* Measured sizes. */
2872 
2873 	if (!(map->flags & VM_MAP_ISVMSPACE))
2874 		return;
2875 
2876 	vm = (struct vmspace *)map;
2877 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2878 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2879 
2880 	stack = heap = 0;
2881 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2882 		imin = imax = iter->start;
2883 
2884 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL)
2885 			continue;
2886 
2887 		/*
2888 		 * Update stack, heap.
2889 		 * Keep in mind that (theoretically) the entries of
2890 		 * userspace and stack may be joined.
2891 		 */
2892 		while (imin != iter->end) {
2893 			/*
2894 			 * Set imax to the first boundary crossed between
2895 			 * imin and stack addresses.
2896 			 */
2897 			imax = iter->end;
2898 			if (imin < stack_begin && imax > stack_begin)
2899 				imax = stack_begin;
2900 			else if (imin < stack_end && imax > stack_end)
2901 				imax = stack_end;
2902 
2903 			if (imin >= stack_begin && imin < stack_end)
2904 				stack += imax - imin;
2905 			else
2906 				heap += imax - imin;
2907 			imin = imax;
2908 		}
2909 	}
2910 
2911 	heap >>= PAGE_SHIFT;
2912 	if (heap != vm->vm_dused) {
2913 		printf("vmspace stack range: 0x%lx-0x%lx\n",
2914 		    stack_begin, stack_end);
2915 		panic("vmspace_validate: vmspace.vm_dused invalid, "
2916 		    "expected %ld pgs, got %ld pgs in map %p",
2917 		    heap, vm->vm_dused,
2918 		    map);
2919 	}
2920 }
2921 
2922 #endif /* VMMAP_DEBUG */
2923 
2924 /*
2925  * uvm_map_init: init mapping system at boot time.   note that we allocate
2926  * and init the static pool of structs vm_map_entry for the kernel here.
2927  */
2928 void
2929 uvm_map_init(void)
2930 {
2931 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2932 	int lcv;
2933 
2934 	/* now set up static pool of kernel map entries ... */
2935 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
2936 	SLIST_INIT(&uvm.kentry_free);
2937 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2938 		SLIST_INSERT_HEAD(&uvm.kentry_free,
2939 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
2940 	}
2941 
2942 	/* initialize the map-related pools. */
2943 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
2944 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
2945 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
2946 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
2947 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
2948 	    IPL_VM, 0, "vmmpekpl", NULL);
2949 	pool_sethiwat(&uvm_map_entry_pool, 8192);
2950 
2951 	uvm_addr_init();
2952 }
2953 
2954 #if defined(DDB)
2955 
2956 /*
2957  * DDB hooks
2958  */
2959 
2960 /*
2961  * uvm_map_printit: actually prints the map
2962  */
2963 void
2964 uvm_map_printit(struct vm_map *map, boolean_t full,
2965     int (*pr)(const char *, ...))
2966 {
2967 	struct vmspace			*vm;
2968 	struct vm_map_entry		*entry;
2969 	struct uvm_addr_state		*free;
2970 	int				 in_free, i;
2971 	char				 buf[8];
2972 
2973 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2974 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2975 	    map->b_start, map->b_end);
2976 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2977 	    map->s_start, map->s_end);
2978 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2979 	    map->size, map->ref_count, map->timestamp,
2980 	    map->flags);
2981 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2982 	    pmap_resident_count(map->pmap));
2983 
2984 	/* struct vmspace handling. */
2985 	if (map->flags & VM_MAP_ISVMSPACE) {
2986 		vm = (struct vmspace *)map;
2987 
2988 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2989 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2990 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2991 		    vm->vm_tsize, vm->vm_dsize);
2992 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2993 		    vm->vm_taddr, vm->vm_daddr);
2994 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2995 		    vm->vm_maxsaddr, vm->vm_minsaddr);
2996 	}
2997 
2998 	if (!full)
2999 		goto print_uaddr;
3000 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
3001 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
3002 		    entry, entry->start, entry->end, entry->object.uvm_obj,
3003 		    (long long)entry->offset, entry->aref.ar_amap,
3004 		    entry->aref.ar_pageoff);
3005 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, prot(max)=%d/%d, inh=%d, "
3006 		    "wc=%d, adv=%d\n",
3007 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
3008 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
3009 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
3010 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
3011 		    entry->protection, entry->max_protection,
3012 		    entry->inheritance, entry->wired_count, entry->advice);
3013 
3014 		free = uvm_map_uaddr_e(map, entry);
3015 		in_free = (free != NULL);
3016 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
3017 		    "free=0x%lx-0x%lx\n",
3018 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
3019 		    in_free ? 'T' : 'F',
3020 		    entry->guard,
3021 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
3022 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
3023 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
3024 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
3025 		if (free) {
3026 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
3027 			    free->uaddr_minaddr, free->uaddr_maxaddr,
3028 			    free->uaddr_functions->uaddr_name);
3029 		}
3030 	}
3031 
3032 print_uaddr:
3033 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
3034 	for (i = 0; i < nitems(map->uaddr_any); i++) {
3035 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
3036 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
3037 	}
3038 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
3039 }
3040 
3041 /*
3042  * uvm_object_printit: actually prints the object
3043  */
3044 void
3045 uvm_object_printit(uobj, full, pr)
3046 	struct uvm_object *uobj;
3047 	boolean_t full;
3048 	int (*pr)(const char *, ...);
3049 {
3050 	struct vm_page *pg;
3051 	int cnt = 0;
3052 
3053 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3054 	    uobj, uobj->pgops, uobj->uo_npages);
3055 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3056 		(*pr)("refs=<SYSTEM>\n");
3057 	else
3058 		(*pr)("refs=%d\n", uobj->uo_refs);
3059 
3060 	if (!full) {
3061 		return;
3062 	}
3063 	(*pr)("  PAGES <pg,offset>:\n  ");
3064 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3065 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3066 		if ((cnt % 3) == 2) {
3067 			(*pr)("\n  ");
3068 		}
3069 		cnt++;
3070 	}
3071 	if ((cnt % 3) != 2) {
3072 		(*pr)("\n");
3073 	}
3074 }
3075 
3076 /*
3077  * uvm_page_printit: actually print the page
3078  */
3079 static const char page_flagbits[] =
3080 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3081 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3082 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3083 
3084 void
3085 uvm_page_printit(pg, full, pr)
3086 	struct vm_page *pg;
3087 	boolean_t full;
3088 	int (*pr)(const char *, ...);
3089 {
3090 	struct vm_page *tpg;
3091 	struct uvm_object *uobj;
3092 	struct pglist *pgl;
3093 
3094 	(*pr)("PAGE %p:\n", pg);
3095 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3096 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3097 	    (long long)pg->phys_addr);
3098 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
3099 	    pg->uobject, pg->uanon, (long long)pg->offset);
3100 #if defined(UVM_PAGE_TRKOWN)
3101 	if (pg->pg_flags & PG_BUSY)
3102 		(*pr)("  owning thread = %d, tag=%s",
3103 		    pg->owner, pg->owner_tag);
3104 	else
3105 		(*pr)("  page not busy, no owner");
3106 #else
3107 	(*pr)("  [page ownership tracking disabled]");
3108 #endif
3109 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
3110 
3111 	if (!full)
3112 		return;
3113 
3114 	/* cross-verify object/anon */
3115 	if ((pg->pg_flags & PQ_FREE) == 0) {
3116 		if (pg->pg_flags & PQ_ANON) {
3117 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3118 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3119 				(pg->uanon) ? pg->uanon->an_page : NULL);
3120 			else
3121 				(*pr)("  anon backpointer is OK\n");
3122 		} else {
3123 			uobj = pg->uobject;
3124 			if (uobj) {
3125 				(*pr)("  checking object list\n");
3126 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3127 					if (tpg == pg) {
3128 						break;
3129 					}
3130 				}
3131 				if (tpg)
3132 					(*pr)("  page found on object list\n");
3133 				else
3134 					(*pr)("  >>> PAGE NOT FOUND "
3135 					    "ON OBJECT LIST! <<<\n");
3136 			}
3137 		}
3138 	}
3139 
3140 	/* cross-verify page queue */
3141 	if (pg->pg_flags & PQ_FREE) {
3142 		if (uvm_pmr_isfree(pg))
3143 			(*pr)("  page found in uvm_pmemrange\n");
3144 		else
3145 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3146 		pgl = NULL;
3147 	} else if (pg->pg_flags & PQ_INACTIVE) {
3148 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
3149 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
3150 	} else if (pg->pg_flags & PQ_ACTIVE) {
3151 		pgl = &uvm.page_active;
3152  	} else {
3153 		pgl = NULL;
3154 	}
3155 
3156 	if (pgl) {
3157 		(*pr)("  checking pageq list\n");
3158 		TAILQ_FOREACH(tpg, pgl, pageq) {
3159 			if (tpg == pg) {
3160 				break;
3161 			}
3162 		}
3163 		if (tpg)
3164 			(*pr)("  page found on pageq list\n");
3165 		else
3166 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3167 	}
3168 }
3169 #endif
3170 
3171 /*
3172  * uvm_map_protect: change map protection
3173  *
3174  * => set_max means set max_protection.
3175  * => map must be unlocked.
3176  */
3177 int
3178 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3179     vm_prot_t new_prot, boolean_t set_max)
3180 {
3181 	struct vm_map_entry *first, *iter;
3182 	vm_prot_t old_prot;
3183 	vm_prot_t mask;
3184 	int error;
3185 
3186 	if (start > end)
3187 		return EINVAL;
3188 	start = MAX(start, map->min_offset);
3189 	end = MIN(end, map->max_offset);
3190 	if (start >= end)
3191 		return 0;
3192 
3193 	error = 0;
3194 	vm_map_lock(map);
3195 
3196 	/*
3197 	 * Set up first and last.
3198 	 * - first will contain first entry at or after start.
3199 	 */
3200 	first = uvm_map_entrybyaddr(&map->addr, start);
3201 	KDASSERT(first != NULL);
3202 	if (first->end <= start)
3203 		first = RBT_NEXT(uvm_map_addr, first);
3204 
3205 	/* First, check for protection violations. */
3206 	for (iter = first; iter != NULL && iter->start < end;
3207 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3208 		/* Treat memory holes as free space. */
3209 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3210 			continue;
3211 
3212 		if (UVM_ET_ISSUBMAP(iter)) {
3213 			error = EINVAL;
3214 			goto out;
3215 		}
3216 		if ((new_prot & iter->max_protection) != new_prot) {
3217 			error = EACCES;
3218 			goto out;
3219 		}
3220 		if (map == kernel_map &&
3221 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3222 			panic("uvm_map_protect: kernel map W^X violation requested");
3223 	}
3224 
3225 	/* Fix protections.  */
3226 	for (iter = first; iter != NULL && iter->start < end;
3227 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3228 		/* Treat memory holes as free space. */
3229 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3230 			continue;
3231 
3232 		old_prot = iter->protection;
3233 
3234 		/*
3235 		 * Skip adapting protection iff old and new protection
3236 		 * are equal.
3237 		 */
3238 		if (set_max) {
3239 			if (old_prot == (new_prot & old_prot) &&
3240 			    iter->max_protection == new_prot)
3241 				continue;
3242 		} else {
3243 			if (old_prot == new_prot)
3244 				continue;
3245 		}
3246 
3247 		UVM_MAP_CLIP_START(map, iter, start);
3248 		UVM_MAP_CLIP_END(map, iter, end);
3249 
3250 		if (set_max) {
3251 			iter->max_protection = new_prot;
3252 			iter->protection &= new_prot;
3253 		} else
3254 			iter->protection = new_prot;
3255 
3256 		/*
3257 		 * update physical map if necessary.  worry about copy-on-write
3258 		 * here -- CHECK THIS XXX
3259 		 */
3260 		if (iter->protection != old_prot) {
3261 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3262 			    ~PROT_WRITE : PROT_MASK;
3263 
3264 			/* update pmap */
3265 			if ((iter->protection & mask) == PROT_NONE &&
3266 			    VM_MAPENT_ISWIRED(iter)) {
3267 				/*
3268 				 * TODO(ariane) this is stupid. wired_count
3269 				 * is 0 if not wired, otherwise anything
3270 				 * larger than 0 (incremented once each time
3271 				 * wire is called).
3272 				 * Mostly to be able to undo the damage on
3273 				 * failure. Not the actually be a wired
3274 				 * refcounter...
3275 				 * Originally: iter->wired_count--;
3276 				 * (don't we have to unwire this in the pmap
3277 				 * as well?)
3278 				 */
3279 				iter->wired_count = 0;
3280 			}
3281 			pmap_protect(map->pmap, iter->start, iter->end,
3282 			    iter->protection & mask);
3283 		}
3284 
3285 		/*
3286 		 * If the map is configured to lock any future mappings,
3287 		 * wire this entry now if the old protection was PROT_NONE
3288 		 * and the new protection is not PROT_NONE.
3289 		 */
3290 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3291 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3292 		    old_prot == PROT_NONE &&
3293 		    new_prot != PROT_NONE) {
3294 			if (uvm_map_pageable(map, iter->start, iter->end,
3295 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3296 				/*
3297 				 * If locking the entry fails, remember the
3298 				 * error if it's the first one.  Note we
3299 				 * still continue setting the protection in
3300 				 * the map, but it will return the resource
3301 				 * storage condition regardless.
3302 				 *
3303 				 * XXX Ignore what the actual error is,
3304 				 * XXX just call it a resource shortage
3305 				 * XXX so that it doesn't get confused
3306 				 * XXX what uvm_map_protect() itself would
3307 				 * XXX normally return.
3308 				 */
3309 				error = ENOMEM;
3310 			}
3311 		}
3312 	}
3313 	pmap_update(map->pmap);
3314 
3315 out:
3316 	vm_map_unlock(map);
3317 	return error;
3318 }
3319 
3320 /*
3321  * uvmspace_alloc: allocate a vmspace structure.
3322  *
3323  * - structure includes vm_map and pmap
3324  * - XXX: no locking on this structure
3325  * - refcnt set to 1, rest must be init'd by caller
3326  */
3327 struct vmspace *
3328 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3329     boolean_t remove_holes)
3330 {
3331 	struct vmspace *vm;
3332 
3333 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3334 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3335 	return (vm);
3336 }
3337 
3338 /*
3339  * uvmspace_init: initialize a vmspace structure.
3340  *
3341  * - XXX: no locking on this structure
3342  * - refcnt set to 1, rest must be init'd by caller
3343  */
3344 void
3345 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3346     boolean_t pageable, boolean_t remove_holes)
3347 {
3348 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3349 
3350 	if (pmap)
3351 		pmap_reference(pmap);
3352 	else
3353 		pmap = pmap_create();
3354 	vm->vm_map.pmap = pmap;
3355 
3356 	uvm_map_setup(&vm->vm_map, min, max,
3357 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3358 
3359 	vm->vm_refcnt = 1;
3360 
3361 	if (remove_holes)
3362 		pmap_remove_holes(vm);
3363 }
3364 
3365 /*
3366  * uvmspace_share: share a vmspace between two processes
3367  *
3368  * - XXX: no locking on vmspace
3369  * - used for vfork
3370  */
3371 
3372 struct vmspace *
3373 uvmspace_share(struct process *pr)
3374 {
3375 	struct vmspace *vm = pr->ps_vmspace;
3376 
3377 	vm->vm_refcnt++;
3378 	return vm;
3379 }
3380 
3381 /*
3382  * uvmspace_exec: the process wants to exec a new program
3383  *
3384  * - XXX: no locking on vmspace
3385  */
3386 
3387 void
3388 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3389 {
3390 	struct process *pr = p->p_p;
3391 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3392 	struct vm_map *map = &ovm->vm_map;
3393 	struct uvm_map_deadq dead_entries;
3394 
3395 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3396 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3397 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3398 
3399 	pmap_unuse_final(p);   /* before stack addresses go away */
3400 	TAILQ_INIT(&dead_entries);
3401 
3402 	/* see if more than one process is using this vmspace...  */
3403 	if (ovm->vm_refcnt == 1) {
3404 		/*
3405 		 * If pr is the only process using its vmspace then
3406 		 * we can safely recycle that vmspace for the program
3407 		 * that is being exec'd.
3408 		 */
3409 
3410 #ifdef SYSVSHM
3411 		/*
3412 		 * SYSV SHM semantics require us to kill all segments on an exec
3413 		 */
3414 		if (ovm->vm_shm)
3415 			shmexit(ovm);
3416 #endif
3417 
3418 		/*
3419 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3420 		 * when a process execs another program image.
3421 		 */
3422 		vm_map_lock(map);
3423 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3424 
3425 		/*
3426 		 * now unmap the old program
3427 		 *
3428 		 * Instead of attempting to keep the map valid, we simply
3429 		 * nuke all entries and ask uvm_map_setup to reinitialize
3430 		 * the map to the new boundaries.
3431 		 *
3432 		 * uvm_unmap_remove will actually nuke all entries for us
3433 		 * (as in, not replace them with free-memory entries).
3434 		 */
3435 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3436 		    &dead_entries, TRUE, FALSE);
3437 
3438 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3439 
3440 		/* Nuke statistics and boundaries. */
3441 		memset(&ovm->vm_startcopy, 0,
3442 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3443 
3444 
3445 		if (end & (vaddr_t)PAGE_MASK) {
3446 			end += 1;
3447 			if (end == 0) /* overflow */
3448 				end -= PAGE_SIZE;
3449 		}
3450 
3451 		/* Setup new boundaries and populate map with entries. */
3452 		map->min_offset = start;
3453 		map->max_offset = end;
3454 		uvm_map_setup_entries(map);
3455 		vm_map_unlock(map);
3456 
3457 		/* but keep MMU holes unavailable */
3458 		pmap_remove_holes(ovm);
3459 	} else {
3460 		/*
3461 		 * pr's vmspace is being shared, so we can't reuse
3462 		 * it for pr since it is still being used for others.
3463 		 * allocate a new vmspace for pr
3464 		 */
3465 		nvm = uvmspace_alloc(start, end,
3466 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3467 
3468 		/* install new vmspace and drop our ref to the old one. */
3469 		pmap_deactivate(p);
3470 		p->p_vmspace = pr->ps_vmspace = nvm;
3471 		pmap_activate(p);
3472 
3473 		uvmspace_free(ovm);
3474 	}
3475 
3476 	/* Release dead entries */
3477 	uvm_unmap_detach(&dead_entries, 0);
3478 }
3479 
3480 /*
3481  * uvmspace_free: free a vmspace data structure
3482  *
3483  * - XXX: no locking on vmspace
3484  */
3485 void
3486 uvmspace_free(struct vmspace *vm)
3487 {
3488 	if (--vm->vm_refcnt == 0) {
3489 		/*
3490 		 * lock the map, to wait out all other references to it.  delete
3491 		 * all of the mappings and pages they hold, then call the pmap
3492 		 * module to reclaim anything left.
3493 		 */
3494 #ifdef SYSVSHM
3495 		/* Get rid of any SYSV shared memory segments. */
3496 		if (vm->vm_shm != NULL)
3497 			shmexit(vm);
3498 #endif
3499 
3500 		uvm_map_teardown(&vm->vm_map);
3501 		pool_put(&uvm_vmspace_pool, vm);
3502 	}
3503 }
3504 
3505 /*
3506  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3507  * srcmap to the address range [dstaddr, dstaddr + sz) in
3508  * dstmap.
3509  *
3510  * The whole address range in srcmap must be backed by an object
3511  * (no holes).
3512  *
3513  * If successful, the address ranges share memory and the destination
3514  * address range uses the protection flags in prot.
3515  *
3516  * This routine assumes that sz is a multiple of PAGE_SIZE and
3517  * that dstaddr and srcaddr are page-aligned.
3518  */
3519 int
3520 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3521     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3522 {
3523 	int ret = 0;
3524 	vaddr_t unmap_end;
3525 	vaddr_t dstva;
3526 	vsize_t off, len, n = sz;
3527 	struct vm_map_entry *first = NULL, *last = NULL;
3528 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3529 	struct uvm_map_deadq dead;
3530 
3531 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3532 		return EINVAL;
3533 
3534 	TAILQ_INIT(&dead);
3535 	vm_map_lock(dstmap);
3536 	vm_map_lock_read(srcmap);
3537 
3538 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3539 		ret = ENOMEM;
3540 		goto exit_unlock;
3541 	}
3542 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3543 		ret = EINVAL;
3544 		goto exit_unlock;
3545 	}
3546 
3547 	unmap_end = dstaddr;
3548 	for (; src_entry != NULL;
3549 	    psrc_entry = src_entry,
3550 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3551 		/* hole in address space, bail out */
3552 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3553 			break;
3554 		if (src_entry->start >= srcaddr + sz)
3555 			break;
3556 
3557 		if (UVM_ET_ISSUBMAP(src_entry))
3558 			panic("uvm_share: encountered a submap (illegal)");
3559 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3560 		    UVM_ET_ISNEEDSCOPY(src_entry))
3561 			panic("uvm_share: non-copy_on_write map entries "
3562 			    "marked needs_copy (illegal)");
3563 
3564 		dstva = dstaddr;
3565 		if (src_entry->start > srcaddr) {
3566 			dstva += src_entry->start - srcaddr;
3567 			off = 0;
3568 		} else
3569 			off = srcaddr - src_entry->start;
3570 
3571 		if (n < src_entry->end - src_entry->start)
3572 			len = n;
3573 		else
3574 			len = src_entry->end - src_entry->start;
3575 		n -= len;
3576 
3577 		if (uvm_mapent_share(dstmap, dstva, len, off, prot, prot,
3578 		    srcmap, src_entry, &dead) == NULL)
3579 			break;
3580 
3581 		unmap_end = dstva + len;
3582 		if (n == 0)
3583 			goto exit_unlock;
3584 	}
3585 
3586 	ret = EINVAL;
3587 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE);
3588 
3589 exit_unlock:
3590 	vm_map_unlock_read(srcmap);
3591 	vm_map_unlock(dstmap);
3592 	uvm_unmap_detach(&dead, 0);
3593 
3594 	return ret;
3595 }
3596 
3597 /*
3598  * Clone map entry into other map.
3599  *
3600  * Mapping will be placed at dstaddr, for the same length.
3601  * Space must be available.
3602  * Reference counters are incremented.
3603  */
3604 struct vm_map_entry *
3605 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3606     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3607     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3608     int mapent_flags, int amap_share_flags)
3609 {
3610 	struct vm_map_entry *new_entry, *first, *last;
3611 
3612 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3613 
3614 	/* Create new entry (linked in on creation). Fill in first, last. */
3615 	first = last = NULL;
3616 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3617 		panic("uvmspace_fork: no space in map for "
3618 		    "entry in empty map");
3619 	}
3620 	new_entry = uvm_map_mkentry(dstmap, first, last,
3621 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3622 	if (new_entry == NULL)
3623 		return NULL;
3624 	/* old_entry -> new_entry */
3625 	new_entry->object = old_entry->object;
3626 	new_entry->offset = old_entry->offset;
3627 	new_entry->aref = old_entry->aref;
3628 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3629 	new_entry->protection = prot;
3630 	new_entry->max_protection = maxprot;
3631 	new_entry->inheritance = old_entry->inheritance;
3632 	new_entry->advice = old_entry->advice;
3633 
3634 	/* gain reference to object backing the map (can't be a submap). */
3635 	if (new_entry->aref.ar_amap) {
3636 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3637 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3638 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3639 		    amap_share_flags);
3640 	}
3641 
3642 	if (UVM_ET_ISOBJ(new_entry) &&
3643 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3644 		new_entry->offset += off;
3645 		new_entry->object.uvm_obj->pgops->pgo_reference
3646 		    (new_entry->object.uvm_obj);
3647 	}
3648 
3649 	return new_entry;
3650 }
3651 
3652 struct vm_map_entry *
3653 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3654     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3655     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3656 {
3657 	/*
3658 	 * If old_entry refers to a copy-on-write region that has not yet been
3659 	 * written to (needs_copy flag is set), then we need to allocate a new
3660 	 * amap for old_entry.
3661 	 *
3662 	 * If we do not do this, and the process owning old_entry does a copy-on
3663 	 * write later, old_entry and new_entry will refer to different memory
3664 	 * regions, and the memory between the processes is no longer shared.
3665 	 *
3666 	 * [in other words, we need to clear needs_copy]
3667 	 */
3668 
3669 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3670 		/* get our own amap, clears needs_copy */
3671 		amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3672 		    0, 0);
3673 		/* XXXCDC: WAITOK??? */
3674 	}
3675 
3676 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3677 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3678 }
3679 
3680 /*
3681  * share the mapping: this means we want the old and
3682  * new entries to share amaps and backing objects.
3683  */
3684 struct vm_map_entry *
3685 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3686     struct vm_map *old_map,
3687     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3688 {
3689 	struct vm_map_entry *new_entry;
3690 
3691 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3692 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3693 	    old_entry->max_protection, old_map, old_entry, dead);
3694 
3695 	/*
3696 	 * pmap_copy the mappings: this routine is optional
3697 	 * but if it is there it will reduce the number of
3698 	 * page faults in the new proc.
3699 	 */
3700 	if (!UVM_ET_ISHOLE(new_entry))
3701 		pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3702 		    (new_entry->end - new_entry->start), new_entry->start);
3703 
3704 	return (new_entry);
3705 }
3706 
3707 /*
3708  * copy-on-write the mapping (using mmap's
3709  * MAP_PRIVATE semantics)
3710  *
3711  * allocate new_entry, adjust reference counts.
3712  * (note that new references are read-only).
3713  */
3714 struct vm_map_entry *
3715 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3716     struct vm_map *old_map,
3717     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3718 {
3719 	struct vm_map_entry	*new_entry;
3720 	boolean_t		 protect_child;
3721 
3722 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3723 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3724 	    old_entry->max_protection, old_entry, dead, 0, 0);
3725 
3726 	new_entry->etype |=
3727 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3728 
3729 	/*
3730 	 * the new entry will need an amap.  it will either
3731 	 * need to be copied from the old entry or created
3732 	 * from scratch (if the old entry does not have an
3733 	 * amap).  can we defer this process until later
3734 	 * (by setting "needs_copy") or do we need to copy
3735 	 * the amap now?
3736 	 *
3737 	 * we must copy the amap now if any of the following
3738 	 * conditions hold:
3739 	 * 1. the old entry has an amap and that amap is
3740 	 *    being shared.  this means that the old (parent)
3741 	 *    process is sharing the amap with another
3742 	 *    process.  if we do not clear needs_copy here
3743 	 *    we will end up in a situation where both the
3744 	 *    parent and child process are referring to the
3745 	 *    same amap with "needs_copy" set.  if the
3746 	 *    parent write-faults, the fault routine will
3747 	 *    clear "needs_copy" in the parent by allocating
3748 	 *    a new amap.   this is wrong because the
3749 	 *    parent is supposed to be sharing the old amap
3750 	 *    and the new amap will break that.
3751 	 *
3752 	 * 2. if the old entry has an amap and a non-zero
3753 	 *    wire count then we are going to have to call
3754 	 *    amap_cow_now to avoid page faults in the
3755 	 *    parent process.   since amap_cow_now requires
3756 	 *    "needs_copy" to be clear we might as well
3757 	 *    clear it here as well.
3758 	 *
3759 	 */
3760 	if (old_entry->aref.ar_amap != NULL &&
3761 	    ((amap_flags(old_entry->aref.ar_amap) &
3762 	    AMAP_SHARED) != 0 ||
3763 	    VM_MAPENT_ISWIRED(old_entry))) {
3764 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3765 		    0, 0);
3766 		/* XXXCDC: M_WAITOK ... ok? */
3767 	}
3768 
3769 	/*
3770 	 * if the parent's entry is wired down, then the
3771 	 * parent process does not want page faults on
3772 	 * access to that memory.  this means that we
3773 	 * cannot do copy-on-write because we can't write
3774 	 * protect the old entry.   in this case we
3775 	 * resolve all copy-on-write faults now, using
3776 	 * amap_cow_now.   note that we have already
3777 	 * allocated any needed amap (above).
3778 	 */
3779 	if (VM_MAPENT_ISWIRED(old_entry)) {
3780 		/*
3781 		 * resolve all copy-on-write faults now
3782 		 * (note that there is nothing to do if
3783 		 * the old mapping does not have an amap).
3784 		 * XXX: is it worthwhile to bother with
3785 		 * pmap_copy in this case?
3786 		 */
3787 		if (old_entry->aref.ar_amap)
3788 			amap_cow_now(new_map, new_entry);
3789 	} else {
3790 		if (old_entry->aref.ar_amap) {
3791 			/*
3792 			 * setup mappings to trigger copy-on-write faults
3793 			 * we must write-protect the parent if it has
3794 			 * an amap and it is not already "needs_copy"...
3795 			 * if it is already "needs_copy" then the parent
3796 			 * has already been write-protected by a previous
3797 			 * fork operation.
3798 			 *
3799 			 * if we do not write-protect the parent, then
3800 			 * we must be sure to write-protect the child
3801 			 * after the pmap_copy() operation.
3802 			 *
3803 			 * XXX: pmap_copy should have some way of telling
3804 			 * us that it didn't do anything so we can avoid
3805 			 * calling pmap_protect needlessly.
3806 			 */
3807 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3808 				if (old_entry->max_protection & PROT_WRITE) {
3809 					pmap_protect(old_map->pmap,
3810 					    old_entry->start,
3811 					    old_entry->end,
3812 					    old_entry->protection &
3813 					    ~PROT_WRITE);
3814 					pmap_update(old_map->pmap);
3815 				}
3816 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3817 			}
3818 
3819 	  		/* parent must now be write-protected */
3820 	  		protect_child = FALSE;
3821 		} else {
3822 			/*
3823 			 * we only need to protect the child if the
3824 			 * parent has write access.
3825 			 */
3826 			if (old_entry->max_protection & PROT_WRITE)
3827 				protect_child = TRUE;
3828 			else
3829 				protect_child = FALSE;
3830 		}
3831 		/*
3832 		 * copy the mappings
3833 		 * XXX: need a way to tell if this does anything
3834 		 */
3835 		if (!UVM_ET_ISHOLE(new_entry))
3836 			pmap_copy(new_map->pmap, old_map->pmap,
3837 			    new_entry->start,
3838 			    (old_entry->end - old_entry->start),
3839 			    old_entry->start);
3840 
3841 		/* protect the child's mappings if necessary */
3842 		if (protect_child) {
3843 			pmap_protect(new_map->pmap, new_entry->start,
3844 			    new_entry->end,
3845 			    new_entry->protection &
3846 			    ~PROT_WRITE);
3847 		}
3848 	}
3849 
3850 	return (new_entry);
3851 }
3852 
3853 /*
3854  * zero the mapping: the new entry will be zero initialized
3855  */
3856 struct vm_map_entry *
3857 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3858     struct vm_map *old_map,
3859     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3860 {
3861 	struct vm_map_entry *new_entry;
3862 
3863 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3864 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3865 	    old_entry->max_protection, old_entry, dead, 0, 0);
3866 
3867 	new_entry->etype |=
3868 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3869 
3870 	if (new_entry->aref.ar_amap) {
3871 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3872 		    atop(new_entry->end - new_entry->start), 0);
3873 		new_entry->aref.ar_amap = NULL;
3874 		new_entry->aref.ar_pageoff = 0;
3875 	}
3876 
3877 	if (UVM_ET_ISOBJ(new_entry)) {
3878 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
3879 			new_entry->object.uvm_obj->pgops->pgo_detach(
3880 			    new_entry->object.uvm_obj);
3881 		new_entry->object.uvm_obj = NULL;
3882 		new_entry->etype &= ~UVM_ET_OBJ;
3883 	}
3884 
3885 	return (new_entry);
3886 }
3887 
3888 /*
3889  * uvmspace_fork: fork a process' main map
3890  *
3891  * => create a new vmspace for child process from parent.
3892  * => parent's map must not be locked.
3893  */
3894 struct vmspace *
3895 uvmspace_fork(struct process *pr)
3896 {
3897 	struct vmspace *vm1 = pr->ps_vmspace;
3898 	struct vmspace *vm2;
3899 	struct vm_map *old_map = &vm1->vm_map;
3900 	struct vm_map *new_map;
3901 	struct vm_map_entry *old_entry, *new_entry;
3902 	struct uvm_map_deadq dead;
3903 
3904 	vm_map_lock(old_map);
3905 
3906 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3907 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3908 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3909 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3910 	vm2->vm_dused = 0; /* Statistic managed by us. */
3911 	new_map = &vm2->vm_map;
3912 	vm_map_lock(new_map);
3913 
3914 	/* go entry-by-entry */
3915 	TAILQ_INIT(&dead);
3916 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3917 		if (old_entry->start == old_entry->end)
3918 			continue;
3919 
3920 		/* first, some sanity checks on the old entry */
3921 		if (UVM_ET_ISSUBMAP(old_entry)) {
3922 			panic("fork: encountered a submap during fork "
3923 			    "(illegal)");
3924 		}
3925 
3926 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3927 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
3928 			panic("fork: non-copy_on_write map entry marked "
3929 			    "needs_copy (illegal)");
3930 		}
3931 
3932 		/* Apply inheritance. */
3933 		switch (old_entry->inheritance) {
3934 		case MAP_INHERIT_SHARE:
3935 			new_entry = uvm_mapent_forkshared(vm2, new_map,
3936 			    old_map, old_entry, &dead);
3937 			break;
3938 		case MAP_INHERIT_COPY:
3939 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
3940 			    old_map, old_entry, &dead);
3941 			break;
3942 		case MAP_INHERIT_ZERO:
3943 			new_entry = uvm_mapent_forkzero(vm2, new_map,
3944 			    old_map, old_entry, &dead);
3945 			break;
3946 		default:
3947 			continue;
3948 		}
3949 
3950 	 	/* Update process statistics. */
3951 		if (!UVM_ET_ISHOLE(new_entry))
3952 			new_map->size += new_entry->end - new_entry->start;
3953 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
3954 			vm2->vm_dused += uvmspace_dused(
3955 			    new_map, new_entry->start, new_entry->end);
3956 		}
3957 	}
3958 
3959 	vm_map_unlock(old_map);
3960 	vm_map_unlock(new_map);
3961 
3962 	/*
3963 	 * This can actually happen, if multiple entries described a
3964 	 * space in which an entry was inherited.
3965 	 */
3966 	uvm_unmap_detach(&dead, 0);
3967 
3968 #ifdef SYSVSHM
3969 	if (vm1->vm_shm)
3970 		shmfork(vm1, vm2);
3971 #endif
3972 
3973 	return vm2;
3974 }
3975 
3976 /*
3977  * uvm_map_hint: return the beginning of the best area suitable for
3978  * creating a new mapping with "prot" protection.
3979  */
3980 vaddr_t
3981 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
3982     vaddr_t maxaddr)
3983 {
3984 	vaddr_t addr;
3985 	vaddr_t spacing;
3986 
3987 #ifdef __i386__
3988 	/*
3989 	 * If executable skip first two pages, otherwise start
3990 	 * after data + heap region.
3991 	 */
3992 	if ((prot & PROT_EXEC) != 0 &&
3993 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
3994 		addr = (PAGE_SIZE*2) +
3995 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
3996 		return (round_page(addr));
3997 	}
3998 #endif
3999 
4000 #if defined (__LP64__)
4001 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4002 #else
4003 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4004 #endif
4005 
4006 	/*
4007 	 * Start malloc/mmap after the brk.
4008 	 */
4009 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4010 	addr = MAX(addr, minaddr);
4011 
4012 	if (addr < maxaddr) {
4013 		while (spacing > maxaddr - addr)
4014 			spacing >>= 1;
4015 	}
4016 	addr += arc4random() & spacing;
4017 	return (round_page(addr));
4018 }
4019 
4020 /*
4021  * uvm_map_submap: punch down part of a map into a submap
4022  *
4023  * => only the kernel_map is allowed to be submapped
4024  * => the purpose of submapping is to break up the locking granularity
4025  *	of a larger map
4026  * => the range specified must have been mapped previously with a uvm_map()
4027  *	call [with uobj==NULL] to create a blank map entry in the main map.
4028  *	[And it had better still be blank!]
4029  * => maps which contain submaps should never be copied or forked.
4030  * => to remove a submap, use uvm_unmap() on the main map
4031  *	and then uvm_map_deallocate() the submap.
4032  * => main map must be unlocked.
4033  * => submap must have been init'd and have a zero reference count.
4034  *	[need not be locked as we don't actually reference it]
4035  */
4036 int
4037 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4038     struct vm_map *submap)
4039 {
4040 	struct vm_map_entry *entry;
4041 	int result;
4042 
4043 	if (start > map->max_offset || end > map->max_offset ||
4044 	    start < map->min_offset || end < map->min_offset)
4045 		return EINVAL;
4046 
4047 	vm_map_lock(map);
4048 
4049 	if (uvm_map_lookup_entry(map, start, &entry)) {
4050 		UVM_MAP_CLIP_START(map, entry, start);
4051 		UVM_MAP_CLIP_END(map, entry, end);
4052 	} else
4053 		entry = NULL;
4054 
4055 	if (entry != NULL &&
4056 	    entry->start == start && entry->end == end &&
4057 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4058 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4059 		entry->etype |= UVM_ET_SUBMAP;
4060 		entry->object.sub_map = submap;
4061 		entry->offset = 0;
4062 		uvm_map_reference(submap);
4063 		result = 0;
4064 	} else
4065 		result = EINVAL;
4066 
4067 	vm_map_unlock(map);
4068 	return(result);
4069 }
4070 
4071 /*
4072  * uvm_map_checkprot: check protection in map
4073  *
4074  * => must allow specific protection in a fully allocated region.
4075  * => map mut be read or write locked by caller.
4076  */
4077 boolean_t
4078 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4079     vm_prot_t protection)
4080 {
4081 	struct vm_map_entry *entry;
4082 
4083 	if (start < map->min_offset || end > map->max_offset || start > end)
4084 		return FALSE;
4085 	if (start == end)
4086 		return TRUE;
4087 
4088 	/*
4089 	 * Iterate entries.
4090 	 */
4091 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4092 	    entry != NULL && entry->start < end;
4093 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4094 		/* Fail if a hole is found. */
4095 		if (UVM_ET_ISHOLE(entry) ||
4096 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4097 			return FALSE;
4098 
4099 		/* Check protection. */
4100 		if ((entry->protection & protection) != protection)
4101 			return FALSE;
4102 	}
4103 	return TRUE;
4104 }
4105 
4106 /*
4107  * uvm_map_create: create map
4108  */
4109 vm_map_t
4110 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4111 {
4112 	vm_map_t map;
4113 
4114 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4115 	map->pmap = pmap;
4116 	uvm_map_setup(map, min, max, flags);
4117 	return (map);
4118 }
4119 
4120 /*
4121  * uvm_map_deallocate: drop reference to a map
4122  *
4123  * => caller must not lock map
4124  * => we will zap map if ref count goes to zero
4125  */
4126 void
4127 uvm_map_deallocate(vm_map_t map)
4128 {
4129 	int c;
4130 	struct uvm_map_deadq dead;
4131 
4132 	c = --map->ref_count;
4133 	if (c > 0) {
4134 		return;
4135 	}
4136 
4137 	/*
4138 	 * all references gone.   unmap and free.
4139 	 *
4140 	 * No lock required: we are only one to access this map.
4141 	 */
4142 	TAILQ_INIT(&dead);
4143 	uvm_tree_sanity(map, __FILE__, __LINE__);
4144 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4145 	    TRUE, FALSE);
4146 	pmap_destroy(map->pmap);
4147 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4148 	free(map, M_VMMAP, sizeof *map);
4149 
4150 	uvm_unmap_detach(&dead, 0);
4151 }
4152 
4153 /*
4154  * uvm_map_inherit: set inheritance code for range of addrs in map.
4155  *
4156  * => map must be unlocked
4157  * => note that the inherit code is used during a "fork".  see fork
4158  *	code for details.
4159  */
4160 int
4161 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4162     vm_inherit_t new_inheritance)
4163 {
4164 	struct vm_map_entry *entry;
4165 
4166 	switch (new_inheritance) {
4167 	case MAP_INHERIT_NONE:
4168 	case MAP_INHERIT_COPY:
4169 	case MAP_INHERIT_SHARE:
4170 	case MAP_INHERIT_ZERO:
4171 		break;
4172 	default:
4173 		return (EINVAL);
4174 	}
4175 
4176 	if (start > end)
4177 		return EINVAL;
4178 	start = MAX(start, map->min_offset);
4179 	end = MIN(end, map->max_offset);
4180 	if (start >= end)
4181 		return 0;
4182 
4183 	vm_map_lock(map);
4184 
4185 	entry = uvm_map_entrybyaddr(&map->addr, start);
4186 	if (entry->end > start)
4187 		UVM_MAP_CLIP_START(map, entry, start);
4188 	else
4189 		entry = RBT_NEXT(uvm_map_addr, entry);
4190 
4191 	while (entry != NULL && entry->start < end) {
4192 		UVM_MAP_CLIP_END(map, entry, end);
4193 		entry->inheritance = new_inheritance;
4194 		entry = RBT_NEXT(uvm_map_addr, entry);
4195 	}
4196 
4197 	vm_map_unlock(map);
4198 	return (0);
4199 }
4200 
4201 /*
4202  * uvm_map_advice: set advice code for range of addrs in map.
4203  *
4204  * => map must be unlocked
4205  */
4206 int
4207 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4208 {
4209 	struct vm_map_entry *entry;
4210 
4211 	switch (new_advice) {
4212 	case MADV_NORMAL:
4213 	case MADV_RANDOM:
4214 	case MADV_SEQUENTIAL:
4215 		break;
4216 	default:
4217 		return (EINVAL);
4218 	}
4219 
4220 	if (start > end)
4221 		return EINVAL;
4222 	start = MAX(start, map->min_offset);
4223 	end = MIN(end, map->max_offset);
4224 	if (start >= end)
4225 		return 0;
4226 
4227 	vm_map_lock(map);
4228 
4229 	entry = uvm_map_entrybyaddr(&map->addr, start);
4230 	if (entry != NULL && entry->end > start)
4231 		UVM_MAP_CLIP_START(map, entry, start);
4232 	else if (entry!= NULL)
4233 		entry = RBT_NEXT(uvm_map_addr, entry);
4234 
4235 	/*
4236 	 * XXXJRT: disallow holes?
4237 	 */
4238 	while (entry != NULL && entry->start < end) {
4239 		UVM_MAP_CLIP_END(map, entry, end);
4240 		entry->advice = new_advice;
4241 		entry = RBT_NEXT(uvm_map_addr, entry);
4242 	}
4243 
4244 	vm_map_unlock(map);
4245 	return (0);
4246 }
4247 
4248 /*
4249  * uvm_map_extract: extract a mapping from a map and put it somewhere
4250  * in the kernel_map, setting protection to max_prot.
4251  *
4252  * => map should be unlocked (we will write lock it and kernel_map)
4253  * => returns 0 on success, error code otherwise
4254  * => start must be page aligned
4255  * => len must be page sized
4256  * => flags:
4257  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4258  * Mappings are QREF's.
4259  */
4260 int
4261 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4262     vaddr_t *dstaddrp, int flags)
4263 {
4264 	struct uvm_map_deadq dead;
4265 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4266 	vaddr_t dstaddr;
4267 	vaddr_t end;
4268 	vaddr_t cp_start;
4269 	vsize_t cp_len, cp_off;
4270 	int error;
4271 
4272 	TAILQ_INIT(&dead);
4273 	end = start + len;
4274 
4275 	/*
4276 	 * Sanity check on the parameters.
4277 	 * Also, since the mapping may not contain gaps, error out if the
4278 	 * mapped area is not in source map.
4279 	 */
4280 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4281 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4282 		return EINVAL;
4283 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4284 		return EINVAL;
4285 
4286 	/* Initialize dead entries. Handle len == 0 case. */
4287 	if (len == 0)
4288 		return 0;
4289 
4290 	/* Acquire lock on srcmap. */
4291 	vm_map_lock(srcmap);
4292 
4293 	/* Lock srcmap, lookup first and last entry in <start,len>. */
4294 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4295 
4296 	/* Check that the range is contiguous. */
4297 	for (entry = first; entry != NULL && entry->end < end;
4298 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4299 		if (VMMAP_FREE_END(entry) != entry->end ||
4300 		    UVM_ET_ISHOLE(entry)) {
4301 			error = EINVAL;
4302 			goto fail;
4303 		}
4304 	}
4305 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4306 		error = EINVAL;
4307 		goto fail;
4308 	}
4309 
4310 	/*
4311 	 * Handle need-copy flag.
4312 	 */
4313 	for (entry = first; entry != NULL && entry->start < end;
4314 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4315 		if (UVM_ET_ISNEEDSCOPY(entry))
4316 			amap_copy(srcmap, entry, M_NOWAIT,
4317 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4318 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4319 			/*
4320 			 * amap_copy failure
4321 			 */
4322 			error = ENOMEM;
4323 			goto fail;
4324 		}
4325 	}
4326 
4327 	/* Lock destination map (kernel_map). */
4328 	vm_map_lock(kernel_map);
4329 
4330 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4331 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4332 	    PROT_NONE, 0) != 0) {
4333 		error = ENOMEM;
4334 		goto fail2;
4335 	}
4336 	*dstaddrp = dstaddr;
4337 
4338 	/*
4339 	 * We now have srcmap and kernel_map locked.
4340 	 * dstaddr contains the destination offset in dstmap.
4341 	 */
4342 	/* step 1: start looping through map entries, performing extraction. */
4343 	for (entry = first; entry != NULL && entry->start < end;
4344 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4345 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4346 		if (UVM_ET_ISHOLE(entry))
4347 			continue;
4348 
4349 		/* Calculate uvm_mapent_clone parameters. */
4350 		cp_start = entry->start;
4351 		if (cp_start < start) {
4352 			cp_off = start - cp_start;
4353 			cp_start = start;
4354 		} else
4355 			cp_off = 0;
4356 		cp_len = MIN(entry->end, end) - cp_start;
4357 
4358 		newentry = uvm_mapent_clone(kernel_map,
4359 		    cp_start - start + dstaddr, cp_len, cp_off,
4360 		    entry->protection, entry->max_protection,
4361 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4362 		if (newentry == NULL) {
4363 			error = ENOMEM;
4364 			goto fail2_unmap;
4365 		}
4366 		kernel_map->size += cp_len;
4367 		if (flags & UVM_EXTRACT_FIXPROT)
4368 			newentry->protection = newentry->max_protection;
4369 
4370 		/*
4371 		 * Step 2: perform pmap copy.
4372 		 * (Doing this in the loop saves one RB traversal.)
4373 		 */
4374 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4375 		    cp_start - start + dstaddr, cp_len, cp_start);
4376 	}
4377 	pmap_update(kernel_map->pmap);
4378 
4379 	error = 0;
4380 
4381 	/* Unmap copied entries on failure. */
4382 fail2_unmap:
4383 	if (error) {
4384 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4385 		    FALSE, TRUE);
4386 	}
4387 
4388 	/* Release maps, release dead entries. */
4389 fail2:
4390 	vm_map_unlock(kernel_map);
4391 
4392 fail:
4393 	vm_map_unlock(srcmap);
4394 
4395 	uvm_unmap_detach(&dead, 0);
4396 
4397 	return error;
4398 }
4399 
4400 /*
4401  * uvm_map_clean: clean out a map range
4402  *
4403  * => valid flags:
4404  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4405  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4406  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4407  *   if (flags & PGO_FREE): any cached pages are freed after clean
4408  * => returns an error if any part of the specified range isn't mapped
4409  * => never a need to flush amap layer since the anonymous memory has
4410  *	no permanent home, but may deactivate pages there
4411  * => called from sys_msync() and sys_madvise()
4412  * => caller must not write-lock map (read OK).
4413  * => we may sleep while cleaning if SYNCIO [with map read-locked]
4414  */
4415 
4416 int
4417 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4418 {
4419 	struct vm_map_entry *first, *entry;
4420 	struct vm_amap *amap;
4421 	struct vm_anon *anon;
4422 	struct vm_page *pg;
4423 	struct uvm_object *uobj;
4424 	vaddr_t cp_start, cp_end;
4425 	int refs;
4426 	int error;
4427 	boolean_t rv;
4428 
4429 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4430 	    (PGO_FREE|PGO_DEACTIVATE));
4431 
4432 	if (start > end || start < map->min_offset || end > map->max_offset)
4433 		return EINVAL;
4434 
4435 	vm_map_lock_read(map);
4436 	first = uvm_map_entrybyaddr(&map->addr, start);
4437 
4438 	/* Make a first pass to check for holes. */
4439 	for (entry = first; entry != NULL && entry->start < end;
4440 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4441 		if (UVM_ET_ISSUBMAP(entry)) {
4442 			vm_map_unlock_read(map);
4443 			return EINVAL;
4444 		}
4445 		if (UVM_ET_ISSUBMAP(entry) ||
4446 		    UVM_ET_ISHOLE(entry) ||
4447 		    (entry->end < end &&
4448 		    VMMAP_FREE_END(entry) != entry->end)) {
4449 			vm_map_unlock_read(map);
4450 			return EFAULT;
4451 		}
4452 	}
4453 
4454 	error = 0;
4455 	for (entry = first; entry != NULL && entry->start < end;
4456 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4457 		amap = entry->aref.ar_amap;	/* top layer */
4458 		if (UVM_ET_ISOBJ(entry))
4459 			uobj = entry->object.uvm_obj;
4460 		else
4461 			uobj = NULL;
4462 
4463 		/*
4464 		 * No amap cleaning necessary if:
4465 		 *  - there's no amap
4466 		 *  - we're not deactivating or freeing pages.
4467 		 */
4468 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4469 			goto flush_object;
4470 
4471 		cp_start = MAX(entry->start, start);
4472 		cp_end = MIN(entry->end, end);
4473 
4474 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4475 			anon = amap_lookup(&entry->aref,
4476 			    cp_start - entry->start);
4477 			if (anon == NULL)
4478 				continue;
4479 
4480 			pg = anon->an_page;
4481 			if (pg == NULL) {
4482 				continue;
4483 			}
4484 			KASSERT(pg->pg_flags & PQ_ANON);
4485 
4486 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4487 			/*
4488 			 * XXX In these first 3 cases, we always just
4489 			 * XXX deactivate the page.  We may want to
4490 			 * XXX handle the different cases more
4491 			 * XXX specifically, in the future.
4492 			 */
4493 			case PGO_CLEANIT|PGO_FREE:
4494 			case PGO_CLEANIT|PGO_DEACTIVATE:
4495 			case PGO_DEACTIVATE:
4496 deactivate_it:
4497 				/* skip the page if it's wired */
4498 				if (pg->wire_count != 0)
4499 					break;
4500 
4501 				uvm_lock_pageq();
4502 
4503 				KASSERT(pg->uanon == anon);
4504 
4505 				/* zap all mappings for the page. */
4506 				pmap_page_protect(pg, PROT_NONE);
4507 
4508 				/* ...and deactivate the page. */
4509 				uvm_pagedeactivate(pg);
4510 
4511 				uvm_unlock_pageq();
4512 				break;
4513 			case PGO_FREE:
4514 				/*
4515 				 * If there are multiple references to
4516 				 * the amap, just deactivate the page.
4517 				 */
4518 				if (amap_refs(amap) > 1)
4519 					goto deactivate_it;
4520 
4521 				/* XXX skip the page if it's wired */
4522 				if (pg->wire_count != 0) {
4523 					break;
4524 				}
4525 				amap_unadd(&entry->aref,
4526 				    cp_start - entry->start);
4527 				refs = --anon->an_ref;
4528 				if (refs == 0)
4529 					uvm_anfree(anon);
4530 				break;
4531 			default:
4532 				panic("uvm_map_clean: weird flags");
4533 			}
4534 		}
4535 
4536 flush_object:
4537 		cp_start = MAX(entry->start, start);
4538 		cp_end = MIN(entry->end, end);
4539 
4540 		/*
4541 		 * flush pages if we've got a valid backing object.
4542 		 *
4543 		 * Don't PGO_FREE if we don't have write permission
4544 		 * and don't flush if this is a copy-on-write object
4545 		 * since we can't know our permissions on it.
4546 		 */
4547 		if (uobj != NULL &&
4548 		    ((flags & PGO_FREE) == 0 ||
4549 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4550 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4551 			rv = uobj->pgops->pgo_flush(uobj,
4552 			    cp_start - entry->start + entry->offset,
4553 			    cp_end - entry->start + entry->offset, flags);
4554 
4555 			if (rv == FALSE)
4556 				error = EFAULT;
4557 		}
4558 	}
4559 
4560 	vm_map_unlock_read(map);
4561 	return error;
4562 }
4563 
4564 /*
4565  * UVM_MAP_CLIP_END implementation
4566  */
4567 void
4568 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4569 {
4570 	struct vm_map_entry *tmp;
4571 
4572 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4573 	tmp = uvm_mapent_alloc(map, 0);
4574 
4575 	/* Invoke splitentry. */
4576 	uvm_map_splitentry(map, entry, tmp, addr);
4577 }
4578 
4579 /*
4580  * UVM_MAP_CLIP_START implementation
4581  *
4582  * Clippers are required to not change the pointers to the entry they are
4583  * clipping on.
4584  * Since uvm_map_splitentry turns the original entry into the lowest
4585  * entry (address wise) we do a swap between the new entry and the original
4586  * entry, prior to calling uvm_map_splitentry.
4587  */
4588 void
4589 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4590 {
4591 	struct vm_map_entry *tmp;
4592 	struct uvm_addr_state *free;
4593 
4594 	/* Unlink original. */
4595 	free = uvm_map_uaddr_e(map, entry);
4596 	uvm_mapent_free_remove(map, free, entry);
4597 	uvm_mapent_addr_remove(map, entry);
4598 
4599 	/* Copy entry. */
4600 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4601 	tmp = uvm_mapent_alloc(map, 0);
4602 	uvm_mapent_copy(entry, tmp);
4603 
4604 	/* Put new entry in place of original entry. */
4605 	uvm_mapent_addr_insert(map, tmp);
4606 	uvm_mapent_free_insert(map, free, tmp);
4607 
4608 	/* Invoke splitentry. */
4609 	uvm_map_splitentry(map, tmp, entry, addr);
4610 }
4611 
4612 /*
4613  * Boundary fixer.
4614  */
4615 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4616 static __inline vaddr_t
4617 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4618 {
4619 	return (min < bound && max > bound) ? bound : max;
4620 }
4621 
4622 /*
4623  * Choose free list based on address at start of free space.
4624  *
4625  * The uvm_addr_state returned contains addr and is the first of:
4626  * - uaddr_exe
4627  * - uaddr_brk_stack
4628  * - uaddr_any
4629  */
4630 struct uvm_addr_state*
4631 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4632 {
4633 	struct uvm_addr_state *uaddr;
4634 	int i;
4635 
4636 	/* Special case the first page, to prevent mmap from returning 0. */
4637 	if (addr < VMMAP_MIN_ADDR)
4638 		return NULL;
4639 
4640 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4641 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4642 		if (addr >= uvm_maxkaddr)
4643 			return NULL;
4644 	}
4645 
4646 	/* Is the address inside the exe-only map? */
4647 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4648 	    addr < map->uaddr_exe->uaddr_maxaddr)
4649 		return map->uaddr_exe;
4650 
4651 	/* Check if the space falls inside brk/stack area. */
4652 	if ((addr >= map->b_start && addr < map->b_end) ||
4653 	    (addr >= map->s_start && addr < map->s_end)) {
4654 		if (map->uaddr_brk_stack != NULL &&
4655 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4656 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4657 			return map->uaddr_brk_stack;
4658 		} else
4659 			return NULL;
4660 	}
4661 
4662 	/*
4663 	 * Check the other selectors.
4664 	 *
4665 	 * These selectors are only marked as the owner, if they have insert
4666 	 * functions.
4667 	 */
4668 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4669 		uaddr = map->uaddr_any[i];
4670 		if (uaddr == NULL)
4671 			continue;
4672 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4673 			continue;
4674 
4675 		if (addr >= uaddr->uaddr_minaddr &&
4676 		    addr < uaddr->uaddr_maxaddr)
4677 			return uaddr;
4678 	}
4679 
4680 	return NULL;
4681 }
4682 
4683 /*
4684  * Choose free list based on address at start of free space.
4685  *
4686  * The uvm_addr_state returned contains addr and is the first of:
4687  * - uaddr_exe
4688  * - uaddr_brk_stack
4689  * - uaddr_any
4690  */
4691 struct uvm_addr_state*
4692 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4693 {
4694 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4695 }
4696 
4697 /*
4698  * Returns the first free-memory boundary that is crossed by [min-max].
4699  */
4700 vsize_t
4701 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4702 {
4703 	struct uvm_addr_state	*uaddr;
4704 	int			 i;
4705 
4706 	/* Never return first page. */
4707 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4708 
4709 	/* Treat the maxkaddr special, if the map is a kernel_map. */
4710 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4711 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4712 
4713 	/* Check for exe-only boundaries. */
4714 	if (map->uaddr_exe != NULL) {
4715 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4716 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4717 	}
4718 
4719 	/* Check for exe-only boundaries. */
4720 	if (map->uaddr_brk_stack != NULL) {
4721 		max = uvm_map_boundfix(min, max,
4722 		    map->uaddr_brk_stack->uaddr_minaddr);
4723 		max = uvm_map_boundfix(min, max,
4724 		    map->uaddr_brk_stack->uaddr_maxaddr);
4725 	}
4726 
4727 	/* Check other boundaries. */
4728 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4729 		uaddr = map->uaddr_any[i];
4730 		if (uaddr != NULL) {
4731 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4732 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4733 		}
4734 	}
4735 
4736 	/* Boundaries at stack and brk() area. */
4737 	max = uvm_map_boundfix(min, max, map->s_start);
4738 	max = uvm_map_boundfix(min, max, map->s_end);
4739 	max = uvm_map_boundfix(min, max, map->b_start);
4740 	max = uvm_map_boundfix(min, max, map->b_end);
4741 
4742 	return max;
4743 }
4744 
4745 /*
4746  * Update map allocation start and end addresses from proc vmspace.
4747  */
4748 void
4749 uvm_map_vmspace_update(struct vm_map *map,
4750     struct uvm_map_deadq *dead, int flags)
4751 {
4752 	struct vmspace *vm;
4753 	vaddr_t b_start, b_end, s_start, s_end;
4754 
4755 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4756 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4757 
4758 	/*
4759 	 * Derive actual allocation boundaries from vmspace.
4760 	 */
4761 	vm = (struct vmspace *)map;
4762 	b_start = (vaddr_t)vm->vm_daddr;
4763 	b_end   = b_start + BRKSIZ;
4764 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4765 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4766 #ifdef DIAGNOSTIC
4767 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4768 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4769 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4770 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4771 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4772 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4773 		    vm, b_start, b_end, s_start, s_end);
4774 	}
4775 #endif
4776 
4777 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4778 	    map->s_start == s_start && map->s_end == s_end))
4779 		return;
4780 
4781 	uvm_map_freelist_update(map, dead, b_start, b_end,
4782 	    s_start, s_end, flags);
4783 }
4784 
4785 /*
4786  * Grow kernel memory.
4787  *
4788  * This function is only called for kernel maps when an allocation fails.
4789  *
4790  * If the map has a gap that is large enough to accommodate alloc_sz, this
4791  * function will make sure map->free will include it.
4792  */
4793 void
4794 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4795     vsize_t alloc_sz, int flags)
4796 {
4797 	vsize_t sz;
4798 	vaddr_t end;
4799 	struct vm_map_entry *entry;
4800 
4801 	/* Kernel memory only. */
4802 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4803 	/* Destroy free list. */
4804 	uvm_map_freelist_update_clear(map, dead);
4805 
4806 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
4807 	if (map->flags & VM_MAP_GUARDPAGES)
4808 		alloc_sz += PAGE_SIZE;
4809 
4810 	/*
4811 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4812 	 *
4813 	 * Don't handle the case where the multiplication overflows:
4814 	 * if that happens, the allocation is probably too big anyway.
4815 	 */
4816 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4817 
4818 	/*
4819 	 * Walk forward until a gap large enough for alloc_sz shows up.
4820 	 *
4821 	 * We assume the kernel map has no boundaries.
4822 	 * uvm_maxkaddr may be zero.
4823 	 */
4824 	end = MAX(uvm_maxkaddr, map->min_offset);
4825 	entry = uvm_map_entrybyaddr(&map->addr, end);
4826 	while (entry && entry->fspace < alloc_sz)
4827 		entry = RBT_NEXT(uvm_map_addr, entry);
4828 	if (entry) {
4829 		end = MAX(VMMAP_FREE_START(entry), end);
4830 		end += MIN(sz, map->max_offset - end);
4831 	} else
4832 		end = map->max_offset;
4833 
4834 	/* Reserve pmap entries. */
4835 #ifdef PMAP_GROWKERNEL
4836 	uvm_maxkaddr = pmap_growkernel(end);
4837 #else
4838 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4839 #endif
4840 
4841 	/* Rebuild free list. */
4842 	uvm_map_freelist_update_refill(map, flags);
4843 }
4844 
4845 /*
4846  * Freelist update subfunction: unlink all entries from freelists.
4847  */
4848 void
4849 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4850 {
4851 	struct uvm_addr_state *free;
4852 	struct vm_map_entry *entry, *prev, *next;
4853 
4854 	prev = NULL;
4855 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
4856 	    entry = next) {
4857 		next = RBT_NEXT(uvm_map_addr, entry);
4858 
4859 		free = uvm_map_uaddr_e(map, entry);
4860 		uvm_mapent_free_remove(map, free, entry);
4861 
4862 		if (prev != NULL && entry->start == entry->end) {
4863 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4864 			uvm_mapent_addr_remove(map, entry);
4865 			DEAD_ENTRY_PUSH(dead, entry);
4866 		} else
4867 			prev = entry;
4868 	}
4869 }
4870 
4871 /*
4872  * Freelist update subfunction: refill the freelists with entries.
4873  */
4874 void
4875 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4876 {
4877 	struct vm_map_entry *entry;
4878 	vaddr_t min, max;
4879 
4880 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
4881 		min = VMMAP_FREE_START(entry);
4882 		max = VMMAP_FREE_END(entry);
4883 		entry->fspace = 0;
4884 
4885 		entry = uvm_map_fix_space(map, entry, min, max, flags);
4886 	}
4887 
4888 	uvm_tree_sanity(map, __FILE__, __LINE__);
4889 }
4890 
4891 /*
4892  * Change {a,b}_{start,end} allocation ranges and associated free lists.
4893  */
4894 void
4895 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4896     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4897 {
4898 	KDASSERT(b_end >= b_start && s_end >= s_start);
4899 
4900 	/* Clear all free lists. */
4901 	uvm_map_freelist_update_clear(map, dead);
4902 
4903 	/* Apply new bounds. */
4904 	map->b_start = b_start;
4905 	map->b_end   = b_end;
4906 	map->s_start = s_start;
4907 	map->s_end   = s_end;
4908 
4909 	/* Refill free lists. */
4910 	uvm_map_freelist_update_refill(map, flags);
4911 }
4912 
4913 /*
4914  * Assign a uvm_addr_state to the specified pointer in vm_map.
4915  *
4916  * May sleep.
4917  */
4918 void
4919 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
4920     struct uvm_addr_state *newval)
4921 {
4922 	struct uvm_map_deadq dead;
4923 
4924 	/* Pointer which must be in this map. */
4925 	KASSERT(which != NULL);
4926 	KASSERT((void*)map <= (void*)(which) &&
4927 	    (void*)(which) < (void*)(map + 1));
4928 
4929 	vm_map_lock(map);
4930 	TAILQ_INIT(&dead);
4931 	uvm_map_freelist_update_clear(map, &dead);
4932 
4933 	uvm_addr_destroy(*which);
4934 	*which = newval;
4935 
4936 	uvm_map_freelist_update_refill(map, 0);
4937 	vm_map_unlock(map);
4938 	uvm_unmap_detach(&dead, 0);
4939 }
4940 
4941 /*
4942  * Correct space insert.
4943  *
4944  * Entry must not be on any freelist.
4945  */
4946 struct vm_map_entry*
4947 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
4948     vaddr_t min, vaddr_t max, int flags)
4949 {
4950 	struct uvm_addr_state	*free, *entfree;
4951 	vaddr_t			 lmax;
4952 
4953 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
4954 	KDASSERT(min <= max);
4955 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
4956 	    min == map->min_offset);
4957 
4958 	/*
4959 	 * During the function, entfree will always point at the uaddr state
4960 	 * for entry.
4961 	 */
4962 	entfree = (entry == NULL ? NULL :
4963 	    uvm_map_uaddr_e(map, entry));
4964 
4965 	while (min != max) {
4966 		/* Claim guard page for entry. */
4967 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
4968 		    VMMAP_FREE_END(entry) == entry->end &&
4969 		    entry->start != entry->end) {
4970 			if (max - min == 2 * PAGE_SIZE) {
4971 				/*
4972 				 * If the free-space gap is exactly 2 pages,
4973 				 * we make the guard 2 pages instead of 1.
4974 				 * Because in a guarded map, an area needs
4975 				 * at least 2 pages to allocate from:
4976 				 * one page for the allocation and one for
4977 				 * the guard.
4978 				 */
4979 				entry->guard = 2 * PAGE_SIZE;
4980 				min = max;
4981 			} else {
4982 				entry->guard = PAGE_SIZE;
4983 				min += PAGE_SIZE;
4984 			}
4985 			continue;
4986 		}
4987 
4988 		/*
4989 		 * Handle the case where entry has a 2-page guard, but the
4990 		 * space after entry is freed.
4991 		 */
4992 		if (entry != NULL && entry->fspace == 0 &&
4993 		    entry->guard > PAGE_SIZE) {
4994 			entry->guard = PAGE_SIZE;
4995 			min = VMMAP_FREE_START(entry);
4996 		}
4997 
4998 		lmax = uvm_map_boundary(map, min, max);
4999 		free = uvm_map_uaddr(map, min);
5000 
5001 		/*
5002 		 * Entries are merged if they point at the same uvm_free().
5003 		 * Exception to that rule: if min == uvm_maxkaddr, a new
5004 		 * entry is started regardless (otherwise the allocators
5005 		 * will get confused).
5006 		 */
5007 		if (entry != NULL && free == entfree &&
5008 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5009 		    min == uvm_maxkaddr)) {
5010 			KDASSERT(VMMAP_FREE_END(entry) == min);
5011 			entry->fspace += lmax - min;
5012 		} else {
5013 			/*
5014 			 * Commit entry to free list: it'll not be added to
5015 			 * anymore.
5016 			 * We'll start a new entry and add to that entry
5017 			 * instead.
5018 			 */
5019 			if (entry != NULL)
5020 				uvm_mapent_free_insert(map, entfree, entry);
5021 
5022 			/* New entry for new uaddr. */
5023 			entry = uvm_mapent_alloc(map, flags);
5024 			KDASSERT(entry != NULL);
5025 			entry->end = entry->start = min;
5026 			entry->guard = 0;
5027 			entry->fspace = lmax - min;
5028 			entry->object.uvm_obj = NULL;
5029 			entry->offset = 0;
5030 			entry->etype = 0;
5031 			entry->protection = entry->max_protection = 0;
5032 			entry->inheritance = 0;
5033 			entry->wired_count = 0;
5034 			entry->advice = 0;
5035 			entry->aref.ar_pageoff = 0;
5036 			entry->aref.ar_amap = NULL;
5037 			uvm_mapent_addr_insert(map, entry);
5038 
5039 			entfree = free;
5040 		}
5041 
5042 		min = lmax;
5043 	}
5044 	/* Finally put entry on the uaddr state. */
5045 	if (entry != NULL)
5046 		uvm_mapent_free_insert(map, entfree, entry);
5047 
5048 	return entry;
5049 }
5050 
5051 /*
5052  * MQuery style of allocation.
5053  *
5054  * This allocator searches forward until sufficient space is found to map
5055  * the given size.
5056  *
5057  * XXX: factor in offset (via pmap_prefer) and protection?
5058  */
5059 int
5060 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5061     int flags)
5062 {
5063 	struct vm_map_entry *entry, *last;
5064 	vaddr_t addr;
5065 	vaddr_t tmp, pmap_align, pmap_offset;
5066 	int error;
5067 
5068 	addr = *addr_p;
5069 	vm_map_lock_read(map);
5070 
5071 	/* Configure pmap prefer. */
5072 	if (offset != UVM_UNKNOWN_OFFSET) {
5073 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5074 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5075 	} else {
5076 		pmap_align = PAGE_SIZE;
5077 		pmap_offset = 0;
5078 	}
5079 
5080 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5081 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5082 	  	tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5083 		if (tmp < addr)
5084 			tmp += pmap_align;
5085 		addr = tmp;
5086 	}
5087 
5088 	/* First, check if the requested range is fully available. */
5089 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5090 	last = NULL;
5091 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5092 		error = 0;
5093 		goto out;
5094 	}
5095 	if (flags & UVM_FLAG_FIXED) {
5096 		error = EINVAL;
5097 		goto out;
5098 	}
5099 
5100 	error = ENOMEM; /* Default error from here. */
5101 
5102 	/*
5103 	 * At this point, the memory at <addr, sz> is not available.
5104 	 * The reasons are:
5105 	 * [1] it's outside the map,
5106 	 * [2] it starts in used memory (and therefore needs to move
5107 	 *     toward the first free page in entry),
5108 	 * [3] it starts in free memory but bumps into used memory.
5109 	 *
5110 	 * Note that for case [2], the forward moving is handled by the
5111 	 * for loop below.
5112 	 */
5113 	if (entry == NULL) {
5114 		/* [1] Outside the map. */
5115 		if (addr >= map->max_offset)
5116 			goto out;
5117 		else
5118 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5119 	} else if (VMMAP_FREE_START(entry) <= addr) {
5120 		/* [3] Bumped into used memory. */
5121 		entry = RBT_NEXT(uvm_map_addr, entry);
5122 	}
5123 
5124 	/* Test if the next entry is sufficient for the allocation. */
5125 	for (; entry != NULL;
5126 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5127 		if (entry->fspace == 0)
5128 			continue;
5129 		addr = VMMAP_FREE_START(entry);
5130 
5131 restart:	/* Restart address checks on address change. */
5132 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5133 		if (tmp < addr)
5134 			tmp += pmap_align;
5135 		addr = tmp;
5136 		if (addr >= VMMAP_FREE_END(entry))
5137 			continue;
5138 
5139 		/* Skip brk() allocation addresses. */
5140 		if (addr + sz > map->b_start && addr < map->b_end) {
5141 			if (VMMAP_FREE_END(entry) > map->b_end) {
5142 				addr = map->b_end;
5143 				goto restart;
5144 			} else
5145 				continue;
5146 		}
5147 		/* Skip stack allocation addresses. */
5148 		if (addr + sz > map->s_start && addr < map->s_end) {
5149 			if (VMMAP_FREE_END(entry) > map->s_end) {
5150 				addr = map->s_end;
5151 				goto restart;
5152 			} else
5153 				continue;
5154 		}
5155 
5156 		last = NULL;
5157 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5158 			error = 0;
5159 			goto out;
5160 		}
5161 	}
5162 
5163 out:
5164 	vm_map_unlock_read(map);
5165 	if (error == 0)
5166 		*addr_p = addr;
5167 	return error;
5168 }
5169 
5170 /*
5171  * Determine allocation bias.
5172  *
5173  * Returns 1 if we should bias to high addresses, -1 for a bias towards low
5174  * addresses, or 0 for no bias.
5175  * The bias mechanism is intended to avoid clashing with brk() and stack
5176  * areas.
5177  */
5178 int
5179 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
5180 {
5181 	vaddr_t start, end;
5182 
5183 	start = VMMAP_FREE_START(entry);
5184 	end = VMMAP_FREE_END(entry);
5185 
5186 	/* Stay at the top of brk() area. */
5187 	if (end >= map->b_start && start < map->b_end)
5188 		return 1;
5189 	/* Stay at the far end of the stack area. */
5190 	if (end >= map->s_start && start < map->s_end) {
5191 #ifdef MACHINE_STACK_GROWS_UP
5192 		return 1;
5193 #else
5194 		return -1;
5195 #endif
5196 	}
5197 
5198 	/* No bias, this area is meant for us. */
5199 	return 0;
5200 }
5201 
5202 
5203 boolean_t
5204 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5205 {
5206 	boolean_t rv;
5207 
5208 	if (map->flags & VM_MAP_INTRSAFE) {
5209 		rv = _mtx_enter_try(&map->mtx LOCK_FL_ARGS);
5210 	} else {
5211 		mtx_enter(&map->flags_lock);
5212 		if (map->flags & VM_MAP_BUSY) {
5213 			mtx_leave(&map->flags_lock);
5214 			return (FALSE);
5215 		}
5216 		mtx_leave(&map->flags_lock);
5217 		rv = (_rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP LOCK_FL_ARGS)
5218 		    == 0);
5219 		/* check if the lock is busy and back out if we won the race */
5220 		if (rv) {
5221 			mtx_enter(&map->flags_lock);
5222 			if (map->flags & VM_MAP_BUSY) {
5223 				_rw_exit(&map->lock LOCK_FL_ARGS);
5224 				rv = FALSE;
5225 			}
5226 			mtx_leave(&map->flags_lock);
5227 		}
5228 	}
5229 
5230 	if (rv) {
5231 		map->timestamp++;
5232 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5233 		uvm_tree_sanity(map, file, line);
5234 		uvm_tree_size_chk(map, file, line);
5235 	}
5236 
5237 	return (rv);
5238 }
5239 
5240 void
5241 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5242 {
5243 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5244 		do {
5245 			mtx_enter(&map->flags_lock);
5246 tryagain:
5247 			while (map->flags & VM_MAP_BUSY) {
5248 				map->flags |= VM_MAP_WANTLOCK;
5249 				msleep(&map->flags, &map->flags_lock,
5250 				    PVM, vmmapbsy, 0);
5251 			}
5252 			mtx_leave(&map->flags_lock);
5253 		} while (_rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL
5254 		    LOCK_FL_ARGS) != 0);
5255 		/* check if the lock is busy and back out if we won the race */
5256 		mtx_enter(&map->flags_lock);
5257 		if (map->flags & VM_MAP_BUSY) {
5258 			_rw_exit(&map->lock LOCK_FL_ARGS);
5259 			goto tryagain;
5260 		}
5261 		mtx_leave(&map->flags_lock);
5262 	} else {
5263 		_mtx_enter(&map->mtx LOCK_FL_ARGS);
5264 	}
5265 
5266 	map->timestamp++;
5267 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5268 	uvm_tree_sanity(map, file, line);
5269 	uvm_tree_size_chk(map, file, line);
5270 }
5271 
5272 void
5273 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5274 {
5275 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5276 		_rw_enter_read(&map->lock LOCK_FL_ARGS);
5277 	else
5278 		_mtx_enter(&map->mtx LOCK_FL_ARGS);
5279 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5280 	uvm_tree_sanity(map, file, line);
5281 	uvm_tree_size_chk(map, file, line);
5282 }
5283 
5284 void
5285 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5286 {
5287 	uvm_tree_sanity(map, file, line);
5288 	uvm_tree_size_chk(map, file, line);
5289 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5290 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5291 		_rw_exit(&map->lock LOCK_FL_ARGS);
5292 	else
5293 		_mtx_leave(&map->mtx LOCK_FL_ARGS);
5294 }
5295 
5296 void
5297 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5298 {
5299 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5300 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5301 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5302 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5303 		_rw_exit_read(&map->lock LOCK_FL_ARGS);
5304 	else
5305 		_mtx_leave(&map->mtx LOCK_FL_ARGS);
5306 }
5307 
5308 void
5309 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5310 {
5311 	uvm_tree_sanity(map, file, line);
5312 	uvm_tree_size_chk(map, file, line);
5313 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5314 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5315 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5316 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5317 		_rw_enter(&map->lock, RW_DOWNGRADE LOCK_FL_ARGS);
5318 }
5319 
5320 void
5321 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5322 {
5323 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5324 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5325 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5326 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5327 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5328 		_rw_exit_read(&map->lock LOCK_FL_ARGS);
5329 		_rw_enter_write(&map->lock LOCK_FL_ARGS);
5330 	}
5331 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5332 	uvm_tree_sanity(map, file, line);
5333 }
5334 
5335 void
5336 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5337 {
5338 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5339 	mtx_enter(&map->flags_lock);
5340 	map->flags |= VM_MAP_BUSY;
5341 	mtx_leave(&map->flags_lock);
5342 }
5343 
5344 void
5345 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5346 {
5347 	int oflags;
5348 
5349 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5350 	mtx_enter(&map->flags_lock);
5351 	oflags = map->flags;
5352 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5353 	mtx_leave(&map->flags_lock);
5354 	if (oflags & VM_MAP_WANTLOCK)
5355 		wakeup(&map->flags);
5356 }
5357 
5358 #ifndef SMALL_KERNEL
5359 int
5360 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5361     size_t *lenp)
5362 {
5363 	struct vm_map_entry *entry;
5364 	vaddr_t start;
5365 	int cnt, maxcnt, error = 0;
5366 
5367 	KASSERT(*lenp > 0);
5368 	KASSERT((*lenp % sizeof(*kve)) == 0);
5369 	cnt = 0;
5370 	maxcnt = *lenp / sizeof(*kve);
5371 	KASSERT(maxcnt > 0);
5372 
5373 	/*
5374 	 * Return only entries whose address is above the given base
5375 	 * address.  This allows userland to iterate without knowing the
5376 	 * number of entries beforehand.
5377 	 */
5378 	start = (vaddr_t)kve[0].kve_start;
5379 
5380 	vm_map_lock(map);
5381 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5382 		if (cnt == maxcnt) {
5383 			error = ENOMEM;
5384 			break;
5385 		}
5386 		if (start != 0 && entry->start < start)
5387 			continue;
5388 		kve->kve_start = entry->start;
5389 		kve->kve_end = entry->end;
5390 		kve->kve_guard = entry->guard;
5391 		kve->kve_fspace = entry->fspace;
5392 		kve->kve_fspace_augment = entry->fspace_augment;
5393 		kve->kve_offset = entry->offset;
5394 		kve->kve_wired_count = entry->wired_count;
5395 		kve->kve_etype = entry->etype;
5396 		kve->kve_protection = entry->protection;
5397 		kve->kve_max_protection = entry->max_protection;
5398 		kve->kve_advice = entry->advice;
5399 		kve->kve_inheritance = entry->inheritance;
5400 		kve->kve_flags = entry->flags;
5401 		kve++;
5402 		cnt++;
5403 	}
5404 	vm_map_unlock(map);
5405 
5406 	KASSERT(cnt <= maxcnt);
5407 
5408 	*lenp = sizeof(*kve) * cnt;
5409 	return error;
5410 }
5411 #endif
5412 
5413 
5414 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5415     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5416 
5417 
5418 /*
5419  * MD code: vmspace allocator setup.
5420  */
5421 
5422 #ifdef __i386__
5423 void
5424 uvm_map_setup_md(struct vm_map *map)
5425 {
5426 	vaddr_t		min, max;
5427 
5428 	min = map->min_offset;
5429 	max = map->max_offset;
5430 
5431 	/*
5432 	 * Ensure the selectors will not try to manage page 0;
5433 	 * it's too special.
5434 	 */
5435 	if (min < VMMAP_MIN_ADDR)
5436 		min = VMMAP_MIN_ADDR;
5437 
5438 #if 0	/* Cool stuff, not yet */
5439 	/* Executable code is special. */
5440 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5441 	/* Place normal allocations beyond executable mappings. */
5442 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5443 #else	/* Crappy stuff, for now */
5444 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5445 #endif
5446 
5447 #ifndef SMALL_KERNEL
5448 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5449 #endif /* !SMALL_KERNEL */
5450 }
5451 #elif __LP64__
5452 void
5453 uvm_map_setup_md(struct vm_map *map)
5454 {
5455 	vaddr_t		min, max;
5456 
5457 	min = map->min_offset;
5458 	max = map->max_offset;
5459 
5460 	/*
5461 	 * Ensure the selectors will not try to manage page 0;
5462 	 * it's too special.
5463 	 */
5464 	if (min < VMMAP_MIN_ADDR)
5465 		min = VMMAP_MIN_ADDR;
5466 
5467 #if 0	/* Cool stuff, not yet */
5468 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5469 #else	/* Crappy stuff, for now */
5470 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5471 #endif
5472 
5473 #ifndef SMALL_KERNEL
5474 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5475 #endif /* !SMALL_KERNEL */
5476 }
5477 #else	/* non-i386, 32 bit */
5478 void
5479 uvm_map_setup_md(struct vm_map *map)
5480 {
5481 	vaddr_t		min, max;
5482 
5483 	min = map->min_offset;
5484 	max = map->max_offset;
5485 
5486 	/*
5487 	 * Ensure the selectors will not try to manage page 0;
5488 	 * it's too special.
5489 	 */
5490 	if (min < VMMAP_MIN_ADDR)
5491 		min = VMMAP_MIN_ADDR;
5492 
5493 #if 0	/* Cool stuff, not yet */
5494 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5495 #else	/* Crappy stuff, for now */
5496 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5497 #endif
5498 
5499 #ifndef SMALL_KERNEL
5500 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5501 #endif /* !SMALL_KERNEL */
5502 }
5503 #endif
5504