xref: /openbsd-src/sys/uvm/uvm_map.c (revision 4c1e55dc91edd6e69ccc60ce855900fbc12cf34f)
1 /*	$OpenBSD: uvm_map.c,v 1.157 2012/06/14 15:54:36 ariane Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. All advertising materials mentioning features or use of this software
37  *    must display the following acknowledgement:
38  *	This product includes software developed by Charles D. Cranor,
39  *      Washington University, the University of California, Berkeley and
40  *      its contributors.
41  * 4. Neither the name of the University nor the names of its contributors
42  *    may be used to endorse or promote products derived from this software
43  *    without specific prior written permission.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
55  * SUCH DAMAGE.
56  *
57  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
58  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
59  *
60  *
61  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
62  * All rights reserved.
63  *
64  * Permission to use, copy, modify and distribute this software and
65  * its documentation is hereby granted, provided that both the copyright
66  * notice and this permission notice appear in all copies of the
67  * software, derivative works or modified versions, and any portions
68  * thereof, and that both notices appear in supporting documentation.
69  *
70  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
71  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
72  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
73  *
74  * Carnegie Mellon requests users of this software to return to
75  *
76  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
77  *  School of Computer Science
78  *  Carnegie Mellon University
79  *  Pittsburgh PA 15213-3890
80  *
81  * any improvements or extensions that they make and grant Carnegie the
82  * rights to redistribute these changes.
83  */
84 
85 /*
86  * uvm_map.c: uvm map operations
87  */
88 
89 /* #define DEBUG */
90 /* #define VMMAP_DEBUG */
91 
92 #include <sys/param.h>
93 #include <sys/systm.h>
94 #include <sys/mman.h>
95 #include <sys/proc.h>
96 #include <sys/malloc.h>
97 #include <sys/pool.h>
98 #include <sys/kernel.h>
99 
100 #include <dev/rndvar.h>
101 
102 #ifdef SYSVSHM
103 #include <sys/shm.h>
104 #endif
105 
106 #include <uvm/uvm.h>
107 
108 #ifdef DDB
109 #include <uvm/uvm_ddb.h>
110 #endif
111 
112 #include <uvm/uvm_addr.h>
113 
114 
115 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
116 int			 uvm_mapent_isjoinable(struct vm_map*,
117 			    struct vm_map_entry*, struct vm_map_entry*);
118 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
119 			    struct vm_map_entry*, struct uvm_map_deadq*);
120 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
121 			    struct vm_map_entry*, struct uvm_map_deadq*);
122 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
123 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
124 			    struct uvm_map_deadq*);
125 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
126 void			 uvm_mapent_free(struct vm_map_entry*);
127 void			 uvm_unmap_kill_entry(struct vm_map*,
128 			    struct vm_map_entry*);
129 void			 uvm_mapent_mkfree(struct vm_map*,
130 			    struct vm_map_entry*, struct vm_map_entry**,
131 			    struct uvm_map_deadq*, boolean_t);
132 void			 uvm_map_pageable_pgon(struct vm_map*,
133 			    struct vm_map_entry*, struct vm_map_entry*,
134 			    vaddr_t, vaddr_t);
135 int			 uvm_map_pageable_wire(struct vm_map*,
136 			    struct vm_map_entry*, struct vm_map_entry*,
137 			    vaddr_t, vaddr_t, int);
138 void			 uvm_map_setup_entries(struct vm_map*);
139 void			 uvm_map_setup_md(struct vm_map*);
140 void			 uvm_map_teardown(struct vm_map*);
141 void			 uvm_map_vmspace_update(struct vm_map*,
142 			    struct uvm_map_deadq*, int);
143 void			 uvm_map_kmem_grow(struct vm_map*,
144 			    struct uvm_map_deadq*, vsize_t, int);
145 void			 uvm_map_freelist_update_clear(struct vm_map*,
146 			    struct uvm_map_deadq*);
147 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
148 void			 uvm_map_freelist_update(struct vm_map*,
149 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
150 			    vaddr_t, vaddr_t, int);
151 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
152 			    vaddr_t, vaddr_t, int);
153 int			 uvm_map_sel_limits(vaddr_t*, vaddr_t*, vsize_t, int,
154 			    struct vm_map_entry*, vaddr_t, vaddr_t, vaddr_t,
155 			    int);
156 int			 uvm_map_findspace(struct vm_map*,
157 			    struct vm_map_entry**, struct vm_map_entry**,
158 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
159 			    vaddr_t);
160 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
161 void			 uvm_map_addr_augment(struct vm_map_entry*);
162 
163 /*
164  * Tree management functions.
165  */
166 
167 static __inline void	 uvm_mapent_copy(struct vm_map_entry*,
168 			    struct vm_map_entry*);
169 static int		 uvm_mapentry_addrcmp(struct vm_map_entry*,
170 			    struct vm_map_entry*);
171 static int		 uvm_mapentry_freecmp(struct vm_map_entry*,
172 			    struct vm_map_entry*);
173 void			 uvm_mapent_free_insert(struct vm_map*,
174 			    struct uvm_addr_state*, struct vm_map_entry*);
175 void			 uvm_mapent_free_remove(struct vm_map*,
176 			    struct uvm_addr_state*, struct vm_map_entry*);
177 void			 uvm_mapent_addr_insert(struct vm_map*,
178 			    struct vm_map_entry*);
179 void			 uvm_mapent_addr_remove(struct vm_map*,
180 			    struct vm_map_entry*);
181 void			 uvm_map_splitentry(struct vm_map*,
182 			    struct vm_map_entry*, struct vm_map_entry*,
183 			    vaddr_t);
184 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
185 int			 uvm_mapent_bias(struct vm_map*, struct vm_map_entry*);
186 
187 /*
188  * uvm_vmspace_fork helper functions.
189  */
190 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
191 			    vsize_t, struct vm_map_entry*,
192 			    struct uvm_map_deadq*, int, int);
193 void			 uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
194 			    struct vm_map*, struct vm_map_entry*,
195 			    struct uvm_map_deadq*);
196 void			 uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
197 			    struct vm_map*, struct vm_map_entry*,
198 			    struct uvm_map_deadq*);
199 
200 /*
201  * Tree validation.
202  */
203 
204 #ifdef VMMAP_DEBUG
205 void			 uvm_tree_assert(struct vm_map*, int, char*,
206 			    char*, int);
207 #define UVM_ASSERT(map, cond, file, line)				\
208 	uvm_tree_assert((map), (cond), #cond, (file), (line))
209 void			 uvm_tree_sanity(struct vm_map*, char*, int);
210 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
211 void			 vmspace_validate(struct vm_map*);
212 #else
213 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
214 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
215 #define vmspace_validate(_map)				do {} while (0)
216 #endif
217 
218 /*
219  * All architectures will have pmap_prefer; no-op defaults are provided below for those that lack it.
220  */
221 #ifndef PMAP_PREFER
222 #define PMAP_PREFER_ALIGN()	(vaddr_t)PAGE_SIZE
223 #define PMAP_PREFER_OFFSET(off)	0
224 #define PMAP_PREFER(addr, off)	(addr)
225 #endif
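/*
 * With these no-op defaults, PMAP_PREFER(addr, off) returns addr unchanged
 * and PMAP_PREFER_OFFSET() is always 0, so on such architectures only the
 * caller-supplied alignment constrains address selection.
 */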
226 
227 
228 /*
229  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
230  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
231  *
232  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
233  * each time.
234  */
235 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
236 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
237 #define VM_MAP_KSIZE_ALLOCMUL	4
238 /*
239  * When selecting a random free-space block, look at most FSPACE_DELTA blocks
240  * ahead.
241  */
242 #define FSPACE_DELTA		8
243 /*
244  * Put allocations adjacent to previous allocations when the free-space tree
245  * is larger than FSPACE_COMPACT entries.
246  *
247  * Alignment and PMAP_PREFER may still cause the entry to not be fully
248  * adjacent. Note that this strategy reduces memory fragmentation (by leaving
249  * a large space before or after the allocation).
250  */
251 #define FSPACE_COMPACT		128
252 /*
253  * Make the address selection skip at most this many bytes from the start of
254  * the free space in which the allocation takes place.
255  *
256  * The main idea behind a randomized address space is that an attacker cannot
257  * know where to target his attack. Therefore, the location of objects must be
258  * as random as possible. However, the goal is not to create the most sparse
259  * map that is possible.
260  * FSPACE_MAXOFF pushes the considered range in bytes down to less insane
261  * sizes, thereby reducing the sparseness. The biggest randomization comes
262  * from fragmentation, i.e. FSPACE_COMPACT.
263  */
264 #define FSPACE_MAXOFF		((vaddr_t)32 * 1024 * 1024)
265 /*
266  * Allow for small gaps in the overflow areas.
267  * Gap size is in bytes and does not have to be a multiple of page-size.
268  */
269 #define FSPACE_BIASGAP		((vaddr_t)32 * 1024)
270 
271 /* auto-allocate address lower bound */
272 #define VMMAP_MIN_ADDR		PAGE_SIZE
273 
274 
275 #ifdef DEADBEEF0
276 #define UVMMAP_DEADBEEF		((void*)DEADBEEF0)
277 #else
278 #define UVMMAP_DEADBEEF		((void*)0xdeadd0d0)
279 #endif
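/*
 * UVMMAP_DEADBEEF is a poison value for the address-tree linkage of entries
 * that are not linked into a map: uvm_mapent_alloc() and
 * uvm_mapent_addr_remove() set the links to this value, and
 * uvm_mapent_addr_insert() panics when an entry being inserted does not
 * carry the poison (i.e. is still linked somewhere), catching double
 * inserts and double removes.
 */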
280 
281 #ifdef DEBUG
282 int uvm_map_printlocks = 0;
283 
284 #define LPRINTF(_args)							\
285 	do {								\
286 		if (uvm_map_printlocks)					\
287 			printf _args;					\
288 	} while (0)
289 #else
290 #define LPRINTF(_args)	do {} while (0)
291 #endif
292 
293 static struct timeval uvm_kmapent_last_warn_time;
294 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
295 
296 const char vmmapbsy[] = "vmmapbsy";
297 
298 /*
299  * pool for vmspace structures.
300  */
301 struct pool uvm_vmspace_pool;
302 
303 /*
304  * pool for dynamically-allocated map entries.
305  */
306 struct pool uvm_map_entry_pool;
307 struct pool uvm_map_entry_kmem_pool;
308 
309 /*
310  * This global represents the end of the kernel virtual address
311  * space. If we want to exceed this, we must grow the kernel
312  * virtual address space dynamically.
313  *
314  * Note, this variable is locked by kernel_map's lock.
315  */
316 vaddr_t uvm_maxkaddr;
317 
318 /*
319  * Locking predicate: assert that the map is write-locked (interrupt-safe maps are exempt).
320  */
321 #define UVM_MAP_REQ_WRITE(_map)						\
322 	do {								\
323 		if (((_map)->flags & VM_MAP_INTRSAFE) == 0)		\
324 			rw_assert_wrlock(&(_map)->lock);		\
325 	} while (0)
326 
327 /*
328  * Tree describing entries by address.
329  *
330  * Addresses are unique.
331  * Entries with start == end may only exist if they are the first entry
332  * (sorted by address) within a free-memory tree.
333  */
334 
335 static __inline int
336 uvm_mapentry_addrcmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
337 {
338 	return e1->start < e2->start ? -1 : e1->start > e2->start;
339 }
340 
341 /*
342  * Tree describing free memory.
343  *
344  * Free memory is indexed (so we can use array semantics in O(log N)).
345  * Free memory is ordered by size (so we can reduce fragmentation).
346  *
347  * The address range in the tree can be limited, having part of the
348  * free memory not in the free-memory tree. Only free memory in the
349  * tree will be considered during 'any address' allocations.
350  */
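/*
 * For example, two 8KB gaps sort by their start address, while a 4KB gap
 * sorts before both: size is the primary key, address the tie-breaker.
 */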
351 
352 static __inline int
353 uvm_mapentry_freecmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
354 {
355 	int cmp = e1->fspace < e2->fspace ? -1 : e1->fspace > e2->fspace;
356 	return cmp ? cmp : uvm_mapentry_addrcmp(e1, e2);
357 }
358 
359 /*
360  * Copy mapentry: only the fields between the start_copy and stop_copy markers are copied.
361  */
362 static __inline void
363 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
364 {
365 	caddr_t csrc, cdst;
366 	size_t sz;
367 
368 	csrc = (caddr_t)src;
369 	cdst = (caddr_t)dst;
370 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
371 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
372 
373 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
374 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
375 	memcpy(cdst, csrc, sz);
376 }
377 
378 /*
379  * Handle free-list insertion.
380  */
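/*
 * The UVM_ET_FREEMAPPED flag tracks whether an entry currently sits on a
 * free list; it is set here (when a uaddr is given) and cleared again in
 * uvm_mapent_free_remove(), which lets both functions assert consistent
 * usage.
 */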
381 void
382 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
383     struct vm_map_entry *entry)
384 {
385 	const struct uvm_addr_functions *fun;
386 #ifdef VMMAP_DEBUG
387 	vaddr_t min, max, bound;
388 #endif
389 
390 #ifdef VMMAP_DEBUG
391 	/*
392 	 * Boundary check.
393 	 * Boundaries are folded if they go on the same free list.
394 	 */
395 	min = VMMAP_FREE_START(entry);
396 	max = VMMAP_FREE_END(entry);
397 
398 	while (min < max) {
399 		bound = uvm_map_boundary(map, min, max);
400 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
401 		min = bound;
402 	}
403 #endif
404 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
405 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
406 
407 	UVM_MAP_REQ_WRITE(map);
408 
409 	/* Actual insert: forward to uaddr pointer. */
410 	if (uaddr != NULL) {
411 		fun = uaddr->uaddr_functions;
412 		KDASSERT(fun != NULL);
413 		if (fun->uaddr_free_insert != NULL)
414 			(*fun->uaddr_free_insert)(map, uaddr, entry);
415 		entry->etype |= UVM_ET_FREEMAPPED;
416 	}
417 
418 	/* Update fspace augmentation. */
419 	uvm_map_addr_augment(entry);
420 }
421 
422 /*
423  * Handle free-list removal.
424  */
425 void
426 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
427     struct vm_map_entry *entry)
428 {
429 	const struct uvm_addr_functions *fun;
430 
431 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
432 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
433 	UVM_MAP_REQ_WRITE(map);
434 
435 	if (uaddr != NULL) {
436 		fun = uaddr->uaddr_functions;
437 		if (fun->uaddr_free_remove != NULL)
438 			(*fun->uaddr_free_remove)(map, uaddr, entry);
439 		entry->etype &= ~UVM_ET_FREEMAPPED;
440 	}
441 }
442 
443 /*
444  * Handle address tree insertion.
445  */
446 void
447 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
448 {
449 	struct vm_map_entry *res;
450 
451 	if (RB_LEFT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF ||
452 	    RB_RIGHT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF ||
453 	    RB_PARENT(entry, daddrs.addr_entry) != UVMMAP_DEADBEEF)
454 		panic("uvm_mapent_addr_insert: entry still in addr list");
455 	KDASSERT(entry->start <= entry->end);
456 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
457 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
458 
459 	UVM_MAP_REQ_WRITE(map);
460 	res = RB_INSERT(uvm_map_addr, &map->addr, entry);
461 	if (res != NULL) {
462 		panic("uvm_mapent_addr_insert: map %p entry %p "
463 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
464 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
465 		    map, entry,
466 		    entry->start, entry->end, entry->guard, entry->fspace,
467 		    res, res->start, res->end, res->guard, res->fspace);
468 	}
469 }
470 
471 /*
472  * Handle address tree removal.
473  */
474 void
475 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
476 {
477 	struct vm_map_entry *res;
478 
479 	UVM_MAP_REQ_WRITE(map);
480 	res = RB_REMOVE(uvm_map_addr, &map->addr, entry);
481 	if (res != entry)
482 		panic("uvm_mapent_addr_remove");
483 	RB_LEFT(entry, daddrs.addr_entry) = RB_RIGHT(entry, daddrs.addr_entry) =
484 	    RB_PARENT(entry, daddrs.addr_entry) = UVMMAP_DEADBEEF;
485 }
486 
487 /*
488  * uvm_map_reference: add reference to a map
489  *
490  * XXX check map reference counter lock
491  */
492 #define uvm_map_reference(_map)						\
493 	do {								\
494 		simple_lock(&(_map)->ref_lock);				\
495 		(_map)->ref_count++;					\
496 		simple_unlock(&(_map)->ref_lock);			\
497 	} while (0)
498 
499 /*
500  * Calculate the dused delta: pages in [min, max) that fall outside the stack range.
501  */
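/*
 * For example, if the stack lives at [S, S + 1MB) and [min, max) covers 3MB
 * that fully contains the stack range, the loop below clips the range at the
 * stack boundaries and only the remaining 2MB are counted; the result is
 * returned in pages (hence the PAGE_SHIFT at the end).
 */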
502 vsize_t
503 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
504 {
505 	struct vmspace *vm;
506 	vsize_t sz;
507 	vaddr_t lmax;
508 	vaddr_t stack_begin, stack_end; /* Position of stack. */
509 
510 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
511 	vm = (struct vmspace *)map;
512 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
513 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
514 
515 	sz = 0;
516 	while (min != max) {
517 		lmax = max;
518 		if (min < stack_begin && lmax > stack_begin)
519 			lmax = stack_begin;
520 		else if (min < stack_end && lmax > stack_end)
521 			lmax = stack_end;
522 
523 		if (min >= stack_begin && min < stack_end) {
524 			/* nothing */
525 		} else
526 			sz += lmax - min;
527 		min = lmax;
528 	}
529 
530 	return sz >> PAGE_SHIFT;
531 }
532 
533 /*
534  * Find the entry whose range (mapping plus trailing free space) contains the given address.
535  */
536 struct vm_map_entry*
537 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
538 {
539 	struct vm_map_entry *iter;
540 
541 	iter = RB_ROOT(atree);
542 	while (iter != NULL) {
543 		if (iter->start > addr)
544 			iter = RB_LEFT(iter, daddrs.addr_entry);
545 		else if (VMMAP_FREE_END(iter) <= addr)
546 			iter = RB_RIGHT(iter, daddrs.addr_entry);
547 		else
548 			return iter;
549 	}
550 	return NULL;
551 }
552 
553 /*
554  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
555  *
556  * Push dead entries into a linked list.
557  * Since the linked list abuses the address tree for storage, the entry
558  * must not still be linked into a map.
559  *
560  * The dead queue must be initialized with TAILQ_INIT() before the first call to this macro.
561  * uvm_unmap_detach(*head, 0) will remove dead entries.
562  */
563 static __inline void
564 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
565 {
566 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
567 }
568 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
569 	dead_entry_push((_headptr), (_entry))
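/*
 * Typical usage, as in uvm_map() below:
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	...
 *	DEAD_ENTRY_PUSH(&dead, entry);
 *	...
 *	uvm_unmap_detach(&dead, 0);
 */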
570 
571 /*
572  * Helper function for uvm_map_findspace_tree.
573  *
574  * Given allocation constraints and pmap constraints, finds the
575  * lowest and highest address in a range that can be used for the
576  * allocation.
577  *
578  * pmap_align and pmap_off are ignored on non-PMAP_PREFER archs.
579  *
580  *
581  * Big chunk of math with a seasoning of dragons.
582  */
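/*
 * Illustrative example (assuming 4KB pages, no PMAP_PREFER, no bias):
 * for a free range [0x1000, 0x9000), sz = 0x2000 and guardpg set, the
 * selectable start addresses are [0x1000, 0x6000]; adding align = 0x4000
 * narrows that to the single address 0x4000.
 */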
583 int
584 uvm_map_sel_limits(vaddr_t *min, vaddr_t *max, vsize_t sz, int guardpg,
585     struct vm_map_entry *sel, vaddr_t align,
586     vaddr_t pmap_align, vaddr_t pmap_off, int bias)
587 {
588 	vaddr_t sel_min, sel_max;
589 #ifdef PMAP_PREFER
590 	vaddr_t pmap_min, pmap_max;
591 #endif /* PMAP_PREFER */
592 #ifdef DIAGNOSTIC
593 	int bad;
594 #endif /* DIAGNOSTIC */
595 
596 	sel_min = VMMAP_FREE_START(sel);
597 	sel_max = VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0);
598 
599 #ifdef PMAP_PREFER
600 
601 	/*
602 	 * There are two special cases in which we can satisfy both the align
603 	 * requirement and the pmap_prefer requirement:
604 	 * - when pmap_off == 0, we always select the larger of the two
605 	 * - when pmap_off % align == 0 and pmap_align > align, we simply
606 	 *   satisfy the pmap_align requirement and automatically
607 	 *   satisfy the align requirement.
608 	 */
609 	if (align > PAGE_SIZE &&
610 	    !(pmap_align > align && (pmap_off & (align - 1)) == 0)) {
611 		/*
612 		 * Simple case: only use align.
613 		 */
614 		sel_min = roundup(sel_min, align);
615 		sel_max &= ~(align - 1);
616 
617 		if (sel_min > sel_max)
618 			return ENOMEM;
619 
620 		/*
621 		 * Correct for bias.
622 		 */
623 		if (sel_max - sel_min > FSPACE_BIASGAP) {
624 			if (bias > 0) {
625 				sel_min = sel_max - FSPACE_BIASGAP;
626 				sel_min = roundup(sel_min, align);
627 			} else if (bias < 0) {
628 				sel_max = sel_min + FSPACE_BIASGAP;
629 				sel_max &= ~(align - 1);
630 			}
631 		}
632 	} else if (pmap_align != 0) {
633 		/*
634 		 * Special case: satisfy both pmap_prefer and
635 		 * align argument.
636 		 */
637 		pmap_max = sel_max & ~(pmap_align - 1);
638 		pmap_min = sel_min;
639 		if (pmap_max < sel_min)
640 			return ENOMEM;
641 
642 		/* Adjust pmap_min for BIASGAP for top-addr bias. */
643 		if (bias > 0 && pmap_max - pmap_min > FSPACE_BIASGAP)
644 			pmap_min = pmap_max - FSPACE_BIASGAP;
645 		/* Align pmap_min. */
646 		pmap_min &= ~(pmap_align - 1);
647 		if (pmap_min < sel_min)
648 			pmap_min += pmap_align;
649 		if (pmap_min > pmap_max)
650 			return ENOMEM;
651 
652 		/* Adjust pmap_max for BIASGAP for bottom-addr bias. */
653 		if (bias < 0 && pmap_max - pmap_min > FSPACE_BIASGAP) {
654 			pmap_max = (pmap_min + FSPACE_BIASGAP) &
655 			    ~(pmap_align - 1);
656 		}
657 		if (pmap_min > pmap_max)
658 			return ENOMEM;
659 
660 		/* Apply pmap prefer offset. */
661 		pmap_max |= pmap_off;
662 		if (pmap_max > sel_max)
663 			pmap_max -= pmap_align;
664 		pmap_min |= pmap_off;
665 		if (pmap_min < sel_min)
666 			pmap_min += pmap_align;
667 
668 		/*
669 		 * Fixup: it's possible that pmap_min and pmap_max
670 		 * cross each other. In this case, try to find one
671 		 * address that is allowed.
672 		 * (This usually happens in biased case.)
673 		 */
674 		if (pmap_min > pmap_max) {
675 			if (pmap_min < sel_max)
676 				pmap_max = pmap_min;
677 			else if (pmap_max > sel_min)
678 				pmap_min = pmap_max;
679 			else
680 				return ENOMEM;
681 		}
682 
683 		/* Internal validation. */
684 		KDASSERT(pmap_min <= pmap_max);
685 
686 		sel_min = pmap_min;
687 		sel_max = pmap_max;
688 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
689 		sel_min = sel_max - FSPACE_BIASGAP;
690 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
691 		sel_max = sel_min + FSPACE_BIASGAP;
692 
693 #else
694 
695 	if (align > PAGE_SIZE) {
696 		sel_min = roundup(sel_min, align);
697 		sel_max &= ~(align - 1);
698 		if (sel_min > sel_max)
699 			return ENOMEM;
700 
701 		if (bias != 0 && sel_max - sel_min > FSPACE_BIASGAP) {
702 			if (bias > 0) {
703 				sel_min = roundup(sel_max - FSPACE_BIASGAP,
704 				    align);
705 			} else {
706 				sel_max = (sel_min + FSPACE_BIASGAP) &
707 				    ~(align - 1);
708 			}
709 		}
710 	} else if (bias > 0 && sel_max - sel_min > FSPACE_BIASGAP)
711 		sel_min = sel_max - FSPACE_BIASGAP;
712 	else if (bias < 0 && sel_max - sel_min > FSPACE_BIASGAP)
713 		sel_max = sel_min + FSPACE_BIASGAP;
714 
715 #endif
716 
717 	if (sel_min > sel_max)
718 		return ENOMEM;
719 
720 #ifdef DIAGNOSTIC
721 	bad = 0;
722 	/* Lower boundary check. */
723 	if (sel_min < VMMAP_FREE_START(sel)) {
724 		printf("sel_min: 0x%lx, but should be at least 0x%lx\n",
725 		    sel_min, VMMAP_FREE_START(sel));
726 		bad++;
727 	}
728 	/* Upper boundary check. */
729 	if (sel_max > VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0)) {
730 		printf("sel_max: 0x%lx, but should be at most 0x%lx\n",
731 		    sel_max,
732 		    VMMAP_FREE_END(sel) - sz - (guardpg ? PAGE_SIZE : 0));
733 		bad++;
734 	}
735 	/* Lower boundary alignment. */
736 	if (align != 0 && (sel_min & (align - 1)) != 0) {
737 		printf("sel_min: 0x%lx, not aligned to 0x%lx\n",
738 		    sel_min, align);
739 		bad++;
740 	}
741 	/* Upper boundary alignment. */
742 	if (align != 0 && (sel_max & (align - 1)) != 0) {
743 		printf("sel_max: 0x%lx, not aligned to 0x%lx\n",
744 		    sel_max, align);
745 		bad++;
746 	}
747 	/* Lower boundary PMAP_PREFER check. */
748 	if (pmap_align != 0 && align == 0 &&
749 	    (sel_min & (pmap_align - 1)) != pmap_off) {
750 		printf("sel_min: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
751 		    sel_min, sel_min & (pmap_align - 1), pmap_off);
752 		bad++;
753 	}
754 	/* Upper boundary PMAP_PREFER check. */
755 	if (pmap_align != 0 && align == 0 &&
756 	    (sel_max & (pmap_align - 1)) != pmap_off) {
757 		printf("sel_max: 0x%lx, aligned to 0x%lx, expected 0x%lx\n",
758 		    sel_max, sel_max & (pmap_align - 1), pmap_off);
759 		bad++;
760 	}
761 
762 	if (bad) {
763 		panic("uvm_map_sel_limits(sz = %lu, guardpg = %c, "
764 		    "align = 0x%lx, pmap_align = 0x%lx, pmap_off = 0x%lx, "
765 		    "bias = %d, "
766 		    "FREE_START(sel) = 0x%lx, FREE_END(sel) = 0x%lx)",
767 		    sz, (guardpg ? 'T' : 'F'), align, pmap_align, pmap_off,
768 		    bias, VMMAP_FREE_START(sel), VMMAP_FREE_END(sel));
769 	}
770 #endif /* DIAGNOSTIC */
771 
772 	*min = sel_min;
773 	*max = sel_max;
774 	return 0;
775 }
776 
777 /*
778  * Test if memory starting at addr with sz bytes is free.
779  *
780  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
781  * the space.
782  * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
 * Returns 0 if the range is unavailable, nonzero (-1) if it is entirely free.
783  */
784 int
785 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
786     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
787     vaddr_t addr, vsize_t sz)
788 {
789 	struct uvm_addr_state *free;
790 	struct uvm_map_addr *atree;
791 	struct vm_map_entry *i, *i_end;
792 
793 	/*
794 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
795 	 */
796 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
797 		if (addr + sz > uvm_maxkaddr)
798 			return 0;
799 	}
800 
801 	atree = &map->addr;
802 
803 	/*
804 	 * Fill in first, last, so they point at the entries containing the
805 	 * first and last address of the range.
806 	 * Note that if they are not NULL, we don't perform the lookup.
807 	 */
808 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
809 	if (*start_ptr == NULL) {
810 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
811 		if (*start_ptr == NULL)
812 			return 0;
813 	} else
814 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
815 	if (*end_ptr == NULL) {
816 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
817 			*end_ptr = *start_ptr;
818 		else {
819 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
820 			if (*end_ptr == NULL)
821 				return 0;
822 		}
823 	} else
824 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
825 
826 	/*
827 	 * Validation.
828 	 */
829 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
830 	KDASSERT((*start_ptr)->start <= addr &&
831 	    VMMAP_FREE_END(*start_ptr) > addr &&
832 	    (*end_ptr)->start < addr + sz &&
833 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
834 
835 	/*
836 	 * Check that none of the entries intersects with <addr, addr+sz>.
837 	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
838 	 * considered unavailable unless called by those allocators.
839 	 */
840 	i = *start_ptr;
841 	i_end = RB_NEXT(uvm_map_addr, atree, *end_ptr);
842 	for (; i != i_end;
843 	    i = RB_NEXT(uvm_map_addr, atree, i)) {
844 		if (i->start != i->end && i->end > addr)
845 			return 0;
846 
847 		/*
848 		 * uaddr_exe and uaddr_brk_stack may only be used
849 		 * by these allocators and the NULL uaddr (i.e. no
850 		 * uaddr).
851 		 * Reject if this requirement is not met.
852 		 */
853 		if (uaddr != NULL) {
854 			free = uvm_map_uaddr_e(map, i);
855 
856 			if (uaddr != free && free != NULL &&
857 			    (free == map->uaddr_exe ||
858 			     free == map->uaddr_brk_stack))
859 				return 0;
860 		}
861 	}
862 
863 	return -1;
864 }
865 
866 /*
867  * Invoke each address selector until an address is found.
868  * Will not invoke uaddr_exe.
869  */
870 int
871 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
872     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
873     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
874 {
875 	struct uvm_addr_state *uaddr;
876 	int i;
877 
878 	/*
879 	 * Allocation for sz bytes at any address,
880 	 * using the addr selectors in order.
881 	 */
882 	for (i = 0; i < nitems(map->uaddr_any); i++) {
883 		uaddr = map->uaddr_any[i];
884 
885 		if (uvm_addr_invoke(map, uaddr, first, last,
886 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
887 			return 0;
888 	}
889 
890 	/*
891 	 * Fall back to brk() and stack() address selectors.
892 	 */
893 	uaddr = map->uaddr_brk_stack;
894 	if (uvm_addr_invoke(map, uaddr, first, last,
895 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
896 		return 0;
897 
898 	return ENOMEM;
899 }
900 
901 /* Calculate entry augmentation value: the largest fspace in the entry's subtree. */
902 vsize_t
903 uvm_map_addr_augment_get(struct vm_map_entry *entry)
904 {
905 	vsize_t			 augment;
906 	struct vm_map_entry	*left, *right;
907 
908 	augment = entry->fspace;
909 	if ((left = RB_LEFT(entry, daddrs.addr_entry)) != NULL)
910 		augment = MAX(augment, left->fspace_augment);
911 	if ((right = RB_RIGHT(entry, daddrs.addr_entry)) != NULL)
912 		augment = MAX(augment, right->fspace_augment);
913 	return augment;
914 }
915 
916 /*
917  * Update augmentation data in entry.
918  */
919 void
920 uvm_map_addr_augment(struct vm_map_entry *entry)
921 {
922 	vsize_t			 augment;
923 
924 	while (entry != NULL) {
925 		/* Calculate value for augmentation. */
926 		augment = uvm_map_addr_augment_get(entry);
927 
928 		/*
929 		 * Descend update.
930 		 * Once we find an entry that already has the correct value,
931 		 * stop, since it means all its parents will use the correct
932 		 * value too.
933 		 */
934 		if (entry->fspace_augment == augment)
935 			return;
936 		entry->fspace_augment = augment;
937 		entry = RB_PARENT(entry, daddrs.addr_entry);
938 	}
939 }
940 
941 /*
942  * uvm_map: establish a valid mapping in map
943  *
944  * => *addr and sz must be a multiple of PAGE_SIZE.
945  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
946  * => map must be unlocked.
947  * => <uobj,uoffset> value meanings (4 cases):
948  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
949  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
950  *	[3] <uobj,uoffset>		== normal mapping
951  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
952  *
953  *   case [4] is for kernel mappings where we don't know the offset until
954  *   we've found a virtual address.   note that kernel object offsets are
955  *   always relative to vm_map_min(kernel_map).
956  *
957  * => align: align vaddr, must be a power-of-2.
958  *    Align is only a hint and will be ignored if the alignment fails.
959  */
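/*
 * Illustrative call (a sketch, not taken from this tree; UVM_MAPFLAG and the
 * UVM_PROT_, UVM_INH_, UVM_ADV_ and UVM_FLAG_ constants come from
 * uvm_extern.h):
 *
 *	vaddr_t va = 0;
 *	error = uvm_map(map, &va, sz, NULL, UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, UVM_INH_NONE,
 *	    UVM_ADV_RANDOM, UVM_FLAG_COPYONW));
 *
 * This is case [2] above: no backing object, no PMAP_PREFER hint, and the
 * map chooses va.
 */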
960 int
961 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
962     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
963 {
964 	struct vm_map_entry	*first, *last, *entry;
965 	struct uvm_map_deadq	 dead;
966 	vm_prot_t		 prot;
967 	vm_prot_t		 maxprot;
968 	vm_inherit_t		 inherit;
969 	int			 advice;
970 	int			 error;
971 	vaddr_t			 pmap_align, pmap_offset;
972 	vaddr_t			 hint;
973 
974 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
975 		splassert(IPL_NONE);
976 	else
977 		splassert(IPL_VM);
978 
979 	/*
980 	 * We use pmap_align and pmap_offset as alignment and offset variables.
981 	 *
982 	 * Because the align parameter takes precedence over pmap_prefer,
983 	 * pmap_align is set to align (with pmap_offset = 0) whenever
984 	 * pmap_prefer cannot satisfy the requested alignment.
985 	 */
986 	if (uoffset == UVM_UNKNOWN_OFFSET) {
987 		pmap_align = MAX(align, PAGE_SIZE);
988 		pmap_offset = 0;
989 	} else {
990 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
991 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
992 
993 		if (align == 0 ||
994 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
995 			/*
996 			 * pmap_offset satisfies align, no change.
997 			 */
998 		} else {
999 			/*
1000 			 * Align takes precedence over pmap prefer.
1001 			 */
1002 			pmap_align = align;
1003 			pmap_offset = 0;
1004 		}
1005 	}
1006 
1007 	/*
1008 	 * Decode parameters.
1009 	 */
1010 	prot = UVM_PROTECTION(flags);
1011 	maxprot = UVM_MAXPROTECTION(flags);
1012 	advice = UVM_ADVICE(flags);
1013 	inherit = UVM_INHERIT(flags);
1014 	error = 0;
1015 	hint = trunc_page(*addr);
1016 	TAILQ_INIT(&dead);
1017 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
1018 	KASSERT((align & (align - 1)) == 0);
1019 
1020 	/*
1021 	 * Holes are incompatible with other types of mappings.
1022 	 */
1023 	if (flags & UVM_FLAG_HOLE) {
1024 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
1025 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
1026 	}
1027 
1028 	/*
1029 	 * Unset hint for kernel_map non-fixed allocations.
1030 	 */
1031 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1032 		hint = 0;
1033 
1034 	/*
1035 	 * Check protection.
1036 	 */
1037 	if ((prot & maxprot) != prot)
1038 		return EACCES;
1039 
1040 	if (flags & UVM_FLAG_TRYLOCK) {
1041 		if (vm_map_lock_try(map) == FALSE)
1042 			return EFAULT;
1043 	} else
1044 		vm_map_lock(map);
1045 
1046 	first = last = NULL;
1047 	if (flags & UVM_FLAG_FIXED) {
1048 		/*
1049 		 * Fixed location.
1050 		 *
1051 		 * Note: we ignore align, pmap_prefer.
1052 		 * Fill in first, last and *addr.
1053 		 */
1054 		KASSERT((*addr & PAGE_MASK) == 0);
1055 
1056 		/*
1057 		 * Grow pmap to include allocated address.
1058 		 * If the growth fails, the allocation will fail too.
1059 		 */
1060 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1061 		    uvm_maxkaddr < (*addr + sz)) {
1062 			uvm_map_kmem_grow(map, &dead,
1063 			    *addr + sz - uvm_maxkaddr, flags);
1064 		}
1065 
1066 		/*
1067 		 * Check that the space is available.
1068 		 */
1069 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1070 			error = ENOMEM;
1071 			goto unlock;
1072 		}
1073 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1074 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1075 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1076 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1077 		/*
1078 		 * Address used as hint.
1079 		 *
1080 		 * Note: we enforce the alignment restriction,
1081 		 * but ignore pmap_prefer.
1082 		 */
1083 	} else if ((maxprot & VM_PROT_EXECUTE) != 0 &&
1084 	    map->uaddr_exe != NULL) {
1085 		/*
1086 		 * Run selection algorithm for executables.
1087 		 */
1088 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1089 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1090 
1091 		/*
1092 		 * Grow kernel memory and try again.
1093 		 */
1094 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1095 			uvm_map_kmem_grow(map, &dead, sz, flags);
1096 
1097 			error = uvm_addr_invoke(map, map->uaddr_exe,
1098 			    &first, &last, addr, sz,
1099 			    pmap_align, pmap_offset, prot, hint);
1100 		}
1101 
1102 		if (error != 0)
1103 			goto unlock;
1104 	} else {
1105 		/*
1106 		 * Update freelists from vmspace.
1107 		 */
1108 		if (map->flags & VM_MAP_ISVMSPACE)
1109 			uvm_map_vmspace_update(map, &dead, flags);
1110 
1111 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1112 		    pmap_align, pmap_offset, prot, hint);
1113 
1114 		/*
1115 		 * Grow kernel memory and try again.
1116 		 */
1117 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1118 			uvm_map_kmem_grow(map, &dead, sz, flags);
1119 
1120 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1121 			    pmap_align, pmap_offset, prot, hint);
1122 		}
1123 
1124 		if (error != 0)
1125 			goto unlock;
1126 	}
1127 
1128 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1129 	    uvm_maxkaddr >= *addr + sz);
1130 
1131 	/*
1132 	 * If we only want a query, return now.
1133 	 */
1134 	if (flags & UVM_FLAG_QUERY) {
1135 		error = 0;
1136 		goto unlock;
1137 	}
1138 
1139 	if (uobj == NULL)
1140 		uoffset = 0;
1141 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1142 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1143 		uoffset = *addr - vm_map_min(kernel_map);
1144 	}
1145 
1146 	/*
1147 	 * Create new entry.
1148 	 * first and last may be invalidated after this call.
1149 	 */
1150 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead);
1151 	if (entry == NULL) {
1152 		error = ENOMEM;
1153 		goto unlock;
1154 	}
1155 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1156 	entry->object.uvm_obj = uobj;
1157 	entry->offset = uoffset;
1158 	entry->protection = prot;
1159 	entry->max_protection = maxprot;
1160 	entry->inheritance = inherit;
1161 	entry->wired_count = 0;
1162 	entry->advice = advice;
1163 	if (uobj)
1164 		entry->etype |= UVM_ET_OBJ;
1165 	else if (flags & UVM_FLAG_HOLE)
1166 		entry->etype |= UVM_ET_HOLE;
1167 	if (flags & UVM_FLAG_COPYONW) {
1168 		entry->etype |= UVM_ET_COPYONWRITE;
1169 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1170 			entry->etype |= UVM_ET_NEEDSCOPY;
1171 	}
1172 	if (flags & UVM_FLAG_OVERLAY) {
1173 		entry->aref.ar_pageoff = 0;
1174 		entry->aref.ar_amap = amap_alloc(sz,
1175 		    ptoa(flags & UVM_FLAG_AMAPPAD ? UVM_AMAP_CHUNK : 0),
1176 		    M_WAITOK);
1177 	}
1178 
1179 	/*
1180 	 * Update map and process statistics.
1181 	 */
1182 	if (!(flags & UVM_FLAG_HOLE)) {
1183 		map->size += sz;
1184 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL) {
1185 			((struct vmspace *)map)->vm_dused +=
1186 			    uvmspace_dused(map, *addr, *addr + sz);
1187 		}
1188 	}
1189 
1190 	/*
1191 	 * Try to merge entry.
1192 	 *
1193 	 * Userland allocations are kept separated most of the time.
1194 	 * Forego the effort of merging what most of the time can't be merged
1195 	 * and only try the merge if it concerns a kernel entry.
1196 	 */
1197 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1198 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1199 		uvm_mapent_tryjoin(map, entry, &dead);
1200 
1201 unlock:
1202 	vm_map_unlock(map);
1203 
1204 	/*
1205 	 * Remove dead entries.
1206 	 *
1207 	 * Dead entries may be the result of merging.
1208 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1209 	 * destroy free-space entries.
1210 	 */
1211 	uvm_unmap_detach(&dead, 0);
1212 	return error;
1213 }
1214 
1215 /*
1216  * True iff e1 and e2 can be joined together.
1217  */
1218 int
1219 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1220     struct vm_map_entry *e2)
1221 {
1222 	KDASSERT(e1 != NULL && e2 != NULL);
1223 
1224 	/*
1225 	 * Must be the same entry type and not have free memory between.
1226 	 */
1227 	if (e1->etype != e2->etype || e1->end != e2->start)
1228 		return 0;
1229 
1230 	/*
1231 	 * Submaps are never joined.
1232 	 */
1233 	if (UVM_ET_ISSUBMAP(e1))
1234 		return 0;
1235 
1236 	/*
1237 	 * Never merge wired memory.
1238 	 */
1239 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1240 		return 0;
1241 
1242 	/*
1243 	 * Protection, inheritance and advice must be equal.
1244 	 */
1245 	if (e1->protection != e2->protection ||
1246 	    e1->max_protection != e2->max_protection ||
1247 	    e1->inheritance != e2->inheritance ||
1248 	    e1->advice != e2->advice)
1249 		return 0;
1250 
1251 	/*
1252 	 * If uvm_object: the objects themselves and the offsets within them must match.
1253 	 */
1254 	if (UVM_ET_ISOBJ(e1)) {
1255 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1256 			return 0;
1257 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1258 			return 0;
1259 	}
1260 
1261 	/*
1262 	 * Cannot join shared amaps.
1263 	 * Note: no need to lock amap to look at refs, since we don't care
1264 	 * about its exact value.
1265 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1266 	 */
1267 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1268 		return 0;
1269 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1270 		return 0;
1271 
1272 	/*
1273 	 * Apparently, e1 and e2 match.
1274 	 */
1275 	return 1;
1276 }
1277 
1278 /*
1279  * Join support function.
1280  *
1281  * Returns the merged entry on success.
1282  * Returns NULL if the merge failed.
1283  */
1284 struct vm_map_entry*
1285 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1286     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1287 {
1288 	struct uvm_addr_state *free;
1289 
1290 	/*
1291 	 * Amap of e1 must be extended to include e2.
1292 	 * e2 contains no real information in its amap,
1293 	 * so it can be erased immediately.
1294 	 */
1295 	if (e1->aref.ar_amap) {
1296 		if (amap_extend(e1, e2->end - e2->start))
1297 			return NULL;
1298 	}
1299 
1300 	/*
1301 	 * Don't drop obj reference:
1302 	 * uvm_unmap_detach will do this for us.
1303 	 */
1304 
1305 	free = uvm_map_uaddr_e(map, e1);
1306 	uvm_mapent_free_remove(map, free, e1);
1307 
1308 	free = uvm_map_uaddr_e(map, e2);
1309 	uvm_mapent_free_remove(map, free, e2);
1310 	uvm_mapent_addr_remove(map, e2);
1311 	e1->end = e2->end;
1312 	e1->guard = e2->guard;
1313 	e1->fspace = e2->fspace;
1314 	uvm_mapent_free_insert(map, free, e1);
1315 
1316 	DEAD_ENTRY_PUSH(dead, e2);
1317 	return e1;
1318 }
1319 
1320 /*
1321  * Attempt forward and backward joining of entry.
1322  *
1323  * Returns entry after joins.
1324  * We are guaranteed that the amap of entry is either non-existent or
1325  * has never been used.
1326  */
1327 struct vm_map_entry*
1328 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1329     struct uvm_map_deadq *dead)
1330 {
1331 	struct vm_map_entry *other;
1332 	struct vm_map_entry *merged;
1333 
1334 	/*
1335 	 * Merge with previous entry.
1336 	 */
1337 	other = RB_PREV(uvm_map_addr, &map->addr, entry);
1338 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1339 		merged = uvm_mapent_merge(map, other, entry, dead);
1340 		if (merged)
1341 			entry = merged;
1342 	}
1343 
1344 	/*
1345 	 * Merge with next entry.
1346 	 *
1347 	 * Because amap can only extend forward and the next entry
1348 	 * probably contains sensible info, only perform forward merging
1349 	 * in the absence of an amap.
1350 	 */
1351 	other = RB_NEXT(uvm_map_addr, &map->addr, entry);
1352 	if (other && entry->aref.ar_amap == NULL &&
1353 	    other->aref.ar_amap == NULL &&
1354 	    uvm_mapent_isjoinable(map, entry, other)) {
1355 		merged = uvm_mapent_merge(map, entry, other, dead);
1356 		if (merged)
1357 			entry = merged;
1358 	}
1359 
1360 	return entry;
1361 }
1362 
1363 /*
1364  * Kill entries that are no longer in a map.
1365  */
1366 void
1367 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1368 {
1369 	struct vm_map_entry *entry;
1370 
1371 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1372 		/*
1373 		 * Drop reference to amap, if we've got one.
1374 		 */
1375 		if (entry->aref.ar_amap)
1376 			amap_unref(entry->aref.ar_amap,
1377 			    entry->aref.ar_pageoff,
1378 			    atop(entry->end - entry->start),
1379 			    flags);
1380 
1381 		/*
1382 		 * Drop reference to our backing object, if we've got one.
1383 		 */
1384 		if (UVM_ET_ISSUBMAP(entry)) {
1385 			/* ... unlikely to happen, but play it safe */
1386 			uvm_map_deallocate(entry->object.sub_map);
1387 		} else if (UVM_ET_ISOBJ(entry) &&
1388 		    entry->object.uvm_obj->pgops->pgo_detach) {
1389 			entry->object.uvm_obj->pgops->pgo_detach(
1390 			    entry->object.uvm_obj);
1391 		}
1392 
1393 		/*
1394 		 * Step to next.
1395 		 */
1396 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1397 		uvm_mapent_free(entry);
1398 	}
1399 }
1400 
1401 /*
1402  * Create and insert new entry.
1403  *
1404  * Returned entry contains new addresses and is inserted properly in the tree.
1405  * first and last are (probably) no longer valid.
1406  */
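/*
 * On return, entry covers [addr, addr + sz); the free space from addr + sz
 * up to VMMAP_FREE_END(last) is re-attached through uvm_map_fix_space(),
 * and any free space below addr remains with first (unless first is fully
 * replaced, in which case it is pushed onto the dead queue).
 */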
1407 struct vm_map_entry*
1408 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1409     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1410     struct uvm_map_deadq *dead)
1411 {
1412 	struct vm_map_entry *entry, *prev;
1413 	struct uvm_addr_state *free;
1414 	vaddr_t min, max;	/* free space boundaries for new entry */
1415 
1416 	KDASSERT(map != NULL);
1417 	KDASSERT(first != NULL);
1418 	KDASSERT(last != NULL);
1419 	KDASSERT(dead != NULL);
1420 	KDASSERT(sz > 0);
1421 	KDASSERT(addr + sz > addr);
1422 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1423 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1424 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1425 	uvm_tree_sanity(map, __FILE__, __LINE__);
1426 
1427 	min = addr + sz;
1428 	max = VMMAP_FREE_END(last);
1429 
1430 	/*
1431 	 * Initialize new entry.
1432 	 */
1433 	entry = uvm_mapent_alloc(map, flags);
1434 	if (entry == NULL)
1435 		return NULL;
1436 	entry->offset = 0;
1437 	entry->etype = 0;
1438 	entry->wired_count = 0;
1439 	entry->aref.ar_pageoff = 0;
1440 	entry->aref.ar_amap = NULL;
1441 
1442 	entry->start = addr;
1443 	entry->end = min;
1444 	entry->guard = 0;
1445 	entry->fspace = 0;
1446 
1447 	/*
1448 	 * Reset free space in first.
1449 	 */
1450 	free = uvm_map_uaddr_e(map, first);
1451 	uvm_mapent_free_remove(map, free, first);
1452 	first->guard = 0;
1453 	first->fspace = 0;
1454 
1455 	/*
1456 	 * Remove all entries that are fully replaced.
1457 	 * We are iterating using last in reverse order.
1458 	 */
1459 	for (; first != last; last = prev) {
1460 		prev = RB_PREV(uvm_map_addr, &map->addr, last);
1461 
1462 		KDASSERT(last->start == last->end);
1463 		free = uvm_map_uaddr_e(map, last);
1464 		uvm_mapent_free_remove(map, free, last);
1465 		uvm_mapent_addr_remove(map, last);
1466 		DEAD_ENTRY_PUSH(dead, last);
1467 	}
1468 	/*
1469 	 * Remove first if it is entirely inside <addr, addr+sz>.
1470 	 */
1471 	if (first->start == addr) {
1472 		uvm_mapent_addr_remove(map, first);
1473 		DEAD_ENTRY_PUSH(dead, first);
1474 	} else {
1475 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1476 		    addr, flags);
1477 	}
1478 
1479 	/*
1480 	 * Finally, link in entry.
1481 	 */
1482 	uvm_mapent_addr_insert(map, entry);
1483 	uvm_map_fix_space(map, entry, min, max, flags);
1484 
1485 	uvm_tree_sanity(map, __FILE__, __LINE__);
1486 	return entry;
1487 }
1488 
1489 /*
1490  * uvm_mapent_alloc: allocate a map entry
1491  */
1492 struct vm_map_entry *
1493 uvm_mapent_alloc(struct vm_map *map, int flags)
1494 {
1495 	struct vm_map_entry *me, *ne;
1496 	int s, i;
1497 	int pool_flags;
1498 
1499 	pool_flags = PR_WAITOK;
1500 	if (flags & UVM_FLAG_TRYLOCK)
1501 		pool_flags = PR_NOWAIT;
1502 
1503 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1504 		s = splvm();
1505 		simple_lock(&uvm.kentry_lock);
1506 		me = uvm.kentry_free;
1507 		if (me == NULL) {
1508 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1509 			    &kd_nowait);
1510 			if (ne == NULL)
1511 				panic("uvm_mapent_alloc: cannot allocate map "
1512 				    "entry");
1513 			for (i = 0;
1514 			    i < PAGE_SIZE / sizeof(struct vm_map_entry) - 1;
1515 			    i++)
1516 				RB_LEFT(&ne[i], daddrs.addr_entry) = &ne[i + 1];
1517 			RB_LEFT(&ne[i], daddrs.addr_entry) = NULL;
1518 			me = ne;
1519 			if (ratecheck(&uvm_kmapent_last_warn_time,
1520 			    &uvm_kmapent_warn_rate))
1521 				printf("uvm_mapent_alloc: out of static "
1522 				    "map entries\n");
1523 		}
1524 		uvm.kentry_free = RB_LEFT(me, daddrs.addr_entry);
1525 		uvmexp.kmapent++;
1526 		simple_unlock(&uvm.kentry_lock);
1527 		splx(s);
1528 		me->flags = UVM_MAP_STATIC;
1529 	} else if (map == kernel_map) {
1530 		splassert(IPL_NONE);
1531 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1532 		if (me == NULL)
1533 			goto out;
1534 		me->flags = UVM_MAP_KMEM;
1535 	} else {
1536 		splassert(IPL_NONE);
1537 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1538 		if (me == NULL)
1539 			goto out;
1540 		me->flags = 0;
1541 	}
1542 
1543 	if (me != NULL) {
1544 		RB_LEFT(me, daddrs.addr_entry) =
1545 		    RB_RIGHT(me, daddrs.addr_entry) =
1546 		    RB_PARENT(me, daddrs.addr_entry) = UVMMAP_DEADBEEF;
1547 	}
1548 
1549 out:
1550 	return(me);
1551 }
1552 
1553 /*
1554  * uvm_mapent_free: free map entry
1555  *
1556  * => XXX: static pool for kernel map?
1557  */
1558 void
1559 uvm_mapent_free(struct vm_map_entry *me)
1560 {
1561 	int s;
1562 
1563 	if (me->flags & UVM_MAP_STATIC) {
1564 		s = splvm();
1565 		simple_lock(&uvm.kentry_lock);
1566 		RB_LEFT(me, daddrs.addr_entry) = uvm.kentry_free;
1567 		uvm.kentry_free = me;
1568 		uvmexp.kmapent--;
1569 		simple_unlock(&uvm.kentry_lock);
1570 		splx(s);
1571 	} else if (me->flags & UVM_MAP_KMEM) {
1572 		splassert(IPL_NONE);
1573 		pool_put(&uvm_map_entry_kmem_pool, me);
1574 	} else {
1575 		splassert(IPL_NONE);
1576 		pool_put(&uvm_map_entry_pool, me);
1577 	}
1578 }
1579 
1580 /*
1581  * uvm_map_lookup_entry: find map entry at or before an address.
1582  *
1583  * => map must at least be read-locked by caller
1584  * => entry is returned in "entry"
1585  * => return value is true if address is in the returned entry
1586  * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1587  * returned for those mappings.
1588  */
1589 boolean_t
1590 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1591     struct vm_map_entry **entry)
1592 {
1593 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1594 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1595 	    (*entry)->start <= address && (*entry)->end > address;
1596 }
1597 
1598 /*
1599  * uvm_map_pie: return a properly aligned random load address for a
1600  * PIE executable.
1601  */
1602 #ifndef VM_PIE_MAX_ADDR
1603 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1604 #endif
1605 
1606 #ifndef VM_PIE_MIN_ADDR
1607 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1608 #endif
1609 
1610 #ifndef VM_PIE_MIN_ALIGN
1611 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1612 #endif
1613 
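/*
 * For example, with align = 0x10000 the result is min + k * 0x10000 for a
 * uniformly random k in [0, (VM_PIE_MAX_ADDR - min) / 0x10000), where min is
 * VM_PIE_MIN_ADDR rounded up to that alignment.
 */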
1614 vaddr_t
1615 uvm_map_pie(vaddr_t align)
1616 {
1617 	vaddr_t addr, space, min;
1618 
1619 	align = MAX(align, VM_PIE_MIN_ALIGN);
1620 
1621 	/* round up to next alignment */
1622 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1623 
1624 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1625 		return (align);
1626 
1627 	space = (VM_PIE_MAX_ADDR - min) / align;
1628 	space = MIN(space, (u_int32_t)-1);
1629 
1630 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1631 	addr += min;
1632 
1633 	return (addr);
1634 }
1635 
1636 void
1637 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1638 {
1639 	struct uvm_map_deadq dead;
1640 
1641 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1642 	    (end & (vaddr_t)PAGE_MASK) == 0);
1643 	TAILQ_INIT(&dead);
1644 	vm_map_lock(map);
1645 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE);
1646 	vm_map_unlock(map);
1647 
1648 	uvm_unmap_detach(&dead, 0);
1649 }
1650 
1651 /*
1652  * Mark entry as free.
1653  *
1654  * entry will be put on the dead list.
1655  * The free space will be merged into the previous or a new entry,
1656  * unless markfree is false.
1657  */
1658 void
1659 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1660     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1661     boolean_t markfree)
1662 {
1663 	struct uvm_addr_state	*free;
1664 	struct vm_map_entry	*prev;
1665 	vaddr_t			 addr;	/* Start of freed range. */
1666 	vaddr_t			 end;	/* End of freed range. */
1667 
1668 	prev = *prev_ptr;
1669 	if (prev == entry)
1670 		*prev_ptr = prev = NULL;
1671 
1672 	if (prev == NULL ||
1673 	    VMMAP_FREE_END(prev) != entry->start)
1674 		prev = RB_PREV(uvm_map_addr, &map->addr, entry);
1675 	/*
1676 	 * Entry is describing only free memory and has nothing to drain into.
1677 	 */
1678 	if (prev == NULL && entry->start == entry->end && markfree) {
1679 		*prev_ptr = entry;
1680 		return;
1681 	}
1682 
1683 	addr = entry->start;
1684 	end = VMMAP_FREE_END(entry);
1685 	free = uvm_map_uaddr_e(map, entry);
1686 	uvm_mapent_free_remove(map, free, entry);
1687 	uvm_mapent_addr_remove(map, entry);
1688 	DEAD_ENTRY_PUSH(dead, entry);
1689 
1690 	if (markfree) {
1691 		if (prev) {
1692 			free = uvm_map_uaddr_e(map, prev);
1693 			uvm_mapent_free_remove(map, free, prev);
1694 		}
1695 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1696 	}
1697 }
1698 
1699 /*
1700  * Unwire and release referenced amap and object from map entry.
1701  */
1702 void
1703 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1704 {
1705 	/*
1706 	 * Unwire removed map entry.
1707 	 */
1708 	if (VM_MAPENT_ISWIRED(entry)) {
1709 		entry->wired_count = 0;
1710 		uvm_fault_unwire_locked(map, entry->start, entry->end);
1711 	}
1712 
1713 	/*
1714 	 * Entry-type specific code.
1715 	 */
1716 	if (UVM_ET_ISHOLE(entry)) {
1717 		/*
1718 		 * Nothing to be done for holes.
1719 		 */
1720 	} else if (map->flags & VM_MAP_INTRSAFE) {
1721 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1722 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
1723 		pmap_kremove(entry->start, entry->end - entry->start);
1724 	} else if (UVM_ET_ISOBJ(entry) &&
1725 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1726 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1727 
1728 		/*
1729 		 * Note: kernel object mappings are currently used in
1730 		 * two ways:
1731 		 *  [1] "normal" mappings of pages in the kernel object
1732 		 *  [2] uvm_km_valloc'd allocations in which we
1733 		 *      pmap_enter in some non-kernel-object page
1734 		 *      (e.g. vmapbuf).
1735 		 *
1736 		 * for case [1], we need to remove the mapping from
1737 		 * the pmap and then remove the page from the kernel
1738 		 * object (because, once pages in a kernel object are
1739 		 * unmapped they are no longer needed, unlike, say,
1740 		 * a vnode where you might want the data to persist
1741 		 * until flushed out of a queue).
1742 		 *
1743 		 * for case [2], we need to remove the mapping from
1744 		 * the pmap.  there shouldn't be any pages at the
1745 		 * specified offset in the kernel object [but it
1746 		 * doesn't hurt to call uvm_km_pgremove just to be
1747 		 * safe?]
1748 		 *
1749 		 * uvm_km_pgremove currently does the following:
1750 		 *   for pages in the kernel object range:
1751 		 *     - drops the swap slot
1752 		 *     - uvm_pagefree the page
1753 		 *
754 		 * note there is a version of uvm_km_pgremove() that
1755 		 * is used for "intrsafe" objects.
1756 		 */
1757 
1758 		/*
1759 		 * remove mappings from pmap and drop the pages
1760 		 * from the object.  offsets are always relative
1761 		 * to vm_map_min(kernel_map).
1762 		 */
1763 		pmap_remove(pmap_kernel(), entry->start, entry->end);
1764 		uvm_km_pgremove(entry->object.uvm_obj,
1765 		    entry->start - vm_map_min(kernel_map),
1766 		    entry->end - vm_map_min(kernel_map));
1767 
1768 		/*
1769 		 * null out kernel_object reference, we've just
1770 		 * dropped it
1771 		 */
1772 		entry->etype &= ~UVM_ET_OBJ;
1773 		entry->object.uvm_obj = NULL;  /* to be safe */
1774 	} else {
1775 		/*
1776 		 * remove mappings the standard way.
1777 		 */
1778 		pmap_remove(map->pmap, entry->start, entry->end);
1779 	}
1780 }
1781 
1782 /*
1783  * Remove all entries from start to end.
1784  *
1785  * If remove_holes, then remove ET_HOLE entries as well.
1786  * If markfree, the freed range will be properly marked free; otherwise no
1787  * replacement entry will be put in the tree (corrupting the tree).
1788  */
1789 void
1790 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
1791     struct uvm_map_deadq *dead, boolean_t remove_holes,
1792     boolean_t markfree)
1793 {
1794 	struct vm_map_entry *prev_hint, *next, *entry;
1795 
1796 	start = MAX(start, map->min_offset);
1797 	end = MIN(end, map->max_offset);
1798 	if (start >= end)
1799 		return;
1800 
1801 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
1802 		splassert(IPL_NONE);
1803 	else
1804 		splassert(IPL_VM);
1805 
1806 	/*
1807 	 * Find first affected entry.
1808 	 */
1809 	entry = uvm_map_entrybyaddr(&map->addr, start);
1810 	KDASSERT(entry != NULL && entry->start <= start);
1811 	if (entry->end <= start && markfree)
1812 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
1813 	else
1814 		UVM_MAP_CLIP_START(map, entry, start);
1815 
1816 	/*
1817 	 * Iterate entries until we reach end address.
1818 	 * prev_hint hints where the freed space can be appended to.
1819 	 */
1820 	prev_hint = NULL;
1821 	for (; entry != NULL && entry->start < end; entry = next) {
1822 		KDASSERT(entry->start >= start);
1823 		if (entry->end > end || !markfree)
1824 			UVM_MAP_CLIP_END(map, entry, end);
1825 		KDASSERT(entry->start >= start && entry->end <= end);
1826 		next = RB_NEXT(uvm_map_addr, &map->addr, entry);
1827 
1828 		/* Don't remove holes unless asked to do so. */
1829 		if (UVM_ET_ISHOLE(entry)) {
1830 			if (!remove_holes) {
1831 				prev_hint = entry;
1832 				continue;
1833 			}
1834 		}
1835 
1836 		/* Kill entry. */
1837 		uvm_unmap_kill_entry(map, entry);
1838 
1839 		/*
1840 		 * Update space usage.
1841 		 */
1842 		if ((map->flags & VM_MAP_ISVMSPACE) &&
1843 		    entry->object.uvm_obj == NULL &&
1844 		    !UVM_ET_ISHOLE(entry)) {
1845 			((struct vmspace *)map)->vm_dused -=
1846 			    uvmspace_dused(map, entry->start, entry->end);
1847 		}
1848 		if (!UVM_ET_ISHOLE(entry))
1849 			map->size -= entry->end - entry->start;
1850 
1851 		/*
1852 		 * Actual removal of entry.
1853 		 */
1854 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
1855 	}
1856 
1857 	pmap_update(vm_map_pmap(map));
1858 
1859 #ifdef VMMAP_DEBUG
1860 	if (markfree) {
1861 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
1862 		    entry != NULL && entry->start < end;
1863 		    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
1864 			KDASSERT(entry->end <= start ||
1865 			    entry->start == entry->end ||
1866 			    UVM_ET_ISHOLE(entry));
1867 		}
1868 	} else {
1869 		vaddr_t a;
1870 		for (a = start; a < end; a += PAGE_SIZE)
1871 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
1872 	}
1873 #endif
1874 }
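
/*
 * Illustrative, non-compiled sketch (assumption: not part of this file's
 * build): munmap(2) is the usual userland path into uvm_unmap_remove().
 * Unmapping part of a mapping exercises the clipping above, where the
 * affected entry is split at the requested boundaries.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	char *p;

	/* Two-page private anonymous mapping. */
	p = mmap(NULL, 2 * pgsz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* Drop only the second page; the map entry is clipped at p+pgsz. */
	if (munmap(p + pgsz, pgsz) == -1)
		err(1, "munmap");

	munmap(p, pgsz);
	return 0;
}
#endif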
1875 
1876 /*
1877  * Mark all entries from first until end (exclusive) as pageable.
1878  *
1879  * Lock must be exclusive on entry and will not be touched.
1880  */
1881 void
1882 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
1883     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
1884 {
1885 	struct vm_map_entry *iter;
1886 
1887 	for (iter = first; iter != end;
1888 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1889 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
1890 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
1891 			continue;
1892 
1893 		iter->wired_count = 0;
1894 		uvm_fault_unwire_locked(map, iter->start, iter->end);
1895 	}
1896 }
1897 
1898 /*
1899  * Mark all entries from first until end (exclusive) as wired.
1900  *
1901  * Lockflags determines the lock state on return from this function.
1902  * Lock must be exclusive on entry.
1903  */
1904 int
1905 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
1906     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
1907     int lockflags)
1908 {
1909 	struct vm_map_entry *iter;
1910 #ifdef DIAGNOSTIC
1911 	unsigned int timestamp_save;
1912 #endif
1913 	int error;
1914 
1915 	/*
1916 	 * Wire pages in two passes:
1917 	 *
1918 	 * 1: holding the write lock, we create any anonymous maps that need
1919 	 *    to be created.  then we clip each map entry to the region to
1920 	 *    be wired and increment its wiring count.
1921 	 *
1922 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
1923 	 *    in the pages for any newly wired area (wired_count == 1).
1924 	 *
1925 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
1926 	 *    deadlock with another thread that may have faulted on one of
1927 	 *    the pages to be wired (it would mark the page busy, blocking
1928 	 *    us, then in turn block on the map lock that we hold).
1929 	 *    because we keep the read lock on the map, the copy-on-write
1930 	 *    status of the entries we modify here cannot change.
1931 	 */
1932 	for (iter = first; iter != end;
1933 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1934 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
1935 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end)
1936 			continue;
1937 
1938 		/*
1939 		 * Perform actions of vm_map_lookup that need the write lock.
1940 		 * - create an anonymous map for copy-on-write
1941 		 * - anonymous map for zero-fill
1942 		 * Skip submaps.
1943 		 */
1944 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
1945 		    UVM_ET_ISNEEDSCOPY(iter) &&
1946 		    ((iter->protection & VM_PROT_WRITE) ||
1947 		    iter->object.uvm_obj == NULL)) {
1948 			amap_copy(map, iter, M_WAITOK, TRUE,
1949 			    iter->start, iter->end);
1950 		}
1951 		iter->wired_count++;
1952 	}
1953 
1954 	/*
1955 	 * Pass 2.
1956 	 */
1957 #ifdef DIAGNOSTIC
1958 	timestamp_save = map->timestamp;
1959 #endif
1960 	vm_map_busy(map);
1961 	vm_map_downgrade(map);
1962 
1963 	error = 0;
1964 	for (iter = first; error == 0 && iter != end;
1965 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
1966 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end)
1967 			continue;
1968 
1969 		error = uvm_fault_wire(map, iter->start, iter->end,
1970 		    iter->protection);
1971 	}
1972 
1973 	if (error) {
1974 		/*
1975 		 * uvm_fault_wire failure
1976 		 *
1977 		 * Reacquire lock and undo our work.
1978 		 */
1979 		vm_map_upgrade(map);
1980 		vm_map_unbusy(map);
1981 #ifdef DIAGNOSTIC
1982 		if (timestamp_save != map->timestamp)
1983 			panic("uvm_map_pageable_wire: stale map");
1984 #endif
1985 
1986 		/*
1987 		 * first is no longer needed to restart loops.
1988 		 * Use it as iterator to unmap successful mappings.
1989 		 */
1990 		for (; first != iter;
1991 		    first = RB_NEXT(uvm_map_addr, &map->addr, first)) {
1992 			if (UVM_ET_ISHOLE(first) || first->start == first->end)
1993 				continue;
1994 
1995 			first->wired_count--;
1996 			if (!VM_MAPENT_ISWIRED(first)) {
1997 				uvm_fault_unwire_locked(map,
1998 				    first->start, first->end);
1999 			}
2000 		}
2001 
2002 		/*
2003 		 * decrease counter in the rest of the entries
2004 		 */
2005 		for (; iter != end;
2006 		    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
2007 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end)
2008 				continue;
2009 
2010 			iter->wired_count--;
2011 		}
2012 
2013 		if ((lockflags & UVM_LK_EXIT) == 0)
2014 			vm_map_unlock(map);
2015 		return error;
2016 	}
2017 
2018 	/*
2019 	 * We are currently holding a read lock.
2020 	 */
2021 	if ((lockflags & UVM_LK_EXIT) == 0) {
2022 		vm_map_unbusy(map);
2023 		vm_map_unlock_read(map);
2024 	} else {
2025 		vm_map_upgrade(map);
2026 		vm_map_unbusy(map);
2027 #ifdef DIAGNOSTIC
2028 		if (timestamp_save != map->timestamp)
2029 			panic("uvm_map_pageable_wire: stale map");
2030 #endif
2031 	}
2032 	return 0;
2033 }
2034 
2035 /*
2036  * uvm_map_pageable: set pageability of a range in a map.
2037  *
2038  * Flags:
2039  * UVM_LK_ENTER: map is already locked by caller
2040  * UVM_LK_EXIT:  don't unlock map on exit
2041  *
2042  * The full range must be in use (entries may not have fspace != 0).
2043  * UVM_ET_HOLE counts as unmapped.
2044  */
2045 int
2046 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2047     boolean_t new_pageable, int lockflags)
2048 {
2049 	struct vm_map_entry *first, *last, *tmp;
2050 	int error;
2051 
2052 	start = trunc_page(start);
2053 	end = round_page(end);
2054 
2055 	if (start > end)
2056 		return EINVAL;
2057 	if (start == end)
2058 		return 0;	/* nothing to do */
2059 	if (start < map->min_offset)
2060 		return EFAULT; /* why? see first XXX below */
2061 	if (end > map->max_offset)
2062 		return EINVAL; /* why? see second XXX below */
2063 
2064 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2065 	if ((lockflags & UVM_LK_ENTER) == 0)
2066 		vm_map_lock(map);
2067 
2068 	/*
2069 	 * Find first entry.
2070 	 *
2071 	 * Initial test on start is different, because of the different
2072 	 * error returned. Rest is tested further down.
2073 	 */
2074 	first = uvm_map_entrybyaddr(&map->addr, start);
2075 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2076 		/*
2077 		 * XXX if the first address is not mapped, it is EFAULT?
2078 		 */
2079 		error = EFAULT;
2080 		goto out;
2081 	}
2082 
2083 	/*
2084 	 * Check that the range has no holes.
2085 	 */
2086 	for (last = first; last != NULL && last->start < end;
2087 	    last = RB_NEXT(uvm_map_addr, &map->addr, last)) {
2088 		if (UVM_ET_ISHOLE(last) ||
2089 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2090 			/*
2091 			 * XXX unmapped memory in range, why is it EINVAL
2092 			 * instead of EFAULT?
2093 			 */
2094 			error = EINVAL;
2095 			goto out;
2096 		}
2097 	}
2098 
2099 	/*
2100 	 * Last ended at the first entry after the range.
2101 	 * Move back one step.
2102 	 *
2103 	 * Note that last may be NULL.
2104 	 */
2105 	if (last == NULL) {
2106 		last = RB_MAX(uvm_map_addr, &map->addr);
2107 		if (last->end < end) {
2108 			error = EINVAL;
2109 			goto out;
2110 		}
2111 	} else {
2112 		KASSERT(last != first);
2113 		last = RB_PREV(uvm_map_addr, &map->addr, last);
2114 	}
2115 
2116 	/*
2117 	 * Wire/unwire pages here.
2118 	 */
2119 	if (new_pageable) {
2120 		/*
2121 		 * Mark pageable.
2122 		 * entries that are not wired are untouched.
2123 		 */
2124 		if (VM_MAPENT_ISWIRED(first))
2125 			UVM_MAP_CLIP_START(map, first, start);
2126 		/*
2127 		 * Split last at end.
2128 		 * Make tmp be the first entry after what is to be touched.
2129 		 * If last is not wired, don't touch it.
2130 		 */
2131 		if (VM_MAPENT_ISWIRED(last)) {
2132 			UVM_MAP_CLIP_END(map, last, end);
2133 			tmp = RB_NEXT(uvm_map_addr, &map->addr, last);
2134 		} else
2135 			tmp = last;
2136 
2137 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2138 		error = 0;
2139 
2140 out:
2141 		if ((lockflags & UVM_LK_EXIT) == 0)
2142 			vm_map_unlock(map);
2143 		return error;
2144 	} else {
2145 		/*
2146 		 * Mark entries wired.
2147 		 * entries are always touched (because recovery needs this).
2148 		 */
2149 		if (!VM_MAPENT_ISWIRED(first))
2150 			UVM_MAP_CLIP_START(map, first, start);
2151 		/*
2152 		 * Split last at end.
2153 		 * Make tmp be the first entry after what is to be touched.
2154 		 * If last is not wired, don't touch it.
2155 		 */
2156 		if (!VM_MAPENT_ISWIRED(last)) {
2157 			UVM_MAP_CLIP_END(map, last, end);
2158 			tmp = RB_NEXT(uvm_map_addr, &map->addr, last);
2159 		} else
2160 			tmp = last;
2161 
2162 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2163 		    lockflags);
2164 	}
2165 }
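
/*
 * Illustrative, non-compiled sketch (assumption: not part of this file's
 * build): mlock(2)/munlock(2) are the usual userland entry points into
 * uvm_map_pageable(), wiring and later unwiring the pages backing a range.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	void *buf;

	if ((buf = malloc(pgsz)) == NULL)
		err(1, "malloc");

	/* Wire the pages containing buf; no faults while locked. */
	if (mlock(buf, pgsz) == -1)
		err(1, "mlock");

	/* ... latency-critical use of buf ... */

	/* Make the pages pageable again. */
	if (munlock(buf, pgsz) == -1)
		err(1, "munlock");
	free(buf);
	return 0;
}
#endif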
2166 
2167 /*
2168  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2169  * all mapped regions.
2170  *
2171  * Map must not be locked.
2172  * If no flags are specified, all regions are unwired.
2173  */
2174 int
2175 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2176 {
2177 	vsize_t size;
2178 	struct vm_map_entry *iter;
2179 
2180 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2181 	vm_map_lock(map);
2182 
2183 	if (flags == 0) {
2184 		uvm_map_pageable_pgon(map, RB_MIN(uvm_map_addr, &map->addr),
2185 		    NULL, map->min_offset, map->max_offset);
2186 
2187 		atomic_clearbits_int(&map->flags, VM_MAP_WIREFUTURE);
2188 		vm_map_unlock(map);
2189 		return 0;
2190 	}
2191 
2192 	if (flags & MCL_FUTURE)
2193 		atomic_setbits_int(&map->flags, VM_MAP_WIREFUTURE);
2194 	if (!(flags & MCL_CURRENT)) {
2195 		vm_map_unlock(map);
2196 		return 0;
2197 	}
2198 
2199 	/*
2200 	 * Count number of pages in all non-wired entries.
2201 	 * If the number exceeds the limit, abort.
2202 	 */
2203 	size = 0;
2204 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2205 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2206 			continue;
2207 
2208 		size += iter->end - iter->start;
2209 	}
2210 
2211 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2212 		vm_map_unlock(map);
2213 		return ENOMEM;
2214 	}
2215 
2216 	/* XXX non-pmap_wired_count case must be handled by caller */
2217 #ifdef pmap_wired_count
2218 	if (limit != 0 &&
2219 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2220 		vm_map_unlock(map);
2221 		return ENOMEM;
2222 	}
2223 #endif
2224 
2225 	/*
2226 	 * uvm_map_pageable_wire will release the lock.
2227 	 */
2228 	return uvm_map_pageable_wire(map, RB_MIN(uvm_map_addr, &map->addr),
2229 	    NULL, map->min_offset, map->max_offset, 0);
2230 }
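
/*
 * Illustrative, non-compiled sketch (assumption: not part of this file's
 * build): mlockall(2) is the userland face of uvm_map_pageable_all().
 * MCL_CURRENT wires everything mapped right now; MCL_FUTURE sets
 * VM_MAP_WIREFUTURE so later mappings are wired as they are established.
 */
#if 0
#include <sys/mman.h>
#include <err.h>

int
main(void)
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		err(1, "mlockall");

	/* ... run with the whole address space wired ... */

	munlockall();
	return 0;
}
#endif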
2231 
2232 /*
2233  * Initialize map.
2234  *
2235  * Allocates sufficient entries to describe the free memory in the map.
2236  */
2237 void
2238 uvm_map_setup(struct vm_map *map, vaddr_t min, vaddr_t max, int flags)
2239 {
2240 	int i;
2241 
2242 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2243 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2244 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2245 
2246 	/*
2247 	 * Update parameters.
2248 	 *
2249 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2250 	 * properly.
2251 	 * We lose the top page if the full virtual address space is used.
2252 	 */
2253 	if (max & (vaddr_t)PAGE_MASK) {
2254 		max += 1;
2255 		if (max == 0) /* overflow */
2256 			max -= PAGE_SIZE;
2257 	}
2258 
2259 	RB_INIT(&map->addr);
2260 	map->uaddr_exe = NULL;
2261 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2262 		map->uaddr_any[i] = NULL;
2263 	map->uaddr_brk_stack = NULL;
2264 
2265 	map->size = 0;
2266 	map->ref_count = 1;
2267 	map->min_offset = min;
2268 	map->max_offset = max;
2269 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2270 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2271 	map->flags = flags;
2272 	map->timestamp = 0;
2273 	rw_init(&map->lock, "vmmaplk");
2274 	simple_lock_init(&map->ref_lock);
2275 
2276 	/*
2277 	 * Configure the allocators.
2278 	 */
2279 	if (flags & VM_MAP_ISVMSPACE)
2280 		uvm_map_setup_md(map);
2281 	else
2282 		map->uaddr_any[3] = &uaddr_kbootstrap;
2283 
2284 	/*
2285 	 * Fill map entries.
2286 	 * This requires a write-locked map (because of diagnostic assertions
2287 	 * in insert code).
2288 	 */
2289 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
2290 		if (rw_enter(&map->lock, RW_NOSLEEP|RW_WRITE) != 0)
2291 			panic("uvm_map_setup: rw_enter failed on new map");
2292 	}
2293 	uvm_map_setup_entries(map);
2294 	uvm_tree_sanity(map, __FILE__, __LINE__);
2295 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2296 		rw_exit(&map->lock);
2297 }
2298 
2299 /*
2300  * Destroy the map.
2301  *
2302  * This is the inverse operation to uvm_map_setup.
2303  */
2304 void
2305 uvm_map_teardown(struct vm_map *map)
2306 {
2307 	struct uvm_map_deadq	 dead_entries;
2308 	int			 i;
2309 	struct vm_map_entry	*entry, *tmp;
2310 #ifdef VMMAP_DEBUG
2311 	size_t			 numq, numt;
2312 #endif
2313 
2314 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
2315 		if (rw_enter(&map->lock, RW_NOSLEEP | RW_WRITE) != 0)
2316 			panic("uvm_map_teardown: rw_enter failed on free map");
2317 	}
2318 
2319 	/*
2320 	 * Remove address selectors.
2321 	 */
2322 	uvm_addr_destroy(map->uaddr_exe);
2323 	map->uaddr_exe = NULL;
2324 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2325 		uvm_addr_destroy(map->uaddr_any[i]);
2326 		map->uaddr_any[i] = NULL;
2327 	}
2328 	uvm_addr_destroy(map->uaddr_brk_stack);
2329 	map->uaddr_brk_stack = NULL;
2330 
2331 	/*
2332 	 * Remove entries.
2333 	 *
2334 	 * The following is based on graph breadth-first search.
2335 	 *
2336 	 * In color terms:
2337 	 * - the dead_entries set contains all nodes that are reachable
2338 	 *   (i.e. both the black and the grey nodes)
2339 	 * - any entry not in dead_entries is white
2340  * - any entry that appears in dead_entries before entry
2341  *   is black; the rest are grey.
2342 	 * The set [entry, end] is also referred to as the wavefront.
2343 	 *
2344  * Since the tree is a connected, acyclic graph, the breadth-first
2345  * search guarantees that each vm_map_entry is visited exactly once.
2346 	 * The vm_map is broken down in linear time.
2347 	 */
2348 	TAILQ_INIT(&dead_entries);
2349 	if ((entry = RB_ROOT(&map->addr)) != NULL)
2350 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2351 	while (entry != NULL) {
2352 		uvm_unmap_kill_entry(map, entry);
2353 		if ((tmp = RB_LEFT(entry, daddrs.addr_entry)) != NULL)
2354 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2355 		if ((tmp = RB_RIGHT(entry, daddrs.addr_entry)) != NULL)
2356 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2357 		/* Update wave-front. */
2358 		entry = TAILQ_NEXT(entry, dfree.deadq);
2359 	}
2360 
2361 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
2362 		rw_exit(&map->lock);
2363 
2364 #ifdef VMMAP_DEBUG
2365 	numt = numq = 0;
2366 	RB_FOREACH(entry, uvm_map_addr, &map->addr)
2367 		numt++;
2368 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2369 		numq++;
2370 	KASSERT(numt == numq);
2371 #endif
2372 	uvm_unmap_detach(&dead_entries, 0);
2373 	pmap_destroy(map->pmap);
2374 	map->pmap = NULL;
2375 }
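
/*
 * Standalone sketch of the wavefront technique used in uvm_map_teardown()
 * (assumption: hypothetical types and names, not part of this file's
 * build).  A queue threaded through the nodes lets us visit every tree
 * node exactly once, without recursion and without allocating memory.
 */
#if 0
#include <sys/queue.h>
#include <stddef.h>

struct node {
	struct node		*left, *right;	/* tree linkage */
	TAILQ_ENTRY(node)	 q;		/* wavefront linkage */
};
TAILQ_HEAD(nodeq, node);

static void
visit_all(struct node *root, void (*fn)(struct node *))
{
	struct nodeq	 wave = TAILQ_HEAD_INITIALIZER(wave);
	struct node	*n;

	if (root != NULL)
		TAILQ_INSERT_TAIL(&wave, root, q);
	/* Nodes already visited are "black"; the rest of wave is "grey". */
	for (n = TAILQ_FIRST(&wave); n != NULL; n = TAILQ_NEXT(n, q)) {
		fn(n);
		if (n->left != NULL)
			TAILQ_INSERT_TAIL(&wave, n->left, q);
		if (n->right != NULL)
			TAILQ_INSERT_TAIL(&wave, n->right, q);
	}
}
#endif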
2376 
2377 /*
2378  * Populate map with free-memory entries.
2379  *
2380  * Map must be initialized and empty.
2381  */
2382 void
2383 uvm_map_setup_entries(struct vm_map *map)
2384 {
2385 	KDASSERT(RB_EMPTY(&map->addr));
2386 
2387 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2388 }
2389 
2390 /*
2391  * Split entry at given address.
2392  *
2393  * orig:  entry that is to be split.
2394  * next:  a newly allocated map entry that is not linked.
2395  * split: address at which the split is done.
2396  */
2397 void
2398 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2399     struct vm_map_entry *next, vaddr_t split)
2400 {
2401 	struct uvm_addr_state *free, *free_before;
2402 	vsize_t adj;
2403 
2404 	if ((split & PAGE_MASK) != 0) {
2405 		panic("uvm_map_splitentry: split address 0x%lx "
2406 		    "not on page boundary!", split);
2407 	}
2408 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2409 	uvm_tree_sanity(map, __FILE__, __LINE__);
2410 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2411 
2412 #ifdef VMMAP_DEBUG
2413 	KDASSERT(RB_FIND(uvm_map_addr, &map->addr, orig) == orig);
2414 	KDASSERT(RB_FIND(uvm_map_addr, &map->addr, next) != next);
2415 #endif /* VMMAP_DEBUG */
2416 
2417 	/*
2418 	 * Free space will change, unlink from free space tree.
2419 	 */
2420 	free = uvm_map_uaddr_e(map, orig);
2421 	uvm_mapent_free_remove(map, free, orig);
2422 
2423 	adj = split - orig->start;
2424 
2425 	uvm_mapent_copy(orig, next);
2426 	if (split >= orig->end) {
2427 		next->etype = 0;
2428 		next->offset = 0;
2429 		next->wired_count = 0;
2430 		next->start = next->end = split;
2431 		next->guard = 0;
2432 		next->fspace = VMMAP_FREE_END(orig) - split;
2433 		next->aref.ar_amap = NULL;
2434 		next->aref.ar_pageoff = 0;
2435 		orig->guard = MIN(orig->guard, split - orig->end);
2436 		orig->fspace = split - VMMAP_FREE_START(orig);
2437 	} else {
2438 		orig->fspace = 0;
2439 		orig->guard = 0;
2440 		orig->end = next->start = split;
2441 
2442 		if (next->aref.ar_amap)
2443 			amap_splitref(&orig->aref, &next->aref, adj);
2444 		if (UVM_ET_ISSUBMAP(orig)) {
2445 			uvm_map_reference(next->object.sub_map);
2446 			next->offset += adj;
2447 		} else if (UVM_ET_ISOBJ(orig)) {
2448 			if (next->object.uvm_obj->pgops &&
2449 			    next->object.uvm_obj->pgops->pgo_reference) {
2450 				next->object.uvm_obj->pgops->pgo_reference(
2451 				    next->object.uvm_obj);
2452 			}
2453 			next->offset += adj;
2454 		}
2455 	}
2456 
2457 	/*
2458 	 * Link next into address tree.
2459 	 * Link orig and next into free-space tree.
2460 	 *
2461 	 * Don't insert 'next' into the addr tree until orig has been linked,
2462 	 * in case the free-list looks at adjacent entries in the addr tree
2463 	 * for its decisions.
2464 	 */
2465 	if (orig->fspace > 0)
2466 		free_before = free;
2467 	else
2468 		free_before = uvm_map_uaddr_e(map, orig);
2469 	uvm_mapent_free_insert(map, free_before, orig);
2470 	uvm_mapent_addr_insert(map, next);
2471 	uvm_mapent_free_insert(map, free, next);
2472 
2473 	uvm_tree_sanity(map, __FILE__, __LINE__);
2474 }
2475 
2476 
2477 #ifdef VMMAP_DEBUG
2478 
2479 void
2480 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2481     char *file, int line)
2482 {
2483 	char* map_special;
2484 
2485 	if (test)
2486 		return;
2487 
2488 	if (map == kernel_map)
2489 		map_special = " (kernel_map)";
2490 	else if (map == kmem_map)
2491 		map_special = " (kmem_map)";
2492 	else
2493 		map_special = "";
2494 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2495 	    line, test_str);
2496 }
2497 
2498 /*
2499  * Check that map is sane.
2500  */
2501 void
2502 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2503 {
2504 	struct vm_map_entry	*iter;
2505 	vaddr_t			 addr;
2506 	vaddr_t			 min, max, bound; /* Bounds checker. */
2507 	struct uvm_addr_state	*free;
2508 
2509 	addr = vm_map_min(map);
2510 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2511 		/*
2512 		 * Valid start, end.
2513 		 * Catch overflow for end+fspace.
2514 		 */
2515 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2516 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2517 		/*
2518 		 * May not be empty.
2519 		 */
2520 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2521 		    file, line);
2522 
2523 		/*
2524 		 * Addresses for entry must lie within map boundaries.
2525 		 */
2526 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2527 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2528 
2529 		/*
2530 		 * Tree may not have gaps.
2531 		 */
2532 		UVM_ASSERT(map, iter->start == addr, file, line);
2533 		addr = VMMAP_FREE_END(iter);
2534 
2535 		/*
2536 		 * Free space may not cross boundaries, unless the same
2537 		 * free list is used on both sides of the border.
2538 		 */
2539 		min = VMMAP_FREE_START(iter);
2540 		max = VMMAP_FREE_END(iter);
2541 
2542 		while (min < max &&
2543 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2544 			UVM_ASSERT(map,
2545 			    uvm_map_uaddr(map, bound - 1) ==
2546 			    uvm_map_uaddr(map, bound),
2547 			    file, line);
2548 			min = bound;
2549 		}
2550 
2551 		free = uvm_map_uaddr_e(map, iter);
2552 		if (free) {
2553 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2554 			    file, line);
2555 		} else {
2556 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2557 			    file, line);
2558 		}
2559 	}
2560 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2561 }
2562 
2563 void
2564 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2565 {
2566 	struct vm_map_entry *iter;
2567 	vsize_t size;
2568 
2569 	size = 0;
2570 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2571 		if (!UVM_ET_ISHOLE(iter))
2572 			size += iter->end - iter->start;
2573 	}
2574 
2575 	if (map->size != size)
2576 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2577 	UVM_ASSERT(map, map->size == size, file, line);
2578 
2579 	vmspace_validate(map);
2580 }
2581 
2582 /*
2583  * This function validates the statistics on vmspace.
2584  */
2585 void
2586 vmspace_validate(struct vm_map *map)
2587 {
2588 	struct vmspace *vm;
2589 	struct vm_map_entry *iter;
2590 	vaddr_t imin, imax;
2591 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2592 	vsize_t stack, heap; /* Measured sizes. */
2593 
2594 	if (!(map->flags & VM_MAP_ISVMSPACE))
2595 		return;
2596 
2597 	vm = (struct vmspace *)map;
2598 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2599 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2600 
2601 	stack = heap = 0;
2602 	RB_FOREACH(iter, uvm_map_addr, &map->addr) {
2603 		imin = imax = iter->start;
2604 
2605 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL)
2606 			continue;
2607 
2608 		/*
2609 		 * Update stack, heap.
2610 		 * Keep in mind that (theoretically) a single entry
2611 		 * may span both heap and stack ranges.
2612 		 */
2613 		while (imin != iter->end) {
2614 			/*
2615 			 * Set imax to the first boundary crossed between
2616 			 * imin and stack addresses.
2617 			 */
2618 			imax = iter->end;
2619 			if (imin < stack_begin && imax > stack_begin)
2620 				imax = stack_begin;
2621 			else if (imin < stack_end && imax > stack_end)
2622 				imax = stack_end;
2623 
2624 			if (imin >= stack_begin && imin < stack_end)
2625 				stack += imax - imin;
2626 			else
2627 				heap += imax - imin;
2628 			imin = imax;
2629 		}
2630 	}
2631 
2632 	heap >>= PAGE_SHIFT;
2633 	if (heap != vm->vm_dused) {
2634 		printf("vmspace stack range: 0x%lx-0x%lx\n",
2635 		    stack_begin, stack_end);
2636 		panic("vmspace_validate: vmspace.vm_dused invalid, "
2637 		    "expected %ld pgs, got %ld pgs in map %p",
2638 		    heap, vm->vm_dused,
2639 		    map);
2640 	}
2641 }
2642 
2643 #endif /* VMMAP_DEBUG */
2644 
2645 /*
2646  * uvm_map_init: init mapping system at boot time.   note that we allocate
2647  * and init the static pool of structs vm_map_entry for the kernel here.
2648  */
2649 void
2650 uvm_map_init(void)
2651 {
2652 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2653 	int lcv;
2654 
2655 	/*
2656 	 * now set up static pool of kernel map entries ...
2657 	 */
2658 
2659 	simple_lock_init(&uvm.kentry_lock);
2660 	uvm.kentry_free = NULL;
2661 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2662 		RB_LEFT(&kernel_map_entry[lcv], daddrs.addr_entry) =
2663 		    uvm.kentry_free;
2664 		uvm.kentry_free = &kernel_map_entry[lcv];
2665 	}
2666 
2667 	/*
2668 	 * initialize the map-related pools.
2669 	 */
2670 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
2671 	    0, 0, 0, "vmsppl", &pool_allocator_nointr);
2672 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
2673 	    0, 0, 0, "vmmpepl", &pool_allocator_nointr);
2674 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry),
2675 	    0, 0, 0, "vmmpekpl", NULL);
2676 	pool_sethiwat(&uvm_map_entry_pool, 8192);
2677 
2678 	uvm_addr_init();
2679 }
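
/*
 * Sketch of the bootstrap free-list trick used in uvm_map_init() above
 * (assumption: hypothetical names, not part of this file's build).  Until
 * an entry is inserted into the address tree, its left-child pointer is
 * unused, so it can double as the "next free" link of a simple stack.
 */
#if 0
#include <stddef.h>

struct ent {
	struct ent	*left;		/* tree link, or free-list link */
};

#define NENT	8
static struct ent	 pool[NENT];
static struct ent	*freelist;

static void
pool_setup(void)
{
	int i;

	for (i = 0; i < NENT; i++) {
		pool[i].left = freelist;	/* push onto free list */
		freelist = &pool[i];
	}
}

static struct ent *
pool_get(void)
{
	struct ent *e;

	if ((e = freelist) != NULL)
		freelist = e->left;		/* pop */
	return e;
}
#endif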
2680 
2681 #if defined(DDB)
2682 
2683 /*
2684  * DDB hooks
2685  */
2686 
2687 /*
2688  * uvm_map_printit: actually prints the map
2689  */
2690 void
2691 uvm_map_printit(struct vm_map *map, boolean_t full,
2692     int (*pr)(const char *, ...))
2693 {
2694 	struct vmspace			*vm;
2695 	struct vm_map_entry		*entry;
2696 	struct uvm_addr_state		*free;
2697 	int				 in_free, i;
2698 	char				 buf[8];
2699 
2700 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2701 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2702 	    map->b_start, map->b_end);
2703 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2704 	    map->s_start, map->s_end);
2705 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2706 	    map->size, map->ref_count, map->timestamp,
2707 	    map->flags);
2708 #ifdef pmap_resident_count
2709 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2710 	    pmap_resident_count(map->pmap));
2711 #else
2712 	/* XXXCDC: this should be required ... */
2713 	(*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
2714 #endif
2715 
2716 	/*
2717 	 * struct vmspace handling.
2718 	 */
2719 	if (map->flags & VM_MAP_ISVMSPACE) {
2720 		vm = (struct vmspace *)map;
2721 
2722 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2723 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2724 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2725 		    vm->vm_tsize, vm->vm_dsize);
2726 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2727 		    vm->vm_taddr, vm->vm_daddr);
2728 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2729 		    vm->vm_maxsaddr, vm->vm_minsaddr);
2730 	}
2731 
2732 	if (!full)
2733 		goto print_uaddr;
2734 	RB_FOREACH(entry, uvm_map_addr, &map->addr) {
2735 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
2736 		    entry, entry->start, entry->end, entry->object.uvm_obj,
2737 		    (long long)entry->offset, entry->aref.ar_amap,
2738 		    entry->aref.ar_pageoff);
2739 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
2740 		    "wc=%d, adv=%d\n",
2741 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
2742 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
2743 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
2744 		    entry->protection, entry->max_protection,
2745 		    entry->inheritance, entry->wired_count, entry->advice);
2746 
2747 		free = uvm_map_uaddr_e(map, entry);
2748 		in_free = (free != NULL);
2749 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
2750 		    "free=0x%lx-0x%lx\n",
2751 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
2752 		    in_free ? 'T' : 'F',
2753 		    entry->guard,
2754 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
2755 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
2756 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
2757 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
2758 		if (free) {
2759 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
2760 			    free->uaddr_minaddr, free->uaddr_maxaddr,
2761 			    free->uaddr_functions->uaddr_name);
2762 		}
2763 	}
2764 
2765 print_uaddr:
2766 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
2767 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2768 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
2769 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
2770 	}
2771 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
2772 }
2773 
2774 /*
2775  * uvm_object_printit: actually prints the object
2776  */
2777 void
2778 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
2779     int (*pr)(const char *, ...))
2782 {
2783 	struct vm_page *pg;
2784 	int cnt = 0;
2785 
2786 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
2787 	    uobj, uobj->pgops, uobj->uo_npages);
2788 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
2789 		(*pr)("refs=<SYSTEM>\n");
2790 	else
2791 		(*pr)("refs=%d\n", uobj->uo_refs);
2792 
2793 	if (!full) {
2794 		return;
2795 	}
2796 	(*pr)("  PAGES <pg,offset>:\n  ");
2797 	RB_FOREACH(pg, uvm_objtree, &uobj->memt) {
2798 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
2799 		if ((cnt % 3) == 2) {
2800 			(*pr)("\n  ");
2801 		}
2802 		cnt++;
2803 	}
2804 	if ((cnt % 3) != 2) {
2805 		(*pr)("\n");
2806 	}
2807 }
2808 
2809 /*
2810  * uvm_page_printit: actually print the page
2811  */
2812 static const char page_flagbits[] =
2813 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
2814 	"\11ZERO\15PAGER1\20FREE\21INACTIVE\22ACTIVE\24ENCRYPT\30PMAP0"
2815 	"\31PMAP1\32PMAP2\33PMAP3";
2816 
2817 void
2818 uvm_page_printit(struct vm_page *pg, boolean_t full,
2819     int (*pr)(const char *, ...))
2822 {
2823 	struct vm_page *tpg;
2824 	struct uvm_object *uobj;
2825 	struct pglist *pgl;
2826 
2827 	(*pr)("PAGE %p:\n", pg);
2828 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
2829 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
2830 	    (long long)pg->phys_addr);
2831 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
2832 	    pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
2833 #if defined(UVM_PAGE_TRKOWN)
2834 	if (pg->pg_flags & PG_BUSY)
2835 		(*pr)("  owning process = %d, tag=%s",
2836 		    pg->owner, pg->owner_tag);
2837 	else
2838 		(*pr)("  page not busy, no owner");
2839 #else
2840 	(*pr)("  [page ownership tracking disabled]");
2841 #endif
2842 #ifdef __HAVE_VM_PAGE_MD
2843 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
2844 #else
2845 	(*pr)("\n");
2846 #endif
2847 
2848 	if (!full)
2849 		return;
2850 
2851 	/* cross-verify object/anon */
2852 	if ((pg->pg_flags & PQ_FREE) == 0) {
2853 		if (pg->pg_flags & PQ_ANON) {
2854 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
2855 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
2856 				(pg->uanon) ? pg->uanon->an_page : NULL);
2857 			else
2858 				(*pr)("  anon backpointer is OK\n");
2859 		} else {
2860 			uobj = pg->uobject;
2861 			if (uobj) {
2862 				(*pr)("  checking object list\n");
2863 				RB_FOREACH(tpg, uvm_objtree, &uobj->memt) {
2864 					if (tpg == pg) {
2865 						break;
2866 					}
2867 				}
2868 				if (tpg)
2869 					(*pr)("  page found on object list\n");
2870 				else
2871 					(*pr)("  >>> PAGE NOT FOUND "
2872 					    "ON OBJECT LIST! <<<\n");
2873 			}
2874 		}
2875 	}
2876 
2877 	/* cross-verify page queue */
2878 	if (pg->pg_flags & PQ_FREE) {
2879 		if (uvm_pmr_isfree(pg))
2880 			(*pr)("  page found in uvm_pmemrange\n");
2881 		else
2882 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
2883 		pgl = NULL;
2884 	} else if (pg->pg_flags & PQ_INACTIVE) {
2885 		pgl = (pg->pg_flags & PQ_SWAPBACKED) ?
2886 		    &uvm.page_inactive_swp : &uvm.page_inactive_obj;
2887 	} else if (pg->pg_flags & PQ_ACTIVE) {
2888 		pgl = &uvm.page_active;
2889 	} else {
2890 		pgl = NULL;
2891 	}
2892 
2893 	if (pgl) {
2894 		(*pr)("  checking pageq list\n");
2895 		TAILQ_FOREACH(tpg, pgl, pageq) {
2896 			if (tpg == pg) {
2897 				break;
2898 			}
2899 		}
2900 		if (tpg)
2901 			(*pr)("  page found on pageq list\n");
2902 		else
2903 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
2904 	}
2905 }
2906 #endif
2907 
2908 /*
2909  * uvm_map_protect: change map protection
2910  *
2911  * => set_max means set max_protection.
2912  * => map must be unlocked.
2913  */
2914 int
2915 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
2916     vm_prot_t new_prot, boolean_t set_max)
2917 {
2918 	struct vm_map_entry *first, *iter;
2919 	vm_prot_t old_prot;
2920 	vm_prot_t mask;
2921 	int error;
2922 
2923 	if (start > end)
2924 		return EINVAL;
2925 	start = MAX(start, map->min_offset);
2926 	end = MIN(end, map->max_offset);
2927 	if (start >= end)
2928 		return 0;
2929 
2930 	error = 0;
2931 	vm_map_lock(map);
2932 
2933 	/*
2934 	 * Set up first and last.
2935 	 * - first will contain first entry at or after start.
2936 	 */
2937 	first = uvm_map_entrybyaddr(&map->addr, start);
2938 	KDASSERT(first != NULL);
2939 	if (first->end < start)
2940 		first = RB_NEXT(uvm_map_addr, &map->addr, first);
2941 
2942 	/*
2943 	 * First, check for protection violations.
2944 	 */
2945 	for (iter = first; iter != NULL && iter->start < end;
2946 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
2947 		/* Treat memory holes as free space. */
2948 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
2949 			continue;
2950 
2951 		if (UVM_ET_ISSUBMAP(iter)) {
2952 			error = EINVAL;
2953 			goto out;
2954 		}
2955 		if ((new_prot & iter->max_protection) != new_prot) {
2956 			error = EACCES;
2957 			goto out;
2958 		}
2959 	}
2960 
2961 	/*
2962 	 * Fix protections.
2963 	 */
2964 	for (iter = first; iter != NULL && iter->start < end;
2965 	    iter = RB_NEXT(uvm_map_addr, &map->addr, iter)) {
2966 		/* Treat memory holes as free space. */
2967 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
2968 			continue;
2969 
2970 		old_prot = iter->protection;
2971 
2972 		/*
2973 		 * Skip adapting protection iff old and new protection
2974 		 * are equal.
2975 		 */
2976 		if (set_max) {
2977 			if (old_prot == (new_prot & old_prot) &&
2978 			    iter->max_protection == new_prot)
2979 				continue;
2980 		} else {
2981 			if (old_prot == new_prot)
2982 				continue;
2983 		}
2984 
2985 		UVM_MAP_CLIP_START(map, iter, start);
2986 		UVM_MAP_CLIP_END(map, iter, end);
2987 
2988 		if (set_max) {
2989 			iter->max_protection = new_prot;
2990 			iter->protection &= new_prot;
2991 		} else
2992 			iter->protection = new_prot;
2993 
2994 		/*
2995 		 * update physical map if necessary.  worry about copy-on-write
2996 		 * here -- CHECK THIS XXX
2997 		 */
2998 		if (iter->protection != old_prot) {
2999 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3000 			    ~VM_PROT_WRITE : VM_PROT_ALL;
3001 
3002 			/* update pmap */
3003 			if ((iter->protection & mask) == PROT_NONE &&
3004 			    VM_MAPENT_ISWIRED(iter)) {
3005 				/*
3006 				 * TODO(ariane) this is stupid. wired_count
3007 				 * is 0 if not wired, otherwise anything
3008 				 * larger than 0 (incremented once each time
3009 				 * wire is called).
3010 				 * Mostly to be able to undo the damage on
3011 				 * failure. Not to actually be a wired
3012 				 * refcounter...
3013 				 * Originally: iter->wired_count--;
3014 				 * (don't we have to unwire this in the pmap
3015 				 * as well?)
3016 				 */
3017 				iter->wired_count = 0;
3018 			}
3019 			pmap_protect(map->pmap, iter->start, iter->end,
3020 			    iter->protection & mask);
3021 		}
3022 
3023 		/*
3024 		 * If the map is configured to lock any future mappings,
3025 		 * wire this entry now if the old protection was VM_PROT_NONE
3026 		 * and the new protection is not VM_PROT_NONE.
3027 		 */
3028 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3029 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3030 		    old_prot == VM_PROT_NONE &&
3031 		    new_prot != VM_PROT_NONE) {
3032 			if (uvm_map_pageable(map, iter->start, iter->end,
3033 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3034 				/*
3035 				 * If locking the entry fails, remember the
3036 				 * error if it's the first one.  Note we
3037 				 * still continue setting the protection in
3038 				 * the map, but it will return the resource
3039 				 * shortage condition regardless.
3040 				 *
3041 				 * XXX Ignore what the actual error is,
3042 				 * XXX just call it a resource shortage
3043 				 * XXX so that it doesn't get confused
3044 				 * XXX what uvm_map_protect() itself would
3045 				 * XXX normally return.
3046 				 */
3047 				error = ENOMEM;
3048 			}
3049 		}
3050 	}
3051 	pmap_update(map->pmap);
3052 
3053 out:
3054 	vm_map_unlock(map);
3055 	return error;
3056 }
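
/*
 * Illustrative, non-compiled sketch (assumption: not part of this file's
 * build): mprotect(2) is the usual userland path into uvm_map_protect(),
 * clipping the affected entries and updating the pmap as above.
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	char *p;

	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 0, pgsz);

	/* Drop write permission; a later store through p would fault. */
	if (mprotect(p, pgsz, PROT_READ) == -1)
		err(1, "mprotect");

	munmap(p, pgsz);
	return 0;
}
#endif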
3057 
3058 /*
3059  * uvmspace_alloc: allocate a vmspace structure.
3060  *
3061  * - structure includes vm_map and pmap
3062  * - XXX: no locking on this structure
3063  * - refcnt set to 1, rest must be init'd by caller
3064  */
3065 struct vmspace *
3066 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3067     boolean_t remove_holes)
3068 {
3069 	struct vmspace *vm;
3070 
3071 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3072 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3073 	return (vm);
3074 }
3075 
3076 /*
3077  * uvmspace_init: initialize a vmspace structure.
3078  *
3079  * - XXX: no locking on this structure
3080  * - refcnt set to 1, rest must be init'd by caller
3081  */
3082 void
3083 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3084     boolean_t pageable, boolean_t remove_holes)
3085 {
3086 	if (pmap)
3087 		pmap_reference(pmap);
3088 	else
3089 		pmap = pmap_create();
3090 	vm->vm_map.pmap = pmap;
3091 
3092 	uvm_map_setup(&vm->vm_map, min, max,
3093 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3094 
3095 	vm->vm_refcnt = 1;
3096 
3097 	if (remove_holes)
3098 		pmap_remove_holes(&vm->vm_map);
3099 }
3100 
3101 /*
3102  * uvmspace_share: share a vmspace between two processes
3103  *
3104  * - XXX: no locking on vmspace
3105  * - used for vfork and threads
3106  */
3107 
3108 void
3109 uvmspace_share(struct proc *p1, struct proc *p2)
3111 {
3112 	p2->p_vmspace = p1->p_vmspace;
3113 	p1->p_vmspace->vm_refcnt++;
3114 }
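
/*
 * Illustrative, non-compiled sketch (assumption: not part of this file's
 * build): vfork(2) relies on uvmspace_share(), so parent and child run on
 * the same vmspace until the child calls execve(2) or _exit(2).
 */
#if 0
#include <sys/types.h>
#include <sys/wait.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	pid_t pid;

	switch ((pid = vfork())) {
	case -1:
		err(1, "vfork");
	case 0:
		/* Child: shares the parent's vmspace until the exec. */
		execl("/bin/true", "true", (char *)NULL);
		_exit(127);
	default:
		waitpid(pid, NULL, 0);
	}
	return 0;
}
#endif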
3115 
3116 /*
3117  * uvmspace_exec: the process wants to exec a new program
3118  *
3119  * - XXX: no locking on vmspace
3120  */
3121 
3122 void
3123 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3124 {
3125 	struct vmspace *nvm, *ovm = p->p_vmspace;
3126 	struct vm_map *map = &ovm->vm_map;
3127 	struct uvm_map_deadq dead_entries;
3128 
3129 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3130 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3131 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3132 
3133 	pmap_unuse_final(p);   /* before stack addresses go away */
3134 	TAILQ_INIT(&dead_entries);
3135 
3136 	/*
3137 	 * see if more than one process is using this vmspace...
3138 	 */
3139 
3140 	if (ovm->vm_refcnt == 1) {
3141 		/*
3142 		 * if p is the only process using its vmspace then we can safely
3143 		 * recycle that vmspace for the program that is being exec'd.
3144 		 */
3145 
3146 #ifdef SYSVSHM
3147 		/*
3148 		 * SYSV SHM semantics require us to kill all segments on an exec
3149 		 */
3150 		if (ovm->vm_shm)
3151 			shmexit(ovm);
3152 #endif
3153 
3154 		/*
3155 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3156 		 * when a process execs another program image.
3157 		 */
3158 		vm_map_lock(map);
3159 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
3160 
3161 		/*
3162 		 * now unmap the old program
3163 		 *
3164 		 * Instead of attempting to keep the map valid, we simply
3165 		 * nuke all entries and ask uvm_map_setup to reinitialize
3166 		 * the map to the new boundaries.
3167 		 *
3168 		 * uvm_unmap_remove will actually nuke all entries for us
3169 		 * (as in, not replace them with free-memory entries).
3170 		 */
3171 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3172 		    &dead_entries, TRUE, FALSE);
3173 
3174 		KDASSERT(RB_EMPTY(&map->addr));
3175 
3176 		/*
3177 		 * Nuke statistics and boundaries.
3178 		 */
3179 		bzero(&ovm->vm_startcopy,
3180 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3181 
3182 
3183 		if (end & (vaddr_t)PAGE_MASK) {
3184 			end += 1;
3185 			if (end == 0) /* overflow */
3186 				end -= PAGE_SIZE;
3187 		}
3188 
3189 		/*
3190 		 * Setup new boundaries and populate map with entries.
3191 		 */
3192 		map->min_offset = start;
3193 		map->max_offset = end;
3194 		uvm_map_setup_entries(map);
3195 		vm_map_unlock(map);
3196 
3197 		/*
3198 		 * but keep MMU holes unavailable
3199 		 */
3200 		pmap_remove_holes(map);
3201 
3202 	} else {
3203 
3204 		/*
3205 		 * p's vmspace is being shared, so we can't reuse it for p since
3206 		 * it is still being used for others.   allocate a new vmspace
3207 		 * for p
3208 		 */
3209 		nvm = uvmspace_alloc(start, end,
3210 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3211 
3212 		/*
3213 		 * install new vmspace and drop our ref to the old one.
3214 		 */
3215 
3216 		pmap_deactivate(p);
3217 		p->p_vmspace = nvm;
3218 		pmap_activate(p);
3219 
3220 		uvmspace_free(ovm);
3221 	}
3222 
3223 	/*
3224 	 * Release dead entries
3225 	 */
3226 	uvm_unmap_detach(&dead_entries, 0);
3227 }
3228 
3229 /*
3230  * uvmspace_free: free a vmspace data structure
3231  *
3232  * - XXX: no locking on vmspace
3233  */
3234 
3235 void
3236 uvmspace_free(struct vmspace *vm)
3237 {
3238 	if (--vm->vm_refcnt == 0) {
3239 		/*
3240 		 * lock the map, to wait out all other references to it.  delete
3241 		 * all of the mappings and pages they hold, then call the pmap
3242 		 * module to reclaim anything left.
3243 		 */
3244 #ifdef SYSVSHM
3245 		/* Get rid of any SYSV shared memory segments. */
3246 		if (vm->vm_shm != NULL)
3247 			shmexit(vm);
3248 #endif
3249 
3250 		uvm_map_teardown(&vm->vm_map);
3251 		pool_put(&uvm_vmspace_pool, vm);
3252 	}
3253 }
3254 
3255 /*
3256  * Clone map entry into other map.
3257  *
3258  * Mapping will be placed at dstaddr, for the same length.
3259  * Space must be available.
3260  * Reference counters are incremented.
3261  */
3262 struct vm_map_entry*
3263 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3264     vsize_t off, struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3265     int mapent_flags, int amap_share_flags)
3266 {
3267 	struct vm_map_entry *new_entry, *first, *last;
3268 
3269 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3270 
3271 	/*
3272 	 * Create new entry (linked in on creation).
3273 	 * Fill in first, last.
3274 	 */
3275 	first = last = NULL;
3276 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3277 		panic("uvmspace_fork: no space in map for "
3278 		    "entry in empty map");
3279 	}
3280 	new_entry = uvm_map_mkentry(dstmap, first, last,
3281 	    dstaddr, dstlen, mapent_flags, dead);
3282 	if (new_entry == NULL)
3283 		return NULL;
3284 	/* old_entry -> new_entry */
3285 	new_entry->object = old_entry->object;
3286 	new_entry->offset = old_entry->offset;
3287 	new_entry->aref = old_entry->aref;
3288 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3289 	new_entry->protection = old_entry->protection;
3290 	new_entry->max_protection = old_entry->max_protection;
3291 	new_entry->inheritance = old_entry->inheritance;
3292 	new_entry->advice = old_entry->advice;
3293 
3294 	/*
3295 	 * gain reference to object backing the map (can't
3296 	 * be a submap).
3297 	 */
3298 	if (new_entry->aref.ar_amap) {
3299 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3300 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3301 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3302 		    amap_share_flags);
3303 	}
3304 
3305 	if (UVM_ET_ISOBJ(new_entry) &&
3306 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3307 		new_entry->offset += off;
3308 		new_entry->object.uvm_obj->pgops->pgo_reference
3309 		    (new_entry->object.uvm_obj);
3310 	}
3311 
3312 	return new_entry;
3313 }
3314 
3315 /*
3316  * share the mapping: this means we want the old and
3317  * new entries to share amaps and backing objects.
3318  */
3319 void
3320 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3321     struct vm_map *old_map,
3322     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3323 {
3324 	struct vm_map_entry *new_entry;
3325 
3326 	/*
3327 	 * if the old_entry needs a new amap (due to prev fork)
3328 	 * then we need to allocate it now so that we have
3329 	 * something we own to share with the new_entry.   [in
3330 	 * other words, we need to clear needs_copy]
3331 	 */
3332 
3333 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3334 		/* get our own amap, clears needs_copy */
3335 		amap_copy(old_map, old_entry, M_WAITOK, FALSE,
3336 		    0, 0);
3337 		/* XXXCDC: WAITOK??? */
3338 	}
3339 
3340 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3341 	    old_entry->end - old_entry->start, 0, old_entry,
3342 	    dead, 0, AMAP_SHARED);
3343 
3344 	/*
3345 	 * pmap_copy the mappings: this routine is optional
3346 	 * but if it is there it will reduce the number of
3347 	 * page faults in the new proc.
3348 	 */
3349 	pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3350 	    (new_entry->end - new_entry->start), new_entry->start);
3351 
3352 	/*
3353 	 * Update process statistics.
3354 	 */
3355 	if (!UVM_ET_ISHOLE(new_entry))
3356 		new_map->size += new_entry->end - new_entry->start;
3357 	if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
3358 		new_vm->vm_dused +=
3359 		    uvmspace_dused(new_map, new_entry->start, new_entry->end);
3360 	}
3361 }
3362 
3363 /*
3364  * copy-on-write the mapping (using mmap's
3365  * MAP_PRIVATE semantics)
3366  *
3367  * allocate new_entry, adjust reference counts.
3368  * (note that new references are read-only).
3369  */
3370 void
3371 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3372     struct vm_map *old_map,
3373     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3374 {
3375 	struct vm_map_entry	*new_entry;
3376 	boolean_t		 protect_child;
3377 
3378 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3379 	    old_entry->end - old_entry->start, 0, old_entry,
3380 	    dead, 0, 0);
3381 
3382 	new_entry->etype |=
3383 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3384 
3385 	/*
3386 	 * the new entry will need an amap.  it will either
3387 	 * need to be copied from the old entry or created
3388 	 * from scratch (if the old entry does not have an
3389 	 * amap).  can we defer this process until later
3390 	 * (by setting "needs_copy") or do we need to copy
3391 	 * the amap now?
3392 	 *
3393 	 * we must copy the amap now if any of the following
3394 	 * conditions hold:
3395 	 * 1. the old entry has an amap and that amap is
3396 	 *    being shared.  this means that the old (parent)
3397 	 *    process is sharing the amap with another
3398 	 *    process.  if we do not clear needs_copy here
3399 	 *    we will end up in a situation where both the
3400 	 *    parent and child process are referring to the
3401 	 *    same amap with "needs_copy" set.  if the
3402 	 *    parent write-faults, the fault routine will
3403 	 *    clear "needs_copy" in the parent by allocating
3404 	 *    a new amap.   this is wrong because the
3405 	 *    parent is supposed to be sharing the old amap
3406 	 *    and the new amap will break that.
3407 	 *
3408 	 * 2. if the old entry has an amap and a non-zero
3409 	 *    wire count then we are going to have to call
3410 	 *    amap_cow_now to avoid page faults in the
3411 	 *    parent process.   since amap_cow_now requires
3412 	 *    "needs_copy" to be clear we might as well
3413 	 *    clear it here as well.
3414 	 *
3415 	 */
3416 
3417 	if (old_entry->aref.ar_amap != NULL &&
3418 	    ((amap_flags(old_entry->aref.ar_amap) &
3419 	    AMAP_SHARED) != 0 ||
3420 	    VM_MAPENT_ISWIRED(old_entry))) {
3421 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3422 		    0, 0);
3423 		/* XXXCDC: M_WAITOK ... ok? */
3424 	}
3425 
3426 	/*
3427 	 * if the parent's entry is wired down, then the
3428 	 * parent process does not want page faults on
3429 	 * access to that memory.  this means that we
3430 	 * cannot do copy-on-write because we can't write
3431 	 * protect the old entry.   in this case we
3432 	 * resolve all copy-on-write faults now, using
3433 	 * amap_cow_now.   note that we have already
3434 	 * allocated any needed amap (above).
3435 	 */
3436 
3437 	if (VM_MAPENT_ISWIRED(old_entry)) {
3438 
3439 		/*
3440 		 * resolve all copy-on-write faults now
3441 		 * (note that there is nothing to do if
3442 		 * the old mapping does not have an amap).
3443 		 * XXX: is it worthwhile to bother with
3444 		 * pmap_copy in this case?
3445 		 */
3446 		if (old_entry->aref.ar_amap)
3447 			amap_cow_now(new_map, new_entry);
3448 
3449 	} else {
3450 		if (old_entry->aref.ar_amap) {
3451 
3452 			/*
3453 			 * setup mappings to trigger copy-on-write faults
3454 			 * we must write-protect the parent if it has
3455 			 * an amap and it is not already "needs_copy"...
3456 			 * if it is already "needs_copy" then the parent
3457 			 * has already been write-protected by a previous
3458 			 * fork operation.
3459 			 *
3460 			 * if we do not write-protect the parent, then
3461 			 * we must be sure to write-protect the child
3462 			 * after the pmap_copy() operation.
3463 			 *
3464 			 * XXX: pmap_copy should have some way of telling
3465 			 * us that it didn't do anything so we can avoid
3466 			 * calling pmap_protect needlessly.
3467 			 */
3468 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3469 				if (old_entry->max_protection &
3470 				    VM_PROT_WRITE) {
3471 					pmap_protect(old_map->pmap,
3472 					    old_entry->start,
3473 					    old_entry->end,
3474 					    old_entry->protection &
3475 					    ~VM_PROT_WRITE);
3476 					pmap_update(old_map->pmap);
3477 				}
3478 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3479 			}
3480 
3481 			/*
3482 			 * parent must now be write-protected
3483 			 */
3484 			protect_child = FALSE;
3485 		} else {
3486 
3487 			/*
3488 			 * we only need to protect the child if the
3489 			 * parent has write access.
3490 			 */
3491 			if (old_entry->max_protection & VM_PROT_WRITE)
3492 				protect_child = TRUE;
3493 			else
3494 				protect_child = FALSE;
3495 
3496 		}
3497 
3498 		/*
3499 		 * copy the mappings
3500 		 * XXX: need a way to tell if this does anything
3501 		 */
3502 
3503 		pmap_copy(new_map->pmap, old_map->pmap,
3504 		    new_entry->start,
3505 		    (old_entry->end - old_entry->start),
3506 		    old_entry->start);
3507 
3508 		/*
3509 		 * protect the child's mappings if necessary
3510 		 */
3511 		if (protect_child) {
3512 			pmap_protect(new_map->pmap, new_entry->start,
3513 			    new_entry->end,
3514 			    new_entry->protection &
3515 			    ~VM_PROT_WRITE);
3516 		}
3517 	}
3518 
3519 	/*
3520 	 * Update process statistics.
3521 	 */
3522 	if (!UVM_ET_ISHOLE(new_entry))
3523 		new_map->size += new_entry->end - new_entry->start;
3524 	if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry)) {
3525 		new_vm->vm_dused +=
3526 		    uvmspace_dused(new_map, new_entry->start, new_entry->end);
3527 	}
3528 }
3529 
3530 /*
3531  * uvmspace_fork: fork a process' main map
3532  *
3533  * => create a new vmspace for child process from parent.
3534  * => parent's map must not be locked.
3535  */
3536 struct vmspace *
3537 uvmspace_fork(struct vmspace *vm1)
3538 {
3539 	struct vmspace *vm2;
3540 	struct vm_map *old_map = &vm1->vm_map;
3541 	struct vm_map *new_map;
3542 	struct vm_map_entry *old_entry;
3543 	struct uvm_map_deadq dead;
3544 
3545 	vm_map_lock(old_map);
3546 
3547 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3548 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3549 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3550 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3551 	vm2->vm_dused = 0; /* Statistic managed by us. */
3552 	new_map = &vm2->vm_map;
3553 	vm_map_lock(new_map);
3554 
3555 	/*
3556 	 * go entry-by-entry
3557 	 */
3558 
3559 	TAILQ_INIT(&dead);
3560 	RB_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3561 		if (old_entry->start == old_entry->end)
3562 			continue;
3563 
3564 		/*
3565 		 * first, some sanity checks on the old entry
3566 		 */
3567 		if (UVM_ET_ISSUBMAP(old_entry)) {
3568 			panic("fork: encountered a submap during fork "
3569 			    "(illegal)");
3570 		}
3571 
3572 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3573 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
3574 			panic("fork: non-copy_on_write map entry marked "
3575 			    "needs_copy (illegal)");
3576 		}
3577 
3578 		/*
3579 		 * Apply inheritance.
3580 		 */
3581 		if (old_entry->inheritance == MAP_INHERIT_SHARE) {
3582 			uvm_mapent_forkshared(vm2, new_map,
3583 			    old_map, old_entry, &dead);
3584 		}
3585 		if (old_entry->inheritance == MAP_INHERIT_COPY) {
3586 			uvm_mapent_forkcopy(vm2, new_map,
3587 			    old_map, old_entry, &dead);
3588 		}
3589 	}
3590 
3591 	vm_map_unlock(old_map);
3592 	vm_map_unlock(new_map);
3593 
3594 	/*
3595 	 * This can actually happen, if multiple entries described a
3596 	 * space in which an entry was inherited.
3597 	 */
3598 	uvm_unmap_detach(&dead, 0);
3599 
3600 #ifdef SYSVSHM
3601 	if (vm1->vm_shm)
3602 		shmfork(vm1, vm2);
3603 #endif
3604 
3605 #ifdef PMAP_FORK
3606 	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
3607 #endif
3608 
3609 	return vm2;
3610 }
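
/*
 * Illustrative, non-compiled sketch (assumption: not part of this file's
 * build): minherit(2) selects which of the two paths above a mapping takes
 * at fork time, MAP_INHERIT_SHARE (uvm_mapent_forkshared) versus the
 * default MAP_INHERIT_COPY (uvm_mapent_forkcopy, copy-on-write).
 */
#if 0
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	long pgsz = sysconf(_SC_PAGESIZE);
	volatile int *p;

	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* Ask for shared inheritance instead of copy-on-write. */
	if (minherit((void *)p, pgsz, MAP_INHERIT_SHARE) == -1)
		err(1, "minherit");

	if (fork() == 0) {
		p[0] = 1;	/* visible to the parent */
		_exit(0);
	}
	while (p[0] == 0)
		;		/* wait for the child's store */
	return 0;
}
#endif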
3611 
3612 /*
3613  * uvm_map_hint: return the beginning of the best area suitable for
3614  * creating a new mapping with "prot" protection.
3615  */
3616 vaddr_t
3617 uvm_map_hint(struct vmspace *vm, vm_prot_t prot)
3618 {
3619 	vaddr_t addr;
3620 
3621 #ifdef __i386__
3622 	/*
3623 	 * If executable skip first two pages, otherwise start
3624 	 * after data + heap region.
3625 	 */
3626 	if ((prot & VM_PROT_EXECUTE) != 0 &&
3627 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
3628 		addr = (PAGE_SIZE*2) +
3629 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
3630 		return (round_page(addr));
3631 	}
3632 #endif
3633 	/* start malloc/mmap after the brk */
3634 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
3635 #if !defined(__vax__)
3636 	addr += arc4random() & (MIN((256 * 1024 * 1024), BRKSIZ) - 1);
3637 #endif
3638 	return (round_page(addr));
3639 }
3640 
3641 /*
3642  * uvm_map_submap: punch down part of a map into a submap
3643  *
3644  * => only the kernel_map is allowed to be submapped
3645  * => the purpose of submapping is to break up the locking granularity
3646  *	of a larger map
3647  * => the range specified must have been mapped previously with a uvm_map()
3648  *	call [with uobj==NULL] to create a blank map entry in the main map.
3649  *	[And it had better still be blank!]
3650  * => maps which contain submaps should never be copied or forked.
3651  * => to remove a submap, use uvm_unmap() on the main map
3652  *	and then uvm_map_deallocate() the submap.
3653  * => main map must be unlocked.
3654  * => submap must have been init'd and have a zero reference count.
3655  *	[need not be locked as we don't actually reference it]
3656  */
3657 int
3658 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3659     struct vm_map *submap)
3660 {
3661 	struct vm_map_entry *entry;
3662 	int result;
3663 
3664 	if (start > map->max_offset || end > map->max_offset ||
3665 	    start < map->min_offset || end < map->min_offset)
3666 		return EINVAL;
3667 
3668 	vm_map_lock(map);
3669 
3670 	if (uvm_map_lookup_entry(map, start, &entry)) {
3671 		UVM_MAP_CLIP_START(map, entry, start);
3672 		UVM_MAP_CLIP_END(map, entry, end);
3673 	} else
3674 		entry = NULL;
3675 
3676 	if (entry != NULL &&
3677 	    entry->start == start && entry->end == end &&
3678 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
3679 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
3680 		entry->etype |= UVM_ET_SUBMAP;
3681 		entry->object.sub_map = submap;
3682 		entry->offset = 0;
3683 		uvm_map_reference(submap);
3684 		result = 0;
3685 	} else
3686 		result = EINVAL;
3687 
3688 	vm_map_unlock(map);
3689 	return(result);
3690 }
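
/*
 * Illustrative sketch (not part of the original source): installing and
 * later removing a submap as described above.  "my_submap" and the
 * MYDEV_VA_* bounds are hypothetical; the range is assumed to have been
 * mapped blank (uobj == NULL) beforehand and my_submap set up per the
 * rules above.
 */
#if 0	/* illustrative only */
	if (uvm_map_submap(kernel_map, MYDEV_VA_START, MYDEV_VA_END,
	    my_submap) != 0)
		panic("could not install submap");

	/* Removal: unmap the range in the main map, then drop the submap. */
	uvm_unmap(kernel_map, MYDEV_VA_START, MYDEV_VA_END);
	uvm_map_deallocate(my_submap);
#endif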
3691 
3692 /*
3693  * uvm_map_checkprot: check protection in map
3694  *
3695  * => must allow specific protection in a fully allocated region.
3696  * => map must be read or write locked by caller.
3697  */
3698 boolean_t
3699 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3700     vm_prot_t protection)
3701 {
3702 	struct vm_map_entry *entry;
3703 
3704 	if (start < map->min_offset || end > map->max_offset || start > end)
3705 		return FALSE;
3706 	if (start == end)
3707 		return TRUE;
3708 
3709 	/*
3710 	 * Iterate entries.
3711 	 */
3712 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
3713 	    entry != NULL && entry->start < end;
3714 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
3715 		/*
3716 		 * Fail if a hole is found.
3717 		 */
3718 		if (UVM_ET_ISHOLE(entry) ||
3719 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
3720 			return FALSE;
3721 
3722 		/*
3723 		 * Check protection.
3724 		 */
3725 		if ((entry->protection & protection) != protection)
3726 			return FALSE;
3727 	}
3728 	return TRUE;
3729 }
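
/*
 * Illustrative sketch (not part of the original source): verifying that a
 * range is mapped read/write while holding only a read lock.  "map", "va"
 * and "len" are hypothetical caller variables.
 */
#if 0	/* illustrative only */
	boolean_t ok;

	vm_map_lock_read(map);
	ok = uvm_map_checkprot(map, va, va + len,
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_map_unlock_read(map);
	if (!ok)
		return EFAULT;	/* hole or insufficient protection */
#endif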
3730 
3731 /*
3732  * uvm_map_create: create map
3733  */
3734 vm_map_t
3735 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
3736 {
3737 	vm_map_t result;
3738 
3739 	result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK);
3740 	result->pmap = pmap;
3741 	uvm_map_setup(result, min, max, flags);
3742 	return(result);
3743 }
3744 
3745 /*
3746  * uvm_map_deallocate: drop reference to a map
3747  *
3748  * => caller must not lock map
3749  * => we will zap map if ref count goes to zero
3750  */
3751 void
3752 uvm_map_deallocate(vm_map_t map)
3753 {
3754 	int c;
3755 	struct uvm_map_deadq dead;
3756 
3757 	simple_lock(&map->ref_lock);
3758 	c = --map->ref_count;
3759 	simple_unlock(&map->ref_lock);
3760 	if (c > 0) {
3761 		return;
3762 	}
3763 
3764 	/*
3765 	 * all references gone.   unmap and free.
3766 	 *
3767 	 * No lock required: we are only one to access this map.
3768 	 */
3769 
3770 	TAILQ_INIT(&dead);
3771 	uvm_tree_sanity(map, __FILE__, __LINE__);
3772 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
3773 	    TRUE, FALSE);
3774 	pmap_destroy(map->pmap);
3775 	KASSERT(RB_EMPTY(&map->addr));
3776 	free(map, M_VMMAP);
3777 
3778 	uvm_unmap_detach(&dead, 0);
3779 }
3780 
3781 /*
3782  * uvm_map_inherit: set inheritance code for range of addrs in map.
3783  *
3784  * => map must be unlocked
3785  * => note that the inherit code is used during a "fork".  see fork
3786  *	code for details.
3787  */
3788 int
3789 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
3790     vm_inherit_t new_inheritance)
3791 {
3792 	struct vm_map_entry *entry;
3793 
3794 	switch (new_inheritance) {
3795 	case MAP_INHERIT_NONE:
3796 	case MAP_INHERIT_COPY:
3797 	case MAP_INHERIT_SHARE:
3798 		break;
3799 	default:
3800 		return (EINVAL);
3801 	}
3802 
3803 	if (start > end)
3804 		return EINVAL;
3805 	start = MAX(start, map->min_offset);
3806 	end = MIN(end, map->max_offset);
3807 	if (start >= end)
3808 		return 0;
3809 
3810 	vm_map_lock(map);
3811 
3812 	entry = uvm_map_entrybyaddr(&map->addr, start);
3813 	if (entry->end > start)
3814 		UVM_MAP_CLIP_START(map, entry, start);
3815 	else
3816 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3817 
3818 	while (entry != NULL && entry->start < end) {
3819 		UVM_MAP_CLIP_END(map, entry, end);
3820 		entry->inheritance = new_inheritance;
3821 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3822 	}
3823 
3824 	vm_map_unlock(map);
3825 	return (0);
3826 }
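
/*
 * Illustrative sketch (not part of the original source): marking a region
 * so that a later fork shares it with the child, in the spirit of
 * minherit(2).  "p", "va" and "len" are hypothetical.
 */
#if 0	/* illustrative only */
	int error;

	error = uvm_map_inherit(&p->p_vmspace->vm_map, va, va + len,
	    MAP_INHERIT_SHARE);
#endif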
3827 
3828 /*
3829  * uvm_map_advice: set advice code for range of addrs in map.
3830  *
3831  * => map must be unlocked
3832  */
3833 int
3834 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
3835 {
3836 	struct vm_map_entry *entry;
3837 
3838 	switch (new_advice) {
3839 	case MADV_NORMAL:
3840 	case MADV_RANDOM:
3841 	case MADV_SEQUENTIAL:
3842 		break;
3843 	default:
3844 		return (EINVAL);
3845 	}
3846 
3847 	if (start > end)
3848 		return EINVAL;
3849 	start = MAX(start, map->min_offset);
3850 	end = MIN(end, map->max_offset);
3851 	if (start >= end)
3852 		return 0;
3853 
3854 	vm_map_lock(map);
3855 
3856 	entry = uvm_map_entrybyaddr(&map->addr, start);
3857 	if (entry != NULL && entry->end > start)
3858 		UVM_MAP_CLIP_START(map, entry, start);
3859 	else if (entry != NULL)
3860 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3861 
3862 	/*
3863 	 * XXXJRT: disallow holes?
3864 	 */
3865 
3866 	while (entry != NULL && entry->start < end) {
3867 		UVM_MAP_CLIP_END(map, entry, end);
3868 		entry->advice = new_advice;
3869 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
3870 	}
3871 
3872 	vm_map_unlock(map);
3873 	return (0);
3874 }
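
/*
 * Illustrative sketch (not part of the original source): hinting
 * sequential access for a range, much as sys_madvise() would.  "p", "va"
 * and "len" are hypothetical.
 */
#if 0	/* illustrative only */
	int error;

	error = uvm_map_advice(&p->p_vmspace->vm_map, va, va + len,
	    MADV_SEQUENTIAL);
#endif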
3875 
3876 /*
3877  * uvm_map_extract: extract a mapping from a map and put it somewhere
3878  * in the kernel_map, setting protection to max_prot.
3879  *
3880  * => map should be unlocked (we will write lock it and kernel_map)
3881  * => returns 0 on success, error code otherwise
3882  * => start must be page aligned
3883  * => len must be page sized
3884  * => flags:
3885  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
3886  * Mappings are QREF's.
3887  */
3888 int
3889 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
3890     vaddr_t *dstaddrp, int flags)
3891 {
3892 	struct uvm_map_deadq dead;
3893 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
3894 	vaddr_t dstaddr;
3895 	vaddr_t end;
3896 	vaddr_t cp_start;
3897 	vsize_t cp_len, cp_off;
3898 	int error;
3899 
3900 	TAILQ_INIT(&dead);
3901 	end = start + len;
3902 
3903 	/*
3904 	 * Sanity check on the parameters.
3905 	 * Also, since the extracted range must not contain gaps, error out
3906 	 * if the requested area does not lie entirely inside the source map.
3907 	 */
3908 
3909 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
3910 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
3911 		return EINVAL;
3912 	if (start < srcmap->min_offset || end > srcmap->max_offset)
3913 		return EINVAL;
3914 
3915 	/*
3916 	 * Handle the len == 0 case: nothing to extract.
3918 	 */
3919 
3920 	if (len == 0)
3921 		return 0;
3922 
3923 	/*
3924 	 * Acquire lock on srcmap.
3925 	 */
3926 	vm_map_lock(srcmap);
3927 
3928 	/*
3929 	 * Look up the first entry in the range [start, start + len).
3930 	 */
3931 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
3932 
3933 	/*
3934 	 * Check that the range is contiguous.
3935 	 */
3936 	for (entry = first; entry != NULL && entry->end < end;
3937 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
3938 		if (VMMAP_FREE_END(entry) != entry->end ||
3939 		    UVM_ET_ISHOLE(entry)) {
3940 			error = EINVAL;
3941 			goto fail;
3942 		}
3943 	}
3944 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
3945 		error = EINVAL;
3946 		goto fail;
3947 	}
3948 
3949 	/*
3950 	 * Handle the needs-copy flag.
3951 	 * Force a private amap, via amap_copy, on every entry in the range
3952 	 * that still has the flag set.
3953 	 *
3954 	 * If the copy cannot be made, fail the extraction.
3955 	 */
3956 	for (entry = first; entry != NULL && entry->start < end;
3957 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
3958 		if (UVM_ET_ISNEEDSCOPY(entry))
3959 			amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
3960 		if (UVM_ET_ISNEEDSCOPY(entry)) {
3961 			/*
3962 			 * amap_copy failure
3963 			 */
3964 			error = ENOMEM;
3965 			goto fail;
3966 		}
3967 	}
3968 
3969 	/*
3970 	 * Lock destination map (kernel_map).
3971 	 */
3972 	vm_map_lock(kernel_map);
3973 
3974 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
3975 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
3976 	    VM_PROT_NONE, 0) != 0) {
3977 		error = ENOMEM;
3978 		goto fail2;
3979 	}
3980 	*dstaddrp = dstaddr;
3981 
3982 	/*
3983 	 * We now have srcmap and kernel_map locked.
3984 	 * dstaddr contains the destination offset in dstmap.
3985 	 */
3986 
3987 	/*
3988 	 * step 1: start looping through map entries, performing extraction.
3989 	 */
3990 	for (entry = first; entry != NULL && entry->start < end;
3991 	    entry = RB_NEXT(uvm_map_addr, &srcmap->addr, entry)) {
3992 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
3993 		if (UVM_ET_ISHOLE(entry))
3994 			continue;
3995 
3996 		/*
3997 		 * Calculate uvm_mapent_clone parameters.
3998 		 */
3999 		cp_start = entry->start;
4000 		if (cp_start < start) {
4001 			cp_off = start - cp_start;
4002 			cp_start = start;
4003 		} else
4004 			cp_off = 0;
4005 		cp_len = MIN(entry->end, end) - cp_start;
4006 
4007 		newentry = uvm_mapent_clone(kernel_map,
4008 		    cp_start - start + dstaddr, cp_len, cp_off,
4009 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4010 		if (newentry == NULL) {
4011 			error = ENOMEM;
4012 			goto fail2_unmap;
4013 		}
4014 		kernel_map->size += cp_len;
4015 		if (flags & UVM_EXTRACT_FIXPROT)
4016 			newentry->protection = newentry->max_protection;
4017 
4018 		/*
4019 		 * Step 2: perform pmap copy.
4020 		 * (Doing this in the loop saves one RB traversal.)
4021 		 */
4022 		pmap_copy(kernel_map->pmap, srcmap->pmap,
4023 		    cp_start - start + dstaddr, cp_len, cp_start);
4024 	}
4025 	pmap_update(kernel_map->pmap);
4026 
4027 	error = 0;
4028 
4029 	/*
4030 	 * Unmap copied entries on failure.
4031 	 */
4032 fail2_unmap:
4033 	if (error) {
4034 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4035 		    FALSE, TRUE);
4036 	}
4037 
4038 	/*
4039 	 * Release maps, release dead entries.
4040 	 */
4041 fail2:
4042 	vm_map_unlock(kernel_map);
4043 
4044 fail:
4045 	vm_map_unlock(srcmap);
4046 
4047 	uvm_unmap_detach(&dead, 0);
4048 
4049 	return error;
4050 }
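
/*
 * Illustrative sketch (not part of the original source): mirroring a
 * page-aligned user range into kernel_map and tearing it down afterwards.
 * "p", "uva" and "len" are hypothetical; uva must be page aligned and len
 * a multiple of PAGE_SIZE, as required above.
 */
#if 0	/* illustrative only */
	vaddr_t kva;
	int error;

	error = uvm_map_extract(&p->p_vmspace->vm_map, uva, len, &kva,
	    UVM_EXTRACT_FIXPROT);
	if (error)
		return error;
	/* ... access the user pages through kva ... */
	uvm_unmap(kernel_map, kva, kva + len);
#endif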
4051 
4052 /*
4053  * uvm_map_clean: clean out a map range
4054  *
4055  * => valid flags:
4056  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4057  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4058  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4059  *   if (flags & PGO_FREE): any cached pages are freed after clean
4060  * => returns an error if any part of the specified range isn't mapped
4061  * => never a need to flush amap layer since the anonymous memory has
4062  *	no permanent home, but may deactivate pages there
4063  * => called from sys_msync() and sys_madvise()
4064  * => caller must not write-lock map (read OK).
4065  * => we may sleep while cleaning if SYNCIO [with map read-locked]
4066  */
4067 
4068 int	amap_clean_works = 1;	/* XXX for now, just in case... */
4069 
4070 int
4071 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4072 {
4073 	struct vm_map_entry *first, *entry;
4074 	struct vm_amap *amap;
4075 	struct vm_anon *anon;
4076 	struct vm_page *pg;
4077 	struct uvm_object *uobj;
4078 	vaddr_t cp_start, cp_end;
4079 	int refs;
4080 	int error;
4081 	boolean_t rv;
4082 
4083 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4084 	    (PGO_FREE|PGO_DEACTIVATE));
4085 
4086 	if (start > end || start < map->min_offset || end > map->max_offset)
4087 		return EINVAL;
4088 
4089 	vm_map_lock_read(map);
4090 	first = uvm_map_entrybyaddr(&map->addr, start);
4091 
4092 	/*
4093 	 * Make a first pass to check for holes.
4094 	 */
4095 	for (entry = first; entry->start < end;
4096 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
4097 		if (UVM_ET_ISSUBMAP(entry)) {
4098 			vm_map_unlock_read(map);
4099 			return EINVAL;
4100 		}
4101 		if (UVM_ET_ISSUBMAP(entry) ||
4102 		    UVM_ET_ISHOLE(entry) ||
4103 		    (entry->end < end &&
4104 		    VMMAP_FREE_END(entry) != entry->end)) {
4105 			vm_map_unlock_read(map);
4106 			return EFAULT;
4107 		}
4108 	}
4109 
4110 	error = 0;
4111 	for (entry = first; entry != NULL && entry->start < end;
4112 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
4113 		amap = entry->aref.ar_amap;	/* top layer */
4114 		if (UVM_ET_ISOBJ(entry))
4115 			uobj = entry->object.uvm_obj;
4116 		else
4117 			uobj = NULL;
4118 
4119 		/*
4120 		 * No amap cleaning necessary if:
4121 		 *  - there's no amap
4122 		 *  - we're not deactivating or freeing pages.
4123 		 */
4124 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4125 			goto flush_object;
4126 		if (!amap_clean_works)
4127 			goto flush_object;
4128 
4129 		cp_start = MAX(entry->start, start);
4130 		cp_end = MIN(entry->end, end);
4131 
4132 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4133 			anon = amap_lookup(&entry->aref,
4134 			    cp_start - entry->start);
4135 			if (anon == NULL)
4136 				continue;
4137 
4138 			simple_lock(&anon->an_lock); /* XXX */
4139 
4140 			pg = anon->an_page;
4141 			if (pg == NULL) {
4142 				simple_unlock(&anon->an_lock);
4143 				continue;
4144 			}
4145 
4146 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4147 			/*
4148 			 * XXX In these first 3 cases, we always just
4149 			 * XXX deactivate the page.  We may want to
4150 			 * XXX handle the different cases more
4151 			 * XXX specifically, in the future.
4152 			 */
4153 			case PGO_CLEANIT|PGO_FREE:
4154 			case PGO_CLEANIT|PGO_DEACTIVATE:
4155 			case PGO_DEACTIVATE:
4156 deactivate_it:
4157 				/* skip the page if it's loaned or wired */
4158 				if (pg->loan_count != 0 ||
4159 				    pg->wire_count != 0) {
4160 					simple_unlock(&anon->an_lock);
4161 					break;
4162 				}
4163 
4164 				uvm_lock_pageq();
4165 
4166 				/*
4167 				 * skip the page if it's not actually owned
4168 				 * by the anon (may simply be loaned to the
4169 				 * anon).
4170 				 */
4171 				if ((pg->pg_flags & PQ_ANON) == 0) {
4172 					KASSERT(pg->uobject == NULL);
4173 					uvm_unlock_pageq();
4174 					simple_unlock(&anon->an_lock);
4175 					break;
4176 				}
4177 				KASSERT(pg->uanon == anon);
4178 
4179 				/* zap all mappings for the page. */
4180 				pmap_page_protect(pg, VM_PROT_NONE);
4181 
4182 				/* ...and deactivate the page. */
4183 				uvm_pagedeactivate(pg);
4184 
4185 				uvm_unlock_pageq();
4186 				simple_unlock(&anon->an_lock);
4187 				break;
4188 
4189 			case PGO_FREE:
4190 
4191 				/*
4192 				 * If there are multiple references to
4193 				 * the amap, just deactivate the page.
4194 				 */
4195 				if (amap_refs(amap) > 1)
4196 					goto deactivate_it;
4197 
4198 				/* XXX skip the page if it's wired */
4199 				if (pg->wire_count != 0) {
4200 					simple_unlock(&anon->an_lock);
4201 					break;
4202 				}
4203 				amap_unadd(&entry->aref,
4204 				    cp_start - entry->start);
4205 				refs = --anon->an_ref;
4206 				simple_unlock(&anon->an_lock);
4207 				if (refs == 0)
4208 					uvm_anfree(anon);
4209 				break;
4210 
4211 			default:
4212 				panic("uvm_map_clean: weird flags");
4213 			}
4214 		}
4215 
4216 flush_object:
4217 		cp_start = MAX(entry->start, start);
4218 		cp_end = MIN(entry->end, end);
4219 
4220 		/*
4221 		 * flush pages if we've got a valid backing object.
4222 		 *
4223 		 * Don't PGO_FREE if we don't have write permission
4224 		 * and don't flush if this is a copy-on-write object
4225 		 * since we can't know our permissions on it.
4226 		 */
4227 		if (uobj != NULL &&
4228 		    ((flags & PGO_FREE) == 0 ||
4229 		     ((entry->max_protection & VM_PROT_WRITE) != 0 &&
4230 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4231 			simple_lock(&uobj->vmobjlock);
4232 			rv = uobj->pgops->pgo_flush(uobj,
4233 			    cp_start - entry->start + entry->offset,
4234 			    cp_end - entry->start + entry->offset, flags);
4235 			simple_unlock(&uobj->vmobjlock);
4236 
4237 			if (rv == FALSE)
4238 				error = EFAULT;
4239 		}
4240 	}
4241 
4242 	vm_map_unlock_read(map);
4243 	return error;
4244 }
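
/*
 * Illustrative sketch (not part of the original source): an msync(2)-style
 * synchronous writeback followed by deactivation.  "p", "va" and "len" are
 * hypothetical; note that PGO_FREE and PGO_DEACTIVATE may not be combined.
 */
#if 0	/* illustrative only */
	int error;

	error = uvm_map_clean(&p->p_vmspace->vm_map, va, va + len,
	    PGO_CLEANIT | PGO_SYNCIO | PGO_DEACTIVATE);
#endif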
4245 
4246 /*
4247  * UVM_MAP_CLIP_END implementation
4248  */
4249 void
4250 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4251 {
4252 	struct vm_map_entry *tmp;
4253 
4254 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4255 	tmp = uvm_mapent_alloc(map, 0);
4256 
4257 	/*
4258 	 * Invoke splitentry.
4259 	 */
4260 	uvm_map_splitentry(map, entry, tmp, addr);
4261 }
4262 
4263 /*
4264  * UVM_MAP_CLIP_START implementation
4265  *
4266  * Clippers are required to not change the pointers to the entry they are
4267  * clipping on.
4268  * Since uvm_map_splitentry turns the original entry into the lowest
4269  * entry (address wise) we do a swap between the new entry and the original
4270  * entry, prior to calling uvm_map_splitentry.
4271  */
4272 void
4273 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4274 {
4275 	struct vm_map_entry *tmp;
4276 	struct uvm_addr_state *free;
4277 
4278 	/* Unlink original. */
4279 	free = uvm_map_uaddr_e(map, entry);
4280 	uvm_mapent_free_remove(map, free, entry);
4281 	uvm_mapent_addr_remove(map, entry);
4282 
4283 	/* Copy entry. */
4284 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4285 	tmp = uvm_mapent_alloc(map, 0);
4286 	uvm_mapent_copy(entry, tmp);
4287 
4288 	/* Put new entry in place of original entry. */
4289 	uvm_mapent_addr_insert(map, tmp);
4290 	uvm_mapent_free_insert(map, free, tmp);
4291 
4292 	/* Invoke splitentry. */
4293 	uvm_map_splitentry(map, tmp, entry, addr);
4294 }
4295 
4296 /*
4297  * Boundary fixer.
4298  */
4299 static __inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4300 static __inline vaddr_t
4301 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4302 {
4303 	return (min < bound && max > bound) ? bound : max;
4304 }
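
/*
 * Example: with min = 0x1000 and max = 0x5000, a bound of 0x3000 lies
 * strictly inside the range and is returned (the range gets clipped at
 * the boundary), while a bound of 0x8000 lies outside and max (0x5000)
 * is returned unchanged.
 */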
4305 
4306 /*
4307  * Choose free list based on address at start of free space.
4308  *
4309  * The uvm_addr_state returned contains addr and is the first of:
4310  * - uaddr_exe
4311  * - uaddr_brk_stack
4312  * - uaddr_any
4313  */
4314 struct uvm_addr_state*
4315 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4316 {
4317 	struct uvm_addr_state *uaddr;
4318 	int i;
4319 
4320 	/* Special case the first page, to prevent mmap from returning 0. */
4321 	if (addr < VMMAP_MIN_ADDR)
4322 		return NULL;
4323 
4324 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4325 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4326 		if (addr >= uvm_maxkaddr)
4327 			return NULL;
4328 	}
4329 
4330 	/* Is the address inside the exe-only map? */
4331 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4332 	    addr < map->uaddr_exe->uaddr_maxaddr)
4333 		return map->uaddr_exe;
4334 
4335 	/* Check if the space falls inside brk/stack area. */
4336 	if ((addr >= map->b_start && addr < map->b_end) ||
4337 	    (addr >= map->s_start && addr < map->s_end)) {
4338 		if (map->uaddr_brk_stack != NULL &&
4339 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4340 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4341 			return map->uaddr_brk_stack;
4342 		} else
4343 			return NULL;
4344 	}
4345 
4346 	/*
4347 	 * Check the other selectors.
4348 	 *
4349 	 * These selectors are only marked as the owner if they have
4350 	 * insert functions.
4351 	 */
4352 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4353 		uaddr = map->uaddr_any[i];
4354 		if (uaddr == NULL)
4355 			continue;
4356 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4357 			continue;
4358 
4359 		if (addr >= uaddr->uaddr_minaddr &&
4360 		    addr < uaddr->uaddr_maxaddr)
4361 			return uaddr;
4362 	}
4363 
4364 	return NULL;
4365 }
4366 
4367 /*
4368  * Choose free list based on address at start of free space.
4369  *
4370  * The uvm_addr_state returned contains addr and is the first of:
4371  * - uaddr_exe
4372  * - uaddr_brk_stack
4373  * - uaddr_any
4374  */
4375 struct uvm_addr_state*
4376 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4377 {
4378 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4379 }
4380 
4381 /*
4382  * Returns the first free-memory boundary that is crossed by [min-max].
4383  */
4384 vsize_t
4385 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4386 {
4387 	struct uvm_addr_state	*uaddr;
4388 	int			 i;
4389 
4390 	/* Never return first page. */
4391 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4392 
4393 	/* Treat the maxkaddr special, if the map is a kernel_map. */
4394 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4395 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4396 
4397 	/* Check for exe-only boundaries. */
4398 	if (map->uaddr_exe != NULL) {
4399 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4400 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4401 	}
4402 
4403 	/* Check for brk/stack boundaries. */
4404 	if (map->uaddr_brk_stack != NULL) {
4405 		max = uvm_map_boundfix(min, max,
4406 		    map->uaddr_brk_stack->uaddr_minaddr);
4407 		max = uvm_map_boundfix(min, max,
4408 		    map->uaddr_brk_stack->uaddr_maxaddr);
4409 	}
4410 
4411 	/* Check other boundaries. */
4412 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4413 		uaddr = map->uaddr_any[i];
4414 		if (uaddr != NULL) {
4415 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4416 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4417 		}
4418 	}
4419 
4420 	/* Boundaries at stack and brk() area. */
4421 	max = uvm_map_boundfix(min, max, map->s_start);
4422 	max = uvm_map_boundfix(min, max, map->s_end);
4423 	max = uvm_map_boundfix(min, max, map->b_start);
4424 	max = uvm_map_boundfix(min, max, map->b_end);
4425 
4426 	return max;
4427 }
4428 
4429 /*
4430  * Update map allocation start and end addresses from proc vmspace.
4431  */
4432 void
4433 uvm_map_vmspace_update(struct vm_map *map,
4434     struct uvm_map_deadq *dead, int flags)
4435 {
4436 	struct vmspace *vm;
4437 	vaddr_t b_start, b_end, s_start, s_end;
4438 
4439 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4440 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4441 
4442 	/*
4443 	 * Derive actual allocation boundaries from vmspace.
4444 	 */
4445 	vm = (struct vmspace *)map;
4446 	b_start = (vaddr_t)vm->vm_daddr;
4447 	b_end   = b_start + BRKSIZ;
4448 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4449 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4450 #ifdef DIAGNOSTIC
4451 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4452 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4453 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4454 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4455 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4456 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4457 		    vm, b_start, b_end, s_start, s_end);
4458 	}
4459 #endif
4460 
4461 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4462 	    map->s_start == s_start && map->s_end == s_end))
4463 		return;
4464 
4465 	uvm_map_freelist_update(map, dead, b_start, b_end,
4466 	    s_start, s_end, flags);
4467 }
4468 
4469 /*
4470  * Grow kernel memory.
4471  *
4472  * This function is only called for kernel maps when an allocation fails.
4473  *
4474  * If the map has a gap that is large enough to accommodate alloc_sz, this
4475  * function will make sure map->free will include it.
4476  */
4477 void
4478 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4479     vsize_t alloc_sz, int flags)
4480 {
4481 	vsize_t sz;
4482 	vaddr_t end;
4483 	struct vm_map_entry *entry;
4484 
4485 	/* Kernel memory only. */
4486 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4487 	/* Destroy free list. */
4488 	uvm_map_freelist_update_clear(map, dead);
4489 
4490 	/*
4491 	 * Include the guard page in the hard minimum requirement of alloc_sz.
4492 	 */
4493 	if (map->flags & VM_MAP_GUARDPAGES)
4494 		alloc_sz += PAGE_SIZE;
4495 
4496 	/*
4497 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4498 	 *
4499 	 * Don't handle the case where the multiplication overflows:
4500 	 * if that happens, the allocation is probably too big anyway.
4501 	 */
4502 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
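	/*
	 * For example (purely illustrative values; the VM_MAP_KSIZE_*
	 * constants are defined elsewhere): with ALLOCMUL == 4 and
	 * DELTA == 1 MB, a failing 16 KB allocation grows the map by
	 * MAX(64 KB, 1 MB) == 1 MB, while a 64 MB allocation grows it
	 * by 256 MB (clipped to the end of the map below).
	 */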
4503 
4504 	/*
4505 	 * Walk forward until a gap large enough for alloc_sz shows up.
4506 	 *
4507 	 * We assume the kernel map has no boundaries.
4508 	 * uvm_maxkaddr may be zero.
4509 	 */
4510 	end = MAX(uvm_maxkaddr, map->min_offset);
4511 	entry = uvm_map_entrybyaddr(&map->addr, end);
4512 	while (entry && entry->fspace < alloc_sz)
4513 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4514 	if (entry) {
4515 		end = MAX(VMMAP_FREE_START(entry), end);
4516 		end += MIN(sz, map->max_offset - end);
4517 	} else
4518 		end = map->max_offset;
4519 
4520 	/* Reserve pmap entries. */
4521 #ifdef PMAP_GROWKERNEL
4522 	uvm_maxkaddr = pmap_growkernel(end);
4523 #else
4524 	uvm_maxkaddr = end;
4525 #endif
4526 
4527 	/* Rebuild free list. */
4528 	uvm_map_freelist_update_refill(map, flags);
4529 }
4530 
4531 /*
4532  * Freelist update subfunction: unlink all entries from freelists.
4533  */
4534 void
4535 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4536 {
4537 	struct uvm_addr_state *free;
4538 	struct vm_map_entry *entry, *prev, *next;
4539 
4540 	prev = NULL;
4541 	for (entry = RB_MIN(uvm_map_addr, &map->addr); entry != NULL;
4542 	    entry = next) {
4543 		next = RB_NEXT(uvm_map_addr, &map->addr, entry);
4544 
4545 		free = uvm_map_uaddr_e(map, entry);
4546 		uvm_mapent_free_remove(map, free, entry);
4547 
4548 		if (prev != NULL && entry->start == entry->end) {
4549 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4550 			uvm_mapent_addr_remove(map, entry);
4551 			DEAD_ENTRY_PUSH(dead, entry);
4552 		} else
4553 			prev = entry;
4554 	}
4555 }
4556 
4557 /*
4558  * Freelist update subfunction: refill the freelists with entries.
4559  */
4560 void
4561 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
4562 {
4563 	struct vm_map_entry *entry;
4564 	vaddr_t min, max;
4565 
4566 	RB_FOREACH(entry, uvm_map_addr, &map->addr) {
4567 		min = VMMAP_FREE_START(entry);
4568 		max = VMMAP_FREE_END(entry);
4569 		entry->fspace = 0;
4570 
4571 		entry = uvm_map_fix_space(map, entry, min, max, flags);
4572 	}
4573 
4574 	uvm_tree_sanity(map, __FILE__, __LINE__);
4575 }
4576 
4577 /*
4578  * Change {b,s}_{start,end} allocation ranges and associated free lists.
4579  */
4580 void
4581 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
4582     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
4583 {
4584 	KDASSERT(b_end >= b_start && s_end >= s_start);
4585 
4586 	/* Clear all free lists. */
4587 	uvm_map_freelist_update_clear(map, dead);
4588 
4589 	/* Apply new bounds. */
4590 	map->b_start = b_start;
4591 	map->b_end   = b_end;
4592 	map->s_start = s_start;
4593 	map->s_end   = s_end;
4594 
4595 	/* Refill free lists. */
4596 	uvm_map_freelist_update_refill(map, flags);
4597 }
4598 
4599 /*
4600  * Assign a uvm_addr_state to the specified pointer in vm_map.
4601  *
4602  * May sleep.
4603  */
4604 void
4605 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
4606     struct uvm_addr_state *newval)
4607 {
4608 	struct uvm_map_deadq dead;
4609 
4610 	/* Pointer which must be in this map. */
4611 	KASSERT(which != NULL);
4612 	KASSERT((void*)map <= (void*)(which) &&
4613 	    (void*)(which) < (void*)(map + 1));
4614 
4615 	vm_map_lock(map);
4616 	TAILQ_INIT(&dead);
4617 	uvm_map_freelist_update_clear(map, &dead);
4618 
4619 	uvm_addr_destroy(*which);
4620 	*which = newval;
4621 
4622 	uvm_map_freelist_update_refill(map, 0);
4623 	vm_map_unlock(map);
4624 	uvm_unmap_detach(&dead, 0);
4625 }
4626 
4627 /*
4628  * Correct space insert.
4629  *
4630  * Entry must not be on any freelist.
4631  */
4632 struct vm_map_entry*
4633 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
4634     vaddr_t min, vaddr_t max, int flags)
4635 {
4636 	struct uvm_addr_state	*free, *entfree;
4637 	vaddr_t			 lmax;
4638 
4639 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
4640 	KDASSERT(min <= max);
4641 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
4642 	    min == map->min_offset);
4643 
4644 	/*
4645 	 * During the function, entfree will always point at the uaddr state
4646 	 * for entry.
4647 	 */
4648 	entfree = (entry == NULL ? NULL :
4649 	    uvm_map_uaddr_e(map, entry));
4650 
4651 	while (min != max) {
4652 		/*
4653 		 * Claim guard page for entry.
4654 		 */
4655 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
4656 		    VMMAP_FREE_END(entry) == entry->end &&
4657 		    entry->start != entry->end) {
4658 			if (max - min == 2 * PAGE_SIZE) {
4659 				/*
4660 				 * If the free-space gap is exactly 2 pages,
4661 				 * we make the guard 2 pages instead of 1.
4662 				 * Because in a guarded map, an area needs
4663 				 * at least 2 pages to allocate from:
4664 				 * one page for the allocation and one for
4665 				 * the guard.
4666 				 */
4667 				entry->guard = 2 * PAGE_SIZE;
4668 				min = max;
4669 			} else {
4670 				entry->guard = PAGE_SIZE;
4671 				min += PAGE_SIZE;
4672 			}
4673 			continue;
4674 		}
4675 
4676 		/*
4677 		 * Handle the case where entry has a 2-page guard, but the
4678 		 * space after entry is freed.
4679 		 */
4680 		if (entry != NULL && entry->fspace == 0 &&
4681 		    entry->guard > PAGE_SIZE) {
4682 			entry->guard = PAGE_SIZE;
4683 			min = VMMAP_FREE_START(entry);
4684 		}
4685 
4686 		lmax = uvm_map_boundary(map, min, max);
4687 		free = uvm_map_uaddr(map, min);
4688 
4689 		/*
4690 		 * Entries are merged if they point at the same uaddr free list.
4691 		 * Exception to that rule: if min == uvm_maxkaddr, a new
4692 		 * entry is started regardless (otherwise the allocators
4693 		 * will get confused).
4694 		 */
4695 		if (entry != NULL && free == entfree &&
4696 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
4697 		    min == uvm_maxkaddr)) {
4698 			KDASSERT(VMMAP_FREE_END(entry) == min);
4699 			entry->fspace += lmax - min;
4700 		} else {
4701 			/*
4702 			 * Commit entry to the free list: nothing more will
4703 			 * be added to it.
4704 			 * We'll start a new entry and add free space to
4705 			 * that entry instead.
4706 			 */
4707 			if (entry != NULL)
4708 				uvm_mapent_free_insert(map, entfree, entry);
4709 
4710 			/* New entry for new uaddr. */
4711 			entry = uvm_mapent_alloc(map, flags);
4712 			KDASSERT(entry != NULL);
4713 			entry->end = entry->start = min;
4714 			entry->guard = 0;
4715 			entry->fspace = lmax - min;
4716 			entry->object.uvm_obj = NULL;
4717 			entry->offset = 0;
4718 			entry->etype = 0;
4719 			entry->protection = entry->max_protection = 0;
4720 			entry->inheritance = 0;
4721 			entry->wired_count = 0;
4722 			entry->advice = 0;
4723 			entry->aref.ar_pageoff = 0;
4724 			entry->aref.ar_amap = NULL;
4725 			uvm_mapent_addr_insert(map, entry);
4726 
4727 			entfree = free;
4728 		}
4729 
4730 		min = lmax;
4731 	}
4732 	/* Finally put entry on the uaddr state. */
4733 	if (entry != NULL)
4734 		uvm_mapent_free_insert(map, entfree, entry);
4735 
4736 	return entry;
4737 }
4738 
4739 /*
4740  * MQuery style of allocation.
4741  *
4742  * This allocator searches forward until sufficient space is found to map
4743  * the given size.
4744  *
4745  * XXX: factor in offset (via pmap_prefer) and protection?
4746  */
4747 int
4748 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
4749     int flags)
4750 {
4751 	struct vm_map_entry *entry, *last;
4752 	vaddr_t addr;
4753 	vaddr_t tmp, pmap_align, pmap_offset;
4754 	int error;
4755 
4756 	addr = *addr_p;
4757 	vm_map_lock_read(map);
4758 
4759 	/*
4760 	 * Configure pmap prefer.
4761 	 */
4762 	if (offset != UVM_UNKNOWN_OFFSET) {
4763 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
4764 		pmap_offset = PMAP_PREFER_OFFSET(offset);
4765 	} else {
4766 		pmap_align = PAGE_SIZE;
4767 		pmap_offset = 0;
4768 	}
4769 
4770 	/*
4771 	 * Align address to pmap_prefer unless FLAG_FIXED is set.
4772 	 */
4773 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
4774 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
4775 		if (tmp < addr)
4776 			tmp += pmap_align;
4777 		addr = tmp;
4778 	}
4779 
4780 	/*
4781 	 * First, check if the requested range is fully available.
4782 	 */
4783 	entry = uvm_map_entrybyaddr(&map->addr, addr);
4784 	last = NULL;
4785 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
4786 		error = 0;
4787 		goto out;
4788 	}
4789 	if (flags & UVM_FLAG_FIXED) {
4790 		error = EINVAL;
4791 		goto out;
4792 	}
4793 
4794 	error = ENOMEM; /* Default error from here. */
4795 
4796 	/*
4797 	 * At this point, the memory at <addr, sz> is not available.
4798 	 * The reasons are:
4799 	 * [1] it's outside the map,
4800 	 * [2] it starts in used memory (and therefore needs to move
4801 	 *     toward the first free page in entry),
4802 	 * [3] it starts in free memory but bumps into used memory.
4803 	 *
4804 	 * Note that for case [2], the forward moving is handled by the
4805 	 * for loop below.
4806 	 */
4807 
4808 	if (entry == NULL) {
4809 		/* [1] Outside the map. */
4810 		if (addr >= map->max_offset)
4811 			goto out;
4812 		else
4813 			entry = RB_MIN(uvm_map_addr, &map->addr);
4814 	} else if (VMMAP_FREE_START(entry) <= addr) {
4815 		/* [3] Bumped into used memory. */
4816 		entry = RB_NEXT(uvm_map_addr, &map->addr, entry);
4817 	}
4818 
4819 	/*
4820 	 * Test if the next entry is sufficient for the allocation.
4821 	 */
4822 	for (; entry != NULL;
4823 	    entry = RB_NEXT(uvm_map_addr, &map->addr, entry)) {
4824 		if (entry->fspace == 0)
4825 			continue;
4826 		addr = VMMAP_FREE_START(entry);
4827 
4828 restart:	/* Restart address checks on address change. */
4829 
4830 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
4831 		if (tmp < addr)
4832 			tmp += pmap_align;
4833 		addr = tmp;
4834 		if (addr >= VMMAP_FREE_END(entry))
4835 			continue;
4836 
4837 		/*
4838 		 * Skip brk() allocation addresses.
4839 		 */
4840 		if (addr + sz > map->b_start && addr < map->b_end) {
4841 			if (VMMAP_FREE_END(entry) > map->b_end) {
4842 				addr = map->b_end;
4843 				goto restart;
4844 			} else
4845 				continue;
4846 		}
4847 		/*
4848 		 * Skip stack allocation addresses.
4849 		 */
4850 		if (addr + sz > map->s_start && addr < map->s_end) {
4851 			if (VMMAP_FREE_END(entry) > map->s_end) {
4852 				addr = map->s_end;
4853 				goto restart;
4854 			} else
4855 				continue;
4856 		}
4857 
4858 		last = NULL;
4859 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
4860 			error = 0;
4861 			goto out;
4862 		}
4863 	}
4864 
4865 out:
4866 	vm_map_unlock_read(map);
4867 	if (error == 0)
4868 		*addr_p = addr;
4869 	return error;
4870 }
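
/*
 * Illustrative sketch (not part of the original source): asking where, at
 * or after a hint, "sz" bytes could be placed.  "p", "hint" and "sz" are
 * hypothetical; on success the candidate address is written back through
 * the pointer.
 */
#if 0	/* illustrative only */
	vaddr_t addr = hint;
	int error;

	error = uvm_map_mquery(&p->p_vmspace->vm_map, &addr, sz,
	    UVM_UNKNOWN_OFFSET, 0);
	if (error == 0) {
		/* addr now holds a usable address for the mapping. */
	}
#endif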
4871 
4872 /*
4873  * Determine allocation bias.
4874  *
4875  * Returns 1 if we should bias to high addresses, -1 for a bias towards low
4876  * addresses, or 0 for no bias.
4877  * The bias mechanism is intended to avoid clashing with brk() and stack
4878  * areas.
4879  */
4880 int
4881 uvm_mapent_bias(struct vm_map *map, struct vm_map_entry *entry)
4882 {
4883 	vaddr_t start, end;
4884 
4885 	start = VMMAP_FREE_START(entry);
4886 	end = VMMAP_FREE_END(entry);
4887 
4888 	/*
4889 	 * Stay at the top of brk() area.
4890 	 */
4891 	if (end >= map->b_start && start < map->b_end)
4892 		return 1;
4893 	/*
4894 	 * Stay at the far end of the stack area.
4895 	 */
4896 	if (end >= map->s_start && start < map->s_end) {
4897 #ifdef MACHINE_STACK_GROWS_UP
4898 		return 1;
4899 #else
4900 		return -1;
4901 #endif
4902 	}
4903 
4904 	/*
4905 	 * No bias, this area is meant for us.
4906 	 */
4907 	return 0;
4908 }
4909 
4910 
4911 boolean_t
4912 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
4913 {
4914 	boolean_t rv;
4915 
4916 	if (map->flags & VM_MAP_INTRSAFE) {
4917 		rv = TRUE;
4918 	} else {
4919 		if (map->flags & VM_MAP_BUSY) {
4920 			return (FALSE);
4921 		}
4922 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
4923 	}
4924 
4925 	if (rv) {
4926 		map->timestamp++;
4927 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4928 		uvm_tree_sanity(map, file, line);
4929 		uvm_tree_size_chk(map, file, line);
4930 	}
4931 
4932 	return (rv);
4933 }
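
/*
 * Illustrative sketch (not part of the original source): the non-blocking
 * locking pattern built on the function above, via the vm_map_lock_try()
 * wrapper.
 */
#if 0	/* illustrative only */
	if (!vm_map_lock_try(map)) {
		/* Map is busy or contended; caller backs off. */
		return EAGAIN;
	}
	/* ... modify the map ... */
	vm_map_unlock(map);
#endif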
4934 
4935 void
4936 vm_map_lock_ln(struct vm_map *map, char *file, int line)
4937 {
4938 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
4939 		do {
4940 			while (map->flags & VM_MAP_BUSY) {
4941 				map->flags |= VM_MAP_WANTLOCK;
4942 				tsleep(&map->flags, PVM, (char *)vmmapbsy, 0);
4943 			}
4944 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
4945 	}
4946 
4947 	map->timestamp++;
4948 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4949 	uvm_tree_sanity(map, file, line);
4950 	uvm_tree_size_chk(map, file, line);
4951 }
4952 
4953 void
4954 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
4955 {
4956 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4957 		rw_enter_read(&map->lock);
4958 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4959 	uvm_tree_sanity(map, file, line);
4960 	uvm_tree_size_chk(map, file, line);
4961 }
4962 
4963 void
4964 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
4965 {
4966 	uvm_tree_sanity(map, file, line);
4967 	uvm_tree_size_chk(map, file, line);
4968 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4969 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4970 		rw_exit(&map->lock);
4971 }
4972 
4973 void
4974 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
4975 {
4976 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
4977 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
4978 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4979 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4980 		rw_exit_read(&map->lock);
4981 }
4982 
4983 void
4984 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
4985 {
4986 	uvm_tree_sanity(map, file, line);
4987 	uvm_tree_size_chk(map, file, line);
4988 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
4989 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
4990 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
4991 		rw_enter(&map->lock, RW_DOWNGRADE);
4992 }
4993 
4994 void
4995 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
4996 {
4997 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
4998 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
4999 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5000 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5001 		rw_exit_read(&map->lock);
5002 		rw_enter_write(&map->lock);
5003 	}
5004 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5005 	uvm_tree_sanity(map, file, line);
5006 }
5007 
5008 void
5009 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5010 {
5011 	map->flags |= VM_MAP_BUSY;
5012 }
5013 
5014 void
5015 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5016 {
5017 	int oflags;
5018 
5019 	oflags = map->flags;
5020 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5021 	if (oflags & VM_MAP_WANTLOCK)
5022 		wakeup(&map->flags);
5023 }
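
/*
 * Illustrative sketch (not part of the original source): the busy/unbusy
 * protocol, used to keep other writers out of the map while the write
 * lock is temporarily dropped around work that may sleep.
 */
#if 0	/* illustrative only */
	vm_map_busy(map);
	vm_map_unlock(map);
	/* ... work that may sleep ... */
	vm_map_lock(map);
	vm_map_unbusy(map);
#endif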
5024 
5025 
5026 #undef RB_AUGMENT
5027 #define RB_AUGMENT(x)	uvm_map_addr_augment((x))
5028 RB_GENERATE(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5029     uvm_mapentry_addrcmp);
5030 #undef RB_AUGMENT
5031 
5032 
5033 /*
5034  * MD code: vmspace allocator setup.
5035  */
5036 
5037 
5038 #ifdef __i386__
5039 void
5040 uvm_map_setup_md(struct vm_map *map)
5041 {
5042 	vaddr_t		min, max;
5043 
5044 	min = map->min_offset;
5045 	max = map->max_offset;
5046 
5047 	/*
5048 	 * Ensure the selectors will not try to manage page 0;
5049 	 * it's too special.
5050 	 */
5051 	if (min < VMMAP_MIN_ADDR)
5052 		min = VMMAP_MIN_ADDR;
5053 
5054 #if 0	/* Cool stuff, not yet */
5055 	/* Hinted allocations. */
5056 	map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max,
5057 	    1024 * 1024 * 1024);
5058 
5059 	/* Executable code is special. */
5060 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5061 	/* Place normal allocations beyond executable mappings. */
5062 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5063 #else	/* Crappy stuff, for now */
5064 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5065 #endif
5066 
5067 #ifndef SMALL_KERNEL
5068 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5069 #endif /* !SMALL_KERNEL */
5070 }
5071 #elif __LP64__
5072 void
5073 uvm_map_setup_md(struct vm_map *map)
5074 {
5075 	vaddr_t		min, max;
5076 
5077 	min = map->min_offset;
5078 	max = map->max_offset;
5079 
5080 	/*
5081 	 * Ensure the selectors will not try to manage page 0;
5082 	 * it's too special.
5083 	 */
5084 	if (min < VMMAP_MIN_ADDR)
5085 		min = VMMAP_MIN_ADDR;
5086 
5087 #if 0	/* Cool stuff, not yet */
5088 	/* Hinted allocations above 4GB */
5089 	map->uaddr_any[0] =
5090 	    uaddr_hint_create(0x100000000ULL, max, 1024 * 1024 * 1024);
5091 	/* Hinted allocations below 4GB */
5092 	map->uaddr_any[1] =
5093 	    uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), 0x100000000ULL,
5094 	    1024 * 1024 * 1024);
5095 	/* Normal allocations, always above 4GB */
5096 	map->uaddr_any[3] =
5097 	    uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5098 #else	/* Crappy stuff, for now */
5099 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5100 #endif
5101 
5102 #ifndef SMALL_KERNEL
5103 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5104 #endif /* !SMALL_KERNEL */
5105 }
5106 #else	/* non-i386, 32 bit */
5107 void
5108 uvm_map_setup_md(struct vm_map *map)
5109 {
5110 	vaddr_t		min, max;
5111 
5112 	min = map->min_offset;
5113 	max = map->max_offset;
5114 
5115 	/*
5116 	 * Ensure the selectors will not try to manage page 0;
5117 	 * it's too special.
5118 	 */
5119 	if (min < VMMAP_MIN_ADDR)
5120 		min = VMMAP_MIN_ADDR;
5121 
5122 #if 0	/* Cool stuff, not yet */
5123 	/* Hinted allocations. */
5124 	map->uaddr_any[1] = uaddr_hint_create(MAX(min, VMMAP_MIN_ADDR), max,
5125 	    1024 * 1024 * 1024);
5126 	/* Normal allocations. */
5127 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5128 #else	/* Crappy stuff, for now */
5129 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5130 #endif
5131 
5132 #ifndef SMALL_KERNEL
5133 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5134 #endif /* !SMALL_KERNEL */
5135 }
5136 #endif
5137