1 /*	$OpenBSD: uvm_map.c,v 1.317 2023/04/26 12:25:12 bluhm Exp $	*/
2 /*	$NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  *
20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
21  * Copyright (c) 1991, 1993, The Regents of the University of California.
22  *
23  * All rights reserved.
24  *
25  * This code is derived from software contributed to Berkeley by
26  * The Mach Operating System project at Carnegie-Mellon University.
27  *
28  * Redistribution and use in source and binary forms, with or without
29  * modification, are permitted provided that the following conditions
30  * are met:
31  * 1. Redistributions of source code must retain the above copyright
32  *    notice, this list of conditions and the following disclaimer.
33  * 2. Redistributions in binary form must reproduce the above copyright
34  *    notice, this list of conditions and the following disclaimer in the
35  *    documentation and/or other materials provided with the distribution.
36  * 3. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54  *
55  *
56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57  * All rights reserved.
58  *
59  * Permission to use, copy, modify and distribute this software and
60  * its documentation is hereby granted, provided that both the copyright
61  * notice and this permission notice appear in all copies of the
62  * software, derivative works or modified versions, and any portions
63  * thereof, and that both notices appear in supporting documentation.
64  *
65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68  *
69  * Carnegie Mellon requests users of this software to return to
70  *
71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
72  *  School of Computer Science
73  *  Carnegie Mellon University
74  *  Pittsburgh PA 15213-3890
75  *
76  * any improvements or extensions that they make and grant Carnegie the
77  * rights to redistribute these changes.
78  */
79 
80 /*
81  * uvm_map.c: uvm map operations
82  */
83 
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86 
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/acct.h>
90 #include <sys/mman.h>
91 #include <sys/proc.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/sysctl.h>
95 #include <sys/signalvar.h>
96 #include <sys/syslog.h>
97 #include <sys/user.h>
98 #include <sys/tracepoint.h>
99 
100 #ifdef SYSVSHM
101 #include <sys/shm.h>
102 #endif
103 
104 #include <uvm/uvm.h>
105 
106 #ifdef DDB
107 #include <uvm/uvm_ddb.h>
108 #endif
109 
110 #include <uvm/uvm_addr.h>
111 
112 
113 vsize_t			 uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
114 int			 uvm_mapent_isjoinable(struct vm_map*,
115 			    struct vm_map_entry*, struct vm_map_entry*);
116 struct vm_map_entry	*uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
117 			    struct vm_map_entry*, struct uvm_map_deadq*);
118 struct vm_map_entry	*uvm_mapent_tryjoin(struct vm_map*,
119 			    struct vm_map_entry*, struct uvm_map_deadq*);
120 struct vm_map_entry	*uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
121 			    struct vm_map_entry*, vaddr_t, vsize_t, int,
122 			    struct uvm_map_deadq*, struct vm_map_entry*);
123 struct vm_map_entry	*uvm_mapent_alloc(struct vm_map*, int);
124 void			 uvm_mapent_free(struct vm_map_entry*);
125 void			 uvm_unmap_kill_entry(struct vm_map*,
126 			    struct vm_map_entry*);
127 void			 uvm_unmap_kill_entry_withlock(struct vm_map *,
128 			    struct vm_map_entry *, int);
129 void			 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
130 void			 uvm_mapent_mkfree(struct vm_map*,
131 			    struct vm_map_entry*, struct vm_map_entry**,
132 			    struct uvm_map_deadq*, boolean_t);
133 void			 uvm_map_pageable_pgon(struct vm_map*,
134 			    struct vm_map_entry*, struct vm_map_entry*,
135 			    vaddr_t, vaddr_t);
136 int			 uvm_map_pageable_wire(struct vm_map*,
137 			    struct vm_map_entry*, struct vm_map_entry*,
138 			    vaddr_t, vaddr_t, int);
139 void			 uvm_map_setup_entries(struct vm_map*);
140 void			 uvm_map_setup_md(struct vm_map*);
141 void			 uvm_map_teardown(struct vm_map*);
142 void			 uvm_map_vmspace_update(struct vm_map*,
143 			    struct uvm_map_deadq*, int);
144 void			 uvm_map_kmem_grow(struct vm_map*,
145 			    struct uvm_map_deadq*, vsize_t, int);
146 void			 uvm_map_freelist_update_clear(struct vm_map*,
147 			    struct uvm_map_deadq*);
148 void			 uvm_map_freelist_update_refill(struct vm_map *, int);
149 void			 uvm_map_freelist_update(struct vm_map*,
150 			    struct uvm_map_deadq*, vaddr_t, vaddr_t,
151 			    vaddr_t, vaddr_t, int);
152 struct vm_map_entry	*uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
153 			    vaddr_t, vaddr_t, int);
154 int			 uvm_map_findspace(struct vm_map*,
155 			    struct vm_map_entry**, struct vm_map_entry**,
156 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
157 			    vaddr_t);
158 vsize_t			 uvm_map_addr_augment_get(struct vm_map_entry*);
159 void			 uvm_map_addr_augment(struct vm_map_entry*);
160 
161 int			 uvm_map_inentry_recheck(u_long, vaddr_t,
162 			     struct p_inentry *);
163 boolean_t		 uvm_map_inentry_fix(struct proc *, struct p_inentry *,
164 			     vaddr_t, int (*)(vm_map_entry_t), u_long);
165 /*
166  * Tree management functions.
167  */
168 
169 static inline void	 uvm_mapent_copy(struct vm_map_entry*,
170 			    struct vm_map_entry*);
171 static inline int	 uvm_mapentry_addrcmp(const struct vm_map_entry*,
172 			    const struct vm_map_entry*);
173 void			 uvm_mapent_free_insert(struct vm_map*,
174 			    struct uvm_addr_state*, struct vm_map_entry*);
175 void			 uvm_mapent_free_remove(struct vm_map*,
176 			    struct uvm_addr_state*, struct vm_map_entry*);
177 void			 uvm_mapent_addr_insert(struct vm_map*,
178 			    struct vm_map_entry*);
179 void			 uvm_mapent_addr_remove(struct vm_map*,
180 			    struct vm_map_entry*);
181 void			 uvm_map_splitentry(struct vm_map*,
182 			    struct vm_map_entry*, struct vm_map_entry*,
183 			    vaddr_t);
184 vsize_t			 uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
185 
186 /*
187  * uvm_vmspace_fork helper functions.
188  */
189 struct vm_map_entry	*uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
190 			    vsize_t, vm_prot_t, vm_prot_t,
191 			    struct vm_map_entry*, struct uvm_map_deadq*, int,
192 			    int);
193 struct vm_map_entry	*uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
194 			    vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
195 			    struct vm_map_entry*, struct uvm_map_deadq*);
196 struct vm_map_entry	*uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
197 			    struct vm_map*, struct vm_map_entry*,
198 			    struct uvm_map_deadq*);
199 struct vm_map_entry	*uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
200 			    struct vm_map*, struct vm_map_entry*,
201 			    struct uvm_map_deadq*);
202 struct vm_map_entry	*uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
203 			    struct vm_map*, struct vm_map_entry*,
204 			    struct uvm_map_deadq*);
205 
206 /*
207  * Tree validation.
208  */
209 #ifdef VMMAP_DEBUG
210 void			 uvm_tree_assert(struct vm_map*, int, char*,
211 			    char*, int);
212 #define UVM_ASSERT(map, cond, file, line)				\
213 	uvm_tree_assert((map), (cond), #cond, (file), (line))
214 void			 uvm_tree_sanity(struct vm_map*, char*, int);
215 void			 uvm_tree_size_chk(struct vm_map*, char*, int);
216 void			 vmspace_validate(struct vm_map*);
217 #else
218 #define uvm_tree_sanity(_map, _file, _line)		do {} while (0)
219 #define uvm_tree_size_chk(_map, _file, _line)		do {} while (0)
220 #define vmspace_validate(_map)				do {} while (0)
221 #endif
222 
223 /*
224  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
225  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
226  *
227  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
228  * each time.
229  */
230 #define VM_MAP_KSIZE_INIT	(512 * (vaddr_t)PAGE_SIZE)
231 #define VM_MAP_KSIZE_DELTA	(256 * (vaddr_t)PAGE_SIZE)
232 #define VM_MAP_KSIZE_ALLOCMUL	4
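
/*
 * Illustrative arithmetic (a sketch; the authoritative logic lives in
 * uvm_map_kmem_grow): assuming 4 KB pages, the kernel map starts at
 * 512 * 4 KB = 2 MB.  A cramped 64 KB allocation would then ask for
 * roughly MAX(VM_MAP_KSIZE_DELTA, VM_MAP_KSIZE_ALLOCMUL * 64 KB) =
 * MAX(1 MB, 256 KB) = 1 MB of additional kernel virtual address space.
 */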
233 
234 /* auto-allocate address lower bound */
235 #define VMMAP_MIN_ADDR		PAGE_SIZE
236 
237 
238 #ifdef DEADBEEF0
239 #define UVMMAP_DEADBEEF		((unsigned long)DEADBEEF0)
240 #else
241 #define UVMMAP_DEADBEEF		((unsigned long)0xdeadd0d0)
242 #endif
243 
244 #ifdef DEBUG
245 int uvm_map_printlocks = 0;
246 
247 #define LPRINTF(_args)							\
248 	do {								\
249 		if (uvm_map_printlocks)					\
250 			printf _args;					\
251 	} while (0)
252 #else
253 #define LPRINTF(_args)	do {} while (0)
254 #endif
255 
256 static struct mutex uvm_kmapent_mtx;
257 static struct timeval uvm_kmapent_last_warn_time;
258 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
259 
260 const char vmmapbsy[] = "vmmapbsy";
261 
262 /*
263  * pool for vmspace structures.
264  */
265 struct pool uvm_vmspace_pool;
266 
267 /*
268  * pool for dynamically-allocated map entries.
269  */
270 struct pool uvm_map_entry_pool;
271 struct pool uvm_map_entry_kmem_pool;
272 
273 /*
274  * This global represents the end of the kernel virtual address
275  * space. If we want to exceed this, we must grow the kernel
276  * virtual address space dynamically.
277  *
278  * Note, this variable is locked by kernel_map's lock.
279  */
280 vaddr_t uvm_maxkaddr;
281 
282 /*
283  * Locking predicate.
284  */
285 #define UVM_MAP_REQ_WRITE(_map)						\
286 	do {								\
287 		if ((_map)->ref_count > 0) {				\
288 			if (((_map)->flags & VM_MAP_INTRSAFE) == 0)	\
289 				rw_assert_wrlock(&(_map)->lock);	\
290 			else						\
291 				MUTEX_ASSERT_LOCKED(&(_map)->mtx);	\
292 		}							\
293 	} while (0)
294 
295 #define	vm_map_modflags(map, set, clear)				\
296 	do {								\
297 		mtx_enter(&(map)->flags_lock);				\
298 		(map)->flags = ((map)->flags | (set)) & ~(clear);	\
299 		mtx_leave(&(map)->flags_lock);				\
300 	} while (0)
301 
302 
303 /*
304  * Tree describing entries by address.
305  *
306  * Addresses are unique.
307  * Entries with start == end may only exist if they are the first entry
308  * (sorted by address) within a free-memory tree.
309  */
310 
311 static inline int
312 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
313     const struct vm_map_entry *e2)
314 {
315 	return e1->start < e2->start ? -1 : e1->start > e2->start;
316 }
317 
318 /*
319  * Copy mapentry.
320  */
321 static inline void
322 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
323 {
324 	caddr_t csrc, cdst;
325 	size_t sz;
326 
327 	csrc = (caddr_t)src;
328 	cdst = (caddr_t)dst;
329 	csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
330 	cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
331 
332 	sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
333 	    offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
334 	memcpy(cdst, csrc, sz);
335 }
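
/*
 * Illustration of the copy-marker idiom used above (a sketch with a
 * hypothetical struct; the real start/stop markers are defined on
 * struct vm_map_entry in uvm_map.h): only the members from the start
 * marker up to, but not including, the stop marker are duplicated, so
 * linkage and bookkeeping fields outside the marked region stay intact.
 */
#if 0
struct example_entry {
	int	linkage;		/* not copied */
	int	example_start_copy;	/* copy starts here (inclusive) */
	int	payload;		/* copied */
	int	example_stop_copy;	/* copy stops here (exclusive) */
};

static void
example_copy(struct example_entry *src, struct example_entry *dst)
{
	memcpy((caddr_t)dst + offsetof(struct example_entry, example_start_copy),
	    (caddr_t)src + offsetof(struct example_entry, example_start_copy),
	    offsetof(struct example_entry, example_stop_copy) -
	    offsetof(struct example_entry, example_start_copy));
}
#endif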
336 
337 /*
338  * Handle free-list insertion.
339  */
340 void
341 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
342     struct vm_map_entry *entry)
343 {
344 	const struct uvm_addr_functions *fun;
345 #ifdef VMMAP_DEBUG
346 	vaddr_t min, max, bound;
347 #endif
348 
349 #ifdef VMMAP_DEBUG
350 	/*
351 	 * Boundary check.
352 	 * Boundaries are folded if they go on the same free list.
353 	 */
354 	min = VMMAP_FREE_START(entry);
355 	max = VMMAP_FREE_END(entry);
356 
357 	while (min < max) {
358 		bound = uvm_map_boundary(map, min, max);
359 		KASSERT(uvm_map_uaddr(map, min) == uaddr);
360 		min = bound;
361 	}
362 #endif
363 	KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
364 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
365 
366 	UVM_MAP_REQ_WRITE(map);
367 
368 	/* Actual insert: forward to uaddr pointer. */
369 	if (uaddr != NULL) {
370 		fun = uaddr->uaddr_functions;
371 		KDASSERT(fun != NULL);
372 		if (fun->uaddr_free_insert != NULL)
373 			(*fun->uaddr_free_insert)(map, uaddr, entry);
374 		entry->etype |= UVM_ET_FREEMAPPED;
375 	}
376 
377 	/* Update fspace augmentation. */
378 	uvm_map_addr_augment(entry);
379 }
380 
381 /*
382  * Handle free-list removal.
383  */
384 void
385 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
386     struct vm_map_entry *entry)
387 {
388 	const struct uvm_addr_functions *fun;
389 
390 	KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
391 	KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
392 	UVM_MAP_REQ_WRITE(map);
393 
394 	if (uaddr != NULL) {
395 		fun = uaddr->uaddr_functions;
396 		if (fun->uaddr_free_remove != NULL)
397 			(*fun->uaddr_free_remove)(map, uaddr, entry);
398 		entry->etype &= ~UVM_ET_FREEMAPPED;
399 	}
400 }
401 
402 /*
403  * Handle address tree insertion.
404  */
405 void
406 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
407 {
408 	struct vm_map_entry *res;
409 
410 	if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
411 		panic("uvm_mapent_addr_insert: entry still in addr list");
412 	KDASSERT(entry->start <= entry->end);
413 	KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
414 	    (entry->end & (vaddr_t)PAGE_MASK) == 0);
415 
416 	TRACEPOINT(uvm, map_insert,
417 	    entry->start, entry->end, entry->protection, NULL);
418 
419 	UVM_MAP_REQ_WRITE(map);
420 	res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
421 	if (res != NULL) {
422 		panic("uvm_mapent_addr_insert: map %p entry %p "
423 		    "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
424 		    "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
425 		    map, entry,
426 		    entry->start, entry->end, entry->guard, entry->fspace,
427 		    res, res->start, res->end, res->guard, res->fspace);
428 	}
429 }
430 
431 /*
432  * Handle address tree removal.
433  */
434 void
435 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
436 {
437 	struct vm_map_entry *res;
438 
439 	TRACEPOINT(uvm, map_remove,
440 	    entry->start, entry->end, entry->protection, NULL);
441 
442 	UVM_MAP_REQ_WRITE(map);
443 	res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
444 	if (res != entry)
445 		panic("uvm_mapent_addr_remove");
446 	RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
447 }
448 
449 /*
450  * uvm_map_reference: add reference to a map
451  *
452  * => map need not be locked
453  */
454 void
455 uvm_map_reference(struct vm_map *map)
456 {
457 	atomic_inc_int(&map->ref_count);
458 }
459 
460 void
461 uvm_map_lock_entry(struct vm_map_entry *entry)
462 {
463 	if (entry->aref.ar_amap != NULL) {
464 		amap_lock(entry->aref.ar_amap);
465 	}
466 	if (UVM_ET_ISOBJ(entry)) {
467 		rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
468 	}
469 }
470 
471 void
472 uvm_map_unlock_entry(struct vm_map_entry *entry)
473 {
474 	if (UVM_ET_ISOBJ(entry)) {
475 		rw_exit(entry->object.uvm_obj->vmobjlock);
476 	}
477 	if (entry->aref.ar_amap != NULL) {
478 		amap_unlock(entry->aref.ar_amap);
479 	}
480 }
481 
482 /*
483  * Calculate the dused delta.
484  */
485 vsize_t
486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
487 {
488 	struct vmspace *vm;
489 	vsize_t sz;
490 	vaddr_t lmax;
491 	vaddr_t stack_begin, stack_end; /* Position of stack. */
492 
493 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
494 	vm_map_assert_anylock(map);
495 
496 	vm = (struct vmspace *)map;
497 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
498 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
499 
500 	sz = 0;
501 	while (min != max) {
502 		lmax = max;
503 		if (min < stack_begin && lmax > stack_begin)
504 			lmax = stack_begin;
505 		else if (min < stack_end && lmax > stack_end)
506 			lmax = stack_end;
507 
508 		if (min >= stack_begin && min < stack_end) {
509 			/* nothing */
510 		} else
511 			sz += lmax - min;
512 		min = lmax;
513 	}
514 
515 	return sz >> PAGE_SHIFT;
516 }
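
/*
 * Worked example (a sketch; addresses are hypothetical and assume 4 KB
 * pages): with the stack occupying [0x1000000, 0x1800000), a call
 * uvmspace_dused(map, 0xff0000, 0x1810000) skips the stack window and
 * counts only [0xff0000, 0x1000000) and [0x1800000, 0x1810000), i.e.
 * 0x20000 bytes, returning 32 pages.
 */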
517 
518 /*
519  * Find the entry describing the given address.
520  */
521 struct vm_map_entry*
522 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
523 {
524 	struct vm_map_entry *iter;
525 
526 	iter = RBT_ROOT(uvm_map_addr, atree);
527 	while (iter != NULL) {
528 		if (iter->start > addr)
529 			iter = RBT_LEFT(uvm_map_addr, iter);
530 		else if (VMMAP_FREE_END(iter) <= addr)
531 			iter = RBT_RIGHT(uvm_map_addr, iter);
532 		else
533 			return iter;
534 	}
535 	return NULL;
536 }
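
/*
 * Usage sketch (illustrative only): the returned entry covers addr either
 * with its mapped range or with the free space that follows it, so a
 * caller has to distinguish the two cases itself.  The real lookup
 * helper, uvm_map_lookup_entry() below, additionally filters out
 * UVM_ET_HOLE entries.  The map must be at least read-locked.
 */
#if 0
static int
example_is_mapped(struct vm_map *map, vaddr_t addr)
{
	struct vm_map_entry *e;

	e = uvm_map_entrybyaddr(&map->addr, addr);
	return e != NULL && e->start <= addr && e->end > addr;
}
#endif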
537 
538 /*
539  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
540  *
541  * Push dead entries into a linked list.
542  * Since the linked list abuses the address tree for storage, the entry
543  * must not be linked in a map.
544  *
545  * The head must be initialized with TAILQ_INIT() before the first call to
546  * this macro.  uvm_unmap_detach(head, 0) will remove the dead entries.
547  */
548 static inline void
549 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
550 {
551 	TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
552 }
553 #define DEAD_ENTRY_PUSH(_headptr, _entry)				\
554 	dead_entry_push((_headptr), (_entry))
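
/*
 * Typical dead-queue life cycle (a sketch that mirrors uvm_unmap()
 * further down in this file): entries are pushed onto the queue while
 * the map is locked and reaped with uvm_unmap_detach() after the lock
 * is dropped, so expensive teardown never happens under the map lock.
 */
#if 0
static void
example_unmap_range(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	struct uvm_map_deadq dead;

	TAILQ_INIT(&dead);
	vm_map_lock(map);
	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
	vm_map_unlock(map);
	uvm_unmap_detach(&dead, 0);
}
#endif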
555 
556 /*
557  * Test if memory starting at addr with sz bytes is free.
558  *
559  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
560  * the space.
561  * If called with prefilled *start_ptr and *end_ptr, they must be correct.
562  */
563 int
564 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
565     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
566     vaddr_t addr, vsize_t sz)
567 {
568 	struct uvm_addr_state *free;
569 	struct uvm_map_addr *atree;
570 	struct vm_map_entry *i, *i_end;
571 
572 	if (addr + sz < addr)
573 		return 0;
574 
575 	vm_map_assert_anylock(map);
576 
577 	/*
578 	 * Kernel memory above uvm_maxkaddr is considered unavailable.
579 	 */
580 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
581 		if (addr + sz > uvm_maxkaddr)
582 			return 0;
583 	}
584 
585 	atree = &map->addr;
586 
587 	/*
588 	 * Fill in first, last, so they point at the entries containing the
589 	 * first and last address of the range.
590 	 * Note that if they are not NULL, we don't perform the lookup.
591 	 */
592 	KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
593 	if (*start_ptr == NULL) {
594 		*start_ptr = uvm_map_entrybyaddr(atree, addr);
595 		if (*start_ptr == NULL)
596 			return 0;
597 	} else
598 		KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
599 	if (*end_ptr == NULL) {
600 		if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
601 			*end_ptr = *start_ptr;
602 		else {
603 			*end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
604 			if (*end_ptr == NULL)
605 				return 0;
606 		}
607 	} else
608 		KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
609 
610 	/* Validation. */
611 	KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
612 	KDASSERT((*start_ptr)->start <= addr &&
613 	    VMMAP_FREE_END(*start_ptr) > addr &&
614 	    (*end_ptr)->start < addr + sz &&
615 	    VMMAP_FREE_END(*end_ptr) >= addr + sz);
616 
617 	/*
618 	 * Check that none of the entries intersects with <addr, addr+sz>.
619 	 * Also, if an entry belongs to uaddr_exe or uaddr_brk_stack, it is
620 	 * considered unavailable unless called by those allocators.
621 	 */
622 	i = *start_ptr;
623 	i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
624 	for (; i != i_end;
625 	    i = RBT_NEXT(uvm_map_addr, i)) {
626 		if (i->start != i->end && i->end > addr)
627 			return 0;
628 
629 		/*
630 		 * uaddr_exe and uaddr_brk_stack may only be used
631 		 * by these allocators and the NULL uaddr (i.e. no
632 		 * uaddr).
633 		 * Reject if this requirement is not met.
634 		 */
635 		if (uaddr != NULL) {
636 			free = uvm_map_uaddr_e(map, i);
637 
638 			if (uaddr != free && free != NULL &&
639 			    (free == map->uaddr_exe ||
640 			     free == map->uaddr_brk_stack))
641 				return 0;
642 		}
643 	}
644 
645 	return -1;
646 }
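
/*
 * Usage sketch (illustrative only): with first/last preset to NULL the
 * lookups happen inside uvm_map_isavail(); the filled-in pointers can
 * then be handed to uvm_map_mkentry().  The return value is nonzero (-1)
 * when the range is free.  The caller must hold at least a read lock on
 * the map.
 */
#if 0
static int
example_range_is_free(struct vm_map *map, vaddr_t addr, vsize_t sz)
{
	struct vm_map_entry *first = NULL, *last = NULL;

	return uvm_map_isavail(map, NULL, &first, &last, addr, sz);
}
#endif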
647 
648 /*
649  * Invoke each address selector until an address is found.
650  * Will not invoke uaddr_exe.
651  */
652 int
653 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
654     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
655     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
656 {
657 	struct uvm_addr_state *uaddr;
658 	int i;
659 
660 	/*
661 	 * Allocation for sz bytes at any address,
662 	 * using the addr selectors in order.
663 	 */
664 	for (i = 0; i < nitems(map->uaddr_any); i++) {
665 		uaddr = map->uaddr_any[i];
666 
667 		if (uvm_addr_invoke(map, uaddr, first, last,
668 		    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
669 			return 0;
670 	}
671 
672 	/* Fall back to brk() and stack() address selectors. */
673 	uaddr = map->uaddr_brk_stack;
674 	if (uvm_addr_invoke(map, uaddr, first, last,
675 	    addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
676 		return 0;
677 
678 	return ENOMEM;
679 }
680 
681 /* Calculate entry augmentation value. */
682 vsize_t
683 uvm_map_addr_augment_get(struct vm_map_entry *entry)
684 {
685 	vsize_t			 augment;
686 	struct vm_map_entry	*left, *right;
687 
688 	augment = entry->fspace;
689 	if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
690 		augment = MAX(augment, left->fspace_augment);
691 	if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
692 		augment = MAX(augment, right->fspace_augment);
693 	return augment;
694 }
695 
696 /*
697  * Update augmentation data in entry.
698  */
699 void
700 uvm_map_addr_augment(struct vm_map_entry *entry)
701 {
702 	vsize_t			 augment;
703 
704 	while (entry != NULL) {
705 		/* Calculate value for augmentation. */
706 		augment = uvm_map_addr_augment_get(entry);
707 
708 		/*
709 		 * Descend update.
710 		 * Once we find an entry that already has the correct value,
711 		 * stop, since it means all its parents will use the correct
712 		 * value too.
713 		 */
714 		if (entry->fspace_augment == augment)
715 			return;
716 		entry->fspace_augment = augment;
717 		entry = RBT_PARENT(uvm_map_addr, entry);
718 	}
719 }
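
/*
 * Example of the augmentation invariant (a sketch): if an entry has
 * fspace = 2 pages and its left and right subtrees carry fspace_augment
 * values of 5 and 3 pages, its own fspace_augment becomes 5 pages, the
 * largest free chunk anywhere in its subtree.  Address selectors can
 * therefore skip entire subtrees that cannot satisfy a request.
 */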
720 
721 /*
722  * uvm_mapanon: establish a valid mapping in map for an anon
723  *
724  * => *addr and sz must be a multiple of PAGE_SIZE.
725  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
726  * => map must be unlocked.
727  *
728  * => align: align vaddr, must be a power-of-2.
729  *    Align is only a hint and will be ignored if the alignment fails.
730  */
731 int
732 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
733     vsize_t align, unsigned int flags)
734 {
735 	struct vm_map_entry	*first, *last, *entry, *new;
736 	struct uvm_map_deadq	 dead;
737 	vm_prot_t		 prot;
738 	vm_prot_t		 maxprot;
739 	vm_inherit_t		 inherit;
740 	int			 advice;
741 	int			 error;
742 	vaddr_t			 pmap_align, pmap_offset;
743 	vaddr_t			 hint;
744 
745 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
746 	KASSERT(map != kernel_map);
747 	KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
748 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
749 	splassert(IPL_NONE);
750 	KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
751 
752 	/*
753 	 * We use pmap_align and pmap_offset as alignment and offset variables.
754 	 *
755 	 * Because the align parameter takes precedence over pmap prefer,
756 	 * pmap_align must be set to align, with pmap_offset = 0, if the
757 	 * pmap_prefer result would not satisfy the requested alignment.
758 	 */
759 	pmap_align = MAX(align, PAGE_SIZE);
760 	pmap_offset = 0;
761 
762 	/* Decode parameters. */
763 	prot = UVM_PROTECTION(flags);
764 	maxprot = UVM_MAXPROTECTION(flags);
765 	advice = UVM_ADVICE(flags);
766 	inherit = UVM_INHERIT(flags);
767 	error = 0;
768 	hint = trunc_page(*addr);
769 	TAILQ_INIT(&dead);
770 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
771 	KASSERT((align & (align - 1)) == 0);
772 
773 	/* Check protection. */
774 	if ((prot & maxprot) != prot)
775 		return EACCES;
776 
777 	/*
778 	 * Before grabbing the lock, allocate a map entry for later
779 	 * use to ensure we don't wait for memory while holding the
780 	 * vm_map_lock.
781 	 */
782 	new = uvm_mapent_alloc(map, flags);
783 	if (new == NULL)
784 		return ENOMEM;
785 
786 	vm_map_lock(map);
787 	first = last = NULL;
788 	if (flags & UVM_FLAG_FIXED) {
789 		/*
790 		 * Fixed location.
791 		 *
792 		 * Note: we ignore align, pmap_prefer.
793 		 * Fill in first, last and *addr.
794 		 */
795 		KASSERT((*addr & PAGE_MASK) == 0);
796 
797 		/* Check that the space is available. */
798 		if (flags & UVM_FLAG_UNMAP) {
799 			if ((flags & UVM_FLAG_STACK) &&
800 			    !uvm_map_is_stack_remappable(map, *addr, sz,
801 				(flags & UVM_FLAG_SIGALTSTACK))) {
802 				error = EINVAL;
803 				goto unlock;
804 			}
805 			if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
806 			    FALSE, TRUE,
807 			    (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) {
808 				error = EPERM;	/* immutable entries found */
809 				goto unlock;
810 			}
811 		}
812 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
813 			error = ENOMEM;
814 			goto unlock;
815 		}
816 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
817 	    (align == 0 || (*addr & (align - 1)) == 0) &&
818 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
819 		/*
820 		 * Address used as hint.
821 		 *
822 		 * Note: we enforce the alignment restriction,
823 		 * but ignore pmap_prefer.
824 		 */
825 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
826 		/* Run selection algorithm for executables. */
827 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
828 		    addr, sz, pmap_align, pmap_offset, prot, hint);
829 
830 		if (error != 0)
831 			goto unlock;
832 	} else {
833 		/* Update freelists from vmspace. */
834 		uvm_map_vmspace_update(map, &dead, flags);
835 
836 		error = uvm_map_findspace(map, &first, &last, addr, sz,
837 		    pmap_align, pmap_offset, prot, hint);
838 
839 		if (error != 0)
840 			goto unlock;
841 	}
842 
843 	/* Double-check if selected address doesn't cause overflow. */
844 	if (*addr + sz < *addr) {
845 		error = ENOMEM;
846 		goto unlock;
847 	}
848 
849 	/* If we only want a query, return now. */
850 	if (flags & UVM_FLAG_QUERY) {
851 		error = 0;
852 		goto unlock;
853 	}
854 
855 	/*
856 	 * Create new entry.
857 	 * first and last may be invalidated after this call.
858 	 */
859 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
860 	    new);
861 	if (entry == NULL) {
862 		error = ENOMEM;
863 		goto unlock;
864 	}
865 	new = NULL;
866 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
867 	entry->object.uvm_obj = NULL;
868 	entry->offset = 0;
869 	entry->protection = prot;
870 	entry->max_protection = maxprot;
871 	entry->inheritance = inherit;
872 	entry->wired_count = 0;
873 	entry->advice = advice;
874 	if (prot & PROT_WRITE)
875 		map->wserial++;
876 	if (flags & UVM_FLAG_SYSCALL) {
877 		entry->etype |= UVM_ET_SYSCALL;
878 		map->wserial++;
879 	}
880 	if (flags & UVM_FLAG_STACK) {
881 		entry->etype |= UVM_ET_STACK;
882 		if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
883 			map->sserial++;
884 	}
885 	if (flags & UVM_FLAG_COPYONW) {
886 		entry->etype |= UVM_ET_COPYONWRITE;
887 		if ((flags & UVM_FLAG_OVERLAY) == 0)
888 			entry->etype |= UVM_ET_NEEDSCOPY;
889 	}
890 	if (flags & UVM_FLAG_CONCEAL)
891 		entry->etype |= UVM_ET_CONCEAL;
892 	if (flags & UVM_FLAG_OVERLAY) {
893 		entry->aref.ar_pageoff = 0;
894 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
895 	}
896 
897 	/* Update map and process statistics. */
898 	map->size += sz;
899 	if (prot != PROT_NONE) {
900 		((struct vmspace *)map)->vm_dused +=
901 		    uvmspace_dused(map, *addr, *addr + sz);
902 	}
903 
904 unlock:
905 	vm_map_unlock(map);
906 
907 	/*
908 	 * Remove dead entries.
909 	 *
910 	 * Dead entries may be the result of merging.
911 	 * uvm_map_mkentry may also create dead entries, when it attempts to
912 	 * destroy free-space entries.
913 	 */
914 	uvm_unmap_detach(&dead, 0);
915 
916 	if (new)
917 		uvm_mapent_free(new);
918 	return error;
919 }
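
/*
 * Usage sketch (illustrative only; see uvm_map_remap_as_stack() below
 * for a real caller): map sz bytes of zero-fill, copy-on-write anonymous
 * memory at an address chosen by the selectors.  The map must be a
 * process (vmspace) map, never kernel_map.
 */
#if 0
static int
example_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz)
{
	*addr = 0;	/* no hint; let the address selectors choose */
	return uvm_mapanon(map, addr, sz, 0,
	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_COPY, MADV_NORMAL, UVM_FLAG_COPYONW));
}
#endif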
920 
921 /*
922  * uvm_map: establish a valid mapping in map
923  *
924  * => *addr and sz must be a multiple of PAGE_SIZE.
925  * => map must be unlocked.
926  * => <uobj,uoffset> value meanings (4 cases):
927  *	[1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
928  *	[2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
929  *	[3] <uobj,uoffset>		== normal mapping
930  *	[4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
931  *
932  *   case [4] is for kernel mappings where we don't know the offset until
933  *   we've found a virtual address.   note that kernel object offsets are
934  *   always relative to vm_map_min(kernel_map).
935  *
936  * => align: align vaddr, must be a power-of-2.
937  *    Align is only a hint and will be ignored if the alignment fails.
938  */
939 int
940 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
941     struct uvm_object *uobj, voff_t uoffset,
942     vsize_t align, unsigned int flags)
943 {
944 	struct vm_map_entry	*first, *last, *entry, *new;
945 	struct uvm_map_deadq	 dead;
946 	vm_prot_t		 prot;
947 	vm_prot_t		 maxprot;
948 	vm_inherit_t		 inherit;
949 	int			 advice;
950 	int			 error;
951 	vaddr_t			 pmap_align, pmap_offset;
952 	vaddr_t			 hint;
953 
954 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
955 		splassert(IPL_NONE);
956 	else
957 		splassert(IPL_VM);
958 
959 	/*
960 	 * We use pmap_align and pmap_offset as alignment and offset variables.
961 	 *
962 	 * Because the align parameter takes precedence over pmap prefer,
963 	 * pmap_align must be set to align, with pmap_offset = 0, if the
964 	 * pmap_prefer result would not satisfy the requested alignment.
965 	 */
966 	if (uoffset == UVM_UNKNOWN_OFFSET) {
967 		pmap_align = MAX(align, PAGE_SIZE);
968 		pmap_offset = 0;
969 	} else {
970 		pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
971 		pmap_offset = PMAP_PREFER_OFFSET(uoffset);
972 
973 		if (align == 0 ||
974 		    (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
975 			/* pmap_offset satisfies align, no change. */
976 		} else {
977 			/* Align takes precedence over pmap prefer. */
978 			pmap_align = align;
979 			pmap_offset = 0;
980 		}
981 	}
982 
983 	/* Decode parameters. */
984 	prot = UVM_PROTECTION(flags);
985 	maxprot = UVM_MAXPROTECTION(flags);
986 	advice = UVM_ADVICE(flags);
987 	inherit = UVM_INHERIT(flags);
988 	error = 0;
989 	hint = trunc_page(*addr);
990 	TAILQ_INIT(&dead);
991 	KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
992 	KASSERT((align & (align - 1)) == 0);
993 
994 	/* Holes are incompatible with other types of mappings. */
995 	if (flags & UVM_FLAG_HOLE) {
996 		KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
997 		    (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
998 	}
999 
1000 	/* Unset hint for kernel_map non-fixed allocations. */
1001 	if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1002 		hint = 0;
1003 
1004 	/* Check protection. */
1005 	if ((prot & maxprot) != prot)
1006 		return EACCES;
1007 
1008 	if (map == kernel_map &&
1009 	    (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1010 		panic("uvm_map: kernel map W^X violation requested");
1011 
1012 	/*
1013 	 * Before grabbing the lock, allocate a map entry for later
1014 	 * use to ensure we don't wait for memory while holding the
1015 	 * vm_map_lock.
1016 	 */
1017 	new = uvm_mapent_alloc(map, flags);
1018 	if (new == NULL)
1019 		return ENOMEM;
1020 
1021 	if (flags & UVM_FLAG_TRYLOCK) {
1022 		if (vm_map_lock_try(map) == FALSE) {
1023 			error = EFAULT;
1024 			goto out;
1025 		}
1026 	} else {
1027 		vm_map_lock(map);
1028 	}
1029 
1030 	first = last = NULL;
1031 	if (flags & UVM_FLAG_FIXED) {
1032 		/*
1033 		 * Fixed location.
1034 		 *
1035 		 * Note: we ignore align, pmap_prefer.
1036 		 * Fill in first, last and *addr.
1037 		 */
1038 		KASSERT((*addr & PAGE_MASK) == 0);
1039 
1040 		/*
1041 		 * Grow pmap to include allocated address.
1042 		 * If the growth fails, the allocation will fail too.
1043 		 */
1044 		if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1045 		    uvm_maxkaddr < (*addr + sz)) {
1046 			uvm_map_kmem_grow(map, &dead,
1047 			    *addr + sz - uvm_maxkaddr, flags);
1048 		}
1049 
1050 		/* Check that the space is available. */
1051 		if (flags & UVM_FLAG_UNMAP) {
1052 			if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
1053 			    FALSE, TRUE, TRUE) != 0) {
1054 				error = EPERM;	/* immutable entries found */
1055 				goto unlock;
1056 			}
1057 		}
1058 		if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1059 			error = ENOMEM;
1060 			goto unlock;
1061 		}
1062 	} else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1063 	    (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1064 	    (align == 0 || (*addr & (align - 1)) == 0) &&
1065 	    uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1066 		/*
1067 		 * Address used as hint.
1068 		 *
1069 		 * Note: we enforce the alignment restriction,
1070 		 * but ignore pmap_prefer.
1071 		 */
1072 	} else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1073 		/* Run selection algorithm for executables. */
1074 		error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1075 		    addr, sz, pmap_align, pmap_offset, prot, hint);
1076 
1077 		/* Grow kernel memory and try again. */
1078 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1079 			uvm_map_kmem_grow(map, &dead, sz, flags);
1080 
1081 			error = uvm_addr_invoke(map, map->uaddr_exe,
1082 			    &first, &last, addr, sz,
1083 			    pmap_align, pmap_offset, prot, hint);
1084 		}
1085 
1086 		if (error != 0)
1087 			goto unlock;
1088 	} else {
1089 		/* Update freelists from vmspace. */
1090 		if (map->flags & VM_MAP_ISVMSPACE)
1091 			uvm_map_vmspace_update(map, &dead, flags);
1092 
1093 		error = uvm_map_findspace(map, &first, &last, addr, sz,
1094 		    pmap_align, pmap_offset, prot, hint);
1095 
1096 		/* Grow kernel memory and try again. */
1097 		if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1098 			uvm_map_kmem_grow(map, &dead, sz, flags);
1099 
1100 			error = uvm_map_findspace(map, &first, &last, addr, sz,
1101 			    pmap_align, pmap_offset, prot, hint);
1102 		}
1103 
1104 		if (error != 0)
1105 			goto unlock;
1106 	}
1107 
1108 	/* Double-check if selected address doesn't cause overflow. */
1109 	if (*addr + sz < *addr) {
1110 		error = ENOMEM;
1111 		goto unlock;
1112 	}
1113 
1114 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1115 	    uvm_maxkaddr >= *addr + sz);
1116 
1117 	/* If we only want a query, return now. */
1118 	if (flags & UVM_FLAG_QUERY) {
1119 		error = 0;
1120 		goto unlock;
1121 	}
1122 
1123 	if (uobj == NULL)
1124 		uoffset = 0;
1125 	else if (uoffset == UVM_UNKNOWN_OFFSET) {
1126 		KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1127 		uoffset = *addr - vm_map_min(kernel_map);
1128 	}
1129 
1130 	/*
1131 	 * Create new entry.
1132 	 * first and last may be invalidated after this call.
1133 	 */
1134 	entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1135 	    new);
1136 	if (entry == NULL) {
1137 		error = ENOMEM;
1138 		goto unlock;
1139 	}
1140 	new = NULL;
1141 	KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1142 	entry->object.uvm_obj = uobj;
1143 	entry->offset = uoffset;
1144 	entry->protection = prot;
1145 	entry->max_protection = maxprot;
1146 	entry->inheritance = inherit;
1147 	entry->wired_count = 0;
1148 	entry->advice = advice;
1149 	if (prot & PROT_WRITE)
1150 		map->wserial++;
1151 	if (flags & UVM_FLAG_SYSCALL) {
1152 		entry->etype |= UVM_ET_SYSCALL;
1153 		map->wserial++;
1154 	}
1155 	if (flags & UVM_FLAG_STACK) {
1156 		entry->etype |= UVM_ET_STACK;
1157 		if (flags & UVM_FLAG_UNMAP)
1158 			map->sserial++;
1159 	}
1160 	if (uobj)
1161 		entry->etype |= UVM_ET_OBJ;
1162 	else if (flags & UVM_FLAG_HOLE)
1163 		entry->etype |= UVM_ET_HOLE;
1164 	if (flags & UVM_FLAG_NOFAULT)
1165 		entry->etype |= UVM_ET_NOFAULT;
1166 	if (flags & UVM_FLAG_WC)
1167 		entry->etype |= UVM_ET_WC;
1168 	if (flags & UVM_FLAG_COPYONW) {
1169 		entry->etype |= UVM_ET_COPYONWRITE;
1170 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1171 			entry->etype |= UVM_ET_NEEDSCOPY;
1172 	}
1173 	if (flags & UVM_FLAG_CONCEAL)
1174 		entry->etype |= UVM_ET_CONCEAL;
1175 	if (flags & UVM_FLAG_OVERLAY) {
1176 		entry->aref.ar_pageoff = 0;
1177 		entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1178 	}
1179 
1180 	/* Update map and process statistics. */
1181 	if (!(flags & UVM_FLAG_HOLE)) {
1182 		map->size += sz;
1183 		if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
1184 		    prot != PROT_NONE) {
1185 			((struct vmspace *)map)->vm_dused +=
1186 			    uvmspace_dused(map, *addr, *addr + sz);
1187 		}
1188 	}
1189 
1190 	/*
1191 	 * Try to merge entry.
1192 	 *
1193 	 * Userland allocations are kept separated most of the time.
1194 	 * Forego the effort of merging what most of the time can't be merged
1195 	 * and only try the merge if it concerns a kernel entry.
1196 	 */
1197 	if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1198 	    (map->flags & VM_MAP_ISVMSPACE) == 0)
1199 		uvm_mapent_tryjoin(map, entry, &dead);
1200 
1201 unlock:
1202 	vm_map_unlock(map);
1203 
1204 	/*
1205 	 * Remove dead entries.
1206 	 *
1207 	 * Dead entries may be the result of merging.
1208 	 * uvm_map_mkentry may also create dead entries, when it attempts to
1209 	 * destroy free-space entries.
1210 	 */
1211 	if (map->flags & VM_MAP_INTRSAFE)
1212 		uvm_unmap_detach_intrsafe(&dead);
1213 	else
1214 		uvm_unmap_detach(&dead, 0);
1215 out:
1216 	if (new)
1217 		uvm_mapent_free(new);
1218 	return error;
1219 }
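
/*
 * Usage sketch (illustrative only; it merely shows the argument shapes
 * for case [2] above, i.e. <NULL, UVM_UNKNOWN_OFFSET> with no
 * PMAP_PREFER handling).  Real kernel allocations normally go through
 * the uvm_km interfaces rather than calling uvm_map() directly.
 */
#if 0
static int
example_kernel_reserve(vaddr_t *kva, vsize_t sz)
{
	*kva = 0;
	return uvm_map(kernel_map, kva, sz, NULL, UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
}
#endif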
1220 
1221 /*
1222  * True iff e1 and e2 can be joined together.
1223  */
1224 int
1225 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1226     struct vm_map_entry *e2)
1227 {
1228 	KDASSERT(e1 != NULL && e2 != NULL);
1229 
1230 	/* Must be the same entry type and not have free memory between. */
1231 	if (e1->etype != e2->etype || e1->end != e2->start)
1232 		return 0;
1233 
1234 	/* Submaps are never joined. */
1235 	if (UVM_ET_ISSUBMAP(e1))
1236 		return 0;
1237 
1238 	/* Never merge wired memory. */
1239 	if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1240 		return 0;
1241 
1242 	/* Protection, inheritance and advice must be equal. */
1243 	if (e1->protection != e2->protection ||
1244 	    e1->max_protection != e2->max_protection ||
1245 	    e1->inheritance != e2->inheritance ||
1246 	    e1->advice != e2->advice)
1247 		return 0;
1248 
1249 	/* If uvm_object: object itself and offsets within object must match. */
1250 	if (UVM_ET_ISOBJ(e1)) {
1251 		if (e1->object.uvm_obj != e2->object.uvm_obj)
1252 			return 0;
1253 		if (e1->offset + (e1->end - e1->start) != e2->offset)
1254 			return 0;
1255 	}
1256 
1257 	/*
1258 	 * Cannot join shared amaps.
1259 	 * Note: no need to lock amap to look at refs, since we don't care
1260 	 * about its exact value.
1261 	 * If it is 1 (i.e. we have the only reference) it will stay there.
1262 	 */
1263 	if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1264 		return 0;
1265 	if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1266 		return 0;
1267 
1268 	/* Apparently, e1 and e2 match. */
1269 	return 1;
1270 }
1271 
1272 /*
1273  * Join support function.
1274  *
1275  * Returns the merged entry on success.
1276  * Returns NULL if the merge failed.
1277  */
1278 struct vm_map_entry*
1279 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1280     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1281 {
1282 	struct uvm_addr_state *free;
1283 
1284 	/*
1285 	 * Merging is not supported for map entries where e1
1286 	 * contains an amap. This should never happen anyway,
1287 	 * because only kernel entries are merged and these do
1288 	 * not contain amaps.
1289 	 * e2 contains no real information in its amap,
1290 	 * so it can be erased immediately.
1291 	 */
1292 	KASSERT(e1->aref.ar_amap == NULL);
1293 
1294 	/*
1295 	 * Don't drop obj reference:
1296 	 * uvm_unmap_detach will do this for us.
1297 	 */
1298 	free = uvm_map_uaddr_e(map, e1);
1299 	uvm_mapent_free_remove(map, free, e1);
1300 
1301 	free = uvm_map_uaddr_e(map, e2);
1302 	uvm_mapent_free_remove(map, free, e2);
1303 	uvm_mapent_addr_remove(map, e2);
1304 	e1->end = e2->end;
1305 	e1->guard = e2->guard;
1306 	e1->fspace = e2->fspace;
1307 	uvm_mapent_free_insert(map, free, e1);
1308 
1309 	DEAD_ENTRY_PUSH(dead, e2);
1310 	return e1;
1311 }
1312 
1313 /*
1314  * Attempt forward and backward joining of entry.
1315  *
1316  * Returns entry after joins.
1317  * We are guaranteed that the amap of entry is either non-existent or
1318  * has never been used.
1319  */
1320 struct vm_map_entry*
1321 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1322     struct uvm_map_deadq *dead)
1323 {
1324 	struct vm_map_entry *other;
1325 	struct vm_map_entry *merged;
1326 
1327 	/* Merge with previous entry. */
1328 	other = RBT_PREV(uvm_map_addr, entry);
1329 	if (other && uvm_mapent_isjoinable(map, other, entry)) {
1330 		merged = uvm_mapent_merge(map, other, entry, dead);
1331 		if (merged)
1332 			entry = merged;
1333 	}
1334 
1335 	/*
1336 	 * Merge with next entry.
1337 	 *
1338 	 * Because amap can only extend forward and the next entry
1339 	 * probably contains sensible info, only perform forward merging
1340 	 * in the absence of an amap.
1341 	 */
1342 	other = RBT_NEXT(uvm_map_addr, entry);
1343 	if (other && entry->aref.ar_amap == NULL &&
1344 	    other->aref.ar_amap == NULL &&
1345 	    uvm_mapent_isjoinable(map, entry, other)) {
1346 		merged = uvm_mapent_merge(map, entry, other, dead);
1347 		if (merged)
1348 			entry = merged;
1349 	}
1350 
1351 	return entry;
1352 }
1353 
1354 /*
1355  * Kill entries that are no longer in a map.
1356  */
1357 void
1358 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1359 {
1360 	struct vm_map_entry *entry, *tmp;
1361 	int waitok = flags & UVM_PLA_WAITOK;
1362 
1363 	TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1364 		/* Drop reference to amap, if we've got one. */
1365 		if (entry->aref.ar_amap)
1366 			amap_unref(entry->aref.ar_amap,
1367 			    entry->aref.ar_pageoff,
1368 			    atop(entry->end - entry->start),
1369 			    flags & AMAP_REFALL);
1370 
1371 		/* Skip entries for which we have to grab the kernel lock. */
1372 		if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
1373 			continue;
1374 
1375 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1376 		uvm_mapent_free(entry);
1377 	}
1378 
1379 	if (TAILQ_EMPTY(deadq))
1380 		return;
1381 
1382 	KERNEL_LOCK();
1383 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1384 		if (waitok)
1385 			uvm_pause();
1386 		/* Drop reference to our backing object, if we've got one. */
1387 		if (UVM_ET_ISSUBMAP(entry)) {
1388 			/* ... unlikely to happen, but play it safe */
1389 			uvm_map_deallocate(entry->object.sub_map);
1390 		} else if (UVM_ET_ISOBJ(entry) &&
1391 		    entry->object.uvm_obj->pgops->pgo_detach) {
1392 			entry->object.uvm_obj->pgops->pgo_detach(
1393 			    entry->object.uvm_obj);
1394 		}
1395 
1396 		/* Step to next. */
1397 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1398 		uvm_mapent_free(entry);
1399 	}
1400 	KERNEL_UNLOCK();
1401 }
1402 
1403 void
1404 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1405 {
1406 	struct vm_map_entry *entry;
1407 
1408 	while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1409 		KASSERT(entry->aref.ar_amap == NULL);
1410 		KASSERT(!UVM_ET_ISSUBMAP(entry));
1411 		KASSERT(!UVM_ET_ISOBJ(entry));
1412 		TAILQ_REMOVE(deadq, entry, dfree.deadq);
1413 		uvm_mapent_free(entry);
1414 	}
1415 }
1416 
1417 /*
1418  * Create and insert new entry.
1419  *
1420  * Returned entry contains new addresses and is inserted properly in the tree.
1421  * first and last are (probably) no longer valid.
1422  */
1423 struct vm_map_entry*
1424 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1425     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1426     struct uvm_map_deadq *dead, struct vm_map_entry *new)
1427 {
1428 	struct vm_map_entry *entry, *prev;
1429 	struct uvm_addr_state *free;
1430 	vaddr_t min, max;	/* free space boundaries for new entry */
1431 
1432 	KDASSERT(map != NULL);
1433 	KDASSERT(first != NULL);
1434 	KDASSERT(last != NULL);
1435 	KDASSERT(dead != NULL);
1436 	KDASSERT(sz > 0);
1437 	KDASSERT(addr + sz > addr);
1438 	KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1439 	KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1440 	KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1441 	uvm_tree_sanity(map, __FILE__, __LINE__);
1442 
1443 	min = addr + sz;
1444 	max = VMMAP_FREE_END(last);
1445 
1446 	/* Initialize new entry. */
1447 	if (new == NULL)
1448 		entry = uvm_mapent_alloc(map, flags);
1449 	else
1450 		entry = new;
1451 	if (entry == NULL)
1452 		return NULL;
1453 	entry->offset = 0;
1454 	entry->etype = 0;
1455 	entry->wired_count = 0;
1456 	entry->aref.ar_pageoff = 0;
1457 	entry->aref.ar_amap = NULL;
1458 
1459 	entry->start = addr;
1460 	entry->end = min;
1461 	entry->guard = 0;
1462 	entry->fspace = 0;
1463 
1464 	vm_map_assert_wrlock(map);
1465 
1466 	/* Reset free space in first. */
1467 	free = uvm_map_uaddr_e(map, first);
1468 	uvm_mapent_free_remove(map, free, first);
1469 	first->guard = 0;
1470 	first->fspace = 0;
1471 
1472 	/*
1473 	 * Remove all entries that are fully replaced.
1474 	 * We are iterating using last in reverse order.
1475 	 */
1476 	for (; first != last; last = prev) {
1477 		prev = RBT_PREV(uvm_map_addr, last);
1478 
1479 		KDASSERT(last->start == last->end);
1480 		free = uvm_map_uaddr_e(map, last);
1481 		uvm_mapent_free_remove(map, free, last);
1482 		uvm_mapent_addr_remove(map, last);
1483 		DEAD_ENTRY_PUSH(dead, last);
1484 	}
1485 	/* Remove first if it is entirely inside <addr, addr+sz>.  */
1486 	if (first->start == addr) {
1487 		uvm_mapent_addr_remove(map, first);
1488 		DEAD_ENTRY_PUSH(dead, first);
1489 	} else {
1490 		uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1491 		    addr, flags);
1492 	}
1493 
1494 	/* Finally, link in entry. */
1495 	uvm_mapent_addr_insert(map, entry);
1496 	uvm_map_fix_space(map, entry, min, max, flags);
1497 
1498 	uvm_tree_sanity(map, __FILE__, __LINE__);
1499 	return entry;
1500 }
1501 
1502 
1503 /*
1504  * uvm_mapent_alloc: allocate a map entry
1505  */
1506 struct vm_map_entry *
1507 uvm_mapent_alloc(struct vm_map *map, int flags)
1508 {
1509 	struct vm_map_entry *me, *ne;
1510 	int pool_flags;
1511 	int i;
1512 
1513 	pool_flags = PR_WAITOK;
1514 	if (flags & UVM_FLAG_TRYLOCK)
1515 		pool_flags = PR_NOWAIT;
1516 
1517 	if (map->flags & VM_MAP_INTRSAFE || cold) {
1518 		mtx_enter(&uvm_kmapent_mtx);
1519 		if (SLIST_EMPTY(&uvm.kentry_free)) {
1520 			ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1521 			    &kd_nowait);
1522 			if (ne == NULL)
1523 				panic("uvm_mapent_alloc: cannot allocate map "
1524 				    "entry");
1525 			for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1526 				SLIST_INSERT_HEAD(&uvm.kentry_free,
1527 				    &ne[i], daddrs.addr_kentry);
1528 			}
1529 			if (ratecheck(&uvm_kmapent_last_warn_time,
1530 			    &uvm_kmapent_warn_rate))
1531 				printf("uvm_mapent_alloc: out of static "
1532 				    "map entries\n");
1533 		}
1534 		me = SLIST_FIRST(&uvm.kentry_free);
1535 		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1536 		uvmexp.kmapent++;
1537 		mtx_leave(&uvm_kmapent_mtx);
1538 		me->flags = UVM_MAP_STATIC;
1539 	} else if (map == kernel_map) {
1540 		splassert(IPL_NONE);
1541 		me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1542 		if (me == NULL)
1543 			goto out;
1544 		me->flags = UVM_MAP_KMEM;
1545 	} else {
1546 		splassert(IPL_NONE);
1547 		me = pool_get(&uvm_map_entry_pool, pool_flags);
1548 		if (me == NULL)
1549 			goto out;
1550 		me->flags = 0;
1551 	}
1552 
1553 	RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1554 out:
1555 	return me;
1556 }
1557 
1558 /*
1559  * uvm_mapent_free: free map entry
1560  *
1561  * => XXX: static pool for kernel map?
1562  */
1563 void
1564 uvm_mapent_free(struct vm_map_entry *me)
1565 {
1566 	if (me->flags & UVM_MAP_STATIC) {
1567 		mtx_enter(&uvm_kmapent_mtx);
1568 		SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1569 		uvmexp.kmapent--;
1570 		mtx_leave(&uvm_kmapent_mtx);
1571 	} else if (me->flags & UVM_MAP_KMEM) {
1572 		splassert(IPL_NONE);
1573 		pool_put(&uvm_map_entry_kmem_pool, me);
1574 	} else {
1575 		splassert(IPL_NONE);
1576 		pool_put(&uvm_map_entry_pool, me);
1577 	}
1578 }
1579 
1580 /*
1581  * uvm_map_lookup_entry: find map entry at or before an address.
1582  *
1583  * => map must at least be read-locked by caller
1584  * => entry is returned in "entry"
1585  * => return value is true if address is in the returned entry
1586  * => UVM_ET_HOLE entries are considered to not contain a mapping, hence
1587  *    FALSE is returned for them.
1588  */
1589 boolean_t
1590 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1591     struct vm_map_entry **entry)
1592 {
1593 	vm_map_assert_anylock(map);
1594 
1595 	*entry = uvm_map_entrybyaddr(&map->addr, address);
1596 	return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1597 	    (*entry)->start <= address && (*entry)->end > address;
1598 }
1599 
1600 /*
1601  * The stack must be in a MAP_STACK entry. PROT_NONE indicates a stack not
1602  * yet grown; uvm_map_check_region_range() should then not cache the entry,
1603  * because the growth won't be seen.
1604  */
1605 int
1606 uvm_map_inentry_sp(vm_map_entry_t entry)
1607 {
1608 	if ((entry->etype & UVM_ET_STACK) == 0) {
1609 		if (entry->protection == PROT_NONE)
1610 			return (-1);	/* don't update range */
1611 		return (0);
1612 	}
1613 	return (1);
1614 }
1615 
1616 /*
1617  * The system call must not come from a writeable entry, W^X is violated.
1618  * (Would be nice if we can spot aliasing, which is also kind of bad)
1619  *
1620  * The system call must come from an syscall-labeled entry (which are
1621  * the text regions of the main program, sigtramp, ld.so, or libc).
1622  */
1623 int
1624 uvm_map_inentry_pc(vm_map_entry_t entry)
1625 {
1626 	if (entry->protection & PROT_WRITE)
1627 		return (0);	/* not permitted */
1628 	if ((entry->etype & UVM_ET_SYSCALL) == 0)
1629 		return (0);	/* not permitted */
1630 	return (1);
1631 }
1632 
1633 int
1634 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1635 {
1636 	return (serial != ie->ie_serial || ie->ie_start == 0 ||
1637 	    addr < ie->ie_start || addr >= ie->ie_end);
1638 }
1639 
1640 /*
1641  * Inside a vm_map, find the entry for the given address and verify it via
1642  * the supplied function.  Remember the low and high addresses of the region
1643  * if it is valid and return TRUE, else return FALSE.
1644  */
1645 boolean_t
1646 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1647     int (*fn)(vm_map_entry_t), u_long serial)
1648 {
1649 	vm_map_t map = &p->p_vmspace->vm_map;
1650 	vm_map_entry_t entry;
1651 	int ret;
1652 
1653 	if (addr < map->min_offset || addr >= map->max_offset)
1654 		return (FALSE);
1655 
1656 	/* lock map */
1657 	vm_map_lock_read(map);
1658 
1659 	/* lookup */
1660 	if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1661 		vm_map_unlock_read(map);
1662 		return (FALSE);
1663 	}
1664 
1665 	ret = (*fn)(entry);
1666 	if (ret == 0) {
1667 		vm_map_unlock_read(map);
1668 		return (FALSE);
1669 	} else if (ret == 1) {
1670 		ie->ie_start = entry->start;
1671 		ie->ie_end = entry->end;
1672 		ie->ie_serial = serial;
1673 	} else {
1674 		/* do not update, re-check later */
1675 	}
1676 	vm_map_unlock_read(map);
1677 	return (TRUE);
1678 }
1679 
1680 boolean_t
1681 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1682     const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
1683 {
1684 	union sigval sv;
1685 	boolean_t ok = TRUE;
1686 
1687 	if (uvm_map_inentry_recheck(serial, addr, ie)) {
1688 		ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1689 		if (!ok) {
1690 			KERNEL_LOCK();
1691 			printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1692 			    addr, ie->ie_start, ie->ie_end-1);
1693 			p->p_p->ps_acflag |= AMAP;
1694 			sv.sival_ptr = (void *)PROC_PC(p);
1695 			trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1696 			KERNEL_UNLOCK();
1697 		}
1698 	}
1699 	return (ok);
1700 }
1701 
1702 /*
1703  * Check whether the given address range can be converted to a MAP_STACK
1704  * mapping.
1705  *
1706  * Must be called with map locked.
1707  */
1708 boolean_t
1709 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz,
1710     int sigaltstack_check)
1711 {
1712 	vaddr_t end = addr + sz;
1713 	struct vm_map_entry *first, *iter, *prev = NULL;
1714 
1715 	vm_map_assert_anylock(map);
1716 
1717 	if (!uvm_map_lookup_entry(map, addr, &first)) {
1718 		printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1719 		    addr, end, map);
1720 		return FALSE;
1721 	}
1722 
1723 	/*
1724 	 * Check that the address range exists and is contiguous.
1725 	 */
1726 	for (iter = first; iter != NULL && iter->start < end;
1727 	    prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1728 		/*
1729 		 * Make sure that we do not have holes in the range.
1730 		 */
1731 #if 0
1732 		if (prev != NULL) {
1733 			printf("prev->start 0x%lx, prev->end 0x%lx, "
1734 			    "iter->start 0x%lx, iter->end 0x%lx\n",
1735 			    prev->start, prev->end, iter->start, iter->end);
1736 		}
1737 #endif
1738 
1739 		if (prev != NULL && prev->end != iter->start) {
1740 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1741 			    "hole in range\n", addr, end, map);
1742 			return FALSE;
1743 		}
1744 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1745 			printf("map stack 0x%lx-0x%lx of map %p failed: "
1746 			    "hole in range\n", addr, end, map);
1747 			return FALSE;
1748 		}
1749 		if (sigaltstack_check) {
1750 			if ((iter->etype & UVM_ET_SYSCALL))
1751 				return FALSE;
1752 			if (iter->protection != (PROT_READ | PROT_WRITE))
1753 				return FALSE;
1754 		}
1755 	}
1756 
1757 	return TRUE;
1758 }
1759 
1760 /*
1761  * Remap the middle pages of an existing mapping as a stack range.
1762  * If there exists a previous contiguous mapping with the given range
1763  * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1764  * mapping is dropped, and a new anon mapping is created and marked as
1765  * a stack.
1766  *
1767  * Must be called with map unlocked.
1768  */
1769 int
1770 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1771 {
1772 	vm_map_t map = &p->p_vmspace->vm_map;
1773 	vaddr_t start, end;
1774 	int error;
1775 	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1776 	    PROT_READ | PROT_WRITE | PROT_EXEC,
1777 	    MAP_INHERIT_COPY, MADV_NORMAL,
1778 	    UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1779 	    UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK);
1780 
1781 	start = round_page(addr);
1782 	end = trunc_page(addr + sz);
1783 #ifdef MACHINE_STACK_GROWS_UP
1784 	if (end == addr + sz)
1785 		end -= PAGE_SIZE;
1786 #else
1787 	if (start == addr)
1788 		start += PAGE_SIZE;
1789 #endif
1790 
1791 	if (start < map->min_offset || end >= map->max_offset || end < start)
1792 		return EINVAL;
1793 
1794 	/*
1795 	 * UVM_FLAG_SIGALTSTACK indicates that immutable may be bypassed,
1796 	 * but the range is first checked to be contiguous, not a syscall
1797 	 * mapping, and RW protected.  Then, a new mapping (all zero) is
1798 	 * placed upon the region, which prevents an attacker from pivoting
1799 	 * into pre-placed MAP_STACK space.
1800 	 */
1801 	error = uvm_mapanon(map, &start, end - start, 0, flags);
1802 	if (error != 0)
1803 		printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1804 
1805 	return error;
1806 }
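
/*
 * Illustrative userland sketch, kept out of the build with #if 0:
 * sigaltstack(2) registers an ordinary RW region as the alternate signal
 * stack; per the comment above, such a region may later be converted to
 * MAP_STACK through the UVM_FLAG_SIGALTSTACK path, provided it is
 * contiguous, RW and not a syscall mapping.  The helper name is made up;
 * error handling is minimal.
 */
#if 0
#include <signal.h>
#include <stdlib.h>
#include <err.h>

static void
setup_altstack(void)
{
	stack_t ss;

	if ((ss.ss_sp = malloc(SIGSTKSZ)) == NULL)
		err(1, "malloc");
	ss.ss_size = SIGSTKSZ;
	ss.ss_flags = 0;
	if (sigaltstack(&ss, NULL) == -1)
		err(1, "sigaltstack");
}
#endif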
1807 
1808 /*
1809  * uvm_map_pie: return a properly aligned random load address for a
1810  * PIE executable.
1811  */
1812 #ifndef VM_PIE_MAX_ADDR
1813 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1814 #endif
1815 
1816 #ifndef VM_PIE_MIN_ADDR
1817 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1818 #endif
1819 
1820 #ifndef VM_PIE_MIN_ALIGN
1821 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1822 #endif
1823 
1824 vaddr_t
1825 uvm_map_pie(vaddr_t align)
1826 {
1827 	vaddr_t addr, space, min;
1828 
1829 	align = MAX(align, VM_PIE_MIN_ALIGN);
1830 
1831 	/* round up to next alignment */
1832 	min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1833 
1834 	if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1835 		return (align);
1836 
1837 	space = (VM_PIE_MAX_ADDR - min) / align;
1838 	space = MIN(space, (u_int32_t)-1);
1839 
1840 	addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1841 	addr += min;
1842 
1843 	return (addr);
1844 }
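
/*
 * Worked example of the arithmetic above, kept out of the build with
 * #if 0.  The bounds are illustrative stand-ins for VM_PIE_MIN_ADDR and
 * VM_PIE_MAX_ADDR; arc4random_uniform() is the libc interface on OpenBSD.
 */
#if 0
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
	uint64_t align = 0x10000;	/* 64KB requested alignment */
	uint64_t pie_min = 0x1000;	/* stand-in for VM_PIE_MIN_ADDR */
	uint64_t pie_max = 0x20000000;	/* stand-in for VM_PIE_MAX_ADDR */
	uint64_t min, space, addr;

	/* Round pie_min up to the next multiple of align: 0x10000. */
	min = (pie_min + align - 1) & ~(align - 1);

	/* Number of aligned slots available below pie_max: 0x1fff. */
	space = (pie_max - min) / align;

	/* Pick a random slot and turn it back into an address. */
	addr = (uint64_t)arc4random_uniform((uint32_t)space) * align + min;
	printf("PIE load address: 0x%llx\n", (unsigned long long)addr);
	return 0;
}
#endif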
1845 
1846 void
1847 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1848 {
1849 	struct uvm_map_deadq dead;
1850 
1851 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1852 	    (end & (vaddr_t)PAGE_MASK) == 0);
1853 	TAILQ_INIT(&dead);
1854 	vm_map_lock(map);
1855 	uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
1856 	vm_map_unlock(map);
1857 
1858 	if (map->flags & VM_MAP_INTRSAFE)
1859 		uvm_unmap_detach_intrsafe(&dead);
1860 	else
1861 		uvm_unmap_detach(&dead, 0);
1862 }
1863 
1864 /*
1865  * Mark entry as free.
1866  *
1867  * entry will be put on the dead list.
1868  * The free space will be merged into the previous or a new entry,
1869  * unless markfree is false.
1870  */
1871 void
1872 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1873     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1874     boolean_t markfree)
1875 {
1876 	struct uvm_addr_state	*free;
1877 	struct vm_map_entry	*prev;
1878 	vaddr_t			 addr;	/* Start of freed range. */
1879 	vaddr_t			 end;	/* End of freed range. */
1880 
1881 	UVM_MAP_REQ_WRITE(map);
1882 
1883 	prev = *prev_ptr;
1884 	if (prev == entry)
1885 		*prev_ptr = prev = NULL;
1886 
1887 	if (prev == NULL ||
1888 	    VMMAP_FREE_END(prev) != entry->start)
1889 		prev = RBT_PREV(uvm_map_addr, entry);
1890 
1891 	/* Entry is describing only free memory and has nothing to drain into. */
1892 	if (prev == NULL && entry->start == entry->end && markfree) {
1893 		*prev_ptr = entry;
1894 		return;
1895 	}
1896 
1897 	addr = entry->start;
1898 	end = VMMAP_FREE_END(entry);
1899 	free = uvm_map_uaddr_e(map, entry);
1900 	uvm_mapent_free_remove(map, free, entry);
1901 	uvm_mapent_addr_remove(map, entry);
1902 	DEAD_ENTRY_PUSH(dead, entry);
1903 
1904 	if (markfree) {
1905 		if (prev) {
1906 			free = uvm_map_uaddr_e(map, prev);
1907 			uvm_mapent_free_remove(map, free, prev);
1908 		}
1909 		*prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1910 	}
1911 }
1912 
1913 /*
1914  * Unwire and release referenced amap and object from map entry.
1915  */
1916 void
1917 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
1918     int needlock)
1919 {
1920 	/* Unwire removed map entry. */
1921 	if (VM_MAPENT_ISWIRED(entry)) {
1922 		KERNEL_LOCK();
1923 		entry->wired_count = 0;
1924 		uvm_fault_unwire_locked(map, entry->start, entry->end);
1925 		KERNEL_UNLOCK();
1926 	}
1927 
1928 	if (needlock)
1929 		uvm_map_lock_entry(entry);
1930 
1931 	/* Entry-type specific code. */
1932 	if (UVM_ET_ISHOLE(entry)) {
1933 		/* Nothing to be done for holes. */
1934 	} else if (map->flags & VM_MAP_INTRSAFE) {
1935 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1936 
1937 		uvm_km_pgremove_intrsafe(entry->start, entry->end);
1938 	} else if (UVM_ET_ISOBJ(entry) &&
1939 	    UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1940 		KASSERT(vm_map_pmap(map) == pmap_kernel());
1941 		/*
1942 		 * Note: kernel object mappings are currently used in
1943 		 * two ways:
1944 		 *  [1] "normal" mappings of pages in the kernel object
1945 		 *  [2] uvm_km_valloc'd allocations in which we
1946 		 *      pmap_enter in some non-kernel-object page
1947 		 *      (e.g. vmapbuf).
1948 		 *
1949 		 * for case [1], we need to remove the mapping from
1950 		 * the pmap and then remove the page from the kernel
1951 		 * object (because, once pages in a kernel object are
1952 		 * unmapped they are no longer needed, unlike, say,
1953 		 * a vnode where you might want the data to persist
1954 		 * until flushed out of a queue).
1955 		 *
1956 		 * for case [2], we need to remove the mapping from
1957 		 * the pmap.  there shouldn't be any pages at the
1958 		 * specified offset in the kernel object [but it
1959 		 * doesn't hurt to call uvm_km_pgremove just to be
1960 		 * safe?]
1961 		 *
1962 		 * uvm_km_pgremove currently does the following:
1963 		 *   for pages in the kernel object range:
1964 		 *     - drops the swap slot
1965 		 *     - uvm_pagefree the page
1966 		 *
1967 		 * note there is a version of uvm_km_pgremove() that
1968 		 * is used for "intrsafe" objects.
1969 		 */
1970 		/*
1971 		 * remove mappings from pmap and drop the pages
1972 		 * from the object.  offsets are always relative
1973 		 * to vm_map_min(kernel_map).
1974 		 */
1975 		uvm_km_pgremove(entry->object.uvm_obj, entry->start,
1976 		    entry->end);
1977 	} else {
1978 		/* remove mappings the standard way. */
1979 		pmap_remove(map->pmap, entry->start, entry->end);
1980 	}
1981 
1982 	if (needlock)
1983 		uvm_map_unlock_entry(entry);
1984 }
1985 
1986 void
1987 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1988 {
1989 	uvm_unmap_kill_entry_withlock(map, entry, 0);
1990 }
1991 
1992 /*
1993  * Remove all entries from start to end.
1994  *
1995  * If remove_holes, then remove ET_HOLE entries as well.
1996  * If markfree, the freed range will be properly marked as free space;
1997  * otherwise no replacement entry is put in the tree, leaving it inconsistent.
1998  */
1999 int
2000 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2001     struct uvm_map_deadq *dead, boolean_t remove_holes,
2002     boolean_t markfree, boolean_t checkimmutable)
2003 {
2004 	struct vm_map_entry *prev_hint, *next, *entry;
2005 
2006 	start = MAX(start, map->min_offset);
2007 	end = MIN(end, map->max_offset);
2008 	if (start >= end)
2009 		return 0;
2010 
2011 	vm_map_assert_wrlock(map);
2012 
2013 	/* Find first affected entry. */
2014 	entry = uvm_map_entrybyaddr(&map->addr, start);
2015 	KDASSERT(entry != NULL && entry->start <= start);
2016 
2017 	if (checkimmutable) {
2018 		struct vm_map_entry *entry1 = entry;
2019 
2020 		/* Refuse to unmap if any entries are immutable */
2021 		if (entry1->end <= start)
2022 			entry1 = RBT_NEXT(uvm_map_addr, entry1);
2023 		for (; entry1 != NULL && entry1->start < end; entry1 = next) {
2024 			KDASSERT(entry1->start >= start);
2025 			next = RBT_NEXT(uvm_map_addr, entry1);
2026 			/* Treat memory holes as free space. */
2027 			if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1))
2028 				continue;
2029 			if (entry1->etype & UVM_ET_IMMUTABLE)
2030 				return EPERM;
2031 		}
2032 	}
2033 
2034 	if (entry->end <= start && markfree)
2035 		entry = RBT_NEXT(uvm_map_addr, entry);
2036 	else
2037 		UVM_MAP_CLIP_START(map, entry, start);
2038 
2039 	/*
2040 	 * Iterate entries until we reach end address.
2041 	 * prev_hint hints where the freed space can be appended to.
2042 	 */
2043 	prev_hint = NULL;
2044 	for (; entry != NULL && entry->start < end; entry = next) {
2045 		KDASSERT(entry->start >= start);
2046 		if (entry->end > end || !markfree)
2047 			UVM_MAP_CLIP_END(map, entry, end);
2048 		KDASSERT(entry->start >= start && entry->end <= end);
2049 		next = RBT_NEXT(uvm_map_addr, entry);
2050 
2051 		/* Don't remove holes unless asked to do so. */
2052 		if (UVM_ET_ISHOLE(entry)) {
2053 			if (!remove_holes) {
2054 				prev_hint = entry;
2055 				continue;
2056 			}
2057 		}
2058 
2059 		/* A stack has been removed. */
2060 		if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2061 			map->sserial++;
2062 
2063 		/* Kill entry. */
2064 		uvm_unmap_kill_entry_withlock(map, entry, 1);
2065 
2066 		/* Update space usage. */
2067 		if ((map->flags & VM_MAP_ISVMSPACE) &&
2068 		    entry->object.uvm_obj == NULL &&
2069 		    entry->protection != PROT_NONE &&
2070 		    !UVM_ET_ISHOLE(entry)) {
2071 			((struct vmspace *)map)->vm_dused -=
2072 			    uvmspace_dused(map, entry->start, entry->end);
2073 		}
2074 		if (!UVM_ET_ISHOLE(entry))
2075 			map->size -= entry->end - entry->start;
2076 
2077 		/* Actual removal of entry. */
2078 		uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2079 	}
2080 
2081 	pmap_update(vm_map_pmap(map));
2082 
2083 #ifdef VMMAP_DEBUG
2084 	if (markfree) {
2085 		for (entry = uvm_map_entrybyaddr(&map->addr, start);
2086 		    entry != NULL && entry->start < end;
2087 		    entry = RBT_NEXT(uvm_map_addr, entry)) {
2088 			KDASSERT(entry->end <= start ||
2089 			    entry->start == entry->end ||
2090 			    UVM_ET_ISHOLE(entry));
2091 		}
2092 	} else {
2093 		vaddr_t a;
2094 		for (a = start; a < end; a += PAGE_SIZE)
2095 			KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2096 	}
2097 #endif
2098 	return 0;
2099 }
2100 
2101 /*
2102  * Mark all entries from first until end (exclusive) as pageable.
2103  *
2104  * Lock must be exclusive on entry and will not be touched.
2105  */
2106 void
2107 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2108     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2109 {
2110 	struct vm_map_entry *iter;
2111 
2112 	for (iter = first; iter != end;
2113 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2114 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2115 		if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2116 			continue;
2117 
2118 		iter->wired_count = 0;
2119 		uvm_fault_unwire_locked(map, iter->start, iter->end);
2120 	}
2121 }
2122 
2123 /*
2124  * Mark all entries from first until end (exclusive) as wired.
2125  *
2126  * Lockflags determines the lock state on return from this function.
2127  * Lock must be exclusive on entry.
2128  */
2129 int
2130 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2131     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2132     int lockflags)
2133 {
2134 	struct vm_map_entry *iter;
2135 #ifdef DIAGNOSTIC
2136 	unsigned int timestamp_save;
2137 #endif
2138 	int error;
2139 
2140 	/*
2141 	 * Wire pages in two passes:
2142 	 *
2143 	 * 1: holding the write lock, we create any anonymous maps that need
2144 	 *    to be created.  then we clip each map entry to the region to
2145 	 *    be wired and increment its wiring count.
2146 	 *
2147 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2148 	 *    in the pages for any newly wired area (wired_count == 1).
2149 	 *
2150 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
2151 	 *    deadlock with another thread that may have faulted on one of
2152 	 *    the pages to be wired (it would mark the page busy, blocking
2153 	 *    us, then in turn block on the map lock that we hold).
2154 	 *    because we keep the read lock on the map, the copy-on-write
2155 	 *    status of the entries we modify here cannot change.
2156 	 */
2157 	for (iter = first; iter != end;
2158 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2159 		KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2160 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2161 		    iter->protection == PROT_NONE)
2162 			continue;
2163 
2164 		/*
2165 		 * Perform actions of vm_map_lookup that need the write lock.
2166 		 * - create an anonymous map for copy-on-write
2167 		 * - anonymous map for zero-fill
2168 		 * Skip submaps.
2169 		 */
2170 		if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2171 		    UVM_ET_ISNEEDSCOPY(iter) &&
2172 		    ((iter->protection & PROT_WRITE) ||
2173 		    iter->object.uvm_obj == NULL)) {
2174 			amap_copy(map, iter, M_WAITOK,
2175 			    UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2176 			    iter->start, iter->end);
2177 		}
2178 		iter->wired_count++;
2179 	}
2180 
2181 	/*
2182 	 * Pass 2.
2183 	 */
2184 #ifdef DIAGNOSTIC
2185 	timestamp_save = map->timestamp;
2186 #endif
2187 	vm_map_busy(map);
2188 	vm_map_downgrade(map);
2189 
2190 	error = 0;
2191 	for (iter = first; error == 0 && iter != end;
2192 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
2193 		if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2194 		    iter->protection == PROT_NONE)
2195 			continue;
2196 
2197 		error = uvm_fault_wire(map, iter->start, iter->end,
2198 		    iter->protection);
2199 	}
2200 
2201 	if (error) {
2202 		/*
2203 		 * uvm_fault_wire failure
2204 		 *
2205 		 * Reacquire lock and undo our work.
2206 		 */
2207 		vm_map_upgrade(map);
2208 		vm_map_unbusy(map);
2209 #ifdef DIAGNOSTIC
2210 		if (timestamp_save != map->timestamp)
2211 			panic("uvm_map_pageable_wire: stale map");
2212 #endif
2213 
2214 		/*
2215 		 * first is no longer needed to restart loops.
2216 		 * Use it as iterator to unwire the entries wired so far.
2217 		 */
2218 		for (; first != iter;
2219 		    first = RBT_NEXT(uvm_map_addr, first)) {
2220 			if (UVM_ET_ISHOLE(first) ||
2221 			    first->start == first->end ||
2222 			    first->protection == PROT_NONE)
2223 				continue;
2224 
2225 			first->wired_count--;
2226 			if (!VM_MAPENT_ISWIRED(first)) {
2227 				uvm_fault_unwire_locked(map,
2228 				    first->start, first->end);
2229 			}
2230 		}
2231 
2232 		/* decrease counter in the rest of the entries */
2233 		for (; iter != end;
2234 		    iter = RBT_NEXT(uvm_map_addr, iter)) {
2235 			if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2236 			    iter->protection == PROT_NONE)
2237 				continue;
2238 
2239 			iter->wired_count--;
2240 		}
2241 
2242 		if ((lockflags & UVM_LK_EXIT) == 0)
2243 			vm_map_unlock(map);
2244 		return error;
2245 	}
2246 
2247 	/* We are currently holding a read lock. */
2248 	if ((lockflags & UVM_LK_EXIT) == 0) {
2249 		vm_map_unbusy(map);
2250 		vm_map_unlock_read(map);
2251 	} else {
2252 		vm_map_upgrade(map);
2253 		vm_map_unbusy(map);
2254 #ifdef DIAGNOSTIC
2255 		if (timestamp_save != map->timestamp)
2256 			panic("uvm_map_pageable_wire: stale map");
2257 #endif
2258 	}
2259 	return 0;
2260 }
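
/*
 * Simplified standalone sketch of the error path above, kept out of the
 * build with #if 0: bump a per-item count in pass 1, do the fallible work
 * in pass 2, then unwind by fully undoing the items that were processed
 * and only dropping the count for the rest.  All names are made up.
 */
#if 0
#include <stdio.h>

#define NITEMS	8

/* Stand-in for uvm_fault_wire(); pretend item 5 fails. */
static int
wire_one(int i)
{
	return (i == 5) ? -1 : 0;
}

int
main(void)
{
	int wired_count[NITEMS] = { 0 };
	int i, j, failed = -1;

	/* Pass 1: cheap bookkeeping that cannot fail. */
	for (i = 0; i < NITEMS; i++)
		wired_count[i]++;

	/* Pass 2: the expensive work; stop at the first failure. */
	for (i = 0; i < NITEMS; i++) {
		if (wire_one(i) != 0) {
			failed = i;
			break;
		}
	}

	if (failed != -1) {
		/* Undo the items that were actually wired... */
		for (j = 0; j < failed; j++)
			wired_count[j]--;	/* would also unwire pages */
		/* ...and only drop the count for the untouched rest. */
		for (j = failed; j < NITEMS; j++)
			wired_count[j]--;
	}

	for (i = 0; i < NITEMS; i++)
		printf("%d ", wired_count[i]);
	printf("\n");
	return 0;
}
#endif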
2261 
2262 /*
2263  * uvm_map_pageable: set pageability of a range in a map.
2264  *
2265  * Flags:
2266  * UVM_LK_ENTER: map is already locked by caller
2267  * UVM_LK_EXIT:  don't unlock map on exit
2268  *
2269  * The full range must be in use (entries must have fspace == 0).
2270  * UVM_ET_HOLE counts as unmapped.
2271  */
2272 int
2273 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2274     boolean_t new_pageable, int lockflags)
2275 {
2276 	struct vm_map_entry *first, *last, *tmp;
2277 	int error;
2278 
2279 	start = trunc_page(start);
2280 	end = round_page(end);
2281 
2282 	if (start > end)
2283 		return EINVAL;
2284 	if (start == end)
2285 		return 0;	/* nothing to do */
2286 	if (start < map->min_offset)
2287 		return EFAULT; /* why? see first XXX below */
2288 	if (end > map->max_offset)
2289 		return EINVAL; /* why? see second XXX below */
2290 
2291 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2292 	if ((lockflags & UVM_LK_ENTER) == 0)
2293 		vm_map_lock(map);
2294 
2295 	/*
2296 	 * Find first entry.
2297 	 *
2298 	 * Initial test on start is different, because of the different
2299 	 * error returned. Rest is tested further down.
2300 	 */
2301 	first = uvm_map_entrybyaddr(&map->addr, start);
2302 	if (first->end <= start || UVM_ET_ISHOLE(first)) {
2303 		/*
2304 		 * XXX if the first address is not mapped, it is EFAULT?
2305 		 */
2306 		error = EFAULT;
2307 		goto out;
2308 	}
2309 
2310 	/* Check that the range has no holes. */
2311 	for (last = first; last != NULL && last->start < end;
2312 	    last = RBT_NEXT(uvm_map_addr, last)) {
2313 		if (UVM_ET_ISHOLE(last) ||
2314 		    (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2315 			/*
2316 			 * XXX unmapped memory in range, why is it EINVAL
2317 			 * instead of EFAULT?
2318 			 */
2319 			error = EINVAL;
2320 			goto out;
2321 		}
2322 	}
2323 
2324 	/*
2325 	 * Last ended at the first entry after the range.
2326 	 * Move back one step.
2327 	 *
2328 	 * Note that last may be NULL.
2329 	 */
2330 	if (last == NULL) {
2331 		last = RBT_MAX(uvm_map_addr, &map->addr);
2332 		if (last->end < end) {
2333 			error = EINVAL;
2334 			goto out;
2335 		}
2336 	} else {
2337 		KASSERT(last != first);
2338 		last = RBT_PREV(uvm_map_addr, last);
2339 	}
2340 
2341 	/* Wire/unwire pages here. */
2342 	if (new_pageable) {
2343 		/*
2344 		 * Mark pageable.
2345 		 * entries that are not wired are untouched.
2346 		 */
2347 		if (VM_MAPENT_ISWIRED(first))
2348 			UVM_MAP_CLIP_START(map, first, start);
2349 		/*
2350 		 * Split last at end.
2351 		 * Make tmp be the first entry after what is to be touched.
2352 		 * If last is not wired, don't touch it.
2353 		 */
2354 		if (VM_MAPENT_ISWIRED(last)) {
2355 			UVM_MAP_CLIP_END(map, last, end);
2356 			tmp = RBT_NEXT(uvm_map_addr, last);
2357 		} else
2358 			tmp = last;
2359 
2360 		uvm_map_pageable_pgon(map, first, tmp, start, end);
2361 		error = 0;
2362 
2363 out:
2364 		if ((lockflags & UVM_LK_EXIT) == 0)
2365 			vm_map_unlock(map);
2366 		return error;
2367 	} else {
2368 		/*
2369 		 * Mark entries wired.
2370 		 * entries are always touched (because recovery needs this).
2371 		 */
2372 		if (!VM_MAPENT_ISWIRED(first))
2373 			UVM_MAP_CLIP_START(map, first, start);
2374 		/*
2375 		 * Split last at end.
2376 		 * Make tmp be the first entry after what is to be touched.
2377 		 * If last is not wired, don't touch it.
2378 		 */
2379 		if (!VM_MAPENT_ISWIRED(last)) {
2380 			UVM_MAP_CLIP_END(map, last, end);
2381 			tmp = RBT_NEXT(uvm_map_addr, last);
2382 		} else
2383 			tmp = last;
2384 
2385 		return uvm_map_pageable_wire(map, first, tmp, start, end,
2386 		    lockflags);
2387 	}
2388 }
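
/*
 * Illustrative userland sketch, kept out of the build with #if 0:
 * mlock(2)/munlock(2) are the usual entry points into this function.
 * The addresses passed in are truncated/rounded to page boundaries as
 * above.  Buffer size is arbitrary; error handling is minimal.
 */
#if 0
#include <sys/mman.h>
#include <stdlib.h>
#include <err.h>

int
main(void)
{
	size_t len = 4096;
	void *buf;

	if ((buf = malloc(len)) == NULL)
		err(1, "malloc");

	/* Wire the page(s) backing buf so they cannot be paged out. */
	if (mlock(buf, len) == -1)
		err(1, "mlock");

	/* ... work on buf ... */

	if (munlock(buf, len) == -1)
		err(1, "munlock");
	free(buf);
	return 0;
}
#endif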
2389 
2390 /*
2391  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2392  * all mapped regions.
2393  *
2394  * Map must not be locked.
2395  * If no flags are specified, all regions are unwired.
2396  */
2397 int
2398 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2399 {
2400 	vsize_t size;
2401 	struct vm_map_entry *iter;
2402 
2403 	KASSERT(map->flags & VM_MAP_PAGEABLE);
2404 	vm_map_lock(map);
2405 
2406 	if (flags == 0) {
2407 		uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2408 		    NULL, map->min_offset, map->max_offset);
2409 
2410 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2411 		vm_map_unlock(map);
2412 		return 0;
2413 	}
2414 
2415 	if (flags & MCL_FUTURE)
2416 		vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2417 	if (!(flags & MCL_CURRENT)) {
2418 		vm_map_unlock(map);
2419 		return 0;
2420 	}
2421 
2422 	/*
2423 	 * Count number of pages in all non-wired entries.
2424 	 * If the number exceeds the limit, abort.
2425 	 */
2426 	size = 0;
2427 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2428 		if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2429 			continue;
2430 
2431 		size += iter->end - iter->start;
2432 	}
2433 
2434 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2435 		vm_map_unlock(map);
2436 		return ENOMEM;
2437 	}
2438 
2439 	/* XXX non-pmap_wired_count case must be handled by caller */
2440 #ifdef pmap_wired_count
2441 	if (limit != 0 &&
2442 	    size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2443 		vm_map_unlock(map);
2444 		return ENOMEM;
2445 	}
2446 #endif
2447 
2448 	/*
2449 	 * uvm_map_pageable_wire will release lock
2450 	 */
2451 	return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2452 	    NULL, map->min_offset, map->max_offset, 0);
2453 }
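
/*
 * Illustrative userland sketch, kept out of the build with #if 0:
 * mlockall(2) is the usual entry point here.  MCL_CURRENT wires what is
 * mapped now; MCL_FUTURE sets VM_MAP_WIREFUTURE so later mappings are
 * wired as they are created.  The call may fail with ENOMEM when the
 * wired-page limits checked above are exceeded.
 */
#if 0
#include <sys/mman.h>
#include <err.h>

int
main(void)
{
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
		err(1, "mlockall");

	/* ... latency-sensitive work with no page-outs ... */

	if (munlockall() == -1)
		err(1, "munlockall");
	return 0;
}
#endif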
2454 
2455 /*
2456  * Initialize map.
2457  *
2458  * Allocates sufficient entries to describe the free memory in the map.
2459  */
2460 void
2461 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2462     int flags)
2463 {
2464 	int i;
2465 
2466 	KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2467 	KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2468 	    (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2469 
2470 	/*
2471 	 * Update parameters.
2472 	 *
2473 	 * This code handles (vaddr_t)-1 and other page mask ending addresses
2474 	 * properly.
2475 	 * We lose the top page if the full virtual address space is used.
2476 	 */
2477 	if (max & (vaddr_t)PAGE_MASK) {
2478 		max += 1;
2479 		if (max == 0) /* overflow */
2480 			max -= PAGE_SIZE;
2481 	}
2482 
2483 	RBT_INIT(uvm_map_addr, &map->addr);
2484 	map->uaddr_exe = NULL;
2485 	for (i = 0; i < nitems(map->uaddr_any); ++i)
2486 		map->uaddr_any[i] = NULL;
2487 	map->uaddr_brk_stack = NULL;
2488 
2489 	map->pmap = pmap;
2490 	map->size = 0;
2491 	map->ref_count = 0;
2492 	map->min_offset = min;
2493 	map->max_offset = max;
2494 	map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2495 	map->s_start = map->s_end = 0; /* Empty stack area by default. */
2496 	map->flags = flags;
2497 	map->timestamp = 0;
2498 	if (flags & VM_MAP_ISVMSPACE)
2499 		rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2500 	else
2501 		rw_init(&map->lock, "kmmaplk");
2502 	mtx_init(&map->mtx, IPL_VM);
2503 	mtx_init(&map->flags_lock, IPL_VM);
2504 
2505 	/* Configure the allocators. */
2506 	if (flags & VM_MAP_ISVMSPACE)
2507 		uvm_map_setup_md(map);
2508 	else
2509 		map->uaddr_any[3] = &uaddr_kbootstrap;
2510 
2511 	/*
2512 	 * Fill map entries.
2513 	 * We do not need to write-lock the map here because only the current
2514 	 * thread sees it right now. Initialize ref_count to 0 above to avoid
2515 	 * bogus triggering of lock-not-held assertions.
2516 	 */
2517 	uvm_map_setup_entries(map);
2518 	uvm_tree_sanity(map, __FILE__, __LINE__);
2519 	map->ref_count = 1;
2520 }
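
/*
 * Worked example of the max-address normalization above, kept out of the
 * build with #if 0, assuming 4KB pages: a map ending at (vaddr_t)-1 loses
 * its top page rather than overflowing.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)

int
main(void)
{
	uint64_t max = ~0ULL;		/* ends in PAGE_MASK */

	if (max & EX_PAGE_MASK) {
		max += 1;		/* wraps around to 0 ... */
		if (max == 0)		/* ... so back off one page */
			max -= EX_PAGE_SIZE;
	}
	/* Prints 0xfffffffffffff000: the top page is sacrificed. */
	printf("max = 0x%llx\n", (unsigned long long)max);
	return 0;
}
#endif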
2521 
2522 /*
2523  * Destroy the map.
2524  *
2525  * This is the inverse operation to uvm_map_setup.
2526  */
2527 void
2528 uvm_map_teardown(struct vm_map *map)
2529 {
2530 	struct uvm_map_deadq	 dead_entries;
2531 	struct vm_map_entry	*entry, *tmp;
2532 #ifdef VMMAP_DEBUG
2533 	size_t			 numq, numt;
2534 #endif
2535 	int			 i;
2536 
2537 	KERNEL_ASSERT_LOCKED();
2538 	KERNEL_UNLOCK();
2539 	KERNEL_ASSERT_UNLOCKED();
2540 
2541 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2542 
2543 	vm_map_lock(map);
2544 
2545 	/* Remove address selectors. */
2546 	uvm_addr_destroy(map->uaddr_exe);
2547 	map->uaddr_exe = NULL;
2548 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2549 		uvm_addr_destroy(map->uaddr_any[i]);
2550 		map->uaddr_any[i] = NULL;
2551 	}
2552 	uvm_addr_destroy(map->uaddr_brk_stack);
2553 	map->uaddr_brk_stack = NULL;
2554 
2555 	/*
2556 	 * Remove entries.
2557 	 *
2558 	 * The following is based on graph breadth-first search.
2559 	 *
2560 	 * In color terms:
2561 	 * - the dead_entries set contains all nodes that are reachable
2562 	 *   (i.e. both the black and the grey nodes)
2563 	 * - any entry not in dead_entries is white
2564 	 * - any entry that appears in dead_entries before entry
2565 	 *   is black; the rest are grey.
2566 	 * The set [entry, end] is also referred to as the wavefront.
2567 	 *
2568 	 * Since the tree is always a connected graph, the breadth-first
2569 	 * search guarantees that each vm_map_entry is visited exactly once.
2570 	 * The vm_map is broken down in linear time.
2571 	 */
2572 	TAILQ_INIT(&dead_entries);
2573 	if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2574 		DEAD_ENTRY_PUSH(&dead_entries, entry);
2575 	while (entry != NULL) {
2576 		sched_pause(yield);
2577 		uvm_unmap_kill_entry(map, entry);
2578 		if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2579 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2580 		if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2581 			DEAD_ENTRY_PUSH(&dead_entries, tmp);
2582 		/* Update wave-front. */
2583 		entry = TAILQ_NEXT(entry, dfree.deadq);
2584 	}
2585 
2586 	vm_map_unlock(map);
2587 
2588 #ifdef VMMAP_DEBUG
2589 	numt = numq = 0;
2590 	RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2591 		numt++;
2592 	TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2593 		numq++;
2594 	KASSERT(numt == numq);
2595 #endif
2596 	uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2597 
2598 	KERNEL_LOCK();
2599 
2600 	pmap_destroy(map->pmap);
2601 	map->pmap = NULL;
2602 }
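
/*
 * Standalone sketch of the traversal above, kept out of the build with
 * #if 0: the dead queue doubles as the breadth-first work list, so every
 * node of a binary tree is visited exactly once and can then be freed in
 * a single sweep.  The node type and values are made up.
 */
#if 0
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node		*left, *right;
	TAILQ_ENTRY(node)	 q;
	int			 val;
};
TAILQ_HEAD(nodeq, node);

static void
bfs_teardown(struct node *root)
{
	struct nodeq dead = TAILQ_HEAD_INITIALIZER(dead);
	struct node *n;

	if (root != NULL)
		TAILQ_INSERT_TAIL(&dead, root, q);
	/* Walk the queue while appending children to its tail. */
	for (n = TAILQ_FIRST(&dead); n != NULL; n = TAILQ_NEXT(n, q)) {
		printf("visit %d\n", n->val);	/* cf. uvm_unmap_kill_entry() */
		if (n->left != NULL)
			TAILQ_INSERT_TAIL(&dead, n->left, q);
		if (n->right != NULL)
			TAILQ_INSERT_TAIL(&dead, n->right, q);
	}
	/* The queue now holds every node; release them in one sweep. */
	while ((n = TAILQ_FIRST(&dead)) != NULL) {
		TAILQ_REMOVE(&dead, n, q);
		free(n);
	}
}

int
main(void)
{
	struct node *a = calloc(1, sizeof(*a));
	struct node *b = calloc(1, sizeof(*b));
	struct node *c = calloc(1, sizeof(*c));

	a->val = 1; b->val = 2; c->val = 3;
	a->left = b;
	a->right = c;
	bfs_teardown(a);	/* visits 1, 2, 3 */
	return 0;
}
#endif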
2603 
2604 /*
2605  * Populate map with free-memory entries.
2606  *
2607  * Map must be initialized and empty.
2608  */
2609 void
2610 uvm_map_setup_entries(struct vm_map *map)
2611 {
2612 	KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2613 
2614 	uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2615 }
2616 
2617 /*
2618  * Split entry at given address.
2619  *
2620  * orig:  entry that is to be split.
2621  * next:  a newly allocated map entry that is not linked.
2622  * split: address at which the split is done.
2623  */
2624 void
2625 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2626     struct vm_map_entry *next, vaddr_t split)
2627 {
2628 	struct uvm_addr_state *free, *free_before;
2629 	vsize_t adj;
2630 
2631 	if ((split & PAGE_MASK) != 0) {
2632 		panic("uvm_map_splitentry: split address 0x%lx "
2633 		    "not on page boundary!", split);
2634 	}
2635 	KDASSERT(map != NULL && orig != NULL && next != NULL);
2636 	uvm_tree_sanity(map, __FILE__, __LINE__);
2637 	KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2638 
2639 #ifdef VMMAP_DEBUG
2640 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2641 	KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2642 #endif /* VMMAP_DEBUG */
2643 
2644 	/*
2645 	 * Free space will change, unlink from free space tree.
2646 	 */
2647 	free = uvm_map_uaddr_e(map, orig);
2648 	uvm_mapent_free_remove(map, free, orig);
2649 
2650 	adj = split - orig->start;
2651 
2652 	uvm_mapent_copy(orig, next);
2653 	if (split >= orig->end) {
2654 		next->etype = 0;
2655 		next->offset = 0;
2656 		next->wired_count = 0;
2657 		next->start = next->end = split;
2658 		next->guard = 0;
2659 		next->fspace = VMMAP_FREE_END(orig) - split;
2660 		next->aref.ar_amap = NULL;
2661 		next->aref.ar_pageoff = 0;
2662 		orig->guard = MIN(orig->guard, split - orig->end);
2663 		orig->fspace = split - VMMAP_FREE_START(orig);
2664 	} else {
2665 		orig->fspace = 0;
2666 		orig->guard = 0;
2667 		orig->end = next->start = split;
2668 
2669 		if (next->aref.ar_amap) {
2670 			amap_splitref(&orig->aref, &next->aref, adj);
2671 		}
2672 		if (UVM_ET_ISSUBMAP(orig)) {
2673 			uvm_map_reference(next->object.sub_map);
2674 			next->offset += adj;
2675 		} else if (UVM_ET_ISOBJ(orig)) {
2676 			if (next->object.uvm_obj->pgops &&
2677 			    next->object.uvm_obj->pgops->pgo_reference) {
2678 				KERNEL_LOCK();
2679 				next->object.uvm_obj->pgops->pgo_reference(
2680 				    next->object.uvm_obj);
2681 				KERNEL_UNLOCK();
2682 			}
2683 			next->offset += adj;
2684 		}
2685 	}
2686 
2687 	/*
2688 	 * Link next into address tree.
2689 	 * Link orig and next into free-space tree.
2690 	 *
2691 	 * Don't insert 'next' into the addr tree until orig has been linked,
2692 	 * in case the free-list looks at adjacent entries in the addr tree
2693 	 * for its decisions.
2694 	 */
2695 	if (orig->fspace > 0)
2696 		free_before = free;
2697 	else
2698 		free_before = uvm_map_uaddr_e(map, orig);
2699 	uvm_mapent_free_insert(map, free_before, orig);
2700 	uvm_mapent_addr_insert(map, next);
2701 	uvm_mapent_free_insert(map, free, next);
2702 
2703 	uvm_tree_sanity(map, __FILE__, __LINE__);
2704 }
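
/*
 * Worked example of the offset bookkeeping above, kept out of the build
 * with #if 0: splitting a mapped entry [0x1000, 0x5000) at 0x3000 leaves
 * orig covering [0x1000, 0x3000) at its old offset and next covering
 * [0x3000, 0x5000) with the offset advanced by adj.  Numbers are made up.
 */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t start = 0x1000, end = 0x5000, off = 0x0, split = 0x3000;
	uint64_t adj = split - start;			/* 0x2000 */

	printf("orig: 0x%llx-0x%llx off 0x%llx\n",
	    (unsigned long long)start, (unsigned long long)split,
	    (unsigned long long)off);
	printf("next: 0x%llx-0x%llx off 0x%llx\n",
	    (unsigned long long)split, (unsigned long long)end,
	    (unsigned long long)(off + adj));
	return 0;
}
#endif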
2705 
2706 
2707 #ifdef VMMAP_DEBUG
2708 
2709 void
2710 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2711     char *file, int line)
2712 {
2713 	char* map_special;
2714 
2715 	if (test)
2716 		return;
2717 
2718 	if (map == kernel_map)
2719 		map_special = " (kernel_map)";
2720 	else if (map == kmem_map)
2721 		map_special = " (kmem_map)";
2722 	else
2723 		map_special = "";
2724 	panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2725 	    line, test_str);
2726 }
2727 
2728 /*
2729  * Check that map is sane.
2730  */
2731 void
2732 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2733 {
2734 	struct vm_map_entry	*iter;
2735 	vaddr_t			 addr;
2736 	vaddr_t			 min, max, bound; /* Bounds checker. */
2737 	struct uvm_addr_state	*free;
2738 
2739 	addr = vm_map_min(map);
2740 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2741 		/*
2742 		 * Valid start, end.
2743 		 * Catch overflow for end+fspace.
2744 		 */
2745 		UVM_ASSERT(map, iter->end >= iter->start, file, line);
2746 		UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2747 
2748 		/* May not be empty. */
2749 		UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2750 		    file, line);
2751 
2752 		/* Addresses for entry must lie within map boundaries. */
2753 		UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2754 		    VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2755 
2756 		/* Tree may not have gaps. */
2757 		UVM_ASSERT(map, iter->start == addr, file, line);
2758 		addr = VMMAP_FREE_END(iter);
2759 
2760 		/*
2761 		 * Free space may not cross boundaries, unless the same
2762 		 * free list is used on both sides of the border.
2763 		 */
2764 		min = VMMAP_FREE_START(iter);
2765 		max = VMMAP_FREE_END(iter);
2766 
2767 		while (min < max &&
2768 		    (bound = uvm_map_boundary(map, min, max)) != max) {
2769 			UVM_ASSERT(map,
2770 			    uvm_map_uaddr(map, bound - 1) ==
2771 			    uvm_map_uaddr(map, bound),
2772 			    file, line);
2773 			min = bound;
2774 		}
2775 
2776 		free = uvm_map_uaddr_e(map, iter);
2777 		if (free) {
2778 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2779 			    file, line);
2780 		} else {
2781 			UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2782 			    file, line);
2783 		}
2784 	}
2785 	UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2786 }
2787 
2788 void
2789 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2790 {
2791 	struct vm_map_entry *iter;
2792 	vsize_t size;
2793 
2794 	size = 0;
2795 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2796 		if (!UVM_ET_ISHOLE(iter))
2797 			size += iter->end - iter->start;
2798 	}
2799 
2800 	if (map->size != size)
2801 		printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2802 	UVM_ASSERT(map, map->size == size, file, line);
2803 
2804 	vmspace_validate(map);
2805 }
2806 
2807 /*
2808  * This function validates the statistics on vmspace.
2809  */
2810 void
2811 vmspace_validate(struct vm_map *map)
2812 {
2813 	struct vmspace *vm;
2814 	struct vm_map_entry *iter;
2815 	vaddr_t imin, imax;
2816 	vaddr_t stack_begin, stack_end; /* Position of stack. */
2817 	vsize_t stack, heap; /* Measured sizes. */
2818 
2819 	if (!(map->flags & VM_MAP_ISVMSPACE))
2820 		return;
2821 
2822 	vm = (struct vmspace *)map;
2823 	stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2824 	stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2825 
2826 	stack = heap = 0;
2827 	RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2828 		imin = imax = iter->start;
2829 
2830 		if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
2831 		    iter->protection != PROT_NONE)
2832 			continue;
2833 
2834 		/*
2835 		 * Update stack, heap.
2836 		 * Keep in mind that (theoretically) the entries of
2837 		 * userspace and stack may be joined.
2838 		 */
2839 		while (imin != iter->end) {
2840 			/*
2841 			 * Set imax to the first boundary crossed between
2842 			 * imin and stack addresses.
2843 			 */
2844 			imax = iter->end;
2845 			if (imin < stack_begin && imax > stack_begin)
2846 				imax = stack_begin;
2847 			else if (imin < stack_end && imax > stack_end)
2848 				imax = stack_end;
2849 
2850 			if (imin >= stack_begin && imin < stack_end)
2851 				stack += imax - imin;
2852 			else
2853 				heap += imax - imin;
2854 			imin = imax;
2855 		}
2856 	}
2857 
2858 	heap >>= PAGE_SHIFT;
2859 	if (heap != vm->vm_dused) {
2860 		printf("vmspace stack range: 0x%lx-0x%lx\n",
2861 		    stack_begin, stack_end);
2862 		panic("vmspace_validate: vmspace.vm_dused invalid, "
2863 		    "expected %ld pgs, got %d pgs in map %p",
2864 		    heap, vm->vm_dused,
2865 		    map);
2866 	}
2867 }
2868 
2869 #endif /* VMMAP_DEBUG */
2870 
2871 /*
2872  * uvm_map_init: init mapping system at boot time.   note that we allocate
2873  * and init the static pool of structs vm_map_entry for the kernel here.
2874  */
2875 void
2876 uvm_map_init(void)
2877 {
2878 	static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2879 	int lcv;
2880 
2881 	/* now set up static pool of kernel map entries ... */
2882 	mtx_init(&uvm_kmapent_mtx, IPL_VM);
2883 	SLIST_INIT(&uvm.kentry_free);
2884 	for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2885 		SLIST_INSERT_HEAD(&uvm.kentry_free,
2886 		    &kernel_map_entry[lcv], daddrs.addr_kentry);
2887 	}
2888 
2889 	/* initialize the map-related pools. */
2890 	pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
2891 	    IPL_NONE, PR_WAITOK, "vmsppl", NULL);
2892 	pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
2893 	    IPL_VM, PR_WAITOK, "vmmpepl", NULL);
2894 	pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
2895 	    IPL_VM, 0, "vmmpekpl", NULL);
2896 	pool_sethiwat(&uvm_map_entry_pool, 8192);
2897 
2898 	uvm_addr_init();
2899 }
2900 
2901 #if defined(DDB)
2902 
2903 /*
2904  * DDB hooks
2905  */
2906 
2907 /*
2908  * uvm_map_printit: actually prints the map
2909  */
2910 void
2911 uvm_map_printit(struct vm_map *map, boolean_t full,
2912     int (*pr)(const char *, ...))
2913 {
2914 	struct vmspace			*vm;
2915 	struct vm_map_entry		*entry;
2916 	struct uvm_addr_state		*free;
2917 	int				 in_free, i;
2918 	char				 buf[8];
2919 
2920 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2921 	(*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2922 	    map->b_start, map->b_end);
2923 	(*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2924 	    map->s_start, map->s_end);
2925 	(*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2926 	    map->size, map->ref_count, map->timestamp,
2927 	    map->flags);
2928 	(*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2929 	    pmap_resident_count(map->pmap));
2930 
2931 	/* struct vmspace handling. */
2932 	if (map->flags & VM_MAP_ISVMSPACE) {
2933 		vm = (struct vmspace *)map;
2934 
2935 		(*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2936 		    vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2937 		(*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2938 		    vm->vm_tsize, vm->vm_dsize);
2939 		(*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2940 		    vm->vm_taddr, vm->vm_daddr);
2941 		(*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2942 		    vm->vm_maxsaddr, vm->vm_minsaddr);
2943 	}
2944 
2945 	if (!full)
2946 		goto print_uaddr;
2947 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
2948 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
2949 		    entry, entry->start, entry->end, entry->object.uvm_obj,
2950 		    (long long)entry->offset, entry->aref.ar_amap,
2951 		    entry->aref.ar_pageoff);
2952 		(*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
2953 		    "syscall=%c, prot(max)=%d/%d, inh=%d, "
2954 		    "wc=%d, adv=%d\n",
2955 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
2956 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
2957 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
2958 		    (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
2959 		    (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
2960 		    entry->protection, entry->max_protection,
2961 		    entry->inheritance, entry->wired_count, entry->advice);
2962 
2963 		free = uvm_map_uaddr_e(map, entry);
2964 		in_free = (free != NULL);
2965 		(*pr)("\thole=%c, free=%c, guard=0x%lx, "
2966 		    "free=0x%lx-0x%lx\n",
2967 		    (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
2968 		    in_free ? 'T' : 'F',
2969 		    entry->guard,
2970 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
2971 		(*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
2972 		(*pr)("\tfreemapped=%c, uaddr=%p\n",
2973 		    (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
2974 		if (free) {
2975 			(*pr)("\t\t(0x%lx-0x%lx %s)\n",
2976 			    free->uaddr_minaddr, free->uaddr_maxaddr,
2977 			    free->uaddr_functions->uaddr_name);
2978 		}
2979 	}
2980 
2981 print_uaddr:
2982 	uvm_addr_print(map->uaddr_exe, "exe", full, pr);
2983 	for (i = 0; i < nitems(map->uaddr_any); i++) {
2984 		snprintf(&buf[0], sizeof(buf), "any[%d]", i);
2985 		uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
2986 	}
2987 	uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
2988 }
2989 
2990 /*
2991  * uvm_object_printit: actually prints the object
2992  */
2993 void
2994 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
2995     int (*pr)(const char *, ...))
2996 {
2997 	struct vm_page *pg;
2998 	int cnt = 0;
2999 
3000 	(*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3001 	    uobj, uobj->pgops, uobj->uo_npages);
3002 	if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3003 		(*pr)("refs=<SYSTEM>\n");
3004 	else
3005 		(*pr)("refs=%d\n", uobj->uo_refs);
3006 
3007 	if (!full) {
3008 		return;
3009 	}
3010 	(*pr)("  PAGES <pg,offset>:\n  ");
3011 	RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3012 		(*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3013 		if ((cnt % 3) == 2) {
3014 			(*pr)("\n  ");
3015 		}
3016 		cnt++;
3017 	}
3018 	if ((cnt % 3) != 2) {
3019 		(*pr)("\n");
3020 	}
3021 }
3022 
3023 /*
3024  * uvm_page_printit: actually print the page
3025  */
3026 static const char page_flagbits[] =
3027 	"\20\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3028 	"\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3029 	"\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3030 
3031 void
3032 uvm_page_printit(struct vm_page *pg, boolean_t full,
3033     int (*pr)(const char *, ...))
3034 {
3035 	struct vm_page *tpg;
3036 	struct uvm_object *uobj;
3037 	struct pglist *pgl;
3038 
3039 	(*pr)("PAGE %p:\n", pg);
3040 	(*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3041 	    pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3042 	    (long long)pg->phys_addr);
3043 	(*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
3044 	    pg->uobject, pg->uanon, (long long)pg->offset);
3045 #if defined(UVM_PAGE_TRKOWN)
3046 	if (pg->pg_flags & PG_BUSY)
3047 		(*pr)("  owning thread = %d, tag=%s",
3048 		    pg->owner, pg->owner_tag);
3049 	else
3050 		(*pr)("  page not busy, no owner");
3051 #else
3052 	(*pr)("  [page ownership tracking disabled]");
3053 #endif
3054 	(*pr)("\tvm_page_md %p\n", &pg->mdpage);
3055 
3056 	if (!full)
3057 		return;
3058 
3059 	/* cross-verify object/anon */
3060 	if ((pg->pg_flags & PQ_FREE) == 0) {
3061 		if (pg->pg_flags & PQ_ANON) {
3062 			if (pg->uanon == NULL || pg->uanon->an_page != pg)
3063 			    (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3064 				(pg->uanon) ? pg->uanon->an_page : NULL);
3065 			else
3066 				(*pr)("  anon backpointer is OK\n");
3067 		} else {
3068 			uobj = pg->uobject;
3069 			if (uobj) {
3070 				(*pr)("  checking object list\n");
3071 				RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3072 					if (tpg == pg) {
3073 						break;
3074 					}
3075 				}
3076 				if (tpg)
3077 					(*pr)("  page found on object list\n");
3078 				else
3079 					(*pr)("  >>> PAGE NOT FOUND "
3080 					    "ON OBJECT LIST! <<<\n");
3081 			}
3082 		}
3083 	}
3084 
3085 	/* cross-verify page queue */
3086 	if (pg->pg_flags & PQ_FREE) {
3087 		if (uvm_pmr_isfree(pg))
3088 			(*pr)("  page found in uvm_pmemrange\n");
3089 		else
3090 			(*pr)("  >>> page not found in uvm_pmemrange <<<\n");
3091 		pgl = NULL;
3092 	} else if (pg->pg_flags & PQ_INACTIVE) {
3093 		pgl = &uvm.page_inactive;
3094 	} else if (pg->pg_flags & PQ_ACTIVE) {
3095 		pgl = &uvm.page_active;
3096 	} else {
3097 		pgl = NULL;
3098 	}
3099 
3100 	if (pgl) {
3101 		(*pr)("  checking pageq list\n");
3102 		TAILQ_FOREACH(tpg, pgl, pageq) {
3103 			if (tpg == pg) {
3104 				break;
3105 			}
3106 		}
3107 		if (tpg)
3108 			(*pr)("  page found on pageq list\n");
3109 		else
3110 			(*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3111 	}
3112 }
3113 #endif
3114 
3115 /*
3116  * uvm_map_protect: change map protection
3117  *
3118  * => set_max means set max_protection.
3119  * => map must be unlocked.
3120  */
3121 int
3122 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3123     vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable)
3124 {
3125 	struct vm_map_entry *first, *iter;
3126 	vm_prot_t old_prot;
3127 	vm_prot_t mask;
3128 	vsize_t dused;
3129 	int error;
3130 
3131 	KASSERT((etype & ~UVM_ET_STACK) == 0);	/* only UVM_ET_STACK allowed */
3132 
3133 	if (start > end)
3134 		return EINVAL;
3135 	start = MAX(start, map->min_offset);
3136 	end = MIN(end, map->max_offset);
3137 	if (start >= end)
3138 		return 0;
3139 
3140 	dused = 0;
3141 	error = 0;
3142 	vm_map_lock(map);
3143 
3144 	/*
3145 	 * Set up first and last.
3146 	 * - first will contain first entry at or after start.
3147 	 */
3148 	first = uvm_map_entrybyaddr(&map->addr, start);
3149 	KDASSERT(first != NULL);
3150 	if (first->end <= start)
3151 		first = RBT_NEXT(uvm_map_addr, first);
3152 
3153 	/* First, check for protection violations. */
3154 	for (iter = first; iter != NULL && iter->start < end;
3155 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3156 		/* Treat memory holes as free space. */
3157 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3158 			continue;
3159 
3160 		if (checkimmutable &&
3161 		    (iter->etype & UVM_ET_IMMUTABLE)) {
3162 			if (iter->protection == (PROT_READ | PROT_WRITE) &&
3163 			    new_prot == PROT_READ) {
3164 				/* Permit RW to R as a data-locking mechanism */
3165 				;
3166 			} else {
3167 				error = EPERM;
3168 				goto out;
3169 			}
3170 		}
3171 		old_prot = iter->protection;
3172 		if (old_prot == PROT_NONE && new_prot != old_prot) {
3173 			dused += uvmspace_dused(
3174 			    map, MAX(start, iter->start), MIN(end, iter->end));
3175 		}
3176 
3177 		if (UVM_ET_ISSUBMAP(iter)) {
3178 			error = EINVAL;
3179 			goto out;
3180 		}
3181 		if ((new_prot & iter->max_protection) != new_prot) {
3182 			error = EACCES;
3183 			goto out;
3184 		}
3185 		if (map == kernel_map &&
3186 		    (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3187 			panic("uvm_map_protect: kernel map W^X violation requested");
3188 	}
3189 
3190 	/* Check limits. */
3191 	if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3192 		vsize_t limit = lim_cur(RLIMIT_DATA);
3193 		dused = ptoa(dused);
3194 		if (limit < dused ||
3195 		    limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3196 			error = ENOMEM;
3197 			goto out;
3198 		}
3199 	}
3200 
3201 	/* only apply UVM_ET_STACK on a mapping changing to RW */
3202 	if (etype && new_prot != (PROT_READ|PROT_WRITE))
3203 		etype = 0;
3204 
3205 	/* Fix protections.  */
3206 	for (iter = first; iter != NULL && iter->start < end;
3207 	    iter = RBT_NEXT(uvm_map_addr, iter)) {
3208 		/* Treat memory holes as free space. */
3209 		if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3210 			continue;
3211 
3212 		old_prot = iter->protection;
3213 
3214 		/*
3215 		 * Skip adapting protection iff old and new protection
3216 		 * are equal.
3217 		 */
3218 		if (set_max) {
3219 			if (old_prot == (new_prot & old_prot) &&
3220 			    iter->max_protection == new_prot)
3221 				continue;
3222 		} else {
3223 			if (old_prot == new_prot)
3224 				continue;
3225 		}
3226 
3227 		UVM_MAP_CLIP_START(map, iter, start);
3228 		UVM_MAP_CLIP_END(map, iter, end);
3229 
3230 		if (set_max) {
3231 			iter->max_protection = new_prot;
3232 			iter->protection &= new_prot;
3233 		} else
3234 			iter->protection = new_prot;
3235 		iter->etype |= etype;	/* potentially add UVM_ET_STACK */
3236 
3237 		/*
3238 		 * update physical map if necessary.  worry about copy-on-write
3239 		 * here -- CHECK THIS XXX
3240 		 */
3241 		if (iter->protection != old_prot) {
3242 			mask = UVM_ET_ISCOPYONWRITE(iter) ?
3243 			    ~PROT_WRITE : PROT_MASK;
3244 
3245 			/* XXX should only wserial++ if no split occurs */
3246 			if (iter->protection & PROT_WRITE)
3247 				map->wserial++;
3248 
3249 			if (map->flags & VM_MAP_ISVMSPACE) {
3250 				if (old_prot == PROT_NONE) {
3251 					((struct vmspace *)map)->vm_dused +=
3252 					    uvmspace_dused(map, iter->start,
3253 					        iter->end);
3254 				}
3255 				if (iter->protection == PROT_NONE) {
3256 					((struct vmspace *)map)->vm_dused -=
3257 					    uvmspace_dused(map, iter->start,
3258 					        iter->end);
3259 				}
3260 			}
3261 
3262 			/* update pmap */
3263 			if ((iter->protection & mask) == PROT_NONE &&
3264 			    VM_MAPENT_ISWIRED(iter)) {
3265 				/*
3266 				 * TODO(ariane) this is stupid. wired_count
3267 				 * is 0 if not wired, otherwise anything
3268 				 * larger than 0 (incremented once each time
3269 				 * wire is called).
3270 				 * Mostly to be able to undo the damage on
3271 				 * failure, not to actually be a wired
3272 				 * refcounter...
3273 				 * Originally: iter->wired_count--;
3274 				 * (don't we have to unwire this in the pmap
3275 				 * as well?)
3276 				 */
3277 				iter->wired_count = 0;
3278 			}
3279 			uvm_map_lock_entry(iter);
3280 			pmap_protect(map->pmap, iter->start, iter->end,
3281 			    iter->protection & mask);
3282 			uvm_map_unlock_entry(iter);
3283 		}
3284 
3285 		/*
3286 		 * If the map is configured to lock any future mappings,
3287 		 * wire this entry now if the old protection was PROT_NONE
3288 		 * and the new protection is not PROT_NONE.
3289 		 */
3290 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3291 		    VM_MAPENT_ISWIRED(iter) == 0 &&
3292 		    old_prot == PROT_NONE &&
3293 		    new_prot != PROT_NONE) {
3294 			if (uvm_map_pageable(map, iter->start, iter->end,
3295 			    FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3296 				/*
3297 				 * If locking the entry fails, remember the
3298 				 * error if it's the first one.  Note we
3299 				 * still continue setting the protection in
3300 				 * the map, but it will return the resource
3301 				 * shortage condition regardless.
3302 				 *
3303 				 * XXX Ignore what the actual error is,
3304 				 * XXX just call it a resource shortage
3305 				 * XXX so that it doesn't get confused
3306 				 * XXX what uvm_map_protect() itself would
3307 				 * XXX normally return.
3308 				 */
3309 				error = ENOMEM;
3310 			}
3311 		}
3312 	}
3313 	pmap_update(map->pmap);
3314 
3315 out:
3316 	if (etype & UVM_ET_STACK)
3317 		map->sserial++;
3318 	vm_map_unlock(map);
3319 	return error;
3320 }
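
/*
 * Illustrative userland sketch, kept out of the build with #if 0:
 * mprotect(2) is the usual entry point into this function.  The example
 * maps an anonymous page RW, fills it, then restricts it to read-only,
 * which mirrors the RW -> R transition that stays permitted even on
 * immutable entries above.  Error handling is minimal.
 */
#if 0
#include <sys/mman.h>
#include <string.h>
#include <err.h>

int
main(void)
{
	size_t len = 4096;
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	memset(p, 0xa5, len);

	/* Drop write permission; further writes would fault (SIGSEGV). */
	if (mprotect(p, len, PROT_READ) == -1)
		err(1, "mprotect");

	/* Requesting PROT_WRITE|PROT_EXEC together would violate W^X. */
	return 0;
}
#endif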
3321 
3322 /*
3323  * uvmspace_alloc: allocate a vmspace structure.
3324  *
3325  * - structure includes vm_map and pmap
3326  * - XXX: no locking on this structure
3327  * - refcnt set to 1, rest must be init'd by caller
3328  */
3329 struct vmspace *
3330 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3331     boolean_t remove_holes)
3332 {
3333 	struct vmspace *vm;
3334 
3335 	vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3336 	uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3337 	return (vm);
3338 }
3339 
3340 /*
3341  * uvmspace_init: initialize a vmspace structure.
3342  *
3343  * - XXX: no locking on this structure
3344  * - refcnt set to 1, rest must be init'd by caller
3345  */
3346 void
3347 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3348     boolean_t pageable, boolean_t remove_holes)
3349 {
3350 	KASSERT(pmap == NULL || pmap == pmap_kernel());
3351 
3352 	if (pmap)
3353 		pmap_reference(pmap);
3354 	else
3355 		pmap = pmap_create();
3356 
3357 	uvm_map_setup(&vm->vm_map, pmap, min, max,
3358 	    (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3359 
3360 	vm->vm_refcnt = 1;
3361 
3362 	if (remove_holes)
3363 		pmap_remove_holes(vm);
3364 }
3365 
3366 /*
3367  * uvmspace_share: share a vmspace between two processes
3368  *
3369  * - used for vfork
3370  */
3371 
3372 struct vmspace *
3373 uvmspace_share(struct process *pr)
3374 {
3375 	struct vmspace *vm = pr->ps_vmspace;
3376 
3377 	uvmspace_addref(vm);
3378 	return vm;
3379 }
3380 
3381 /*
3382  * uvmspace_exec: the process wants to exec a new program
3383  *
3384  * - XXX: no locking on vmspace
3385  */
3386 
3387 void
3388 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3389 {
3390 	struct process *pr = p->p_p;
3391 	struct vmspace *nvm, *ovm = pr->ps_vmspace;
3392 	struct vm_map *map = &ovm->vm_map;
3393 	struct uvm_map_deadq dead_entries;
3394 
3395 	KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3396 	KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3397 	    (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3398 
3399 	pmap_unuse_final(p);   /* before stack addresses go away */
3400 	TAILQ_INIT(&dead_entries);
3401 
3402 	/* see if more than one process is using this vmspace...  */
3403 	if (ovm->vm_refcnt == 1) {
3404 		/*
3405 		 * If pr is the only process using its vmspace then
3406 		 * we can safely recycle that vmspace for the program
3407 		 * that is being exec'd.
3408 		 */
3409 
3410 #ifdef SYSVSHM
3411 		/*
3412 		 * SYSV SHM semantics require us to kill all segments on an exec
3413 		 */
3414 		if (ovm->vm_shm)
3415 			shmexit(ovm);
3416 #endif
3417 
3418 		/*
3419 		 * POSIX 1003.1b -- "lock future mappings" is revoked
3420 		 * when a process execs another program image.
3421 		 */
3422 		vm_map_lock(map);
3423 		vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
3424 
3425 		/*
3426 		 * now unmap the old program
3427 		 *
3428 		 * Instead of attempting to keep the map valid, we simply
3429 		 * nuke all entries and ask uvm_map_setup to reinitialize
3430 		 * the map to the new boundaries.
3431 		 *
3432 		 * uvm_unmap_remove will actually nuke all entries for us
3433 		 * (as in, not replace them with free-memory entries).
3434 		 */
3435 		uvm_unmap_remove(map, map->min_offset, map->max_offset,
3436 		    &dead_entries, TRUE, FALSE, FALSE);
3437 
3438 		KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3439 
3440 		/* Nuke statistics and boundaries. */
3441 		memset(&ovm->vm_startcopy, 0,
3442 		    (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3443 
3444 
3445 		if (end & (vaddr_t)PAGE_MASK) {
3446 			end += 1;
3447 			if (end == 0) /* overflow */
3448 				end -= PAGE_SIZE;
3449 		}
3450 
3451 		/* Setup new boundaries and populate map with entries. */
3452 		map->min_offset = start;
3453 		map->max_offset = end;
3454 		uvm_map_setup_entries(map);
3455 		vm_map_unlock(map);
3456 
3457 		/* but keep MMU holes unavailable */
3458 		pmap_remove_holes(ovm);
3459 	} else {
3460 		/*
3461 		 * pr's vmspace is being shared, so we can't reuse
3462 		 * it for pr since it is still being used for others.
3463 		 * allocate a new vmspace for pr
3464 		 */
3465 		nvm = uvmspace_alloc(start, end,
3466 		    (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3467 
3468 		/* install new vmspace and drop our ref to the old one. */
3469 		pmap_deactivate(p);
3470 		p->p_vmspace = pr->ps_vmspace = nvm;
3471 		pmap_activate(p);
3472 
3473 		uvmspace_free(ovm);
3474 	}
3475 #ifdef PMAP_CHECK_COPYIN
3476 	p->p_vmspace->vm_map.check_copyin_count = 0;	/* disable checks */
3477 #endif
3478 
3479 	/* Release dead entries */
3480 	uvm_unmap_detach(&dead_entries, 0);
3481 }
3482 
3483 /*
3484  * uvmspace_addref: add a reference to a vmspace.
3485  */
3486 void
3487 uvmspace_addref(struct vmspace *vm)
3488 {
3489 	KERNEL_ASSERT_LOCKED();
3490 	KASSERT(vm->vm_refcnt > 0);
3491 
3492 	vm->vm_refcnt++;
3493 }
3494 
3495 /*
3496  * uvmspace_free: free a vmspace data structure
3497  */
3498 void
3499 uvmspace_free(struct vmspace *vm)
3500 {
3501 	KERNEL_ASSERT_LOCKED();
3502 
3503 	if (--vm->vm_refcnt == 0) {
3504 		/*
3505 		 * lock the map, to wait out all other references to it.  delete
3506 		 * all of the mappings and pages they hold, then call the pmap
3507 		 * module to reclaim anything left.
3508 		 */
3509 #ifdef SYSVSHM
3510 		/* Get rid of any SYSV shared memory segments. */
3511 		if (vm->vm_shm != NULL)
3512 			shmexit(vm);
3513 #endif
3514 
3515 		uvm_map_teardown(&vm->vm_map);
3516 		pool_put(&uvm_vmspace_pool, vm);
3517 	}
3518 }
3519 
3520 /*
3521  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3522  * srcmap to the address range [dstaddr, dstaddr + sz) in
3523  * dstmap.
3524  *
3525  * The whole address range in srcmap must be backed by an object
3526  * (no holes).
3527  *
3528  * If successful, the address ranges share memory and the destination
3529  * address range uses the protection flags in prot.
3530  *
3531  * This routine assumes that sz is a multiple of PAGE_SIZE and
3532  * that dstaddr and srcaddr are page-aligned.
3533  */
3534 int
3535 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3536     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3537 {
3538 	int ret = 0;
3539 	vaddr_t unmap_end;
3540 	vaddr_t dstva;
3541 	vsize_t s_off, len, n = sz, remain;
3542 	struct vm_map_entry *first = NULL, *last = NULL;
3543 	struct vm_map_entry *src_entry, *psrc_entry = NULL;
3544 	struct uvm_map_deadq dead;
3545 
3546 	if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3547 		return EINVAL;
3548 
3549 	TAILQ_INIT(&dead);
3550 	vm_map_lock(dstmap);
3551 	vm_map_lock_read(srcmap);
3552 
3553 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3554 		ret = ENOMEM;
3555 		goto exit_unlock;
3556 	}
3557 	if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3558 		ret = EINVAL;
3559 		goto exit_unlock;
3560 	}
3561 
3562 	dstva = dstaddr;
3563 	unmap_end = dstaddr;
3564 	for (; src_entry != NULL;
3565 	    psrc_entry = src_entry,
3566 	    src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3567 		/* hole in address space, bail out */
3568 		if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3569 			break;
3570 		if (src_entry->start >= srcaddr + sz)
3571 			break;
3572 
3573 		if (UVM_ET_ISSUBMAP(src_entry))
3574 			panic("uvm_share: encountered a submap (illegal)");
3575 		if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3576 		    UVM_ET_ISNEEDSCOPY(src_entry))
3577 			panic("uvm_share: non-copy_on_write map entries "
3578 			    "marked needs_copy (illegal)");
3579 
3580 		/*
3581 		 * srcaddr > map entry start? That means we are in the middle
3582 		 * of a map entry, so calculate the offset to use within it.
3583 		 */
3584 		if (srcaddr > src_entry->start)
3585 			s_off = srcaddr - src_entry->start;
3586 		else if (srcaddr == src_entry->start)
3587 			s_off = 0;
3588 		else
3589 			panic("uvm_share: map entry start > srcaddr");
3590 
3591 		remain = src_entry->end - src_entry->start - s_off;
3592 
3593 		/* Determine how many bytes to share in this pass */
3594 		if (n < remain)
3595 			len = n;
3596 		else
3597 			len = remain;
3598 
3599 		if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3600 		    srcmap, src_entry, &dead) == NULL)
3601 			break;
3602 
3603 		n -= len;
3604 		dstva += len;
3605 		srcaddr += len;
3606 		unmap_end = dstva + len;
3607 		if (n == 0)
3608 			goto exit_unlock;
3609 	}
3610 
3611 	ret = EINVAL;
3612 	uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE);
3613 
3614 exit_unlock:
3615 	vm_map_unlock_read(srcmap);
3616 	vm_map_unlock(dstmap);
3617 	uvm_unmap_detach(&dead, 0);
3618 
3619 	return ret;
3620 }
3621 
3622 /*
3623  * Clone map entry into other map.
3624  *
3625  * Mapping will be placed at dstaddr, for the same length.
3626  * Space must be available.
3627  * Reference counters are incremented.
3628  */
3629 struct vm_map_entry *
3630 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3631     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3632     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3633     int mapent_flags, int amap_share_flags)
3634 {
3635 	struct vm_map_entry *new_entry, *first, *last;
3636 
3637 	KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3638 
3639 	/* Create new entry (linked in on creation). Fill in first, last. */
3640 	first = last = NULL;
3641 	if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3642 		panic("uvm_mapent_clone: no space in map for "
3643 		    "entry in empty map");
3644 	}
3645 	new_entry = uvm_map_mkentry(dstmap, first, last,
3646 	    dstaddr, dstlen, mapent_flags, dead, NULL);
3647 	if (new_entry == NULL)
3648 		return NULL;
3649 	/* old_entry -> new_entry */
3650 	new_entry->object = old_entry->object;
3651 	new_entry->offset = old_entry->offset;
3652 	new_entry->aref = old_entry->aref;
3653 	new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3654 	new_entry->protection = prot;
3655 	new_entry->max_protection = maxprot;
3656 	new_entry->inheritance = old_entry->inheritance;
3657 	new_entry->advice = old_entry->advice;
3658 
3659 	/* gain reference to object backing the map (can't be a submap). */
3660 	if (new_entry->aref.ar_amap) {
3661 		new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3662 		amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3663 		    (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3664 		    amap_share_flags);
3665 	}
3666 
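	/* Also take a reference on the backing uvm_object, if there is one. */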
3667 	if (UVM_ET_ISOBJ(new_entry) &&
3668 	    new_entry->object.uvm_obj->pgops->pgo_reference) {
3669 		new_entry->offset += off;
3670 		new_entry->object.uvm_obj->pgops->pgo_reference
3671 		    (new_entry->object.uvm_obj);
3672 	}
3673 
3674 	return new_entry;
3675 }
3676 
3677 struct vm_map_entry *
3678 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3679     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3680     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3681 {
3682 	/*
3683 	 * If old_entry refers to a copy-on-write region that has not yet been
3684 	 * written to (needs_copy flag is set), then we need to allocate a new
3685 	 * amap for old_entry.
3686 	 *
3687 	 * If we do not do this, and the process owning old_entry performs a
3688 	 * copy-on-write later, old_entry and new_entry will refer to different
3689 	 * memory regions, and the memory is no longer shared between processes.
3690 	 *
3691 	 * [in other words, we need to clear needs_copy]
3692 	 */
3693 
3694 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3695 		/* get our own amap, clears needs_copy */
3696 		amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3697 		/* XXXCDC: WAITOK??? */
3698 	}
3699 
3700 	return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3701 	    prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3702 }
3703 
3704 /*
3705  * share the mapping: this means we want the old and
3706  * new entries to share amaps and backing objects.
3707  */
3708 struct vm_map_entry *
3709 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3710     struct vm_map *old_map,
3711     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3712 {
3713 	struct vm_map_entry *new_entry;
3714 
3715 	new_entry = uvm_mapent_share(new_map, old_entry->start,
3716 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3717 	    old_entry->max_protection, old_map, old_entry, dead);
3718 
3719 	return (new_entry);
3720 }
3721 
3722 /*
3723  * copy-on-write the mapping (using mmap's
3724  * MAP_PRIVATE semantics)
3725  *
3726  * allocate new_entry, adjust reference counts.
3727  * (note that new references are read-only).
3728  */
3729 struct vm_map_entry *
3730 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3731     struct vm_map *old_map,
3732     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3733 {
3734 	struct vm_map_entry	*new_entry;
3735 	boolean_t		 protect_child;
3736 
3737 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3738 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3739 	    old_entry->max_protection, old_entry, dead, 0, 0);
3740 
3741 	new_entry->etype |=
3742 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3743 
3744 	/*
3745 	 * the new entry will need an amap.  it will either
3746 	 * need to be copied from the old entry or created
3747 	 * from scratch (if the old entry does not have an
3748 	 * amap).  can we defer this process until later
3749 	 * (by setting "needs_copy") or do we need to copy
3750 	 * the amap now?
3751 	 *
3752 	 * we must copy the amap now if any of the following
3753 	 * conditions hold:
3754 	 * 1. the old entry has an amap and that amap is
3755 	 *    being shared.  this means that the old (parent)
3756 	 *    process is sharing the amap with another
3757 	 *    process.  if we do not clear needs_copy here
3758 	 *    we will end up in a situation where both the
3759 	 *    parent and child process are referring to the
3760 	 *    same amap with "needs_copy" set.  if the
3761 	 *    parent write-faults, the fault routine will
3762 	 *    clear "needs_copy" in the parent by allocating
3763 	 *    a new amap.   this is wrong because the
3764 	 *    parent is supposed to be sharing the old amap
3765 	 *    and the new amap will break that.
3766 	 *
3767 	 * 2. if the old entry has an amap and a non-zero
3768 	 *    wire count then we are going to have to call
3769 	 *    amap_cow_now to avoid page faults in the
3770 	 *    parent process.   since amap_cow_now requires
3771 	 *    "needs_copy" to be clear we might as well
3772 	 *    clear it here as well.
3773 	 *
3774 	 */
3775 	if (old_entry->aref.ar_amap != NULL &&
3776 	    ((amap_flags(old_entry->aref.ar_amap) &
3777 	    AMAP_SHARED) != 0 ||
3778 	    VM_MAPENT_ISWIRED(old_entry))) {
3779 		amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3780 		    0, 0);
3781 		/* XXXCDC: M_WAITOK ... ok? */
3782 	}
3783 
3784 	/*
3785 	 * if the parent's entry is wired down, then the
3786 	 * parent process does not want page faults on
3787 	 * access to that memory.  this means that we
3788 	 * cannot do copy-on-write because we can't write
3789 	 * protect the old entry.   in this case we
3790 	 * resolve all copy-on-write faults now, using
3791 	 * amap_cow_now.   note that we have already
3792 	 * allocated any needed amap (above).
3793 	 */
3794 	if (VM_MAPENT_ISWIRED(old_entry)) {
3795 		/*
3796 		 * resolve all copy-on-write faults now
3797 		 * (note that there is nothing to do if
3798 		 * the old mapping does not have an amap).
3799 		 */
3800 		if (old_entry->aref.ar_amap)
3801 			amap_cow_now(new_map, new_entry);
3802 	} else {
3803 		if (old_entry->aref.ar_amap) {
3804 			/*
3805 			 * setup mappings to trigger copy-on-write faults
3806 			 * we must write-protect the parent if it has
3807 			 * an amap and it is not already "needs_copy"...
3808 			 * if it is already "needs_copy" then the parent
3809 			 * has already been write-protected by a previous
3810 			 * fork operation.
3811 			 *
3812 			 * if we do not write-protect the parent, then
3813 			 * we must be sure to write-protect the child.
3814 			 */
3815 			if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3816 				if (old_entry->max_protection & PROT_WRITE) {
3817 					uvm_map_lock_entry(old_entry);
3818 					pmap_protect(old_map->pmap,
3819 					    old_entry->start,
3820 					    old_entry->end,
3821 					    old_entry->protection &
3822 					    ~PROT_WRITE);
3823 					uvm_map_unlock_entry(old_entry);
3824 					pmap_update(old_map->pmap);
3825 				}
3826 				old_entry->etype |= UVM_ET_NEEDSCOPY;
3827 			}
3828 
3829 			/* parent must now be write-protected */
3830 			protect_child = FALSE;
3831 		} else {
3832 			/*
3833 			 * we only need to protect the child if the
3834 			 * parent has write access.
3835 			 */
3836 			if (old_entry->max_protection & PROT_WRITE)
3837 				protect_child = TRUE;
3838 			else
3839 				protect_child = FALSE;
3840 		}
3841 
3842 		/* protect the child's mappings if necessary */
3843 		if (protect_child) {
3844 			pmap_protect(new_map->pmap, new_entry->start,
3845 			    new_entry->end,
3846 			    new_entry->protection &
3847 			    ~PROT_WRITE);
3848 		}
3849 	}
3850 
3851 	return (new_entry);
3852 }
3853 
3854 /*
3855  * zero the mapping: the new entry will be zero initialized
3856  */
3857 struct vm_map_entry *
3858 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3859     struct vm_map *old_map,
3860     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3861 {
3862 	struct vm_map_entry *new_entry;
3863 
3864 	new_entry = uvm_mapent_clone(new_map, old_entry->start,
3865 	    old_entry->end - old_entry->start, 0, old_entry->protection,
3866 	    old_entry->max_protection, old_entry, dead, 0, 0);
3867 
3868 	new_entry->etype |=
3869 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3870 
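	/*
	 * Drop the amap and object references copied by uvm_mapent_clone():
	 * the child's entry must fault in fresh zero-fill pages instead of
	 * sharing the parent's memory.
	 */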
3871 	if (new_entry->aref.ar_amap) {
3872 		amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3873 		    atop(new_entry->end - new_entry->start), 0);
3874 		new_entry->aref.ar_amap = NULL;
3875 		new_entry->aref.ar_pageoff = 0;
3876 	}
3877 
3878 	if (UVM_ET_ISOBJ(new_entry)) {
3879 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
3880 			new_entry->object.uvm_obj->pgops->pgo_detach(
3881 			    new_entry->object.uvm_obj);
3882 		new_entry->object.uvm_obj = NULL;
3883 		new_entry->etype &= ~UVM_ET_OBJ;
3884 	}
3885 
3886 	return (new_entry);
3887 }
3888 
3889 /*
3890  * uvmspace_fork: fork a process' main map
3891  *
3892  * => create a new vmspace for child process from parent.
3893  * => parent's map must not be locked.
3894  */
3895 struct vmspace *
3896 uvmspace_fork(struct process *pr)
3897 {
3898 	struct vmspace *vm1 = pr->ps_vmspace;
3899 	struct vmspace *vm2;
3900 	struct vm_map *old_map = &vm1->vm_map;
3901 	struct vm_map *new_map;
3902 	struct vm_map_entry *old_entry, *new_entry;
3903 	struct uvm_map_deadq dead;
3904 
3905 	vm_map_lock(old_map);
3906 
3907 	vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3908 	    (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
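	/* Copy the fields from vm_startcopy to the end of the vmspace. */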
3909 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3910 	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3911 	vm2->vm_dused = 0; /* Statistic managed by us. */
3912 	new_map = &vm2->vm_map;
3913 	vm_map_lock(new_map);
3914 
3915 	/* go entry-by-entry */
3916 	TAILQ_INIT(&dead);
3917 	RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3918 		if (old_entry->start == old_entry->end)
3919 			continue;
3920 
3921 		/* first, some sanity checks on the old entry */
3922 		if (UVM_ET_ISSUBMAP(old_entry)) {
3923 			panic("fork: encountered a submap during fork "
3924 			    "(illegal)");
3925 		}
3926 
3927 		if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3928 		    UVM_ET_ISNEEDSCOPY(old_entry)) {
3929 			panic("fork: non-copy_on_write map entry marked "
3930 			    "needs_copy (illegal)");
3931 		}
3932 
3933 		/* Apply inheritance. */
3934 		switch (old_entry->inheritance) {
3935 		case MAP_INHERIT_SHARE:
3936 			new_entry = uvm_mapent_forkshared(vm2, new_map,
3937 			    old_map, old_entry, &dead);
3938 			break;
3939 		case MAP_INHERIT_COPY:
3940 			new_entry = uvm_mapent_forkcopy(vm2, new_map,
3941 			    old_map, old_entry, &dead);
3942 			break;
3943 		case MAP_INHERIT_ZERO:
3944 			new_entry = uvm_mapent_forkzero(vm2, new_map,
3945 			    old_map, old_entry, &dead);
3946 			break;
3947 		default:
3948 			continue;
3949 		}
3950 
3951 		/* Update process statistics. */
3952 		if (!UVM_ET_ISHOLE(new_entry))
3953 			new_map->size += new_entry->end - new_entry->start;
3954 		if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
3955 		    new_entry->protection != PROT_NONE) {
3956 			vm2->vm_dused += uvmspace_dused(
3957 			    new_map, new_entry->start, new_entry->end);
3958 		}
3959 	}
3960 	new_map->flags |= old_map->flags & VM_MAP_SYSCALL_ONCE;
3961 #ifdef PMAP_CHECK_COPYIN
3962 	if (PMAP_CHECK_COPYIN) {
3963 		memcpy(&new_map->check_copyin, &old_map->check_copyin,
3964 		    sizeof(new_map->check_copyin));
3965 		membar_producer();
3966 		new_map->check_copyin_count = old_map->check_copyin_count;
3967 	}
3968 #endif
3969 
3970 	vm_map_unlock(old_map);
3971 	vm_map_unlock(new_map);
3972 
3973 	/*
3974 	 * This can actually happen if multiple entries described a
3975 	 * space in which an entry was inherited.
3976 	 */
3977 	uvm_unmap_detach(&dead, 0);
3978 
3979 #ifdef SYSVSHM
3980 	if (vm1->vm_shm)
3981 		shmfork(vm1, vm2);
3982 #endif
3983 
3984 	return vm2;
3985 }
3986 
3987 /*
3988  * uvm_map_hint: return the beginning of the best area suitable for
3989  * creating a new mapping with "prot" protection.
3990  */
3991 vaddr_t
3992 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
3993     vaddr_t maxaddr)
3994 {
3995 	vaddr_t addr;
3996 	vaddr_t spacing;
3997 
3998 #ifdef __i386__
3999 	/*
4000 	 * If executable, skip the first two pages; otherwise start
4001 	 * after the data + heap region.
4002 	 */
4003 	if ((prot & PROT_EXEC) != 0 &&
4004 	    (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
4005 		addr = (PAGE_SIZE*2) +
4006 		    (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
4007 		return (round_page(addr));
4008 	}
4009 #endif
4010 
4011 #if defined (__LP64__)
4012 	spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4013 #else
4014 	spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4015 #endif
4016 
4017 	/*
4018 	 * Start malloc/mmap after the brk.
4019 	 */
4020 	addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4021 	addr = MAX(addr, minaddr);
4022 
4023 	if (addr < maxaddr) {
4024 		while (spacing > maxaddr - addr)
4025 			spacing >>= 1;
4026 	}
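	/* Randomize the start address within the spacing mask. */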
4027 	addr += arc4random() & spacing;
4028 	return (round_page(addr));
4029 }
4030 
4031 /*
4032  * uvm_map_submap: punch down part of a map into a submap
4033  *
4034  * => only the kernel_map is allowed to be submapped
4035  * => the purpose of submapping is to break up the locking granularity
4036  *	of a larger map
4037  * => the range specified must have been mapped previously with a uvm_map()
4038  *	call [with uobj==NULL] to create a blank map entry in the main map.
4039  *	[And it had better still be blank!]
4040  * => maps which contain submaps should never be copied or forked.
4041  * => to remove a submap, use uvm_unmap() on the main map
4042  *	and then uvm_map_deallocate() the submap.
4043  * => main map must be unlocked.
4044  * => submap must have been init'd and have a zero reference count.
4045  *	[need not be locked as we don't actually reference it]
4046  */
4047 int
4048 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4049     struct vm_map *submap)
4050 {
4051 	struct vm_map_entry *entry;
4052 	int result;
4053 
4054 	if (start > map->max_offset || end > map->max_offset ||
4055 	    start < map->min_offset || end < map->min_offset)
4056 		return EINVAL;
4057 
4058 	vm_map_lock(map);
4059 
4060 	if (uvm_map_lookup_entry(map, start, &entry)) {
4061 		UVM_MAP_CLIP_START(map, entry, start);
4062 		UVM_MAP_CLIP_END(map, entry, end);
4063 	} else
4064 		entry = NULL;
4065 
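	/* The range must exactly cover a single blank (no obj/amap) entry. */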
4066 	if (entry != NULL &&
4067 	    entry->start == start && entry->end == end &&
4068 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4069 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4070 		entry->etype |= UVM_ET_SUBMAP;
4071 		entry->object.sub_map = submap;
4072 		entry->offset = 0;
4073 		uvm_map_reference(submap);
4074 		result = 0;
4075 	} else
4076 		result = EINVAL;
4077 
4078 	vm_map_unlock(map);
4079 	return result;
4080 }
4081 
4082 /*
4083  * uvm_map_checkprot: check protection in map
4084  *
4085  * => the range must be fully allocated and allow the specified protection.
4086  * => map must be read or write locked by caller.
4087  */
4088 boolean_t
4089 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4090     vm_prot_t protection)
4091 {
4092 	struct vm_map_entry *entry;
4093 
4094 	vm_map_assert_anylock(map);
4095 
4096 	if (start < map->min_offset || end > map->max_offset || start > end)
4097 		return FALSE;
4098 	if (start == end)
4099 		return TRUE;
4100 
4101 	/*
4102 	 * Iterate entries.
4103 	 */
4104 	for (entry = uvm_map_entrybyaddr(&map->addr, start);
4105 	    entry != NULL && entry->start < end;
4106 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4107 		/* Fail if a hole is found. */
4108 		if (UVM_ET_ISHOLE(entry) ||
4109 		    (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4110 			return FALSE;
4111 
4112 		/* Check protection. */
4113 		if ((entry->protection & protection) != protection)
4114 			return FALSE;
4115 	}
4116 	return TRUE;
4117 }
4118 
4119 /*
4120  * uvm_map_create: create map
4121  */
4122 vm_map_t
4123 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4124 {
4125 	vm_map_t map;
4126 
4127 	map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4128 	uvm_map_setup(map, pmap, min, max, flags);
4129 	return (map);
4130 }
4131 
4132 /*
4133  * uvm_map_deallocate: drop reference to a map
4134  *
4135  * => caller must not lock map
4136  * => we will zap map if ref count goes to zero
4137  */
4138 void
4139 uvm_map_deallocate(vm_map_t map)
4140 {
4141 	int c;
4142 	struct uvm_map_deadq dead;
4143 
4144 	c = atomic_dec_int_nv(&map->ref_count);
4145 	if (c > 0) {
4146 		return;
4147 	}
4148 
4149 	/*
4150 	 * all references gone.   unmap and free.
4151 	 *
4152 	 * No lock required: we are the only one to access this map.
4153 	 */
4154 	TAILQ_INIT(&dead);
4155 	uvm_tree_sanity(map, __FILE__, __LINE__);
4156 	vm_map_lock(map);
4157 	uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4158 	    TRUE, FALSE, FALSE);
4159 	vm_map_unlock(map);
4160 	pmap_destroy(map->pmap);
4161 	KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4162 	free(map, M_VMMAP, sizeof *map);
4163 
4164 	uvm_unmap_detach(&dead, 0);
4165 }
4166 
4167 /*
4168  * uvm_map_inherit: set inheritance code for range of addrs in map.
4169  *
4170  * => map must be unlocked
4171  * => note that the inherit code is used during a "fork".  see fork
4172  *	code for details.
4173  */
4174 int
4175 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4176     vm_inherit_t new_inheritance)
4177 {
4178 	struct vm_map_entry *entry;
4179 
4180 	switch (new_inheritance) {
4181 	case MAP_INHERIT_NONE:
4182 	case MAP_INHERIT_COPY:
4183 	case MAP_INHERIT_SHARE:
4184 	case MAP_INHERIT_ZERO:
4185 		break;
4186 	default:
4187 		return (EINVAL);
4188 	}
4189 
4190 	if (start > end)
4191 		return EINVAL;
4192 	start = MAX(start, map->min_offset);
4193 	end = MIN(end, map->max_offset);
4194 	if (start >= end)
4195 		return 0;
4196 
4197 	vm_map_lock(map);
4198 
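	/* Clip the first entry if it overlaps start, else step to the next. */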
4199 	entry = uvm_map_entrybyaddr(&map->addr, start);
4200 	if (entry->end > start)
4201 		UVM_MAP_CLIP_START(map, entry, start);
4202 	else
4203 		entry = RBT_NEXT(uvm_map_addr, entry);
4204 
4205 	while (entry != NULL && entry->start < end) {
4206 		UVM_MAP_CLIP_END(map, entry, end);
4207 		entry->inheritance = new_inheritance;
4208 		entry = RBT_NEXT(uvm_map_addr, entry);
4209 	}
4210 
4211 	vm_map_unlock(map);
4212 	return (0);
4213 }
4214 
4215 #ifdef PMAP_CHECK_COPYIN
4216 static void inline
4217 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4218 {
4219 	if (PMAP_CHECK_COPYIN == 0 ||
4220 	    map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX)
4221 		return;
4222 	vm_map_assert_wrlock(map);
4223 	map->check_copyin[map->check_copyin_count].start = start;
4224 	map->check_copyin[map->check_copyin_count].end = end;
4225 	membar_producer();
4226 	map->check_copyin_count++;
4227 }
4228 
4229 /*
4230  * uvm_map_check_copyin_add: remember regions which are X-only for copyin(),
4231  * copyinstr(), uiomove(), and others
4232  *
4233  * => map must be unlocked
4234  */
4235 int
4236 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4237 {
4238 	if (start > end)
4239 		return EINVAL;
4240 	start = MAX(start, map->min_offset);
4241 	end = MIN(end, map->max_offset);
4242 	if (start >= end)
4243 		return 0;
4244 	vm_map_lock(map);
4245 	check_copyin_add(map, start, end);
4246 	vm_map_unlock(map);
4247 	return (0);
4248 }
4249 #endif /* PMAP_CHECK_COPYIN */
4250 
4251 /*
4252  * uvm_map_syscall: permit system calls for range of addrs in map.
4253  *
4254  * => map must be unlocked
4255  */
4256 int
4257 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
4258 {
4259 	struct vm_map_entry *entry;
4260 
4261 	if (start > end)
4262 		return EINVAL;
4263 	start = MAX(start, map->min_offset);
4264 	end = MIN(end, map->max_offset);
4265 	if (start >= end)
4266 		return 0;
4267 	if (map->flags & VM_MAP_SYSCALL_ONCE)	/* only allowed once */
4268 		return (EPERM);
4269 
4270 	vm_map_lock(map);
4271 
4272 	entry = uvm_map_entrybyaddr(&map->addr, start);
4273 	if (entry->end > start)
4274 		UVM_MAP_CLIP_START(map, entry, start);
4275 	else
4276 		entry = RBT_NEXT(uvm_map_addr, entry);
4277 
4278 	while (entry != NULL && entry->start < end) {
4279 		UVM_MAP_CLIP_END(map, entry, end);
4280 		entry->etype |= UVM_ET_SYSCALL;
4281 		entry = RBT_NEXT(uvm_map_addr, entry);
4282 	}
4283 
4284 #ifdef PMAP_CHECK_COPYIN
4285 	check_copyin_add(map, start, end);	/* Add libc's text segment */
4286 #endif
4287 	map->wserial++;
4288 	map->flags |= VM_MAP_SYSCALL_ONCE;
4289 	vm_map_unlock(map);
4290 	return (0);
4291 }
4292 
4293 /*
4294  * uvm_map_immutable: block mapping/mprotect for range of addrs in map.
4295  *
4296  * => map must be unlocked
4297  */
4298 int
4299 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut)
4300 {
4301 	struct vm_map_entry *entry;
4302 
4303 	if (start > end)
4304 		return EINVAL;
4305 	start = MAX(start, map->min_offset);
4306 	end = MIN(end, map->max_offset);
4307 	if (start >= end)
4308 		return 0;
4309 
4310 	vm_map_lock(map);
4311 
4312 	entry = uvm_map_entrybyaddr(&map->addr, start);
4313 	if (entry->end > start)
4314 		UVM_MAP_CLIP_START(map, entry, start);
4315 	else
4316 		entry = RBT_NEXT(uvm_map_addr, entry);
4317 
4318 	while (entry != NULL && entry->start < end) {
4319 		UVM_MAP_CLIP_END(map, entry, end);
4320 		if (imut)
4321 			entry->etype |= UVM_ET_IMMUTABLE;
4322 		else
4323 			entry->etype &= ~UVM_ET_IMMUTABLE;
4324 		entry = RBT_NEXT(uvm_map_addr, entry);
4325 	}
4326 
4327 	map->wserial++;
4328 	vm_map_unlock(map);
4329 	return (0);
4330 }
4331 
4332 /*
4333  * uvm_map_advice: set advice code for range of addrs in map.
4334  *
4335  * => map must be unlocked
4336  */
4337 int
4338 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4339 {
4340 	struct vm_map_entry *entry;
4341 
4342 	switch (new_advice) {
4343 	case MADV_NORMAL:
4344 	case MADV_RANDOM:
4345 	case MADV_SEQUENTIAL:
4346 		break;
4347 	default:
4348 		return (EINVAL);
4349 	}
4350 
4351 	if (start > end)
4352 		return EINVAL;
4353 	start = MAX(start, map->min_offset);
4354 	end = MIN(end, map->max_offset);
4355 	if (start >= end)
4356 		return 0;
4357 
4358 	vm_map_lock(map);
4359 
4360 	entry = uvm_map_entrybyaddr(&map->addr, start);
4361 	if (entry != NULL && entry->end > start)
4362 		UVM_MAP_CLIP_START(map, entry, start);
4363 	else if (entry != NULL)
4364 		entry = RBT_NEXT(uvm_map_addr, entry);
4365 
4366 	/*
4367 	 * XXXJRT: disallow holes?
4368 	 */
4369 	while (entry != NULL && entry->start < end) {
4370 		UVM_MAP_CLIP_END(map, entry, end);
4371 		entry->advice = new_advice;
4372 		entry = RBT_NEXT(uvm_map_addr, entry);
4373 	}
4374 
4375 	vm_map_unlock(map);
4376 	return (0);
4377 }
4378 
4379 /*
4380  * uvm_map_extract: extract a mapping from a map and put it somewhere
4381  * in the kernel_map, setting protection to max_prot.
4382  *
4383  * => map should be unlocked (we will write lock it and kernel_map)
4384  * => returns 0 on success, error code otherwise
4385  * => start must be page aligned
4386  * => len must be page sized
4387  * => flags:
4388  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4389  * Mappings are QREF's.
4390  */
4391 int
4392 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4393     vaddr_t *dstaddrp, int flags)
4394 {
4395 	struct uvm_map_deadq dead;
4396 	struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4397 	vaddr_t dstaddr;
4398 	vaddr_t end;
4399 	vaddr_t cp_start;
4400 	vsize_t cp_len, cp_off;
4401 	int error;
4402 
4403 	TAILQ_INIT(&dead);
4404 	end = start + len;
4405 
4406 	/*
4407 	 * Sanity check on the parameters.
4408 	 * Also, since the mapping may not contain gaps, error out if the
4409 	 * mapped area is not in source map.
4410 	 * mapped area is not in the source map.
4411 	if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4412 	    (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4413 		return EINVAL;
4414 	if (start < srcmap->min_offset || end > srcmap->max_offset)
4415 		return EINVAL;
4416 
4417 	/* Initialize dead entries. Handle len == 0 case. */
4418 	if (len == 0)
4419 		return 0;
4420 
4421 	/* Acquire lock on srcmap. */
4422 	vm_map_lock(srcmap);
4423 
4424 	/* Look up the first entry covering <start,len> in srcmap. */
4425 	first = uvm_map_entrybyaddr(&srcmap->addr, start);
4426 
4427 	/* Check that the range is contiguous. */
4428 	for (entry = first; entry != NULL && entry->end < end;
4429 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4430 		if (VMMAP_FREE_END(entry) != entry->end ||
4431 		    UVM_ET_ISHOLE(entry)) {
4432 			error = EINVAL;
4433 			goto fail;
4434 		}
4435 	}
4436 	if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4437 		error = EINVAL;
4438 		goto fail;
4439 	}
4440 
4441 	/*
4442 	 * Handle need-copy flag.
4443 	 */
4444 	for (entry = first; entry != NULL && entry->start < end;
4445 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4446 		if (UVM_ET_ISNEEDSCOPY(entry))
4447 			amap_copy(srcmap, entry, M_NOWAIT,
4448 			    UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4449 		if (UVM_ET_ISNEEDSCOPY(entry)) {
4450 			/*
4451 			 * amap_copy failure
4452 			 */
4453 			error = ENOMEM;
4454 			goto fail;
4455 		}
4456 	}
4457 
4458 	/* Lock destination map (kernel_map). */
4459 	vm_map_lock(kernel_map);
4460 
4461 	if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4462 	    MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4463 	    PROT_NONE, 0) != 0) {
4464 		error = ENOMEM;
4465 		goto fail2;
4466 	}
4467 	*dstaddrp = dstaddr;
4468 
4469 	/*
4470 	 * We now have srcmap and kernel_map locked.
4471 	 * dstaddr contains the destination offset in dstmap.
4472 	 */
4473 	/* step 1: start looping through map entries, performing extraction. */
4474 	for (entry = first; entry != NULL && entry->start < end;
4475 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4476 		KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4477 		if (UVM_ET_ISHOLE(entry))
4478 			continue;
4479 
4480 		/* Calculate uvm_mapent_clone parameters. */
4481 		cp_start = entry->start;
4482 		if (cp_start < start) {
4483 			cp_off = start - cp_start;
4484 			cp_start = start;
4485 		} else
4486 			cp_off = 0;
4487 		cp_len = MIN(entry->end, end) - cp_start;
4488 
4489 		newentry = uvm_mapent_clone(kernel_map,
4490 		    cp_start - start + dstaddr, cp_len, cp_off,
4491 		    entry->protection, entry->max_protection,
4492 		    entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4493 		if (newentry == NULL) {
4494 			error = ENOMEM;
4495 			goto fail2_unmap;
4496 		}
4497 		kernel_map->size += cp_len;
4498 
4499 		/* Figure out the best protection */
4500 		if ((flags & UVM_EXTRACT_FIXPROT) &&
4501 		    newentry->protection != PROT_NONE)
4502 			newentry->protection = newentry->max_protection;
4503 		newentry->protection &= ~PROT_EXEC;
4504 	}
4505 	pmap_update(kernel_map->pmap);
4506 
4507 	error = 0;
4508 
4509 	/* Unmap copied entries on failure. */
4510 fail2_unmap:
4511 	if (error) {
4512 		uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4513 		    FALSE, TRUE, FALSE);
4514 	}
4515 
4516 	/* Release maps, release dead entries. */
4517 fail2:
4518 	vm_map_unlock(kernel_map);
4519 
4520 fail:
4521 	vm_map_unlock(srcmap);
4522 
4523 	uvm_unmap_detach(&dead, 0);
4524 
4525 	return error;
4526 }
4527 
4528 /*
4529  * uvm_map_clean: clean out a map range
4530  *
4531  * => valid flags:
4532  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
4533  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
4534  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4535  *   if (flags & PGO_FREE): any cached pages are freed after clean
4536  * => returns an error if any part of the specified range isn't mapped
4537  * => never a need to flush amap layer since the anonymous memory has
4538  *	no permanent home, but may deactivate pages there
4539  * => called from sys_msync() and sys_madvise()
4540  * => caller must not have map locked
4541  */
4542 
4543 int
4544 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4545 {
4546 	struct vm_map_entry *first, *entry;
4547 	struct vm_amap *amap;
4548 	struct vm_anon *anon;
4549 	struct vm_page *pg;
4550 	struct uvm_object *uobj;
4551 	vaddr_t cp_start, cp_end;
4552 	int refs;
4553 	int error;
4554 	boolean_t rv;
4555 
4556 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4557 	    (PGO_FREE|PGO_DEACTIVATE));
4558 
4559 	if (start > end || start < map->min_offset || end > map->max_offset)
4560 		return EINVAL;
4561 
4562 	vm_map_lock(map);
4563 	first = uvm_map_entrybyaddr(&map->addr, start);
4564 
4565 	/* Make a first pass to check for holes. */
4566 	for (entry = first; entry != NULL && entry->start < end;
4567 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4568 		if (UVM_ET_ISSUBMAP(entry)) {
4569 			vm_map_unlock(map);
4570 			return EINVAL;
4571 		}
4572 		if (UVM_ET_ISSUBMAP(entry) ||
4573 		    UVM_ET_ISHOLE(entry) ||
4574 		    (entry->end < end &&
4575 		    VMMAP_FREE_END(entry) != entry->end)) {
4576 			vm_map_unlock(map);
4577 			return EFAULT;
4578 		}
4579 	}
4580 
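	/*
	 * Mark the map busy before dropping the lock: the busy flag keeps
	 * other threads from write-locking the map while the flush below
	 * may sleep.
	 */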
4581 	vm_map_busy(map);
4582 	vm_map_unlock(map);
4583 	error = 0;
4584 	for (entry = first; entry != NULL && entry->start < end;
4585 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
4586 		amap = entry->aref.ar_amap;	/* top layer */
4587 		if (UVM_ET_ISOBJ(entry))
4588 			uobj = entry->object.uvm_obj;
4589 		else
4590 			uobj = NULL;
4591 
4592 		/*
4593 		 * No amap cleaning necessary if:
4594 		 *  - there's no amap
4595 		 *  - we're not deactivating or freeing pages.
4596 		 */
4597 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4598 			goto flush_object;
4599 
4600 		cp_start = MAX(entry->start, start);
4601 		cp_end = MIN(entry->end, end);
4602 
4603 		amap_lock(amap);
4604 		for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4605 			anon = amap_lookup(&entry->aref,
4606 			    cp_start - entry->start);
4607 			if (anon == NULL)
4608 				continue;
4609 
4610 			KASSERT(anon->an_lock == amap->am_lock);
4611 			pg = anon->an_page;
4612 			if (pg == NULL) {
4613 				continue;
4614 			}
4615 			KASSERT(pg->pg_flags & PQ_ANON);
4616 
4617 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4618 			/*
4619 			 * XXX In these first 3 cases, we always just
4620 			 * XXX deactivate the page.  We may want to
4621 			 * XXX handle the different cases more
4622 			 * XXX specifically, in the future.
4623 			 */
4624 			case PGO_CLEANIT|PGO_FREE:
4625 			case PGO_CLEANIT|PGO_DEACTIVATE:
4626 			case PGO_DEACTIVATE:
4627 deactivate_it:
4628 				/* skip the page if it's wired */
4629 				if (pg->wire_count != 0)
4630 					break;
4631 
4632 				uvm_lock_pageq();
4633 
4634 				KASSERT(pg->uanon == anon);
4635 
4636 				/* zap all mappings for the page. */
4637 				pmap_page_protect(pg, PROT_NONE);
4638 
4639 				/* ...and deactivate the page. */
4640 				uvm_pagedeactivate(pg);
4641 
4642 				uvm_unlock_pageq();
4643 				break;
4644 			case PGO_FREE:
4645 				/*
4646 				 * If there are multiple references to
4647 				 * the amap, just deactivate the page.
4648 				 */
4649 				if (amap_refs(amap) > 1)
4650 					goto deactivate_it;
4651 
4652 				/* XXX skip the page if it's wired */
4653 				if (pg->wire_count != 0) {
4654 					break;
4655 				}
4656 				amap_unadd(&entry->aref,
4657 				    cp_start - entry->start);
4658 				refs = --anon->an_ref;
4659 				if (refs == 0)
4660 					uvm_anfree(anon);
4661 				break;
4662 			default:
4663 				panic("uvm_map_clean: weird flags");
4664 			}
4665 		}
4666 		amap_unlock(amap);
4667 
4668 flush_object:
4669 		cp_start = MAX(entry->start, start);
4670 		cp_end = MIN(entry->end, end);
4671 
4672 		/*
4673 		 * flush pages if we've got a valid backing object.
4674 		 *
4675 		 * Don't PGO_FREE if we don't have write permission
4676 		 * and don't flush if this is a copy-on-write object
4677 		 * since we can't know our permissions on it.
4678 		 */
4679 		if (uobj != NULL &&
4680 		    ((flags & PGO_FREE) == 0 ||
4681 		     ((entry->max_protection & PROT_WRITE) != 0 &&
4682 		      (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4683 			rw_enter(uobj->vmobjlock, RW_WRITE);
4684 			rv = uobj->pgops->pgo_flush(uobj,
4685 			    cp_start - entry->start + entry->offset,
4686 			    cp_end - entry->start + entry->offset, flags);
4687 			rw_exit(uobj->vmobjlock);
4688 
4689 			if (rv == FALSE)
4690 				error = EFAULT;
4691 		}
4692 	}
4693 
4694 	vm_map_unbusy(map);
4695 	return error;
4696 }
4697 
4698 /*
4699  * UVM_MAP_CLIP_END implementation
4700  */
4701 void
4702 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4703 {
4704 	struct vm_map_entry *tmp;
4705 
4706 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4707 	tmp = uvm_mapent_alloc(map, 0);
4708 
4709 	/* Invoke splitentry. */
4710 	uvm_map_splitentry(map, entry, tmp, addr);
4711 }
4712 
4713 /*
4714  * UVM_MAP_CLIP_START implementation
4715  *
4716  * Clippers are required to not change the pointers to the entry they are
4717  * clipping on.
4718  * Since uvm_map_splitentry turns the original entry into the lowest
4719  * entry (address wise) we do a swap between the new entry and the original
4720  * entry, prior to calling uvm_map_splitentry.
4721  */
4722 void
4723 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4724 {
4725 	struct vm_map_entry *tmp;
4726 	struct uvm_addr_state *free;
4727 
4728 	/* Unlink original. */
4729 	free = uvm_map_uaddr_e(map, entry);
4730 	uvm_mapent_free_remove(map, free, entry);
4731 	uvm_mapent_addr_remove(map, entry);
4732 
4733 	/* Copy entry. */
4734 	KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4735 	tmp = uvm_mapent_alloc(map, 0);
4736 	uvm_mapent_copy(entry, tmp);
4737 
4738 	/* Put new entry in place of original entry. */
4739 	uvm_mapent_addr_insert(map, tmp);
4740 	uvm_mapent_free_insert(map, free, tmp);
4741 
4742 	/* Invoke splitentry. */
4743 	uvm_map_splitentry(map, tmp, entry, addr);
4744 }
4745 
4746 /*
4747  * Boundary fixer: clip max down to bound if min < bound < max.
4748  */
4749 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4750 static inline vaddr_t
4751 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4752 {
4753 	return (min < bound && max > bound) ? bound : max;
4754 }
4755 
4756 /*
4757  * Choose free list based on address at start of free space.
4758  *
4759  * The uvm_addr_state returned contains addr and is the first of:
4760  * - uaddr_exe
4761  * - uaddr_brk_stack
4762  * - uaddr_any
4763  */
4764 struct uvm_addr_state*
4765 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4766 {
4767 	struct uvm_addr_state *uaddr;
4768 	int i;
4769 
4770 	/* Special case the first page, to prevent mmap from returning 0. */
4771 	if (addr < VMMAP_MIN_ADDR)
4772 		return NULL;
4773 
4774 	/* Upper bound for kernel maps at uvm_maxkaddr. */
4775 	if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4776 		if (addr >= uvm_maxkaddr)
4777 			return NULL;
4778 	}
4779 
4780 	/* Is the address inside the exe-only map? */
4781 	if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4782 	    addr < map->uaddr_exe->uaddr_maxaddr)
4783 		return map->uaddr_exe;
4784 
4785 	/* Check if the space falls inside brk/stack area. */
4786 	if ((addr >= map->b_start && addr < map->b_end) ||
4787 	    (addr >= map->s_start && addr < map->s_end)) {
4788 		if (map->uaddr_brk_stack != NULL &&
4789 		    addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4790 		    addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4791 			return map->uaddr_brk_stack;
4792 		} else
4793 			return NULL;
4794 	}
4795 
4796 	/*
4797 	 * Check the other selectors.
4798 	 *
4799 	 * These selectors are only marked as the owner if they have insert
4800 	 * functions.
4801 	 */
4802 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4803 		uaddr = map->uaddr_any[i];
4804 		if (uaddr == NULL)
4805 			continue;
4806 		if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4807 			continue;
4808 
4809 		if (addr >= uaddr->uaddr_minaddr &&
4810 		    addr < uaddr->uaddr_maxaddr)
4811 			return uaddr;
4812 	}
4813 
4814 	return NULL;
4815 }
4816 
4817 /*
4818  * Choose free list based on address at start of free space.
4819  *
4820  * The uvm_addr_state returned contains addr and is the first of:
4821  * - uaddr_exe
4822  * - uaddr_brk_stack
4823  * - uaddr_any
4824  */
4825 struct uvm_addr_state*
4826 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4827 {
4828 	return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4829 }
4830 
4831 /*
4832  * Returns the first free-memory boundary that is crossed by [min-max].
4833  */
4834 vsize_t
4835 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4836 {
4837 	struct uvm_addr_state	*uaddr;
4838 	int			 i;
4839 
4840 	/* Never return first page. */
4841 	max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4842 
4843 	/* Treat the maxkaddr special, if the map is a kernel_map. */
4844 	if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4845 		max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4846 
4847 	/* Check for exe-only boundaries. */
4848 	if (map->uaddr_exe != NULL) {
4849 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4850 		max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4851 	}
4852 
4853 	/* Check for brk/stack boundaries. */
4854 	if (map->uaddr_brk_stack != NULL) {
4855 		max = uvm_map_boundfix(min, max,
4856 		    map->uaddr_brk_stack->uaddr_minaddr);
4857 		max = uvm_map_boundfix(min, max,
4858 		    map->uaddr_brk_stack->uaddr_maxaddr);
4859 	}
4860 
4861 	/* Check other boundaries. */
4862 	for (i = 0; i < nitems(map->uaddr_any); i++) {
4863 		uaddr = map->uaddr_any[i];
4864 		if (uaddr != NULL) {
4865 			max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4866 			max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4867 		}
4868 	}
4869 
4870 	/* Boundaries at stack and brk() area. */
4871 	max = uvm_map_boundfix(min, max, map->s_start);
4872 	max = uvm_map_boundfix(min, max, map->s_end);
4873 	max = uvm_map_boundfix(min, max, map->b_start);
4874 	max = uvm_map_boundfix(min, max, map->b_end);
4875 
4876 	return max;
4877 }
4878 
4879 /*
4880  * Update map allocation start and end addresses from proc vmspace.
4881  */
4882 void
4883 uvm_map_vmspace_update(struct vm_map *map,
4884     struct uvm_map_deadq *dead, int flags)
4885 {
4886 	struct vmspace *vm;
4887 	vaddr_t b_start, b_end, s_start, s_end;
4888 
4889 	KASSERT(map->flags & VM_MAP_ISVMSPACE);
4890 	KASSERT(offsetof(struct vmspace, vm_map) == 0);
4891 
4892 	/*
4893 	 * Derive actual allocation boundaries from vmspace.
4894 	 */
4895 	vm = (struct vmspace *)map;
4896 	b_start = (vaddr_t)vm->vm_daddr;
4897 	b_end   = b_start + BRKSIZ;
4898 	s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4899 	s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4900 #ifdef DIAGNOSTIC
4901 	if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4902 	    (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4903 	    (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4904 	    (s_end & (vaddr_t)PAGE_MASK) != 0) {
4905 		panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4906 		    "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4907 		    vm, b_start, b_end, s_start, s_end);
4908 	}
4909 #endif
4910 
4911 	if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4912 	    map->s_start == s_start && map->s_end == s_end))
4913 		return;
4914 
4915 	uvm_map_freelist_update(map, dead, b_start, b_end,
4916 	    s_start, s_end, flags);
4917 }
4918 
4919 /*
4920  * Grow kernel memory.
4921  *
4922  * This function is only called for kernel maps when an allocation fails.
4923  *
4924  * If the map has a gap that is large enough to accommodate alloc_sz, this
4925  * function will make sure the free lists include it.
4926  */
4927 void
4928 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4929     vsize_t alloc_sz, int flags)
4930 {
4931 	vsize_t sz;
4932 	vaddr_t end;
4933 	struct vm_map_entry *entry;
4934 
4935 	/* Kernel memory only. */
4936 	KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4937 	/* Destroy free list. */
4938 	uvm_map_freelist_update_clear(map, dead);
4939 
4940 	/* Include the guard page in the hard minimum requirement of alloc_sz. */
4941 	if (map->flags & VM_MAP_GUARDPAGES)
4942 		alloc_sz += PAGE_SIZE;
4943 
4944 	/*
4945 	 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4946 	 *
4947 	 * Don't handle the case where the multiplication overflows:
4948 	 * if that happens, the allocation is probably too big anyway.
4949 	 */
4950 	sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4951 
4952 	/*
4953 	 * Walk forward until a gap large enough for alloc_sz shows up.
4954 	 *
4955 	 * We assume the kernel map has no boundaries.
4956 	 * uvm_maxkaddr may be zero.
4957 	 */
4958 	end = MAX(uvm_maxkaddr, map->min_offset);
4959 	entry = uvm_map_entrybyaddr(&map->addr, end);
4960 	while (entry && entry->fspace < alloc_sz)
4961 		entry = RBT_NEXT(uvm_map_addr, entry);
4962 	if (entry) {
4963 		end = MAX(VMMAP_FREE_START(entry), end);
4964 		end += MIN(sz, map->max_offset - end);
4965 	} else
4966 		end = map->max_offset;
4967 
4968 	/* Reserve pmap entries. */
4969 #ifdef PMAP_GROWKERNEL
4970 	uvm_maxkaddr = pmap_growkernel(end);
4971 #else
4972 	uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4973 #endif
4974 
4975 	/* Rebuild free list. */
4976 	uvm_map_freelist_update_refill(map, flags);
4977 }
4978 
4979 /*
4980  * Freelist update subfunction: unlink all entries from freelists.
4981  */
4982 void
4983 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
4984 {
4985 	struct uvm_addr_state *free;
4986 	struct vm_map_entry *entry, *prev, *next;
4987 
4988 	prev = NULL;
4989 	for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
4990 	    entry = next) {
4991 		next = RBT_NEXT(uvm_map_addr, entry);
4992 
4993 		free = uvm_map_uaddr_e(map, entry);
4994 		uvm_mapent_free_remove(map, free, entry);
4995 
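		/*
		 * Entries that map nothing (start == end) only describe free
		 * space: merge that space into the previous entry and free
		 * them.
		 */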
4996 		if (prev != NULL && entry->start == entry->end) {
4997 			prev->fspace += VMMAP_FREE_END(entry) - entry->end;
4998 			uvm_mapent_addr_remove(map, entry);
4999 			DEAD_ENTRY_PUSH(dead, entry);
5000 		} else
5001 			prev = entry;
5002 	}
5003 }
5004 
5005 /*
5006  * Freelist update subfunction: refill the freelists with entries.
5007  */
5008 void
5009 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
5010 {
5011 	struct vm_map_entry *entry;
5012 	vaddr_t min, max;
5013 
5014 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5015 		min = VMMAP_FREE_START(entry);
5016 		max = VMMAP_FREE_END(entry);
5017 		entry->fspace = 0;
5018 
5019 		entry = uvm_map_fix_space(map, entry, min, max, flags);
5020 	}
5021 
5022 	uvm_tree_sanity(map, __FILE__, __LINE__);
5023 }
5024 
5025 /*
5026  * Change {b,s}_{start,end} allocation ranges and associated free lists.
5027  */
5028 void
5029 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
5030     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
5031 {
5032 	KDASSERT(b_end >= b_start && s_end >= s_start);
5033 	vm_map_assert_wrlock(map);
5034 
5035 	/* Clear all free lists. */
5036 	uvm_map_freelist_update_clear(map, dead);
5037 
5038 	/* Apply new bounds. */
5039 	map->b_start = b_start;
5040 	map->b_end   = b_end;
5041 	map->s_start = s_start;
5042 	map->s_end   = s_end;
5043 
5044 	/* Refill free lists. */
5045 	uvm_map_freelist_update_refill(map, flags);
5046 }
5047 
5048 /*
5049  * Assign a uvm_addr_state to the specified pointer in vm_map.
5050  *
5051  * May sleep.
5052  */
5053 void
5054 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
5055     struct uvm_addr_state *newval)
5056 {
5057 	struct uvm_map_deadq dead;
5058 
5059 	/* Pointer which must be in this map. */
5060 	KASSERT(which != NULL);
5061 	KASSERT((void*)map <= (void*)(which) &&
5062 	    (void*)(which) < (void*)(map + 1));
5063 
5064 	vm_map_lock(map);
5065 	TAILQ_INIT(&dead);
5066 	uvm_map_freelist_update_clear(map, &dead);
5067 
5068 	uvm_addr_destroy(*which);
5069 	*which = newval;
5070 
5071 	uvm_map_freelist_update_refill(map, 0);
5072 	vm_map_unlock(map);
5073 	uvm_unmap_detach(&dead, 0);
5074 }
5075 
5076 /*
5077  * Correct space insert.
5078  *
5079  * Entry must not be on any freelist.
5080  */
5081 struct vm_map_entry*
5082 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5083     vaddr_t min, vaddr_t max, int flags)
5084 {
5085 	struct uvm_addr_state	*free, *entfree;
5086 	vaddr_t			 lmax;
5087 
5088 	KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5089 	KDASSERT(min <= max);
5090 	KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5091 	    min == map->min_offset);
5092 
5093 	UVM_MAP_REQ_WRITE(map);
5094 
5095 	/*
5096 	 * During the function, entfree will always point at the uaddr state
5097 	 * for entry.
5098 	 */
5099 	entfree = (entry == NULL ? NULL :
5100 	    uvm_map_uaddr_e(map, entry));
5101 
5102 	while (min != max) {
5103 		/* Claim guard page for entry. */
5104 		if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5105 		    VMMAP_FREE_END(entry) == entry->end &&
5106 		    entry->start != entry->end) {
5107 			if (max - min == 2 * PAGE_SIZE) {
5108 				/*
5109 				 * If the free-space gap is exactly 2 pages,
5110 				 * we make the guard 2 pages instead of 1.
5111 				 * Because in a guarded map, an area needs
5112 				 * at least 2 pages to allocate from:
5113 				 * one page for the allocation and one for
5114 				 * the guard.
5115 				 */
5116 				entry->guard = 2 * PAGE_SIZE;
5117 				min = max;
5118 			} else {
5119 				entry->guard = PAGE_SIZE;
5120 				min += PAGE_SIZE;
5121 			}
5122 			continue;
5123 		}
5124 
5125 		/*
5126 		 * Handle the case where entry has a 2-page guard, but the
5127 		 * space after entry is freed.
5128 		 */
5129 		if (entry != NULL && entry->fspace == 0 &&
5130 		    entry->guard > PAGE_SIZE) {
5131 			entry->guard = PAGE_SIZE;
5132 			min = VMMAP_FREE_START(entry);
5133 		}
5134 
5135 		lmax = uvm_map_boundary(map, min, max);
5136 		free = uvm_map_uaddr(map, min);
5137 
5138 		/*
5139 		 * Entries are merged if they point at the same uvm_free().
5140 		 * Entries are merged if they point at the same free list.
5141 		 * entry is started regardless (otherwise the allocators
5142 		 * will get confused).
5143 		 */
5144 		if (entry != NULL && free == entfree &&
5145 		    !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5146 		    min == uvm_maxkaddr)) {
5147 			KDASSERT(VMMAP_FREE_END(entry) == min);
5148 			entry->fspace += lmax - min;
5149 		} else {
5150 			/*
5151 			 * Commit entry to the free list: nothing more will be
5152 			 * added to it.
5153 			 * We'll start a new entry and add to that entry
5154 			 * instead.
5155 			 */
5156 			if (entry != NULL)
5157 				uvm_mapent_free_insert(map, entfree, entry);
5158 
5159 			/* New entry for new uaddr. */
5160 			entry = uvm_mapent_alloc(map, flags);
5161 			KDASSERT(entry != NULL);
5162 			entry->end = entry->start = min;
5163 			entry->guard = 0;
5164 			entry->fspace = lmax - min;
5165 			entry->object.uvm_obj = NULL;
5166 			entry->offset = 0;
5167 			entry->etype = 0;
5168 			entry->protection = entry->max_protection = 0;
5169 			entry->inheritance = 0;
5170 			entry->wired_count = 0;
5171 			entry->advice = 0;
5172 			entry->aref.ar_pageoff = 0;
5173 			entry->aref.ar_amap = NULL;
5174 			uvm_mapent_addr_insert(map, entry);
5175 
5176 			entfree = free;
5177 		}
5178 
5179 		min = lmax;
5180 	}
5181 	/* Finally put entry on the uaddr state. */
5182 	if (entry != NULL)
5183 		uvm_mapent_free_insert(map, entfree, entry);
5184 
5185 	return entry;
5186 }
5187 
5188 /*
5189  * MQuery style of allocation.
5190  *
5191  * This allocator searches forward until sufficient space is found to map
5192  * the given size.
5193  *
5194  * XXX: factor in offset (via pmap_prefer) and protection?
5195  */
5196 int
5197 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5198     int flags)
5199 {
5200 	struct vm_map_entry *entry, *last;
5201 	vaddr_t addr;
5202 	vaddr_t tmp, pmap_align, pmap_offset;
5203 	int error;
5204 
5205 	addr = *addr_p;
5206 	vm_map_lock_read(map);
5207 
5208 	/* Configure pmap prefer. */
5209 	if (offset != UVM_UNKNOWN_OFFSET) {
5210 		pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5211 		pmap_offset = PMAP_PREFER_OFFSET(offset);
5212 	} else {
5213 		pmap_align = PAGE_SIZE;
5214 		pmap_offset = 0;
5215 	}
5216 
5217 	/* Align address to pmap_prefer unless FLAG_FIXED is set. */
5218 	if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5219 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5220 		if (tmp < addr)
5221 			tmp += pmap_align;
5222 		addr = tmp;
5223 	}
5224 
5225 	/* First, check if the requested range is fully available. */
5226 	entry = uvm_map_entrybyaddr(&map->addr, addr);
5227 	last = NULL;
5228 	if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5229 		error = 0;
5230 		goto out;
5231 	}
5232 	if (flags & UVM_FLAG_FIXED) {
5233 		error = EINVAL;
5234 		goto out;
5235 	}
5236 
5237 	error = ENOMEM; /* Default error from here. */
5238 
5239 	/*
5240 	 * At this point, the memory at <addr, sz> is not available.
5241 	 * The reasons are:
5242 	 * [1] it's outside the map,
5243 	 * [2] it starts in used memory (and therefore needs to move
5244 	 *     toward the first free page in entry),
5245 	 * [3] it starts in free memory but bumps into used memory.
5246 	 *
5247 	 * Note that for case [2], the forward moving is handled by the
5248 	 * for loop below.
5249 	 */
5250 	if (entry == NULL) {
5251 		/* [1] Outside the map. */
5252 		if (addr >= map->max_offset)
5253 			goto out;
5254 		else
5255 			entry = RBT_MIN(uvm_map_addr, &map->addr);
5256 	} else if (VMMAP_FREE_START(entry) <= addr) {
5257 		/* [3] Bumped into used memory. */
5258 		entry = RBT_NEXT(uvm_map_addr, entry);
5259 	}
5260 
5261 	/* Test if the next entry is sufficient for the allocation. */
5262 	for (; entry != NULL;
5263 	    entry = RBT_NEXT(uvm_map_addr, entry)) {
5264 		if (entry->fspace == 0)
5265 			continue;
5266 		addr = VMMAP_FREE_START(entry);
5267 
5268 restart:	/* Restart address checks on address change. */
5269 		tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5270 		if (tmp < addr)
5271 			tmp += pmap_align;
5272 		addr = tmp;
5273 		if (addr >= VMMAP_FREE_END(entry))
5274 			continue;
5275 
5276 		/* Skip brk() allocation addresses. */
5277 		if (addr + sz > map->b_start && addr < map->b_end) {
5278 			if (VMMAP_FREE_END(entry) > map->b_end) {
5279 				addr = map->b_end;
5280 				goto restart;
5281 			} else
5282 				continue;
5283 		}
5284 		/* Skip stack allocation addresses. */
5285 		if (addr + sz > map->s_start && addr < map->s_end) {
5286 			if (VMMAP_FREE_END(entry) > map->s_end) {
5287 				addr = map->s_end;
5288 				goto restart;
5289 			} else
5290 				continue;
5291 		}
5292 
5293 		last = NULL;
5294 		if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5295 			error = 0;
5296 			goto out;
5297 		}
5298 	}
5299 
5300 out:
5301 	vm_map_unlock_read(map);
5302 	if (error == 0)
5303 		*addr_p = addr;
5304 	return error;
5305 }
5306 
5307 boolean_t
5308 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5309 {
5310 	boolean_t rv;
5311 
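	/* Interrupt-safe maps are protected by a mutex, others by a rwlock. */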
5312 	if (map->flags & VM_MAP_INTRSAFE) {
5313 		rv = mtx_enter_try(&map->mtx);
5314 	} else {
5315 		mtx_enter(&map->flags_lock);
5316 		if (map->flags & VM_MAP_BUSY) {
5317 			mtx_leave(&map->flags_lock);
5318 			return (FALSE);
5319 		}
5320 		mtx_leave(&map->flags_lock);
5321 		rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5322 		/* check if the lock is busy and back out if we won the race */
5323 		if (rv) {
5324 			mtx_enter(&map->flags_lock);
5325 			if (map->flags & VM_MAP_BUSY) {
5326 				rw_exit(&map->lock);
5327 				rv = FALSE;
5328 			}
5329 			mtx_leave(&map->flags_lock);
5330 		}
5331 	}
5332 
5333 	if (rv) {
5334 		map->timestamp++;
5335 		LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5336 		uvm_tree_sanity(map, file, line);
5337 		uvm_tree_size_chk(map, file, line);
5338 	}
5339 
5340 	return (rv);
5341 }
5342 
5343 void
5344 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5345 {
5346 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5347 		do {
5348 			mtx_enter(&map->flags_lock);
5349 tryagain:
5350 			while (map->flags & VM_MAP_BUSY) {
5351 				map->flags |= VM_MAP_WANTLOCK;
5352 				msleep_nsec(&map->flags, &map->flags_lock,
5353 				    PVM, vmmapbsy, INFSLP);
5354 			}
5355 			mtx_leave(&map->flags_lock);
5356 		} while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5357 		/* check if the lock is busy and back out if we won the race */
5358 		mtx_enter(&map->flags_lock);
5359 		if (map->flags & VM_MAP_BUSY) {
5360 			rw_exit(&map->lock);
5361 			goto tryagain;
5362 		}
5363 		mtx_leave(&map->flags_lock);
5364 	} else {
5365 		mtx_enter(&map->mtx);
5366 	}
5367 
5368 	map->timestamp++;
5369 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5370 	uvm_tree_sanity(map, file, line);
5371 	uvm_tree_size_chk(map, file, line);
5372 }
5373 
5374 void
5375 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5376 {
5377 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5378 		rw_enter_read(&map->lock);
5379 	else
5380 		mtx_enter(&map->mtx);
5381 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5382 	uvm_tree_sanity(map, file, line);
5383 	uvm_tree_size_chk(map, file, line);
5384 }
5385 
5386 void
5387 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5388 {
5389 	uvm_tree_sanity(map, file, line);
5390 	uvm_tree_size_chk(map, file, line);
5391 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5392 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5393 		rw_exit(&map->lock);
5394 	else
5395 		mtx_leave(&map->mtx);
5396 }
5397 
5398 void
5399 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5400 {
5401 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5402 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5403 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5404 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5405 		rw_exit_read(&map->lock);
5406 	else
5407 		mtx_leave(&map->mtx);
5408 }
5409 
5410 void
5411 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5412 {
5413 	uvm_tree_sanity(map, file, line);
5414 	uvm_tree_size_chk(map, file, line);
5415 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5416 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5417 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5418 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5419 		rw_enter(&map->lock, RW_DOWNGRADE);
5420 }
5421 
5422 void
5423 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5424 {
5425 	/* XXX: RO */ uvm_tree_sanity(map, file, line);
5426 	/* XXX: RO */ uvm_tree_size_chk(map, file, line);
5427 	LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5428 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5429 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5430 		rw_exit_read(&map->lock);
5431 		rw_enter_write(&map->lock);
5432 	}
5433 	LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
5434 	uvm_tree_sanity(map, file, line);
5435 }
5436 
5437 void
5438 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5439 {
5440 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5441 	mtx_enter(&map->flags_lock);
5442 	map->flags |= VM_MAP_BUSY;
5443 	mtx_leave(&map->flags_lock);
5444 }
5445 
5446 void
5447 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5448 {
5449 	int oflags;
5450 
5451 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5452 	mtx_enter(&map->flags_lock);
5453 	oflags = map->flags;
5454 	map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5455 	mtx_leave(&map->flags_lock);
5456 	if (oflags & VM_MAP_WANTLOCK)
5457 		wakeup(&map->flags);
5458 }
5459 
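/*
 * vm_map_assert_anylock_ln: assert that the map is locked, shared or
 * exclusive (or that the mutex is held for VM_MAP_INTRSAFE maps).
 */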
5460 void
5461 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line)
5462 {
5463 	LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line));
5464 	if ((map->flags & VM_MAP_INTRSAFE) == 0)
5465 		rw_assert_anylock(&map->lock);
5466 	else
5467 		MUTEX_ASSERT_LOCKED(&map->mtx);
5468 }
5469 
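/*
 * vm_map_assert_wrlock_ln: assert that the map is locked exclusively
 * (or that the mutex is held for VM_MAP_INTRSAFE maps).
 */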
5470 void
5471 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line)
5472 {
5473 	LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line));
5474 	if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5475 		splassert(IPL_NONE);
5476 		rw_assert_wrlock(&map->lock);
5477 	} else
5478 		MUTEX_ASSERT_LOCKED(&map->mtx);
5479 }
5480 
5481 #ifndef SMALL_KERNEL
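/*
 * uvm_map_fill_vmmap: copy map entries into the kinfo_vmentry array 'kve'
 * for export to userland (e.g. via sysctl).
 *
 * On entry, *lenp is the buffer size in bytes and kve[0].kve_start is the
 * address to resume from (0 means "start at the beginning").  On return,
 * *lenp is the number of bytes actually filled in.  ENOMEM is returned
 * when the buffer filled up before the last entry was reached; the caller
 * can continue by passing the last returned kve_end as the next
 * kve[0].kve_start.
 */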
5482 int
5483 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5484     size_t *lenp)
5485 {
5486 	struct vm_map_entry *entry;
5487 	vaddr_t start;
5488 	int cnt, maxcnt, error = 0;
5489 
5490 	KASSERT(*lenp > 0);
5491 	KASSERT((*lenp % sizeof(*kve)) == 0);
5492 	cnt = 0;
5493 	maxcnt = *lenp / sizeof(*kve);
5494 	KASSERT(maxcnt > 0);
5495 
5496 	/*
5497 	 * Return only entries that start at or above the given base
5498 	 * address.  This allows userland to iterate without knowing the
5499 	 * number of entries beforehand.
5500 	 */
5501 	start = (vaddr_t)kve[0].kve_start;
5502 
5503 	vm_map_lock(map);
5504 	RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5505 		if (cnt == maxcnt) {
5506 			error = ENOMEM;
5507 			break;
5508 		}
5509 		if (start != 0 && entry->start < start)
5510 			continue;
5511 		kve->kve_start = entry->start;
5512 		kve->kve_end = entry->end;
5513 		kve->kve_guard = entry->guard;
5514 		kve->kve_fspace = entry->fspace;
5515 		kve->kve_fspace_augment = entry->fspace_augment;
5516 		kve->kve_offset = entry->offset;
5517 		kve->kve_wired_count = entry->wired_count;
5518 		kve->kve_etype = entry->etype;
5519 		kve->kve_protection = entry->protection;
5520 		kve->kve_max_protection = entry->max_protection;
5521 		kve->kve_advice = entry->advice;
5522 		kve->kve_inheritance = entry->inheritance;
5523 		kve->kve_flags = entry->flags;
5524 		kve++;
5525 		cnt++;
5526 	}
5527 	vm_map_unlock(map);
5528 
5529 	KASSERT(cnt <= maxcnt);
5530 
5531 	*lenp = sizeof(*kve) * cnt;
5532 	return error;
5533 }
5534 #endif
5535 
5536 
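/*
 * Generate the augmented red-black tree code for the address-sorted
 * entry tree: entries are keyed by start address (uvm_mapentry_addrcmp)
 * and uvm_map_addr_augment() maintains the per-subtree maximum free
 * space (fspace_augment) used by the address selectors.
 */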
5537 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5538     uvm_mapentry_addrcmp, uvm_map_addr_augment);
5539 
5540 
5541 /*
5542  * MD code: vmspace allocator setup.
5543  */
5544 
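/*
 * Each variant below wires up the address selectors for a new map.  For
 * now a single uaddr_rnd selector covers the whole usable range (plus
 * the brk/stack selector on !SMALL_KERNEL kernels); the #if 0 blocks
 * show the intended pivot/exe-based layout that is not enabled yet.
 */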
5545 #ifdef __i386__
5546 void
5547 uvm_map_setup_md(struct vm_map *map)
5548 {
5549 	vaddr_t		min, max;
5550 
5551 	min = map->min_offset;
5552 	max = map->max_offset;
5553 
5554 	/*
5555 	 * Ensure the selectors will not try to manage page 0;
5556 	 * it's too special.
5557 	 */
5558 	if (min < VMMAP_MIN_ADDR)
5559 		min = VMMAP_MIN_ADDR;
5560 
5561 #if 0	/* Cool stuff, not yet */
5562 	/* Executable code is special. */
5563 	map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5564 	/* Place normal allocations beyond executable mappings. */
5565 	map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5566 #else	/* Crappy stuff, for now */
5567 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5568 #endif
5569 
5570 #ifndef SMALL_KERNEL
5571 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5572 #endif /* !SMALL_KERNEL */
5573 }
5574 #elif __LP64__
5575 void
5576 uvm_map_setup_md(struct vm_map *map)
5577 {
5578 	vaddr_t		min, max;
5579 
5580 	min = map->min_offset;
5581 	max = map->max_offset;
5582 
5583 	/*
5584 	 * Ensure the selectors will not try to manage page 0;
5585 	 * it's too special.
5586 	 */
5587 	if (min < VMMAP_MIN_ADDR)
5588 		min = VMMAP_MIN_ADDR;
5589 
5590 #if 0	/* Cool stuff, not yet */
5591 	map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5592 #else	/* Crappy stuff, for now */
5593 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5594 #endif
5595 
5596 #ifndef SMALL_KERNEL
5597 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5598 #endif /* !SMALL_KERNEL */
5599 }
5600 #else	/* non-i386, 32 bit */
5601 void
5602 uvm_map_setup_md(struct vm_map *map)
5603 {
5604 	vaddr_t		min, max;
5605 
5606 	min = map->min_offset;
5607 	max = map->max_offset;
5608 
5609 	/*
5610 	 * Ensure the selectors will not try to manage page 0;
5611 	 * it's too special.
5612 	 */
5613 	if (min < VMMAP_MIN_ADDR)
5614 		min = VMMAP_MIN_ADDR;
5615 
5616 #if 0	/* Cool stuff, not yet */
5617 	map->uaddr_any[3] = uaddr_pivot_create(min, max);
5618 #else	/* Crappy stuff, for now */
5619 	map->uaddr_any[0] = uaddr_rnd_create(min, max);
5620 #endif
5621 
5622 #ifndef SMALL_KERNEL
5623 	map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5624 #endif /* !SMALL_KERNEL */
5625 }
5626 #endif
5627