xref: /netbsd-src/sys/uvm/uvm_map.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /*	$NetBSD: uvm_map.c,v 1.376 2020/03/22 18:32:42 ad Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * Copyright (c) 1991, 1993, The Regents of the University of California.
6  *
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * The Mach Operating System project at Carnegie-Mellon University.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
37  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
38  *
39  *
40  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
41  * All rights reserved.
42  *
43  * Permission to use, copy, modify and distribute this software and
44  * its documentation is hereby granted, provided that both the copyright
45  * notice and this permission notice appear in all copies of the
46  * software, derivative works or modified versions, and any portions
47  * thereof, and that both notices appear in supporting documentation.
48  *
49  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
50  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
51  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
52  *
53  * Carnegie Mellon requests users of this software to return to
54  *
55  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
56  *  School of Computer Science
57  *  Carnegie Mellon University
58  *  Pittsburgh PA 15213-3890
59  *
60  * any improvements or extensions that they make and grant Carnegie the
61  * rights to redistribute these changes.
62  */
63 
64 /*
65  * uvm_map.c: uvm map operations
66  */
67 
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.376 2020/03/22 18:32:42 ad Exp $");
70 
71 #include "opt_ddb.h"
72 #include "opt_pax.h"
73 #include "opt_uvmhist.h"
74 #include "opt_uvm.h"
75 #include "opt_sysv.h"
76 
77 #include <sys/param.h>
78 #include <sys/systm.h>
79 #include <sys/mman.h>
80 #include <sys/proc.h>
81 #include <sys/pool.h>
82 #include <sys/kernel.h>
83 #include <sys/mount.h>
84 #include <sys/pax.h>
85 #include <sys/vnode.h>
86 #include <sys/filedesc.h>
87 #include <sys/lockdebug.h>
88 #include <sys/atomic.h>
89 #include <sys/sysctl.h>
90 #ifndef __USER_VA0_IS_SAFE
91 #include <sys/kauth.h>
92 #include "opt_user_va0_disable_default.h"
93 #endif
94 
95 #include <sys/shm.h>
96 
97 #include <uvm/uvm.h>
98 #include <uvm/uvm_readahead.h>
99 
100 #if defined(DDB) || defined(DEBUGPRINT)
101 #include <uvm/uvm_ddb.h>
102 #endif
103 
104 #ifdef UVMHIST
105 #ifndef UVMHIST_MAPHIST_SIZE
106 #define UVMHIST_MAPHIST_SIZE 100
107 #endif
108 #ifndef UVMHIST_PDHIST_SIZE
109 #define UVMHIST_PDHIST_SIZE 100
110 #endif
111 static struct kern_history_ent maphistbuf[UVMHIST_MAPHIST_SIZE];
112 UVMHIST_DEFINE(maphist) = UVMHIST_INITIALIZER(maphist, maphistbuf);
113 #endif
114 
115 #if !defined(UVMMAP_COUNTERS)
116 
117 #define	UVMMAP_EVCNT_DEFINE(name)	/* nothing */
118 #define UVMMAP_EVCNT_INCR(ev)		/* nothing */
119 #define UVMMAP_EVCNT_DECR(ev)		/* nothing */
120 
121 #else /* defined(UVMMAP_NOCOUNTERS) */
122 
123 #include <sys/evcnt.h>
124 #define	UVMMAP_EVCNT_DEFINE(name) \
125 struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
126     "uvmmap", #name); \
127 EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name);
128 #define	UVMMAP_EVCNT_INCR(ev)		uvmmap_evcnt_##ev.ev_count++
129 #define	UVMMAP_EVCNT_DECR(ev)		uvmmap_evcnt_##ev.ev_count--
130 
131 #endif /* defined(UVMMAP_NOCOUNTERS) */
132 
133 UVMMAP_EVCNT_DEFINE(ubackmerge)
134 UVMMAP_EVCNT_DEFINE(uforwmerge)
135 UVMMAP_EVCNT_DEFINE(ubimerge)
136 UVMMAP_EVCNT_DEFINE(unomerge)
137 UVMMAP_EVCNT_DEFINE(kbackmerge)
138 UVMMAP_EVCNT_DEFINE(kforwmerge)
139 UVMMAP_EVCNT_DEFINE(kbimerge)
140 UVMMAP_EVCNT_DEFINE(knomerge)
141 UVMMAP_EVCNT_DEFINE(map_call)
142 UVMMAP_EVCNT_DEFINE(mlk_call)
143 UVMMAP_EVCNT_DEFINE(mlk_hint)
144 UVMMAP_EVCNT_DEFINE(mlk_tree)
145 UVMMAP_EVCNT_DEFINE(mlk_treeloop)
146 
147 const char vmmapbsy[] = "vmmapbsy";
148 
149 /*
150  * cache for vmspace structures.
151  */
152 
153 static struct pool_cache uvm_vmspace_cache;
154 
155 /*
156  * cache for dynamically-allocated map entries.
157  */
158 
159 static struct pool_cache uvm_map_entry_cache;
160 
161 #ifdef PMAP_GROWKERNEL
162 /*
163  * This global represents the end of the kernel virtual address
164  * space.  If we want to exceed this, we must grow the kernel
165  * virtual address space dynamically.
166  *
167  * Note, this variable is locked by kernel_map's lock.
168  */
169 vaddr_t uvm_maxkaddr;
170 #endif
171 
172 #ifndef __USER_VA0_IS_SAFE
173 #ifndef __USER_VA0_DISABLE_DEFAULT
174 #define __USER_VA0_DISABLE_DEFAULT 1
175 #endif
176 #ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */
177 #undef __USER_VA0_DISABLE_DEFAULT
178 #define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT
179 #endif
180 int user_va0_disable = __USER_VA0_DISABLE_DEFAULT;
181 #endif
182 
183 /*
184  * macros
185  */
186 
187 /*
188  * uvm_map_align_va: round down or up virtual address
189  */
190 static __inline void
191 uvm_map_align_va(vaddr_t *vap, vsize_t align, int topdown)
192 {
193 
194 	KASSERT(powerof2(align));
195 
196 	if (align != 0 && (*vap & (align - 1)) != 0) {
197 		if (topdown)
198 			*vap = rounddown2(*vap, align);
199 		else
200 			*vap = roundup2(*vap, align);
201 	}
202 }
203 
204 /*
205  * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging
206  */
207 extern struct vm_map *pager_map;
208 
209 #define	UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \
210     prot, maxprot, inh, adv, wire) \
211 	((ent)->etype == (type) && \
212 	(((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE)) == 0 && \
213 	(ent)->object.uvm_obj == (uobj) && \
214 	(ent)->protection == (prot) && \
215 	(ent)->max_protection == (maxprot) && \
216 	(ent)->inheritance == (inh) && \
217 	(ent)->advice == (adv) && \
218 	(ent)->wired_count == (wire))
219 
220 /*
221  * uvm_map_entry_link: insert entry into a map
222  *
223  * => map must be locked
224  */
225 #define uvm_map_entry_link(map, after_where, entry) do { \
226 	uvm_mapent_check(entry); \
227 	(map)->nentries++; \
228 	(entry)->prev = (after_where); \
229 	(entry)->next = (after_where)->next; \
230 	(entry)->prev->next = (entry); \
231 	(entry)->next->prev = (entry); \
232 	uvm_rb_insert((map), (entry)); \
233 } while (/*CONSTCOND*/ 0)
234 
235 /*
236  * uvm_map_entry_unlink: remove entry from a map
237  *
238  * => map must be locked
239  */
240 #define uvm_map_entry_unlink(map, entry) do { \
241 	KASSERT((entry) != (map)->first_free); \
242 	KASSERT((entry) != (map)->hint); \
243 	uvm_mapent_check(entry); \
244 	(map)->nentries--; \
245 	(entry)->next->prev = (entry)->prev; \
246 	(entry)->prev->next = (entry)->next; \
247 	uvm_rb_remove((map), (entry)); \
248 } while (/*CONSTCOND*/ 0)
249 
250 /*
251  * SAVE_HINT: saves the specified entry as the hint for future lookups.
252  *
253  * => map need not be locked.
254  */
255 #define SAVE_HINT(map, check, value) do { \
256 	if ((map)->hint == (check)) \
257 		(map)->hint = (value); \
258 } while (/*CONSTCOND*/ 0)
259 
260 /*
261  * clear_hints: ensure that hints don't point to the entry.
262  *
263  * => map must be write-locked.
264  */
265 static void
266 clear_hints(struct vm_map *map, struct vm_map_entry *ent)
267 {
268 
269 	SAVE_HINT(map, ent, ent->prev);
270 	if (map->first_free == ent) {
271 		map->first_free = ent->prev;
272 	}
273 }
274 
275 /*
276  * VM_MAP_RANGE_CHECK: check and correct range
277  *
278  * => map must at least be read locked
279  */
280 
281 #define VM_MAP_RANGE_CHECK(map, start, end) do { \
282 	if (start < vm_map_min(map))		\
283 		start = vm_map_min(map);	\
284 	if (end > vm_map_max(map))		\
285 		end = vm_map_max(map);		\
286 	if (start > end)			\
287 		start = end;			\
288 } while (/*CONSTCOND*/ 0)
289 
290 /*
291  * local prototypes
292  */
293 
294 static struct vm_map_entry *
295 		uvm_mapent_alloc(struct vm_map *, int);
296 static void	uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
297 static void	uvm_mapent_free(struct vm_map_entry *);
298 #if defined(DEBUG)
299 static void	_uvm_mapent_check(const struct vm_map_entry *, const char *,
300 		    int);
301 #define	uvm_mapent_check(map)	_uvm_mapent_check(map, __FILE__, __LINE__)
302 #else /* defined(DEBUG) */
303 #define	uvm_mapent_check(e)	/* nothing */
304 #endif /* defined(DEBUG) */
305 
306 static void	uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
307 static void	uvm_map_reference_amap(struct vm_map_entry *, int);
308 static int	uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
309 		    int, struct vm_map_entry *);
310 static void	uvm_map_unreference_amap(struct vm_map_entry *, int);
311 
312 int _uvm_map_sanity(struct vm_map *);
313 int _uvm_tree_sanity(struct vm_map *);
314 static vsize_t uvm_rb_maxgap(const struct vm_map_entry *);
315 
316 #define	ROOT_ENTRY(map)		((struct vm_map_entry *)(map)->rb_tree.rbt_root)
317 #define	LEFT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_left)
318 #define	RIGHT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_right)
319 #define	PARENT_ENTRY(map, entry) \
320 	(ROOT_ENTRY(map) == (entry) \
321 	    ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))
322 
323 /*
324  * These get filled in if/when SYSVSHM shared memory code is loaded
325  *
326  * We do this with function pointers rather the #ifdef SYSVSHM so the
327  * SYSVSHM code can be loaded and unloaded
328  */
329 void (*uvm_shmexit)(struct vmspace *) = NULL;
330 void (*uvm_shmfork)(struct vmspace *, struct vmspace *) = NULL;
331 
332 static int
333 uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey)
334 {
335 	const struct vm_map_entry *eparent = nparent;
336 	const struct vm_map_entry *ekey = nkey;
337 
338 	KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
339 	KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);
340 
341 	if (eparent->start < ekey->start)
342 		return -1;
343 	if (eparent->end >= ekey->start)
344 		return 1;
345 	return 0;
346 }
347 
348 static int
349 uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey)
350 {
351 	const struct vm_map_entry *eparent = nparent;
352 	const vaddr_t va = *(const vaddr_t *) vkey;
353 
354 	if (eparent->start < va)
355 		return -1;
356 	if (eparent->end >= va)
357 		return 1;
358 	return 0;
359 }
360 
361 static const rb_tree_ops_t uvm_map_tree_ops = {
362 	.rbto_compare_nodes = uvm_map_compare_nodes,
363 	.rbto_compare_key = uvm_map_compare_key,
364 	.rbto_node_offset = offsetof(struct vm_map_entry, rb_node),
365 	.rbto_context = NULL
366 };
367 
368 /*
369  * uvm_rb_gap: return the gap size between our entry and next entry.
370  */
371 static inline vsize_t
372 uvm_rb_gap(const struct vm_map_entry *entry)
373 {
374 
375 	KASSERT(entry->next != NULL);
376 	return entry->next->start - entry->end;
377 }
378 
379 static vsize_t
380 uvm_rb_maxgap(const struct vm_map_entry *entry)
381 {
382 	struct vm_map_entry *child;
383 	vsize_t maxgap = entry->gap;
384 
385 	/*
386 	 * We need maxgap to be the largest gap of us or any of our
387 	 * descendents.  Since each of our children's maxgap is the
388 	 * cached value of their largest gap of themselves or their
389 	 * descendents, we can just use that value and avoid recursing
390 	 * down the tree to calculate it.
391 	 */
392 	if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
393 		maxgap = child->maxgap;
394 
395 	if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
396 		maxgap = child->maxgap;
397 
398 	return maxgap;
399 }
400 
401 static void
402 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
403 {
404 	struct vm_map_entry *parent;
405 
406 	KASSERT(entry->gap == uvm_rb_gap(entry));
407 	entry->maxgap = uvm_rb_maxgap(entry);
408 
409 	while ((parent = PARENT_ENTRY(map, entry)) != NULL) {
410 		struct vm_map_entry *brother;
411 		vsize_t maxgap = parent->gap;
412 		unsigned int which;
413 
414 		KDASSERT(parent->gap == uvm_rb_gap(parent));
415 		if (maxgap < entry->maxgap)
416 			maxgap = entry->maxgap;
417 		/*
418 		 * Since we work towards the root, we know entry's maxgap
419 		 * value is OK, but its brothers may now be out-of-date due
420 		 * to rebalancing.  So refresh it.
421 		 */
422 		which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER;
423 		brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which];
424 		if (brother != NULL) {
425 			KDASSERT(brother->gap == uvm_rb_gap(brother));
426 			brother->maxgap = uvm_rb_maxgap(brother);
427 			if (maxgap < brother->maxgap)
428 				maxgap = brother->maxgap;
429 		}
430 
431 		parent->maxgap = maxgap;
432 		entry = parent;
433 	}
434 }
435 
436 static void
437 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
438 {
439 	struct vm_map_entry *ret __diagused;
440 
441 	entry->gap = entry->maxgap = uvm_rb_gap(entry);
442 	if (entry->prev != &map->header)
443 		entry->prev->gap = uvm_rb_gap(entry->prev);
444 
445 	ret = rb_tree_insert_node(&map->rb_tree, entry);
446 	KASSERTMSG(ret == entry,
447 	    "uvm_rb_insert: map %p: duplicate entry %p", map, ret);
448 
449 	/*
450 	 * If the previous entry is not our immediate left child, then it's an
451 	 * ancestor and will be fixed up on the way to the root.  We don't
452 	 * have to check entry->prev against &map->header since &map->header
453 	 * will never be in the tree.
454 	 */
455 	uvm_rb_fixup(map,
456 	    LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry);
457 }
458 
459 static void
460 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
461 {
462 	struct vm_map_entry *prev_parent = NULL, *next_parent = NULL;
463 
464 	/*
465 	 * If we are removing an interior node, then an adjacent node will
466 	 * be used to replace its position in the tree.  Therefore we will
467 	 * need to fixup the tree starting at the parent of the replacement
468 	 * node.  So record their parents for later use.
469 	 */
470 	if (entry->prev != &map->header)
471 		prev_parent = PARENT_ENTRY(map, entry->prev);
472 	if (entry->next != &map->header)
473 		next_parent = PARENT_ENTRY(map, entry->next);
474 
475 	rb_tree_remove_node(&map->rb_tree, entry);
476 
477 	/*
478 	 * If the previous node has a new parent, fixup the tree starting
479 	 * at the previous node's old parent.
480 	 */
481 	if (entry->prev != &map->header) {
482 		/*
483 		 * Update the previous entry's gap due to our absence.
484 		 */
485 		entry->prev->gap = uvm_rb_gap(entry->prev);
486 		uvm_rb_fixup(map, entry->prev);
487 		if (prev_parent != NULL
488 		    && prev_parent != entry
489 		    && prev_parent != PARENT_ENTRY(map, entry->prev))
490 			uvm_rb_fixup(map, prev_parent);
491 	}
492 
493 	/*
494 	 * If the next node has a new parent, fixup the tree starting
495 	 * at the next node's old parent.
496 	 */
497 	if (entry->next != &map->header) {
498 		uvm_rb_fixup(map, entry->next);
499 		if (next_parent != NULL
500 		    && next_parent != entry
501 		    && next_parent != PARENT_ENTRY(map, entry->next))
502 			uvm_rb_fixup(map, next_parent);
503 	}
504 }
505 
506 #if defined(DEBUG)
507 int uvm_debug_check_map = 0;
508 int uvm_debug_check_rbtree = 0;
509 #define uvm_map_check(map, name) \
510 	_uvm_map_check((map), (name), __FILE__, __LINE__)
511 static void
512 _uvm_map_check(struct vm_map *map, const char *name,
513     const char *file, int line)
514 {
515 
516 	if ((uvm_debug_check_map && _uvm_map_sanity(map)) ||
517 	    (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) {
518 		panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)",
519 		    name, map, file, line);
520 	}
521 }
522 #else /* defined(DEBUG) */
523 #define uvm_map_check(map, name)	/* nothing */
524 #endif /* defined(DEBUG) */
525 
526 #if defined(DEBUG) || defined(DDB)
527 int
528 _uvm_map_sanity(struct vm_map *map)
529 {
530 	bool first_free_found = false;
531 	bool hint_found = false;
532 	const struct vm_map_entry *e;
533 	struct vm_map_entry *hint = map->hint;
534 
535 	e = &map->header;
536 	for (;;) {
537 		if (map->first_free == e) {
538 			first_free_found = true;
539 		} else if (!first_free_found && e->next->start > e->end) {
540 			printf("first_free %p should be %p\n",
541 			    map->first_free, e);
542 			return -1;
543 		}
544 		if (hint == e) {
545 			hint_found = true;
546 		}
547 
548 		e = e->next;
549 		if (e == &map->header) {
550 			break;
551 		}
552 	}
553 	if (!first_free_found) {
554 		printf("stale first_free\n");
555 		return -1;
556 	}
557 	if (!hint_found) {
558 		printf("stale hint\n");
559 		return -1;
560 	}
561 	return 0;
562 }
563 
564 int
565 _uvm_tree_sanity(struct vm_map *map)
566 {
567 	struct vm_map_entry *tmp, *trtmp;
568 	int n = 0, i = 1;
569 
570 	for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
571 		if (tmp->gap != uvm_rb_gap(tmp)) {
572 			printf("%d/%d gap %#lx != %#lx %s\n",
573 			    n + 1, map->nentries,
574 			    (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp),
575 			    tmp->next == &map->header ? "(last)" : "");
576 			goto error;
577 		}
578 		/*
579 		 * If any entries are out of order, tmp->gap will be unsigned
580 		 * and will likely exceed the size of the map.
581 		 */
582 		if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) {
583 			printf("too large gap %zu\n", (size_t)tmp->gap);
584 			goto error;
585 		}
586 		n++;
587 	}
588 
589 	if (n != map->nentries) {
590 		printf("nentries: %d vs %d\n", n, map->nentries);
591 		goto error;
592 	}
593 
594 	trtmp = NULL;
595 	for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
596 		if (tmp->maxgap != uvm_rb_maxgap(tmp)) {
597 			printf("maxgap %#lx != %#lx\n",
598 			    (ulong)tmp->maxgap,
599 			    (ulong)uvm_rb_maxgap(tmp));
600 			goto error;
601 		}
602 		if (trtmp != NULL && trtmp->start >= tmp->start) {
603 			printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n",
604 			    trtmp->start, tmp->start);
605 			goto error;
606 		}
607 
608 		trtmp = tmp;
609 	}
610 
611 	for (tmp = map->header.next; tmp != &map->header;
612 	    tmp = tmp->next, i++) {
613 		trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT);
614 		if (trtmp == NULL)
615 			trtmp = &map->header;
616 		if (tmp->prev != trtmp) {
617 			printf("lookup: %d: %p->prev=%p: %p\n",
618 			    i, tmp, tmp->prev, trtmp);
619 			goto error;
620 		}
621 		trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT);
622 		if (trtmp == NULL)
623 			trtmp = &map->header;
624 		if (tmp->next != trtmp) {
625 			printf("lookup: %d: %p->next=%p: %p\n",
626 			    i, tmp, tmp->next, trtmp);
627 			goto error;
628 		}
629 		trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start);
630 		if (trtmp != tmp) {
631 			printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp,
632 			    PARENT_ENTRY(map, tmp));
633 			goto error;
634 		}
635 	}
636 
637 	return (0);
638  error:
639 	return (-1);
640 }
641 #endif /* defined(DEBUG) || defined(DDB) */
642 
643 /*
644  * vm_map_lock: acquire an exclusive (write) lock on a map.
645  *
646  * => The locking protocol provides for guaranteed upgrade from shared ->
647  *    exclusive by whichever thread currently has the map marked busy.
648  *    See "LOCKING PROTOCOL NOTES" in uvm_map.h.  This is horrible; among
649  *    other problems, it defeats any fairness guarantees provided by RW
650  *    locks.
651  */
652 
653 void
654 vm_map_lock(struct vm_map *map)
655 {
656 
657 	for (;;) {
658 		rw_enter(&map->lock, RW_WRITER);
659 		if (map->busy == NULL || map->busy == curlwp) {
660 			break;
661 		}
662 		mutex_enter(&map->misc_lock);
663 		rw_exit(&map->lock);
664 		if (map->busy != NULL) {
665 			cv_wait(&map->cv, &map->misc_lock);
666 		}
667 		mutex_exit(&map->misc_lock);
668 	}
669 	map->timestamp++;
670 }
671 
672 /*
673  * vm_map_lock_try: try to lock a map, failing if it is already locked.
674  */
675 
676 bool
677 vm_map_lock_try(struct vm_map *map)
678 {
679 
680 	if (!rw_tryenter(&map->lock, RW_WRITER)) {
681 		return false;
682 	}
683 	if (map->busy != NULL) {
684 		rw_exit(&map->lock);
685 		return false;
686 	}
687 	map->timestamp++;
688 	return true;
689 }
690 
691 /*
692  * vm_map_unlock: release an exclusive lock on a map.
693  */
694 
695 void
696 vm_map_unlock(struct vm_map *map)
697 {
698 
699 	KASSERT(rw_write_held(&map->lock));
700 	KASSERT(map->busy == NULL || map->busy == curlwp);
701 	rw_exit(&map->lock);
702 }
703 
704 /*
705  * vm_map_unbusy: mark the map as unbusy, and wake any waiters that
706  *     want an exclusive lock.
707  */
708 
709 void
710 vm_map_unbusy(struct vm_map *map)
711 {
712 
713 	KASSERT(map->busy == curlwp);
714 
715 	/*
716 	 * Safe to clear 'busy' and 'waiters' with only a read lock held:
717 	 *
718 	 * o they can only be set with a write lock held
719 	 * o writers are blocked out with a read or write hold
720 	 * o at any time, only one thread owns the set of values
721 	 */
722 	mutex_enter(&map->misc_lock);
723 	map->busy = NULL;
724 	cv_broadcast(&map->cv);
725 	mutex_exit(&map->misc_lock);
726 }
727 
728 /*
729  * vm_map_lock_read: acquire a shared (read) lock on a map.
730  */
731 
732 void
733 vm_map_lock_read(struct vm_map *map)
734 {
735 
736 	rw_enter(&map->lock, RW_READER);
737 }
738 
739 /*
740  * vm_map_unlock_read: release a shared lock on a map.
741  */
742 
743 void
744 vm_map_unlock_read(struct vm_map *map)
745 {
746 
747 	rw_exit(&map->lock);
748 }
749 
750 /*
751  * vm_map_busy: mark a map as busy.
752  *
753  * => the caller must hold the map write locked
754  */
755 
756 void
757 vm_map_busy(struct vm_map *map)
758 {
759 
760 	KASSERT(rw_write_held(&map->lock));
761 	KASSERT(map->busy == NULL);
762 
763 	map->busy = curlwp;
764 }
765 
766 /*
767  * vm_map_locked_p: return true if the map is write locked.
768  *
769  * => only for debug purposes like KASSERTs.
770  * => should not be used to verify that a map is not locked.
771  */
772 
773 bool
774 vm_map_locked_p(struct vm_map *map)
775 {
776 
777 	return rw_write_held(&map->lock);
778 }
779 
780 /*
781  * uvm_mapent_alloc: allocate a map entry
782  */
783 
784 static struct vm_map_entry *
785 uvm_mapent_alloc(struct vm_map *map, int flags)
786 {
787 	struct vm_map_entry *me;
788 	int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
789 	UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);
790 
791 	me = pool_cache_get(&uvm_map_entry_cache, pflags);
792 	if (__predict_false(me == NULL)) {
793 		return NULL;
794 	}
795 	me->flags = 0;
796 
797 	UVMHIST_LOG(maphist, "<- new entry=%#jx [kentry=%jd]", (uintptr_t)me,
798 	    (map == kernel_map), 0, 0);
799 	return me;
800 }
801 
802 /*
803  * uvm_mapent_free: free map entry
804  */
805 
806 static void
807 uvm_mapent_free(struct vm_map_entry *me)
808 {
809 	UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);
810 
811 	UVMHIST_LOG(maphist,"<- freeing map entry=%#jx [flags=%jd]",
812 		(uintptr_t)me, me->flags, 0, 0);
813 	pool_cache_put(&uvm_map_entry_cache, me);
814 }
815 
816 /*
817  * uvm_mapent_copy: copy a map entry, preserving flags
818  */
819 
820 static inline void
821 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
822 {
823 
824 	memcpy(dst, src, sizeof(*dst));
825 	dst->flags = 0;
826 }
827 
828 #if defined(DEBUG)
829 static void
830 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line)
831 {
832 
833 	if (entry->start >= entry->end) {
834 		goto bad;
835 	}
836 	if (UVM_ET_ISOBJ(entry)) {
837 		if (entry->object.uvm_obj == NULL) {
838 			goto bad;
839 		}
840 	} else if (UVM_ET_ISSUBMAP(entry)) {
841 		if (entry->object.sub_map == NULL) {
842 			goto bad;
843 		}
844 	} else {
845 		if (entry->object.uvm_obj != NULL ||
846 		    entry->object.sub_map != NULL) {
847 			goto bad;
848 		}
849 	}
850 	if (!UVM_ET_ISOBJ(entry)) {
851 		if (entry->offset != 0) {
852 			goto bad;
853 		}
854 	}
855 
856 	return;
857 
858 bad:
859 	panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line);
860 }
861 #endif /* defined(DEBUG) */
862 
863 /*
864  * uvm_map_entry_unwire: unwire a map entry
865  *
866  * => map should be locked by caller
867  */
868 
869 static inline void
870 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
871 {
872 
873 	entry->wired_count = 0;
874 	uvm_fault_unwire_locked(map, entry->start, entry->end);
875 }
876 
877 
878 /*
879  * wrapper for calling amap_ref()
880  */
881 static inline void
882 uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
883 {
884 
885 	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
886 	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
887 }
888 
889 
890 /*
891  * wrapper for calling amap_unref()
892  */
893 static inline void
894 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
895 {
896 
897 	amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
898 	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
899 }
900 
901 
902 /*
903  * uvm_map_init: init mapping system at boot time.
904  */
905 
906 void
907 uvm_map_init(void)
908 {
909 #if defined(UVMHIST)
910 	static struct kern_history_ent pdhistbuf[UVMHIST_PDHIST_SIZE];
911 #endif
912 
913 	/*
914 	 * first, init logging system.
915 	 */
916 
917 	UVMHIST_FUNC("uvm_map_init");
918 	UVMHIST_LINK_STATIC(maphist);
919 	UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
920 	UVMHIST_CALLED(maphist);
921 	UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
922 
923 	/*
924 	 * initialize the global lock for kernel map entry.
925 	 */
926 
927 	mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM);
928 }
929 
930 /*
931  * uvm_map_init_caches: init mapping system caches.
932  */
933 void
934 uvm_map_init_caches(void)
935 {
936 	/*
937 	 * initialize caches.
938 	 */
939 
940 	pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry),
941 	    coherency_unit, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL);
942 	pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace),
943 	    0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL);
944 }
945 
946 /*
947  * clippers
948  */
949 
950 /*
951  * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy.
952  */
953 
954 static void
955 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2,
956     vaddr_t splitat)
957 {
958 	vaddr_t adj;
959 
960 	KASSERT(entry1->start < splitat);
961 	KASSERT(splitat < entry1->end);
962 
963 	adj = splitat - entry1->start;
964 	entry1->end = entry2->start = splitat;
965 
966 	if (entry1->aref.ar_amap) {
967 		amap_splitref(&entry1->aref, &entry2->aref, adj);
968 	}
969 	if (UVM_ET_ISSUBMAP(entry1)) {
970 		/* ... unlikely to happen, but play it safe */
971 		 uvm_map_reference(entry1->object.sub_map);
972 	} else if (UVM_ET_ISOBJ(entry1)) {
973 		KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
974 		entry2->offset += adj;
975 		if (entry1->object.uvm_obj->pgops &&
976 		    entry1->object.uvm_obj->pgops->pgo_reference)
977 			entry1->object.uvm_obj->pgops->pgo_reference(
978 			    entry1->object.uvm_obj);
979 	}
980 }
981 
982 /*
983  * uvm_map_clip_start: ensure that the entry begins at or after
984  *	the starting address, if it doesn't we split the entry.
985  *
986  * => caller should use UVM_MAP_CLIP_START macro rather than calling
987  *    this directly
988  * => map must be locked by caller
989  */
990 
991 void
992 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
993     vaddr_t start)
994 {
995 	struct vm_map_entry *new_entry;
996 
997 	/* uvm_map_simplify_entry(map, entry); */ /* XXX */
998 
999 	uvm_map_check(map, "clip_start entry");
1000 	uvm_mapent_check(entry);
1001 
1002 	/*
1003 	 * Split off the front portion.  note that we must insert the new
1004 	 * entry BEFORE this one, so that this entry has the specified
1005 	 * starting address.
1006 	 */
1007 	new_entry = uvm_mapent_alloc(map, 0);
1008 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
1009 	uvm_mapent_splitadj(new_entry, entry, start);
1010 	uvm_map_entry_link(map, entry->prev, new_entry);
1011 
1012 	uvm_map_check(map, "clip_start leave");
1013 }
1014 
1015 /*
1016  * uvm_map_clip_end: ensure that the entry ends at or before
1017  *	the ending address, if it does't we split the reference
1018  *
1019  * => caller should use UVM_MAP_CLIP_END macro rather than calling
1020  *    this directly
1021  * => map must be locked by caller
1022  */
1023 
1024 void
1025 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end)
1026 {
1027 	struct vm_map_entry *new_entry;
1028 
1029 	uvm_map_check(map, "clip_end entry");
1030 	uvm_mapent_check(entry);
1031 
1032 	/*
1033 	 *	Create a new entry and insert it
1034 	 *	AFTER the specified entry
1035 	 */
1036 	new_entry = uvm_mapent_alloc(map, 0);
1037 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
1038 	uvm_mapent_splitadj(entry, new_entry, end);
1039 	uvm_map_entry_link(map, entry, new_entry);
1040 
1041 	uvm_map_check(map, "clip_end leave");
1042 }
1043 
1044 /*
1045  *   M A P   -   m a i n   e n t r y   p o i n t
1046  */
1047 /*
1048  * uvm_map: establish a valid mapping in a map
1049  *
1050  * => assume startp is page aligned.
1051  * => assume size is a multiple of PAGE_SIZE.
1052  * => assume sys_mmap provides enough of a "hint" to have us skip
1053  *	over text/data/bss area.
1054  * => map must be unlocked (we will lock it)
1055  * => <uobj,uoffset> value meanings (4 cases):
1056  *	 [1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1057  *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1058  *	 [3] <uobj,uoffset>		== normal mapping
1059  *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1060  *
1061  *    case [4] is for kernel mappings where we don't know the offset until
1062  *    we've found a virtual address.   note that kernel object offsets are
1063  *    always relative to vm_map_min(kernel_map).
1064  *
1065  * => if `align' is non-zero, we align the virtual address to the specified
1066  *	alignment.
1067  *	this is provided as a mechanism for large pages.
1068  *
1069  * => XXXCDC: need way to map in external amap?
1070  */
1071 
1072 int
1073 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
1074     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
1075 {
1076 	struct uvm_map_args args;
1077 	struct vm_map_entry *new_entry;
1078 	int error;
1079 
1080 	KASSERT((size & PAGE_MASK) == 0);
1081 	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1082 
1083 	/*
1084 	 * for pager_map, allocate the new entry first to avoid sleeping
1085 	 * for memory while we have the map locked.
1086 	 */
1087 
1088 	new_entry = NULL;
1089 	if (map == pager_map) {
1090 		new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
1091 		if (__predict_false(new_entry == NULL))
1092 			return ENOMEM;
1093 	}
1094 	if (map == pager_map)
1095 		flags |= UVM_FLAG_NOMERGE;
1096 
1097 	error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
1098 	    flags, &args);
1099 	if (!error) {
1100 		error = uvm_map_enter(map, &args, new_entry);
1101 		*startp = args.uma_start;
1102 	} else if (new_entry) {
1103 		uvm_mapent_free(new_entry);
1104 	}
1105 
1106 #if defined(DEBUG)
1107 	if (!error && VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) {
1108 		uvm_km_check_empty(map, *startp, *startp + size);
1109 	}
1110 #endif /* defined(DEBUG) */
1111 
1112 	return error;
1113 }
1114 
1115 /*
1116  * uvm_map_prepare:
1117  *
1118  * called with map unlocked.
1119  * on success, returns the map locked.
1120  */
1121 
1122 int
1123 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
1124     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
1125     struct uvm_map_args *args)
1126 {
1127 	struct vm_map_entry *prev_entry;
1128 	vm_prot_t prot = UVM_PROTECTION(flags);
1129 	vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
1130 
1131 	UVMHIST_FUNC("uvm_map_prepare");
1132 	UVMHIST_CALLED(maphist);
1133 
1134 	UVMHIST_LOG(maphist, "(map=%#jx, start=%#jx, size=%ju, flags=%#jx)",
1135 	    (uintptr_t)map, start, size, flags);
1136 	UVMHIST_LOG(maphist, "  uobj/offset %#jx/%jd", (uintptr_t)uobj,
1137 	    uoffset,0,0);
1138 
1139 	/*
1140 	 * detect a popular device driver bug.
1141 	 */
1142 
1143 	KASSERT(doing_shutdown || curlwp != NULL);
1144 
1145 	/*
1146 	 * zero-sized mapping doesn't make any sense.
1147 	 */
1148 	KASSERT(size > 0);
1149 
1150 	KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0);
1151 
1152 	uvm_map_check(map, "map entry");
1153 
1154 	/*
1155 	 * check sanity of protection code
1156 	 */
1157 
1158 	if ((prot & maxprot) != prot) {
1159 		UVMHIST_LOG(maphist, "<- prot. failure:  prot=%#jx, max=%#jx",
1160 		prot, maxprot,0,0);
1161 		return EACCES;
1162 	}
1163 
1164 	/*
1165 	 * figure out where to put new VM range
1166 	 */
1167 retry:
1168 	if (vm_map_lock_try(map) == false) {
1169 		if ((flags & UVM_FLAG_TRYLOCK) != 0) {
1170 			return EAGAIN;
1171 		}
1172 		vm_map_lock(map); /* could sleep here */
1173 	}
1174 	if (flags & UVM_FLAG_UNMAP) {
1175 		KASSERT(flags & UVM_FLAG_FIXED);
1176 		KASSERT((flags & UVM_FLAG_NOWAIT) == 0);
1177 
1178 		/*
1179 		 * Set prev_entry to what it will need to be after any existing
1180 		 * entries are removed later in uvm_map_enter().
1181 		 */
1182 
1183 		if (uvm_map_lookup_entry(map, start, &prev_entry)) {
1184 			if (start == prev_entry->start)
1185 				prev_entry = prev_entry->prev;
1186 			else
1187 				UVM_MAP_CLIP_END(map, prev_entry, start);
1188 			SAVE_HINT(map, map->hint, prev_entry);
1189 		}
1190 	} else {
1191 		prev_entry = uvm_map_findspace(map, start, size, &start,
1192 		    uobj, uoffset, align, flags);
1193 	}
1194 	if (prev_entry == NULL) {
1195 		unsigned int timestamp;
1196 
1197 		timestamp = map->timestamp;
1198 		UVMHIST_LOG(maphist,"waiting va timestamp=%#jx",
1199 			    timestamp,0,0,0);
1200 		map->flags |= VM_MAP_WANTVA;
1201 		vm_map_unlock(map);
1202 
1203 		/*
1204 		 * try to reclaim kva and wait until someone does unmap.
1205 		 * fragile locking here, so we awaken every second to
1206 		 * recheck the condition.
1207 		 */
1208 
1209 		mutex_enter(&map->misc_lock);
1210 		while ((map->flags & VM_MAP_WANTVA) != 0 &&
1211 		   map->timestamp == timestamp) {
1212 			if ((flags & UVM_FLAG_WAITVA) == 0) {
1213 				mutex_exit(&map->misc_lock);
1214 				UVMHIST_LOG(maphist,
1215 				    "<- uvm_map_findspace failed!", 0,0,0,0);
1216 				return ENOMEM;
1217 			} else {
1218 				cv_timedwait(&map->cv, &map->misc_lock, hz);
1219 			}
1220 		}
1221 		mutex_exit(&map->misc_lock);
1222 		goto retry;
1223 	}
1224 
1225 #ifdef PMAP_GROWKERNEL
1226 	/*
1227 	 * If the kernel pmap can't map the requested space,
1228 	 * then allocate more resources for it.
1229 	 */
1230 	if (map == kernel_map && uvm_maxkaddr < (start + size))
1231 		uvm_maxkaddr = pmap_growkernel(start + size);
1232 #endif
1233 
1234 	UVMMAP_EVCNT_INCR(map_call);
1235 
1236 	/*
1237 	 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
1238 	 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
1239 	 * either case we want to zero it  before storing it in the map entry
1240 	 * (because it looks strange and confusing when debugging...)
1241 	 *
1242 	 * if uobj is not null
1243 	 *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
1244 	 *      and we do not need to change uoffset.
1245 	 *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
1246 	 *      now (based on the starting address of the map).   this case is
1247 	 *      for kernel object mappings where we don't know the offset until
1248 	 *      the virtual address is found (with uvm_map_findspace).   the
1249 	 *      offset is the distance we are from the start of the map.
1250 	 */
1251 
1252 	if (uobj == NULL) {
1253 		uoffset = 0;
1254 	} else {
1255 		if (uoffset == UVM_UNKNOWN_OFFSET) {
1256 			KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1257 			uoffset = start - vm_map_min(kernel_map);
1258 		}
1259 	}
1260 
1261 	args->uma_flags = flags;
1262 	args->uma_prev = prev_entry;
1263 	args->uma_start = start;
1264 	args->uma_size = size;
1265 	args->uma_uobj = uobj;
1266 	args->uma_uoffset = uoffset;
1267 
1268 	UVMHIST_LOG(maphist, "<- done!", 0,0,0,0);
1269 	return 0;
1270 }
1271 
1272 /*
1273  * uvm_map_enter:
1274  *
1275  * called with map locked.
1276  * unlock the map before returning.
1277  */
1278 
1279 int
1280 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
1281     struct vm_map_entry *new_entry)
1282 {
1283 	struct vm_map_entry *prev_entry = args->uma_prev;
1284 	struct vm_map_entry *dead = NULL, *dead_entries = NULL;
1285 
1286 	const uvm_flag_t flags = args->uma_flags;
1287 	const vm_prot_t prot = UVM_PROTECTION(flags);
1288 	const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
1289 	const vm_inherit_t inherit = UVM_INHERIT(flags);
1290 	const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
1291 	    AMAP_EXTEND_NOWAIT : 0;
1292 	const int advice = UVM_ADVICE(flags);
1293 
1294 	vaddr_t start = args->uma_start;
1295 	vsize_t size = args->uma_size;
1296 	struct uvm_object *uobj = args->uma_uobj;
1297 	voff_t uoffset = args->uma_uoffset;
1298 
1299 	const int kmap = (vm_map_pmap(map) == pmap_kernel());
1300 	int merged = 0;
1301 	int error;
1302 	int newetype;
1303 
1304 	UVMHIST_FUNC("uvm_map_enter");
1305 	UVMHIST_CALLED(maphist);
1306 
1307 	UVMHIST_LOG(maphist, "(map=%#jx, start=%#jx, size=%ju, flags=%#jx)",
1308 	    (uintptr_t)map, start, size, flags);
1309 	UVMHIST_LOG(maphist, "  uobj/offset %#jx/%jd", (uintptr_t)uobj,
1310 	    uoffset,0,0);
1311 
1312 	KASSERT(map->hint == prev_entry); /* bimerge case assumes this */
1313 	KASSERT(vm_map_locked_p(map));
1314 	KASSERT((flags & (UVM_FLAG_NOWAIT | UVM_FLAG_UNMAP)) !=
1315 		(UVM_FLAG_NOWAIT | UVM_FLAG_UNMAP));
1316 
1317 	if (uobj)
1318 		newetype = UVM_ET_OBJ;
1319 	else
1320 		newetype = 0;
1321 
1322 	if (flags & UVM_FLAG_COPYONW) {
1323 		newetype |= UVM_ET_COPYONWRITE;
1324 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1325 			newetype |= UVM_ET_NEEDSCOPY;
1326 	}
1327 
1328 	/*
1329 	 * For mappings with unmap, remove any old entries now.  Adding the new
1330 	 * entry cannot fail because that can only happen if UVM_FLAG_NOWAIT
1331 	 * is set, and we do not support nowait and unmap together.
1332 	 */
1333 
1334 	if (flags & UVM_FLAG_UNMAP) {
1335 		KASSERT(flags & UVM_FLAG_FIXED);
1336 		uvm_unmap_remove(map, start, start + size, &dead_entries, 0);
1337 #ifdef DEBUG
1338 		struct vm_map_entry *tmp_entry __diagused;
1339 		bool rv __diagused;
1340 
1341 		rv = uvm_map_lookup_entry(map, start, &tmp_entry);
1342 		KASSERT(!rv);
1343 		KASSERTMSG(prev_entry == tmp_entry,
1344 			   "args %p prev_entry %p tmp_entry %p",
1345 			   args, prev_entry, tmp_entry);
1346 #endif
1347 		SAVE_HINT(map, map->hint, prev_entry);
1348 	}
1349 
1350 	/*
1351 	 * try and insert in map by extending previous entry, if possible.
1352 	 * XXX: we don't try and pull back the next entry.   might be useful
1353 	 * for a stack, but we are currently allocating our stack in advance.
1354 	 */
1355 
1356 	if (flags & UVM_FLAG_NOMERGE)
1357 		goto nomerge;
1358 
1359 	if (prev_entry->end == start &&
1360 	    prev_entry != &map->header &&
1361 	    UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, 0,
1362 	    prot, maxprot, inherit, advice, 0)) {
1363 
1364 		if (uobj && prev_entry->offset +
1365 		    (prev_entry->end - prev_entry->start) != uoffset)
1366 			goto forwardmerge;
1367 
1368 		/*
1369 		 * can't extend a shared amap.  note: no need to lock amap to
1370 		 * look at refs since we don't care about its exact value.
1371 		 * if it is one (i.e. we have only reference) it will stay there
1372 		 */
1373 
1374 		if (prev_entry->aref.ar_amap &&
1375 		    amap_refs(prev_entry->aref.ar_amap) != 1) {
1376 			goto forwardmerge;
1377 		}
1378 
1379 		if (prev_entry->aref.ar_amap) {
1380 			error = amap_extend(prev_entry, size,
1381 			    amapwaitflag | AMAP_EXTEND_FORWARDS);
1382 			if (error)
1383 				goto nomerge;
1384 		}
1385 
1386 		if (kmap) {
1387 			UVMMAP_EVCNT_INCR(kbackmerge);
1388 		} else {
1389 			UVMMAP_EVCNT_INCR(ubackmerge);
1390 		}
1391 		UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);
1392 
1393 		/*
1394 		 * drop our reference to uobj since we are extending a reference
1395 		 * that we already have (the ref count can not drop to zero).
1396 		 */
1397 
1398 		if (uobj && uobj->pgops->pgo_detach)
1399 			uobj->pgops->pgo_detach(uobj);
1400 
1401 		/*
1402 		 * Now that we've merged the entries, note that we've grown
1403 		 * and our gap has shrunk.  Then fix the tree.
1404 		 */
1405 		prev_entry->end += size;
1406 		prev_entry->gap -= size;
1407 		uvm_rb_fixup(map, prev_entry);
1408 
1409 		uvm_map_check(map, "map backmerged");
1410 
1411 		UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
1412 		merged++;
1413 	}
1414 
1415 forwardmerge:
1416 	if (prev_entry->next->start == (start + size) &&
1417 	    prev_entry->next != &map->header &&
1418 	    UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, 0,
1419 	    prot, maxprot, inherit, advice, 0)) {
1420 
1421 		if (uobj && prev_entry->next->offset != uoffset + size)
1422 			goto nomerge;
1423 
1424 		/*
1425 		 * can't extend a shared amap.  note: no need to lock amap to
1426 		 * look at refs since we don't care about its exact value.
1427 		 * if it is one (i.e. we have only reference) it will stay there.
1428 		 *
1429 		 * note that we also can't merge two amaps, so if we
1430 		 * merged with the previous entry which has an amap,
1431 		 * and the next entry also has an amap, we give up.
1432 		 *
1433 		 * Interesting cases:
1434 		 * amap, new, amap -> give up second merge (single fwd extend)
1435 		 * amap, new, none -> double forward extend (extend again here)
1436 		 * none, new, amap -> double backward extend (done here)
1437 		 * uobj, new, amap -> single backward extend (done here)
1438 		 *
1439 		 * XXX should we attempt to deal with someone refilling
1440 		 * the deallocated region between two entries that are
1441 		 * backed by the same amap (ie, arefs is 2, "prev" and
1442 		 * "next" refer to it, and adding this allocation will
1443 		 * close the hole, thus restoring arefs to 1 and
1444 		 * deallocating the "next" vm_map_entry)?  -- @@@
1445 		 */
1446 
1447 		if (prev_entry->next->aref.ar_amap &&
1448 		    (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
1449 		     (merged && prev_entry->aref.ar_amap))) {
1450 			goto nomerge;
1451 		}
1452 
1453 		if (merged) {
1454 			/*
1455 			 * Try to extend the amap of the previous entry to
1456 			 * cover the next entry as well.  If it doesn't work
1457 			 * just skip on, don't actually give up, since we've
1458 			 * already completed the back merge.
1459 			 */
1460 			if (prev_entry->aref.ar_amap) {
1461 				if (amap_extend(prev_entry,
1462 				    prev_entry->next->end -
1463 				    prev_entry->next->start,
1464 				    amapwaitflag | AMAP_EXTEND_FORWARDS))
1465 					goto nomerge;
1466 			}
1467 
1468 			/*
1469 			 * Try to extend the amap of the *next* entry
1470 			 * back to cover the new allocation *and* the
1471 			 * previous entry as well (the previous merge
1472 			 * didn't have an amap already otherwise we
1473 			 * wouldn't be checking here for an amap).  If
1474 			 * it doesn't work just skip on, again, don't
1475 			 * actually give up, since we've already
1476 			 * completed the back merge.
1477 			 */
1478 			else if (prev_entry->next->aref.ar_amap) {
1479 				if (amap_extend(prev_entry->next,
1480 				    prev_entry->end -
1481 				    prev_entry->start,
1482 				    amapwaitflag | AMAP_EXTEND_BACKWARDS))
1483 					goto nomerge;
1484 			}
1485 		} else {
1486 			/*
1487 			 * Pull the next entry's amap backwards to cover this
1488 			 * new allocation.
1489 			 */
1490 			if (prev_entry->next->aref.ar_amap) {
1491 				error = amap_extend(prev_entry->next, size,
1492 				    amapwaitflag | AMAP_EXTEND_BACKWARDS);
1493 				if (error)
1494 					goto nomerge;
1495 			}
1496 		}
1497 
1498 		if (merged) {
1499 			if (kmap) {
1500 				UVMMAP_EVCNT_DECR(kbackmerge);
1501 				UVMMAP_EVCNT_INCR(kbimerge);
1502 			} else {
1503 				UVMMAP_EVCNT_DECR(ubackmerge);
1504 				UVMMAP_EVCNT_INCR(ubimerge);
1505 			}
1506 		} else {
1507 			if (kmap) {
1508 				UVMMAP_EVCNT_INCR(kforwmerge);
1509 			} else {
1510 				UVMMAP_EVCNT_INCR(uforwmerge);
1511 			}
1512 		}
1513 		UVMHIST_LOG(maphist,"  starting forward merge", 0, 0, 0, 0);
1514 
1515 		/*
1516 		 * drop our reference to uobj since we are extending a reference
1517 		 * that we already have (the ref count can not drop to zero).
1518 		 */
1519 		if (uobj && uobj->pgops->pgo_detach)
1520 			uobj->pgops->pgo_detach(uobj);
1521 
1522 		if (merged) {
1523 			dead = prev_entry->next;
1524 			prev_entry->end = dead->end;
1525 			uvm_map_entry_unlink(map, dead);
1526 			if (dead->aref.ar_amap != NULL) {
1527 				prev_entry->aref = dead->aref;
1528 				dead->aref.ar_amap = NULL;
1529 			}
1530 		} else {
1531 			prev_entry->next->start -= size;
1532 			if (prev_entry != &map->header) {
1533 				prev_entry->gap -= size;
1534 				KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry));
1535 				uvm_rb_fixup(map, prev_entry);
1536 			}
1537 			if (uobj)
1538 				prev_entry->next->offset = uoffset;
1539 		}
1540 
1541 		uvm_map_check(map, "map forwardmerged");
1542 
1543 		UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
1544 		merged++;
1545 	}
1546 
1547 nomerge:
1548 	if (!merged) {
1549 		UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);
1550 		if (kmap) {
1551 			UVMMAP_EVCNT_INCR(knomerge);
1552 		} else {
1553 			UVMMAP_EVCNT_INCR(unomerge);
1554 		}
1555 
1556 		/*
1557 		 * allocate new entry and link it in.
1558 		 */
1559 
1560 		if (new_entry == NULL) {
1561 			new_entry = uvm_mapent_alloc(map,
1562 				(flags & UVM_FLAG_NOWAIT));
1563 			if (__predict_false(new_entry == NULL)) {
1564 				error = ENOMEM;
1565 				goto done;
1566 			}
1567 		}
1568 		new_entry->start = start;
1569 		new_entry->end = new_entry->start + size;
1570 		new_entry->object.uvm_obj = uobj;
1571 		new_entry->offset = uoffset;
1572 
1573 		new_entry->etype = newetype;
1574 
1575 		if (flags & UVM_FLAG_NOMERGE) {
1576 			new_entry->flags |= UVM_MAP_NOMERGE;
1577 		}
1578 
1579 		new_entry->protection = prot;
1580 		new_entry->max_protection = maxprot;
1581 		new_entry->inheritance = inherit;
1582 		new_entry->wired_count = 0;
1583 		new_entry->advice = advice;
1584 		if (flags & UVM_FLAG_OVERLAY) {
1585 
1586 			/*
1587 			 * to_add: for BSS we overallocate a little since we
1588 			 * are likely to extend
1589 			 */
1590 
1591 			vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
1592 				UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
1593 			struct vm_amap *amap = amap_alloc(size, to_add,
1594 			    (flags & UVM_FLAG_NOWAIT));
1595 			if (__predict_false(amap == NULL)) {
1596 				error = ENOMEM;
1597 				goto done;
1598 			}
1599 			new_entry->aref.ar_pageoff = 0;
1600 			new_entry->aref.ar_amap = amap;
1601 		} else {
1602 			new_entry->aref.ar_pageoff = 0;
1603 			new_entry->aref.ar_amap = NULL;
1604 		}
1605 		uvm_map_entry_link(map, prev_entry, new_entry);
1606 
1607 		/*
1608 		 * Update the free space hint
1609 		 */
1610 
1611 		if ((map->first_free == prev_entry) &&
1612 		    (prev_entry->end >= new_entry->start))
1613 			map->first_free = new_entry;
1614 
1615 		new_entry = NULL;
1616 	}
1617 
1618 	map->size += size;
1619 
1620 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1621 
1622 	error = 0;
1623 
1624 done:
1625 	vm_map_unlock(map);
1626 
1627 	if (new_entry) {
1628 		uvm_mapent_free(new_entry);
1629 	}
1630 	if (dead) {
1631 		KDASSERT(merged);
1632 		uvm_mapent_free(dead);
1633 	}
1634 	if (dead_entries)
1635 		uvm_unmap_detach(dead_entries, 0);
1636 
1637 	return error;
1638 }
1639 
1640 /*
1641  * uvm_map_lookup_entry_bytree: lookup an entry in tree
1642  */
1643 
1644 static inline bool
1645 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address,
1646     struct vm_map_entry **entry	/* OUT */)
1647 {
1648 	struct vm_map_entry *prev = &map->header;
1649 	struct vm_map_entry *cur = ROOT_ENTRY(map);
1650 
1651 	while (cur) {
1652 		UVMMAP_EVCNT_INCR(mlk_treeloop);
1653 		if (address >= cur->start) {
1654 			if (address < cur->end) {
1655 				*entry = cur;
1656 				return true;
1657 			}
1658 			prev = cur;
1659 			cur = RIGHT_ENTRY(cur);
1660 		} else
1661 			cur = LEFT_ENTRY(cur);
1662 	}
1663 	*entry = prev;
1664 	return false;
1665 }
1666 
1667 /*
1668  * uvm_map_lookup_entry: find map entry at or before an address
1669  *
1670  * => map must at least be read-locked by caller
1671  * => entry is returned in "entry"
1672  * => return value is true if address is in the returned entry
1673  */
1674 
1675 bool
1676 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1677     struct vm_map_entry **entry	/* OUT */)
1678 {
1679 	struct vm_map_entry *cur;
1680 	UVMHIST_FUNC("uvm_map_lookup_entry");
1681 	UVMHIST_CALLED(maphist);
1682 
1683 	UVMHIST_LOG(maphist,"(map=%#jx,addr=%#jx,ent=%#jx)",
1684 	    (uintptr_t)map, address, (uintptr_t)entry, 0);
1685 
1686 	/*
1687 	 * make a quick check to see if we are already looking at
1688 	 * the entry we want (which is usually the case).  note also
1689 	 * that we don't need to save the hint here...  it is the
1690 	 * same hint (unless we are at the header, in which case the
1691 	 * hint didn't buy us anything anyway).
1692 	 */
1693 
1694 	cur = map->hint;
1695 	UVMMAP_EVCNT_INCR(mlk_call);
1696 	if (cur != &map->header &&
1697 	    address >= cur->start && cur->end > address) {
1698 		UVMMAP_EVCNT_INCR(mlk_hint);
1699 		*entry = cur;
1700 		UVMHIST_LOG(maphist,"<- got it via hint (%#jx)",
1701 		    (uintptr_t)cur, 0, 0, 0);
1702 		uvm_mapent_check(*entry);
1703 		return (true);
1704 	}
1705 	uvm_map_check(map, __func__);
1706 
1707 	/*
1708 	 * lookup in the tree.
1709 	 */
1710 
1711 	UVMMAP_EVCNT_INCR(mlk_tree);
1712 	if (__predict_true(uvm_map_lookup_entry_bytree(map, address, entry))) {
1713 		SAVE_HINT(map, map->hint, *entry);
1714 		UVMHIST_LOG(maphist,"<- search got it (%#jx)",
1715 		    (uintptr_t)cur, 0, 0, 0);
1716 		KDASSERT((*entry)->start <= address);
1717 		KDASSERT(address < (*entry)->end);
1718 		uvm_mapent_check(*entry);
1719 		return (true);
1720 	}
1721 
1722 	SAVE_HINT(map, map->hint, *entry);
1723 	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
1724 	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
1725 	KDASSERT((*entry)->next == &map->header ||
1726 	    address < (*entry)->next->start);
1727 	return (false);
1728 }
1729 
1730 /*
1731  * See if the range between start and start + length fits in the gap
1732  * entry->next->start and entry->end.  Returns 1 if fits, 0 if doesn't
1733  * fit, and -1 address wraps around.
1734  */
1735 static int
1736 uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
1737     vsize_t align, int flags, int topdown, struct vm_map_entry *entry)
1738 {
1739 	vaddr_t end;
1740 
1741 #ifdef PMAP_PREFER
1742 	/*
1743 	 * push start address forward as needed to avoid VAC alias problems.
1744 	 * we only do this if a valid offset is specified.
1745 	 */
1746 
1747 	if (uoffset != UVM_UNKNOWN_OFFSET)
1748 		PMAP_PREFER(uoffset, start, length, topdown);
1749 #endif
1750 	if ((flags & UVM_FLAG_COLORMATCH) != 0) {
1751 		KASSERT(align < uvmexp.ncolors);
1752 		if (uvmexp.ncolors > 1) {
1753 			const u_int colormask = uvmexp.colormask;
1754 			const u_int colorsize = colormask + 1;
1755 			vaddr_t hint = atop(*start);
1756 			const u_int color = hint & colormask;
1757 			if (color != align) {
1758 				hint -= color;	/* adjust to color boundary */
1759 				KASSERT((hint & colormask) == 0);
1760 				if (topdown) {
1761 					if (align > color)
1762 						hint -= colorsize;
1763 				} else {
1764 					if (align < color)
1765 						hint += colorsize;
1766 				}
1767 				*start = ptoa(hint + align); /* adjust to color */
1768 			}
1769 		}
1770 	} else {
1771 		KASSERT(powerof2(align));
1772 		uvm_map_align_va(start, align, topdown);
1773 		/*
1774 		 * XXX Should we PMAP_PREFER() here again?
1775 		 * eh...i think we're okay
1776 		 */
1777 	}
1778 
1779 	/*
1780 	 * Find the end of the proposed new region.  Be sure we didn't
1781 	 * wrap around the address; if so, we lose.  Otherwise, if the
1782 	 * proposed new region fits before the next entry, we win.
1783 	 */
1784 
1785 	end = *start + length;
1786 	if (end < *start)
1787 		return (-1);
1788 
1789 	if (entry->next->start >= end && *start >= entry->end)
1790 		return (1);
1791 
1792 	return (0);
1793 }
1794 
1795 /*
1796  * uvm_map_findspace: find "length" sized space in "map".
1797  *
1798  * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
1799  *	set in "flags" (in which case we insist on using "hint").
1800  * => "result" is VA returned
1801  * => uobj/uoffset are to be used to handle VAC alignment, if required
1802  * => if "align" is non-zero, we attempt to align to that value.
1803  * => caller must at least have read-locked map
1804  * => returns NULL on failure, or pointer to prev. map entry if success
1805  * => note this is a cross between the old vm_map_findspace and vm_map_find
1806  */
1807 
1808 struct vm_map_entry *
1809 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
1810     vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset,
1811     vsize_t align, int flags)
1812 {
1813 	struct vm_map_entry *entry;
1814 	struct vm_map_entry *child, *prev, *tmp;
1815 	vaddr_t orig_hint __diagused;
1816 	const int topdown = map->flags & VM_MAP_TOPDOWN;
1817 	UVMHIST_FUNC("uvm_map_findspace");
1818 	UVMHIST_CALLED(maphist);
1819 
1820 	UVMHIST_LOG(maphist, "(map=%#jx, hint=%#jx, len=%ju, flags=%#jx)",
1821 	    (uintptr_t)map, hint, length, flags);
1822 	KASSERT((flags & UVM_FLAG_COLORMATCH) != 0 || powerof2(align));
1823 	KASSERT((flags & UVM_FLAG_COLORMATCH) == 0 || align < uvmexp.ncolors);
1824 	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1825 
1826 	uvm_map_check(map, "map_findspace entry");
1827 
1828 	/*
1829 	 * remember the original hint.  if we are aligning, then we
1830 	 * may have to try again with no alignment constraint if
1831 	 * we fail the first time.
1832 	 */
1833 
1834 	orig_hint = hint;
1835 	if (hint < vm_map_min(map)) {	/* check ranges ... */
1836 		if (flags & UVM_FLAG_FIXED) {
1837 			UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
1838 			return (NULL);
1839 		}
1840 		hint = vm_map_min(map);
1841 	}
1842 	if (hint > vm_map_max(map)) {
1843 		UVMHIST_LOG(maphist,"<- VA %#jx > range [%#jx->%#jx]",
1844 		    hint, vm_map_min(map), vm_map_max(map), 0);
1845 		return (NULL);
1846 	}
1847 
1848 	/*
1849 	 * hint may not be aligned properly; we need round up or down it
1850 	 * before proceeding further.
1851 	 */
1852 	if ((flags & UVM_FLAG_COLORMATCH) == 0)
1853 		uvm_map_align_va(&hint, align, topdown);
1854 
1855 	/*
1856 	 * Look for the first possible address; if there's already
1857 	 * something at this address, we have to start after it.
1858 	 */
1859 
1860 	/*
1861 	 * @@@: there are four, no, eight cases to consider.
1862 	 *
1863 	 * 0: found,     fixed,     bottom up -> fail
1864 	 * 1: found,     fixed,     top down  -> fail
1865 	 * 2: found,     not fixed, bottom up -> start after entry->end,
1866 	 *                                       loop up
1867 	 * 3: found,     not fixed, top down  -> start before entry->start,
1868 	 *                                       loop down
1869 	 * 4: not found, fixed,     bottom up -> check entry->next->start, fail
1870 	 * 5: not found, fixed,     top down  -> check entry->next->start, fail
1871 	 * 6: not found, not fixed, bottom up -> check entry->next->start,
1872 	 *                                       loop up
1873 	 * 7: not found, not fixed, top down  -> check entry->next->start,
1874 	 *                                       loop down
1875 	 *
1876 	 * as you can see, it reduces to roughly five cases, and that
1877 	 * adding top down mapping only adds one unique case (without
1878 	 * it, there would be four cases).
1879 	 */
1880 
1881 	if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) {
1882 		entry = map->first_free;
1883 	} else {
1884 		if (uvm_map_lookup_entry(map, hint, &entry)) {
1885 			/* "hint" address already in use ... */
1886 			if (flags & UVM_FLAG_FIXED) {
1887 				UVMHIST_LOG(maphist, "<- fixed & VA in use",
1888 				    0, 0, 0, 0);
1889 				return (NULL);
1890 			}
1891 			if (topdown)
1892 				/* Start from lower gap. */
1893 				entry = entry->prev;
1894 		} else if (flags & UVM_FLAG_FIXED) {
1895 			if (entry->next->start >= hint + length &&
1896 			    hint + length > hint)
1897 				goto found;
1898 
1899 			/* "hint" address is gap but too small */
1900 			UVMHIST_LOG(maphist, "<- fixed mapping failed",
1901 			    0, 0, 0, 0);
1902 			return (NULL); /* only one shot at it ... */
1903 		} else {
1904 			/*
1905 			 * See if given hint fits in this gap.
1906 			 */
1907 			switch (uvm_map_space_avail(&hint, length,
1908 			    uoffset, align, flags, topdown, entry)) {
1909 			case 1:
1910 				goto found;
1911 			case -1:
1912 				goto wraparound;
1913 			}
1914 
1915 			if (topdown) {
1916 				/*
1917 				 * Still there is a chance to fit
1918 				 * if hint > entry->end.
1919 				 */
1920 			} else {
1921 				/* Start from higher gap. */
1922 				entry = entry->next;
1923 				if (entry == &map->header)
1924 					goto notfound;
1925 				goto nextgap;
1926 			}
1927 		}
1928 	}
1929 
1930 	/*
1931 	 * Note that all UVM_FLAGS_FIXED case is already handled.
1932 	 */
1933 	KDASSERT((flags & UVM_FLAG_FIXED) == 0);
1934 
1935 	/* Try to find the space in the red-black tree */
1936 
1937 	/* Check slot before any entry */
1938 	hint = topdown ? entry->next->start - length : entry->end;
1939 	switch (uvm_map_space_avail(&hint, length, uoffset, align, flags,
1940 	    topdown, entry)) {
1941 	case 1:
1942 		goto found;
1943 	case -1:
1944 		goto wraparound;
1945 	}
1946 
1947 nextgap:
1948 	KDASSERT((flags & UVM_FLAG_FIXED) == 0);
1949 	/* If there is not enough space in the whole tree, we fail */
1950 	tmp = ROOT_ENTRY(map);
1951 	if (tmp == NULL || tmp->maxgap < length)
1952 		goto notfound;
1953 
1954 	prev = NULL; /* previous candidate */
1955 
1956 	/* Find an entry close to hint that has enough space */
1957 	for (; tmp;) {
1958 		KASSERT(tmp->next->start == tmp->end + tmp->gap);
1959 		if (topdown) {
1960 			if (tmp->next->start < hint + length &&
1961 			    (prev == NULL || tmp->end > prev->end)) {
1962 				if (tmp->gap >= length)
1963 					prev = tmp;
1964 				else if ((child = LEFT_ENTRY(tmp)) != NULL
1965 				    && child->maxgap >= length)
1966 					prev = tmp;
1967 			}
1968 		} else {
1969 			if (tmp->end >= hint &&
1970 			    (prev == NULL || tmp->end < prev->end)) {
1971 				if (tmp->gap >= length)
1972 					prev = tmp;
1973 				else if ((child = RIGHT_ENTRY(tmp)) != NULL
1974 				    && child->maxgap >= length)
1975 					prev = tmp;
1976 			}
1977 		}
1978 		if (tmp->next->start < hint + length)
1979 			child = RIGHT_ENTRY(tmp);
1980 		else if (tmp->end > hint)
1981 			child = LEFT_ENTRY(tmp);
1982 		else {
1983 			if (tmp->gap >= length)
1984 				break;
1985 			if (topdown)
1986 				child = LEFT_ENTRY(tmp);
1987 			else
1988 				child = RIGHT_ENTRY(tmp);
1989 		}
1990 		if (child == NULL || child->maxgap < length)
1991 			break;
1992 		tmp = child;
1993 	}
1994 
1995 	if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
1996 		/*
1997 		 * Check if the entry that we found satifies the
1998 		 * space requirement
1999 		 */
2000 		if (topdown) {
2001 			if (hint > tmp->next->start - length)
2002 				hint = tmp->next->start - length;
2003 		} else {
2004 			if (hint < tmp->end)
2005 				hint = tmp->end;
2006 		}
2007 		switch (uvm_map_space_avail(&hint, length, uoffset, align,
2008 		    flags, topdown, tmp)) {
2009 		case 1:
2010 			entry = tmp;
2011 			goto found;
2012 		case -1:
2013 			goto wraparound;
2014 		}
2015 		if (tmp->gap >= length)
2016 			goto listsearch;
2017 	}
2018 	if (prev == NULL)
2019 		goto notfound;
2020 
2021 	if (topdown) {
2022 		KASSERT(orig_hint >= prev->next->start - length ||
2023 		    prev->next->start - length > prev->next->start);
2024 		hint = prev->next->start - length;
2025 	} else {
2026 		KASSERT(orig_hint <= prev->end);
2027 		hint = prev->end;
2028 	}
2029 	switch (uvm_map_space_avail(&hint, length, uoffset, align,
2030 	    flags, topdown, prev)) {
2031 	case 1:
2032 		entry = prev;
2033 		goto found;
2034 	case -1:
2035 		goto wraparound;
2036 	}
2037 	if (prev->gap >= length)
2038 		goto listsearch;
2039 
2040 	if (topdown)
2041 		tmp = LEFT_ENTRY(prev);
2042 	else
2043 		tmp = RIGHT_ENTRY(prev);
2044 	for (;;) {
2045 		KASSERT(tmp && tmp->maxgap >= length);
2046 		if (topdown)
2047 			child = RIGHT_ENTRY(tmp);
2048 		else
2049 			child = LEFT_ENTRY(tmp);
2050 		if (child && child->maxgap >= length) {
2051 			tmp = child;
2052 			continue;
2053 		}
2054 		if (tmp->gap >= length)
2055 			break;
2056 		if (topdown)
2057 			tmp = LEFT_ENTRY(tmp);
2058 		else
2059 			tmp = RIGHT_ENTRY(tmp);
2060 	}
2061 
2062 	if (topdown) {
2063 		KASSERT(orig_hint >= tmp->next->start - length ||
2064 		    tmp->next->start - length > tmp->next->start);
2065 		hint = tmp->next->start - length;
2066 	} else {
2067 		KASSERT(orig_hint <= tmp->end);
2068 		hint = tmp->end;
2069 	}
2070 	switch (uvm_map_space_avail(&hint, length, uoffset, align,
2071 	    flags, topdown, tmp)) {
2072 	case 1:
2073 		entry = tmp;
2074 		goto found;
2075 	case -1:
2076 		goto wraparound;
2077 	}
2078 
2079 	/*
2080 	 * The tree fails to find an entry because of offset or alignment
2081 	 * restrictions.  Search the list instead.
2082 	 */
2083  listsearch:
2084 	/*
2085 	 * Look through the rest of the map, trying to fit a new region in
2086 	 * the gap between existing regions, or after the very last region.
2087 	 * note: entry->end = base VA of current gap,
2088 	 *	 entry->next->start = VA of end of current gap
2089 	 */
2090 
2091 	for (;;) {
2092 		/* Update hint for current gap. */
2093 		hint = topdown ? entry->next->start - length : entry->end;
2094 
2095 		/* See if it fits. */
2096 		switch (uvm_map_space_avail(&hint, length, uoffset, align,
2097 		    flags, topdown, entry)) {
2098 		case 1:
2099 			goto found;
2100 		case -1:
2101 			goto wraparound;
2102 		}
2103 
2104 		/* Advance to next/previous gap */
2105 		if (topdown) {
2106 			if (entry == &map->header) {
2107 				UVMHIST_LOG(maphist, "<- failed (off start)",
2108 				    0,0,0,0);
2109 				goto notfound;
2110 			}
2111 			entry = entry->prev;
2112 		} else {
2113 			entry = entry->next;
2114 			if (entry == &map->header) {
2115 				UVMHIST_LOG(maphist, "<- failed (off end)",
2116 				    0,0,0,0);
2117 				goto notfound;
2118 			}
2119 		}
2120 	}
2121 
2122  found:
2123 	SAVE_HINT(map, map->hint, entry);
2124 	*result = hint;
2125 	UVMHIST_LOG(maphist,"<- got it!  (result=%#jx)", hint, 0,0,0);
2126 	KASSERTMSG( topdown || hint >= orig_hint, "hint: %jx, orig_hint: %jx",
2127 	    (uintmax_t)hint, (uintmax_t)orig_hint);
2128 	KASSERTMSG(!topdown || hint <= orig_hint, "hint: %jx, orig_hint: %jx",
2129 	    (uintmax_t)hint, (uintmax_t)orig_hint);
2130 	KASSERT(entry->end <= hint);
2131 	KASSERT(hint + length <= entry->next->start);
2132 	return (entry);
2133 
2134  wraparound:
2135 	UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);
2136 
2137 	return (NULL);
2138 
2139  notfound:
2140 	UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);
2141 
2142 	return (NULL);
2143 }
2144 
2145 /*
2146  *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
2147  */
2148 
2149 /*
2150  * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
2151  *
2152  * => caller must check alignment and size
2153  * => map must be locked by caller
2154  * => we return a list of map entries that we've remove from the map
2155  *    in "entry_list"
2156  */
2157 
2158 void
2159 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2160     struct vm_map_entry **entry_list /* OUT */, int flags)
2161 {
2162 	struct vm_map_entry *entry, *first_entry, *next;
2163 	vaddr_t len;
2164 	UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);
2165 
2166 	UVMHIST_LOG(maphist,"(map=%#jx, start=%#jx, end=%#jx)",
2167 	    (uintptr_t)map, start, end, 0);
2168 	VM_MAP_RANGE_CHECK(map, start, end);
2169 
2170 	uvm_map_check(map, "unmap_remove entry");
2171 
2172 	/*
2173 	 * find first entry
2174 	 */
2175 
2176 	if (uvm_map_lookup_entry(map, start, &first_entry) == true) {
2177 		/* clip and go... */
2178 		entry = first_entry;
2179 		UVM_MAP_CLIP_START(map, entry, start);
2180 		/* critical!  prevents stale hint */
2181 		SAVE_HINT(map, entry, entry->prev);
2182 	} else {
2183 		entry = first_entry->next;
2184 	}
2185 
2186 	/*
2187 	 * save the free space hint
2188 	 */
2189 
2190 	if (map->first_free != &map->header && map->first_free->start >= start)
2191 		map->first_free = entry->prev;
2192 
2193 	/*
2194 	 * note: we now re-use first_entry for a different task.  we remove
2195 	 * a number of map entries from the map and save them in a linked
2196 	 * list headed by "first_entry".  once we remove them from the map
2197 	 * the caller should unlock the map and drop the references to the
2198 	 * backing objects [c.f. uvm_unmap_detach].  the object is to
2199 	 * separate unmapping from reference dropping.  why?
2200 	 *   [1] the map has to be locked for unmapping
2201 	 *   [2] the map need not be locked for reference dropping
2202 	 *   [3] dropping references may trigger pager I/O, and if we hit
2203 	 *       a pager that does synchronous I/O we may have to wait for it.
2204 	 *   [4] we would like all waiting for I/O to occur with maps unlocked
2205 	 *       so that we don't block other threads.
2206 	 */
2207 
2208 	first_entry = NULL;
2209 	*entry_list = NULL;
2210 
2211 	/*
2212 	 * break up the area into map entry sized regions and unmap.  note
2213 	 * that all mappings have to be removed before we can even consider
2214 	 * dropping references to amaps or VM objects (otherwise we could end
2215 	 * up with a mapping to a page on the free list which would be very bad)
2216 	 */
2217 
2218 	while ((entry != &map->header) && (entry->start < end)) {
2219 		KASSERT((entry->flags & UVM_MAP_STATIC) == 0);
2220 
2221 		UVM_MAP_CLIP_END(map, entry, end);
2222 		next = entry->next;
2223 		len = entry->end - entry->start;
2224 
2225 		/*
2226 		 * unwire before removing addresses from the pmap; otherwise
2227 		 * unwiring will put the entries back into the pmap (XXX).
2228 		 */
2229 
2230 		if (VM_MAPENT_ISWIRED(entry)) {
2231 			uvm_map_entry_unwire(map, entry);
2232 		}
2233 		if (flags & UVM_FLAG_VAONLY) {
2234 
2235 			/* nothing */
2236 
2237 		} else if ((map->flags & VM_MAP_PAGEABLE) == 0) {
2238 
2239 			/*
2240 			 * if the map is non-pageable, any pages mapped there
2241 			 * must be wired and entered with pmap_kenter_pa(),
2242 			 * and we should free any such pages immediately.
2243 			 * this is mostly used for kmem_map.
2244 			 */
2245 			KASSERT(vm_map_pmap(map) == pmap_kernel());
2246 
2247 			uvm_km_pgremove_intrsafe(map, entry->start, entry->end);
2248 		} else if (UVM_ET_ISOBJ(entry) &&
2249 			   UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2250 			panic("%s: kernel object %p %p\n",
2251 			    __func__, map, entry);
2252 		} else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) {
2253 			/*
2254 			 * remove mappings the standard way.  lock object
2255 			 * and/or amap to ensure vm_page state does not
2256 			 * change while in pmap_remove().
2257 			 */
2258 
2259 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
2260 			uvm_map_lock_entry(entry, RW_WRITER);
2261 #else
2262 			uvm_map_lock_entry(entry, RW_READER);
2263 #endif
2264 			pmap_remove(map->pmap, entry->start, entry->end);
2265 
2266 			/*
2267 			 * note: if map is dying, leave pmap_update() for
2268 			 * pmap_destroy(), which will be called later.
2269 			 */
2270 
2271 			if ((map->flags & VM_MAP_DYING) == 0) {
2272 				pmap_update(vm_map_pmap(map));
2273 			} else {
2274 				KASSERT(vm_map_pmap(map) != pmap_kernel());
2275 			}
2276 
2277 			uvm_map_unlock_entry(entry);
2278 		}
2279 
2280 #if defined(UVMDEBUG)
2281 		/*
2282 		 * check if there's remaining mapping,
2283 		 * which is a bug in caller.
2284 		 */
2285 
2286 		vaddr_t va;
2287 		for (va = entry->start; va < entry->end;
2288 		    va += PAGE_SIZE) {
2289 			if (pmap_extract(vm_map_pmap(map), va, NULL)) {
2290 				panic("%s: %#"PRIxVADDR" has mapping",
2291 				    __func__, va);
2292 			}
2293 		}
2294 
2295 		if (VM_MAP_IS_KERNEL(map) && (flags & UVM_FLAG_NOWAIT) == 0) {
2296 			uvm_km_check_empty(map, entry->start,
2297 			    entry->end);
2298 		}
2299 #endif /* defined(UVMDEBUG) */
2300 
2301 		/*
2302 		 * remove entry from map and put it on our list of entries
2303 		 * that we've nuked.  then go to next entry.
2304 		 */
2305 
2306 		UVMHIST_LOG(maphist, "  removed map entry %#jx",
2307 		    (uintptr_t)entry, 0, 0, 0);
2308 
2309 		/* critical!  prevents stale hint */
2310 		SAVE_HINT(map, entry, entry->prev);
2311 
2312 		uvm_map_entry_unlink(map, entry);
2313 		KASSERT(map->size >= len);
2314 		map->size -= len;
2315 		entry->prev = NULL;
2316 		entry->next = first_entry;
2317 		first_entry = entry;
2318 		entry = next;
2319 	}
2320 
2321 	uvm_map_check(map, "unmap_remove leave");
2322 
2323 	/*
2324 	 * now we've cleaned up the map and are ready for the caller to drop
2325 	 * references to the mapped objects.
2326 	 */
2327 
2328 	*entry_list = first_entry;
2329 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
2330 
2331 	if (map->flags & VM_MAP_WANTVA) {
2332 		mutex_enter(&map->misc_lock);
2333 		map->flags &= ~VM_MAP_WANTVA;
2334 		cv_broadcast(&map->cv);
2335 		mutex_exit(&map->misc_lock);
2336 	}
2337 }
2338 
2339 /*
2340  * uvm_unmap_detach: drop references in a chain of map entries
2341  *
2342  * => we will free the map entries as we traverse the list.
2343  */
2344 
2345 void
2346 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
2347 {
2348 	struct vm_map_entry *next_entry;
2349 	UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
2350 
2351 	while (first_entry) {
2352 		KASSERT(!VM_MAPENT_ISWIRED(first_entry));
2353 		UVMHIST_LOG(maphist,
2354 		    "  detach %#jx: amap=%#jx, obj=%#jx, submap?=%jd",
2355 		    (uintptr_t)first_entry,
2356 		    (uintptr_t)first_entry->aref.ar_amap,
2357 		    (uintptr_t)first_entry->object.uvm_obj,
2358 		    UVM_ET_ISSUBMAP(first_entry));
2359 
2360 		/*
2361 		 * drop reference to amap, if we've got one
2362 		 */
2363 
2364 		if (first_entry->aref.ar_amap)
2365 			uvm_map_unreference_amap(first_entry, flags);
2366 
2367 		/*
2368 		 * drop reference to our backing object, if we've got one
2369 		 */
2370 
2371 		KASSERT(!UVM_ET_ISSUBMAP(first_entry));
2372 		if (UVM_ET_ISOBJ(first_entry) &&
2373 		    first_entry->object.uvm_obj->pgops->pgo_detach) {
2374 			(*first_entry->object.uvm_obj->pgops->pgo_detach)
2375 				(first_entry->object.uvm_obj);
2376 		}
2377 		next_entry = first_entry->next;
2378 		uvm_mapent_free(first_entry);
2379 		first_entry = next_entry;
2380 	}
2381 	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
2382 }
2383 
2384 /*
2385  *   E X T R A C T I O N   F U N C T I O N S
2386  */
2387 
2388 /*
2389  * uvm_map_reserve: reserve space in a vm_map for future use.
2390  *
2391  * => we reserve space in a map by putting a dummy map entry in the
2392  *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
2393  * => map should be unlocked (we will write lock it)
2394  * => we return true if we were able to reserve space
2395  * => XXXCDC: should be inline?
2396  */
2397 
2398 int
2399 uvm_map_reserve(struct vm_map *map, vsize_t size,
2400     vaddr_t offset	/* hint for pmap_prefer */,
2401     vsize_t align	/* alignment */,
2402     vaddr_t *raddr	/* IN:hint, OUT: reserved VA */,
2403     uvm_flag_t flags	/* UVM_FLAG_FIXED or UVM_FLAG_COLORMATCH or 0 */)
2404 {
2405 	UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
2406 
2407 	UVMHIST_LOG(maphist, "(map=%#jx, size=%#jx, offset=%#jx, addr=%#jx)",
2408 	    (uintptr_t)map, size, offset, (uintptr_t)raddr);
2409 
2410 	size = round_page(size);
2411 
2412 	/*
2413 	 * reserve some virtual space.
2414 	 */
2415 
2416 	if (uvm_map(map, raddr, size, NULL, offset, align,
2417 	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
2418 	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) {
2419 	    UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
2420 		return (false);
2421 	}
2422 
2423 	UVMHIST_LOG(maphist, "<- done (*raddr=%#jx)", *raddr,0,0,0);
2424 	return (true);
2425 }
2426 
2427 /*
2428  * uvm_map_replace: replace a reserved (blank) area of memory with
2429  * real mappings.
2430  *
2431  * => caller must WRITE-LOCK the map
2432  * => we return true if replacement was a success
2433  * => we expect the newents chain to have nnewents entrys on it and
2434  *    we expect newents->prev to point to the last entry on the list
2435  * => note newents is allowed to be NULL
2436  */
2437 
2438 static int
2439 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
2440     struct vm_map_entry *newents, int nnewents, vsize_t nsize,
2441     struct vm_map_entry **oldentryp)
2442 {
2443 	struct vm_map_entry *oldent, *last;
2444 
2445 	uvm_map_check(map, "map_replace entry");
2446 
2447 	/*
2448 	 * first find the blank map entry at the specified address
2449 	 */
2450 
2451 	if (!uvm_map_lookup_entry(map, start, &oldent)) {
2452 		return (false);
2453 	}
2454 
2455 	/*
2456 	 * check to make sure we have a proper blank entry
2457 	 */
2458 
2459 	if (end < oldent->end) {
2460 		UVM_MAP_CLIP_END(map, oldent, end);
2461 	}
2462 	if (oldent->start != start || oldent->end != end ||
2463 	    oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
2464 		return (false);
2465 	}
2466 
2467 #ifdef DIAGNOSTIC
2468 
2469 	/*
2470 	 * sanity check the newents chain
2471 	 */
2472 
2473 	{
2474 		struct vm_map_entry *tmpent = newents;
2475 		int nent = 0;
2476 		vsize_t sz = 0;
2477 		vaddr_t cur = start;
2478 
2479 		while (tmpent) {
2480 			nent++;
2481 			sz += tmpent->end - tmpent->start;
2482 			if (tmpent->start < cur)
2483 				panic("uvm_map_replace1");
2484 			if (tmpent->start >= tmpent->end || tmpent->end > end) {
2485 				panic("uvm_map_replace2: "
2486 				    "tmpent->start=%#"PRIxVADDR
2487 				    ", tmpent->end=%#"PRIxVADDR
2488 				    ", end=%#"PRIxVADDR,
2489 				    tmpent->start, tmpent->end, end);
2490 			}
2491 			cur = tmpent->end;
2492 			if (tmpent->next) {
2493 				if (tmpent->next->prev != tmpent)
2494 					panic("uvm_map_replace3");
2495 			} else {
2496 				if (newents->prev != tmpent)
2497 					panic("uvm_map_replace4");
2498 			}
2499 			tmpent = tmpent->next;
2500 		}
2501 		if (nent != nnewents)
2502 			panic("uvm_map_replace5");
2503 		if (sz != nsize)
2504 			panic("uvm_map_replace6");
2505 	}
2506 #endif
2507 
2508 	/*
2509 	 * map entry is a valid blank!   replace it.   (this does all the
2510 	 * work of map entry link/unlink...).
2511 	 */
2512 
2513 	if (newents) {
2514 		last = newents->prev;
2515 
2516 		/* critical: flush stale hints out of map */
2517 		SAVE_HINT(map, map->hint, newents);
2518 		if (map->first_free == oldent)
2519 			map->first_free = last;
2520 
2521 		last->next = oldent->next;
2522 		last->next->prev = last;
2523 
2524 		/* Fix RB tree */
2525 		uvm_rb_remove(map, oldent);
2526 
2527 		newents->prev = oldent->prev;
2528 		newents->prev->next = newents;
2529 		map->nentries = map->nentries + (nnewents - 1);
2530 
2531 		/* Fixup the RB tree */
2532 		{
2533 			int i;
2534 			struct vm_map_entry *tmp;
2535 
2536 			tmp = newents;
2537 			for (i = 0; i < nnewents && tmp; i++) {
2538 				uvm_rb_insert(map, tmp);
2539 				tmp = tmp->next;
2540 			}
2541 		}
2542 	} else {
2543 		/* NULL list of new entries: just remove the old one */
2544 		clear_hints(map, oldent);
2545 		uvm_map_entry_unlink(map, oldent);
2546 	}
2547 	map->size -= end - start - nsize;
2548 
2549 	uvm_map_check(map, "map_replace leave");
2550 
2551 	/*
2552 	 * now we can free the old blank entry and return.
2553 	 */
2554 
2555 	*oldentryp = oldent;
2556 	return (true);
2557 }
2558 
2559 /*
2560  * uvm_map_extract: extract a mapping from a map and put it somewhere
2561  *	(maybe removing the old mapping)
2562  *
2563  * => maps should be unlocked (we will write lock them)
2564  * => returns 0 on success, error code otherwise
2565  * => start must be page aligned
2566  * => len must be page sized
2567  * => flags:
2568  *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
2569  *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
2570  *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
2571  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
2572  *      UVM_EXTRACT_PROT_ALL: set prot to UVM_PROT_ALL as we go
2573  *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
2574  *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
2575  *             be used from within the kernel in a kernel level map <<<
2576  */
2577 
2578 int
2579 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
2580     struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
2581 {
2582 	vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge;
2583 	struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry,
2584 	    *deadentry, *oldentry;
2585 	struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */
2586 	vsize_t elen __unused;
2587 	int nchain, error, copy_ok;
2588 	vsize_t nsize;
2589 	UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
2590 
2591 	UVMHIST_LOG(maphist,"(srcmap=%#jx,start=%#jx, len=%#jx",
2592 	    (uintptr_t)srcmap, start, len, 0);
2593 	UVMHIST_LOG(maphist," ...,dstmap=%#jx, flags=%#jx)",
2594 	    (uintptr_t)dstmap, flags, 0, 0);
2595 
2596 	/*
2597 	 * step 0: sanity check: start must be on a page boundary, length
2598 	 * must be page sized.  can't ask for CONTIG/QREF if you asked for
2599 	 * REMOVE.
2600 	 */
2601 
2602 	KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
2603 	KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
2604 		(flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
2605 
2606 	/*
2607 	 * step 1: reserve space in the target map for the extracted area
2608 	 */
2609 
2610 	if ((flags & UVM_EXTRACT_RESERVED) == 0) {
2611 		dstaddr = vm_map_min(dstmap);
2612 		if (!uvm_map_reserve(dstmap, len, start,
2613 		    atop(start) & uvmexp.colormask, &dstaddr,
2614 		    UVM_FLAG_COLORMATCH))
2615 			return (ENOMEM);
2616 		KASSERT((atop(start ^ dstaddr) & uvmexp.colormask) == 0);
2617 		*dstaddrp = dstaddr;	/* pass address back to caller */
2618 		UVMHIST_LOG(maphist, "  dstaddr=%#jx", dstaddr,0,0,0);
2619 	} else {
2620 		dstaddr = *dstaddrp;
2621 	}
2622 
2623 	/*
2624 	 * step 2: setup for the extraction process loop by init'ing the
2625 	 * map entry chain, locking src map, and looking up the first useful
2626 	 * entry in the map.
2627 	 */
2628 
2629 	end = start + len;
2630 	newend = dstaddr + len;
2631 	chain = endchain = NULL;
2632 	nchain = 0;
2633 	nsize = 0;
2634 	vm_map_lock(srcmap);
2635 
2636 	if (uvm_map_lookup_entry(srcmap, start, &entry)) {
2637 
2638 		/* "start" is within an entry */
2639 		if (flags & UVM_EXTRACT_QREF) {
2640 
2641 			/*
2642 			 * for quick references we don't clip the entry, so
2643 			 * the entry may map space "before" the starting
2644 			 * virtual address... this is the "fudge" factor
2645 			 * (which can be non-zero only the first time
2646 			 * through the "while" loop in step 3).
2647 			 */
2648 
2649 			fudge = start - entry->start;
2650 		} else {
2651 
2652 			/*
2653 			 * normal reference: we clip the map to fit (thus
2654 			 * fudge is zero)
2655 			 */
2656 
2657 			UVM_MAP_CLIP_START(srcmap, entry, start);
2658 			SAVE_HINT(srcmap, srcmap->hint, entry->prev);
2659 			fudge = 0;
2660 		}
2661 	} else {
2662 
2663 		/* "start" is not within an entry ... skip to next entry */
2664 		if (flags & UVM_EXTRACT_CONTIG) {
2665 			error = EINVAL;
2666 			goto bad;    /* definite hole here ... */
2667 		}
2668 
2669 		entry = entry->next;
2670 		fudge = 0;
2671 	}
2672 
2673 	/* save values from srcmap for step 6 */
2674 	orig_entry = entry;
2675 	orig_fudge = fudge;
2676 
2677 	/*
2678 	 * step 3: now start looping through the map entries, extracting
2679 	 * as we go.
2680 	 */
2681 
2682 	while (entry->start < end && entry != &srcmap->header) {
2683 
2684 		/* if we are not doing a quick reference, clip it */
2685 		if ((flags & UVM_EXTRACT_QREF) == 0)
2686 			UVM_MAP_CLIP_END(srcmap, entry, end);
2687 
2688 		/* clear needs_copy (allow chunking) */
2689 		if (UVM_ET_ISNEEDSCOPY(entry)) {
2690 			amap_copy(srcmap, entry,
2691 			    AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end);
2692 			if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
2693 				error = ENOMEM;
2694 				goto bad;
2695 			}
2696 
2697 			/* amap_copy could clip (during chunk)!  update fudge */
2698 			if (fudge) {
2699 				fudge = start - entry->start;
2700 				orig_fudge = fudge;
2701 			}
2702 		}
2703 
2704 		/* calculate the offset of this from "start" */
2705 		oldoffset = (entry->start + fudge) - start;
2706 
2707 		/* allocate a new map entry */
2708 		newentry = uvm_mapent_alloc(dstmap, 0);
2709 		if (newentry == NULL) {
2710 			error = ENOMEM;
2711 			goto bad;
2712 		}
2713 
2714 		/* set up new map entry */
2715 		newentry->next = NULL;
2716 		newentry->prev = endchain;
2717 		newentry->start = dstaddr + oldoffset;
2718 		newentry->end =
2719 		    newentry->start + (entry->end - (entry->start + fudge));
2720 		if (newentry->end > newend || newentry->end < newentry->start)
2721 			newentry->end = newend;
2722 		newentry->object.uvm_obj = entry->object.uvm_obj;
2723 		if (newentry->object.uvm_obj) {
2724 			if (newentry->object.uvm_obj->pgops->pgo_reference)
2725 				newentry->object.uvm_obj->pgops->
2726 				    pgo_reference(newentry->object.uvm_obj);
2727 			newentry->offset = entry->offset + fudge;
2728 		} else {
2729 			newentry->offset = 0;
2730 		}
2731 		newentry->etype = entry->etype;
2732 		if (flags & UVM_EXTRACT_PROT_ALL) {
2733 			newentry->protection = newentry->max_protection =
2734 			    UVM_PROT_ALL;
2735 		} else {
2736 			newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
2737 			    entry->max_protection : entry->protection;
2738 			newentry->max_protection = entry->max_protection;
2739 		}
2740 		newentry->inheritance = entry->inheritance;
2741 		newentry->wired_count = 0;
2742 		newentry->aref.ar_amap = entry->aref.ar_amap;
2743 		if (newentry->aref.ar_amap) {
2744 			newentry->aref.ar_pageoff =
2745 			    entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
2746 			uvm_map_reference_amap(newentry, AMAP_SHARED |
2747 			    ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
2748 		} else {
2749 			newentry->aref.ar_pageoff = 0;
2750 		}
2751 		newentry->advice = entry->advice;
2752 		if ((flags & UVM_EXTRACT_QREF) != 0) {
2753 			newentry->flags |= UVM_MAP_NOMERGE;
2754 		}
2755 
2756 		/* now link it on the chain */
2757 		nchain++;
2758 		nsize += newentry->end - newentry->start;
2759 		if (endchain == NULL) {
2760 			chain = endchain = newentry;
2761 		} else {
2762 			endchain->next = newentry;
2763 			endchain = newentry;
2764 		}
2765 
2766 		/* end of 'while' loop! */
2767 		if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
2768 		    (entry->next == &srcmap->header ||
2769 		    entry->next->start != entry->end)) {
2770 			error = EINVAL;
2771 			goto bad;
2772 		}
2773 		entry = entry->next;
2774 		fudge = 0;
2775 	}
2776 
2777 	/*
2778 	 * step 4: close off chain (in format expected by uvm_map_replace)
2779 	 */
2780 
2781 	if (chain)
2782 		chain->prev = endchain;
2783 
2784 	/*
2785 	 * step 5: attempt to lock the dest map so we can pmap_copy.
2786 	 * note usage of copy_ok:
2787 	 *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
2788 	 *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
2789 	 */
2790 
2791 	if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) {
2792 		copy_ok = 1;
2793 		if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2794 		    nchain, nsize, &resentry)) {
2795 			if (srcmap != dstmap)
2796 				vm_map_unlock(dstmap);
2797 			error = EIO;
2798 			goto bad;
2799 		}
2800 	} else {
2801 		copy_ok = 0;
2802 		/* replace defered until step 7 */
2803 	}
2804 
2805 	/*
2806 	 * step 6: traverse the srcmap a second time to do the following:
2807 	 *  - if we got a lock on the dstmap do pmap_copy
2808 	 *  - if UVM_EXTRACT_REMOVE remove the entries
2809 	 * we make use of orig_entry and orig_fudge (saved in step 2)
2810 	 */
2811 
2812 	if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
2813 
2814 		/* purge possible stale hints from srcmap */
2815 		if (flags & UVM_EXTRACT_REMOVE) {
2816 			SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
2817 			if (srcmap->first_free != &srcmap->header &&
2818 			    srcmap->first_free->start >= start)
2819 				srcmap->first_free = orig_entry->prev;
2820 		}
2821 
2822 		entry = orig_entry;
2823 		fudge = orig_fudge;
2824 		deadentry = NULL;	/* for UVM_EXTRACT_REMOVE */
2825 
2826 		while (entry->start < end && entry != &srcmap->header) {
2827 			if (copy_ok) {
2828 				oldoffset = (entry->start + fudge) - start;
2829 				elen = MIN(end, entry->end) -
2830 				    (entry->start + fudge);
2831 				pmap_copy(dstmap->pmap, srcmap->pmap,
2832 				    dstaddr + oldoffset, elen,
2833 				    entry->start + fudge);
2834 			}
2835 
2836 			/* we advance "entry" in the following if statement */
2837 			if (flags & UVM_EXTRACT_REMOVE) {
2838 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
2839 				uvm_map_lock_entry(entry, RW_WRITER);
2840 #else
2841 				uvm_map_lock_entry(entry, RW_READER);
2842 #endif
2843 				pmap_remove(srcmap->pmap, entry->start,
2844 						entry->end);
2845 				uvm_map_unlock_entry(entry);
2846 				oldentry = entry;	/* save entry */
2847 				entry = entry->next;	/* advance */
2848 				uvm_map_entry_unlink(srcmap, oldentry);
2849 							/* add to dead list */
2850 				oldentry->next = deadentry;
2851 				deadentry = oldentry;
2852 			} else {
2853 				entry = entry->next;		/* advance */
2854 			}
2855 
2856 			/* end of 'while' loop */
2857 			fudge = 0;
2858 		}
2859 		pmap_update(srcmap->pmap);
2860 
2861 		/*
2862 		 * unlock dstmap.  we will dispose of deadentry in
2863 		 * step 7 if needed
2864 		 */
2865 
2866 		if (copy_ok && srcmap != dstmap)
2867 			vm_map_unlock(dstmap);
2868 
2869 	} else {
2870 		deadentry = NULL;
2871 	}
2872 
2873 	/*
2874 	 * step 7: we are done with the source map, unlock.   if copy_ok
2875 	 * is 0 then we have not replaced the dummy mapping in dstmap yet
2876 	 * and we need to do so now.
2877 	 */
2878 
2879 	vm_map_unlock(srcmap);
2880 	if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
2881 		uvm_unmap_detach(deadentry, 0);   /* dispose of old entries */
2882 
2883 	/* now do the replacement if we didn't do it in step 5 */
2884 	if (copy_ok == 0) {
2885 		vm_map_lock(dstmap);
2886 		error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2887 		    nchain, nsize, &resentry);
2888 		vm_map_unlock(dstmap);
2889 
2890 		if (error == false) {
2891 			error = EIO;
2892 			goto bad2;
2893 		}
2894 	}
2895 
2896 	if (resentry != NULL)
2897 		uvm_mapent_free(resentry);
2898 
2899 	return (0);
2900 
2901 	/*
2902 	 * bad: failure recovery
2903 	 */
2904 bad:
2905 	vm_map_unlock(srcmap);
2906 bad2:			/* src already unlocked */
2907 	if (chain)
2908 		uvm_unmap_detach(chain,
2909 		    (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
2910 
2911 	if (resentry != NULL)
2912 		uvm_mapent_free(resentry);
2913 
2914 	if ((flags & UVM_EXTRACT_RESERVED) == 0) {
2915 		uvm_unmap(dstmap, dstaddr, dstaddr+len);   /* ??? */
2916 	}
2917 	return (error);
2918 }
2919 
2920 /* end of extraction functions */
2921 
2922 /*
2923  * uvm_map_submap: punch down part of a map into a submap
2924  *
2925  * => only the kernel_map is allowed to be submapped
2926  * => the purpose of submapping is to break up the locking granularity
2927  *	of a larger map
2928  * => the range specified must have been mapped previously with a uvm_map()
2929  *	call [with uobj==NULL] to create a blank map entry in the main map.
2930  *	[And it had better still be blank!]
2931  * => maps which contain submaps should never be copied or forked.
2932  * => to remove a submap, use uvm_unmap() on the main map
2933  *	and then uvm_map_deallocate() the submap.
2934  * => main map must be unlocked.
2935  * => submap must have been init'd and have a zero reference count.
2936  *	[need not be locked as we don't actually reference it]
2937  */
2938 
2939 int
2940 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
2941     struct vm_map *submap)
2942 {
2943 	struct vm_map_entry *entry;
2944 	int error;
2945 
2946 	vm_map_lock(map);
2947 	VM_MAP_RANGE_CHECK(map, start, end);
2948 
2949 	if (uvm_map_lookup_entry(map, start, &entry)) {
2950 		UVM_MAP_CLIP_START(map, entry, start);
2951 		UVM_MAP_CLIP_END(map, entry, end);	/* to be safe */
2952 	} else {
2953 		entry = NULL;
2954 	}
2955 
2956 	if (entry != NULL &&
2957 	    entry->start == start && entry->end == end &&
2958 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
2959 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
2960 		entry->etype |= UVM_ET_SUBMAP;
2961 		entry->object.sub_map = submap;
2962 		entry->offset = 0;
2963 		uvm_map_reference(submap);
2964 		error = 0;
2965 	} else {
2966 		error = EINVAL;
2967 	}
2968 	vm_map_unlock(map);
2969 
2970 	return error;
2971 }
2972 
2973 /*
2974  * uvm_map_protect_user: change map protection on behalf of the user.
2975  * Enforces PAX settings as necessary.
2976  */
2977 int
2978 uvm_map_protect_user(struct lwp *l, vaddr_t start, vaddr_t end,
2979     vm_prot_t new_prot)
2980 {
2981 	int error;
2982 
2983 	if ((error = PAX_MPROTECT_VALIDATE(l, new_prot)))
2984 		return error;
2985 
2986 	return uvm_map_protect(&l->l_proc->p_vmspace->vm_map, start, end,
2987 	    new_prot, false);
2988 }
2989 
2990 
2991 /*
2992  * uvm_map_protect: change map protection
2993  *
2994  * => set_max means set max_protection.
2995  * => map must be unlocked.
2996  */
2997 
2998 #define MASK(entry)	(UVM_ET_ISCOPYONWRITE(entry) ? \
2999 			 ~VM_PROT_WRITE : VM_PROT_ALL)
3000 
3001 int
3002 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3003     vm_prot_t new_prot, bool set_max)
3004 {
3005 	struct vm_map_entry *current, *entry;
3006 	int error = 0;
3007 	UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
3008 	UVMHIST_LOG(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_prot=%#jx)",
3009 	    (uintptr_t)map, start, end, new_prot);
3010 
3011 	vm_map_lock(map);
3012 	VM_MAP_RANGE_CHECK(map, start, end);
3013 	if (uvm_map_lookup_entry(map, start, &entry)) {
3014 		UVM_MAP_CLIP_START(map, entry, start);
3015 	} else {
3016 		entry = entry->next;
3017 	}
3018 
3019 	/*
3020 	 * make a first pass to check for protection violations.
3021 	 */
3022 
3023 	current = entry;
3024 	while ((current != &map->header) && (current->start < end)) {
3025 		if (UVM_ET_ISSUBMAP(current)) {
3026 			error = EINVAL;
3027 			goto out;
3028 		}
3029 		if ((new_prot & current->max_protection) != new_prot) {
3030 			error = EACCES;
3031 			goto out;
3032 		}
3033 		/*
3034 		 * Don't allow VM_PROT_EXECUTE to be set on entries that
3035 		 * point to vnodes that are associated with a NOEXEC file
3036 		 * system.
3037 		 */
3038 		if (UVM_ET_ISOBJ(current) &&
3039 		    UVM_OBJ_IS_VNODE(current->object.uvm_obj)) {
3040 			struct vnode *vp =
3041 			    (struct vnode *) current->object.uvm_obj;
3042 
3043 			if ((new_prot & VM_PROT_EXECUTE) != 0 &&
3044 			    (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
3045 				error = EACCES;
3046 				goto out;
3047 			}
3048 		}
3049 
3050 		current = current->next;
3051 	}
3052 
3053 	/* go back and fix up protections (no need to clip this time). */
3054 
3055 	current = entry;
3056 	while ((current != &map->header) && (current->start < end)) {
3057 		vm_prot_t old_prot;
3058 
3059 		UVM_MAP_CLIP_END(map, current, end);
3060 		old_prot = current->protection;
3061 		if (set_max)
3062 			current->protection =
3063 			    (current->max_protection = new_prot) & old_prot;
3064 		else
3065 			current->protection = new_prot;
3066 
3067 		/*
3068 		 * update physical map if necessary.  worry about copy-on-write
3069 		 * here -- CHECK THIS XXX
3070 		 */
3071 
3072 		if (current->protection != old_prot) {
3073 			/* update pmap! */
3074 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
3075 			uvm_map_lock_entry(current, RW_WRITER);
3076 #else
3077 			uvm_map_lock_entry(current, RW_READER);
3078 #endif
3079 			pmap_protect(map->pmap, current->start, current->end,
3080 			    current->protection & MASK(current));
3081 			uvm_map_unlock_entry(current);
3082 
3083 			/*
3084 			 * If this entry points at a vnode, and the
3085 			 * protection includes VM_PROT_EXECUTE, mark
3086 			 * the vnode as VEXECMAP.
3087 			 */
3088 			if (UVM_ET_ISOBJ(current)) {
3089 				struct uvm_object *uobj =
3090 				    current->object.uvm_obj;
3091 
3092 				if (UVM_OBJ_IS_VNODE(uobj) &&
3093 				    (current->protection & VM_PROT_EXECUTE)) {
3094 					vn_markexec((struct vnode *) uobj);
3095 				}
3096 			}
3097 		}
3098 
3099 		/*
3100 		 * If the map is configured to lock any future mappings,
3101 		 * wire this entry now if the old protection was VM_PROT_NONE
3102 		 * and the new protection is not VM_PROT_NONE.
3103 		 */
3104 
3105 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3106 		    VM_MAPENT_ISWIRED(current) == 0 &&
3107 		    old_prot == VM_PROT_NONE &&
3108 		    new_prot != VM_PROT_NONE) {
3109 
3110 			/*
3111 			 * We must call pmap_update() here because the
3112 			 * pmap_protect() call above might have removed some
3113 			 * pmap entries and uvm_map_pageable() might create
3114 			 * some new pmap entries that rely on the prior
3115 			 * removals being completely finished.
3116 			 */
3117 
3118 			pmap_update(map->pmap);
3119 
3120 			if (uvm_map_pageable(map, current->start,
3121 			    current->end, false,
3122 			    UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
3123 
3124 				/*
3125 				 * If locking the entry fails, remember the
3126 				 * error if it's the first one.  Note we
3127 				 * still continue setting the protection in
3128 				 * the map, but will return the error
3129 				 * condition regardless.
3130 				 *
3131 				 * XXX Ignore what the actual error is,
3132 				 * XXX just call it a resource shortage
3133 				 * XXX so that it doesn't get confused
3134 				 * XXX what uvm_map_protect() itself would
3135 				 * XXX normally return.
3136 				 */
3137 
3138 				error = ENOMEM;
3139 			}
3140 		}
3141 		current = current->next;
3142 	}
3143 	pmap_update(map->pmap);
3144 
3145  out:
3146 	vm_map_unlock(map);
3147 
3148 	UVMHIST_LOG(maphist, "<- done, error=%jd",error,0,0,0);
3149 	return error;
3150 }
3151 
3152 #undef  MASK
3153 
3154 /*
3155  * uvm_map_inherit: set inheritance code for range of addrs in map.
3156  *
3157  * => map must be unlocked
3158  * => note that the inherit code is used during a "fork".  see fork
3159  *	code for details.
3160  */
3161 
3162 int
3163 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
3164     vm_inherit_t new_inheritance)
3165 {
3166 	struct vm_map_entry *entry, *temp_entry;
3167 	UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
3168 	UVMHIST_LOG(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_inh=%#jx)",
3169 	    (uintptr_t)map, start, end, new_inheritance);
3170 
3171 	switch (new_inheritance) {
3172 	case MAP_INHERIT_NONE:
3173 	case MAP_INHERIT_COPY:
3174 	case MAP_INHERIT_SHARE:
3175 	case MAP_INHERIT_ZERO:
3176 		break;
3177 	default:
3178 		UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
3179 		return EINVAL;
3180 	}
3181 
3182 	vm_map_lock(map);
3183 	VM_MAP_RANGE_CHECK(map, start, end);
3184 	if (uvm_map_lookup_entry(map, start, &temp_entry)) {
3185 		entry = temp_entry;
3186 		UVM_MAP_CLIP_START(map, entry, start);
3187 	}  else {
3188 		entry = temp_entry->next;
3189 	}
3190 	while ((entry != &map->header) && (entry->start < end)) {
3191 		UVM_MAP_CLIP_END(map, entry, end);
3192 		entry->inheritance = new_inheritance;
3193 		entry = entry->next;
3194 	}
3195 	vm_map_unlock(map);
3196 	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
3197 	return 0;
3198 }
3199 
3200 /*
3201  * uvm_map_advice: set advice code for range of addrs in map.
3202  *
3203  * => map must be unlocked
3204  */
3205 
3206 int
3207 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
3208 {
3209 	struct vm_map_entry *entry, *temp_entry;
3210 	UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
3211 	UVMHIST_LOG(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_adv=%#jx)",
3212 	    (uintptr_t)map, start, end, new_advice);
3213 
3214 	vm_map_lock(map);
3215 	VM_MAP_RANGE_CHECK(map, start, end);
3216 	if (uvm_map_lookup_entry(map, start, &temp_entry)) {
3217 		entry = temp_entry;
3218 		UVM_MAP_CLIP_START(map, entry, start);
3219 	} else {
3220 		entry = temp_entry->next;
3221 	}
3222 
3223 	/*
3224 	 * XXXJRT: disallow holes?
3225 	 */
3226 
3227 	while ((entry != &map->header) && (entry->start < end)) {
3228 		UVM_MAP_CLIP_END(map, entry, end);
3229 
3230 		switch (new_advice) {
3231 		case MADV_NORMAL:
3232 		case MADV_RANDOM:
3233 		case MADV_SEQUENTIAL:
3234 			/* nothing special here */
3235 			break;
3236 
3237 		default:
3238 			vm_map_unlock(map);
3239 			UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
3240 			return EINVAL;
3241 		}
3242 		entry->advice = new_advice;
3243 		entry = entry->next;
3244 	}
3245 
3246 	vm_map_unlock(map);
3247 	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
3248 	return 0;
3249 }
3250 
3251 /*
3252  * uvm_map_willneed: apply MADV_WILLNEED
3253  */
3254 
3255 int
3256 uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end)
3257 {
3258 	struct vm_map_entry *entry;
3259 	UVMHIST_FUNC("uvm_map_willneed"); UVMHIST_CALLED(maphist);
3260 	UVMHIST_LOG(maphist,"(map=%#jx,start=%#jx,end=%#jx)",
3261 	    (uintptr_t)map, start, end, 0);
3262 
3263 	vm_map_lock_read(map);
3264 	VM_MAP_RANGE_CHECK(map, start, end);
3265 	if (!uvm_map_lookup_entry(map, start, &entry)) {
3266 		entry = entry->next;
3267 	}
3268 	while (entry->start < end) {
3269 		struct vm_amap * const amap = entry->aref.ar_amap;
3270 		struct uvm_object * const uobj = entry->object.uvm_obj;
3271 
3272 		KASSERT(entry != &map->header);
3273 		KASSERT(start < entry->end);
3274 		/*
3275 		 * For now, we handle only the easy but commonly-requested case.
3276 		 * ie. start prefetching of backing uobj pages.
3277 		 *
3278 		 * XXX It might be useful to pmap_enter() the already-in-core
3279 		 * pages by inventing a "weak" mode for uvm_fault() which would
3280 		 * only do the PGO_LOCKED pgo_get().
3281 		 */
3282 		if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) {
3283 			off_t offset;
3284 			off_t size;
3285 
3286 			offset = entry->offset;
3287 			if (start < entry->start) {
3288 				offset += entry->start - start;
3289 			}
3290 			size = entry->offset + (entry->end - entry->start);
3291 			if (entry->end < end) {
3292 				size -= end - entry->end;
3293 			}
3294 			uvm_readahead(uobj, offset, size);
3295 		}
3296 		entry = entry->next;
3297 	}
3298 	vm_map_unlock_read(map);
3299 	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
3300 	return 0;
3301 }
3302 
3303 /*
3304  * uvm_map_pageable: sets the pageability of a range in a map.
3305  *
3306  * => wires map entries.  should not be used for transient page locking.
3307  *	for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
3308  * => regions specified as not pageable require lock-down (wired) memory
3309  *	and page tables.
3310  * => map must never be read-locked
3311  * => if islocked is true, map is already write-locked
3312  * => we always unlock the map, since we must downgrade to a read-lock
3313  *	to call uvm_fault_wire()
3314  * => XXXCDC: check this and try and clean it up.
3315  */
3316 
3317 int
3318 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
3319     bool new_pageable, int lockflags)
3320 {
3321 	struct vm_map_entry *entry, *start_entry, *failed_entry;
3322 	int rv;
3323 #ifdef DIAGNOSTIC
3324 	u_int timestamp_save;
3325 #endif
3326 	UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
3327 	UVMHIST_LOG(maphist,"(map=%#jx,start=%#jx,end=%#jx,new_pageable=%ju)",
3328 	    (uintptr_t)map, start, end, new_pageable);
3329 	KASSERT(map->flags & VM_MAP_PAGEABLE);
3330 
3331 	if ((lockflags & UVM_LK_ENTER) == 0)
3332 		vm_map_lock(map);
3333 	VM_MAP_RANGE_CHECK(map, start, end);
3334 
3335 	/*
3336 	 * only one pageability change may take place at one time, since
3337 	 * uvm_fault_wire assumes it will be called only once for each
3338 	 * wiring/unwiring.  therefore, we have to make sure we're actually
3339 	 * changing the pageability for the entire region.  we do so before
3340 	 * making any changes.
3341 	 */
3342 
3343 	if (uvm_map_lookup_entry(map, start, &start_entry) == false) {
3344 		if ((lockflags & UVM_LK_EXIT) == 0)
3345 			vm_map_unlock(map);
3346 
3347 		UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0);
3348 		return EFAULT;
3349 	}
3350 	entry = start_entry;
3351 
3352 	if (start == end) {		/* nothing required */
3353 		if ((lockflags & UVM_LK_EXIT) == 0)
3354 			vm_map_unlock(map);
3355 
3356 		UVMHIST_LOG(maphist,"<- done (nothing)",0,0,0,0);
3357 		return 0;
3358 	}
3359 
3360 	/*
3361 	 * handle wiring and unwiring separately.
3362 	 */
3363 
3364 	if (new_pageable) {		/* unwire */
3365 		UVM_MAP_CLIP_START(map, entry, start);
3366 
3367 		/*
3368 		 * unwiring.  first ensure that the range to be unwired is
3369 		 * really wired down and that there are no holes.
3370 		 */
3371 
3372 		while ((entry != &map->header) && (entry->start < end)) {
3373 			if (entry->wired_count == 0 ||
3374 			    (entry->end < end &&
3375 			     (entry->next == &map->header ||
3376 			      entry->next->start > entry->end))) {
3377 				if ((lockflags & UVM_LK_EXIT) == 0)
3378 					vm_map_unlock(map);
3379 				UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0);
3380 				return EINVAL;
3381 			}
3382 			entry = entry->next;
3383 		}
3384 
3385 		/*
3386 		 * POSIX 1003.1b - a single munlock call unlocks a region,
3387 		 * regardless of the number of mlock calls made on that
3388 		 * region.
3389 		 */
3390 
3391 		entry = start_entry;
3392 		while ((entry != &map->header) && (entry->start < end)) {
3393 			UVM_MAP_CLIP_END(map, entry, end);
3394 			if (VM_MAPENT_ISWIRED(entry))
3395 				uvm_map_entry_unwire(map, entry);
3396 			entry = entry->next;
3397 		}
3398 		if ((lockflags & UVM_LK_EXIT) == 0)
3399 			vm_map_unlock(map);
3400 		UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
3401 		return 0;
3402 	}
3403 
3404 	/*
3405 	 * wire case: in two passes [XXXCDC: ugly block of code here]
3406 	 *
3407 	 * 1: holding the write lock, we create any anonymous maps that need
3408 	 *    to be created.  then we clip each map entry to the region to
3409 	 *    be wired and increment its wiring count.
3410 	 *
3411 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
3412 	 *    in the pages for any newly wired area (wired_count == 1).
3413 	 *
3414 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
3415 	 *    deadlock with another thread that may have faulted on one of
3416 	 *    the pages to be wired (it would mark the page busy, blocking
3417 	 *    us, then in turn block on the map lock that we hold).  because
3418 	 *    of problems in the recursive lock package, we cannot upgrade
3419 	 *    to a write lock in vm_map_lookup.  thus, any actions that
3420 	 *    require the write lock must be done beforehand.  because we
3421 	 *    keep the read lock on the map, the copy-on-write status of the
3422 	 *    entries we modify here cannot change.
3423 	 */
3424 
3425 	while ((entry != &map->header) && (entry->start < end)) {
3426 		if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3427 
3428 			/*
3429 			 * perform actions of vm_map_lookup that need the
3430 			 * write lock on the map: create an anonymous map
3431 			 * for a copy-on-write region, or an anonymous map
3432 			 * for a zero-fill region.  (XXXCDC: submap case
3433 			 * ok?)
3434 			 */
3435 
3436 			if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
3437 				if (UVM_ET_ISNEEDSCOPY(entry) &&
3438 				    ((entry->max_protection & VM_PROT_WRITE) ||
3439 				     (entry->object.uvm_obj == NULL))) {
3440 					amap_copy(map, entry, 0, start, end);
3441 					/* XXXCDC: wait OK? */
3442 				}
3443 			}
3444 		}
3445 		UVM_MAP_CLIP_START(map, entry, start);
3446 		UVM_MAP_CLIP_END(map, entry, end);
3447 		entry->wired_count++;
3448 
3449 		/*
3450 		 * Check for holes
3451 		 */
3452 
3453 		if (entry->protection == VM_PROT_NONE ||
3454 		    (entry->end < end &&
3455 		     (entry->next == &map->header ||
3456 		      entry->next->start > entry->end))) {
3457 
3458 			/*
3459 			 * found one.  amap creation actions do not need to
3460 			 * be undone, but the wired counts need to be restored.
3461 			 */
3462 
3463 			while (entry != &map->header && entry->end > start) {
3464 				entry->wired_count--;
3465 				entry = entry->prev;
3466 			}
3467 			if ((lockflags & UVM_LK_EXIT) == 0)
3468 				vm_map_unlock(map);
3469 			UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
3470 			return EINVAL;
3471 		}
3472 		entry = entry->next;
3473 	}
3474 
3475 	/*
3476 	 * Pass 2.
3477 	 */
3478 
3479 #ifdef DIAGNOSTIC
3480 	timestamp_save = map->timestamp;
3481 #endif
3482 	vm_map_busy(map);
3483 	vm_map_unlock(map);
3484 
3485 	rv = 0;
3486 	entry = start_entry;
3487 	while (entry != &map->header && entry->start < end) {
3488 		if (entry->wired_count == 1) {
3489 			rv = uvm_fault_wire(map, entry->start, entry->end,
3490 			    entry->max_protection, 1);
3491 			if (rv) {
3492 
3493 				/*
3494 				 * wiring failed.  break out of the loop.
3495 				 * we'll clean up the map below, once we
3496 				 * have a write lock again.
3497 				 */
3498 
3499 				break;
3500 			}
3501 		}
3502 		entry = entry->next;
3503 	}
3504 
3505 	if (rv) {	/* failed? */
3506 
3507 		/*
3508 		 * Get back to an exclusive (write) lock.
3509 		 */
3510 
3511 		vm_map_lock(map);
3512 		vm_map_unbusy(map);
3513 
3514 #ifdef DIAGNOSTIC
3515 		if (timestamp_save + 1 != map->timestamp)
3516 			panic("uvm_map_pageable: stale map");
3517 #endif
3518 
3519 		/*
3520 		 * first drop the wiring count on all the entries
3521 		 * which haven't actually been wired yet.
3522 		 */
3523 
3524 		failed_entry = entry;
3525 		while (entry != &map->header && entry->start < end) {
3526 			entry->wired_count--;
3527 			entry = entry->next;
3528 		}
3529 
3530 		/*
3531 		 * now, unwire all the entries that were successfully
3532 		 * wired above.
3533 		 */
3534 
3535 		entry = start_entry;
3536 		while (entry != failed_entry) {
3537 			entry->wired_count--;
3538 			if (VM_MAPENT_ISWIRED(entry) == 0)
3539 				uvm_map_entry_unwire(map, entry);
3540 			entry = entry->next;
3541 		}
3542 		if ((lockflags & UVM_LK_EXIT) == 0)
3543 			vm_map_unlock(map);
3544 		UVMHIST_LOG(maphist, "<- done (RV=%jd)", rv,0,0,0);
3545 		return (rv);
3546 	}
3547 
3548 	if ((lockflags & UVM_LK_EXIT) == 0) {
3549 		vm_map_unbusy(map);
3550 	} else {
3551 
3552 		/*
3553 		 * Get back to an exclusive (write) lock.
3554 		 */
3555 
3556 		vm_map_lock(map);
3557 		vm_map_unbusy(map);
3558 	}
3559 
3560 	UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
3561 	return 0;
3562 }
3563 
3564 /*
3565  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
3566  * all mapped regions.
3567  *
3568  * => map must not be locked.
3569  * => if no flags are specified, all regions are unwired.
3570  * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
3571  */
3572 
3573 int
3574 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
3575 {
3576 	struct vm_map_entry *entry, *failed_entry;
3577 	vsize_t size;
3578 	int rv;
3579 #ifdef DIAGNOSTIC
3580 	u_int timestamp_save;
3581 #endif
3582 	UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
3583 	UVMHIST_LOG(maphist,"(map=%#jx,flags=%#jx)", (uintptr_t)map, flags,
3584 	    0, 0);
3585 
3586 	KASSERT(map->flags & VM_MAP_PAGEABLE);
3587 
3588 	vm_map_lock(map);
3589 
3590 	/*
3591 	 * handle wiring and unwiring separately.
3592 	 */
3593 
3594 	if (flags == 0) {			/* unwire */
3595 
3596 		/*
3597 		 * POSIX 1003.1b -- munlockall unlocks all regions,
3598 		 * regardless of how many times mlockall has been called.
3599 		 */
3600 
3601 		for (entry = map->header.next; entry != &map->header;
3602 		     entry = entry->next) {
3603 			if (VM_MAPENT_ISWIRED(entry))
3604 				uvm_map_entry_unwire(map, entry);
3605 		}
3606 		map->flags &= ~VM_MAP_WIREFUTURE;
3607 		vm_map_unlock(map);
3608 		UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
3609 		return 0;
3610 	}
3611 
3612 	if (flags & MCL_FUTURE) {
3613 
3614 		/*
3615 		 * must wire all future mappings; remember this.
3616 		 */
3617 
3618 		map->flags |= VM_MAP_WIREFUTURE;
3619 	}
3620 
3621 	if ((flags & MCL_CURRENT) == 0) {
3622 
3623 		/*
3624 		 * no more work to do!
3625 		 */
3626 
3627 		UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
3628 		vm_map_unlock(map);
3629 		return 0;
3630 	}
3631 
3632 	/*
3633 	 * wire case: in three passes [XXXCDC: ugly block of code here]
3634 	 *
3635 	 * 1: holding the write lock, count all pages mapped by non-wired
3636 	 *    entries.  if this would cause us to go over our limit, we fail.
3637 	 *
3638 	 * 2: still holding the write lock, we create any anonymous maps that
3639 	 *    need to be created.  then we increment its wiring count.
3640 	 *
3641 	 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
3642 	 *    in the pages for any newly wired area (wired_count == 1).
3643 	 *
3644 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
3645 	 *    deadlock with another thread that may have faulted on one of
3646 	 *    the pages to be wired (it would mark the page busy, blocking
3647 	 *    us, then in turn block on the map lock that we hold).  because
3648 	 *    of problems in the recursive lock package, we cannot upgrade
3649 	 *    to a write lock in vm_map_lookup.  thus, any actions that
3650 	 *    require the write lock must be done beforehand.  because we
3651 	 *    keep the read lock on the map, the copy-on-write status of the
3652 	 *    entries we modify here cannot change.
3653 	 */
3654 
3655 	for (size = 0, entry = map->header.next; entry != &map->header;
3656 	     entry = entry->next) {
3657 		if (entry->protection != VM_PROT_NONE &&
3658 		    VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3659 			size += entry->end - entry->start;
3660 		}
3661 	}
3662 
3663 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
3664 		vm_map_unlock(map);
3665 		return ENOMEM;
3666 	}
3667 
3668 	if (limit != 0 &&
3669 	    (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
3670 		vm_map_unlock(map);
3671 		return ENOMEM;
3672 	}
3673 
3674 	/*
3675 	 * Pass 2.
3676 	 */
3677 
3678 	for (entry = map->header.next; entry != &map->header;
3679 	     entry = entry->next) {
3680 		if (entry->protection == VM_PROT_NONE)
3681 			continue;
3682 		if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3683 
3684 			/*
3685 			 * perform actions of vm_map_lookup that need the
3686 			 * write lock on the map: create an anonymous map
3687 			 * for a copy-on-write region, or an anonymous map
3688 			 * for a zero-fill region.  (XXXCDC: submap case
3689 			 * ok?)
3690 			 */
3691 
3692 			if (!UVM_ET_ISSUBMAP(entry)) {	/* not submap */
3693 				if (UVM_ET_ISNEEDSCOPY(entry) &&
3694 				    ((entry->max_protection & VM_PROT_WRITE) ||
3695 				     (entry->object.uvm_obj == NULL))) {
3696 					amap_copy(map, entry, 0, entry->start,
3697 					    entry->end);
3698 					/* XXXCDC: wait OK? */
3699 				}
3700 			}
3701 		}
3702 		entry->wired_count++;
3703 	}
3704 
3705 	/*
3706 	 * Pass 3.
3707 	 */
3708 
3709 #ifdef DIAGNOSTIC
3710 	timestamp_save = map->timestamp;
3711 #endif
3712 	vm_map_busy(map);
3713 	vm_map_unlock(map);
3714 
3715 	rv = 0;
3716 	for (entry = map->header.next; entry != &map->header;
3717 	     entry = entry->next) {
3718 		if (entry->wired_count == 1) {
3719 			rv = uvm_fault_wire(map, entry->start, entry->end,
3720 			    entry->max_protection, 1);
3721 			if (rv) {
3722 
3723 				/*
3724 				 * wiring failed.  break out of the loop.
3725 				 * we'll clean up the map below, once we
3726 				 * have a write lock again.
3727 				 */
3728 
3729 				break;
3730 			}
3731 		}
3732 	}
3733 
3734 	if (rv) {
3735 
3736 		/*
3737 		 * Get back an exclusive (write) lock.
3738 		 */
3739 
3740 		vm_map_lock(map);
3741 		vm_map_unbusy(map);
3742 
3743 #ifdef DIAGNOSTIC
3744 		if (timestamp_save + 1 != map->timestamp)
3745 			panic("uvm_map_pageable_all: stale map");
3746 #endif
3747 
3748 		/*
3749 		 * first drop the wiring count on all the entries
3750 		 * which haven't actually been wired yet.
3751 		 *
3752 		 * Skip VM_PROT_NONE entries like we did above.
3753 		 */
3754 
3755 		failed_entry = entry;
3756 		for (/* nothing */; entry != &map->header;
3757 		     entry = entry->next) {
3758 			if (entry->protection == VM_PROT_NONE)
3759 				continue;
3760 			entry->wired_count--;
3761 		}
3762 
3763 		/*
3764 		 * now, unwire all the entries that were successfully
3765 		 * wired above.
3766 		 *
3767 		 * Skip VM_PROT_NONE entries like we did above.
3768 		 */
3769 
3770 		for (entry = map->header.next; entry != failed_entry;
3771 		     entry = entry->next) {
3772 			if (entry->protection == VM_PROT_NONE)
3773 				continue;
3774 			entry->wired_count--;
3775 			if (VM_MAPENT_ISWIRED(entry))
3776 				uvm_map_entry_unwire(map, entry);
3777 		}
3778 		vm_map_unlock(map);
3779 		UVMHIST_LOG(maphist,"<- done (RV=%jd)", rv,0,0,0);
3780 		return (rv);
3781 	}
3782 
3783 	vm_map_unbusy(map);
3784 
3785 	UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
3786 	return 0;
3787 }
3788 
3789 /*
3790  * uvm_map_clean: clean out a map range
3791  *
3792  * => valid flags:
3793  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
3794  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
3795  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
3796  *   if (flags & PGO_FREE): any cached pages are freed after clean
3797  * => returns an error if any part of the specified range isn't mapped
3798  * => never a need to flush amap layer since the anonymous memory has
3799  *	no permanent home, but may deactivate pages there
3800  * => called from sys_msync() and sys_madvise()
3801  * => caller must not write-lock map (read OK).
3802  * => we may sleep while cleaning if SYNCIO [with map read-locked]
3803  */
3804 
3805 int
3806 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
3807 {
3808 	struct vm_map_entry *current, *entry;
3809 	struct uvm_object *uobj;
3810 	struct vm_amap *amap;
3811 	struct vm_anon *anon;
3812 	struct vm_page *pg;
3813 	vaddr_t offset;
3814 	vsize_t size;
3815 	voff_t uoff;
3816 	int error, refs;
3817 	UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
3818 
3819 	UVMHIST_LOG(maphist,"(map=%#jx,start=%#jx,end=%#jx,flags=%#jx)",
3820 	    (uintptr_t)map, start, end, flags);
3821 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
3822 		(PGO_FREE|PGO_DEACTIVATE));
3823 
3824 	vm_map_lock_read(map);
3825 	VM_MAP_RANGE_CHECK(map, start, end);
3826 	if (uvm_map_lookup_entry(map, start, &entry) == false) {
3827 		vm_map_unlock_read(map);
3828 		return EFAULT;
3829 	}
3830 
3831 	/*
3832 	 * Make a first pass to check for holes and wiring problems.
3833 	 */
3834 
3835 	for (current = entry; current->start < end; current = current->next) {
3836 		if (UVM_ET_ISSUBMAP(current)) {
3837 			vm_map_unlock_read(map);
3838 			return EINVAL;
3839 		}
3840 		if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) {
3841 			vm_map_unlock_read(map);
3842 			return EBUSY;
3843 		}
3844 		if (end <= current->end) {
3845 			break;
3846 		}
3847 		if (current->end != current->next->start) {
3848 			vm_map_unlock_read(map);
3849 			return EFAULT;
3850 		}
3851 	}
3852 
3853 	error = 0;
3854 	for (current = entry; start < end; current = current->next) {
3855 		amap = current->aref.ar_amap;	/* upper layer */
3856 		uobj = current->object.uvm_obj;	/* lower layer */
3857 		KASSERT(start >= current->start);
3858 
3859 		/*
3860 		 * No amap cleaning necessary if:
3861 		 *
3862 		 *	(1) There's no amap.
3863 		 *
3864 		 *	(2) We're not deactivating or freeing pages.
3865 		 */
3866 
3867 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
3868 			goto flush_object;
3869 
3870 		offset = start - current->start;
3871 		size = MIN(end, current->end) - start;
3872 
3873 		amap_lock(amap, RW_WRITER);
3874 		for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
3875 			anon = amap_lookup(&current->aref, offset);
3876 			if (anon == NULL)
3877 				continue;
3878 
3879 			KASSERT(anon->an_lock == amap->am_lock);
3880 			pg = anon->an_page;
3881 			if (pg == NULL) {
3882 				continue;
3883 			}
3884 			if (pg->flags & PG_BUSY) {
3885 				continue;
3886 			}
3887 
3888 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
3889 
3890 			/*
3891 			 * In these first 3 cases, we just deactivate the page.
3892 			 */
3893 
3894 			case PGO_CLEANIT|PGO_FREE:
3895 			case PGO_CLEANIT|PGO_DEACTIVATE:
3896 			case PGO_DEACTIVATE:
3897  deactivate_it:
3898 				/*
3899 				 * skip the page if it's loaned or wired,
3900 				 * since it shouldn't be on a paging queue
3901 				 * at all in these cases.
3902 				 */
3903 
3904 				if (pg->loan_count != 0 ||
3905 				    pg->wire_count != 0) {
3906 					continue;
3907 				}
3908 				KASSERT(pg->uanon == anon);
3909 				uvm_pagelock(pg);
3910 				uvm_pagedeactivate(pg);
3911 				uvm_pageunlock(pg);
3912 				continue;
3913 
3914 			case PGO_FREE:
3915 
3916 				/*
3917 				 * If there are multiple references to
3918 				 * the amap, just deactivate the page.
3919 				 */
3920 
3921 				if (amap_refs(amap) > 1)
3922 					goto deactivate_it;
3923 
3924 				/* skip the page if it's wired */
3925 				if (pg->wire_count != 0) {
3926 					continue;
3927 				}
3928 				amap_unadd(&current->aref, offset);
3929 				refs = --anon->an_ref;
3930 				if (refs == 0) {
3931 					uvm_anfree(anon);
3932 				}
3933 				continue;
3934 			}
3935 		}
3936 		amap_unlock(amap);
3937 
3938  flush_object:
3939 		/*
3940 		 * flush pages if we've got a valid backing object.
3941 		 * note that we must always clean object pages before
3942 		 * freeing them since otherwise we could reveal stale
3943 		 * data from files.
3944 		 */
3945 
3946 		uoff = current->offset + (start - current->start);
3947 		size = MIN(end, current->end) - start;
3948 		if (uobj != NULL) {
3949 			rw_enter(uobj->vmobjlock, RW_WRITER);
3950 			if (uobj->pgops->pgo_put != NULL)
3951 				error = (uobj->pgops->pgo_put)(uobj, uoff,
3952 				    uoff + size, flags | PGO_CLEANIT);
3953 			else
3954 				error = 0;
3955 		}
3956 		start += size;
3957 	}
3958 	vm_map_unlock_read(map);
3959 	return (error);
3960 }
3961 
3962 
3963 /*
3964  * uvm_map_checkprot: check protection in map
3965  *
3966  * => must allow specified protection in a fully allocated region.
3967  * => map must be read or write locked by caller.
3968  */
3969 
3970 bool
3971 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
3972     vm_prot_t protection)
3973 {
3974 	struct vm_map_entry *entry;
3975 	struct vm_map_entry *tmp_entry;
3976 
3977 	if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
3978 		return (false);
3979 	}
3980 	entry = tmp_entry;
3981 	while (start < end) {
3982 		if (entry == &map->header) {
3983 			return (false);
3984 		}
3985 
3986 		/*
3987 		 * no holes allowed
3988 		 */
3989 
3990 		if (start < entry->start) {
3991 			return (false);
3992 		}
3993 
3994 		/*
3995 		 * check protection associated with entry
3996 		 */
3997 
3998 		if ((entry->protection & protection) != protection) {
3999 			return (false);
4000 		}
4001 		start = entry->end;
4002 		entry = entry->next;
4003 	}
4004 	return (true);
4005 }
4006 
4007 /*
4008  * uvmspace_alloc: allocate a vmspace structure.
4009  *
4010  * - structure includes vm_map and pmap
4011  * - XXX: no locking on this structure
4012  * - refcnt set to 1, rest must be init'd by caller
4013  */
4014 struct vmspace *
4015 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax, bool topdown)
4016 {
4017 	struct vmspace *vm;
4018 	UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
4019 
4020 	vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK);
4021 	uvmspace_init(vm, NULL, vmin, vmax, topdown);
4022 	UVMHIST_LOG(maphist,"<- done (vm=%#jx)", (uintptr_t)vm, 0, 0, 0);
4023 	return (vm);
4024 }
4025 
4026 /*
4027  * uvmspace_init: initialize a vmspace structure.
4028  *
4029  * - XXX: no locking on this structure
4030  * - refcnt set to 1, rest must be init'd by caller
4031  */
4032 void
4033 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin,
4034     vaddr_t vmax, bool topdown)
4035 {
4036 	UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
4037 
4038 	UVMHIST_LOG(maphist, "(vm=%#jx, pmap=%#jx, vmin=%#jx, vmax=%#jx",
4039 	    (uintptr_t)vm, (uintptr_t)pmap, vmin, vmax);
4040 	UVMHIST_LOG(maphist, "   topdown=%ju)", topdown, 0, 0, 0);
4041 
4042 	memset(vm, 0, sizeof(*vm));
4043 	uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE
4044 	    | (topdown ? VM_MAP_TOPDOWN : 0)
4045 	    );
4046 	if (pmap)
4047 		pmap_reference(pmap);
4048 	else
4049 		pmap = pmap_create();
4050 	vm->vm_map.pmap = pmap;
4051 	vm->vm_refcnt = 1;
4052 	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
4053 }
4054 
4055 /*
4056  * uvmspace_share: share a vmspace between two processes
4057  *
4058  * - used for vfork, threads(?)
4059  */
4060 
4061 void
4062 uvmspace_share(struct proc *p1, struct proc *p2)
4063 {
4064 
4065 	uvmspace_addref(p1->p_vmspace);
4066 	p2->p_vmspace = p1->p_vmspace;
4067 }
4068 
4069 #if 0
4070 
4071 /*
4072  * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
4073  *
4074  * - XXX: no locking on vmspace
4075  */
4076 
4077 void
4078 uvmspace_unshare(struct lwp *l)
4079 {
4080 	struct proc *p = l->l_proc;
4081 	struct vmspace *nvm, *ovm = p->p_vmspace;
4082 
4083 	if (ovm->vm_refcnt == 1)
4084 		/* nothing to do: vmspace isn't shared in the first place */
4085 		return;
4086 
4087 	/* make a new vmspace, still holding old one */
4088 	nvm = uvmspace_fork(ovm);
4089 
4090 	kpreempt_disable();
4091 	pmap_deactivate(l);		/* unbind old vmspace */
4092 	p->p_vmspace = nvm;
4093 	pmap_activate(l);		/* switch to new vmspace */
4094 	kpreempt_enable();
4095 
4096 	uvmspace_free(ovm);		/* drop reference to old vmspace */
4097 }
4098 
4099 #endif
4100 
4101 
4102 /*
4103  * uvmspace_spawn: a new process has been spawned and needs a vmspace
4104  */
4105 
4106 void
4107 uvmspace_spawn(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
4108 {
4109 	struct proc *p = l->l_proc;
4110 	struct vmspace *nvm;
4111 
4112 #ifdef __HAVE_CPU_VMSPACE_EXEC
4113 	cpu_vmspace_exec(l, start, end);
4114 #endif
4115 
4116 	nvm = uvmspace_alloc(start, end, topdown);
4117 	kpreempt_disable();
4118 	p->p_vmspace = nvm;
4119 	pmap_activate(l);
4120 	kpreempt_enable();
4121 }
4122 
4123 /*
4124  * uvmspace_exec: the process wants to exec a new program
4125  */
4126 
4127 void
4128 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end, bool topdown)
4129 {
4130 	struct proc *p = l->l_proc;
4131 	struct vmspace *nvm, *ovm = p->p_vmspace;
4132 	struct vm_map *map;
4133 	int flags;
4134 
4135 	KASSERT(ovm != NULL);
4136 #ifdef __HAVE_CPU_VMSPACE_EXEC
4137 	cpu_vmspace_exec(l, start, end);
4138 #endif
4139 
4140 	map = &ovm->vm_map;
4141 	/*
4142 	 * see if more than one process is using this vmspace...
4143 	 */
4144 
4145 	if (ovm->vm_refcnt == 1
4146 	    && topdown == ((ovm->vm_map.flags & VM_MAP_TOPDOWN) != 0)) {
4147 
4148 		/*
4149 		 * if p is the only process using its vmspace then we can safely
4150 		 * recycle that vmspace for the program that is being exec'd.
4151 		 * But only if TOPDOWN matches the requested value for the new
4152 		 * vm space!
4153 		 */
4154 
4155 		/*
4156 		 * SYSV SHM semantics require us to kill all segments on an exec
4157 		 */
4158 		if (uvm_shmexit && ovm->vm_shm)
4159 			(*uvm_shmexit)(ovm);
4160 
4161 		/*
4162 		 * POSIX 1003.1b -- "lock future mappings" is revoked
4163 		 * when a process execs another program image.
4164 		 */
4165 
4166 		map->flags &= ~VM_MAP_WIREFUTURE;
4167 
4168 		/*
4169 		 * now unmap the old program
4170 		 */
4171 
4172 		flags = pmap_remove_all(map->pmap) ? UVM_FLAG_VAONLY : 0;
4173 		uvm_unmap1(map, vm_map_min(map), vm_map_max(map), flags);
4174 		KASSERT(map->header.prev == &map->header);
4175 		KASSERT(map->nentries == 0);
4176 
4177 		/*
4178 		 * resize the map
4179 		 */
4180 
4181 		vm_map_setmin(map, start);
4182 		vm_map_setmax(map, end);
4183 	} else {
4184 
4185 		/*
4186 		 * p's vmspace is being shared, so we can't reuse it for p since
4187 		 * it is still being used for others.   allocate a new vmspace
4188 		 * for p
4189 		 */
4190 
4191 		nvm = uvmspace_alloc(start, end, topdown);
4192 
4193 		/*
4194 		 * install new vmspace and drop our ref to the old one.
4195 		 */
4196 
4197 		kpreempt_disable();
4198 		pmap_deactivate(l);
4199 		p->p_vmspace = nvm;
4200 		pmap_activate(l);
4201 		kpreempt_enable();
4202 
4203 		uvmspace_free(ovm);
4204 	}
4205 }
4206 
4207 /*
4208  * uvmspace_addref: add a reference to a vmspace.
4209  */
4210 
4211 void
4212 uvmspace_addref(struct vmspace *vm)
4213 {
4214 
4215 	KASSERT((vm->vm_map.flags & VM_MAP_DYING) == 0);
4216 	KASSERT(vm->vm_refcnt > 0);
4217 	atomic_inc_uint(&vm->vm_refcnt);
4218 }
4219 
4220 /*
4221  * uvmspace_free: free a vmspace data structure
4222  */
4223 
4224 void
4225 uvmspace_free(struct vmspace *vm)
4226 {
4227 	struct vm_map_entry *dead_entries;
4228 	struct vm_map *map = &vm->vm_map;
4229 	int flags;
4230 
4231 	UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
4232 
4233 	UVMHIST_LOG(maphist,"(vm=%#jx) ref=%jd", (uintptr_t)vm, vm->vm_refcnt,
4234 	    0, 0);
4235 	if (atomic_dec_uint_nv(&vm->vm_refcnt) > 0)
4236 		return;
4237 
4238 	/*
4239 	 * at this point, there should be no other references to the map.
4240 	 * delete all of the mappings, then destroy the pmap.
4241 	 */
4242 
4243 	map->flags |= VM_MAP_DYING;
4244 	flags = pmap_remove_all(map->pmap) ? UVM_FLAG_VAONLY : 0;
4245 
4246 	/* Get rid of any SYSV shared memory segments. */
4247 	if (uvm_shmexit && vm->vm_shm != NULL)
4248 		(*uvm_shmexit)(vm);
4249 
4250 	if (map->nentries) {
4251 		uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
4252 		    &dead_entries, flags);
4253 		if (dead_entries != NULL)
4254 			uvm_unmap_detach(dead_entries, 0);
4255 	}
4256 	KASSERT(map->nentries == 0);
4257 	KASSERT(map->size == 0);
4258 
4259 	mutex_destroy(&map->misc_lock);
4260 	rw_destroy(&map->lock);
4261 	cv_destroy(&map->cv);
4262 	pmap_destroy(map->pmap);
4263 	pool_cache_put(&uvm_vmspace_cache, vm);
4264 }
4265 
4266 static struct vm_map_entry *
4267 uvm_mapent_clone(struct vm_map *new_map, struct vm_map_entry *old_entry,
4268     int flags)
4269 {
4270 	struct vm_map_entry *new_entry;
4271 
4272 	new_entry = uvm_mapent_alloc(new_map, 0);
4273 	/* old_entry -> new_entry */
4274 	uvm_mapent_copy(old_entry, new_entry);
4275 
4276 	/* new pmap has nothing wired in it */
4277 	new_entry->wired_count = 0;
4278 
4279 	/*
4280 	 * gain reference to object backing the map (can't
4281 	 * be a submap, already checked this case).
4282 	 */
4283 
4284 	if (new_entry->aref.ar_amap)
4285 		uvm_map_reference_amap(new_entry, flags);
4286 
4287 	if (new_entry->object.uvm_obj &&
4288 	    new_entry->object.uvm_obj->pgops->pgo_reference)
4289 		new_entry->object.uvm_obj->pgops->pgo_reference(
4290 			new_entry->object.uvm_obj);
4291 
4292 	/* insert entry at end of new_map's entry list */
4293 	uvm_map_entry_link(new_map, new_map->header.prev,
4294 	    new_entry);
4295 
4296 	return new_entry;
4297 }
4298 
4299 /*
4300  * share the mapping: this means we want the old and
4301  * new entries to share amaps and backing objects.
4302  */
4303 static void
4304 uvm_mapent_forkshared(struct vm_map *new_map, struct vm_map *old_map,
4305     struct vm_map_entry *old_entry)
4306 {
4307 	/*
4308 	 * if the old_entry needs a new amap (due to prev fork)
4309 	 * then we need to allocate it now so that we have
4310 	 * something we own to share with the new_entry.   [in
4311 	 * other words, we need to clear needs_copy]
4312 	 */
4313 
4314 	if (UVM_ET_ISNEEDSCOPY(old_entry)) {
4315 		/* get our own amap, clears needs_copy */
4316 		amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK,
4317 		    0, 0);
4318 		/* XXXCDC: WAITOK??? */
4319 	}
4320 
4321 	uvm_mapent_clone(new_map, old_entry, AMAP_SHARED);
4322 }
4323 
4324 
4325 static void
4326 uvm_mapent_forkcopy(struct vm_map *new_map, struct vm_map *old_map,
4327     struct vm_map_entry *old_entry)
4328 {
4329 	struct vm_map_entry *new_entry;
4330 
4331 	/*
4332 	 * copy-on-write the mapping (using mmap's
4333 	 * MAP_PRIVATE semantics)
4334 	 *
4335 	 * allocate new_entry, adjust reference counts.
4336 	 * (note that new references are read-only).
4337 	 */
4338 
4339 	new_entry = uvm_mapent_clone(new_map, old_entry, 0);
4340 
4341 	new_entry->etype |=
4342 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
4343 
4344 	/*
4345 	 * the new entry will need an amap.  it will either
4346 	 * need to be copied from the old entry or created
4347 	 * from scratch (if the old entry does not have an
4348 	 * amap).  can we defer this process until later
4349 	 * (by setting "needs_copy") or do we need to copy
4350 	 * the amap now?
4351 	 *
4352 	 * we must copy the amap now if any of the following
4353 	 * conditions hold:
4354 	 * 1. the old entry has an amap and that amap is
4355 	 *    being shared.  this means that the old (parent)
4356 	 *    process is sharing the amap with another
4357 	 *    process.  if we do not clear needs_copy here
4358 	 *    we will end up in a situation where both the
4359 	 *    parent and child process are refering to the
4360 	 *    same amap with "needs_copy" set.  if the
4361 	 *    parent write-faults, the fault routine will
4362 	 *    clear "needs_copy" in the parent by allocating
4363 	 *    a new amap.   this is wrong because the
4364 	 *    parent is supposed to be sharing the old amap
4365 	 *    and the new amap will break that.
4366 	 *
4367 	 * 2. if the old entry has an amap and a non-zero
4368 	 *    wire count then we are going to have to call
4369 	 *    amap_cow_now to avoid page faults in the
4370 	 *    parent process.   since amap_cow_now requires
4371 	 *    "needs_copy" to be clear we might as well
4372 	 *    clear it here as well.
4373 	 *
4374 	 */
4375 
4376 	if (old_entry->aref.ar_amap != NULL) {
4377 		if ((amap_flags(old_entry->aref.ar_amap) & AMAP_SHARED) != 0 ||
4378 		    VM_MAPENT_ISWIRED(old_entry)) {
4379 
4380 			amap_copy(new_map, new_entry,
4381 			    AMAP_COPY_NOCHUNK, 0, 0);
4382 			/* XXXCDC: M_WAITOK ... ok? */
4383 		}
4384 	}
4385 
4386 	/*
4387 	 * if the parent's entry is wired down, then the
4388 	 * parent process does not want page faults on
4389 	 * access to that memory.  this means that we
4390 	 * cannot do copy-on-write because we can't write
4391 	 * protect the old entry.   in this case we
4392 	 * resolve all copy-on-write faults now, using
4393 	 * amap_cow_now.   note that we have already
4394 	 * allocated any needed amap (above).
4395 	 */
4396 
4397 	if (VM_MAPENT_ISWIRED(old_entry)) {
4398 
4399 		/*
4400 		 * resolve all copy-on-write faults now
4401 		 * (note that there is nothing to do if
4402 		 * the old mapping does not have an amap).
4403 		 */
4404 		if (old_entry->aref.ar_amap)
4405 			amap_cow_now(new_map, new_entry);
4406 
4407 	} else {
4408 		/*
4409 		 * setup mappings to trigger copy-on-write faults
4410 		 * we must write-protect the parent if it has
4411 		 * an amap and it is not already "needs_copy"...
4412 		 * if it is already "needs_copy" then the parent
4413 		 * has already been write-protected by a previous
4414 		 * fork operation.
4415 		 */
4416 		if (old_entry->aref.ar_amap &&
4417 		    !UVM_ET_ISNEEDSCOPY(old_entry)) {
4418 			if (old_entry->max_protection & VM_PROT_WRITE) {
4419 #ifdef __HAVE_UNLOCKED_PMAP /* XXX temporary */
4420 				uvm_map_lock_entry(old_entry, RW_WRITER);
4421 #else
4422 				uvm_map_lock_entry(old_entry, RW_READER);
4423 #endif
4424 				pmap_protect(old_map->pmap,
4425 				    old_entry->start, old_entry->end,
4426 				    old_entry->protection & ~VM_PROT_WRITE);
4427 				uvm_map_unlock_entry(old_entry);
4428 			}
4429 			old_entry->etype |= UVM_ET_NEEDSCOPY;
4430 		}
4431 	}
4432 }
4433 
4434 /*
4435  * zero the mapping: the new entry will be zero initialized
4436  */
4437 static void
4438 uvm_mapent_forkzero(struct vm_map *new_map, struct vm_map *old_map,
4439     struct vm_map_entry *old_entry)
4440 {
4441 	struct vm_map_entry *new_entry;
4442 
4443 	new_entry = uvm_mapent_clone(new_map, old_entry, 0);
4444 
4445 	new_entry->etype |=
4446 	    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
4447 
4448 	if (new_entry->aref.ar_amap) {
4449 		uvm_map_unreference_amap(new_entry, 0);
4450 		new_entry->aref.ar_pageoff = 0;
4451 		new_entry->aref.ar_amap = NULL;
4452 	}
4453 
4454 	if (UVM_ET_ISOBJ(new_entry)) {
4455 		if (new_entry->object.uvm_obj->pgops->pgo_detach)
4456 			new_entry->object.uvm_obj->pgops->pgo_detach(
4457 			    new_entry->object.uvm_obj);
4458 		new_entry->object.uvm_obj = NULL;
4459 		new_entry->etype &= ~UVM_ET_OBJ;
4460 	}
4461 }
4462 
4463 /*
4464  *   F O R K   -   m a i n   e n t r y   p o i n t
4465  */
4466 /*
4467  * uvmspace_fork: fork a process' main map
4468  *
4469  * => create a new vmspace for child process from parent.
4470  * => parent's map must not be locked.
4471  */
4472 
4473 struct vmspace *
4474 uvmspace_fork(struct vmspace *vm1)
4475 {
4476 	struct vmspace *vm2;
4477 	struct vm_map *old_map = &vm1->vm_map;
4478 	struct vm_map *new_map;
4479 	struct vm_map_entry *old_entry;
4480 	UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
4481 
4482 	vm_map_lock(old_map);
4483 
4484 	vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map),
4485 	    vm1->vm_map.flags & VM_MAP_TOPDOWN);
4486 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
4487 	    (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy);
4488 	new_map = &vm2->vm_map;		  /* XXX */
4489 
4490 	old_entry = old_map->header.next;
4491 	new_map->size = old_map->size;
4492 
4493 	/*
4494 	 * go entry-by-entry
4495 	 */
4496 
4497 	while (old_entry != &old_map->header) {
4498 
4499 		/*
4500 		 * first, some sanity checks on the old entry
4501 		 */
4502 
4503 		KASSERT(!UVM_ET_ISSUBMAP(old_entry));
4504 		KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
4505 			!UVM_ET_ISNEEDSCOPY(old_entry));
4506 
4507 		switch (old_entry->inheritance) {
4508 		case MAP_INHERIT_NONE:
4509 			/*
4510 			 * drop the mapping, modify size
4511 			 */
4512 			new_map->size -= old_entry->end - old_entry->start;
4513 			break;
4514 
4515 		case MAP_INHERIT_SHARE:
4516 			uvm_mapent_forkshared(new_map, old_map, old_entry);
4517 			break;
4518 
4519 		case MAP_INHERIT_COPY:
4520 			uvm_mapent_forkcopy(new_map, old_map, old_entry);
4521 			break;
4522 
4523 		case MAP_INHERIT_ZERO:
4524 			uvm_mapent_forkzero(new_map, old_map, old_entry);
4525 			break;
4526 		default:
4527 			KASSERT(0);
4528 			break;
4529 		}
4530 		old_entry = old_entry->next;
4531 	}
4532 
4533 	pmap_update(old_map->pmap);
4534 	vm_map_unlock(old_map);
4535 
4536 	if (uvm_shmfork && vm1->vm_shm)
4537 		(*uvm_shmfork)(vm1, vm2);
4538 
4539 #ifdef PMAP_FORK
4540 	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
4541 #endif
4542 
4543 	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
4544 	return (vm2);
4545 }
4546 
4547 
4548 /*
4549  * uvm_mapent_trymerge: try to merge an entry with its neighbors.
4550  *
4551  * => called with map locked.
4552  * => return non zero if successfully merged.
4553  */
4554 
4555 int
4556 uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
4557 {
4558 	struct uvm_object *uobj;
4559 	struct vm_map_entry *next;
4560 	struct vm_map_entry *prev;
4561 	vsize_t size;
4562 	int merged = 0;
4563 	bool copying;
4564 	int newetype;
4565 
4566 	if (entry->aref.ar_amap != NULL) {
4567 		return 0;
4568 	}
4569 	if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
4570 		return 0;
4571 	}
4572 
4573 	uobj = entry->object.uvm_obj;
4574 	size = entry->end - entry->start;
4575 	copying = (flags & UVM_MERGE_COPYING) != 0;
4576 	newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;
4577 
4578 	next = entry->next;
4579 	if (next != &map->header &&
4580 	    next->start == entry->end &&
4581 	    ((copying && next->aref.ar_amap != NULL &&
4582 	    amap_refs(next->aref.ar_amap) == 1) ||
4583 	    (!copying && next->aref.ar_amap == NULL)) &&
4584 	    UVM_ET_ISCOMPATIBLE(next, newetype,
4585 	    uobj, entry->flags, entry->protection,
4586 	    entry->max_protection, entry->inheritance, entry->advice,
4587 	    entry->wired_count) &&
4588 	    (uobj == NULL || entry->offset + size == next->offset)) {
4589 		int error;
4590 
4591 		if (copying) {
4592 			error = amap_extend(next, size,
4593 			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
4594 		} else {
4595 			error = 0;
4596 		}
4597 		if (error == 0) {
4598 			if (uobj) {
4599 				if (uobj->pgops->pgo_detach) {
4600 					uobj->pgops->pgo_detach(uobj);
4601 				}
4602 			}
4603 
4604 			entry->end = next->end;
4605 			clear_hints(map, next);
4606 			uvm_map_entry_unlink(map, next);
4607 			if (copying) {
4608 				entry->aref = next->aref;
4609 				entry->etype &= ~UVM_ET_NEEDSCOPY;
4610 			}
4611 			uvm_map_check(map, "trymerge forwardmerge");
4612 			uvm_mapent_free(next);
4613 			merged++;
4614 		}
4615 	}
4616 
4617 	prev = entry->prev;
4618 	if (prev != &map->header &&
4619 	    prev->end == entry->start &&
4620 	    ((copying && !merged && prev->aref.ar_amap != NULL &&
4621 	    amap_refs(prev->aref.ar_amap) == 1) ||
4622 	    (!copying && prev->aref.ar_amap == NULL)) &&
4623 	    UVM_ET_ISCOMPATIBLE(prev, newetype,
4624 	    uobj, entry->flags, entry->protection,
4625 	    entry->max_protection, entry->inheritance, entry->advice,
4626 	    entry->wired_count) &&
4627 	    (uobj == NULL ||
4628 	    prev->offset + prev->end - prev->start == entry->offset)) {
4629 		int error;
4630 
4631 		if (copying) {
4632 			error = amap_extend(prev, size,
4633 			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
4634 		} else {
4635 			error = 0;
4636 		}
4637 		if (error == 0) {
4638 			if (uobj) {
4639 				if (uobj->pgops->pgo_detach) {
4640 					uobj->pgops->pgo_detach(uobj);
4641 				}
4642 				entry->offset = prev->offset;
4643 			}
4644 
4645 			entry->start = prev->start;
4646 			clear_hints(map, prev);
4647 			uvm_map_entry_unlink(map, prev);
4648 			if (copying) {
4649 				entry->aref = prev->aref;
4650 				entry->etype &= ~UVM_ET_NEEDSCOPY;
4651 			}
4652 			uvm_map_check(map, "trymerge backmerge");
4653 			uvm_mapent_free(prev);
4654 			merged++;
4655 		}
4656 	}
4657 
4658 	return merged;
4659 }
4660 
4661 /*
4662  * uvm_map_setup: init map
4663  *
4664  * => map must not be in service yet.
4665  */
4666 
4667 void
4668 uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags)
4669 {
4670 
4671 	rb_tree_init(&map->rb_tree, &uvm_map_tree_ops);
4672 	map->header.next = map->header.prev = &map->header;
4673 	map->nentries = 0;
4674 	map->size = 0;
4675 	map->ref_count = 1;
4676 	vm_map_setmin(map, vmin);
4677 	vm_map_setmax(map, vmax);
4678 	map->flags = flags;
4679 	map->first_free = &map->header;
4680 	map->hint = &map->header;
4681 	map->timestamp = 0;
4682 	map->busy = NULL;
4683 
4684 	rw_init(&map->lock);
4685 	cv_init(&map->cv, "vm_map");
4686 	mutex_init(&map->misc_lock, MUTEX_DRIVER, IPL_NONE);
4687 }
4688 
4689 /*
4690  *   U N M A P   -   m a i n   e n t r y   p o i n t
4691  */
4692 
4693 /*
4694  * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop")
4695  *
4696  * => caller must check alignment and size
4697  * => map must be unlocked (we will lock it)
4698  * => flags is UVM_FLAG_QUANTUM or 0.
4699  */
4700 
4701 void
4702 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4703 {
4704 	struct vm_map_entry *dead_entries;
4705 	UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);
4706 
4707 	KASSERTMSG(start < end,
4708 	    "%s: map %p: start %#jx < end %#jx", __func__, map,
4709 	    (uintmax_t)start, (uintmax_t)end);
4710 	UVMHIST_LOG(maphist, "  (map=%#jx, start=%#jx, end=%#jx)",
4711 	    (uintptr_t)map, start, end, 0);
4712 	if (map == kernel_map) {
4713 		LOCKDEBUG_MEM_CHECK((void *)start, end - start);
4714 	}
4715 
4716 	/*
4717 	 * work now done by helper functions.   wipe the pmap's and then
4718 	 * detach from the dead entries...
4719 	 */
4720 	vm_map_lock(map);
4721 	uvm_unmap_remove(map, start, end, &dead_entries, flags);
4722 	vm_map_unlock(map);
4723 
4724 	if (dead_entries != NULL)
4725 		uvm_unmap_detach(dead_entries, 0);
4726 
4727 	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
4728 }
4729 
4730 
4731 /*
4732  * uvm_map_reference: add reference to a map
4733  *
4734  * => map need not be locked
4735  */
4736 
4737 void
4738 uvm_map_reference(struct vm_map *map)
4739 {
4740 
4741 	atomic_inc_uint(&map->ref_count);
4742 }
4743 
4744 void
4745 uvm_map_lock_entry(struct vm_map_entry *entry, krw_t op)
4746 {
4747 
4748 	if (entry->aref.ar_amap != NULL) {
4749 		amap_lock(entry->aref.ar_amap, op);
4750 	}
4751 	if (UVM_ET_ISOBJ(entry)) {
4752 		rw_enter(entry->object.uvm_obj->vmobjlock, op);
4753 	}
4754 }
4755 
4756 void
4757 uvm_map_unlock_entry(struct vm_map_entry *entry)
4758 {
4759 
4760 	if (UVM_ET_ISOBJ(entry)) {
4761 		rw_exit(entry->object.uvm_obj->vmobjlock);
4762 	}
4763 	if (entry->aref.ar_amap != NULL) {
4764 		amap_unlock(entry->aref.ar_amap);
4765 	}
4766 }
4767 
4768 #if defined(DDB) || defined(DEBUGPRINT)
4769 
4770 /*
4771  * uvm_map_printit: actually prints the map
4772  */
4773 
4774 void
4775 uvm_map_printit(struct vm_map *map, bool full,
4776     void (*pr)(const char *, ...))
4777 {
4778 	struct vm_map_entry *entry;
4779 
4780 	(*pr)("MAP %p: [%#lx->%#lx]\n", map, vm_map_min(map),
4781 	    vm_map_max(map));
4782 	(*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=%#x\n",
4783 	    map->nentries, map->size, map->ref_count, map->timestamp,
4784 	    map->flags);
4785 	(*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
4786 	    pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
4787 	if (!full)
4788 		return;
4789 	for (entry = map->header.next; entry != &map->header;
4790 	    entry = entry->next) {
4791 		(*pr)(" - %p: %#lx->%#lx: obj=%p/%#llx, amap=%p/%d\n",
4792 		    entry, entry->start, entry->end, entry->object.uvm_obj,
4793 		    (long long)entry->offset, entry->aref.ar_amap,
4794 		    entry->aref.ar_pageoff);
4795 		(*pr)(
4796 		    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
4797 		    "wc=%d, adv=%d\n",
4798 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
4799 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
4800 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
4801 		    entry->protection, entry->max_protection,
4802 		    entry->inheritance, entry->wired_count, entry->advice);
4803 	}
4804 }
4805 
4806 void
4807 uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...))
4808 {
4809 	struct vm_map *map;
4810 
4811 	for (map = kernel_map;;) {
4812 		struct vm_map_entry *entry;
4813 
4814 		if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) {
4815 			break;
4816 		}
4817 		(*pr)("%p is %p+%zu from VMMAP %p\n",
4818 		    (void *)addr, (void *)entry->start,
4819 		    (size_t)(addr - (uintptr_t)entry->start), map);
4820 		if (!UVM_ET_ISSUBMAP(entry)) {
4821 			break;
4822 		}
4823 		map = entry->object.sub_map;
4824 	}
4825 }
4826 
4827 #endif /* DDB || DEBUGPRINT */
4828 
4829 #ifndef __USER_VA0_IS_SAFE
4830 static int
4831 sysctl_user_va0_disable(SYSCTLFN_ARGS)
4832 {
4833 	struct sysctlnode node;
4834 	int t, error;
4835 
4836 	node = *rnode;
4837 	node.sysctl_data = &t;
4838 	t = user_va0_disable;
4839 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
4840 	if (error || newp == NULL)
4841 		return (error);
4842 
4843 	if (!t && user_va0_disable &&
4844 	    kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MAP_VA_ZERO, 0,
4845 	    NULL, NULL, NULL))
4846 		return EPERM;
4847 
4848 	user_va0_disable = !!t;
4849 	return 0;
4850 }
4851 #endif
4852 
4853 static int
4854 fill_vmentry(struct lwp *l, struct proc *p, struct kinfo_vmentry *kve,
4855     struct vm_map *m, struct vm_map_entry *e)
4856 {
4857 #ifndef _RUMPKERNEL
4858 	int error;
4859 
4860 	memset(kve, 0, sizeof(*kve));
4861 	KASSERT(e != NULL);
4862 	if (UVM_ET_ISOBJ(e)) {
4863 		struct uvm_object *uobj = e->object.uvm_obj;
4864 		KASSERT(uobj != NULL);
4865 		kve->kve_ref_count = uobj->uo_refs;
4866 		kve->kve_count = uobj->uo_npages;
4867 		if (UVM_OBJ_IS_VNODE(uobj)) {
4868 			struct vattr va;
4869 			struct vnode *vp = (struct vnode *)uobj;
4870 			vn_lock(vp, LK_SHARED | LK_RETRY);
4871 			error = VOP_GETATTR(vp, &va, l->l_cred);
4872 			VOP_UNLOCK(vp);
4873 			kve->kve_type = KVME_TYPE_VNODE;
4874 			if (error == 0) {
4875 				kve->kve_vn_size = vp->v_size;
4876 				kve->kve_vn_type = (int)vp->v_type;
4877 				kve->kve_vn_mode = va.va_mode;
4878 				kve->kve_vn_rdev = va.va_rdev;
4879 				kve->kve_vn_fileid = va.va_fileid;
4880 				kve->kve_vn_fsid = va.va_fsid;
4881 				error = vnode_to_path(kve->kve_path,
4882 				    sizeof(kve->kve_path) / 2, vp, l, p);
4883 #ifdef DIAGNOSTIC
4884 				if (error)
4885 					printf("%s: vp %p error %d\n", __func__,
4886 						vp, error);
4887 #endif
4888 			}
4889 		} else if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
4890 			kve->kve_type = KVME_TYPE_KERN;
4891 		} else if (UVM_OBJ_IS_DEVICE(uobj)) {
4892 			kve->kve_type = KVME_TYPE_DEVICE;
4893 		} else if (UVM_OBJ_IS_AOBJ(uobj)) {
4894 			kve->kve_type = KVME_TYPE_ANON;
4895 		} else {
4896 			kve->kve_type = KVME_TYPE_OBJECT;
4897 		}
4898 	} else if (UVM_ET_ISSUBMAP(e)) {
4899 		struct vm_map *map = e->object.sub_map;
4900 		KASSERT(map != NULL);
4901 		kve->kve_ref_count = map->ref_count;
4902 		kve->kve_count = map->nentries;
4903 		kve->kve_type = KVME_TYPE_SUBMAP;
4904 	} else
4905 		kve->kve_type = KVME_TYPE_UNKNOWN;
4906 
4907 	kve->kve_start = e->start;
4908 	kve->kve_end = e->end;
4909 	kve->kve_offset = e->offset;
4910 	kve->kve_wired_count = e->wired_count;
4911 	kve->kve_inheritance = e->inheritance;
4912 	kve->kve_attributes = 0; /* unused */
4913 	kve->kve_advice = e->advice;
4914 #define PROT(p) (((p) & VM_PROT_READ) ? KVME_PROT_READ : 0) | \
4915 	(((p) & VM_PROT_WRITE) ? KVME_PROT_WRITE : 0) | \
4916 	(((p) & VM_PROT_EXECUTE) ? KVME_PROT_EXEC : 0)
4917 	kve->kve_protection = PROT(e->protection);
4918 	kve->kve_max_protection = PROT(e->max_protection);
4919 	kve->kve_flags |= (e->etype & UVM_ET_COPYONWRITE)
4920 	    ? KVME_FLAG_COW : 0;
4921 	kve->kve_flags |= (e->etype & UVM_ET_NEEDSCOPY)
4922 	    ? KVME_FLAG_NEEDS_COPY : 0;
4923 	kve->kve_flags |= (m->flags & VM_MAP_TOPDOWN)
4924 	    ? KVME_FLAG_GROWS_DOWN : KVME_FLAG_GROWS_UP;
4925 	kve->kve_flags |= (m->flags & VM_MAP_PAGEABLE)
4926 	    ? KVME_FLAG_PAGEABLE : 0;
4927 #endif
4928 	return 0;
4929 }
4930 
4931 static int
4932 fill_vmentries(struct lwp *l, pid_t pid, u_int elem_size, void *oldp,
4933     size_t *oldlenp)
4934 {
4935 	int error;
4936 	struct proc *p;
4937 	struct kinfo_vmentry *vme;
4938 	struct vmspace *vm;
4939 	struct vm_map *map;
4940 	struct vm_map_entry *entry;
4941 	char *dp;
4942 	size_t count, vmesize;
4943 
4944 	if (elem_size == 0 || elem_size > 2 * sizeof(*vme))
4945 		return EINVAL;
4946 
4947 	if (oldp) {
4948 		if (*oldlenp > 10UL * 1024UL * 1024UL)
4949 			return E2BIG;
4950 		count = *oldlenp / elem_size;
4951 		if (count == 0)
4952 			return ENOMEM;
4953 		vmesize = count * sizeof(*vme);
4954 	} else
4955 		vmesize = 0;
4956 
4957 	if ((error = proc_find_locked(l, &p, pid)) != 0)
4958 		return error;
4959 
4960 	vme = NULL;
4961 	count = 0;
4962 
4963 	if ((error = proc_vmspace_getref(p, &vm)) != 0)
4964 		goto out;
4965 
4966 	map = &vm->vm_map;
4967 	vm_map_lock_read(map);
4968 
4969 	dp = oldp;
4970 	if (oldp)
4971 		vme = kmem_alloc(vmesize, KM_SLEEP);
4972 	for (entry = map->header.next; entry != &map->header;
4973 	    entry = entry->next) {
4974 		if (oldp && (dp - (char *)oldp) < vmesize) {
4975 			error = fill_vmentry(l, p, &vme[count], map, entry);
4976 			if (error)
4977 				goto out;
4978 			dp += elem_size;
4979 		}
4980 		count++;
4981 	}
4982 	vm_map_unlock_read(map);
4983 	uvmspace_free(vm);
4984 
4985 out:
4986 	if (pid != -1)
4987 		mutex_exit(p->p_lock);
4988 	if (error == 0) {
4989 		const u_int esize = uimin(sizeof(*vme), elem_size);
4990 		dp = oldp;
4991 		for (size_t i = 0; i < count; i++) {
4992 			if (oldp && (dp - (char *)oldp) < vmesize) {
4993 				error = sysctl_copyout(l, &vme[i], dp, esize);
4994 				if (error)
4995 					break;
4996 				dp += elem_size;
4997 			} else
4998 				break;
4999 		}
5000 		count *= elem_size;
5001 		if (oldp != NULL && *oldlenp < count)
5002 			error = ENOSPC;
5003 		*oldlenp = count;
5004 	}
5005 	if (vme)
5006 		kmem_free(vme, vmesize);
5007 	return error;
5008 }
5009 
5010 static int
5011 sysctl_vmproc(SYSCTLFN_ARGS)
5012 {
5013 	int error;
5014 
5015 	if (namelen == 1 && name[0] == CTL_QUERY)
5016 		return (sysctl_query(SYSCTLFN_CALL(rnode)));
5017 
5018 	if (namelen == 0)
5019 		return EINVAL;
5020 
5021 	switch (name[0]) {
5022 	case VM_PROC_MAP:
5023 		if (namelen != 3)
5024 			return EINVAL;
5025 		sysctl_unlock();
5026 		error = fill_vmentries(l, name[1], name[2], oldp, oldlenp);
5027 		sysctl_relock();
5028 		return error;
5029 	default:
5030 		return EINVAL;
5031 	}
5032 }
5033 
5034 SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup")
5035 {
5036 
5037 	sysctl_createv(clog, 0, NULL, NULL,
5038 		       CTLFLAG_PERMANENT,
5039 		       CTLTYPE_STRUCT, "proc",
5040 		       SYSCTL_DESCR("Process vm information"),
5041 		       sysctl_vmproc, 0, NULL, 0,
5042 		       CTL_VM, VM_PROC, CTL_EOL);
5043 #ifndef __USER_VA0_IS_SAFE
5044         sysctl_createv(clog, 0, NULL, NULL,
5045                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5046                        CTLTYPE_INT, "user_va0_disable",
5047                        SYSCTL_DESCR("Disable VA 0"),
5048                        sysctl_user_va0_disable, 0, &user_va0_disable, 0,
5049                        CTL_VM, CTL_CREATE, CTL_EOL);
5050 #endif
5051 }
5052