xref: /netbsd-src/sys/uvm/uvm_map.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*	$NetBSD: uvm_map.c,v 1.294 2011/01/04 08:21:18 matt Exp $	*/
2 
3 /*
4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
5  * Copyright (c) 1991, 1993, The Regents of the University of California.
6  *
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * The Mach Operating System project at Carnegie-Mellon University.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. All advertising materials mentioning features or use of this software
21  *    must display the following acknowledgement:
22  *	This product includes software developed by Charles D. Cranor,
23  *      Washington University, the University of California, Berkeley and
24  *      its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)vm_map.c    8.3 (Berkeley) 1/12/94
42  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
43  *
44  *
45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
46  * All rights reserved.
47  *
48  * Permission to use, copy, modify and distribute this software and
49  * its documentation is hereby granted, provided that both the copyright
50  * notice and this permission notice appear in all copies of the
51  * software, derivative works or modified versions, and any portions
52  * thereof, and that both notices appear in supporting documentation.
53  *
54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
57  *
58  * Carnegie Mellon requests users of this software to return to
59  *
60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
61  *  School of Computer Science
62  *  Carnegie Mellon University
63  *  Pittsburgh PA 15213-3890
64  *
65  * any improvements or extensions that they make and grant Carnegie the
66  * rights to redistribute these changes.
67  */
68 
69 /*
70  * uvm_map.c: uvm map operations
71  */
72 
73 #include <sys/cdefs.h>
74 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.294 2011/01/04 08:21:18 matt Exp $");
75 
76 #include "opt_ddb.h"
77 #include "opt_uvmhist.h"
78 #include "opt_uvm.h"
79 #include "opt_sysv.h"
80 
81 #include <sys/param.h>
82 #include <sys/systm.h>
83 #include <sys/mman.h>
84 #include <sys/proc.h>
85 #include <sys/malloc.h>
86 #include <sys/pool.h>
87 #include <sys/kernel.h>
88 #include <sys/mount.h>
89 #include <sys/vnode.h>
90 #include <sys/lockdebug.h>
91 #include <sys/atomic.h>
92 #ifndef __USER_VA0_IS_SAFE
93 #include <sys/sysctl.h>
94 #include <sys/kauth.h>
95 #include "opt_user_va0_disable_default.h"
96 #endif
97 
98 #ifdef SYSVSHM
99 #include <sys/shm.h>
100 #endif
101 
102 #include <uvm/uvm.h>
103 #include <uvm/uvm_readahead.h>
104 
105 #if defined(DDB) || defined(DEBUGPRINT)
106 #include <uvm/uvm_ddb.h>
107 #endif
108 
109 #if !defined(UVMMAP_COUNTERS)
110 
111 #define	UVMMAP_EVCNT_DEFINE(name)	/* nothing */
112 #define UVMMAP_EVCNT_INCR(ev)		/* nothing */
113 #define UVMMAP_EVCNT_DECR(ev)		/* nothing */
114 
115 #else /* defined(UVMMAP_NOCOUNTERS) */
116 
117 #include <sys/evcnt.h>
118 #define	UVMMAP_EVCNT_DEFINE(name) \
119 struct evcnt uvmmap_evcnt_##name = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, \
120     "uvmmap", #name); \
121 EVCNT_ATTACH_STATIC(uvmmap_evcnt_##name);
122 #define	UVMMAP_EVCNT_INCR(ev)		uvmmap_evcnt_##ev.ev_count++
123 #define	UVMMAP_EVCNT_DECR(ev)		uvmmap_evcnt_##ev.ev_count--
124 
125 #endif /* defined(UVMMAP_NOCOUNTERS) */
126 
127 UVMMAP_EVCNT_DEFINE(ubackmerge)
128 UVMMAP_EVCNT_DEFINE(uforwmerge)
129 UVMMAP_EVCNT_DEFINE(ubimerge)
130 UVMMAP_EVCNT_DEFINE(unomerge)
131 UVMMAP_EVCNT_DEFINE(kbackmerge)
132 UVMMAP_EVCNT_DEFINE(kforwmerge)
133 UVMMAP_EVCNT_DEFINE(kbimerge)
134 UVMMAP_EVCNT_DEFINE(knomerge)
135 UVMMAP_EVCNT_DEFINE(map_call)
136 UVMMAP_EVCNT_DEFINE(mlk_call)
137 UVMMAP_EVCNT_DEFINE(mlk_hint)
138 UVMMAP_EVCNT_DEFINE(mlk_list)
139 UVMMAP_EVCNT_DEFINE(mlk_tree)
140 UVMMAP_EVCNT_DEFINE(mlk_treeloop)
141 UVMMAP_EVCNT_DEFINE(mlk_listloop)
142 
143 UVMMAP_EVCNT_DEFINE(uke_alloc)
144 UVMMAP_EVCNT_DEFINE(uke_free)
145 UVMMAP_EVCNT_DEFINE(ukh_alloc)
146 UVMMAP_EVCNT_DEFINE(ukh_free)
147 
148 const char vmmapbsy[] = "vmmapbsy";
149 
150 /*
151  * cache for vmspace structures.
152  */
153 
154 static struct pool_cache uvm_vmspace_cache;
155 
156 /*
157  * cache for dynamically-allocated map entries.
158  */
159 
160 static struct pool_cache uvm_map_entry_cache;
161 
162 MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures");
163 MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap");
164 
165 #ifdef PMAP_GROWKERNEL
166 /*
167  * This global represents the end of the kernel virtual address
168  * space.  If we want to exceed this, we must grow the kernel
169  * virtual address space dynamically.
170  *
171  * Note, this variable is locked by kernel_map's lock.
172  */
173 vaddr_t uvm_maxkaddr;
174 #endif
175 
176 #ifndef __USER_VA0_IS_SAFE
177 #ifndef __USER_VA0_DISABLE_DEFAULT
178 #define __USER_VA0_DISABLE_DEFAULT 1
179 #endif
180 #ifdef USER_VA0_DISABLE_DEFAULT /* kernel config option overrides */
181 #undef __USER_VA0_DISABLE_DEFAULT
182 #define __USER_VA0_DISABLE_DEFAULT USER_VA0_DISABLE_DEFAULT
183 #endif
184 static int user_va0_disable = __USER_VA0_DISABLE_DEFAULT;
185 #endif
186 
187 /*
188  * macros
189  */
190 
191 /*
192  * VM_MAP_USE_KMAPENT: determine if uvm_kmapent_alloc/free is used
193  * for the vm_map.
194  */
195 extern struct vm_map *pager_map; /* XXX */
196 #define	VM_MAP_USE_KMAPENT_FLAGS(flags) \
197 	(((flags) & VM_MAP_INTRSAFE) != 0)
198 #define	VM_MAP_USE_KMAPENT(map) \
199 	(VM_MAP_USE_KMAPENT_FLAGS((map)->flags) || (map) == kernel_map)
200 
201 /*
202  * UVM_ET_ISCOMPATIBLE: check some requirements for map entry merging
203  */
204 
205 #define	UVM_ET_ISCOMPATIBLE(ent, type, uobj, meflags, \
206     prot, maxprot, inh, adv, wire) \
207 	((ent)->etype == (type) && \
208 	(((ent)->flags ^ (meflags)) & (UVM_MAP_NOMERGE | UVM_MAP_QUANTUM)) \
209 	== 0 && \
210 	(ent)->object.uvm_obj == (uobj) && \
211 	(ent)->protection == (prot) && \
212 	(ent)->max_protection == (maxprot) && \
213 	(ent)->inheritance == (inh) && \
214 	(ent)->advice == (adv) && \
215 	(ent)->wired_count == (wire))
216 
217 /*
218  * uvm_map_entry_link: insert entry into a map
219  *
220  * => map must be locked
221  */
222 #define uvm_map_entry_link(map, after_where, entry) do { \
223 	uvm_mapent_check(entry); \
224 	(map)->nentries++; \
225 	(entry)->prev = (after_where); \
226 	(entry)->next = (after_where)->next; \
227 	(entry)->prev->next = (entry); \
228 	(entry)->next->prev = (entry); \
229 	uvm_rb_insert((map), (entry)); \
230 } while (/*CONSTCOND*/ 0)
231 
232 /*
233  * uvm_map_entry_unlink: remove entry from a map
234  *
235  * => map must be locked
236  */
237 #define uvm_map_entry_unlink(map, entry) do { \
238 	KASSERT((entry) != (map)->first_free); \
239 	KASSERT((entry) != (map)->hint); \
240 	uvm_mapent_check(entry); \
241 	(map)->nentries--; \
242 	(entry)->next->prev = (entry)->prev; \
243 	(entry)->prev->next = (entry)->next; \
244 	uvm_rb_remove((map), (entry)); \
245 } while (/*CONSTCOND*/ 0)
246 
247 /*
248  * SAVE_HINT: saves the specified entry as the hint for future lookups.
249  *
250  * => map need not be locked.
251  */
252 #define SAVE_HINT(map, check, value) do { \
253 	if ((map)->hint == (check)) \
254 		(map)->hint = (value); \
255 } while (/*CONSTCOND*/ 0)
256 
257 /*
258  * clear_hints: ensure that hints don't point to the entry.
259  *
260  * => map must be write-locked.
261  */
262 static void
263 clear_hints(struct vm_map *map, struct vm_map_entry *ent)
264 {
265 
266 	SAVE_HINT(map, ent, ent->prev);
267 	if (map->first_free == ent) {
268 		map->first_free = ent->prev;
269 	}
270 }
271 
272 /*
273  * VM_MAP_RANGE_CHECK: check and correct range
274  *
275  * => map must at least be read locked
276  */
277 
278 #define VM_MAP_RANGE_CHECK(map, start, end) do { \
279 	if (start < vm_map_min(map))		\
280 		start = vm_map_min(map);	\
281 	if (end > vm_map_max(map))		\
282 		end = vm_map_max(map);		\
283 	if (start > end)			\
284 		start = end;			\
285 } while (/*CONSTCOND*/ 0)
286 
287 /*
288  * local prototypes
289  */
290 
291 static struct vm_map_entry *
292 		uvm_mapent_alloc(struct vm_map *, int);
293 static struct vm_map_entry *
294 		uvm_mapent_alloc_split(struct vm_map *,
295 		    const struct vm_map_entry *, int,
296 		    struct uvm_mapent_reservation *);
297 static void	uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
298 static void	uvm_mapent_free(struct vm_map_entry *);
299 #if defined(DEBUG)
300 static void	_uvm_mapent_check(const struct vm_map_entry *, const char *,
301 		    int);
302 #define	uvm_mapent_check(map)	_uvm_mapent_check(map, __FILE__, __LINE__)
303 #else /* defined(DEBUG) */
304 #define	uvm_mapent_check(e)	/* nothing */
305 #endif /* defined(DEBUG) */
306 static struct vm_map_entry *
307 		uvm_kmapent_alloc(struct vm_map *, int);
308 static void	uvm_kmapent_free(struct vm_map_entry *);
309 static vsize_t	uvm_kmapent_overhead(vsize_t);
310 
311 static void	uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
312 static void	uvm_map_reference_amap(struct vm_map_entry *, int);
313 static int	uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
314 		    struct vm_map_entry *);
315 static void	uvm_map_unreference_amap(struct vm_map_entry *, int);
316 
317 int _uvm_map_sanity(struct vm_map *);
318 int _uvm_tree_sanity(struct vm_map *);
319 static vsize_t uvm_rb_maxgap(const struct vm_map_entry *);
320 
321 #define	ROOT_ENTRY(map)		((struct vm_map_entry *)(map)->rb_tree.rbt_root)
322 #define	LEFT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_left)
323 #define	RIGHT_ENTRY(entry)	((struct vm_map_entry *)(entry)->rb_node.rb_right)
324 #define	PARENT_ENTRY(map, entry) \
325 	(ROOT_ENTRY(map) == (entry) \
326 	    ? NULL : (struct vm_map_entry *)RB_FATHER(&(entry)->rb_node))
327 
328 static int
329 uvm_map_compare_nodes(void *ctx, const void *nparent, const void *nkey)
330 {
331 	const struct vm_map_entry *eparent = nparent;
332 	const struct vm_map_entry *ekey = nkey;
333 
334 	KASSERT(eparent->start < ekey->start || eparent->start >= ekey->end);
335 	KASSERT(ekey->start < eparent->start || ekey->start >= eparent->end);
336 
337 	if (eparent->start < ekey->start)
338 		return -1;
339 	if (eparent->end >= ekey->start)
340 		return 1;
341 	return 0;
342 }
343 
344 static int
345 uvm_map_compare_key(void *ctx, const void *nparent, const void *vkey)
346 {
347 	const struct vm_map_entry *eparent = nparent;
348 	const vaddr_t va = *(const vaddr_t *) vkey;
349 
350 	if (eparent->start < va)
351 		return -1;
352 	if (eparent->end >= va)
353 		return 1;
354 	return 0;
355 }
356 
357 static const rb_tree_ops_t uvm_map_tree_ops = {
358 	.rbto_compare_nodes = uvm_map_compare_nodes,
359 	.rbto_compare_key = uvm_map_compare_key,
360 	.rbto_node_offset = offsetof(struct vm_map_entry, rb_node),
361 	.rbto_context = NULL
362 };
363 
364 /*
365  * uvm_rb_gap: return the gap size between our entry and next entry.
366  */
367 static inline vsize_t
368 uvm_rb_gap(const struct vm_map_entry *entry)
369 {
370 
371 	KASSERT(entry->next != NULL);
372 	return entry->next->start - entry->end;
373 }
374 
375 static vsize_t
376 uvm_rb_maxgap(const struct vm_map_entry *entry)
377 {
378 	struct vm_map_entry *child;
379 	vsize_t maxgap = entry->gap;
380 
381 	/*
382 	 * We need maxgap to be the largest gap of us or any of our
383 	 * descendents.  Since each of our children's maxgap is the
384 	 * cached value of their largest gap of themselves or their
385 	 * descendents, we can just use that value and avoid recursing
386 	 * down the tree to calculate it.
387 	 */
388 	if ((child = LEFT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
389 		maxgap = child->maxgap;
390 
391 	if ((child = RIGHT_ENTRY(entry)) != NULL && maxgap < child->maxgap)
392 		maxgap = child->maxgap;
393 
394 	return maxgap;
395 }
396 
397 static void
398 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
399 {
400 	struct vm_map_entry *parent;
401 
402 	KASSERT(entry->gap == uvm_rb_gap(entry));
403 	entry->maxgap = uvm_rb_maxgap(entry);
404 
405 	while ((parent = PARENT_ENTRY(map, entry)) != NULL) {
406 		struct vm_map_entry *brother;
407 		vsize_t maxgap = parent->gap;
408 		unsigned int which;
409 
410 		KDASSERT(parent->gap == uvm_rb_gap(parent));
411 		if (maxgap < entry->maxgap)
412 			maxgap = entry->maxgap;
413 		/*
414 		 * Since we work towards the root, we know entry's maxgap
415 		 * value is OK, but its brothers may now be out-of-date due
416 		 * to rebalancing.  So refresh it.
417 		 */
418 		which = RB_POSITION(&entry->rb_node) ^ RB_DIR_OTHER;
419 		brother = (struct vm_map_entry *)parent->rb_node.rb_nodes[which];
420 		if (brother != NULL) {
421 			KDASSERT(brother->gap == uvm_rb_gap(brother));
422 			brother->maxgap = uvm_rb_maxgap(brother);
423 			if (maxgap < brother->maxgap)
424 				maxgap = brother->maxgap;
425 		}
426 
427 		parent->maxgap = maxgap;
428 		entry = parent;
429 	}
430 }
431 
432 static void
433 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
434 {
435 	struct vm_map_entry *ret;
436 
437 	entry->gap = entry->maxgap = uvm_rb_gap(entry);
438 	if (entry->prev != &map->header)
439 		entry->prev->gap = uvm_rb_gap(entry->prev);
440 
441 	ret = rb_tree_insert_node(&map->rb_tree, entry);
442 	KASSERTMSG(ret == entry,
443 	    ("uvm_rb_insert: map %p: duplicate entry %p", map, ret)
444 	);
445 
446 	/*
447 	 * If the previous entry is not our immediate left child, then it's an
448 	 * ancestor and will be fixed up on the way to the root.  We don't
449 	 * have to check entry->prev against &map->header since &map->header
450 	 * will never be in the tree.
451 	 */
452 	uvm_rb_fixup(map,
453 	    LEFT_ENTRY(entry) == entry->prev ? entry->prev : entry);
454 }
455 
456 static void
457 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
458 {
459 	struct vm_map_entry *prev_parent = NULL, *next_parent = NULL;
460 
461 	/*
462 	 * If we are removing an interior node, then an adjacent node will
463 	 * be used to replace its position in the tree.  Therefore we will
464 	 * need to fixup the tree starting at the parent of the replacement
465 	 * node.  So record their parents for later use.
466 	 */
467 	if (entry->prev != &map->header)
468 		prev_parent = PARENT_ENTRY(map, entry->prev);
469 	if (entry->next != &map->header)
470 		next_parent = PARENT_ENTRY(map, entry->next);
471 
472 	rb_tree_remove_node(&map->rb_tree, entry);
473 
474 	/*
475 	 * If the previous node has a new parent, fixup the tree starting
476 	 * at the previous node's old parent.
477 	 */
478 	if (entry->prev != &map->header) {
479 		/*
480 		 * Update the previous entry's gap due to our absence.
481 		 */
482 		entry->prev->gap = uvm_rb_gap(entry->prev);
483 		uvm_rb_fixup(map, entry->prev);
484 		if (prev_parent != NULL
485 		    && prev_parent != entry
486 		    && prev_parent != PARENT_ENTRY(map, entry->prev))
487 			uvm_rb_fixup(map, prev_parent);
488 	}
489 
490 	/*
491 	 * If the next node has a new parent, fixup the tree starting
492 	 * at the next node's old parent.
493 	 */
494 	if (entry->next != &map->header) {
495 		uvm_rb_fixup(map, entry->next);
496 		if (next_parent != NULL
497 		    && next_parent != entry
498 		    && next_parent != PARENT_ENTRY(map, entry->next))
499 			uvm_rb_fixup(map, next_parent);
500 	}
501 }
502 
503 #if defined(DEBUG)
504 int uvm_debug_check_map = 0;
505 int uvm_debug_check_rbtree = 0;
506 #define uvm_map_check(map, name) \
507 	_uvm_map_check((map), (name), __FILE__, __LINE__)
508 static void
509 _uvm_map_check(struct vm_map *map, const char *name,
510     const char *file, int line)
511 {
512 
513 	if ((uvm_debug_check_map && _uvm_map_sanity(map)) ||
514 	    (uvm_debug_check_rbtree && _uvm_tree_sanity(map))) {
515 		panic("uvm_map_check failed: \"%s\" map=%p (%s:%d)",
516 		    name, map, file, line);
517 	}
518 }
519 #else /* defined(DEBUG) */
520 #define uvm_map_check(map, name)	/* nothing */
521 #endif /* defined(DEBUG) */
522 
523 #if defined(DEBUG) || defined(DDB)
524 int
525 _uvm_map_sanity(struct vm_map *map)
526 {
527 	bool first_free_found = false;
528 	bool hint_found = false;
529 	const struct vm_map_entry *e;
530 	struct vm_map_entry *hint = map->hint;
531 
532 	e = &map->header;
533 	for (;;) {
534 		if (map->first_free == e) {
535 			first_free_found = true;
536 		} else if (!first_free_found && e->next->start > e->end) {
537 			printf("first_free %p should be %p\n",
538 			    map->first_free, e);
539 			return -1;
540 		}
541 		if (hint == e) {
542 			hint_found = true;
543 		}
544 
545 		e = e->next;
546 		if (e == &map->header) {
547 			break;
548 		}
549 	}
550 	if (!first_free_found) {
551 		printf("stale first_free\n");
552 		return -1;
553 	}
554 	if (!hint_found) {
555 		printf("stale hint\n");
556 		return -1;
557 	}
558 	return 0;
559 }
560 
561 int
562 _uvm_tree_sanity(struct vm_map *map)
563 {
564 	struct vm_map_entry *tmp, *trtmp;
565 	int n = 0, i = 1;
566 
567 	for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
568 		if (tmp->gap != uvm_rb_gap(tmp)) {
569 			printf("%d/%d gap %lx != %lx %s\n",
570 			    n + 1, map->nentries,
571 			    (ulong)tmp->gap, (ulong)uvm_rb_gap(tmp),
572 			    tmp->next == &map->header ? "(last)" : "");
573 			goto error;
574 		}
575 		/*
576 		 * If any entries are out of order, tmp->gap will be unsigned
577 		 * and will likely exceed the size of the map.
578 		 */
579 		if (tmp->gap >= vm_map_max(map) - vm_map_min(map)) {
580 			printf("too large gap %zu\n", (size_t)tmp->gap);
581 			goto error;
582 		}
583 		n++;
584 	}
585 
586 	if (n != map->nentries) {
587 		printf("nentries: %d vs %d\n", n, map->nentries);
588 		goto error;
589 	}
590 
591 	trtmp = NULL;
592 	for (tmp = map->header.next; tmp != &map->header; tmp = tmp->next) {
593 		if (tmp->maxgap != uvm_rb_maxgap(tmp)) {
594 			printf("maxgap %lx != %lx\n",
595 			    (ulong)tmp->maxgap,
596 			    (ulong)uvm_rb_maxgap(tmp));
597 			goto error;
598 		}
599 		if (trtmp != NULL && trtmp->start >= tmp->start) {
600 			printf("corrupt: 0x%"PRIxVADDR"x >= 0x%"PRIxVADDR"x\n",
601 			    trtmp->start, tmp->start);
602 			goto error;
603 		}
604 
605 		trtmp = tmp;
606 	}
607 
608 	for (tmp = map->header.next; tmp != &map->header;
609 	    tmp = tmp->next, i++) {
610 		trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_LEFT);
611 		if (trtmp == NULL)
612 			trtmp = &map->header;
613 		if (tmp->prev != trtmp) {
614 			printf("lookup: %d: %p->prev=%p: %p\n",
615 			    i, tmp, tmp->prev, trtmp);
616 			goto error;
617 		}
618 		trtmp = rb_tree_iterate(&map->rb_tree, tmp, RB_DIR_RIGHT);
619 		if (trtmp == NULL)
620 			trtmp = &map->header;
621 		if (tmp->next != trtmp) {
622 			printf("lookup: %d: %p->next=%p: %p\n",
623 			    i, tmp, tmp->next, trtmp);
624 			goto error;
625 		}
626 		trtmp = rb_tree_find_node(&map->rb_tree, &tmp->start);
627 		if (trtmp != tmp) {
628 			printf("lookup: %d: %p - %p: %p\n", i, tmp, trtmp,
629 			    PARENT_ENTRY(map, tmp));
630 			goto error;
631 		}
632 	}
633 
634 	return (0);
635  error:
636 	return (-1);
637 }
638 #endif /* defined(DEBUG) || defined(DDB) */
639 
640 #ifdef DIAGNOSTIC
641 static struct vm_map *uvm_kmapent_map(struct vm_map_entry *);
642 #endif
643 
644 /*
645  * vm_map_lock: acquire an exclusive (write) lock on a map.
646  *
647  * => Note that "intrsafe" maps use only exclusive, spin locks.
648  *
649  * => The locking protocol provides for guaranteed upgrade from shared ->
650  *    exclusive by whichever thread currently has the map marked busy.
651  *    See "LOCKING PROTOCOL NOTES" in uvm_map.h.  This is horrible; among
652  *    other problems, it defeats any fairness guarantees provided by RW
653  *    locks.
654  */
655 
656 void
657 vm_map_lock(struct vm_map *map)
658 {
659 
660 	if ((map->flags & VM_MAP_INTRSAFE) != 0) {
661 		mutex_spin_enter(&map->mutex);
662 		return;
663 	}
664 
665 	for (;;) {
666 		rw_enter(&map->lock, RW_WRITER);
667 		if (map->busy == NULL)
668 			break;
669 		if (map->busy == curlwp)
670 			break;
671 		mutex_enter(&map->misc_lock);
672 		rw_exit(&map->lock);
673 		if (map->busy != NULL)
674 			cv_wait(&map->cv, &map->misc_lock);
675 		mutex_exit(&map->misc_lock);
676 	}
677 
678 	map->timestamp++;
679 }
680 
681 /*
682  * vm_map_lock_try: try to lock a map, failing if it is already locked.
683  */
684 
685 bool
686 vm_map_lock_try(struct vm_map *map)
687 {
688 
689 	if ((map->flags & VM_MAP_INTRSAFE) != 0)
690 		return mutex_tryenter(&map->mutex);
691 	if (!rw_tryenter(&map->lock, RW_WRITER))
692 		return false;
693 	if (map->busy != NULL) {
694 		rw_exit(&map->lock);
695 		return false;
696 	}
697 
698 	map->timestamp++;
699 	return true;
700 }
701 
702 /*
703  * vm_map_unlock: release an exclusive lock on a map.
704  */
705 
706 void
707 vm_map_unlock(struct vm_map *map)
708 {
709 
710 	if ((map->flags & VM_MAP_INTRSAFE) != 0)
711 		mutex_spin_exit(&map->mutex);
712 	else {
713 		KASSERT(rw_write_held(&map->lock));
714 		KASSERT(map->busy == NULL || map->busy == curlwp);
715 		rw_exit(&map->lock);
716 	}
717 }
718 
719 /*
720  * vm_map_unbusy: mark the map as unbusy, and wake any waiters that
721  *     want an exclusive lock.
722  */
723 
724 void
725 vm_map_unbusy(struct vm_map *map)
726 {
727 
728 	KASSERT(map->busy == curlwp);
729 
730 	/*
731 	 * Safe to clear 'busy' and 'waiters' with only a read lock held:
732 	 *
733 	 * o they can only be set with a write lock held
734 	 * o writers are blocked out with a read or write hold
735 	 * o at any time, only one thread owns the set of values
736 	 */
737 	mutex_enter(&map->misc_lock);
738 	map->busy = NULL;
739 	cv_broadcast(&map->cv);
740 	mutex_exit(&map->misc_lock);
741 }
742 
743 /*
744  * vm_map_lock_read: acquire a shared (read) lock on a map.
745  */
746 
747 void
748 vm_map_lock_read(struct vm_map *map)
749 {
750 
751 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
752 
753 	rw_enter(&map->lock, RW_READER);
754 }
755 
756 /*
757  * vm_map_unlock_read: release a shared lock on a map.
758  */
759 
760 void
761 vm_map_unlock_read(struct vm_map *map)
762 {
763 
764 	KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
765 
766 	rw_exit(&map->lock);
767 }
768 
769 /*
770  * vm_map_busy: mark a map as busy.
771  *
772  * => the caller must hold the map write locked
773  */
774 
775 void
776 vm_map_busy(struct vm_map *map)
777 {
778 
779 	KASSERT(rw_write_held(&map->lock));
780 	KASSERT(map->busy == NULL);
781 
782 	map->busy = curlwp;
783 }
784 
785 /*
786  * vm_map_locked_p: return true if the map is write locked.
787  *
788  * => only for debug purposes like KASSERTs.
789  * => should not be used to verify that a map is not locked.
790  */
791 
792 bool
793 vm_map_locked_p(struct vm_map *map)
794 {
795 
796 	if ((map->flags & VM_MAP_INTRSAFE) != 0) {
797 		return mutex_owned(&map->mutex);
798 	} else {
799 		return rw_write_held(&map->lock);
800 	}
801 }
802 
803 /*
804  * uvm_mapent_alloc: allocate a map entry
805  */
806 
807 static struct vm_map_entry *
808 uvm_mapent_alloc(struct vm_map *map, int flags)
809 {
810 	struct vm_map_entry *me;
811 	int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
812 	UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);
813 
814 	if (VM_MAP_USE_KMAPENT(map)) {
815 		me = uvm_kmapent_alloc(map, flags);
816 	} else {
817 		me = pool_cache_get(&uvm_map_entry_cache, pflags);
818 		if (__predict_false(me == NULL))
819 			return NULL;
820 		me->flags = 0;
821 	}
822 
823 	UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me,
824 	    ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0);
825 	return (me);
826 }
827 
828 /*
829  * uvm_mapent_alloc_split: allocate a map entry for clipping.
830  *
831  * => map must be locked by caller if UVM_MAP_QUANTUM is set.
832  */
833 
834 static struct vm_map_entry *
835 uvm_mapent_alloc_split(struct vm_map *map,
836     const struct vm_map_entry *old_entry, int flags,
837     struct uvm_mapent_reservation *umr)
838 {
839 	struct vm_map_entry *me;
840 
841 	KASSERT(!VM_MAP_USE_KMAPENT(map) ||
842 	    (old_entry->flags & UVM_MAP_QUANTUM) || !UMR_EMPTY(umr));
843 
844 	if (old_entry->flags & UVM_MAP_QUANTUM) {
845 		struct vm_map_kernel *vmk = vm_map_to_kernel(map);
846 
847 		KASSERT(vm_map_locked_p(map));
848 		me = vmk->vmk_merged_entries;
849 		KASSERT(me);
850 		vmk->vmk_merged_entries = me->next;
851 		KASSERT(me->flags & UVM_MAP_QUANTUM);
852 	} else {
853 		me = uvm_mapent_alloc(map, flags);
854 	}
855 
856 	return me;
857 }
858 
859 /*
860  * uvm_mapent_free: free map entry
861  */
862 
863 static void
864 uvm_mapent_free(struct vm_map_entry *me)
865 {
866 	UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);
867 
868 	UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]",
869 		me, me->flags, 0, 0);
870 	if (me->flags & UVM_MAP_KERNEL) {
871 		uvm_kmapent_free(me);
872 	} else {
873 		pool_cache_put(&uvm_map_entry_cache, me);
874 	}
875 }
876 
877 /*
878  * uvm_mapent_free_merged: free merged map entry
879  *
880  * => keep the entry if needed.
881  * => caller shouldn't hold map locked if VM_MAP_USE_KMAPENT(map) is true.
882  * => map should be locked if UVM_MAP_QUANTUM is set.
883  */
884 
885 static void
886 uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me)
887 {
888 
889 	KASSERT(!(me->flags & UVM_MAP_KERNEL) || uvm_kmapent_map(me) == map);
890 
891 	if (me->flags & UVM_MAP_QUANTUM) {
892 		/*
893 		 * keep this entry for later splitting.
894 		 */
895 		struct vm_map_kernel *vmk;
896 
897 		KASSERT(vm_map_locked_p(map));
898 		KASSERT(VM_MAP_IS_KERNEL(map));
899 		KASSERT(!VM_MAP_USE_KMAPENT(map) ||
900 		    (me->flags & UVM_MAP_KERNEL));
901 
902 		vmk = vm_map_to_kernel(map);
903 		me->next = vmk->vmk_merged_entries;
904 		vmk->vmk_merged_entries = me;
905 	} else {
906 		uvm_mapent_free(me);
907 	}
908 }
909 
910 /*
911  * uvm_mapent_copy: copy a map entry, preserving flags
912  */
913 
914 static inline void
915 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
916 {
917 
918 	memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
919 	    ((char *)src));
920 }
921 
922 /*
923  * uvm_mapent_overhead: calculate maximum kva overhead necessary for
924  * map entries.
925  *
926  * => size and flags are the same as uvm_km_suballoc's ones.
927  */
928 
929 vsize_t
930 uvm_mapent_overhead(vsize_t size, int flags)
931 {
932 
933 	if (VM_MAP_USE_KMAPENT_FLAGS(flags)) {
934 		return uvm_kmapent_overhead(size);
935 	}
936 	return 0;
937 }
938 
939 #if defined(DEBUG)
940 static void
941 _uvm_mapent_check(const struct vm_map_entry *entry, const char *file, int line)
942 {
943 
944 	if (entry->start >= entry->end) {
945 		goto bad;
946 	}
947 	if (UVM_ET_ISOBJ(entry)) {
948 		if (entry->object.uvm_obj == NULL) {
949 			goto bad;
950 		}
951 	} else if (UVM_ET_ISSUBMAP(entry)) {
952 		if (entry->object.sub_map == NULL) {
953 			goto bad;
954 		}
955 	} else {
956 		if (entry->object.uvm_obj != NULL ||
957 		    entry->object.sub_map != NULL) {
958 			goto bad;
959 		}
960 	}
961 	if (!UVM_ET_ISOBJ(entry)) {
962 		if (entry->offset != 0) {
963 			goto bad;
964 		}
965 	}
966 
967 	return;
968 
969 bad:
970 	panic("%s: bad entry %p (%s:%d)", __func__, entry, file, line);
971 }
972 #endif /* defined(DEBUG) */
973 
974 /*
975  * uvm_map_entry_unwire: unwire a map entry
976  *
977  * => map should be locked by caller
978  */
979 
980 static inline void
981 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
982 {
983 
984 	entry->wired_count = 0;
985 	uvm_fault_unwire_locked(map, entry->start, entry->end);
986 }
987 
988 
989 /*
990  * wrapper for calling amap_ref()
991  */
992 static inline void
993 uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
994 {
995 
996 	amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
997 	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
998 }
999 
1000 
1001 /*
1002  * wrapper for calling amap_unref()
1003  */
1004 static inline void
1005 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
1006 {
1007 
1008 	amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
1009 	    (entry->end - entry->start) >> PAGE_SHIFT, flags);
1010 }
1011 
1012 
1013 /*
1014  * uvm_map_init: init mapping system at boot time.
1015  */
1016 
1017 void
1018 uvm_map_init(void)
1019 {
1020 #if defined(UVMHIST)
1021 	static struct uvm_history_ent maphistbuf[100];
1022 	static struct uvm_history_ent pdhistbuf[100];
1023 #endif
1024 
1025 	/*
1026 	 * first, init logging system.
1027 	 */
1028 
1029 	UVMHIST_FUNC("uvm_map_init");
1030 	UVMHIST_INIT_STATIC(maphist, maphistbuf);
1031 	UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
1032 	UVMHIST_CALLED(maphist);
1033 	UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
1034 
1035 	/*
1036 	 * initialize the global lock for kernel map entry.
1037 	 */
1038 
1039 	mutex_init(&uvm_kentry_lock, MUTEX_DRIVER, IPL_VM);
1040 
1041 	/*
1042 	 * initialize caches.
1043 	 */
1044 
1045 	pool_cache_bootstrap(&uvm_map_entry_cache, sizeof(struct vm_map_entry),
1046 	    0, 0, 0, "vmmpepl", NULL, IPL_NONE, NULL, NULL, NULL);
1047 	pool_cache_bootstrap(&uvm_vmspace_cache, sizeof(struct vmspace),
1048 	    0, 0, 0, "vmsppl", NULL, IPL_NONE, NULL, NULL, NULL);
1049 }
1050 
1051 /*
1052  * clippers
1053  */
1054 
1055 /*
1056  * uvm_mapent_splitadj: adjust map entries for splitting, after uvm_mapent_copy.
1057  */
1058 
1059 static void
1060 uvm_mapent_splitadj(struct vm_map_entry *entry1, struct vm_map_entry *entry2,
1061     vaddr_t splitat)
1062 {
1063 	vaddr_t adj;
1064 
1065 	KASSERT(entry1->start < splitat);
1066 	KASSERT(splitat < entry1->end);
1067 
1068 	adj = splitat - entry1->start;
1069 	entry1->end = entry2->start = splitat;
1070 
1071 	if (entry1->aref.ar_amap) {
1072 		amap_splitref(&entry1->aref, &entry2->aref, adj);
1073 	}
1074 	if (UVM_ET_ISSUBMAP(entry1)) {
1075 		/* ... unlikely to happen, but play it safe */
1076 		 uvm_map_reference(entry1->object.sub_map);
1077 	} else if (UVM_ET_ISOBJ(entry1)) {
1078 		KASSERT(entry1->object.uvm_obj != NULL); /* suppress coverity */
1079 		entry2->offset += adj;
1080 		if (entry1->object.uvm_obj->pgops &&
1081 		    entry1->object.uvm_obj->pgops->pgo_reference)
1082 			entry1->object.uvm_obj->pgops->pgo_reference(
1083 			    entry1->object.uvm_obj);
1084 	}
1085 }
1086 
1087 /*
1088  * uvm_map_clip_start: ensure that the entry begins at or after
1089  *	the starting address, if it doesn't we split the entry.
1090  *
1091  * => caller should use UVM_MAP_CLIP_START macro rather than calling
1092  *    this directly
1093  * => map must be locked by caller
1094  */
1095 
1096 void
1097 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
1098     vaddr_t start, struct uvm_mapent_reservation *umr)
1099 {
1100 	struct vm_map_entry *new_entry;
1101 
1102 	/* uvm_map_simplify_entry(map, entry); */ /* XXX */
1103 
1104 	uvm_map_check(map, "clip_start entry");
1105 	uvm_mapent_check(entry);
1106 
1107 	/*
1108 	 * Split off the front portion.  note that we must insert the new
1109 	 * entry BEFORE this one, so that this entry has the specified
1110 	 * starting address.
1111 	 */
1112 	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
1113 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
1114 	uvm_mapent_splitadj(new_entry, entry, start);
1115 	uvm_map_entry_link(map, entry->prev, new_entry);
1116 
1117 	uvm_map_check(map, "clip_start leave");
1118 }
1119 
1120 /*
1121  * uvm_map_clip_end: ensure that the entry ends at or before
1122  *	the ending address, if it does't we split the reference
1123  *
1124  * => caller should use UVM_MAP_CLIP_END macro rather than calling
1125  *    this directly
1126  * => map must be locked by caller
1127  */
1128 
1129 void
1130 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end,
1131     struct uvm_mapent_reservation *umr)
1132 {
1133 	struct vm_map_entry *new_entry;
1134 
1135 	uvm_map_check(map, "clip_end entry");
1136 	uvm_mapent_check(entry);
1137 
1138 	/*
1139 	 *	Create a new entry and insert it
1140 	 *	AFTER the specified entry
1141 	 */
1142 	new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
1143 	uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
1144 	uvm_mapent_splitadj(entry, new_entry, end);
1145 	uvm_map_entry_link(map, entry, new_entry);
1146 
1147 	uvm_map_check(map, "clip_end leave");
1148 }
1149 
1150 static void
1151 vm_map_drain(struct vm_map *map, uvm_flag_t flags)
1152 {
1153 
1154 	if (!VM_MAP_IS_KERNEL(map)) {
1155 		return;
1156 	}
1157 
1158 	uvm_km_va_drain(map, flags);
1159 }
1160 
1161 /*
1162  *   M A P   -   m a i n   e n t r y   p o i n t
1163  */
1164 /*
1165  * uvm_map: establish a valid mapping in a map
1166  *
1167  * => assume startp is page aligned.
1168  * => assume size is a multiple of PAGE_SIZE.
1169  * => assume sys_mmap provides enough of a "hint" to have us skip
1170  *	over text/data/bss area.
1171  * => map must be unlocked (we will lock it)
1172  * => <uobj,uoffset> value meanings (4 cases):
1173  *	 [1] <NULL,uoffset>		== uoffset is a hint for PMAP_PREFER
1174  *	 [2] <NULL,UVM_UNKNOWN_OFFSET>	== don't PMAP_PREFER
1175  *	 [3] <uobj,uoffset>		== normal mapping
1176  *	 [4] <uobj,UVM_UNKNOWN_OFFSET>	== uvm_map finds offset based on VA
1177  *
1178  *    case [4] is for kernel mappings where we don't know the offset until
1179  *    we've found a virtual address.   note that kernel object offsets are
1180  *    always relative to vm_map_min(kernel_map).
1181  *
1182  * => if `align' is non-zero, we align the virtual address to the specified
1183  *	alignment.
1184  *	this is provided as a mechanism for large pages.
1185  *
1186  * => XXXCDC: need way to map in external amap?
1187  */
1188 
1189 int
1190 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
1191     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
1192 {
1193 	struct uvm_map_args args;
1194 	struct vm_map_entry *new_entry;
1195 	int error;
1196 
1197 	KASSERT((flags & UVM_FLAG_QUANTUM) == 0 || VM_MAP_IS_KERNEL(map));
1198 	KASSERT((size & PAGE_MASK) == 0);
1199 
1200 #ifndef __USER_VA0_IS_SAFE
1201 	if ((flags & UVM_FLAG_FIXED) && *startp == 0 &&
1202 	    !VM_MAP_IS_KERNEL(map) && user_va0_disable)
1203 		return EACCES;
1204 #endif
1205 
1206 	/*
1207 	 * for pager_map, allocate the new entry first to avoid sleeping
1208 	 * for memory while we have the map locked.
1209 	 *
1210 	 * Also, because we allocate entries for in-kernel maps
1211 	 * a bit differently (cf. uvm_kmapent_alloc/free), we need to
1212 	 * allocate them before locking the map.
1213 	 */
1214 
1215 	new_entry = NULL;
1216 	if (VM_MAP_USE_KMAPENT(map) || (flags & UVM_FLAG_QUANTUM) ||
1217 	    map == pager_map) {
1218 		new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
1219 		if (__predict_false(new_entry == NULL))
1220 			return ENOMEM;
1221 		if (flags & UVM_FLAG_QUANTUM)
1222 			new_entry->flags |= UVM_MAP_QUANTUM;
1223 	}
1224 	if (map == pager_map)
1225 		flags |= UVM_FLAG_NOMERGE;
1226 
1227 	error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
1228 	    flags, &args);
1229 	if (!error) {
1230 		error = uvm_map_enter(map, &args, new_entry);
1231 		*startp = args.uma_start;
1232 	} else if (new_entry) {
1233 		uvm_mapent_free(new_entry);
1234 	}
1235 
1236 #if defined(DEBUG)
1237 	if (!error && VM_MAP_IS_KERNEL(map)) {
1238 		uvm_km_check_empty(map, *startp, *startp + size);
1239 	}
1240 #endif /* defined(DEBUG) */
1241 
1242 	return error;
1243 }
1244 
1245 int
1246 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
1247     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
1248     struct uvm_map_args *args)
1249 {
1250 	struct vm_map_entry *prev_entry;
1251 	vm_prot_t prot = UVM_PROTECTION(flags);
1252 	vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
1253 
1254 	UVMHIST_FUNC("uvm_map_prepare");
1255 	UVMHIST_CALLED(maphist);
1256 
1257 	UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
1258 	    map, start, size, flags);
1259 	UVMHIST_LOG(maphist, "  uobj/offset 0x%x/%d", uobj, uoffset,0,0);
1260 
1261 	/*
1262 	 * detect a popular device driver bug.
1263 	 */
1264 
1265 	KASSERT(doing_shutdown || curlwp != NULL ||
1266 	    (map->flags & VM_MAP_INTRSAFE));
1267 
1268 	/*
1269 	 * zero-sized mapping doesn't make any sense.
1270 	 */
1271 	KASSERT(size > 0);
1272 
1273 	KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0);
1274 
1275 	uvm_map_check(map, "map entry");
1276 
1277 	/*
1278 	 * check sanity of protection code
1279 	 */
1280 
1281 	if ((prot & maxprot) != prot) {
1282 		UVMHIST_LOG(maphist, "<- prot. failure:  prot=0x%x, max=0x%x",
1283 		prot, maxprot,0,0);
1284 		return EACCES;
1285 	}
1286 
1287 	/*
1288 	 * figure out where to put new VM range
1289 	 */
1290 
1291 retry:
1292 	if (vm_map_lock_try(map) == false) {
1293 		if ((flags & UVM_FLAG_TRYLOCK) != 0 &&
1294 		    (map->flags & VM_MAP_INTRSAFE) == 0) {
1295 			return EAGAIN;
1296 		}
1297 		vm_map_lock(map); /* could sleep here */
1298 	}
1299 	prev_entry = uvm_map_findspace(map, start, size, &start,
1300 	    uobj, uoffset, align, flags);
1301 	if (prev_entry == NULL) {
1302 		unsigned int timestamp;
1303 
1304 		timestamp = map->timestamp;
1305 		UVMHIST_LOG(maphist,"waiting va timestamp=0x%x",
1306 			    timestamp,0,0,0);
1307 		map->flags |= VM_MAP_WANTVA;
1308 		vm_map_unlock(map);
1309 
1310 		/*
1311 		 * try to reclaim kva and wait until someone does unmap.
1312 		 * fragile locking here, so we awaken every second to
1313 		 * recheck the condition.
1314 		 */
1315 
1316 		vm_map_drain(map, flags);
1317 
1318 		mutex_enter(&map->misc_lock);
1319 		while ((map->flags & VM_MAP_WANTVA) != 0 &&
1320 		   map->timestamp == timestamp) {
1321 			if ((flags & UVM_FLAG_WAITVA) == 0) {
1322 				mutex_exit(&map->misc_lock);
1323 				UVMHIST_LOG(maphist,
1324 				    "<- uvm_map_findspace failed!", 0,0,0,0);
1325 				return ENOMEM;
1326 			} else {
1327 				cv_timedwait(&map->cv, &map->misc_lock, hz);
1328 			}
1329 		}
1330 		mutex_exit(&map->misc_lock);
1331 		goto retry;
1332 	}
1333 
1334 #ifdef PMAP_GROWKERNEL
1335 	/*
1336 	 * If the kernel pmap can't map the requested space,
1337 	 * then allocate more resources for it.
1338 	 */
1339 	if (map == kernel_map && uvm_maxkaddr < (start + size))
1340 		uvm_maxkaddr = pmap_growkernel(start + size);
1341 #endif
1342 
1343 	UVMMAP_EVCNT_INCR(map_call);
1344 
1345 	/*
1346 	 * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
1347 	 * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
1348 	 * either case we want to zero it  before storing it in the map entry
1349 	 * (because it looks strange and confusing when debugging...)
1350 	 *
1351 	 * if uobj is not null
1352 	 *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
1353 	 *      and we do not need to change uoffset.
1354 	 *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
1355 	 *      now (based on the starting address of the map).   this case is
1356 	 *      for kernel object mappings where we don't know the offset until
1357 	 *      the virtual address is found (with uvm_map_findspace).   the
1358 	 *      offset is the distance we are from the start of the map.
1359 	 */
1360 
1361 	if (uobj == NULL) {
1362 		uoffset = 0;
1363 	} else {
1364 		if (uoffset == UVM_UNKNOWN_OFFSET) {
1365 			KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1366 			uoffset = start - vm_map_min(kernel_map);
1367 		}
1368 	}
1369 
1370 	args->uma_flags = flags;
1371 	args->uma_prev = prev_entry;
1372 	args->uma_start = start;
1373 	args->uma_size = size;
1374 	args->uma_uobj = uobj;
1375 	args->uma_uoffset = uoffset;
1376 
1377 	UVMHIST_LOG(maphist, "<- done!", 0,0,0,0);
1378 	return 0;
1379 }
1380 
1381 int
1382 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
1383     struct vm_map_entry *new_entry)
1384 {
1385 	struct vm_map_entry *prev_entry = args->uma_prev;
1386 	struct vm_map_entry *dead = NULL;
1387 
1388 	const uvm_flag_t flags = args->uma_flags;
1389 	const vm_prot_t prot = UVM_PROTECTION(flags);
1390 	const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
1391 	const vm_inherit_t inherit = UVM_INHERIT(flags);
1392 	const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
1393 	    AMAP_EXTEND_NOWAIT : 0;
1394 	const int advice = UVM_ADVICE(flags);
1395 	const int meflagval = (flags & UVM_FLAG_QUANTUM) ?
1396 	    UVM_MAP_QUANTUM : 0;
1397 
1398 	vaddr_t start = args->uma_start;
1399 	vsize_t size = args->uma_size;
1400 	struct uvm_object *uobj = args->uma_uobj;
1401 	voff_t uoffset = args->uma_uoffset;
1402 
1403 	const int kmap = (vm_map_pmap(map) == pmap_kernel());
1404 	int merged = 0;
1405 	int error;
1406 	int newetype;
1407 
1408 	UVMHIST_FUNC("uvm_map_enter");
1409 	UVMHIST_CALLED(maphist);
1410 
1411 	UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
1412 	    map, start, size, flags);
1413 	UVMHIST_LOG(maphist, "  uobj/offset 0x%x/%d", uobj, uoffset,0,0);
1414 
1415 	KASSERT(map->hint == prev_entry); /* bimerge case assumes this */
1416 
1417 	if (flags & UVM_FLAG_QUANTUM) {
1418 		KASSERT(new_entry);
1419 		KASSERT(new_entry->flags & UVM_MAP_QUANTUM);
1420 	}
1421 
1422 	if (uobj)
1423 		newetype = UVM_ET_OBJ;
1424 	else
1425 		newetype = 0;
1426 
1427 	if (flags & UVM_FLAG_COPYONW) {
1428 		newetype |= UVM_ET_COPYONWRITE;
1429 		if ((flags & UVM_FLAG_OVERLAY) == 0)
1430 			newetype |= UVM_ET_NEEDSCOPY;
1431 	}
1432 
1433 	/*
1434 	 * try and insert in map by extending previous entry, if possible.
1435 	 * XXX: we don't try and pull back the next entry.   might be useful
1436 	 * for a stack, but we are currently allocating our stack in advance.
1437 	 */
1438 
1439 	if (flags & UVM_FLAG_NOMERGE)
1440 		goto nomerge;
1441 
1442 	if (prev_entry->end == start &&
1443 	    prev_entry != &map->header &&
1444 	    UVM_ET_ISCOMPATIBLE(prev_entry, newetype, uobj, meflagval,
1445 	    prot, maxprot, inherit, advice, 0)) {
1446 
1447 		if (uobj && prev_entry->offset +
1448 		    (prev_entry->end - prev_entry->start) != uoffset)
1449 			goto forwardmerge;
1450 
1451 		/*
1452 		 * can't extend a shared amap.  note: no need to lock amap to
1453 		 * look at refs since we don't care about its exact value.
1454 		 * if it is one (i.e. we have only reference) it will stay there
1455 		 */
1456 
1457 		if (prev_entry->aref.ar_amap &&
1458 		    amap_refs(prev_entry->aref.ar_amap) != 1) {
1459 			goto forwardmerge;
1460 		}
1461 
1462 		if (prev_entry->aref.ar_amap) {
1463 			error = amap_extend(prev_entry, size,
1464 			    amapwaitflag | AMAP_EXTEND_FORWARDS);
1465 			if (error)
1466 				goto nomerge;
1467 		}
1468 
1469 		if (kmap) {
1470 			UVMMAP_EVCNT_INCR(kbackmerge);
1471 		} else {
1472 			UVMMAP_EVCNT_INCR(ubackmerge);
1473 		}
1474 		UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);
1475 
1476 		/*
1477 		 * drop our reference to uobj since we are extending a reference
1478 		 * that we already have (the ref count can not drop to zero).
1479 		 */
1480 
1481 		if (uobj && uobj->pgops->pgo_detach)
1482 			uobj->pgops->pgo_detach(uobj);
1483 
1484 		/*
1485 		 * Now that we've merged the entries, note that we've grown
1486 		 * and our gap has shrunk.  Then fix the tree.
1487 		 */
1488 		prev_entry->end += size;
1489 		prev_entry->gap -= size;
1490 		uvm_rb_fixup(map, prev_entry);
1491 
1492 		uvm_map_check(map, "map backmerged");
1493 
1494 		UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
1495 		merged++;
1496 	}
1497 
1498 forwardmerge:
1499 	if (prev_entry->next->start == (start + size) &&
1500 	    prev_entry->next != &map->header &&
1501 	    UVM_ET_ISCOMPATIBLE(prev_entry->next, newetype, uobj, meflagval,
1502 	    prot, maxprot, inherit, advice, 0)) {
1503 
1504 		if (uobj && prev_entry->next->offset != uoffset + size)
1505 			goto nomerge;
1506 
1507 		/*
1508 		 * can't extend a shared amap.  note: no need to lock amap to
1509 		 * look at refs since we don't care about its exact value.
1510 		 * if it is one (i.e. we have only reference) it will stay there.
1511 		 *
1512 		 * note that we also can't merge two amaps, so if we
1513 		 * merged with the previous entry which has an amap,
1514 		 * and the next entry also has an amap, we give up.
1515 		 *
1516 		 * Interesting cases:
1517 		 * amap, new, amap -> give up second merge (single fwd extend)
1518 		 * amap, new, none -> double forward extend (extend again here)
1519 		 * none, new, amap -> double backward extend (done here)
1520 		 * uobj, new, amap -> single backward extend (done here)
1521 		 *
1522 		 * XXX should we attempt to deal with someone refilling
1523 		 * the deallocated region between two entries that are
1524 		 * backed by the same amap (ie, arefs is 2, "prev" and
1525 		 * "next" refer to it, and adding this allocation will
1526 		 * close the hole, thus restoring arefs to 1 and
1527 		 * deallocating the "next" vm_map_entry)?  -- @@@
1528 		 */
1529 
1530 		if (prev_entry->next->aref.ar_amap &&
1531 		    (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
1532 		     (merged && prev_entry->aref.ar_amap))) {
1533 			goto nomerge;
1534 		}
1535 
1536 		if (merged) {
1537 			/*
1538 			 * Try to extend the amap of the previous entry to
1539 			 * cover the next entry as well.  If it doesn't work
1540 			 * just skip on, don't actually give up, since we've
1541 			 * already completed the back merge.
1542 			 */
1543 			if (prev_entry->aref.ar_amap) {
1544 				if (amap_extend(prev_entry,
1545 				    prev_entry->next->end -
1546 				    prev_entry->next->start,
1547 				    amapwaitflag | AMAP_EXTEND_FORWARDS))
1548 					goto nomerge;
1549 			}
1550 
1551 			/*
1552 			 * Try to extend the amap of the *next* entry
1553 			 * back to cover the new allocation *and* the
1554 			 * previous entry as well (the previous merge
1555 			 * didn't have an amap already otherwise we
1556 			 * wouldn't be checking here for an amap).  If
1557 			 * it doesn't work just skip on, again, don't
1558 			 * actually give up, since we've already
1559 			 * completed the back merge.
1560 			 */
1561 			else if (prev_entry->next->aref.ar_amap) {
1562 				if (amap_extend(prev_entry->next,
1563 				    prev_entry->end -
1564 				    prev_entry->start,
1565 				    amapwaitflag | AMAP_EXTEND_BACKWARDS))
1566 					goto nomerge;
1567 			}
1568 		} else {
1569 			/*
1570 			 * Pull the next entry's amap backwards to cover this
1571 			 * new allocation.
1572 			 */
1573 			if (prev_entry->next->aref.ar_amap) {
1574 				error = amap_extend(prev_entry->next, size,
1575 				    amapwaitflag | AMAP_EXTEND_BACKWARDS);
1576 				if (error)
1577 					goto nomerge;
1578 			}
1579 		}
1580 
1581 		if (merged) {
1582 			if (kmap) {
1583 				UVMMAP_EVCNT_DECR(kbackmerge);
1584 				UVMMAP_EVCNT_INCR(kbimerge);
1585 			} else {
1586 				UVMMAP_EVCNT_DECR(ubackmerge);
1587 				UVMMAP_EVCNT_INCR(ubimerge);
1588 			}
1589 		} else {
1590 			if (kmap) {
1591 				UVMMAP_EVCNT_INCR(kforwmerge);
1592 			} else {
1593 				UVMMAP_EVCNT_INCR(uforwmerge);
1594 			}
1595 		}
1596 		UVMHIST_LOG(maphist,"  starting forward merge", 0, 0, 0, 0);
1597 
1598 		/*
1599 		 * drop our reference to uobj since we are extending a reference
1600 		 * that we already have (the ref count can not drop to zero).
1601 		 * (if merged, we've already detached)
1602 		 */
1603 		if (uobj && uobj->pgops->pgo_detach && !merged)
1604 			uobj->pgops->pgo_detach(uobj);
1605 
1606 		if (merged) {
1607 			dead = prev_entry->next;
1608 			prev_entry->end = dead->end;
1609 			uvm_map_entry_unlink(map, dead);
1610 			if (dead->aref.ar_amap != NULL) {
1611 				prev_entry->aref = dead->aref;
1612 				dead->aref.ar_amap = NULL;
1613 			}
1614 		} else {
1615 			prev_entry->next->start -= size;
1616 			if (prev_entry != &map->header) {
1617 				prev_entry->gap -= size;
1618 				KASSERT(prev_entry->gap == uvm_rb_gap(prev_entry));
1619 				uvm_rb_fixup(map, prev_entry);
1620 			}
1621 			if (uobj)
1622 				prev_entry->next->offset = uoffset;
1623 		}
1624 
1625 		uvm_map_check(map, "map forwardmerged");
1626 
1627 		UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
1628 		merged++;
1629 	}
1630 
1631 nomerge:
1632 	if (!merged) {
1633 		UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);
1634 		if (kmap) {
1635 			UVMMAP_EVCNT_INCR(knomerge);
1636 		} else {
1637 			UVMMAP_EVCNT_INCR(unomerge);
1638 		}
1639 
1640 		/*
1641 		 * allocate new entry and link it in.
1642 		 */
1643 
1644 		if (new_entry == NULL) {
1645 			new_entry = uvm_mapent_alloc(map,
1646 				(flags & UVM_FLAG_NOWAIT));
1647 			if (__predict_false(new_entry == NULL)) {
1648 				error = ENOMEM;
1649 				goto done;
1650 			}
1651 		}
1652 		new_entry->start = start;
1653 		new_entry->end = new_entry->start + size;
1654 		new_entry->object.uvm_obj = uobj;
1655 		new_entry->offset = uoffset;
1656 
1657 		new_entry->etype = newetype;
1658 
1659 		if (flags & UVM_FLAG_NOMERGE) {
1660 			new_entry->flags |= UVM_MAP_NOMERGE;
1661 		}
1662 
1663 		new_entry->protection = prot;
1664 		new_entry->max_protection = maxprot;
1665 		new_entry->inheritance = inherit;
1666 		new_entry->wired_count = 0;
1667 		new_entry->advice = advice;
1668 		if (flags & UVM_FLAG_OVERLAY) {
1669 
1670 			/*
1671 			 * to_add: for BSS we overallocate a little since we
1672 			 * are likely to extend
1673 			 */
1674 
1675 			vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
1676 				UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
1677 			struct vm_amap *amap = amap_alloc(size, to_add,
1678 			    (flags & UVM_FLAG_NOWAIT));
1679 			if (__predict_false(amap == NULL)) {
1680 				error = ENOMEM;
1681 				goto done;
1682 			}
1683 			new_entry->aref.ar_pageoff = 0;
1684 			new_entry->aref.ar_amap = amap;
1685 		} else {
1686 			new_entry->aref.ar_pageoff = 0;
1687 			new_entry->aref.ar_amap = NULL;
1688 		}
1689 		uvm_map_entry_link(map, prev_entry, new_entry);
1690 
1691 		/*
1692 		 * Update the free space hint
1693 		 */
1694 
1695 		if ((map->first_free == prev_entry) &&
1696 		    (prev_entry->end >= new_entry->start))
1697 			map->first_free = new_entry;
1698 
1699 		new_entry = NULL;
1700 	}
1701 
1702 	map->size += size;
1703 
1704 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
1705 
1706 	error = 0;
1707 done:
1708 	if ((flags & UVM_FLAG_QUANTUM) == 0) {
1709 		/*
1710 		 * vmk_merged_entries is locked by the map's lock.
1711 		 */
1712 		vm_map_unlock(map);
1713 	}
1714 	if (new_entry && error == 0) {
1715 		KDASSERT(merged);
1716 		uvm_mapent_free_merged(map, new_entry);
1717 		new_entry = NULL;
1718 	}
1719 	if (dead) {
1720 		KDASSERT(merged);
1721 		uvm_mapent_free_merged(map, dead);
1722 	}
1723 	if ((flags & UVM_FLAG_QUANTUM) != 0) {
1724 		vm_map_unlock(map);
1725 	}
1726 	if (new_entry != NULL) {
1727 		uvm_mapent_free(new_entry);
1728 	}
1729 	return error;
1730 }
1731 
1732 /*
1733  * uvm_map_lookup_entry_bytree: lookup an entry in tree
1734  */
1735 
1736 static inline bool
1737 uvm_map_lookup_entry_bytree(struct vm_map *map, vaddr_t address,
1738     struct vm_map_entry **entry	/* OUT */)
1739 {
1740 	struct vm_map_entry *prev = &map->header;
1741 	struct vm_map_entry *cur = ROOT_ENTRY(map);
1742 
1743 	while (cur) {
1744 		UVMMAP_EVCNT_INCR(mlk_treeloop);
1745 		if (address >= cur->start) {
1746 			if (address < cur->end) {
1747 				*entry = cur;
1748 				return true;
1749 			}
1750 			prev = cur;
1751 			cur = RIGHT_ENTRY(cur);
1752 		} else
1753 			cur = LEFT_ENTRY(cur);
1754 	}
1755 	*entry = prev;
1756 	return false;
1757 }
1758 
1759 /*
1760  * uvm_map_lookup_entry: find map entry at or before an address
1761  *
1762  * => map must at least be read-locked by caller
1763  * => entry is returned in "entry"
1764  * => return value is true if address is in the returned entry
1765  */
1766 
1767 bool
1768 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1769     struct vm_map_entry **entry	/* OUT */)
1770 {
1771 	struct vm_map_entry *cur;
1772 	bool use_tree = false;
1773 	UVMHIST_FUNC("uvm_map_lookup_entry");
1774 	UVMHIST_CALLED(maphist);
1775 
1776 	UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)",
1777 	    map, address, entry, 0);
1778 
1779 	/*
1780 	 * start looking either from the head of the
1781 	 * list, or from the hint.
1782 	 */
1783 
1784 	cur = map->hint;
1785 
1786 	if (cur == &map->header)
1787 		cur = cur->next;
1788 
1789 	UVMMAP_EVCNT_INCR(mlk_call);
1790 	if (address >= cur->start) {
1791 
1792 		/*
1793 		 * go from hint to end of list.
1794 		 *
1795 		 * but first, make a quick check to see if
1796 		 * we are already looking at the entry we
1797 		 * want (which is usually the case).
1798 		 * note also that we don't need to save the hint
1799 		 * here... it is the same hint (unless we are
1800 		 * at the header, in which case the hint didn't
1801 		 * buy us anything anyway).
1802 		 */
1803 
1804 		if (cur != &map->header && cur->end > address) {
1805 			UVMMAP_EVCNT_INCR(mlk_hint);
1806 			*entry = cur;
1807 			UVMHIST_LOG(maphist,"<- got it via hint (0x%x)",
1808 			    cur, 0, 0, 0);
1809 			uvm_mapent_check(*entry);
1810 			return (true);
1811 		}
1812 
1813 		if (map->nentries > 15)
1814 			use_tree = true;
1815 	} else {
1816 
1817 		/*
1818 		 * invalid hint.  use tree.
1819 		 */
1820 		use_tree = true;
1821 	}
1822 
1823 	uvm_map_check(map, __func__);
1824 
1825 	if (use_tree) {
1826 		/*
1827 		 * Simple lookup in the tree.  Happens when the hint is
1828 		 * invalid, or nentries reach a threshold.
1829 		 */
1830 		UVMMAP_EVCNT_INCR(mlk_tree);
1831 		if (uvm_map_lookup_entry_bytree(map, address, entry)) {
1832 			goto got;
1833 		} else {
1834 			goto failed;
1835 		}
1836 	}
1837 
1838 	/*
1839 	 * search linearly
1840 	 */
1841 
1842 	UVMMAP_EVCNT_INCR(mlk_list);
1843 	while (cur != &map->header) {
1844 		UVMMAP_EVCNT_INCR(mlk_listloop);
1845 		if (cur->end > address) {
1846 			if (address >= cur->start) {
1847 				/*
1848 				 * save this lookup for future
1849 				 * hints, and return
1850 				 */
1851 
1852 				*entry = cur;
1853 got:
1854 				SAVE_HINT(map, map->hint, *entry);
1855 				UVMHIST_LOG(maphist,"<- search got it (0x%x)",
1856 					cur, 0, 0, 0);
1857 				KDASSERT((*entry)->start <= address);
1858 				KDASSERT(address < (*entry)->end);
1859 				uvm_mapent_check(*entry);
1860 				return (true);
1861 			}
1862 			break;
1863 		}
1864 		cur = cur->next;
1865 	}
1866 	*entry = cur->prev;
1867 failed:
1868 	SAVE_HINT(map, map->hint, *entry);
1869 	UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
1870 	KDASSERT((*entry) == &map->header || (*entry)->end <= address);
1871 	KDASSERT((*entry)->next == &map->header ||
1872 	    address < (*entry)->next->start);
1873 	return (false);
1874 }
1875 
1876 /*
1877  * See if the range between start and start + length fits in the gap
1878  * entry->next->start and entry->end.  Returns 1 if fits, 0 if doesn't
1879  * fit, and -1 address wraps around.
1880  */
1881 static int
1882 uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
1883     vsize_t align, int topdown, struct vm_map_entry *entry)
1884 {
1885 	vaddr_t end;
1886 
1887 #ifdef PMAP_PREFER
1888 	/*
1889 	 * push start address forward as needed to avoid VAC alias problems.
1890 	 * we only do this if a valid offset is specified.
1891 	 */
1892 
1893 	if (uoffset != UVM_UNKNOWN_OFFSET)
1894 		PMAP_PREFER(uoffset, start, length, topdown);
1895 #endif
1896 	if (align != 0) {
1897 		if ((*start & (align - 1)) != 0) {
1898 			if (topdown)
1899 				*start &= ~(align - 1);
1900 			else
1901 				*start = roundup(*start, align);
1902 		}
1903 		/*
1904 		 * XXX Should we PMAP_PREFER() here again?
1905 		 * eh...i think we're okay
1906 		 */
1907 	}
1908 
1909 	/*
1910 	 * Find the end of the proposed new region.  Be sure we didn't
1911 	 * wrap around the address; if so, we lose.  Otherwise, if the
1912 	 * proposed new region fits before the next entry, we win.
1913 	 */
1914 
1915 	end = *start + length;
1916 	if (end < *start)
1917 		return (-1);
1918 
1919 	if (entry->next->start >= end && *start >= entry->end)
1920 		return (1);
1921 
1922 	return (0);
1923 }
1924 
1925 /*
1926  * uvm_map_findspace: find "length" sized space in "map".
1927  *
1928  * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
1929  *	set in "flags" (in which case we insist on using "hint").
1930  * => "result" is VA returned
1931  * => uobj/uoffset are to be used to handle VAC alignment, if required
1932  * => if "align" is non-zero, we attempt to align to that value.
1933  * => caller must at least have read-locked map
1934  * => returns NULL on failure, or pointer to prev. map entry if success
1935  * => note this is a cross between the old vm_map_findspace and vm_map_find
1936  */
1937 
1938 struct vm_map_entry *
1939 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
1940     vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset,
1941     vsize_t align, int flags)
1942 {
1943 	struct vm_map_entry *entry;
1944 	struct vm_map_entry *child, *prev, *tmp;
1945 	vaddr_t orig_hint;
1946 	const int topdown = map->flags & VM_MAP_TOPDOWN;
1947 	UVMHIST_FUNC("uvm_map_findspace");
1948 	UVMHIST_CALLED(maphist);
1949 
1950 	UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)",
1951 	    map, hint, length, flags);
1952 	KASSERT((align & (align - 1)) == 0);
1953 	KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
1954 
1955 	uvm_map_check(map, "map_findspace entry");
1956 
1957 	/*
1958 	 * remember the original hint.  if we are aligning, then we
1959 	 * may have to try again with no alignment constraint if
1960 	 * we fail the first time.
1961 	 */
1962 
1963 	orig_hint = hint;
1964 	if (hint < vm_map_min(map)) {	/* check ranges ... */
1965 		if (flags & UVM_FLAG_FIXED) {
1966 			UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
1967 			return (NULL);
1968 		}
1969 		hint = vm_map_min(map);
1970 	}
1971 	if (hint > vm_map_max(map)) {
1972 		UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]",
1973 		    hint, vm_map_min(map), vm_map_max(map), 0);
1974 		return (NULL);
1975 	}
1976 
1977 	/*
1978 	 * Look for the first possible address; if there's already
1979 	 * something at this address, we have to start after it.
1980 	 */
1981 
1982 	/*
1983 	 * @@@: there are four, no, eight cases to consider.
1984 	 *
1985 	 * 0: found,     fixed,     bottom up -> fail
1986 	 * 1: found,     fixed,     top down  -> fail
1987 	 * 2: found,     not fixed, bottom up -> start after entry->end,
1988 	 *                                       loop up
1989 	 * 3: found,     not fixed, top down  -> start before entry->start,
1990 	 *                                       loop down
1991 	 * 4: not found, fixed,     bottom up -> check entry->next->start, fail
1992 	 * 5: not found, fixed,     top down  -> check entry->next->start, fail
1993 	 * 6: not found, not fixed, bottom up -> check entry->next->start,
1994 	 *                                       loop up
1995 	 * 7: not found, not fixed, top down  -> check entry->next->start,
1996 	 *                                       loop down
1997 	 *
1998 	 * as you can see, it reduces to roughly five cases, and that
1999 	 * adding top down mapping only adds one unique case (without
2000 	 * it, there would be four cases).
2001 	 */
2002 
2003 	if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) {
2004 		entry = map->first_free;
2005 	} else {
2006 		if (uvm_map_lookup_entry(map, hint, &entry)) {
2007 			/* "hint" address already in use ... */
2008 			if (flags & UVM_FLAG_FIXED) {
2009 				UVMHIST_LOG(maphist, "<- fixed & VA in use",
2010 				    0, 0, 0, 0);
2011 				return (NULL);
2012 			}
2013 			if (topdown)
2014 				/* Start from lower gap. */
2015 				entry = entry->prev;
2016 		} else if (flags & UVM_FLAG_FIXED) {
2017 			if (entry->next->start >= hint + length &&
2018 			    hint + length > hint)
2019 				goto found;
2020 
2021 			/* "hint" address is gap but too small */
2022 			UVMHIST_LOG(maphist, "<- fixed mapping failed",
2023 			    0, 0, 0, 0);
2024 			return (NULL); /* only one shot at it ... */
2025 		} else {
2026 			/*
2027 			 * See if given hint fits in this gap.
2028 			 */
2029 			switch (uvm_map_space_avail(&hint, length,
2030 			    uoffset, align, topdown, entry)) {
2031 			case 1:
2032 				goto found;
2033 			case -1:
2034 				goto wraparound;
2035 			}
2036 
2037 			if (topdown) {
2038 				/*
2039 				 * Still there is a chance to fit
2040 				 * if hint > entry->end.
2041 				 */
2042 			} else {
2043 				/* Start from higher gap. */
2044 				entry = entry->next;
2045 				if (entry == &map->header)
2046 					goto notfound;
2047 				goto nextgap;
2048 			}
2049 		}
2050 	}
2051 
2052 	/*
2053 	 * Note that all UVM_FLAGS_FIXED case is already handled.
2054 	 */
2055 	KDASSERT((flags & UVM_FLAG_FIXED) == 0);
2056 
2057 	/* Try to find the space in the red-black tree */
2058 
2059 	/* Check slot before any entry */
2060 	hint = topdown ? entry->next->start - length : entry->end;
2061 	switch (uvm_map_space_avail(&hint, length, uoffset, align,
2062 	    topdown, entry)) {
2063 	case 1:
2064 		goto found;
2065 	case -1:
2066 		goto wraparound;
2067 	}
2068 
2069 nextgap:
2070 	KDASSERT((flags & UVM_FLAG_FIXED) == 0);
2071 	/* If there is not enough space in the whole tree, we fail */
2072 	tmp = ROOT_ENTRY(map);
2073 	if (tmp == NULL || tmp->maxgap < length)
2074 		goto notfound;
2075 
2076 	prev = NULL; /* previous candidate */
2077 
2078 	/* Find an entry close to hint that has enough space */
2079 	for (; tmp;) {
2080 		KASSERT(tmp->next->start == tmp->end + tmp->gap);
2081 		if (topdown) {
2082 			if (tmp->next->start < hint + length &&
2083 			    (prev == NULL || tmp->end > prev->end)) {
2084 				if (tmp->gap >= length)
2085 					prev = tmp;
2086 				else if ((child = LEFT_ENTRY(tmp)) != NULL
2087 				    && child->maxgap >= length)
2088 					prev = tmp;
2089 			}
2090 		} else {
2091 			if (tmp->end >= hint &&
2092 			    (prev == NULL || tmp->end < prev->end)) {
2093 				if (tmp->gap >= length)
2094 					prev = tmp;
2095 				else if ((child = RIGHT_ENTRY(tmp)) != NULL
2096 				    && child->maxgap >= length)
2097 					prev = tmp;
2098 			}
2099 		}
2100 		if (tmp->next->start < hint + length)
2101 			child = RIGHT_ENTRY(tmp);
2102 		else if (tmp->end > hint)
2103 			child = LEFT_ENTRY(tmp);
2104 		else {
2105 			if (tmp->gap >= length)
2106 				break;
2107 			if (topdown)
2108 				child = LEFT_ENTRY(tmp);
2109 			else
2110 				child = RIGHT_ENTRY(tmp);
2111 		}
2112 		if (child == NULL || child->maxgap < length)
2113 			break;
2114 		tmp = child;
2115 	}
2116 
2117 	if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
2118 		/*
2119 		 * Check if the entry that we found satifies the
2120 		 * space requirement
2121 		 */
2122 		if (topdown) {
2123 			if (hint > tmp->next->start - length)
2124 				hint = tmp->next->start - length;
2125 		} else {
2126 			if (hint < tmp->end)
2127 				hint = tmp->end;
2128 		}
2129 		switch (uvm_map_space_avail(&hint, length, uoffset, align,
2130 		    topdown, tmp)) {
2131 		case 1:
2132 			entry = tmp;
2133 			goto found;
2134 		case -1:
2135 			goto wraparound;
2136 		}
2137 		if (tmp->gap >= length)
2138 			goto listsearch;
2139 	}
2140 	if (prev == NULL)
2141 		goto notfound;
2142 
2143 	if (topdown) {
2144 		KASSERT(orig_hint >= prev->next->start - length ||
2145 		    prev->next->start - length > prev->next->start);
2146 		hint = prev->next->start - length;
2147 	} else {
2148 		KASSERT(orig_hint <= prev->end);
2149 		hint = prev->end;
2150 	}
2151 	switch (uvm_map_space_avail(&hint, length, uoffset, align,
2152 	    topdown, prev)) {
2153 	case 1:
2154 		entry = prev;
2155 		goto found;
2156 	case -1:
2157 		goto wraparound;
2158 	}
2159 	if (prev->gap >= length)
2160 		goto listsearch;
2161 
2162 	if (topdown)
2163 		tmp = LEFT_ENTRY(prev);
2164 	else
2165 		tmp = RIGHT_ENTRY(prev);
2166 	for (;;) {
2167 		KASSERT(tmp && tmp->maxgap >= length);
2168 		if (topdown)
2169 			child = RIGHT_ENTRY(tmp);
2170 		else
2171 			child = LEFT_ENTRY(tmp);
2172 		if (child && child->maxgap >= length) {
2173 			tmp = child;
2174 			continue;
2175 		}
2176 		if (tmp->gap >= length)
2177 			break;
2178 		if (topdown)
2179 			tmp = LEFT_ENTRY(tmp);
2180 		else
2181 			tmp = RIGHT_ENTRY(tmp);
2182 	}
2183 
2184 	if (topdown) {
2185 		KASSERT(orig_hint >= tmp->next->start - length ||
2186 		    tmp->next->start - length > tmp->next->start);
2187 		hint = tmp->next->start - length;
2188 	} else {
2189 		KASSERT(orig_hint <= tmp->end);
2190 		hint = tmp->end;
2191 	}
2192 	switch (uvm_map_space_avail(&hint, length, uoffset, align,
2193 	    topdown, tmp)) {
2194 	case 1:
2195 		entry = tmp;
2196 		goto found;
2197 	case -1:
2198 		goto wraparound;
2199 	}
2200 
2201 	/*
2202 	 * The tree fails to find an entry because of offset or alignment
2203 	 * restrictions.  Search the list instead.
2204 	 */
2205  listsearch:
2206 	/*
2207 	 * Look through the rest of the map, trying to fit a new region in
2208 	 * the gap between existing regions, or after the very last region.
2209 	 * note: entry->end = base VA of current gap,
2210 	 *	 entry->next->start = VA of end of current gap
2211 	 */
2212 
2213 	for (;;) {
2214 		/* Update hint for current gap. */
2215 		hint = topdown ? entry->next->start - length : entry->end;
2216 
2217 		/* See if it fits. */
2218 		switch (uvm_map_space_avail(&hint, length, uoffset, align,
2219 		    topdown, entry)) {
2220 		case 1:
2221 			goto found;
2222 		case -1:
2223 			goto wraparound;
2224 		}
2225 
2226 		/* Advance to next/previous gap */
2227 		if (topdown) {
2228 			if (entry == &map->header) {
2229 				UVMHIST_LOG(maphist, "<- failed (off start)",
2230 				    0,0,0,0);
2231 				goto notfound;
2232 			}
2233 			entry = entry->prev;
2234 		} else {
2235 			entry = entry->next;
2236 			if (entry == &map->header) {
2237 				UVMHIST_LOG(maphist, "<- failed (off end)",
2238 				    0,0,0,0);
2239 				goto notfound;
2240 			}
2241 		}
2242 	}
2243 
2244  found:
2245 	SAVE_HINT(map, map->hint, entry);
2246 	*result = hint;
2247 	UVMHIST_LOG(maphist,"<- got it!  (result=0x%x)", hint, 0,0,0);
2248 	KASSERT( topdown || hint >= orig_hint);
2249 	KASSERT(!topdown || hint <= orig_hint);
2250 	KASSERT(entry->end <= hint);
2251 	KASSERT(hint + length <= entry->next->start);
2252 	return (entry);
2253 
2254  wraparound:
2255 	UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);
2256 
2257 	return (NULL);
2258 
2259  notfound:
2260 	UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);
2261 
2262 	return (NULL);
2263 }
2264 
2265 /*
2266  *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
2267  */
2268 
2269 /*
2270  * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
2271  *
2272  * => caller must check alignment and size
2273  * => map must be locked by caller
2274  * => we return a list of map entries that we've remove from the map
2275  *    in "entry_list"
2276  */
2277 
2278 void
2279 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2280     struct vm_map_entry **entry_list /* OUT */,
2281     struct uvm_mapent_reservation *umr, int flags)
2282 {
2283 	struct vm_map_entry *entry, *first_entry, *next;
2284 	vaddr_t len;
2285 	UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);
2286 
2287 	UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)",
2288 	    map, start, end, 0);
2289 	VM_MAP_RANGE_CHECK(map, start, end);
2290 
2291 	uvm_map_check(map, "unmap_remove entry");
2292 
2293 	/*
2294 	 * find first entry
2295 	 */
2296 
2297 	if (uvm_map_lookup_entry(map, start, &first_entry) == true) {
2298 		/* clip and go... */
2299 		entry = first_entry;
2300 		UVM_MAP_CLIP_START(map, entry, start, umr);
2301 		/* critical!  prevents stale hint */
2302 		SAVE_HINT(map, entry, entry->prev);
2303 	} else {
2304 		entry = first_entry->next;
2305 	}
2306 
2307 	/*
2308 	 * Save the free space hint
2309 	 */
2310 
2311 	if (map->first_free != &map->header && map->first_free->start >= start)
2312 		map->first_free = entry->prev;
2313 
2314 	/*
2315 	 * note: we now re-use first_entry for a different task.  we remove
2316 	 * a number of map entries from the map and save them in a linked
2317 	 * list headed by "first_entry".  once we remove them from the map
2318 	 * the caller should unlock the map and drop the references to the
2319 	 * backing objects [c.f. uvm_unmap_detach].  the object is to
2320 	 * separate unmapping from reference dropping.  why?
2321 	 *   [1] the map has to be locked for unmapping
2322 	 *   [2] the map need not be locked for reference dropping
2323 	 *   [3] dropping references may trigger pager I/O, and if we hit
2324 	 *       a pager that does synchronous I/O we may have to wait for it.
2325 	 *   [4] we would like all waiting for I/O to occur with maps unlocked
2326 	 *       so that we don't block other threads.
2327 	 */
2328 
2329 	first_entry = NULL;
2330 	*entry_list = NULL;
2331 
2332 	/*
2333 	 * break up the area into map entry sized regions and unmap.  note
2334 	 * that all mappings have to be removed before we can even consider
2335 	 * dropping references to amaps or VM objects (otherwise we could end
2336 	 * up with a mapping to a page on the free list which would be very bad)
2337 	 */
2338 
2339 	while ((entry != &map->header) && (entry->start < end)) {
2340 		KASSERT((entry->flags & UVM_MAP_FIRST) == 0);
2341 
2342 		UVM_MAP_CLIP_END(map, entry, end, umr);
2343 		next = entry->next;
2344 		len = entry->end - entry->start;
2345 
2346 		/*
2347 		 * unwire before removing addresses from the pmap; otherwise
2348 		 * unwiring will put the entries back into the pmap (XXX).
2349 		 */
2350 
2351 		if (VM_MAPENT_ISWIRED(entry)) {
2352 			uvm_map_entry_unwire(map, entry);
2353 		}
2354 		if (flags & UVM_FLAG_VAONLY) {
2355 
2356 			/* nothing */
2357 
2358 		} else if ((map->flags & VM_MAP_PAGEABLE) == 0) {
2359 
2360 			/*
2361 			 * if the map is non-pageable, any pages mapped there
2362 			 * must be wired and entered with pmap_kenter_pa(),
2363 			 * and we should free any such pages immediately.
2364 			 * this is mostly used for kmem_map.
2365 			 */
2366 			KASSERT(vm_map_pmap(map) == pmap_kernel());
2367 
2368 			if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
2369 				uvm_km_pgremove_intrsafe(map, entry->start,
2370 				    entry->end);
2371 				pmap_kremove(entry->start, len);
2372 			}
2373 		} else if (UVM_ET_ISOBJ(entry) &&
2374 			   UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
2375 			KASSERT(vm_map_pmap(map) == pmap_kernel());
2376 
2377 			/*
2378 			 * note: kernel object mappings are currently used in
2379 			 * two ways:
2380 			 *  [1] "normal" mappings of pages in the kernel object
2381 			 *  [2] uvm_km_valloc'd allocations in which we
2382 			 *      pmap_enter in some non-kernel-object page
2383 			 *      (e.g. vmapbuf).
2384 			 *
2385 			 * for case [1], we need to remove the mapping from
2386 			 * the pmap and then remove the page from the kernel
2387 			 * object (because, once pages in a kernel object are
2388 			 * unmapped they are no longer needed, unlike, say,
2389 			 * a vnode where you might want the data to persist
2390 			 * until flushed out of a queue).
2391 			 *
2392 			 * for case [2], we need to remove the mapping from
2393 			 * the pmap.  there shouldn't be any pages at the
2394 			 * specified offset in the kernel object [but it
2395 			 * doesn't hurt to call uvm_km_pgremove just to be
2396 			 * safe?]
2397 			 *
2398 			 * uvm_km_pgremove currently does the following:
2399 			 *   for pages in the kernel object in range:
2400 			 *     - drops the swap slot
2401 			 *     - uvm_pagefree the page
2402 			 */
2403 
2404 			/*
2405 			 * remove mappings from pmap and drop the pages
2406 			 * from the object.  offsets are always relative
2407 			 * to vm_map_min(kernel_map).
2408 			 */
2409 
2410 			pmap_remove(pmap_kernel(), entry->start,
2411 			    entry->start + len);
2412 			uvm_km_pgremove(entry->start, entry->end);
2413 
2414 			/*
2415 			 * null out kernel_object reference, we've just
2416 			 * dropped it
2417 			 */
2418 
2419 			entry->etype &= ~UVM_ET_OBJ;
2420 			entry->object.uvm_obj = NULL;
2421 		} else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) {
2422 
2423 			/*
2424 			 * remove mappings the standard way.
2425 			 */
2426 
2427 			pmap_remove(map->pmap, entry->start, entry->end);
2428 		}
2429 
2430 #if defined(DEBUG)
2431 		if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
2432 
2433 			/*
2434 			 * check if there's remaining mapping,
2435 			 * which is a bug in caller.
2436 			 */
2437 
2438 			vaddr_t va;
2439 			for (va = entry->start; va < entry->end;
2440 			    va += PAGE_SIZE) {
2441 				if (pmap_extract(vm_map_pmap(map), va, NULL)) {
2442 					panic("uvm_unmap_remove: has mapping");
2443 				}
2444 			}
2445 
2446 			if (VM_MAP_IS_KERNEL(map)) {
2447 				uvm_km_check_empty(map, entry->start,
2448 				    entry->end);
2449 			}
2450 		}
2451 #endif /* defined(DEBUG) */
2452 
2453 		/*
2454 		 * remove entry from map and put it on our list of entries
2455 		 * that we've nuked.  then go to next entry.
2456 		 */
2457 
2458 		UVMHIST_LOG(maphist, "  removed map entry 0x%x", entry, 0, 0,0);
2459 
2460 		/* critical!  prevents stale hint */
2461 		SAVE_HINT(map, entry, entry->prev);
2462 
2463 		uvm_map_entry_unlink(map, entry);
2464 		KASSERT(map->size >= len);
2465 		map->size -= len;
2466 		entry->prev = NULL;
2467 		entry->next = first_entry;
2468 		first_entry = entry;
2469 		entry = next;
2470 	}
2471 
2472 	/*
2473 	 * Note: if map is dying, leave pmap_update() for pmap_destroy(),
2474 	 * which will be called later.
2475 	 */
2476 	if ((map->flags & VM_MAP_DYING) == 0) {
2477 		pmap_update(vm_map_pmap(map));
2478 	} else {
2479 		KASSERT(vm_map_pmap(map) != pmap_kernel());
2480 	}
2481 
2482 	uvm_map_check(map, "unmap_remove leave");
2483 
2484 	/*
2485 	 * now we've cleaned up the map and are ready for the caller to drop
2486 	 * references to the mapped objects.
2487 	 */
2488 
2489 	*entry_list = first_entry;
2490 	UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
2491 
2492 	if (map->flags & VM_MAP_WANTVA) {
2493 		mutex_enter(&map->misc_lock);
2494 		map->flags &= ~VM_MAP_WANTVA;
2495 		cv_broadcast(&map->cv);
2496 		mutex_exit(&map->misc_lock);
2497 	}
2498 }
2499 
2500 /*
2501  * uvm_unmap_detach: drop references in a chain of map entries
2502  *
2503  * => we will free the map entries as we traverse the list.
2504  */
2505 
2506 void
2507 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
2508 {
2509 	struct vm_map_entry *next_entry;
2510 	UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
2511 
2512 	while (first_entry) {
2513 		KASSERT(!VM_MAPENT_ISWIRED(first_entry));
2514 		UVMHIST_LOG(maphist,
2515 		    "  detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d",
2516 		    first_entry, first_entry->aref.ar_amap,
2517 		    first_entry->object.uvm_obj,
2518 		    UVM_ET_ISSUBMAP(first_entry));
2519 
2520 		/*
2521 		 * drop reference to amap, if we've got one
2522 		 */
2523 
2524 		if (first_entry->aref.ar_amap)
2525 			uvm_map_unreference_amap(first_entry, flags);
2526 
2527 		/*
2528 		 * drop reference to our backing object, if we've got one
2529 		 */
2530 
2531 		KASSERT(!UVM_ET_ISSUBMAP(first_entry));
2532 		if (UVM_ET_ISOBJ(first_entry) &&
2533 		    first_entry->object.uvm_obj->pgops->pgo_detach) {
2534 			(*first_entry->object.uvm_obj->pgops->pgo_detach)
2535 				(first_entry->object.uvm_obj);
2536 		}
2537 		next_entry = first_entry->next;
2538 		uvm_mapent_free(first_entry);
2539 		first_entry = next_entry;
2540 	}
2541 	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
2542 }
2543 
2544 /*
2545  *   E X T R A C T I O N   F U N C T I O N S
2546  */
2547 
2548 /*
2549  * uvm_map_reserve: reserve space in a vm_map for future use.
2550  *
2551  * => we reserve space in a map by putting a dummy map entry in the
2552  *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
2553  * => map should be unlocked (we will write lock it)
2554  * => we return true if we were able to reserve space
2555  * => XXXCDC: should be inline?
2556  */
2557 
2558 int
2559 uvm_map_reserve(struct vm_map *map, vsize_t size,
2560     vaddr_t offset	/* hint for pmap_prefer */,
2561     vsize_t align	/* alignment */,
2562     vaddr_t *raddr	/* IN:hint, OUT: reserved VA */,
2563     uvm_flag_t flags	/* UVM_FLAG_FIXED or 0 */)
2564 {
2565 	UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
2566 
2567 	UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)",
2568 	    map,size,offset,raddr);
2569 
2570 	size = round_page(size);
2571 
2572 	/*
2573 	 * reserve some virtual space.
2574 	 */
2575 
2576 	if (uvm_map(map, raddr, size, NULL, offset, align,
2577 	    UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
2578 	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE|flags)) != 0) {
2579 	    UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
2580 		return (false);
2581 	}
2582 
2583 	UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0);
2584 	return (true);
2585 }
2586 
2587 /*
2588  * uvm_map_replace: replace a reserved (blank) area of memory with
2589  * real mappings.
2590  *
2591  * => caller must WRITE-LOCK the map
2592  * => we return true if replacement was a success
2593  * => we expect the newents chain to have nnewents entrys on it and
2594  *    we expect newents->prev to point to the last entry on the list
2595  * => note newents is allowed to be NULL
2596  */
2597 
2598 static int
2599 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
2600     struct vm_map_entry *newents, int nnewents, vsize_t nsize,
2601     struct vm_map_entry **oldentryp)
2602 {
2603 	struct vm_map_entry *oldent, *last;
2604 
2605 	uvm_map_check(map, "map_replace entry");
2606 
2607 	/*
2608 	 * first find the blank map entry at the specified address
2609 	 */
2610 
2611 	if (!uvm_map_lookup_entry(map, start, &oldent)) {
2612 		return (false);
2613 	}
2614 
2615 	/*
2616 	 * check to make sure we have a proper blank entry
2617 	 */
2618 
2619 	if (end < oldent->end && !VM_MAP_USE_KMAPENT(map)) {
2620 		UVM_MAP_CLIP_END(map, oldent, end, NULL);
2621 	}
2622 	if (oldent->start != start || oldent->end != end ||
2623 	    oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
2624 		return (false);
2625 	}
2626 
2627 #ifdef DIAGNOSTIC
2628 
2629 	/*
2630 	 * sanity check the newents chain
2631 	 */
2632 
2633 	{
2634 		struct vm_map_entry *tmpent = newents;
2635 		int nent = 0;
2636 		vsize_t sz = 0;
2637 		vaddr_t cur = start;
2638 
2639 		while (tmpent) {
2640 			nent++;
2641 			sz += tmpent->end - tmpent->start;
2642 			if (tmpent->start < cur)
2643 				panic("uvm_map_replace1");
2644 			if (tmpent->start >= tmpent->end || tmpent->end > end) {
2645 				panic("uvm_map_replace2: "
2646 				    "tmpent->start=0x%"PRIxVADDR
2647 				    ", tmpent->end=0x%"PRIxVADDR
2648 				    ", end=0x%"PRIxVADDR,
2649 				    tmpent->start, tmpent->end, end);
2650 			}
2651 			cur = tmpent->end;
2652 			if (tmpent->next) {
2653 				if (tmpent->next->prev != tmpent)
2654 					panic("uvm_map_replace3");
2655 			} else {
2656 				if (newents->prev != tmpent)
2657 					panic("uvm_map_replace4");
2658 			}
2659 			tmpent = tmpent->next;
2660 		}
2661 		if (nent != nnewents)
2662 			panic("uvm_map_replace5");
2663 		if (sz != nsize)
2664 			panic("uvm_map_replace6");
2665 	}
2666 #endif
2667 
2668 	/*
2669 	 * map entry is a valid blank!   replace it.   (this does all the
2670 	 * work of map entry link/unlink...).
2671 	 */
2672 
2673 	if (newents) {
2674 		last = newents->prev;
2675 
2676 		/* critical: flush stale hints out of map */
2677 		SAVE_HINT(map, map->hint, newents);
2678 		if (map->first_free == oldent)
2679 			map->first_free = last;
2680 
2681 		last->next = oldent->next;
2682 		last->next->prev = last;
2683 
2684 		/* Fix RB tree */
2685 		uvm_rb_remove(map, oldent);
2686 
2687 		newents->prev = oldent->prev;
2688 		newents->prev->next = newents;
2689 		map->nentries = map->nentries + (nnewents - 1);
2690 
2691 		/* Fixup the RB tree */
2692 		{
2693 			int i;
2694 			struct vm_map_entry *tmp;
2695 
2696 			tmp = newents;
2697 			for (i = 0; i < nnewents && tmp; i++) {
2698 				uvm_rb_insert(map, tmp);
2699 				tmp = tmp->next;
2700 			}
2701 		}
2702 	} else {
2703 		/* NULL list of new entries: just remove the old one */
2704 		clear_hints(map, oldent);
2705 		uvm_map_entry_unlink(map, oldent);
2706 	}
2707 	map->size -= end - start - nsize;
2708 
2709 	uvm_map_check(map, "map_replace leave");
2710 
2711 	/*
2712 	 * now we can free the old blank entry and return.
2713 	 */
2714 
2715 	*oldentryp = oldent;
2716 	return (true);
2717 }
2718 
2719 /*
2720  * uvm_map_extract: extract a mapping from a map and put it somewhere
2721  *	(maybe removing the old mapping)
2722  *
2723  * => maps should be unlocked (we will write lock them)
2724  * => returns 0 on success, error code otherwise
2725  * => start must be page aligned
2726  * => len must be page sized
2727  * => flags:
2728  *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
2729  *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
2730  *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
2731  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
2732  *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
2733  *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
2734  *             be used from within the kernel in a kernel level map <<<
2735  */
2736 
2737 int
2738 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
2739     struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
2740 {
2741 	vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge;
2742 	struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry,
2743 	    *deadentry, *oldentry;
2744 	struct vm_map_entry *resentry = NULL; /* a dummy reservation entry */
2745 	vsize_t elen;
2746 	int nchain, error, copy_ok;
2747 	vsize_t nsize;
2748 	UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
2749 
2750 	UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start,
2751 	    len,0);
2752 	UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0);
2753 
2754 	/*
2755 	 * step 0: sanity check: start must be on a page boundary, length
2756 	 * must be page sized.  can't ask for CONTIG/QREF if you asked for
2757 	 * REMOVE.
2758 	 */
2759 
2760 	KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
2761 	KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
2762 		(flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
2763 
2764 	/*
2765 	 * step 1: reserve space in the target map for the extracted area
2766 	 */
2767 
2768 	if ((flags & UVM_EXTRACT_RESERVED) == 0) {
2769 		dstaddr = vm_map_min(dstmap);
2770 		if (!uvm_map_reserve(dstmap, len, start, 0, &dstaddr, 0))
2771 			return (ENOMEM);
2772 		*dstaddrp = dstaddr;	/* pass address back to caller */
2773 		UVMHIST_LOG(maphist, "  dstaddr=0x%x", dstaddr,0,0,0);
2774 	} else {
2775 		dstaddr = *dstaddrp;
2776 	}
2777 
2778 	/*
2779 	 * step 2: setup for the extraction process loop by init'ing the
2780 	 * map entry chain, locking src map, and looking up the first useful
2781 	 * entry in the map.
2782 	 */
2783 
2784 	end = start + len;
2785 	newend = dstaddr + len;
2786 	chain = endchain = NULL;
2787 	nchain = 0;
2788 	nsize = 0;
2789 	vm_map_lock(srcmap);
2790 
2791 	if (uvm_map_lookup_entry(srcmap, start, &entry)) {
2792 
2793 		/* "start" is within an entry */
2794 		if (flags & UVM_EXTRACT_QREF) {
2795 
2796 			/*
2797 			 * for quick references we don't clip the entry, so
2798 			 * the entry may map space "before" the starting
2799 			 * virtual address... this is the "fudge" factor
2800 			 * (which can be non-zero only the first time
2801 			 * through the "while" loop in step 3).
2802 			 */
2803 
2804 			fudge = start - entry->start;
2805 		} else {
2806 
2807 			/*
2808 			 * normal reference: we clip the map to fit (thus
2809 			 * fudge is zero)
2810 			 */
2811 
2812 			UVM_MAP_CLIP_START(srcmap, entry, start, NULL);
2813 			SAVE_HINT(srcmap, srcmap->hint, entry->prev);
2814 			fudge = 0;
2815 		}
2816 	} else {
2817 
2818 		/* "start" is not within an entry ... skip to next entry */
2819 		if (flags & UVM_EXTRACT_CONTIG) {
2820 			error = EINVAL;
2821 			goto bad;    /* definite hole here ... */
2822 		}
2823 
2824 		entry = entry->next;
2825 		fudge = 0;
2826 	}
2827 
2828 	/* save values from srcmap for step 6 */
2829 	orig_entry = entry;
2830 	orig_fudge = fudge;
2831 
2832 	/*
2833 	 * step 3: now start looping through the map entries, extracting
2834 	 * as we go.
2835 	 */
2836 
2837 	while (entry->start < end && entry != &srcmap->header) {
2838 
2839 		/* if we are not doing a quick reference, clip it */
2840 		if ((flags & UVM_EXTRACT_QREF) == 0)
2841 			UVM_MAP_CLIP_END(srcmap, entry, end, NULL);
2842 
2843 		/* clear needs_copy (allow chunking) */
2844 		if (UVM_ET_ISNEEDSCOPY(entry)) {
2845 			amap_copy(srcmap, entry,
2846 			    AMAP_COPY_NOWAIT|AMAP_COPY_NOMERGE, start, end);
2847 			if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
2848 				error = ENOMEM;
2849 				goto bad;
2850 			}
2851 
2852 			/* amap_copy could clip (during chunk)!  update fudge */
2853 			if (fudge) {
2854 				fudge = start - entry->start;
2855 				orig_fudge = fudge;
2856 			}
2857 		}
2858 
2859 		/* calculate the offset of this from "start" */
2860 		oldoffset = (entry->start + fudge) - start;
2861 
2862 		/* allocate a new map entry */
2863 		newentry = uvm_mapent_alloc(dstmap, 0);
2864 		if (newentry == NULL) {
2865 			error = ENOMEM;
2866 			goto bad;
2867 		}
2868 
2869 		/* set up new map entry */
2870 		newentry->next = NULL;
2871 		newentry->prev = endchain;
2872 		newentry->start = dstaddr + oldoffset;
2873 		newentry->end =
2874 		    newentry->start + (entry->end - (entry->start + fudge));
2875 		if (newentry->end > newend || newentry->end < newentry->start)
2876 			newentry->end = newend;
2877 		newentry->object.uvm_obj = entry->object.uvm_obj;
2878 		if (newentry->object.uvm_obj) {
2879 			if (newentry->object.uvm_obj->pgops->pgo_reference)
2880 				newentry->object.uvm_obj->pgops->
2881 				    pgo_reference(newentry->object.uvm_obj);
2882 				newentry->offset = entry->offset + fudge;
2883 		} else {
2884 			newentry->offset = 0;
2885 		}
2886 		newentry->etype = entry->etype;
2887 		newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
2888 			entry->max_protection : entry->protection;
2889 		newentry->max_protection = entry->max_protection;
2890 		newentry->inheritance = entry->inheritance;
2891 		newentry->wired_count = 0;
2892 		newentry->aref.ar_amap = entry->aref.ar_amap;
2893 		if (newentry->aref.ar_amap) {
2894 			newentry->aref.ar_pageoff =
2895 			    entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
2896 			uvm_map_reference_amap(newentry, AMAP_SHARED |
2897 			    ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
2898 		} else {
2899 			newentry->aref.ar_pageoff = 0;
2900 		}
2901 		newentry->advice = entry->advice;
2902 		if ((flags & UVM_EXTRACT_QREF) != 0) {
2903 			newentry->flags |= UVM_MAP_NOMERGE;
2904 		}
2905 
2906 		/* now link it on the chain */
2907 		nchain++;
2908 		nsize += newentry->end - newentry->start;
2909 		if (endchain == NULL) {
2910 			chain = endchain = newentry;
2911 		} else {
2912 			endchain->next = newentry;
2913 			endchain = newentry;
2914 		}
2915 
2916 		/* end of 'while' loop! */
2917 		if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
2918 		    (entry->next == &srcmap->header ||
2919 		    entry->next->start != entry->end)) {
2920 			error = EINVAL;
2921 			goto bad;
2922 		}
2923 		entry = entry->next;
2924 		fudge = 0;
2925 	}
2926 
2927 	/*
2928 	 * step 4: close off chain (in format expected by uvm_map_replace)
2929 	 */
2930 
2931 	if (chain)
2932 		chain->prev = endchain;
2933 
2934 	/*
2935 	 * step 5: attempt to lock the dest map so we can pmap_copy.
2936 	 * note usage of copy_ok:
2937 	 *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
2938 	 *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
2939 	 */
2940 
2941 	if (srcmap == dstmap || vm_map_lock_try(dstmap) == true) {
2942 		copy_ok = 1;
2943 		if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
2944 		    nchain, nsize, &resentry)) {
2945 			if (srcmap != dstmap)
2946 				vm_map_unlock(dstmap);
2947 			error = EIO;
2948 			goto bad;
2949 		}
2950 	} else {
2951 		copy_ok = 0;
2952 		/* replace defered until step 7 */
2953 	}
2954 
2955 	/*
2956 	 * step 6: traverse the srcmap a second time to do the following:
2957 	 *  - if we got a lock on the dstmap do pmap_copy
2958 	 *  - if UVM_EXTRACT_REMOVE remove the entries
2959 	 * we make use of orig_entry and orig_fudge (saved in step 2)
2960 	 */
2961 
2962 	if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
2963 
2964 		/* purge possible stale hints from srcmap */
2965 		if (flags & UVM_EXTRACT_REMOVE) {
2966 			SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
2967 			if (srcmap->first_free != &srcmap->header &&
2968 			    srcmap->first_free->start >= start)
2969 				srcmap->first_free = orig_entry->prev;
2970 		}
2971 
2972 		entry = orig_entry;
2973 		fudge = orig_fudge;
2974 		deadentry = NULL;	/* for UVM_EXTRACT_REMOVE */
2975 
2976 		while (entry->start < end && entry != &srcmap->header) {
2977 			if (copy_ok) {
2978 				oldoffset = (entry->start + fudge) - start;
2979 				elen = MIN(end, entry->end) -
2980 				    (entry->start + fudge);
2981 				pmap_copy(dstmap->pmap, srcmap->pmap,
2982 				    dstaddr + oldoffset, elen,
2983 				    entry->start + fudge);
2984 			}
2985 
2986 			/* we advance "entry" in the following if statement */
2987 			if (flags & UVM_EXTRACT_REMOVE) {
2988 				pmap_remove(srcmap->pmap, entry->start,
2989 						entry->end);
2990 				oldentry = entry;	/* save entry */
2991 				entry = entry->next;	/* advance */
2992 				uvm_map_entry_unlink(srcmap, oldentry);
2993 							/* add to dead list */
2994 				oldentry->next = deadentry;
2995 				deadentry = oldentry;
2996 			} else {
2997 				entry = entry->next;		/* advance */
2998 			}
2999 
3000 			/* end of 'while' loop */
3001 			fudge = 0;
3002 		}
3003 		pmap_update(srcmap->pmap);
3004 
3005 		/*
3006 		 * unlock dstmap.  we will dispose of deadentry in
3007 		 * step 7 if needed
3008 		 */
3009 
3010 		if (copy_ok && srcmap != dstmap)
3011 			vm_map_unlock(dstmap);
3012 
3013 	} else {
3014 		deadentry = NULL;
3015 	}
3016 
3017 	/*
3018 	 * step 7: we are done with the source map, unlock.   if copy_ok
3019 	 * is 0 then we have not replaced the dummy mapping in dstmap yet
3020 	 * and we need to do so now.
3021 	 */
3022 
3023 	vm_map_unlock(srcmap);
3024 	if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
3025 		uvm_unmap_detach(deadentry, 0);   /* dispose of old entries */
3026 
3027 	/* now do the replacement if we didn't do it in step 5 */
3028 	if (copy_ok == 0) {
3029 		vm_map_lock(dstmap);
3030 		error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
3031 		    nchain, nsize, &resentry);
3032 		vm_map_unlock(dstmap);
3033 
3034 		if (error == false) {
3035 			error = EIO;
3036 			goto bad2;
3037 		}
3038 	}
3039 
3040 	if (resentry != NULL)
3041 		uvm_mapent_free(resentry);
3042 
3043 	return (0);
3044 
3045 	/*
3046 	 * bad: failure recovery
3047 	 */
3048 bad:
3049 	vm_map_unlock(srcmap);
3050 bad2:			/* src already unlocked */
3051 	if (chain)
3052 		uvm_unmap_detach(chain,
3053 		    (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
3054 
3055 	if (resentry != NULL)
3056 		uvm_mapent_free(resentry);
3057 
3058 	if ((flags & UVM_EXTRACT_RESERVED) == 0) {
3059 		uvm_unmap(dstmap, dstaddr, dstaddr+len);   /* ??? */
3060 	}
3061 	return (error);
3062 }
3063 
3064 /* end of extraction functions */
3065 
3066 /*
3067  * uvm_map_submap: punch down part of a map into a submap
3068  *
3069  * => only the kernel_map is allowed to be submapped
3070  * => the purpose of submapping is to break up the locking granularity
3071  *	of a larger map
3072  * => the range specified must have been mapped previously with a uvm_map()
3073  *	call [with uobj==NULL] to create a blank map entry in the main map.
3074  *	[And it had better still be blank!]
3075  * => maps which contain submaps should never be copied or forked.
3076  * => to remove a submap, use uvm_unmap() on the main map
3077  *	and then uvm_map_deallocate() the submap.
3078  * => main map must be unlocked.
3079  * => submap must have been init'd and have a zero reference count.
3080  *	[need not be locked as we don't actually reference it]
3081  */
3082 
3083 int
3084 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
3085     struct vm_map *submap)
3086 {
3087 	struct vm_map_entry *entry;
3088 	struct uvm_mapent_reservation umr;
3089 	int error;
3090 
3091 	uvm_mapent_reserve(map, &umr, 2, 0);
3092 
3093 	vm_map_lock(map);
3094 	VM_MAP_RANGE_CHECK(map, start, end);
3095 
3096 	if (uvm_map_lookup_entry(map, start, &entry)) {
3097 		UVM_MAP_CLIP_START(map, entry, start, &umr);
3098 		UVM_MAP_CLIP_END(map, entry, end, &umr);	/* to be safe */
3099 	} else {
3100 		entry = NULL;
3101 	}
3102 
3103 	if (entry != NULL &&
3104 	    entry->start == start && entry->end == end &&
3105 	    entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
3106 	    !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
3107 		entry->etype |= UVM_ET_SUBMAP;
3108 		entry->object.sub_map = submap;
3109 		entry->offset = 0;
3110 		uvm_map_reference(submap);
3111 		error = 0;
3112 	} else {
3113 		error = EINVAL;
3114 	}
3115 	vm_map_unlock(map);
3116 
3117 	uvm_mapent_unreserve(map, &umr);
3118 
3119 	return error;
3120 }
3121 
3122 /*
3123  * uvm_map_setup_kernel: init in-kernel map
3124  *
3125  * => map must not be in service yet.
3126  */
3127 
3128 void
3129 uvm_map_setup_kernel(struct vm_map_kernel *map,
3130     vaddr_t vmin, vaddr_t vmax, int flags)
3131 {
3132 
3133 	uvm_map_setup(&map->vmk_map, vmin, vmax, flags);
3134 	callback_head_init(&map->vmk_reclaim_callback, IPL_VM);
3135 	LIST_INIT(&map->vmk_kentry_free);
3136 	map->vmk_merged_entries = NULL;
3137 }
3138 
3139 
3140 /*
3141  * uvm_map_protect: change map protection
3142  *
3143  * => set_max means set max_protection.
3144  * => map must be unlocked.
3145  */
3146 
3147 #define MASK(entry)	(UVM_ET_ISCOPYONWRITE(entry) ? \
3148 			 ~VM_PROT_WRITE : VM_PROT_ALL)
3149 
3150 int
3151 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3152     vm_prot_t new_prot, bool set_max)
3153 {
3154 	struct vm_map_entry *current, *entry;
3155 	int error = 0;
3156 	UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
3157 	UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)",
3158 		    map, start, end, new_prot);
3159 
3160 	vm_map_lock(map);
3161 	VM_MAP_RANGE_CHECK(map, start, end);
3162 	if (uvm_map_lookup_entry(map, start, &entry)) {
3163 		UVM_MAP_CLIP_START(map, entry, start, NULL);
3164 	} else {
3165 		entry = entry->next;
3166 	}
3167 
3168 	/*
3169 	 * make a first pass to check for protection violations.
3170 	 */
3171 
3172 	current = entry;
3173 	while ((current != &map->header) && (current->start < end)) {
3174 		if (UVM_ET_ISSUBMAP(current)) {
3175 			error = EINVAL;
3176 			goto out;
3177 		}
3178 		if ((new_prot & current->max_protection) != new_prot) {
3179 			error = EACCES;
3180 			goto out;
3181 		}
3182 		/*
3183 		 * Don't allow VM_PROT_EXECUTE to be set on entries that
3184 		 * point to vnodes that are associated with a NOEXEC file
3185 		 * system.
3186 		 */
3187 		if (UVM_ET_ISOBJ(current) &&
3188 		    UVM_OBJ_IS_VNODE(current->object.uvm_obj)) {
3189 			struct vnode *vp =
3190 			    (struct vnode *) current->object.uvm_obj;
3191 
3192 			if ((new_prot & VM_PROT_EXECUTE) != 0 &&
3193 			    (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
3194 				error = EACCES;
3195 				goto out;
3196 			}
3197 		}
3198 
3199 		current = current->next;
3200 	}
3201 
3202 	/* go back and fix up protections (no need to clip this time). */
3203 
3204 	current = entry;
3205 	while ((current != &map->header) && (current->start < end)) {
3206 		vm_prot_t old_prot;
3207 
3208 		UVM_MAP_CLIP_END(map, current, end, NULL);
3209 		old_prot = current->protection;
3210 		if (set_max)
3211 			current->protection =
3212 			    (current->max_protection = new_prot) & old_prot;
3213 		else
3214 			current->protection = new_prot;
3215 
3216 		/*
3217 		 * update physical map if necessary.  worry about copy-on-write
3218 		 * here -- CHECK THIS XXX
3219 		 */
3220 
3221 		if (current->protection != old_prot) {
3222 			/* update pmap! */
3223 			pmap_protect(map->pmap, current->start, current->end,
3224 			    current->protection & MASK(entry));
3225 
3226 			/*
3227 			 * If this entry points at a vnode, and the
3228 			 * protection includes VM_PROT_EXECUTE, mark
3229 			 * the vnode as VEXECMAP.
3230 			 */
3231 			if (UVM_ET_ISOBJ(current)) {
3232 				struct uvm_object *uobj =
3233 				    current->object.uvm_obj;
3234 
3235 				if (UVM_OBJ_IS_VNODE(uobj) &&
3236 				    (current->protection & VM_PROT_EXECUTE)) {
3237 					vn_markexec((struct vnode *) uobj);
3238 				}
3239 			}
3240 		}
3241 
3242 		/*
3243 		 * If the map is configured to lock any future mappings,
3244 		 * wire this entry now if the old protection was VM_PROT_NONE
3245 		 * and the new protection is not VM_PROT_NONE.
3246 		 */
3247 
3248 		if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3249 		    VM_MAPENT_ISWIRED(entry) == 0 &&
3250 		    old_prot == VM_PROT_NONE &&
3251 		    new_prot != VM_PROT_NONE) {
3252 			if (uvm_map_pageable(map, entry->start,
3253 			    entry->end, false,
3254 			    UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
3255 
3256 				/*
3257 				 * If locking the entry fails, remember the
3258 				 * error if it's the first one.  Note we
3259 				 * still continue setting the protection in
3260 				 * the map, but will return the error
3261 				 * condition regardless.
3262 				 *
3263 				 * XXX Ignore what the actual error is,
3264 				 * XXX just call it a resource shortage
3265 				 * XXX so that it doesn't get confused
3266 				 * XXX what uvm_map_protect() itself would
3267 				 * XXX normally return.
3268 				 */
3269 
3270 				error = ENOMEM;
3271 			}
3272 		}
3273 		current = current->next;
3274 	}
3275 	pmap_update(map->pmap);
3276 
3277  out:
3278 	vm_map_unlock(map);
3279 
3280 	UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0);
3281 	return error;
3282 }
3283 
3284 #undef  MASK
3285 
3286 /*
3287  * uvm_map_inherit: set inheritance code for range of addrs in map.
3288  *
3289  * => map must be unlocked
3290  * => note that the inherit code is used during a "fork".  see fork
3291  *	code for details.
3292  */
3293 
3294 int
3295 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
3296     vm_inherit_t new_inheritance)
3297 {
3298 	struct vm_map_entry *entry, *temp_entry;
3299 	UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
3300 	UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)",
3301 	    map, start, end, new_inheritance);
3302 
3303 	switch (new_inheritance) {
3304 	case MAP_INHERIT_NONE:
3305 	case MAP_INHERIT_COPY:
3306 	case MAP_INHERIT_SHARE:
3307 		break;
3308 	default:
3309 		UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
3310 		return EINVAL;
3311 	}
3312 
3313 	vm_map_lock(map);
3314 	VM_MAP_RANGE_CHECK(map, start, end);
3315 	if (uvm_map_lookup_entry(map, start, &temp_entry)) {
3316 		entry = temp_entry;
3317 		UVM_MAP_CLIP_START(map, entry, start, NULL);
3318 	}  else {
3319 		entry = temp_entry->next;
3320 	}
3321 	while ((entry != &map->header) && (entry->start < end)) {
3322 		UVM_MAP_CLIP_END(map, entry, end, NULL);
3323 		entry->inheritance = new_inheritance;
3324 		entry = entry->next;
3325 	}
3326 	vm_map_unlock(map);
3327 	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
3328 	return 0;
3329 }
3330 
3331 /*
3332  * uvm_map_advice: set advice code for range of addrs in map.
3333  *
3334  * => map must be unlocked
3335  */
3336 
3337 int
3338 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
3339 {
3340 	struct vm_map_entry *entry, *temp_entry;
3341 	UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
3342 	UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)",
3343 	    map, start, end, new_advice);
3344 
3345 	vm_map_lock(map);
3346 	VM_MAP_RANGE_CHECK(map, start, end);
3347 	if (uvm_map_lookup_entry(map, start, &temp_entry)) {
3348 		entry = temp_entry;
3349 		UVM_MAP_CLIP_START(map, entry, start, NULL);
3350 	} else {
3351 		entry = temp_entry->next;
3352 	}
3353 
3354 	/*
3355 	 * XXXJRT: disallow holes?
3356 	 */
3357 
3358 	while ((entry != &map->header) && (entry->start < end)) {
3359 		UVM_MAP_CLIP_END(map, entry, end, NULL);
3360 
3361 		switch (new_advice) {
3362 		case MADV_NORMAL:
3363 		case MADV_RANDOM:
3364 		case MADV_SEQUENTIAL:
3365 			/* nothing special here */
3366 			break;
3367 
3368 		default:
3369 			vm_map_unlock(map);
3370 			UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
3371 			return EINVAL;
3372 		}
3373 		entry->advice = new_advice;
3374 		entry = entry->next;
3375 	}
3376 
3377 	vm_map_unlock(map);
3378 	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
3379 	return 0;
3380 }
3381 
3382 /*
3383  * uvm_map_willneed: apply MADV_WILLNEED
3384  */
3385 
3386 int
3387 uvm_map_willneed(struct vm_map *map, vaddr_t start, vaddr_t end)
3388 {
3389 	struct vm_map_entry *entry;
3390 	UVMHIST_FUNC("uvm_map_willneed"); UVMHIST_CALLED(maphist);
3391 	UVMHIST_LOG(maphist,"(map=0x%lx,start=0x%lx,end=0x%lx)",
3392 	    map, start, end, 0);
3393 
3394 	vm_map_lock_read(map);
3395 	VM_MAP_RANGE_CHECK(map, start, end);
3396 	if (!uvm_map_lookup_entry(map, start, &entry)) {
3397 		entry = entry->next;
3398 	}
3399 	while (entry->start < end) {
3400 		struct vm_amap * const amap = entry->aref.ar_amap;
3401 		struct uvm_object * const uobj = entry->object.uvm_obj;
3402 
3403 		KASSERT(entry != &map->header);
3404 		KASSERT(start < entry->end);
3405 		/*
3406 		 * XXX IMPLEMENT ME.
3407 		 * Should invent a "weak" mode for uvm_fault()
3408 		 * which would only do the PGO_LOCKED pgo_get().
3409 		 *
3410 		 * for now, we handle only the easy but common case.
3411 		 */
3412 		if (UVM_ET_ISOBJ(entry) && amap == NULL && uobj != NULL) {
3413 			off_t offset;
3414 			off_t size;
3415 
3416 			offset = entry->offset;
3417 			if (start < entry->start) {
3418 				offset += entry->start - start;
3419 			}
3420 			size = entry->offset + (entry->end - entry->start);
3421 			if (entry->end < end) {
3422 				size -= end - entry->end;
3423 			}
3424 			uvm_readahead(uobj, offset, size);
3425 		}
3426 		entry = entry->next;
3427 	}
3428 	vm_map_unlock_read(map);
3429 	UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
3430 	return 0;
3431 }
3432 
3433 /*
3434  * uvm_map_pageable: sets the pageability of a range in a map.
3435  *
3436  * => wires map entries.  should not be used for transient page locking.
3437  *	for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
3438  * => regions specified as not pageable require lock-down (wired) memory
3439  *	and page tables.
3440  * => map must never be read-locked
3441  * => if islocked is true, map is already write-locked
3442  * => we always unlock the map, since we must downgrade to a read-lock
3443  *	to call uvm_fault_wire()
3444  * => XXXCDC: check this and try and clean it up.
3445  */
3446 
3447 int
3448 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
3449     bool new_pageable, int lockflags)
3450 {
3451 	struct vm_map_entry *entry, *start_entry, *failed_entry;
3452 	int rv;
3453 #ifdef DIAGNOSTIC
3454 	u_int timestamp_save;
3455 #endif
3456 	UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
3457 	UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
3458 		    map, start, end, new_pageable);
3459 	KASSERT(map->flags & VM_MAP_PAGEABLE);
3460 
3461 	if ((lockflags & UVM_LK_ENTER) == 0)
3462 		vm_map_lock(map);
3463 	VM_MAP_RANGE_CHECK(map, start, end);
3464 
3465 	/*
3466 	 * only one pageability change may take place at one time, since
3467 	 * uvm_fault_wire assumes it will be called only once for each
3468 	 * wiring/unwiring.  therefore, we have to make sure we're actually
3469 	 * changing the pageability for the entire region.  we do so before
3470 	 * making any changes.
3471 	 */
3472 
3473 	if (uvm_map_lookup_entry(map, start, &start_entry) == false) {
3474 		if ((lockflags & UVM_LK_EXIT) == 0)
3475 			vm_map_unlock(map);
3476 
3477 		UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0);
3478 		return EFAULT;
3479 	}
3480 	entry = start_entry;
3481 
3482 	/*
3483 	 * handle wiring and unwiring separately.
3484 	 */
3485 
3486 	if (new_pageable) {		/* unwire */
3487 		UVM_MAP_CLIP_START(map, entry, start, NULL);
3488 
3489 		/*
3490 		 * unwiring.  first ensure that the range to be unwired is
3491 		 * really wired down and that there are no holes.
3492 		 */
3493 
3494 		while ((entry != &map->header) && (entry->start < end)) {
3495 			if (entry->wired_count == 0 ||
3496 			    (entry->end < end &&
3497 			     (entry->next == &map->header ||
3498 			      entry->next->start > entry->end))) {
3499 				if ((lockflags & UVM_LK_EXIT) == 0)
3500 					vm_map_unlock(map);
3501 				UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0);
3502 				return EINVAL;
3503 			}
3504 			entry = entry->next;
3505 		}
3506 
3507 		/*
3508 		 * POSIX 1003.1b - a single munlock call unlocks a region,
3509 		 * regardless of the number of mlock calls made on that
3510 		 * region.
3511 		 */
3512 
3513 		entry = start_entry;
3514 		while ((entry != &map->header) && (entry->start < end)) {
3515 			UVM_MAP_CLIP_END(map, entry, end, NULL);
3516 			if (VM_MAPENT_ISWIRED(entry))
3517 				uvm_map_entry_unwire(map, entry);
3518 			entry = entry->next;
3519 		}
3520 		if ((lockflags & UVM_LK_EXIT) == 0)
3521 			vm_map_unlock(map);
3522 		UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
3523 		return 0;
3524 	}
3525 
3526 	/*
3527 	 * wire case: in two passes [XXXCDC: ugly block of code here]
3528 	 *
3529 	 * 1: holding the write lock, we create any anonymous maps that need
3530 	 *    to be created.  then we clip each map entry to the region to
3531 	 *    be wired and increment its wiring count.
3532 	 *
3533 	 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
3534 	 *    in the pages for any newly wired area (wired_count == 1).
3535 	 *
3536 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
3537 	 *    deadlock with another thread that may have faulted on one of
3538 	 *    the pages to be wired (it would mark the page busy, blocking
3539 	 *    us, then in turn block on the map lock that we hold).  because
3540 	 *    of problems in the recursive lock package, we cannot upgrade
3541 	 *    to a write lock in vm_map_lookup.  thus, any actions that
3542 	 *    require the write lock must be done beforehand.  because we
3543 	 *    keep the read lock on the map, the copy-on-write status of the
3544 	 *    entries we modify here cannot change.
3545 	 */
3546 
3547 	while ((entry != &map->header) && (entry->start < end)) {
3548 		if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3549 
3550 			/*
3551 			 * perform actions of vm_map_lookup that need the
3552 			 * write lock on the map: create an anonymous map
3553 			 * for a copy-on-write region, or an anonymous map
3554 			 * for a zero-fill region.  (XXXCDC: submap case
3555 			 * ok?)
3556 			 */
3557 
3558 			if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
3559 				if (UVM_ET_ISNEEDSCOPY(entry) &&
3560 				    ((entry->max_protection & VM_PROT_WRITE) ||
3561 				     (entry->object.uvm_obj == NULL))) {
3562 					amap_copy(map, entry, 0, start, end);
3563 					/* XXXCDC: wait OK? */
3564 				}
3565 			}
3566 		}
3567 		UVM_MAP_CLIP_START(map, entry, start, NULL);
3568 		UVM_MAP_CLIP_END(map, entry, end, NULL);
3569 		entry->wired_count++;
3570 
3571 		/*
3572 		 * Check for holes
3573 		 */
3574 
3575 		if (entry->protection == VM_PROT_NONE ||
3576 		    (entry->end < end &&
3577 		     (entry->next == &map->header ||
3578 		      entry->next->start > entry->end))) {
3579 
3580 			/*
3581 			 * found one.  amap creation actions do not need to
3582 			 * be undone, but the wired counts need to be restored.
3583 			 */
3584 
3585 			while (entry != &map->header && entry->end > start) {
3586 				entry->wired_count--;
3587 				entry = entry->prev;
3588 			}
3589 			if ((lockflags & UVM_LK_EXIT) == 0)
3590 				vm_map_unlock(map);
3591 			UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
3592 			return EINVAL;
3593 		}
3594 		entry = entry->next;
3595 	}
3596 
3597 	/*
3598 	 * Pass 2.
3599 	 */
3600 
3601 #ifdef DIAGNOSTIC
3602 	timestamp_save = map->timestamp;
3603 #endif
3604 	vm_map_busy(map);
3605 	vm_map_unlock(map);
3606 
3607 	rv = 0;
3608 	entry = start_entry;
3609 	while (entry != &map->header && entry->start < end) {
3610 		if (entry->wired_count == 1) {
3611 			rv = uvm_fault_wire(map, entry->start, entry->end,
3612 			    entry->max_protection, 1);
3613 			if (rv) {
3614 
3615 				/*
3616 				 * wiring failed.  break out of the loop.
3617 				 * we'll clean up the map below, once we
3618 				 * have a write lock again.
3619 				 */
3620 
3621 				break;
3622 			}
3623 		}
3624 		entry = entry->next;
3625 	}
3626 
3627 	if (rv) {	/* failed? */
3628 
3629 		/*
3630 		 * Get back to an exclusive (write) lock.
3631 		 */
3632 
3633 		vm_map_lock(map);
3634 		vm_map_unbusy(map);
3635 
3636 #ifdef DIAGNOSTIC
3637 		if (timestamp_save + 1 != map->timestamp)
3638 			panic("uvm_map_pageable: stale map");
3639 #endif
3640 
3641 		/*
3642 		 * first drop the wiring count on all the entries
3643 		 * which haven't actually been wired yet.
3644 		 */
3645 
3646 		failed_entry = entry;
3647 		while (entry != &map->header && entry->start < end) {
3648 			entry->wired_count--;
3649 			entry = entry->next;
3650 		}
3651 
3652 		/*
3653 		 * now, unwire all the entries that were successfully
3654 		 * wired above.
3655 		 */
3656 
3657 		entry = start_entry;
3658 		while (entry != failed_entry) {
3659 			entry->wired_count--;
3660 			if (VM_MAPENT_ISWIRED(entry) == 0)
3661 				uvm_map_entry_unwire(map, entry);
3662 			entry = entry->next;
3663 		}
3664 		if ((lockflags & UVM_LK_EXIT) == 0)
3665 			vm_map_unlock(map);
3666 		UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
3667 		return (rv);
3668 	}
3669 
3670 	if ((lockflags & UVM_LK_EXIT) == 0) {
3671 		vm_map_unbusy(map);
3672 	} else {
3673 
3674 		/*
3675 		 * Get back to an exclusive (write) lock.
3676 		 */
3677 
3678 		vm_map_lock(map);
3679 		vm_map_unbusy(map);
3680 	}
3681 
3682 	UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
3683 	return 0;
3684 }
3685 
3686 /*
3687  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
3688  * all mapped regions.
3689  *
3690  * => map must not be locked.
3691  * => if no flags are specified, all regions are unwired.
3692  * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
3693  */
3694 
3695 int
3696 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
3697 {
3698 	struct vm_map_entry *entry, *failed_entry;
3699 	vsize_t size;
3700 	int rv;
3701 #ifdef DIAGNOSTIC
3702 	u_int timestamp_save;
3703 #endif
3704 	UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
3705 	UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0);
3706 
3707 	KASSERT(map->flags & VM_MAP_PAGEABLE);
3708 
3709 	vm_map_lock(map);
3710 
3711 	/*
3712 	 * handle wiring and unwiring separately.
3713 	 */
3714 
3715 	if (flags == 0) {			/* unwire */
3716 
3717 		/*
3718 		 * POSIX 1003.1b -- munlockall unlocks all regions,
3719 		 * regardless of how many times mlockall has been called.
3720 		 */
3721 
3722 		for (entry = map->header.next; entry != &map->header;
3723 		     entry = entry->next) {
3724 			if (VM_MAPENT_ISWIRED(entry))
3725 				uvm_map_entry_unwire(map, entry);
3726 		}
3727 		map->flags &= ~VM_MAP_WIREFUTURE;
3728 		vm_map_unlock(map);
3729 		UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
3730 		return 0;
3731 	}
3732 
3733 	if (flags & MCL_FUTURE) {
3734 
3735 		/*
3736 		 * must wire all future mappings; remember this.
3737 		 */
3738 
3739 		map->flags |= VM_MAP_WIREFUTURE;
3740 	}
3741 
3742 	if ((flags & MCL_CURRENT) == 0) {
3743 
3744 		/*
3745 		 * no more work to do!
3746 		 */
3747 
3748 		UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
3749 		vm_map_unlock(map);
3750 		return 0;
3751 	}
3752 
3753 	/*
3754 	 * wire case: in three passes [XXXCDC: ugly block of code here]
3755 	 *
3756 	 * 1: holding the write lock, count all pages mapped by non-wired
3757 	 *    entries.  if this would cause us to go over our limit, we fail.
3758 	 *
3759 	 * 2: still holding the write lock, we create any anonymous maps that
3760 	 *    need to be created.  then we increment its wiring count.
3761 	 *
3762 	 * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
3763 	 *    in the pages for any newly wired area (wired_count == 1).
3764 	 *
3765 	 *    downgrading to a read lock for uvm_fault_wire avoids a possible
3766 	 *    deadlock with another thread that may have faulted on one of
3767 	 *    the pages to be wired (it would mark the page busy, blocking
3768 	 *    us, then in turn block on the map lock that we hold).  because
3769 	 *    of problems in the recursive lock package, we cannot upgrade
3770 	 *    to a write lock in vm_map_lookup.  thus, any actions that
3771 	 *    require the write lock must be done beforehand.  because we
3772 	 *    keep the read lock on the map, the copy-on-write status of the
3773 	 *    entries we modify here cannot change.
3774 	 */
3775 
3776 	for (size = 0, entry = map->header.next; entry != &map->header;
3777 	     entry = entry->next) {
3778 		if (entry->protection != VM_PROT_NONE &&
3779 		    VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3780 			size += entry->end - entry->start;
3781 		}
3782 	}
3783 
3784 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
3785 		vm_map_unlock(map);
3786 		return ENOMEM;
3787 	}
3788 
3789 	if (limit != 0 &&
3790 	    (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
3791 		vm_map_unlock(map);
3792 		return ENOMEM;
3793 	}
3794 
3795 	/*
3796 	 * Pass 2.
3797 	 */
3798 
3799 	for (entry = map->header.next; entry != &map->header;
3800 	     entry = entry->next) {
3801 		if (entry->protection == VM_PROT_NONE)
3802 			continue;
3803 		if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
3804 
3805 			/*
3806 			 * perform actions of vm_map_lookup that need the
3807 			 * write lock on the map: create an anonymous map
3808 			 * for a copy-on-write region, or an anonymous map
3809 			 * for a zero-fill region.  (XXXCDC: submap case
3810 			 * ok?)
3811 			 */
3812 
3813 			if (!UVM_ET_ISSUBMAP(entry)) {	/* not submap */
3814 				if (UVM_ET_ISNEEDSCOPY(entry) &&
3815 				    ((entry->max_protection & VM_PROT_WRITE) ||
3816 				     (entry->object.uvm_obj == NULL))) {
3817 					amap_copy(map, entry, 0, entry->start,
3818 					    entry->end);
3819 					/* XXXCDC: wait OK? */
3820 				}
3821 			}
3822 		}
3823 		entry->wired_count++;
3824 	}
3825 
3826 	/*
3827 	 * Pass 3.
3828 	 */
3829 
3830 #ifdef DIAGNOSTIC
3831 	timestamp_save = map->timestamp;
3832 #endif
3833 	vm_map_busy(map);
3834 	vm_map_unlock(map);
3835 
3836 	rv = 0;
3837 	for (entry = map->header.next; entry != &map->header;
3838 	     entry = entry->next) {
3839 		if (entry->wired_count == 1) {
3840 			rv = uvm_fault_wire(map, entry->start, entry->end,
3841 			    entry->max_protection, 1);
3842 			if (rv) {
3843 
3844 				/*
3845 				 * wiring failed.  break out of the loop.
3846 				 * we'll clean up the map below, once we
3847 				 * have a write lock again.
3848 				 */
3849 
3850 				break;
3851 			}
3852 		}
3853 	}
3854 
3855 	if (rv) {
3856 
3857 		/*
3858 		 * Get back an exclusive (write) lock.
3859 		 */
3860 
3861 		vm_map_lock(map);
3862 		vm_map_unbusy(map);
3863 
3864 #ifdef DIAGNOSTIC
3865 		if (timestamp_save + 1 != map->timestamp)
3866 			panic("uvm_map_pageable_all: stale map");
3867 #endif
3868 
3869 		/*
3870 		 * first drop the wiring count on all the entries
3871 		 * which haven't actually been wired yet.
3872 		 *
3873 		 * Skip VM_PROT_NONE entries like we did above.
3874 		 */
3875 
3876 		failed_entry = entry;
3877 		for (/* nothing */; entry != &map->header;
3878 		     entry = entry->next) {
3879 			if (entry->protection == VM_PROT_NONE)
3880 				continue;
3881 			entry->wired_count--;
3882 		}
3883 
3884 		/*
3885 		 * now, unwire all the entries that were successfully
3886 		 * wired above.
3887 		 *
3888 		 * Skip VM_PROT_NONE entries like we did above.
3889 		 */
3890 
3891 		for (entry = map->header.next; entry != failed_entry;
3892 		     entry = entry->next) {
3893 			if (entry->protection == VM_PROT_NONE)
3894 				continue;
3895 			entry->wired_count--;
3896 			if (VM_MAPENT_ISWIRED(entry))
3897 				uvm_map_entry_unwire(map, entry);
3898 		}
3899 		vm_map_unlock(map);
3900 		UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0);
3901 		return (rv);
3902 	}
3903 
3904 	vm_map_unbusy(map);
3905 
3906 	UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
3907 	return 0;
3908 }
3909 
3910 /*
3911  * uvm_map_clean: clean out a map range
3912  *
3913  * => valid flags:
3914  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
3915  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
3916  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
3917  *   if (flags & PGO_FREE): any cached pages are freed after clean
3918  * => returns an error if any part of the specified range isn't mapped
3919  * => never a need to flush amap layer since the anonymous memory has
3920  *	no permanent home, but may deactivate pages there
3921  * => called from sys_msync() and sys_madvise()
3922  * => caller must not write-lock map (read OK).
3923  * => we may sleep while cleaning if SYNCIO [with map read-locked]
3924  */
3925 
3926 int
3927 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
3928 {
3929 	struct vm_map_entry *current, *entry;
3930 	struct uvm_object *uobj;
3931 	struct vm_amap *amap;
3932 	struct vm_anon *anon;
3933 	struct vm_page *pg;
3934 	vaddr_t offset;
3935 	vsize_t size;
3936 	voff_t uoff;
3937 	int error, refs;
3938 	UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
3939 
3940 	UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)",
3941 		    map, start, end, flags);
3942 	KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
3943 		(PGO_FREE|PGO_DEACTIVATE));
3944 
3945 	vm_map_lock_read(map);
3946 	VM_MAP_RANGE_CHECK(map, start, end);
3947 	if (uvm_map_lookup_entry(map, start, &entry) == false) {
3948 		vm_map_unlock_read(map);
3949 		return EFAULT;
3950 	}
3951 
3952 	/*
3953 	 * Make a first pass to check for holes and wiring problems.
3954 	 */
3955 
3956 	for (current = entry; current->start < end; current = current->next) {
3957 		if (UVM_ET_ISSUBMAP(current)) {
3958 			vm_map_unlock_read(map);
3959 			return EINVAL;
3960 		}
3961 		if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) {
3962 			vm_map_unlock_read(map);
3963 			return EBUSY;
3964 		}
3965 		if (end <= current->end) {
3966 			break;
3967 		}
3968 		if (current->end != current->next->start) {
3969 			vm_map_unlock_read(map);
3970 			return EFAULT;
3971 		}
3972 	}
3973 
3974 	error = 0;
3975 	for (current = entry; start < end; current = current->next) {
3976 		amap = current->aref.ar_amap;	/* upper layer */
3977 		uobj = current->object.uvm_obj;	/* lower layer */
3978 		KASSERT(start >= current->start);
3979 
3980 		/*
3981 		 * No amap cleaning necessary if:
3982 		 *
3983 		 *	(1) There's no amap.
3984 		 *
3985 		 *	(2) We're not deactivating or freeing pages.
3986 		 */
3987 
3988 		if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
3989 			goto flush_object;
3990 
3991 		amap_lock(amap);
3992 		offset = start - current->start;
3993 		size = MIN(end, current->end) - start;
3994 		for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
3995 			anon = amap_lookup(&current->aref, offset);
3996 			if (anon == NULL)
3997 				continue;
3998 
3999 			mutex_enter(&anon->an_lock);
4000 			pg = anon->an_page;
4001 			if (pg == NULL) {
4002 				mutex_exit(&anon->an_lock);
4003 				continue;
4004 			}
4005 
4006 			switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4007 
4008 			/*
4009 			 * In these first 3 cases, we just deactivate the page.
4010 			 */
4011 
4012 			case PGO_CLEANIT|PGO_FREE:
4013 			case PGO_CLEANIT|PGO_DEACTIVATE:
4014 			case PGO_DEACTIVATE:
4015  deactivate_it:
4016 				/*
4017 				 * skip the page if it's loaned or wired,
4018 				 * since it shouldn't be on a paging queue
4019 				 * at all in these cases.
4020 				 */
4021 
4022 				mutex_enter(&uvm_pageqlock);
4023 				if (pg->loan_count != 0 ||
4024 				    pg->wire_count != 0) {
4025 					mutex_exit(&uvm_pageqlock);
4026 					mutex_exit(&anon->an_lock);
4027 					continue;
4028 				}
4029 				KASSERT(pg->uanon == anon);
4030 				uvm_pagedeactivate(pg);
4031 				mutex_exit(&uvm_pageqlock);
4032 				mutex_exit(&anon->an_lock);
4033 				continue;
4034 
4035 			case PGO_FREE:
4036 
4037 				/*
4038 				 * If there are multiple references to
4039 				 * the amap, just deactivate the page.
4040 				 */
4041 
4042 				if (amap_refs(amap) > 1)
4043 					goto deactivate_it;
4044 
4045 				/* skip the page if it's wired */
4046 				if (pg->wire_count != 0) {
4047 					mutex_exit(&anon->an_lock);
4048 					continue;
4049 				}
4050 				amap_unadd(&current->aref, offset);
4051 				refs = --anon->an_ref;
4052 				mutex_exit(&anon->an_lock);
4053 				if (refs == 0)
4054 					uvm_anfree(anon);
4055 				continue;
4056 			}
4057 		}
4058 		amap_unlock(amap);
4059 
4060  flush_object:
4061 		/*
4062 		 * flush pages if we've got a valid backing object.
4063 		 * note that we must always clean object pages before
4064 		 * freeing them since otherwise we could reveal stale
4065 		 * data from files.
4066 		 */
4067 
4068 		uoff = current->offset + (start - current->start);
4069 		size = MIN(end, current->end) - start;
4070 		if (uobj != NULL) {
4071 			mutex_enter(&uobj->vmobjlock);
4072 			if (uobj->pgops->pgo_put != NULL)
4073 				error = (uobj->pgops->pgo_put)(uobj, uoff,
4074 				    uoff + size, flags | PGO_CLEANIT);
4075 			else
4076 				error = 0;
4077 		}
4078 		start += size;
4079 	}
4080 	vm_map_unlock_read(map);
4081 	return (error);
4082 }
4083 
4084 
4085 /*
4086  * uvm_map_checkprot: check protection in map
4087  *
4088  * => must allow specified protection in a fully allocated region.
4089  * => map must be read or write locked by caller.
4090  */
4091 
4092 bool
4093 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4094     vm_prot_t protection)
4095 {
4096 	struct vm_map_entry *entry;
4097 	struct vm_map_entry *tmp_entry;
4098 
4099 	if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
4100 		return (false);
4101 	}
4102 	entry = tmp_entry;
4103 	while (start < end) {
4104 		if (entry == &map->header) {
4105 			return (false);
4106 		}
4107 
4108 		/*
4109 		 * no holes allowed
4110 		 */
4111 
4112 		if (start < entry->start) {
4113 			return (false);
4114 		}
4115 
4116 		/*
4117 		 * check protection associated with entry
4118 		 */
4119 
4120 		if ((entry->protection & protection) != protection) {
4121 			return (false);
4122 		}
4123 		start = entry->end;
4124 		entry = entry->next;
4125 	}
4126 	return (true);
4127 }
4128 
4129 /*
4130  * uvmspace_alloc: allocate a vmspace structure.
4131  *
4132  * - structure includes vm_map and pmap
4133  * - XXX: no locking on this structure
4134  * - refcnt set to 1, rest must be init'd by caller
4135  */
4136 struct vmspace *
4137 uvmspace_alloc(vaddr_t vmin, vaddr_t vmax)
4138 {
4139 	struct vmspace *vm;
4140 	UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
4141 
4142 	vm = pool_cache_get(&uvm_vmspace_cache, PR_WAITOK);
4143 	uvmspace_init(vm, NULL, vmin, vmax);
4144 	UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0);
4145 	return (vm);
4146 }
4147 
4148 /*
4149  * uvmspace_init: initialize a vmspace structure.
4150  *
4151  * - XXX: no locking on this structure
4152  * - refcnt set to 1, rest must be init'd by caller
4153  */
4154 void
4155 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t vmin, vaddr_t vmax)
4156 {
4157 	UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
4158 
4159 	memset(vm, 0, sizeof(*vm));
4160 	uvm_map_setup(&vm->vm_map, vmin, vmax, VM_MAP_PAGEABLE
4161 #ifdef __USING_TOPDOWN_VM
4162 	    | VM_MAP_TOPDOWN
4163 #endif
4164 	    );
4165 	if (pmap)
4166 		pmap_reference(pmap);
4167 	else
4168 		pmap = pmap_create();
4169 	vm->vm_map.pmap = pmap;
4170 	vm->vm_refcnt = 1;
4171 	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
4172 }
4173 
4174 /*
4175  * uvmspace_share: share a vmspace between two processes
4176  *
4177  * - used for vfork, threads(?)
4178  */
4179 
4180 void
4181 uvmspace_share(struct proc *p1, struct proc *p2)
4182 {
4183 
4184 	uvmspace_addref(p1->p_vmspace);
4185 	p2->p_vmspace = p1->p_vmspace;
4186 }
4187 
4188 #if 0
4189 
4190 /*
4191  * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
4192  *
4193  * - XXX: no locking on vmspace
4194  */
4195 
4196 void
4197 uvmspace_unshare(struct lwp *l)
4198 {
4199 	struct proc *p = l->l_proc;
4200 	struct vmspace *nvm, *ovm = p->p_vmspace;
4201 
4202 	if (ovm->vm_refcnt == 1)
4203 		/* nothing to do: vmspace isn't shared in the first place */
4204 		return;
4205 
4206 	/* make a new vmspace, still holding old one */
4207 	nvm = uvmspace_fork(ovm);
4208 
4209 	kpreempt_disable();
4210 	pmap_deactivate(l);		/* unbind old vmspace */
4211 	p->p_vmspace = nvm;
4212 	pmap_activate(l);		/* switch to new vmspace */
4213 	kpreempt_enable();
4214 
4215 	uvmspace_free(ovm);		/* drop reference to old vmspace */
4216 }
4217 
4218 #endif
4219 
4220 /*
4221  * uvmspace_exec: the process wants to exec a new program
4222  */
4223 
4224 void
4225 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end)
4226 {
4227 	struct proc *p = l->l_proc;
4228 	struct vmspace *nvm, *ovm = p->p_vmspace;
4229 	struct vm_map *map = &ovm->vm_map;
4230 
4231 #ifdef __sparc__
4232 	/* XXX cgd 960926: the sparc #ifdef should be a MD hook */
4233 	kill_user_windows(l);   /* before stack addresses go away */
4234 #endif
4235 #ifdef __HAVE_CPU_VMSPACE_EXEC
4236 	cpu_vmspace_exec(l, start, end);
4237 #endif
4238 
4239 	/*
4240 	 * see if more than one process is using this vmspace...
4241 	 */
4242 
4243 	if (ovm->vm_refcnt == 1) {
4244 
4245 		/*
4246 		 * if p is the only process using its vmspace then we can safely
4247 		 * recycle that vmspace for the program that is being exec'd.
4248 		 */
4249 
4250 #ifdef SYSVSHM
4251 		/*
4252 		 * SYSV SHM semantics require us to kill all segments on an exec
4253 		 */
4254 
4255 		if (ovm->vm_shm)
4256 			shmexit(ovm);
4257 #endif
4258 
4259 		/*
4260 		 * POSIX 1003.1b -- "lock future mappings" is revoked
4261 		 * when a process execs another program image.
4262 		 */
4263 
4264 		map->flags &= ~VM_MAP_WIREFUTURE;
4265 
4266 		/*
4267 		 * now unmap the old program
4268 		 */
4269 
4270 		pmap_remove_all(map->pmap);
4271 		uvm_unmap(map, vm_map_min(map), vm_map_max(map));
4272 		KASSERT(map->header.prev == &map->header);
4273 		KASSERT(map->nentries == 0);
4274 
4275 		/*
4276 		 * resize the map
4277 		 */
4278 
4279 		vm_map_setmin(map, start);
4280 		vm_map_setmax(map, end);
4281 	} else {
4282 
4283 		/*
4284 		 * p's vmspace is being shared, so we can't reuse it for p since
4285 		 * it is still being used for others.   allocate a new vmspace
4286 		 * for p
4287 		 */
4288 
4289 		nvm = uvmspace_alloc(start, end);
4290 
4291 		/*
4292 		 * install new vmspace and drop our ref to the old one.
4293 		 */
4294 
4295 		kpreempt_disable();
4296 		pmap_deactivate(l);
4297 		p->p_vmspace = nvm;
4298 		pmap_activate(l);
4299 		kpreempt_enable();
4300 
4301 		uvmspace_free(ovm);
4302 	}
4303 }
4304 
4305 /*
4306  * uvmspace_addref: add a referece to a vmspace.
4307  */
4308 
4309 void
4310 uvmspace_addref(struct vmspace *vm)
4311 {
4312 	struct vm_map *map = &vm->vm_map;
4313 
4314 	KASSERT((map->flags & VM_MAP_DYING) == 0);
4315 
4316 	mutex_enter(&map->misc_lock);
4317 	KASSERT(vm->vm_refcnt > 0);
4318 	vm->vm_refcnt++;
4319 	mutex_exit(&map->misc_lock);
4320 }
4321 
4322 /*
4323  * uvmspace_free: free a vmspace data structure
4324  */
4325 
4326 void
4327 uvmspace_free(struct vmspace *vm)
4328 {
4329 	struct vm_map_entry *dead_entries;
4330 	struct vm_map *map = &vm->vm_map;
4331 	int n;
4332 
4333 	UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
4334 
4335 	UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0);
4336 	mutex_enter(&map->misc_lock);
4337 	n = --vm->vm_refcnt;
4338 	mutex_exit(&map->misc_lock);
4339 	if (n > 0)
4340 		return;
4341 
4342 	/*
4343 	 * at this point, there should be no other references to the map.
4344 	 * delete all of the mappings, then destroy the pmap.
4345 	 */
4346 
4347 	map->flags |= VM_MAP_DYING;
4348 	pmap_remove_all(map->pmap);
4349 #ifdef SYSVSHM
4350 	/* Get rid of any SYSV shared memory segments. */
4351 	if (vm->vm_shm != NULL)
4352 		shmexit(vm);
4353 #endif
4354 	if (map->nentries) {
4355 		uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
4356 		    &dead_entries, NULL, 0);
4357 		if (dead_entries != NULL)
4358 			uvm_unmap_detach(dead_entries, 0);
4359 	}
4360 	KASSERT(map->nentries == 0);
4361 	KASSERT(map->size == 0);
4362 	mutex_destroy(&map->misc_lock);
4363 	mutex_destroy(&map->mutex);
4364 	rw_destroy(&map->lock);
4365 	cv_destroy(&map->cv);
4366 	pmap_destroy(map->pmap);
4367 	pool_cache_put(&uvm_vmspace_cache, vm);
4368 }
4369 
4370 /*
4371  *   F O R K   -   m a i n   e n t r y   p o i n t
4372  */
4373 /*
4374  * uvmspace_fork: fork a process' main map
4375  *
4376  * => create a new vmspace for child process from parent.
4377  * => parent's map must not be locked.
4378  */
4379 
4380 struct vmspace *
4381 uvmspace_fork(struct vmspace *vm1)
4382 {
4383 	struct vmspace *vm2;
4384 	struct vm_map *old_map = &vm1->vm_map;
4385 	struct vm_map *new_map;
4386 	struct vm_map_entry *old_entry;
4387 	struct vm_map_entry *new_entry;
4388 	UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
4389 
4390 	vm_map_lock(old_map);
4391 
4392 	vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map));
4393 	memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
4394 	    (char *) (vm1 + 1) - (char *) &vm1->vm_startcopy);
4395 	new_map = &vm2->vm_map;		  /* XXX */
4396 
4397 	old_entry = old_map->header.next;
4398 	new_map->size = old_map->size;
4399 
4400 	/*
4401 	 * go entry-by-entry
4402 	 */
4403 
4404 	while (old_entry != &old_map->header) {
4405 
4406 		/*
4407 		 * first, some sanity checks on the old entry
4408 		 */
4409 
4410 		KASSERT(!UVM_ET_ISSUBMAP(old_entry));
4411 		KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
4412 			!UVM_ET_ISNEEDSCOPY(old_entry));
4413 
4414 		switch (old_entry->inheritance) {
4415 		case MAP_INHERIT_NONE:
4416 
4417 			/*
4418 			 * drop the mapping, modify size
4419 			 */
4420 			new_map->size -= old_entry->end - old_entry->start;
4421 			break;
4422 
4423 		case MAP_INHERIT_SHARE:
4424 
4425 			/*
4426 			 * share the mapping: this means we want the old and
4427 			 * new entries to share amaps and backing objects.
4428 			 */
4429 			/*
4430 			 * if the old_entry needs a new amap (due to prev fork)
4431 			 * then we need to allocate it now so that we have
4432 			 * something we own to share with the new_entry.   [in
4433 			 * other words, we need to clear needs_copy]
4434 			 */
4435 
4436 			if (UVM_ET_ISNEEDSCOPY(old_entry)) {
4437 				/* get our own amap, clears needs_copy */
4438 				amap_copy(old_map, old_entry, AMAP_COPY_NOCHUNK,
4439 				    0, 0);
4440 				/* XXXCDC: WAITOK??? */
4441 			}
4442 
4443 			new_entry = uvm_mapent_alloc(new_map, 0);
4444 			/* old_entry -> new_entry */
4445 			uvm_mapent_copy(old_entry, new_entry);
4446 
4447 			/* new pmap has nothing wired in it */
4448 			new_entry->wired_count = 0;
4449 
4450 			/*
4451 			 * gain reference to object backing the map (can't
4452 			 * be a submap, already checked this case).
4453 			 */
4454 
4455 			if (new_entry->aref.ar_amap)
4456 				uvm_map_reference_amap(new_entry, AMAP_SHARED);
4457 
4458 			if (new_entry->object.uvm_obj &&
4459 			    new_entry->object.uvm_obj->pgops->pgo_reference)
4460 				new_entry->object.uvm_obj->
4461 				    pgops->pgo_reference(
4462 				        new_entry->object.uvm_obj);
4463 
4464 			/* insert entry at end of new_map's entry list */
4465 			uvm_map_entry_link(new_map, new_map->header.prev,
4466 			    new_entry);
4467 
4468 			break;
4469 
4470 		case MAP_INHERIT_COPY:
4471 
4472 			/*
4473 			 * copy-on-write the mapping (using mmap's
4474 			 * MAP_PRIVATE semantics)
4475 			 *
4476 			 * allocate new_entry, adjust reference counts.
4477 			 * (note that new references are read-only).
4478 			 */
4479 
4480 			new_entry = uvm_mapent_alloc(new_map, 0);
4481 			/* old_entry -> new_entry */
4482 			uvm_mapent_copy(old_entry, new_entry);
4483 
4484 			if (new_entry->aref.ar_amap)
4485 				uvm_map_reference_amap(new_entry, 0);
4486 
4487 			if (new_entry->object.uvm_obj &&
4488 			    new_entry->object.uvm_obj->pgops->pgo_reference)
4489 				new_entry->object.uvm_obj->pgops->pgo_reference
4490 				    (new_entry->object.uvm_obj);
4491 
4492 			/* new pmap has nothing wired in it */
4493 			new_entry->wired_count = 0;
4494 
4495 			new_entry->etype |=
4496 			    (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
4497 			uvm_map_entry_link(new_map, new_map->header.prev,
4498 			    new_entry);
4499 
4500 			/*
4501 			 * the new entry will need an amap.  it will either
4502 			 * need to be copied from the old entry or created
4503 			 * from scratch (if the old entry does not have an
4504 			 * amap).  can we defer this process until later
4505 			 * (by setting "needs_copy") or do we need to copy
4506 			 * the amap now?
4507 			 *
4508 			 * we must copy the amap now if any of the following
4509 			 * conditions hold:
4510 			 * 1. the old entry has an amap and that amap is
4511 			 *    being shared.  this means that the old (parent)
4512 			 *    process is sharing the amap with another
4513 			 *    process.  if we do not clear needs_copy here
4514 			 *    we will end up in a situation where both the
4515 			 *    parent and child process are refering to the
4516 			 *    same amap with "needs_copy" set.  if the
4517 			 *    parent write-faults, the fault routine will
4518 			 *    clear "needs_copy" in the parent by allocating
4519 			 *    a new amap.   this is wrong because the
4520 			 *    parent is supposed to be sharing the old amap
4521 			 *    and the new amap will break that.
4522 			 *
4523 			 * 2. if the old entry has an amap and a non-zero
4524 			 *    wire count then we are going to have to call
4525 			 *    amap_cow_now to avoid page faults in the
4526 			 *    parent process.   since amap_cow_now requires
4527 			 *    "needs_copy" to be clear we might as well
4528 			 *    clear it here as well.
4529 			 *
4530 			 */
4531 
4532 			if (old_entry->aref.ar_amap != NULL) {
4533 				if ((amap_flags(old_entry->aref.ar_amap) &
4534 				     AMAP_SHARED) != 0 ||
4535 				    VM_MAPENT_ISWIRED(old_entry)) {
4536 
4537 					amap_copy(new_map, new_entry,
4538 					    AMAP_COPY_NOCHUNK, 0, 0);
4539 					/* XXXCDC: M_WAITOK ... ok? */
4540 				}
4541 			}
4542 
4543 			/*
4544 			 * if the parent's entry is wired down, then the
4545 			 * parent process does not want page faults on
4546 			 * access to that memory.  this means that we
4547 			 * cannot do copy-on-write because we can't write
4548 			 * protect the old entry.   in this case we
4549 			 * resolve all copy-on-write faults now, using
4550 			 * amap_cow_now.   note that we have already
4551 			 * allocated any needed amap (above).
4552 			 */
4553 
4554 			if (VM_MAPENT_ISWIRED(old_entry)) {
4555 
4556 			  /*
4557 			   * resolve all copy-on-write faults now
4558 			   * (note that there is nothing to do if
4559 			   * the old mapping does not have an amap).
4560 			   */
4561 			  if (old_entry->aref.ar_amap)
4562 			    amap_cow_now(new_map, new_entry);
4563 
4564 			} else {
4565 
4566 			  /*
4567 			   * setup mappings to trigger copy-on-write faults
4568 			   * we must write-protect the parent if it has
4569 			   * an amap and it is not already "needs_copy"...
4570 			   * if it is already "needs_copy" then the parent
4571 			   * has already been write-protected by a previous
4572 			   * fork operation.
4573 			   */
4574 
4575 			  if (old_entry->aref.ar_amap &&
4576 			      !UVM_ET_ISNEEDSCOPY(old_entry)) {
4577 			      if (old_entry->max_protection & VM_PROT_WRITE) {
4578 				pmap_protect(old_map->pmap,
4579 					     old_entry->start,
4580 					     old_entry->end,
4581 					     old_entry->protection &
4582 					     ~VM_PROT_WRITE);
4583 			      }
4584 			      old_entry->etype |= UVM_ET_NEEDSCOPY;
4585 			  }
4586 			}
4587 			break;
4588 		}  /* end of switch statement */
4589 		old_entry = old_entry->next;
4590 	}
4591 
4592 	pmap_update(old_map->pmap);
4593 	vm_map_unlock(old_map);
4594 
4595 #ifdef SYSVSHM
4596 	if (vm1->vm_shm)
4597 		shmfork(vm1, vm2);
4598 #endif
4599 
4600 #ifdef PMAP_FORK
4601 	pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
4602 #endif
4603 
4604 	UVMHIST_LOG(maphist,"<- done",0,0,0,0);
4605 	return (vm2);
4606 }
4607 
4608 
4609 /*
4610  * in-kernel map entry allocation.
4611  */
4612 
4613 struct uvm_kmapent_hdr {
4614 	LIST_ENTRY(uvm_kmapent_hdr) ukh_listq;
4615 	int ukh_nused;
4616 	struct vm_map_entry *ukh_freelist;
4617 	struct vm_map *ukh_map;
4618 	struct vm_map_entry ukh_entries[0];
4619 };
4620 
4621 #define	UVM_KMAPENT_CHUNK				\
4622 	((PAGE_SIZE - sizeof(struct uvm_kmapent_hdr))	\
4623 	/ sizeof(struct vm_map_entry))
4624 
4625 #define	UVM_KHDR_FIND(entry)	\
4626 	((struct uvm_kmapent_hdr *)(((vaddr_t)entry) & ~PAGE_MASK))
4627 
4628 
4629 #ifdef DIAGNOSTIC
4630 static struct vm_map *
4631 uvm_kmapent_map(struct vm_map_entry *entry)
4632 {
4633 	const struct uvm_kmapent_hdr *ukh;
4634 
4635 	ukh = UVM_KHDR_FIND(entry);
4636 	return ukh->ukh_map;
4637 }
4638 #endif
4639 
4640 static inline struct vm_map_entry *
4641 uvm_kmapent_get(struct uvm_kmapent_hdr *ukh)
4642 {
4643 	struct vm_map_entry *entry;
4644 
4645 	KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
4646 	KASSERT(ukh->ukh_nused >= 0);
4647 
4648 	entry = ukh->ukh_freelist;
4649 	if (entry) {
4650 		KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
4651 		    == UVM_MAP_KERNEL);
4652 		ukh->ukh_freelist = entry->next;
4653 		ukh->ukh_nused++;
4654 		KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
4655 	} else {
4656 		KASSERT(ukh->ukh_nused == UVM_KMAPENT_CHUNK);
4657 	}
4658 
4659 	return entry;
4660 }
4661 
4662 static inline void
4663 uvm_kmapent_put(struct uvm_kmapent_hdr *ukh, struct vm_map_entry *entry)
4664 {
4665 
4666 	KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
4667 	    == UVM_MAP_KERNEL);
4668 	KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
4669 	KASSERT(ukh->ukh_nused > 0);
4670 	KASSERT(ukh->ukh_freelist != NULL ||
4671 	    ukh->ukh_nused == UVM_KMAPENT_CHUNK);
4672 	KASSERT(ukh->ukh_freelist == NULL ||
4673 	    ukh->ukh_nused < UVM_KMAPENT_CHUNK);
4674 
4675 	ukh->ukh_nused--;
4676 	entry->next = ukh->ukh_freelist;
4677 	ukh->ukh_freelist = entry;
4678 }
4679 
4680 /*
4681  * uvm_kmapent_alloc: allocate a map entry for in-kernel map
4682  */
4683 
4684 static struct vm_map_entry *
4685 uvm_kmapent_alloc(struct vm_map *map, int flags)
4686 {
4687 	struct vm_page *pg;
4688 	struct uvm_kmapent_hdr *ukh;
4689 	struct vm_map_entry *entry;
4690 #ifndef PMAP_MAP_POOLPAGE
4691 	struct uvm_map_args args;
4692 	uvm_flag_t mapflags = UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
4693 	    UVM_INH_NONE, UVM_ADV_RANDOM, flags | UVM_FLAG_NOMERGE);
4694 	int error;
4695 #endif
4696 	vaddr_t va;
4697 	int i;
4698 
4699 	KDASSERT(UVM_KMAPENT_CHUNK > 2);
4700 	KDASSERT(kernel_map != NULL);
4701 	KASSERT(vm_map_pmap(map) == pmap_kernel());
4702 
4703 	UVMMAP_EVCNT_INCR(uke_alloc);
4704 	entry = NULL;
4705 again:
4706 	/*
4707 	 * try to grab an entry from freelist.
4708 	 */
4709 	mutex_spin_enter(&uvm_kentry_lock);
4710 	ukh = LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free);
4711 	if (ukh) {
4712 		entry = uvm_kmapent_get(ukh);
4713 		if (ukh->ukh_nused == UVM_KMAPENT_CHUNK)
4714 			LIST_REMOVE(ukh, ukh_listq);
4715 	}
4716 	mutex_spin_exit(&uvm_kentry_lock);
4717 
4718 	if (entry)
4719 		return entry;
4720 
4721 	/*
4722 	 * there's no free entry for this vm_map.
4723 	 * now we need to allocate some vm_map_entry.
4724 	 * for simplicity, always allocate one page chunk of them at once.
4725 	 */
4726 
4727 	pg = uvm_pagealloc(NULL, 0, NULL,
4728 	    (flags & UVM_KMF_NOWAIT) != 0 ? UVM_PGA_USERESERVE : 0);
4729 	if (__predict_false(pg == NULL)) {
4730 		if (flags & UVM_FLAG_NOWAIT)
4731 			return NULL;
4732 		uvm_wait("kme_alloc");
4733 		goto again;
4734 	}
4735 
4736 #ifdef PMAP_MAP_POOLPAGE
4737 	va = PMAP_MAP_POOLPAGE(VM_PAGE_TO_PHYS(pg));
4738 	KASSERT(va != 0);
4739 #else
4740 	error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, UVM_UNKNOWN_OFFSET,
4741 	    0, mapflags, &args);
4742 	if (error) {
4743 		uvm_pagefree(pg);
4744 		return NULL;
4745 	}
4746 
4747 	va = args.uma_start;
4748 
4749 	pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
4750 	    VM_PROT_READ|VM_PROT_WRITE, PMAP_KMPAGE);
4751 	pmap_update(vm_map_pmap(map));
4752 
4753 #endif
4754 	ukh = (void *)va;
4755 
4756 	/*
4757 	 * use the last entry for ukh itsself.
4758 	 */
4759 
4760 	i = UVM_KMAPENT_CHUNK - 1;
4761 #ifndef PMAP_MAP_POOLPAGE
4762 	entry = &ukh->ukh_entries[i--];
4763 	entry->flags = UVM_MAP_KERNEL | UVM_MAP_KMAPENT;
4764 	error = uvm_map_enter(map, &args, entry);
4765 	KASSERT(error == 0);
4766 #endif
4767 
4768 	ukh->ukh_nused = UVM_KMAPENT_CHUNK;
4769 	ukh->ukh_map = map;
4770 	ukh->ukh_freelist = NULL;
4771 	for (; i >= 1; i--) {
4772 		struct vm_map_entry *xentry = &ukh->ukh_entries[i];
4773 
4774 		xentry->flags = UVM_MAP_KERNEL;
4775 		uvm_kmapent_put(ukh, xentry);
4776 	}
4777 #ifdef PMAP_MAP_POOLPAGE
4778 	KASSERT(ukh->ukh_nused == 1);
4779 #else
4780 	KASSERT(ukh->ukh_nused == 2);
4781 #endif
4782 
4783 	mutex_spin_enter(&uvm_kentry_lock);
4784 	LIST_INSERT_HEAD(&vm_map_to_kernel(map)->vmk_kentry_free,
4785 	    ukh, ukh_listq);
4786 	mutex_spin_exit(&uvm_kentry_lock);
4787 
4788 	/*
4789 	 * return first entry.
4790 	 */
4791 
4792 	entry = &ukh->ukh_entries[0];
4793 	entry->flags = UVM_MAP_KERNEL;
4794 	UVMMAP_EVCNT_INCR(ukh_alloc);
4795 
4796 	return entry;
4797 }
4798 
4799 /*
4800  * uvm_mapent_free: free map entry for in-kernel map
4801  */
4802 
4803 static void
4804 uvm_kmapent_free(struct vm_map_entry *entry)
4805 {
4806 	struct uvm_kmapent_hdr *ukh;
4807 	struct vm_page *pg;
4808 	struct vm_map *map;
4809 #ifndef PMAP_UNMAP_POOLPAGE
4810 	struct pmap *pmap;
4811 	struct vm_map_entry *deadentry;
4812 #endif
4813 	vaddr_t va;
4814 	paddr_t pa;
4815 
4816 	UVMMAP_EVCNT_INCR(uke_free);
4817 	ukh = UVM_KHDR_FIND(entry);
4818 	map = ukh->ukh_map;
4819 
4820 	mutex_spin_enter(&uvm_kentry_lock);
4821 	uvm_kmapent_put(ukh, entry);
4822 #ifdef PMAP_UNMAP_POOLPAGE
4823 	if (ukh->ukh_nused > 0) {
4824 #else
4825 	if (ukh->ukh_nused > 1) {
4826 #endif
4827 		if (ukh->ukh_nused == UVM_KMAPENT_CHUNK - 1)
4828 			LIST_INSERT_HEAD(
4829 			    &vm_map_to_kernel(map)->vmk_kentry_free,
4830 			    ukh, ukh_listq);
4831 		mutex_spin_exit(&uvm_kentry_lock);
4832 		return;
4833 	}
4834 
4835 	/*
4836 	 * now we can free this ukh.
4837 	 *
4838 	 * however, keep an empty ukh to avoid ping-pong.
4839 	 */
4840 
4841 	if (LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free) == ukh &&
4842 	    LIST_NEXT(ukh, ukh_listq) == NULL) {
4843 		mutex_spin_exit(&uvm_kentry_lock);
4844 		return;
4845 	}
4846 	LIST_REMOVE(ukh, ukh_listq);
4847 	mutex_spin_exit(&uvm_kentry_lock);
4848 
4849 	va = (vaddr_t)ukh;
4850 
4851 #ifdef PMAP_UNMAP_POOLPAGE
4852 	KASSERT(ukh->ukh_nused == 0);
4853 	pa = PMAP_UNMAP_POOLPAGE(va);
4854 	KASSERT(pa != 0);
4855 #else
4856 	KASSERT(ukh->ukh_nused == 1);
4857 
4858 	/*
4859 	 * remove map entry for ukh itsself.
4860 	 */
4861 
4862 	KASSERT((va & PAGE_MASK) == 0);
4863 	vm_map_lock(map);
4864 	uvm_unmap_remove(map, va, va + PAGE_SIZE, &deadentry, NULL, 0);
4865 	KASSERT(deadentry->flags & UVM_MAP_KERNEL);
4866 	KASSERT(deadentry->flags & UVM_MAP_KMAPENT);
4867 	KASSERT(deadentry->next == NULL);
4868 	KASSERT(deadentry == &ukh->ukh_entries[UVM_KMAPENT_CHUNK - 1]);
4869 
4870 	/*
4871 	 * unmap the page from pmap and free it.
4872 	 */
4873 
4874 	pmap = vm_map_pmap(map);
4875 	KASSERT(pmap == pmap_kernel());
4876 	if (!pmap_extract(pmap, va, &pa))
4877 		panic("%s: no mapping", __func__);
4878 	pmap_kremove(va, PAGE_SIZE);
4879 	pmap_update(vm_map_pmap(map));
4880 	vm_map_unlock(map);
4881 #endif /* !PMAP_UNMAP_POOLPAGE */
4882 	pg = PHYS_TO_VM_PAGE(pa);
4883 	uvm_pagefree(pg);
4884 	UVMMAP_EVCNT_INCR(ukh_free);
4885 }
4886 
4887 static vsize_t
4888 uvm_kmapent_overhead(vsize_t size)
4889 {
4890 
4891 	/*
4892 	 * - the max number of unmerged entries is howmany(size, PAGE_SIZE)
4893 	 *   as the min allocation unit is PAGE_SIZE.
4894 	 * - UVM_KMAPENT_CHUNK "kmapent"s are allocated from a page.
4895 	 *   one of them are used to map the page itself.
4896 	 */
4897 
4898 	return howmany(howmany(size, PAGE_SIZE), (UVM_KMAPENT_CHUNK - 1)) *
4899 	    PAGE_SIZE;
4900 }
4901 
4902 /*
4903  * map entry reservation
4904  */
4905 
4906 /*
4907  * uvm_mapent_reserve: reserve map entries for clipping before locking map.
4908  *
4909  * => needed when unmapping entries allocated without UVM_FLAG_QUANTUM.
4910  * => caller shouldn't hold map locked.
4911  */
4912 int
4913 uvm_mapent_reserve(struct vm_map *map, struct uvm_mapent_reservation *umr,
4914     int nentries, int flags)
4915 {
4916 
4917 	umr->umr_nentries = 0;
4918 
4919 	if ((flags & UVM_FLAG_QUANTUM) != 0)
4920 		return 0;
4921 
4922 	if (!VM_MAP_USE_KMAPENT(map))
4923 		return 0;
4924 
4925 	while (nentries--) {
4926 		struct vm_map_entry *ent;
4927 		ent = uvm_kmapent_alloc(map, flags);
4928 		if (!ent) {
4929 			uvm_mapent_unreserve(map, umr);
4930 			return ENOMEM;
4931 		}
4932 		UMR_PUTENTRY(umr, ent);
4933 	}
4934 
4935 	return 0;
4936 }
4937 
4938 /*
4939  * uvm_mapent_unreserve:
4940  *
4941  * => caller shouldn't hold map locked.
4942  * => never fail or sleep.
4943  */
4944 void
4945 uvm_mapent_unreserve(struct vm_map *map, struct uvm_mapent_reservation *umr)
4946 {
4947 
4948 	while (!UMR_EMPTY(umr))
4949 		uvm_kmapent_free(UMR_GETENTRY(umr));
4950 }
4951 
4952 /*
4953  * uvm_mapent_trymerge: try to merge an entry with its neighbors.
4954  *
4955  * => called with map locked.
4956  * => return non zero if successfully merged.
4957  */
4958 
4959 int
4960 uvm_mapent_trymerge(struct vm_map *map, struct vm_map_entry *entry, int flags)
4961 {
4962 	struct uvm_object *uobj;
4963 	struct vm_map_entry *next;
4964 	struct vm_map_entry *prev;
4965 	vsize_t size;
4966 	int merged = 0;
4967 	bool copying;
4968 	int newetype;
4969 
4970 	if (VM_MAP_USE_KMAPENT(map)) {
4971 		return 0;
4972 	}
4973 	if (entry->aref.ar_amap != NULL) {
4974 		return 0;
4975 	}
4976 	if ((entry->flags & UVM_MAP_NOMERGE) != 0) {
4977 		return 0;
4978 	}
4979 
4980 	uobj = entry->object.uvm_obj;
4981 	size = entry->end - entry->start;
4982 	copying = (flags & UVM_MERGE_COPYING) != 0;
4983 	newetype = copying ? (entry->etype & ~UVM_ET_NEEDSCOPY) : entry->etype;
4984 
4985 	next = entry->next;
4986 	if (next != &map->header &&
4987 	    next->start == entry->end &&
4988 	    ((copying && next->aref.ar_amap != NULL &&
4989 	    amap_refs(next->aref.ar_amap) == 1) ||
4990 	    (!copying && next->aref.ar_amap == NULL)) &&
4991 	    UVM_ET_ISCOMPATIBLE(next, newetype,
4992 	    uobj, entry->flags, entry->protection,
4993 	    entry->max_protection, entry->inheritance, entry->advice,
4994 	    entry->wired_count) &&
4995 	    (uobj == NULL || entry->offset + size == next->offset)) {
4996 		int error;
4997 
4998 		if (copying) {
4999 			error = amap_extend(next, size,
5000 			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_BACKWARDS);
5001 		} else {
5002 			error = 0;
5003 		}
5004 		if (error == 0) {
5005 			if (uobj) {
5006 				if (uobj->pgops->pgo_detach) {
5007 					uobj->pgops->pgo_detach(uobj);
5008 				}
5009 			}
5010 
5011 			entry->end = next->end;
5012 			clear_hints(map, next);
5013 			uvm_map_entry_unlink(map, next);
5014 			if (copying) {
5015 				entry->aref = next->aref;
5016 				entry->etype &= ~UVM_ET_NEEDSCOPY;
5017 			}
5018 			uvm_map_check(map, "trymerge forwardmerge");
5019 			uvm_mapent_free_merged(map, next);
5020 			merged++;
5021 		}
5022 	}
5023 
5024 	prev = entry->prev;
5025 	if (prev != &map->header &&
5026 	    prev->end == entry->start &&
5027 	    ((copying && !merged && prev->aref.ar_amap != NULL &&
5028 	    amap_refs(prev->aref.ar_amap) == 1) ||
5029 	    (!copying && prev->aref.ar_amap == NULL)) &&
5030 	    UVM_ET_ISCOMPATIBLE(prev, newetype,
5031 	    uobj, entry->flags, entry->protection,
5032 	    entry->max_protection, entry->inheritance, entry->advice,
5033 	    entry->wired_count) &&
5034 	    (uobj == NULL ||
5035 	    prev->offset + prev->end - prev->start == entry->offset)) {
5036 		int error;
5037 
5038 		if (copying) {
5039 			error = amap_extend(prev, size,
5040 			    AMAP_EXTEND_NOWAIT|AMAP_EXTEND_FORWARDS);
5041 		} else {
5042 			error = 0;
5043 		}
5044 		if (error == 0) {
5045 			if (uobj) {
5046 				if (uobj->pgops->pgo_detach) {
5047 					uobj->pgops->pgo_detach(uobj);
5048 				}
5049 				entry->offset = prev->offset;
5050 			}
5051 
5052 			entry->start = prev->start;
5053 			clear_hints(map, prev);
5054 			uvm_map_entry_unlink(map, prev);
5055 			if (copying) {
5056 				entry->aref = prev->aref;
5057 				entry->etype &= ~UVM_ET_NEEDSCOPY;
5058 			}
5059 			uvm_map_check(map, "trymerge backmerge");
5060 			uvm_mapent_free_merged(map, prev);
5061 			merged++;
5062 		}
5063 	}
5064 
5065 	return merged;
5066 }
5067 
5068 /*
5069  * uvm_map_create: create map
5070  */
5071 
5072 struct vm_map *
5073 uvm_map_create(pmap_t pmap, vaddr_t vmin, vaddr_t vmax, int flags)
5074 {
5075 	struct vm_map *result;
5076 
5077 	result = malloc(sizeof(struct vm_map), M_VMMAP, M_WAITOK);
5078 	uvm_map_setup(result, vmin, vmax, flags);
5079 	result->pmap = pmap;
5080 	return(result);
5081 }
5082 
5083 /*
5084  * uvm_map_setup: init map
5085  *
5086  * => map must not be in service yet.
5087  */
5088 
5089 void
5090 uvm_map_setup(struct vm_map *map, vaddr_t vmin, vaddr_t vmax, int flags)
5091 {
5092 	int ipl;
5093 
5094 	rb_tree_init(&map->rb_tree, &uvm_map_tree_ops);
5095 	map->header.next = map->header.prev = &map->header;
5096 	map->nentries = 0;
5097 	map->size = 0;
5098 	map->ref_count = 1;
5099 	vm_map_setmin(map, vmin);
5100 	vm_map_setmax(map, vmax);
5101 	map->flags = flags;
5102 	map->first_free = &map->header;
5103 	map->hint = &map->header;
5104 	map->timestamp = 0;
5105 	map->busy = NULL;
5106 
5107 	if ((flags & VM_MAP_INTRSAFE) != 0) {
5108 		ipl = IPL_VM;
5109 	} else {
5110 		ipl = IPL_NONE;
5111 	}
5112 
5113 	rw_init(&map->lock);
5114 	cv_init(&map->cv, "vm_map");
5115 	mutex_init(&map->misc_lock, MUTEX_DRIVER, ipl);
5116 	mutex_init(&map->mutex, MUTEX_DRIVER, ipl);
5117 }
5118 
5119 
5120 /*
5121  *   U N M A P   -   m a i n   e n t r y   p o i n t
5122  */
5123 
5124 /*
5125  * uvm_unmap1: remove mappings from a vm_map (from "start" up to "stop")
5126  *
5127  * => caller must check alignment and size
5128  * => map must be unlocked (we will lock it)
5129  * => flags is UVM_FLAG_QUANTUM or 0.
5130  */
5131 
5132 void
5133 uvm_unmap1(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
5134 {
5135 	struct vm_map_entry *dead_entries;
5136 	struct uvm_mapent_reservation umr;
5137 	UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);
5138 
5139 	UVMHIST_LOG(maphist, "  (map=0x%x, start=0x%x, end=0x%x)",
5140 	    map, start, end, 0);
5141 	if (map == kernel_map) {
5142 		LOCKDEBUG_MEM_CHECK((void *)start, end - start);
5143 	}
5144 	/*
5145 	 * work now done by helper functions.   wipe the pmap's and then
5146 	 * detach from the dead entries...
5147 	 */
5148 	uvm_mapent_reserve(map, &umr, 2, flags);
5149 	vm_map_lock(map);
5150 	uvm_unmap_remove(map, start, end, &dead_entries, &umr, flags);
5151 	vm_map_unlock(map);
5152 	uvm_mapent_unreserve(map, &umr);
5153 
5154 	if (dead_entries != NULL)
5155 		uvm_unmap_detach(dead_entries, 0);
5156 
5157 	UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
5158 }
5159 
5160 
5161 /*
5162  * uvm_map_reference: add reference to a map
5163  *
5164  * => map need not be locked (we use misc_lock).
5165  */
5166 
5167 void
5168 uvm_map_reference(struct vm_map *map)
5169 {
5170 	mutex_enter(&map->misc_lock);
5171 	map->ref_count++;
5172 	mutex_exit(&map->misc_lock);
5173 }
5174 
5175 struct vm_map_kernel *
5176 vm_map_to_kernel(struct vm_map *map)
5177 {
5178 
5179 	KASSERT(VM_MAP_IS_KERNEL(map));
5180 
5181 	return (struct vm_map_kernel *)map;
5182 }
5183 
5184 bool
5185 vm_map_starved_p(struct vm_map *map)
5186 {
5187 
5188 	if ((map->flags & VM_MAP_WANTVA) != 0) {
5189 		return true;
5190 	}
5191 	/* XXX */
5192 	if ((vm_map_max(map) - vm_map_min(map)) / 16 * 15 < map->size) {
5193 		return true;
5194 	}
5195 	return false;
5196 }
5197 
5198 #if defined(DDB) || defined(DEBUGPRINT)
5199 
5200 /*
5201  * uvm_map_printit: actually prints the map
5202  */
5203 
5204 void
5205 uvm_map_printit(struct vm_map *map, bool full,
5206     void (*pr)(const char *, ...))
5207 {
5208 	struct vm_map_entry *entry;
5209 
5210 	(*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map),
5211 	    vm_map_max(map));
5212 	(*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
5213 	    map->nentries, map->size, map->ref_count, map->timestamp,
5214 	    map->flags);
5215 	(*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
5216 	    pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
5217 	if (!full)
5218 		return;
5219 	for (entry = map->header.next; entry != &map->header;
5220 	    entry = entry->next) {
5221 		(*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
5222 		    entry, entry->start, entry->end, entry->object.uvm_obj,
5223 		    (long long)entry->offset, entry->aref.ar_amap,
5224 		    entry->aref.ar_pageoff);
5225 		(*pr)(
5226 		    "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
5227 		    "wc=%d, adv=%d\n",
5228 		    (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
5229 		    (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
5230 		    (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
5231 		    entry->protection, entry->max_protection,
5232 		    entry->inheritance, entry->wired_count, entry->advice);
5233 	}
5234 }
5235 
5236 void
5237 uvm_whatis(uintptr_t addr, void (*pr)(const char *, ...))
5238 {
5239 	struct vm_map *map;
5240 
5241 	for (map = kernel_map;;) {
5242 		struct vm_map_entry *entry;
5243 
5244 		if (!uvm_map_lookup_entry_bytree(map, (vaddr_t)addr, &entry)) {
5245 			break;
5246 		}
5247 		(*pr)("%p is %p+%zu from VMMAP %p\n",
5248 		    (void *)addr, (void *)entry->start,
5249 		    (size_t)(addr - (uintptr_t)entry->start), map);
5250 		if (!UVM_ET_ISSUBMAP(entry)) {
5251 			break;
5252 		}
5253 		map = entry->object.sub_map;
5254 	}
5255 }
5256 
5257 #endif /* DDB || DEBUGPRINT */
5258 
5259 #ifndef __USER_VA0_IS_SAFE
5260 static int
5261 sysctl_user_va0_disable(SYSCTLFN_ARGS)
5262 {
5263 	struct sysctlnode node;
5264 	int t, error;
5265 
5266 	node = *rnode;
5267 	node.sysctl_data = &t;
5268 	t = user_va0_disable;
5269 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
5270 	if (error || newp == NULL)
5271 		return (error);
5272 
5273 	/* lower only at securelevel < 1 */
5274 	if (!t && user_va0_disable &&
5275 	    kauth_authorize_system(l->l_cred,
5276 				   KAUTH_SYSTEM_CHSYSFLAGS /* XXX */, 0,
5277 				   NULL, NULL, NULL))
5278 		return EPERM;
5279 
5280 	user_va0_disable = !!t;
5281 	return 0;
5282 }
5283 
5284 SYSCTL_SETUP(sysctl_uvmmap_setup, "sysctl uvmmap setup")
5285 {
5286 
5287         sysctl_createv(clog, 0, NULL, NULL,
5288                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
5289                        CTLTYPE_INT, "user_va0_disable",
5290                        SYSCTL_DESCR("Disable VA 0"),
5291                        sysctl_user_va0_disable, 0, &user_va0_disable, 0,
5292                        CTL_VM, CTL_CREATE, CTL_EOL);
5293 }
5294 #endif
5295