1 /*	$OpenBSD: uvm_addr.c,v 1.29 2020/09/22 14:31:08 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /* #define DEBUG */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <uvm/uvm.h>
24 #include <uvm/uvm_addr.h>
25 #include <sys/pool.h>
26 
27 /* Max gap between hint allocations. */
28 #define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
29 /* Number of pivots in pivot allocator. */
30 #define NUM_PIVOTS		16
31 /*
32  * Max number (inclusive) of pages the pivot allocator
33  * will place between allocations.
34  *
35  * The uaddr_pivot_random() function attempts to bias towards
36  * small space between allocations, so putting a large number here is fine.
37  */
38 #define PIVOT_RND		8
39 /*
40  * Number of allocations that a pivot can supply before expiring.
41  * When a pivot expires, a new pivot has to be found.
42  *
43  * Must be at least 1.
44  */
45 #define PIVOT_EXPIRE		1024
46 
47 
48 /* Pool with uvm_addr_state structures. */
49 struct pool uaddr_pool;
50 struct pool uaddr_bestfit_pool;
51 struct pool uaddr_pivot_pool;
52 struct pool uaddr_rnd_pool;
53 
54 /* uvm_addr state for bestfit selector. */
55 struct uaddr_bestfit_state {
56 	struct uvm_addr_state		 ubf_uaddr;
57 	struct uaddr_free_rbtree	 ubf_free;
58 };
59 
60 /* uvm_addr state for rnd selector. */
61 struct uaddr_rnd_state {
62 	struct uvm_addr_state		 ur_uaddr;
63 #if 0
64 	TAILQ_HEAD(, vm_map_entry)	 ur_free;
65 #endif
66 };
67 
68 /* Definition of a pivot in pivot selector. */
69 struct uaddr_pivot {
70 	vaddr_t				 addr;	/* End of prev. allocation. */
71 	int				 expire;/* Best before date. */
72 	int				 dir;	/* Direction. */
73 	struct vm_map_entry		*entry; /* Will contain next alloc. */
74 };
75 /* uvm_addr state for pivot selector. */
76 struct uaddr_pivot_state {
77 	struct uvm_addr_state		 up_uaddr;
78 
79 	/* Free space tree, for fast pivot selection. */
80 	struct uaddr_free_rbtree	 up_free;
81 
82 	/* Array of pivots. Each addr points just past its last allocation. */
83 	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
84 };
85 
86 /* Forward declaration (see below). */
87 extern const struct uvm_addr_functions uaddr_kernel_functions;
88 struct uvm_addr_state uaddr_kbootstrap;
89 
90 /* Support functions. */
91 #ifndef SMALL_KERNEL
92 struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
93 			    vsize_t);
94 #endif /* !SMALL_KERNEL */
95 void			 uaddr_kinsert(struct vm_map *,
96 			    struct uvm_addr_state *, struct vm_map_entry *);
97 void			 uaddr_kremove(struct vm_map *,
98 			    struct uvm_addr_state *, struct vm_map_entry *);
99 void			 uaddr_kbootstrapdestroy(struct uvm_addr_state *);
100 
101 void			 uaddr_destroy(struct uvm_addr_state *);
102 void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
103 void			 uaddr_rnd_destroy(struct uvm_addr_state *);
104 void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
105 void			 uaddr_pivot_destroy(struct uvm_addr_state *);
106 
107 #if 0
108 int			 uaddr_lin_select(struct vm_map *,
109 			    struct uvm_addr_state *, struct vm_map_entry **,
110 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
111 			    vaddr_t);
112 #endif
113 int			 uaddr_kbootstrap_select(struct vm_map *,
114 			    struct uvm_addr_state *, struct vm_map_entry **,
115 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
116 			    vaddr_t);
117 int			 uaddr_rnd_select(struct vm_map *,
118 			    struct uvm_addr_state *, struct vm_map_entry **,
119 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
120 			    vaddr_t);
121 int			 uaddr_bestfit_select(struct vm_map *,
122 			    struct uvm_addr_state*, struct vm_map_entry **,
123 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
124 			    vaddr_t);
125 #ifndef SMALL_KERNEL
126 int			 uaddr_pivot_select(struct vm_map *,
127 			    struct uvm_addr_state *, struct vm_map_entry **,
128 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
129 			    vaddr_t);
130 int			 uaddr_stack_brk_select(struct vm_map *,
131 			    struct uvm_addr_state *, struct vm_map_entry **,
132 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
133 			    vaddr_t);
134 #endif /* !SMALL_KERNEL */
135 
136 void			 uaddr_rnd_insert(struct vm_map *,
137 			    struct uvm_addr_state *, struct vm_map_entry *);
138 void			 uaddr_rnd_remove(struct vm_map *,
139 			    struct uvm_addr_state *, struct vm_map_entry *);
140 void			 uaddr_bestfit_insert(struct vm_map *,
141 			    struct uvm_addr_state *, struct vm_map_entry *);
142 void			 uaddr_bestfit_remove(struct vm_map *,
143 			    struct uvm_addr_state *, struct vm_map_entry *);
144 void			 uaddr_pivot_insert(struct vm_map *,
145 			    struct uvm_addr_state *, struct vm_map_entry *);
146 void			 uaddr_pivot_remove(struct vm_map *,
147 			    struct uvm_addr_state *, struct vm_map_entry *);
148 
149 #ifndef SMALL_KERNEL
150 vsize_t			 uaddr_pivot_random(void);
151 int			 uaddr_pivot_newpivot(struct vm_map *,
152 			    struct uaddr_pivot_state *, struct uaddr_pivot *,
153 			    struct vm_map_entry **, vaddr_t *,
154 			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
155 #endif /* !SMALL_KERNEL */
156 
157 #if defined(DEBUG) || defined(DDB)
158 void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
159 			    int (*)(const char *, ...));
160 #if 0
161 void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
162 			    int (*)(const char *, ...));
163 #endif
164 #endif /* DEBUG || DDB */
165 
166 
167 #ifndef SMALL_KERNEL
168 /*
169  * Find smallest entry in tree that will fit sz bytes.
170  */
171 struct vm_map_entry *
172 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
173 {
174 	struct vm_map_entry	*tmp, *res;
175 
176 	tmp = RBT_ROOT(uaddr_free_rbtree, free);
177 	res = NULL;
178 	while (tmp) {
179 		if (tmp->fspace >= sz) {
180 			res = tmp;
181 			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
182 		} else if (tmp->fspace < sz)
183 			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
184 	}
185 	return res;
186 }
187 #endif /* !SMALL_KERNEL */
188 
189 static inline vaddr_t
190 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
191 {
192 	vaddr_t adjusted;
193 
194 	KASSERT(offset < align || (align == 0 && offset == 0));
195 	KASSERT((align & (align - 1)) == 0);
196 	KASSERT((offset & PAGE_MASK) == 0);
197 
198 	align = MAX(align, PAGE_SIZE);
199 	adjusted = addr & ~(align - 1);
200 	adjusted += offset;
201 	return (adjusted < addr ? adjusted + align : adjusted);
202 }
203 
204 static inline vaddr_t
205 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
206 {
207 	vaddr_t adjusted;
208 
209 	KASSERT(offset < align || (align == 0 && offset == 0));
210 	KASSERT((align & (align - 1)) == 0);
211 	KASSERT((offset & PAGE_MASK) == 0);
212 
213 	align = MAX(align, PAGE_SIZE);
214 	adjusted = addr & ~(align - 1);
215 	adjusted += offset;
216 	return (adjusted > addr ? adjusted - align : adjusted);
217 }
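
/*
 * Illustrative sketch (not compiled): a worked example of the two
 * alignment helpers above, assuming a 4 KB PAGE_SIZE.  With
 * align = 0x10000 and offset = 0x2000, an address of 0x35000 rounds
 * up to 0x42000 and down to 0x32000.  The function name is made up.
 */
#if 0
static void
uvm_addr_align_example(void)
{
	vaddr_t align = 0x10000, offset = 0x2000;

	/* 0x35000 & ~0xffff = 0x30000; plus offset = 0x32000, which is */
	/* below 0x35000, so the forward variant adds one align step.   */
	KASSERT(uvm_addr_align_forward(0x35000, align, offset) == 0x42000);
	/* 0x32000 is already below 0x35000, so backward keeps it. */
	KASSERT(uvm_addr_align_backward(0x35000, align, offset) == 0x32000);
}
#endif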
218 
219 /*
220  * Try to fit the requested space into the entry.
221  */
222 int
223 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
224     vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
225     vaddr_t align, vaddr_t offset,
226     vsize_t before_gap, vsize_t after_gap)
227 {
228 	vaddr_t	tmp;
229 	vsize_t	fspace;
230 
231 	if (low_addr > high_addr)
232 		return ENOMEM;
233 	fspace = high_addr - low_addr;
234 	if (fspace < before_gap + after_gap)
235 		return ENOMEM;
236 	if (fspace - before_gap - after_gap < sz)
237 		return ENOMEM;
238 
239 	/* Calculate lowest address. */
240 	low_addr += before_gap;
241 	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
242 	if (low_addr < tmp)	/* Overflow during alignment. */
243 		return ENOMEM;
244 	if (high_addr - after_gap - sz < low_addr)
245 		return ENOMEM;
246 
247 	/* Calculate highest address. */
248 	high_addr -= after_gap + sz;
249 	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
250 	if (high_addr > tmp)	/* Overflow during alignment. */
251 		return ENOMEM;
252 	if (low_addr > high_addr)
253 		return ENOMEM;
254 
255 	*min_result = low_addr;
256 	*max_result = high_addr;
257 	return 0;
258 }
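
/*
 * Illustrative sketch (not compiled): how uvm_addr_fitspace() narrows a
 * free range down to the lowest and highest usable start addresses.
 * The numbers assume a 4 KB PAGE_SIZE; the function name is made up.
 */
#if 0
static void
uvm_addr_fitspace_example(void)
{
	vaddr_t min, max;

	/*
	 * Free range [0x10000, 0x40000), allocation of 0x4000 bytes
	 * aligned to 0x8000, one guard page before and after.
	 */
	if (uvm_addr_fitspace(&min, &max, 0x10000, 0x40000, 0x4000,
	    0x8000, 0, PAGE_SIZE, PAGE_SIZE) == 0) {
		KASSERT(min == 0x18000);	/* 0x11000 rounded up. */
		KASSERT(max == 0x38000);	/* 0x3b000 rounded down. */
	}
}
#endif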
259 
260 
261 /*
262  * Initialize uvm_addr.
263  */
264 void
265 uvm_addr_init(void)
266 {
267 	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
268 	    IPL_VM, PR_WAITOK, "uaddr", NULL);
269 	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
270 	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
271 	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
272 	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
273 	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
274 	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);
275 
276 	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
277 	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
278 	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
279 }
280 
281 /*
282  * Invoke destructor function of uaddr.
283  */
284 void
285 uvm_addr_destroy(struct uvm_addr_state *uaddr)
286 {
287 	if (uaddr)
288 		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
289 }
290 
291 /*
292  * Move address forward to satisfy align, offset.
293  */
294 vaddr_t
295 uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
296 {
297 	vaddr_t result = (addr & ~(align - 1)) + offset;
298 	if (result < addr)
299 		result += align;
300 	return result;
301 }
302 
303 /*
304  * Move address backwards to satisfy align, offset.
305  */
306 vaddr_t
307 uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
308 {
309 	vaddr_t result = (addr & ~(align - 1)) + offset;
310 	if (result > addr)
311 		result -= align;
312 	return result;
313 }
314 
315 /*
316  * Directional first fit.
317  *
318  * Do a linear search for free space, starting at addr in entry.
319  * direction ==  1: search forward
320  * direction == -1: search backward
321  *
322  * Output: low <= addr <= high and entry will contain addr.
323  * ENOMEM will be returned if no space is available.
324  *
325  * before_gap and after_gap describe the space kept free around the allocation.
326  */
327 int
328 uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
329     struct vm_map_entry **entry_out, vaddr_t *addr_out,
330     vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
331     int direction, vaddr_t low, vaddr_t high,
332     vsize_t before_gap, vsize_t after_gap)
333 {
334 	struct vm_map_entry	*entry;
335 	vaddr_t			 low_addr, high_addr;
336 
337 	KASSERT(entry_out != NULL && addr_out != NULL);
338 	KASSERT(direction == -1 || direction == 1);
339 	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
340 	    (low & PAGE_MASK) == 0 &&
341 	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
342 	KASSERT(high + sz > high); /* Check for overflow. */
343 
344 	/* Hint magic. */
345 	if (hint == 0)
346 		hint = (direction == 1 ? low : high);
347 	else if (hint > high) {
348 		if (direction != -1)
349 			return ENOMEM;
350 		hint = high;
351 	} else if (hint < low) {
352 		if (direction != 1)
353 			return ENOMEM;
354 		hint = low;
355 	}
356 
357 	for (entry = uvm_map_entrybyaddr(&map->addr,
358 	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
359 	    entry = (direction == 1 ?
360 	    RBT_NEXT(uvm_map_addr, entry) :
361 	    RBT_PREV(uvm_map_addr, entry))) {
362 		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
363 		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
364 			break;
365 		}
366 
367 		if (uvm_addr_fitspace(&low_addr, &high_addr,
368 		    MAX(low, VMMAP_FREE_START(entry)),
369 		    MIN(high, VMMAP_FREE_END(entry)),
370 		    sz, align, offset, before_gap, after_gap) == 0) {
371 			*entry_out = entry;
372 			if (hint >= low_addr && hint <= high_addr) {
373 				*addr_out = hint;
374 			} else {
375 				*addr_out = (direction == 1 ?
376 				    low_addr : high_addr);
377 			}
378 			return 0;
379 		}
380 	}
381 
382 	return ENOMEM;
383 }
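
/*
 * Illustrative sketch (not compiled): a minimal forward first-fit query
 * using uvm_addr_linsearch().  The caller is assumed to hold the map
 * lock; "low", "high" and "sz" are assumed page-aligned and nonzero,
 * and the function name is made up.
 */
#if 0
static int
linsearch_example(struct vm_map *map, struct uvm_addr_state *uaddr,
    vaddr_t low, vaddr_t high, vsize_t sz,
    struct vm_map_entry **entry, vaddr_t *addr)
{
	/* hint = 0: start at low; direction 1: walk forward. */
	return uvm_addr_linsearch(map, uaddr, entry, addr,
	    0, sz, PAGE_SIZE, 0, 1, low, high - sz, 0, PAGE_SIZE);
}
#endif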
384 
385 /*
386  * Invoke address selector of uaddr.
387  * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
388  *
389  * Will invoke uvm_map_isavail to fill in last_out.
390  */
391 int
392 uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
393     struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
394     vaddr_t *addr_out,
395     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
396 {
397 	int error;
398 
399 	if (uaddr == NULL)
400 		return ENOMEM;
401 
402 	hint &= ~((vaddr_t)PAGE_MASK);
403 	if (hint != 0 &&
404 	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
405 		return ENOMEM;
406 
407 	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
408 	    entry_out, addr_out, sz, align, offset, prot, hint);
409 
410 	if (error == 0) {
411 		KASSERT(*entry_out != NULL);
412 		*last_out = NULL;
413 		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
414 		    *addr_out, sz)) {
415 			panic("uvm_addr_invoke: address selector %p "
416 			    "(%s 0x%lx-0x%lx) "
417 			    "returned unavailable address 0x%lx sz 0x%lx",
418 			    uaddr, uaddr->uaddr_functions->uaddr_name,
419 			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
420 			    *addr_out, sz);
421 		}
422 	}
423 
424 	return error;
425 }
426 
427 #if defined(DEBUG) || defined(DDB)
428 void
429 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
430     int (*pr)(const char *, ...))
431 {
432 	if (uaddr == NULL) {
433 		(*pr)("- uvm_addr %s: NULL\n", slot);
434 		return;
435 	}
436 
437 	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
438 	    uaddr->uaddr_functions->uaddr_name,
439 	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
440 	if (uaddr->uaddr_functions->uaddr_print == NULL)
441 		return;
442 
443 	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
444 }
445 #endif /* DEBUG || DDB */
446 
447 /*
448  * Destroy a uvm_addr_state structure.
449  * The uaddr must have been previously allocated from uaddr_pool.
450  */
451 void
452 uaddr_destroy(struct uvm_addr_state *uaddr)
453 {
454 	pool_put(&uaddr_pool, uaddr);
455 }
456 
457 
458 #if 0
459 /*
460  * Linear allocator.
461  * This allocator uses a first-fit algorithm.
462  *
463  * If hint is set, search will start at the hint position.
464  * Only searches forward.
465  */
466 const struct uvm_addr_functions uaddr_lin_functions = {
467 	.uaddr_select = &uaddr_lin_select,
468 	.uaddr_destroy = &uaddr_destroy,
469 	.uaddr_name = "uaddr_lin"
470 };
471 
472 struct uvm_addr_state *
473 uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
474 {
475 	struct uvm_addr_state *uaddr;
476 
477 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
478 	uaddr->uaddr_minaddr = minaddr;
479 	uaddr->uaddr_maxaddr = maxaddr;
480 	uaddr->uaddr_functions = &uaddr_lin_functions;
481 	return uaddr;
482 }
483 
484 int
485 uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
486     struct vm_map_entry **entry_out, vaddr_t *addr_out,
487     vsize_t sz, vaddr_t align, vaddr_t offset,
488     vm_prot_t prot, vaddr_t hint)
489 {
490 	vaddr_t guard_sz;
491 
492 	/* Deal with guardpages: search for space with one extra page. */
493 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
494 
495 	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
496 		return ENOMEM;
497 	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
498 	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
499 	    0, guard_sz);
500 }
501 #endif
502 
503 /*
504  * Randomized allocator.
505  * This allocator uses uvm_map_hint to acquire a random address and searches
506  * from there.
507  */
508 
509 const struct uvm_addr_functions uaddr_rnd_functions = {
510 	.uaddr_select = &uaddr_rnd_select,
511 	.uaddr_free_insert = &uaddr_rnd_insert,
512 	.uaddr_free_remove = &uaddr_rnd_remove,
513 	.uaddr_destroy = &uaddr_rnd_destroy,
514 #if defined(DEBUG) || defined(DDB)
515 #if 0
516 	.uaddr_print = &uaddr_rnd_print,
517 #endif
518 #endif /* DEBUG || DDB */
519 	.uaddr_name = "uaddr_rnd"
520 };
521 
522 struct uvm_addr_state *
523 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
524 {
525 	struct uaddr_rnd_state *uaddr;
526 
527 	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
528 	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
529 	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
530 	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
531 #if 0
532 	TAILQ_INIT(&uaddr->ur_free);
533 #endif
534 	return &uaddr->ur_uaddr;
535 }
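
/*
 * Illustrative sketch (not compiled): the rnd allocator state is
 * created with uaddr_rnd_create() and torn down through the generic
 * uvm_addr_destroy() hook, which dispatches to uaddr_rnd_destroy().
 * The address range and function name below are made up.
 */
#if 0
static void
rnd_create_example(void)
{
	struct uvm_addr_state *ua;

	ua = uaddr_rnd_create(PAGE_SIZE, 0x7fff0000);
	/* ... install into a map and use it ... */
	uvm_addr_destroy(ua);
}
#endif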
536 
537 int
538 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
539     struct vm_map_entry **entry_out, vaddr_t *addr_out,
540     vsize_t sz, vaddr_t align, vaddr_t offset,
541     vm_prot_t prot, vaddr_t hint)
542 {
543 	struct vmspace		*vm;
544 	vaddr_t			 minaddr, maxaddr;
545 	vaddr_t			 guard_sz;
546 	vaddr_t			 low_addr, high_addr;
547 	struct vm_map_entry	*entry, *next;
548 	vsize_t			 before_gap, after_gap;
549 	vaddr_t			 tmp;
550 
551 	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
552 	vm = (struct vmspace *)map;
553 
554 	/* Deal with guardpages: search for space with one extra page. */
555 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
556 
557 	if (uaddr->uaddr_maxaddr - guard_sz < sz)
558 		return ENOMEM;
559 	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
560 	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
561 	    align, offset);
562 
563 	/* Quick fail if the allocation won't fit. */
564 	if (minaddr >= maxaddr)
565 		return ENOMEM;
566 
567 	/* Select a hint. */
568 	if (hint == 0)
569 		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
570 	/* Clamp hint to uaddr range. */
571 	hint = MIN(MAX(hint, minaddr), maxaddr);
572 
573 	/* Align hint to align,offset parameters. */
574 	tmp = hint;
575 	hint = uvm_addr_align_forward(tmp, align, offset);
576 	/* Check for overflow during alignment. */
577 	if (hint < tmp || hint > maxaddr)
578 		return ENOMEM; /* Compatibility mode: never look backwards. */
579 
580 	before_gap = 0;
581 	after_gap = guard_sz;
582 	hint -= MIN(hint, before_gap);
583 
584 	/*
585 	 * Use the augmented address tree to look up the first entry
586 	 * at or after hint with sufficient space.
587 	 *
588 	 * This is the original optimized code.  It may fail when the subtree
589 	 * it inspects has sufficient space, but that space cannot satisfy
590 	 * the alignment constraint.
591 	 *
592 	 * Guard: subtree is not exhausted and max(fspace) >= required.
593 	 */
594 	entry = uvm_map_entrybyaddr(&map->addr, hint);
595 
596 	/* Walk up the tree, until there is at least sufficient space. */
597 	while (entry != NULL &&
598 	    entry->fspace_augment < before_gap + after_gap + sz)
599 		entry = RBT_PARENT(uvm_map_addr, entry);
600 
601 	while (entry != NULL) {
602 		/* Test if this fits. */
603 		if (VMMAP_FREE_END(entry) > hint &&
604 		    uvm_map_uaddr_e(map, entry) == uaddr &&
605 		    uvm_addr_fitspace(&low_addr, &high_addr,
606 		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
607 		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
608 		    sz, align, offset, before_gap, after_gap) == 0) {
609 			*entry_out = entry;
610 			if (hint >= low_addr && hint <= high_addr)
611 				*addr_out = hint;
612 			else
613 				*addr_out = low_addr;
614 			return 0;
615 		}
616 
617 		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
618 		next = RBT_RIGHT(uvm_map_addr, entry);
619 		if (next != NULL &&
620 		    next->fspace_augment >= before_gap + after_gap + sz) {
621 			entry = next;
622 			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
623 			    NULL)
624 				entry = next;
625 		} else {
626 do_parent:
627 			next = RBT_PARENT(uvm_map_addr, entry);
628 			if (next == NULL)
629 				entry = NULL;
630 			else if (RBT_LEFT(uvm_map_addr, next) == entry)
631 				entry = next;
632 			else {
633 				entry = next;
634 				goto do_parent;
635 			}
636 		}
637 	}
638 
639 	/* Lookup failed. */
640 	return ENOMEM;
641 }
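
/*
 * Illustrative sketch (not compiled): the traversal above, distilled
 * into a helper that advances to the next entry (in address order)
 * whose subtree may still hold "need" bytes of free space, using
 * fspace_augment to prune subtrees.  The helper is made up and not
 * used by the code above.
 */
#if 0
static struct vm_map_entry *
rnd_skip_next(struct vm_map_entry *entry, vsize_t need)
{
	struct vm_map_entry *next;

	/* Prefer the right subtree if it may still fit the request. */
	next = RBT_RIGHT(uvm_map_addr, entry);
	if (next != NULL && next->fspace_augment >= need) {
		entry = next;
		while ((next = RBT_LEFT(uvm_map_addr, entry)) != NULL)
			entry = next;
		return entry;
	}

	/* Otherwise climb until we come up from a left child. */
	for (;;) {
		next = RBT_PARENT(uvm_map_addr, entry);
		if (next == NULL)
			return NULL;
		if (RBT_LEFT(uvm_map_addr, next) == entry)
			return next;
		entry = next;
	}
}
#endif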
642 
643 /*
644  * Destroy a uaddr_rnd_state structure.
645  */
646 void
647 uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
648 {
649 	pool_put(&uaddr_rnd_pool, uaddr);
650 }
651 
652 /*
653  * Add entry to tailq (no-op; the free tailq is currently compiled out).
654  */
655 void
656 uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
657     struct vm_map_entry *entry)
658 {
659 	return;
660 }
661 
662 /*
663  * Remove entry from tailq (no-op; the free tailq is currently compiled out).
664  */
665 void
666 uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
667     struct vm_map_entry *entry)
668 {
669 	return;
670 }
671 
672 #if 0
673 #if defined(DEBUG) || defined(DDB)
674 void
675 uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
676     int (*pr)(const char*, ...))
677 {
678 	struct vm_map_entry	*entry;
679 	struct uaddr_rnd_state	*uaddr;
680 	vaddr_t			 addr;
681 	size_t			 count;
682 	vsize_t			 space;
683 
684 	uaddr = (struct uaddr_rnd_state *)uaddr_p;
685 	addr = 0;
686 	count = 0;
687 	space = 0;
688 	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
689 		count++;
690 		space += entry->fspace;
691 
692 		if (full) {
693 			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
694 			    entry, entry->start, entry->end,
695 			    entry->guard, entry->fspace);
696 			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
697 			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
698 		}
699 		if (entry->start < addr) {
700 			if (!full)
701 				(*pr)("\tentry %p: 0x%lx-0x%lx "
702 				    "G=0x%lx F=0x%lx\n",
703 				    entry, entry->start, entry->end,
704 				    entry->guard, entry->fspace);
705 			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
706 			    entry->start, addr);
707 		}
708 
709 		addr = VMMAP_FREE_END(entry);
710 	}
711 	(*pr)("\t%lu entries, 0x%lx free bytes\n", count, space);
712 }
713 #endif /* DEBUG || DDB */
714 #endif
715 
716 /*
717  * Kernel allocation bootstrap logic.
718  */
719 const struct uvm_addr_functions uaddr_kernel_functions = {
720 	.uaddr_select = &uaddr_kbootstrap_select,
721 	.uaddr_destroy = &uaddr_kbootstrap_destroy,
722 	.uaddr_name = "uaddr_kbootstrap"
723 };
724 
725 /*
726  * Select an address from the map.
727  *
728  * This function ignores the uaddr spec and instead uses the map directly.
729  * Because of that property, the uaddr algorithm can be shared across all
730  * kernel maps.
731  */
732 int
733 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
734     struct vm_map_entry **entry_out, vaddr_t *addr_out,
735     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
736 {
737 	vaddr_t tmp;
738 
739 	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
740 		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
741 		    uvm_addr_fitspace(addr_out, &tmp,
742 		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
743 		    sz, align, offset, 0, 0) == 0)
744 			return 0;
745 	}
746 
747 	return ENOMEM;
748 }
749 
750 /*
751  * Don't destroy the kernel bootstrap allocator.
752  */
753 void
754 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
755 {
756 	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
757 }
758 
759 #ifndef SMALL_KERNEL
760 /*
761  * Best fit algorithm.
762  */
763 
764 const struct uvm_addr_functions uaddr_bestfit_functions = {
765 	.uaddr_select = &uaddr_bestfit_select,
766 	.uaddr_free_insert = &uaddr_bestfit_insert,
767 	.uaddr_free_remove = &uaddr_bestfit_remove,
768 	.uaddr_destroy = &uaddr_bestfit_destroy,
769 	.uaddr_name = "uaddr_bestfit"
770 };
771 
772 struct uvm_addr_state *
773 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
774 {
775 	struct uaddr_bestfit_state *uaddr;
776 
777 	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
778 	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
779 	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
780 	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
781 	RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
782 	return &uaddr->ubf_uaddr;
783 }
784 
785 void
786 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
787 {
788 	pool_put(&uaddr_bestfit_pool, uaddr);
789 }
790 
791 void
792 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
793     struct vm_map_entry *entry)
794 {
795 	struct uaddr_bestfit_state	*uaddr;
796 	struct vm_map_entry		*rb_rv;
797 
798 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
799 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
800 	    NULL) {
801 		panic("%s: duplicate insertion: state %p "
802 		    "inserting %p, colliding with %p", __func__,
803 		    uaddr, entry, rb_rv);
804 	}
805 }
806 
807 void
808 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
809     struct vm_map_entry *entry)
810 {
811 	struct uaddr_bestfit_state	*uaddr;
812 
813 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
814 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
815 		panic("%s: entry was not in tree", __func__);
816 }
817 
818 int
819 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
820     struct vm_map_entry **entry_out, vaddr_t *addr_out,
821     vsize_t sz, vaddr_t align, vaddr_t offset,
822     vm_prot_t prot, vaddr_t hint)
823 {
824 	vaddr_t				 min, max;
825 	struct uaddr_bestfit_state	*uaddr;
826 	struct vm_map_entry		*entry;
827 	vsize_t				 guardsz;
828 
829 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
830 	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
831 	if (sz + guardsz < sz)
832 		return ENOMEM;
833 
834 	/*
835 	 * Find smallest item on freelist capable of holding item.
836 	 * Deal with guardpages: search for space with one extra page.
837 	 */
838 	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
839 	if (entry == NULL)
840 		return ENOMEM;
841 
842 	/* Walk the tree until we find an entry that fits.  */
843 	while (uvm_addr_fitspace(&min, &max,
844 	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
845 	    sz, align, offset, 0, guardsz) != 0) {
846 		entry = RBT_NEXT(uaddr_free_rbtree, entry);
847 		if (entry == NULL)
848 			return ENOMEM;
849 	}
850 
851 	/* Return the address that generates the least fragmentation. */
852 	*entry_out = entry;
853 	*addr_out = (min - VMMAP_FREE_START(entry) <=
854 	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
855 	    min : max);
856 	return 0;
857 }
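
/*
 * Illustrative sketch (not compiled): the "least fragmentation" pick in
 * uaddr_bestfit_select() above.  For a free range [0x10000, 0x23000)
 * with sz = 0x4000 and align = 0x8000 (no guard page), fitspace yields
 * min = 0x10000 and max = 0x18000.  Placing at min leaves the leftover
 * free space in one 0xf000 piece; placing at max would split it into
 * 0x8000 + 0x7000 pieces, so min is chosen.  The function name is made
 * up.
 */
#if 0
static void
bestfit_pick_example(void)
{
	vaddr_t min, max, pick;

	if (uvm_addr_fitspace(&min, &max, 0x10000, 0x23000,
	    0x4000, 0x8000, 0, 0, 0) == 0) {
		pick = (min - 0x10000 <= 0x23000 - 0x4000 - max ?
		    min : max);
		KASSERT(pick == 0x10000);
	}
}
#endif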
858 #endif /* !SMALL_KERNEL */
859 
860 
861 #ifndef SMALL_KERNEL
862 /*
863  * A userspace allocator based on pivots.
864  */
865 
866 const struct uvm_addr_functions uaddr_pivot_functions = {
867 	.uaddr_select = &uaddr_pivot_select,
868 	.uaddr_free_insert = &uaddr_pivot_insert,
869 	.uaddr_free_remove = &uaddr_pivot_remove,
870 	.uaddr_destroy = &uaddr_pivot_destroy,
871 #if defined(DEBUG) || defined(DDB)
872 	.uaddr_print = &uaddr_pivot_print,
873 #endif /* DEBUG || DDB */
874 	.uaddr_name = "uaddr_pivot"
875 };
876 
877 /*
878  * A special random function for pivots.
879  *
880  * This function will return:
881  * - a random number
882  * - a multiple of PAGE_SIZE
883  * - at least PAGE_SIZE
884  *
885  * The random function has a slightly higher chance to return a small number.
886  */
887 vsize_t
888 uaddr_pivot_random(void)
889 {
890 	int			r;
891 
892 	/*
893 	 * The sum of two dice rolls has a triangular distribution.
894 	 * We map the most probable number to 1, by folding the curve
895 	 * (think of a graph on a piece of paper, that you fold).
896 	 *
897 	 * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1
898 	 * have the same and highest probability of happening.
899 	 */
900 	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
901 	    (PIVOT_RND - 1);
902 	if (r < 0)
903 		r = -r;
904 
905 	/*
906 	 * Make the returned value at least PAGE_SIZE and a multiple of
907 	 * PAGE_SIZE.
908 	 */
909 	return (vaddr_t)(1 + r) << PAGE_SHIFT;
910 }
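
/*
 * Illustrative sketch (not compiled): sanity checks for the properties
 * documented above.  The result is always a whole number of pages,
 * between 1 page and PIVOT_RND pages inclusive, with small values the
 * most likely.  The function name is made up.
 */
#if 0
static void
pivot_random_example(void)
{
	vsize_t r;
	int i;

	for (i = 0; i < 1000; i++) {
		r = uaddr_pivot_random();
		KASSERT(r >= PAGE_SIZE);
		KASSERT(r <= (vsize_t)PIVOT_RND << PAGE_SHIFT);
		KASSERT((r & PAGE_MASK) == 0);
	}
}
#endif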
911 
912 /*
913  * Select a new pivot.
914  *
915  * A pivot must:
916  * - be chosen at random
917  * - have a randomly chosen gap before it, where the uaddr_state starts
918  * - have a randomly chosen gap after it, before the uaddr_state ends
919  *
920  * Furthermore, the pivot must provide sufficient space for the allocation.
921  * The addr will be set to the selected address.
922  *
923  * Returns ENOMEM on failure.
924  */
925 int
926 uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
927     struct uaddr_pivot *pivot,
928     struct vm_map_entry **entry_out, vaddr_t *addr_out,
929     vsize_t sz, vaddr_t align, vaddr_t offset,
930     vsize_t before_gap, vsize_t after_gap)
931 {
932 	struct vm_map_entry		*entry, *found;
933 	vaddr_t				 minaddr, maxaddr;
934 	vsize_t				 dist;
935 	vaddr_t				 found_minaddr, found_maxaddr;
936 	vaddr_t				 min, max;
937 	vsize_t				 arc4_arg;
938 	int				 fit_error;
939 	u_int32_t			 path;
940 
941 	minaddr = uaddr->up_uaddr.uaddr_minaddr;
942 	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
943 	KASSERT(minaddr < maxaddr);
944 #ifdef DIAGNOSTIC
945 	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
946 		panic("uaddr_pivot_newpivot: cannot grant random pivot "
947 		    "in area less than 2 pages (size = 0x%lx)",
948 		    maxaddr - minaddr);
949 	}
950 #endif /* DIAGNOSTIC */
951 
952 	/*
953 	 * Gap calculation: 1/32 of the size of the managed area.
954 	 *
955 	 * At most: sufficient to not get truncated at arc4random.
956 	 * At least: 2 PAGE_SIZE
957 	 *
958 	 * minaddr and maxaddr will be changed according to arc4random.
959 	 */
960 	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
961 	if (dist >> PAGE_SHIFT > 0xffffffff) {
962 		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
963 		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
964 	} else {
965 		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
966 		    PAGE_SHIFT;
967 		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
968 		    PAGE_SHIFT;
969 	}
970 
971 	/*
972 	 * A very fast way to find an entry that will be large enough
973 	 * to hold the allocation, but still is found more or less
974 	 * randomly: the tree path selector has a 50% chance to go for
975 	 * a bigger or smaller entry.
976 	 *
977 	 * Note that the memory may actually be available,
978 	 * but the fragmentation may be so bad and the gaps chosen
979 	 * so unfortunately, that the allocation will not succeed.
980 	 * Or the alignment can only be satisfied by an entry that
981 	 * is not visited in the randomly selected path.
982 	 *
983 	 * This code finds an entry with sufficient space in O(log n) time.
984 	 */
985 	path = arc4random();
986 	found = NULL;
987 	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
988 	while (entry != NULL) {
989 		fit_error = uvm_addr_fitspace(&min, &max,
990 		    MAX(VMMAP_FREE_START(entry), minaddr),
991 		    MIN(VMMAP_FREE_END(entry), maxaddr),
992 		    sz, align, offset, before_gap, after_gap);
993 
994 		/* It fits, save this entry. */
995 		if (fit_error == 0) {
996 			found = entry;
997 			found_minaddr = min;
998 			found_maxaddr = max;
999 		}
1000 
1001 		/* Next. */
1002 		if (fit_error != 0)
1003 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1004 		else if	((path & 0x1) == 0) {
1005 			path >>= 1;
1006 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1007 		} else {
1008 			path >>= 1;
1009 			entry = RBT_LEFT(uaddr_free_rbtree, entry);
1010 		}
1011 	}
1012 	if (found == NULL)
1013 		return ENOMEM;	/* No sufficiently large region found. */
1014 
1015 	/*
1016 	 * Calculate a random address within found.
1017 	 *
1018 	 * found_minaddr and found_maxaddr are already aligned, so be sure
1019 	 * to select a multiple of align as the offset in the entry.
1020 	 * Preferably, arc4random_uniform is used to provide no bias within
1021 	 * the entry.
1022 	 * However if the size of the entry exceeds arc4random_uniform's
1023 	 * argument limit, we simply use arc4random (thus limiting ourselves
1024 	 * to 4G * PAGE_SIZE bytes offset).
1025 	 */
1026 	if (found_maxaddr == found_minaddr)
1027 		*addr_out = found_minaddr;
1028 	else {
1029 		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
1030 		arc4_arg = found_maxaddr - found_minaddr;
1031 		if (arc4_arg > 0xffffffff) {
1032 			*addr_out = found_minaddr +
1033 			    (arc4random() & ~(align - 1));
1034 		} else {
1035 			*addr_out = found_minaddr +
1036 			    (arc4random_uniform(arc4_arg) & ~(align - 1));
1037 		}
1038 	}
1039 	/* Address was found in this entry. */
1040 	*entry_out = found;
1041 
1042 	/*
1043 	 * Set up new pivot and return selected address.
1044 	 *
1045 	 * Depending on the direction of the pivot, the pivot must be placed
1046 	 * at the bottom or the top of the allocation:
1047 	 * - if the pivot moves upwards, place the pivot at the top of the
1048 	 *   allocation,
1049 	 * - if the pivot moves downwards, place the pivot at the bottom
1050 	 *   of the allocation.
1051 	 */
1052 	pivot->entry = found;
1053 	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
1054 	if (pivot->dir > 0)
1055 		pivot->addr = *addr_out + sz;
1056 	else
1057 		pivot->addr = *addr_out;
1058 	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
1059 	return 0;
1060 }
1061 
1062 /*
1063  * Pivot selector.
1064  *
1065  * Each time the selector is invoked, it will select a random pivot, which
1066  * it will use to select memory with. The memory will be placed at the pivot,
1067  * with a randomly sized gap between the allocation and the pivot.
1068  * The pivot will then move so it will never revisit this address.
1069  *
1070  * On each allocation, the pivot's expiry counter ticks down. Once the
1071  * pivot expires, it will be replaced with a newly created pivot. Pivots also
1072  * automatically expire if they fail to provide memory for an allocation.
1073  *
1074  * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
1075  * which will ensure the pivot points at memory in such a way that the
1076  * allocation will succeed.
1077  * As an added bonus, the uaddr_pivot_newpivot() function will perform the
1078  * allocation immediately and move the pivot as appropriate.
1079  *
1080  * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
1081  * allocation to succeed, it will not create a new pivot and the allocation
1082  * will fail.
1083  *
1084  * A pivot running into used memory will automatically expire (because it will
1085  * fail to allocate).
1086  *
1087  * Characteristics of the allocator:
1088  * - best case, an allocation is O(log N)
1089  *   (it would be O(1), if it weren't for the need to check if the memory is
1090  *   free; although that can be avoided...)
1091  * - worst case, an allocation is O(log N)
1092  *   (the uaddr_pivot_newpivot() function has that complexity)
1093  * - failed allocations always take O(log N)
1094  *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
1095  */
1096 int
1097 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1098     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1099     vsize_t sz, vaddr_t align, vaddr_t offset,
1100     vm_prot_t prot, vaddr_t hint)
1101 {
1102 	struct uaddr_pivot_state	*uaddr;
1103 	struct vm_map_entry		*entry;
1104 	struct uaddr_pivot		*pivot;
1105 	vaddr_t				 min, max;
1106 	vsize_t				 before_gap, after_gap;
1107 	int				 err;
1108 
1109 	/*
1110 	 * When we have a hint, use the rnd allocator that finds the
1111 	 * area that is closest to the hint, if there is such an area.
1112 	 */
1113 	if (hint != 0) {
1114 		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
1115 		    sz, align, offset, prot, hint) == 0)
1116 			return 0;
1117 		return ENOMEM;
1118 	}
1119 
1120 	/*
1121 	 * Select a random pivot and random gap sizes around the allocation.
1122 	 */
1123 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1124 	pivot = &uaddr->up_pivots[
1125 	    arc4random_uniform(nitems(uaddr->up_pivots))];
1126 	before_gap = uaddr_pivot_random();
1127 	after_gap = uaddr_pivot_random();
1128 	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
1129 		goto expired;	/* Pivot is invalid (null or expired). */
1130 
1131 	/* Attempt to use the pivot to map the entry. */
1132 	entry = pivot->entry;
1133 	if (pivot->dir > 0) {
1134 		if (uvm_addr_fitspace(&min, &max,
1135 		    MAX(VMMAP_FREE_START(entry), pivot->addr),
1136 		    VMMAP_FREE_END(entry), sz, align, offset,
1137 		    before_gap, after_gap) == 0) {
1138 			*addr_out = min;
1139 			*entry_out = entry;
1140 			pivot->addr = min + sz;
1141 			pivot->expire--;
1142 			return 0;
1143 		}
1144 	} else {
1145 		if (uvm_addr_fitspace(&min, &max,
1146 		    VMMAP_FREE_START(entry),
1147 		    MIN(VMMAP_FREE_END(entry), pivot->addr),
1148 		    sz, align, offset, before_gap, after_gap) == 0) {
1149 			*addr_out = max;
1150 			*entry_out = entry;
1151 			pivot->addr = max;
1152 			pivot->expire--;
1153 			return 0;
1154 		}
1155 	}
1156 
1157 expired:
1158 	/*
1159 	 * Pivot expired or allocation failed.
1160 	 * Use pivot selector to do the allocation and find a new pivot.
1161 	 */
1162 	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
1163 	    sz, align, offset, before_gap, after_gap);
1164 	return err;
1165 }
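
/*
 * Illustrative sketch (not compiled): the pivot validity test at the
 * top of uaddr_pivot_select(), written out as a predicate.  A pivot is
 * usable only while it has an address, still references a free entry
 * and has allocations left before it expires.  The helper is made up.
 */
#if 0
static int
pivot_is_usable(const struct uaddr_pivot *pivot)
{
	return (pivot->addr != 0 && pivot->entry != NULL &&
	    pivot->expire != 0);
}
#endif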
1166 
1167 /*
1168  * Free the pivot selector state.
1169  */
1170 void
1171 uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
1172 {
1173 	pool_put(&uaddr_pivot_pool, uaddr);
1174 }
1175 
1176 /*
1177  * Insert an entry with free space in the space tree.
1178  */
1179 void
1180 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1181     struct vm_map_entry *entry)
1182 {
1183 	struct uaddr_pivot_state	*uaddr;
1184 	struct vm_map_entry		*rb_rv;
1185 	struct uaddr_pivot		*p;
1186 	vaddr_t				 check_addr;
1187 	vaddr_t				 start, end;
1188 
1189 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1190 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1191 	    NULL) {
1192 		panic("%s: duplicate insertion: state %p "
1193 		    "inserting entry %p which collides with %p", __func__,
1194 		    uaddr, entry, rb_rv);
1195 	}
1196 
1197 	start = VMMAP_FREE_START(entry);
1198 	end = VMMAP_FREE_END(entry);
1199 
1200 	/*
1201 	 * Update all pivots that are contained in this entry.
1202 	 */
1203 	for (p = &uaddr->up_pivots[0];
1204 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1205 		check_addr = p->addr;
1206 		if (check_addr == 0)
1207 			continue;
1208 		if (p->dir < 0)
1209 			check_addr--;
1210 
1211 		if (start <= check_addr &&
1212 		    check_addr < end) {
1213 			KASSERT(p->entry == NULL);
1214 			p->entry = entry;
1215 		}
1216 	}
1217 }
1218 
1219 /*
1220  * Remove an entry with free space from the space tree.
1221  */
1222 void
1223 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1224     struct vm_map_entry *entry)
1225 {
1226 	struct uaddr_pivot_state	*uaddr;
1227 	struct uaddr_pivot		*p;
1228 
1229 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1230 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1231 		panic("%s: entry was not in tree", __func__);
1232 
1233 	/*
1234 	 * Inform any pivot with this entry that the entry is gone.
1235 	 * Note that this does not automatically invalidate the pivot.
1236 	 */
1237 	for (p = &uaddr->up_pivots[0];
1238 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1239 		if (p->entry == entry)
1240 			p->entry = NULL;
1241 	}
1242 }
1243 
1244 /*
1245  * Create a new pivot selector.
1246  *
1247  * Initially, all pivots are in the expired state.
1248  * Two reasons for this:
1249  * - it means this allocator will not take a huge amount of time
1250  * - pivots select better on demand, because the pivot selection will be
1251  *   affected by preceding allocations:
1252  *   the next pivots will likely end up in different segments of free
1253  *   memory carved out by earlier allocations, giving a better spread.
1254  */
1255 struct uvm_addr_state *
1256 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1257 {
1258 	struct uaddr_pivot_state *uaddr;
1259 
1260 	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1261 	uaddr->up_uaddr.uaddr_minaddr = minaddr;
1262 	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1263 	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
1264 	RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
1265 	memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
1266 
1267 	return &uaddr->up_uaddr;
1268 }
1269 
1270 #if defined(DEBUG) || defined(DDB)
1271 /*
1272  * Print the uaddr_pivot_state.
1273  *
1274  * If full, a listing of all entries in the state will be provided.
1275  */
1276 void
1277 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1278     int (*pr)(const char *, ...))
1279 {
1280 	struct uaddr_pivot_state	*uaddr;
1281 	struct uaddr_pivot		*pivot;
1282 	struct vm_map_entry		*entry;
1283 	int				 i;
1284 	vaddr_t				 check_addr;
1285 
1286 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1287 
1288 	for (i = 0; i < NUM_PIVOTS; i++) {
1289 		pivot = &uaddr->up_pivots[i];
1290 
1291 		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
1292 		    pivot->addr, pivot->expire, pivot->dir);
1293 	}
1294 	if (!full)
1295 		return;
1296 
1297 	if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
1298 		(*pr)("\tempty\n");
1299 	/* Print list of free space. */
1300 	RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1301 		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1302 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1303 		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1304 
1305 		for (i = 0; i < NUM_PIVOTS; i++) {
1306 			pivot = &uaddr->up_pivots[i];
1307 			check_addr = pivot->addr;
1308 			if (check_addr == 0)
1309 				continue;
1310 			if (pivot->dir < 0)
1311 				check_addr--;
1312 
1313 			if (VMMAP_FREE_START(entry) <= check_addr &&
1314 			    check_addr < VMMAP_FREE_END(entry)) {
1315 				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
1316 				    i, pivot->addr);
1317 			}
1318 		}
1319 	}
1320 }
1321 #endif /* DEBUG || DDB */
1322 #endif /* !SMALL_KERNEL */
1323 
1324 #ifndef SMALL_KERNEL
1325 /*
1326  * Stack/break allocator.
1327  *
1328  * Allocations in the stack area are made in the direction opposite
1329  * to stack growth; allocations in the brk area are made downward
1330  * (because sbrk() grows upward).
1331  *
1332  * The brk area is searched first.  If the allocation does not fit
1333  * there, the stack area is searched, using randomly sized guard gaps.
1334  */
1335 const struct uvm_addr_functions uaddr_stack_brk_functions = {
1336 	.uaddr_select = &uaddr_stack_brk_select,
1337 	.uaddr_destroy = &uaddr_destroy,
1338 	.uaddr_name = "uaddr_stckbrk"
1339 };
1340 
1341 /*
1342  * Stack/brk address selector.
1343  */
1344 int
1345 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1346     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1347     vsize_t sz, vaddr_t align, vaddr_t offset,
1348     vm_prot_t prot, vaddr_t hint)
1349 {
1350 	vaddr_t			start;
1351 	vaddr_t			end;
1352 	vsize_t			before_gap;
1353 	vsize_t			after_gap;
1354 	int			dir;
1355 
1356 	/* Set up brk search strategy. */
1357 	start = MAX(map->b_start, uaddr->uaddr_minaddr);
1358 	end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1359 	before_gap = 0;
1360 	after_gap = 0;
1361 	dir = -1;	/* Opposite of brk() growth. */
1362 
1363 	if (end - start >= sz) {
1364 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1365 		    0, sz, align, offset, dir, start, end - sz,
1366 		    before_gap, after_gap) == 0)
1367 			return 0;
1368 	}
1369 
1370 	/* Set up stack search strategy. */
1371 	start = MAX(map->s_start, uaddr->uaddr_minaddr);
1372 	end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1373 	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1374 	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1375 #ifdef MACHINE_STACK_GROWS_UP
1376 	dir = -1;
1377 #else
1378 	dir =  1;
1379 #endif
1380 	if (end - start >= before_gap + after_gap &&
1381 	    end - start - before_gap - after_gap >= sz) {
1382 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1383 		    0, sz, align, offset, dir, start, end - sz,
1384 		    before_gap, after_gap) == 0)
1385 			return 0;
1386 	}
1387 
1388 	return ENOMEM;
1389 }
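
/*
 * Illustrative sketch (not compiled): the random guard gaps used for
 * the stack search above.  (arc4random() & 0x3) + 1 yields 1..4, so
 * each gap is a uniformly chosen 1 to 4 pages on either side of the
 * mapping.  The function name is made up.
 */
#if 0
static void
stack_gap_example(void)
{
	vsize_t gap;

	gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
	KASSERT(gap >= PAGE_SIZE && gap <= 4 * PAGE_SIZE);
}
#endif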
1390 
1391 struct uvm_addr_state *
1392 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1393 {
1394 	struct uvm_addr_state* uaddr;
1395 
1396 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1397 	uaddr->uaddr_minaddr = minaddr;
1398 	uaddr->uaddr_maxaddr = maxaddr;
1399 	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1400 	return uaddr;
1401 }
1402 #endif /* !SMALL_KERNEL */
1403 
1404 
1405 #ifndef SMALL_KERNEL
1406 /*
1407  * Free space comparison.
1408  * Compares smaller free-space before larger free-space.
1409  */
1410 static inline int
1411 uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
1412     const struct vm_map_entry *e2)
1413 {
1414 	if (e1->fspace != e2->fspace)
1415 		return (e1->fspace < e2->fspace ? -1 : 1);
1416 	return (e1->start < e2->start ? -1 : e1->start > e2->start);
1417 }
1418 
1419 RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
1420     uvm_mapent_fspace_cmp);
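
/*
 * Illustrative sketch (not compiled): the ordering imposed by the
 * comparator above.  Entries sort by free-space size first, with the
 * start address breaking ties, so equal-sized gaps remain distinct
 * keys in the tree.  The function name is made up.
 */
#if 0
static void
fspace_cmp_example(void)
{
	struct vm_map_entry a = { 0 }, b = { 0 };

	a.fspace = 1 * PAGE_SIZE;
	a.start = 0x2000;
	b.fspace = 2 * PAGE_SIZE;
	b.start = 0x1000;
	KASSERT(uvm_mapent_fspace_cmp(&a, &b) < 0);	/* smaller first */
	b.fspace = a.fspace;
	KASSERT(uvm_mapent_fspace_cmp(&a, &b) > 0);	/* tie: by start */
}
#endif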
1421 #endif /* !SMALL_KERNEL */
1422