1 /*	$OpenBSD: uvm_addr.c,v 1.27 2018/05/16 09:02:11 otto Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /* #define DEBUG */
20 
21 #include <sys/param.h>
22 #include <uvm/uvm.h>
23 #include <uvm/uvm_addr.h>
24 #include <sys/pool.h>
25 
26 /* Max gap between hint allocations. */
27 #define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
28 /* Number of pivots in pivot allocator. */
29 #define NUM_PIVOTS		16
30 /*
31  * Max number (inclusive) of pages the pivot allocator
32  * will place between allocations.
33  *
34  * The uaddr_pivot_random() function attempts to bias towards
35  * small space between allocations, so putting a large number here is fine.
36  */
37 #define PIVOT_RND		8
38 /*
39  * Number of allocations that a pivot can supply before expiring.
40  * When a pivot expires, a new pivot has to be found.
41  *
42  * Must be at least 1.
43  */
44 #define PIVOT_EXPIRE		1024
45 
46 
47 /* Pool with uvm_addr_state structures. */
48 struct pool uaddr_pool;
49 struct pool uaddr_bestfit_pool;
50 struct pool uaddr_pivot_pool;
51 struct pool uaddr_rnd_pool;
52 
53 /* uvm_addr state for bestfit selector. */
54 struct uaddr_bestfit_state {
55 	struct uvm_addr_state		 ubf_uaddr;
56 	struct uaddr_free_rbtree	 ubf_free;
57 };
58 
59 /* uvm_addr state for rnd selector. */
60 struct uaddr_rnd_state {
61 	struct uvm_addr_state		 ur_uaddr;
62 #if 0
63 	TAILQ_HEAD(, vm_map_entry)	 ur_free;
64 #endif
65 };
66 
67 /* Definition of a pivot in pivot selector. */
68 struct uaddr_pivot {
69 	vaddr_t				 addr;	/* End of prev. allocation. */
70 	int				 expire;/* Best before date. */
71 	int				 dir;	/* Direction. */
72 	struct vm_map_entry		*entry; /* Will contain next alloc. */
73 };
74 /* uvm_addr state for pivot selector. */
75 struct uaddr_pivot_state {
76 	struct uvm_addr_state		 up_uaddr;
77 
78 	/* Free space tree, for fast pivot selection. */
79 	struct uaddr_free_rbtree	 up_free;
80 
81 	/* List of pivots.  Each pivot's address points just past its last allocation. */
82 	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
83 };
84 
85 /* Forward declaration (see below). */
86 extern const struct uvm_addr_functions uaddr_kernel_functions;
87 struct uvm_addr_state uaddr_kbootstrap;
88 
89 /* Support functions. */
90 #ifndef SMALL_KERNEL
91 struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
92 			    vsize_t);
93 #endif /* !SMALL_KERNEL */
94 void			 uaddr_kinsert(struct vm_map *,
95 			    struct uvm_addr_state *, struct vm_map_entry *);
96 void			 uaddr_kremove(struct vm_map *,
97 			    struct uvm_addr_state *, struct vm_map_entry *);
98 void			 uaddr_kbootstrapdestroy(struct uvm_addr_state *);
99 
100 void			 uaddr_destroy(struct uvm_addr_state *);
101 void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
102 void			 uaddr_rnd_destroy(struct uvm_addr_state *);
103 void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
104 void			 uaddr_pivot_destroy(struct uvm_addr_state *);
105 
106 #if 0
107 int			 uaddr_lin_select(struct vm_map *,
108 			    struct uvm_addr_state *, struct vm_map_entry **,
109 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
110 			    vaddr_t);
111 #endif
112 int			 uaddr_kbootstrap_select(struct vm_map *,
113 			    struct uvm_addr_state *, struct vm_map_entry **,
114 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
115 			    vaddr_t);
116 int			 uaddr_rnd_select(struct vm_map *,
117 			    struct uvm_addr_state *, struct vm_map_entry **,
118 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
119 			    vaddr_t);
120 int			 uaddr_bestfit_select(struct vm_map *,
121 			    struct uvm_addr_state*, struct vm_map_entry **,
122 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
123 			    vaddr_t);
124 #ifndef SMALL_KERNEL
125 int			 uaddr_pivot_select(struct vm_map *,
126 			    struct uvm_addr_state *, struct vm_map_entry **,
127 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
128 			    vaddr_t);
129 int			 uaddr_stack_brk_select(struct vm_map *,
130 			    struct uvm_addr_state *, struct vm_map_entry **,
131 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
132 			    vaddr_t);
133 #endif /* !SMALL_KERNEL */
134 
135 void			 uaddr_rnd_insert(struct vm_map *,
136 			    struct uvm_addr_state *, struct vm_map_entry *);
137 void			 uaddr_rnd_remove(struct vm_map *,
138 			    struct uvm_addr_state *, struct vm_map_entry *);
139 void			 uaddr_bestfit_insert(struct vm_map *,
140 			    struct uvm_addr_state *, struct vm_map_entry *);
141 void			 uaddr_bestfit_remove(struct vm_map *,
142 			    struct uvm_addr_state *, struct vm_map_entry *);
143 void			 uaddr_pivot_insert(struct vm_map *,
144 			    struct uvm_addr_state *, struct vm_map_entry *);
145 void			 uaddr_pivot_remove(struct vm_map *,
146 			    struct uvm_addr_state *, struct vm_map_entry *);
147 
148 #ifndef SMALL_KERNEL
149 vsize_t			 uaddr_pivot_random(void);
150 int			 uaddr_pivot_newpivot(struct vm_map *,
151 			    struct uaddr_pivot_state *, struct uaddr_pivot *,
152 			    struct vm_map_entry **, vaddr_t *,
153 			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
154 #endif /* !SMALL_KERNEL */
155 
156 #if defined(DEBUG) || defined(DDB)
157 void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
158 			    int (*)(const char *, ...));
159 #if 0
160 void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
161 			    int (*)(const char *, ...));
162 #endif
163 #endif /* DEBUG || DDB */
164 
165 
166 #ifndef SMALL_KERNEL
167 /*
168  * Find smallest entry in tree that will fit sz bytes.
169  */
170 struct vm_map_entry *
171 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
172 {
173 	struct vm_map_entry	*tmp, *res;
174 
175 	tmp = RBT_ROOT(uaddr_free_rbtree, free);
176 	res = NULL;
177 	while (tmp) {
178 		if (tmp->fspace >= sz) {
179 			res = tmp;
180 			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
181 		} else if (tmp->fspace < sz)
182 			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
183 	}
184 	return res;
185 }
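/*
 * For illustration (hypothetical sizes): with free ranges of 0x2000,
 * 0x5000 and 0x9000 bytes in the tree, a request for sz = 0x3000 skips
 * the 0x2000 entry and returns the 0x5000 entry, the smallest range
 * that still fits.
 */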
186 #endif /* !SMALL_KERNEL */
187 
188 static __inline vaddr_t
189 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
190 {
191 	vaddr_t adjusted;
192 
193 	KASSERT(offset < align || (align == 0 && offset == 0));
194 	KASSERT((align & (align - 1)) == 0);
195 	KASSERT((offset & PAGE_MASK) == 0);
196 
197 	align = MAX(align, PAGE_SIZE);
198 	adjusted = addr & ~(align - 1);
199 	adjusted += offset;
200 	return (adjusted < addr ? adjusted + align : adjusted);
201 }
202 
203 static __inline vaddr_t
204 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
205 {
206 	vaddr_t adjusted;
207 
208 	KASSERT(offset < align || (align == 0 && offset == 0));
209 	KASSERT((align & (align - 1)) == 0);
210 	KASSERT((offset & PAGE_MASK) == 0);
211 
212 	align = MAX(align, PAGE_SIZE);
213 	adjusted = addr & ~(align - 1);
214 	adjusted += offset;
215 	return (adjusted > addr ? adjusted - align : adjusted);
216 }
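/*
 * Worked example (hypothetical values, assuming 4 KB pages): with
 * align = 0x10000, offset = 0x2000 and addr = 0x12345000,
 * uvm_addr_align_forward() truncates to 0x12340000, adds the offset
 * (0x12342000) and, because that lies below addr, returns 0x12352000;
 * uvm_addr_align_backward() returns 0x12342000, which already lies
 * at or below addr.
 */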
217 
218 /*
219  * Try to fit the requested space into the entry.
220  */
221 int
222 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
223     vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
224     vaddr_t align, vaddr_t offset,
225     vsize_t before_gap, vsize_t after_gap)
226 {
227 	vaddr_t	tmp;
228 	vsize_t	fspace;
229 
230 	if (low_addr > high_addr)
231 		return ENOMEM;
232 	fspace = high_addr - low_addr;
233 	if (fspace < before_gap + after_gap)
234 		return ENOMEM;
235 	if (fspace - before_gap - after_gap < sz)
236 		return ENOMEM;
237 
238 	/* Calculate lowest address. */
239 	low_addr += before_gap;
240 	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
241 	if (low_addr < tmp)	/* Overflow during alignment. */
242 		return ENOMEM;
243 	if (high_addr - after_gap - sz < low_addr)
244 		return ENOMEM;
245 
246 	/* Calculate highest address. */
247 	high_addr -= after_gap + sz;
248 	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
249 	if (high_addr > tmp)	/* Overflow during alignment. */
250 		return ENOMEM;
251 	if (low_addr > high_addr)
252 		return ENOMEM;
253 
254 	*min_result = low_addr;
255 	*max_result = high_addr;
256 	return 0;
257 }
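/*
 * Worked example (hypothetical values, assuming 4 KB pages): fitting
 * sz = 0x3000 with align = PAGE_SIZE, offset = 0 and no gaps into the
 * free range [0x1000, 0x20000) yields *min_result = 0x1000 and
 * *max_result = 0x1D000, the lowest and highest page-aligned start
 * addresses at which the allocation still fits.
 */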
258 
259 
260 /*
261  * Initialize uvm_addr.
262  */
263 void
264 uvm_addr_init(void)
265 {
266 	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
267 	    IPL_VM, PR_WAITOK, "uaddr", NULL);
268 	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
269 	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
270 	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
271 	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
272 	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
273 	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);
274 
275 	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
276 	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
277 	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
278 }
279 
280 /*
281  * Invoke destructor function of uaddr.
282  */
283 void
284 uvm_addr_destroy(struct uvm_addr_state *uaddr)
285 {
286 	if (uaddr)
287 		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
288 }
289 
290 /*
291  * Move address forward to satisfy align, offset.
292  */
293 vaddr_t
294 uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
295 {
296 	vaddr_t result = (addr & ~(align - 1)) + offset;
297 	if (result < addr)
298 		result += align;
299 	return result;
300 }
301 
302 /*
303  * Move address backwards to satisfy align, offset.
304  */
305 vaddr_t
306 uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
307 {
308 	vaddr_t result = (addr & ~(align - 1)) + offset;
309 	if (result > addr)
310 		result -= align;
311 	return result;
312 }
313 
314 /*
315  * Directional first fit.
316  *
317  * Do a linear search for free space, starting at addr in entry.
318  * direction ==  1: search forward
319  * direction == -1: search backward
320  *
321  * Output: low <= addr <= high and entry will contain addr.
322  * Returns 0 on success and ENOMEM if no space is available.
323  *
324  * before_gap and after_gap describe the free space required around the allocation.
325  */
326 int
327 uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
328     struct vm_map_entry **entry_out, vaddr_t *addr_out,
329     vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
330     int direction, vaddr_t low, vaddr_t high,
331     vsize_t before_gap, vsize_t after_gap)
332 {
333 	struct vm_map_entry	*entry;
334 	vaddr_t			 low_addr, high_addr;
335 
336 	KASSERT(entry_out != NULL && addr_out != NULL);
337 	KASSERT(direction == -1 || direction == 1);
338 	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
339 	    (low & PAGE_MASK) == 0 &&
340 	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
341 	KASSERT(high + sz > high); /* Check for overflow. */
342 
343 	/* Hint magic. */
344 	if (hint == 0)
345 		hint = (direction == 1 ? low : high);
346 	else if (hint > high) {
347 		if (direction != -1)
348 			return ENOMEM;
349 		hint = high;
350 	} else if (hint < low) {
351 		if (direction != 1)
352 			return ENOMEM;
353 		hint = low;
354 	}
355 
356 	for (entry = uvm_map_entrybyaddr(&map->addr,
357 	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
358 	    entry = (direction == 1 ?
359 	    RBT_NEXT(uvm_map_addr, entry) :
360 	    RBT_PREV(uvm_map_addr, entry))) {
361 		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
362 		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
363 			break;
364 		}
365 
366 		if (uvm_addr_fitspace(&low_addr, &high_addr,
367 		    MAX(low, VMMAP_FREE_START(entry)),
368 		    MIN(high, VMMAP_FREE_END(entry)),
369 		    sz, align, offset, before_gap, after_gap) == 0) {
370 			*entry_out = entry;
371 			if (hint >= low_addr && hint <= high_addr) {
372 				*addr_out = hint;
373 			} else {
374 				*addr_out = (direction == 1 ?
375 				    low_addr : high_addr);
376 			}
377 			return 0;
378 		}
379 	}
380 
381 	return ENOMEM;
382 }
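/*
 * Usage sketch (illustrative only, with hypothetical local variables;
 * similar to the stack case in uaddr_stack_brk_select() below):
 *
 *	error = uvm_addr_linsearch(map, uaddr, &entry, &addr,
 *	    0, sz, PAGE_SIZE, 0, 1, start, end - sz,
 *	    before_gap, after_gap);
 *
 * searches forward (direction 1) through [start, end - sz] for a
 * page-aligned address, keeping before_gap and after_gap bytes free
 * around the allocation.
 */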
383 
384 /*
385  * Invoke address selector of uaddr.
386  * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
387  *
388  * Will invoke uvm_map_isavail to fill in last_out.
389  */
390 int
391 uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
392     struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
393     vaddr_t *addr_out,
394     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
395 {
396 	int error;
397 
398 	if (uaddr == NULL)
399 		return ENOMEM;
400 
401 	hint &= ~((vaddr_t)PAGE_MASK);
402 	if (hint != 0 &&
403 	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
404 		return ENOMEM;
405 
406 	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
407 	    entry_out, addr_out, sz, align, offset, prot, hint);
408 
409 	if (error == 0) {
410 		KASSERT(*entry_out != NULL);
411 		*last_out = NULL;
412 		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
413 		    *addr_out, sz)) {
414 			panic("uvm_addr_invoke: address selector %p "
415 			    "(%s 0x%lx-0x%lx) "
416 			    "returned unavailable address 0x%lx sz 0x%lx",
417 			    uaddr, uaddr->uaddr_functions->uaddr_name,
418 			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
419 			    *addr_out, sz);
420 		}
421 	}
422 
423 	return error;
424 }
425 
426 #if defined(DEBUG) || defined(DDB)
427 void
428 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
429     int (*pr)(const char *, ...))
430 {
431 	if (uaddr == NULL) {
432 		(*pr)("- uvm_addr %s: NULL\n", slot);
433 		return;
434 	}
435 
436 	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
437 	    uaddr->uaddr_functions->uaddr_name,
438 	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
439 	if (uaddr->uaddr_functions->uaddr_print == NULL)
440 		return;
441 
442 	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
443 }
444 #endif /* DEBUG || DDB */
445 
446 /*
447  * Destroy a uvm_addr_state structure.
448  * The uaddr must have been previously allocated from uaddr_pool.
449  */
450 void
451 uaddr_destroy(struct uvm_addr_state *uaddr)
452 {
453 	pool_put(&uaddr_pool, uaddr);
454 }
455 
456 
457 #if 0
458 /*
459  * Linear allocator.
460  * This allocator uses a first-fit algorithm.
461  *
462  * If hint is set, search will start at the hint position.
463  * Only searches forward.
464  */
465 const struct uvm_addr_functions uaddr_lin_functions = {
466 	.uaddr_select = &uaddr_lin_select,
467 	.uaddr_destroy = &uaddr_destroy,
468 	.uaddr_name = "uaddr_lin"
469 };
470 
471 struct uvm_addr_state *
472 uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
473 {
474 	struct uvm_addr_state *uaddr;
475 
476 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
477 	uaddr->uaddr_minaddr = minaddr;
478 	uaddr->uaddr_maxaddr = maxaddr;
479 	uaddr->uaddr_functions = &uaddr_lin_functions;
480 	return uaddr;
481 }
482 
483 int
484 uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
485     struct vm_map_entry **entry_out, vaddr_t *addr_out,
486     vsize_t sz, vaddr_t align, vaddr_t offset,
487     vm_prot_t prot, vaddr_t hint)
488 {
489 	vaddr_t guard_sz;
490 
491 	/* Deal with guardpages: search for space with one extra page. */
492 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
493 
494 	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
495 		return ENOMEM;
496 	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
497 	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
498 	    0, guard_sz);
499 }
500 #endif
501 
502 /*
503  * Randomized allocator.
504  * This allocator uses uvm_map_hint to acquire a random address and searches
505  * from there.
506  */
507 
508 const struct uvm_addr_functions uaddr_rnd_functions = {
509 	.uaddr_select = &uaddr_rnd_select,
510 	.uaddr_free_insert = &uaddr_rnd_insert,
511 	.uaddr_free_remove = &uaddr_rnd_remove,
512 	.uaddr_destroy = &uaddr_rnd_destroy,
513 #if defined(DEBUG) || defined(DDB)
514 #if 0
515 	.uaddr_print = &uaddr_rnd_print,
516 #endif
517 #endif /* DEBUG || DDB */
518 	.uaddr_name = "uaddr_rnd"
519 };
520 
521 struct uvm_addr_state *
522 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
523 {
524 	struct uaddr_rnd_state *uaddr;
525 
526 	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
527 	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
528 	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
529 	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
530 #if 0
531 	TAILQ_INIT(&uaddr->ur_free);
532 #endif
533 	return &uaddr->ur_uaddr;
534 }
535 
536 int
537 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
538     struct vm_map_entry **entry_out, vaddr_t *addr_out,
539     vsize_t sz, vaddr_t align, vaddr_t offset,
540     vm_prot_t prot, vaddr_t hint)
541 {
542 	struct vmspace		*vm;
543 	vaddr_t			 minaddr, maxaddr;
544 	vaddr_t			 guard_sz;
545 	vaddr_t			 low_addr, high_addr;
546 	struct vm_map_entry	*entry, *next;
547 	vsize_t			 before_gap, after_gap;
548 	vaddr_t			 tmp;
549 
550 	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
551 	vm = (struct vmspace *)map;
552 
553 	/* Deal with guardpages: search for space with one extra page. */
554 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
555 
556 	if (uaddr->uaddr_maxaddr - guard_sz < sz)
557 		return ENOMEM;
558 	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
559 	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
560 	    align, offset);
561 
562 	/* Quick fail if the allocation won't fit. */
563 	if (minaddr >= maxaddr)
564 		return ENOMEM;
565 
566 	/* Select a hint. */
567 	if (hint == 0)
568 		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
569 	/* Clamp hint to uaddr range. */
570 	hint = MIN(MAX(hint, minaddr), maxaddr);
571 
572 	/* Align hint to align,offset parameters. */
573 	tmp = hint;
574 	hint = uvm_addr_align_forward(tmp, align, offset);
575 	/* Check for overflow during alignment. */
576 	if (hint < tmp || hint > maxaddr)
577 		return ENOMEM; /* Compatibility mode: never look backwards. */
578 
579 	before_gap = 0;
580 	after_gap = guard_sz;
581 	hint -= MIN(hint, before_gap);
582 
583 	/*
584 	 * Use the augmented address tree to look up the first entry
585 	 * at or after hint with sufficient space.
586 	 *
587 	 * This is the original optimized lookup.  It can fail when the
588 	 * subtree it inspects has enough total free space, but none of that
589 	 * space satisfies the align/offset constraint.
590 	 *
591 	 * Guard: subtree is not exhausted and max(fspace) >= required.
592 	 */
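	/*
	 * fspace_augment, maintained by the address tree in uvm_map.c, is
	 * the largest fspace of any entry in the subtree rooted at an entry,
	 * so a subtree whose augment is below the required size can be
	 * skipped wholesale.
	 */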
593 	entry = uvm_map_entrybyaddr(&map->addr, hint);
594 
595 	/* Walk up the tree until a subtree with sufficient free space is found. */
596 	while (entry != NULL &&
597 	    entry->fspace_augment < before_gap + after_gap + sz)
598 		entry = RBT_PARENT(uvm_map_addr, entry);
599 
600 	while (entry != NULL) {
601 		/* Test if this fits. */
602 		if (VMMAP_FREE_END(entry) > hint &&
603 		    uvm_map_uaddr_e(map, entry) == uaddr &&
604 		    uvm_addr_fitspace(&low_addr, &high_addr,
605 		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
606 		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
607 		    sz, align, offset, before_gap, after_gap) == 0) {
608 			*entry_out = entry;
609 			if (hint >= low_addr && hint <= high_addr)
610 				*addr_out = hint;
611 			else
612 				*addr_out = low_addr;
613 			return 0;
614 		}
615 
616 		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
617 		next = RBT_RIGHT(uvm_map_addr, entry);
618 		if (next != NULL &&
619 		    next->fspace_augment >= before_gap + after_gap + sz) {
620 			entry = next;
621 			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
622 			    NULL)
623 				entry = next;
624 		} else {
625 do_parent:
626 			next = RBT_PARENT(uvm_map_addr, entry);
627 			if (next == NULL)
628 				entry = NULL;
629 			else if (RBT_LEFT(uvm_map_addr, next) == entry)
630 				entry = next;
631 			else {
632 				entry = next;
633 				goto do_parent;
634 			}
635 		}
636 	}
637 
638 	/* Lookup failed. */
639 	return ENOMEM;
640 }
641 
642 /*
643  * Destroy a uaddr_rnd_state structure.
644  */
645 void
646 uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
647 {
648 	pool_put(&uaddr_rnd_pool, uaddr);
649 }
650 
651 /*
652  * Insert hook for the rnd selector; a no-op since the free tailq is disabled.
653  */
654 void
655 uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
656     struct vm_map_entry *entry)
657 {
658 	return;
659 }
660 
661 /*
662  * Remove hook for the rnd selector; a no-op since the free tailq is disabled.
663  */
664 void
665 uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
666     struct vm_map_entry *entry)
667 {
668 	return;
669 }
670 
671 #if 0
672 #if defined(DEBUG) || defined(DDB)
673 void
674 uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
675     int (*pr)(const char*, ...))
676 {
677 	struct vm_map_entry	*entry;
678 	struct uaddr_rnd_state	*uaddr;
679 	vaddr_t			 addr;
680 	size_t			 count;
681 	vsize_t			 space;
682 
683 	uaddr = (struct uaddr_rnd_state *)uaddr_p;
684 	addr = 0;
685 	count = 0;
686 	space = 0;
687 	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
688 		count++;
689 		space += entry->fspace;
690 
691 		if (full) {
692 			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
693 			    entry, entry->start, entry->end,
694 			    entry->guard, entry->fspace);
695 			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
696 			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
697 		}
698 		if (entry->start < addr) {
699 			if (!full)
700 				(*pr)("\tentry %p: 0x%lx-0x%lx "
701 				    "G=0x%lx F=0x%lx\n",
702 				    entry, entry->start, entry->end,
703 				    entry->guard, entry->fspace);
704 			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
705 			    entry->start, addr);
706 		}
707 
708 		addr = VMMAP_FREE_END(entry);
709 	}
710 	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
711 }
712 #endif /* DEBUG || DDB */
713 #endif
714 
715 /*
716  * Kernel allocation bootstrap logic.
717  */
718 const struct uvm_addr_functions uaddr_kernel_functions = {
719 	.uaddr_select = &uaddr_kbootstrap_select,
720 	.uaddr_destroy = &uaddr_kbootstrap_destroy,
721 	.uaddr_name = "uaddr_kbootstrap"
722 };
723 
724 /*
725  * Select an address from the map.
726  *
727  * This function ignores the uaddr spec and instead uses the map directly.
728  * Because of that property, the uaddr algorithm can be shared across all
729  * kernel maps.
730  */
731 int
732 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
733     struct vm_map_entry **entry_out, vaddr_t *addr_out,
734     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
735 {
736 	vaddr_t tmp;
737 
738 	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
739 		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
740 		    uvm_addr_fitspace(addr_out, &tmp,
741 		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
742 		    sz, align, offset, 0, 0) == 0)
743 			return 0;
744 	}
745 
746 	return ENOMEM;
747 }
748 
749 /*
750  * Don't destroy the kernel bootstrap allocator.
751  */
752 void
753 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
754 {
755 	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
756 }
757 
758 #ifndef SMALL_KERNEL
759 /*
760  * Best fit algorithm.
761  */
762 
763 const struct uvm_addr_functions uaddr_bestfit_functions = {
764 	.uaddr_select = &uaddr_bestfit_select,
765 	.uaddr_free_insert = &uaddr_bestfit_insert,
766 	.uaddr_free_remove = &uaddr_bestfit_remove,
767 	.uaddr_destroy = &uaddr_bestfit_destroy,
768 	.uaddr_name = "uaddr_bestfit"
769 };
770 
771 struct uvm_addr_state *
772 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
773 {
774 	struct uaddr_bestfit_state *uaddr;
775 
776 	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
777 	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
778 	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
779 	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
780 	RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
781 	return &uaddr->ubf_uaddr;
782 }
783 
784 void
785 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
786 {
787 	pool_put(&uaddr_bestfit_pool, uaddr);
788 }
789 
790 void
791 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
792     struct vm_map_entry *entry)
793 {
794 	struct uaddr_bestfit_state	*uaddr;
795 	struct vm_map_entry		*rb_rv;
796 
797 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
798 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
799 	    NULL) {
800 		panic("%s: duplicate insertion: state %p "
801 		    "inserting %p, colliding with %p", __func__,
802 		    uaddr, entry, rb_rv);
803 	}
804 }
805 
806 void
807 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
808     struct vm_map_entry *entry)
809 {
810 	struct uaddr_bestfit_state	*uaddr;
811 
812 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
813 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
814 		panic("%s: entry was not in tree", __func__);
815 }
816 
817 int
818 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
819     struct vm_map_entry **entry_out, vaddr_t *addr_out,
820     vsize_t sz, vaddr_t align, vaddr_t offset,
821     vm_prot_t prot, vaddr_t hint)
822 {
823 	vaddr_t				 min, max;
824 	struct uaddr_bestfit_state	*uaddr;
825 	struct vm_map_entry		*entry;
826 	vsize_t				 guardsz;
827 
828 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
829 	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
830 	if (sz + guardsz < sz)
831 		return ENOMEM;
832 
833 	/*
834 	 * Find the smallest free range capable of holding the allocation.
835 	 * Deal with guardpages: search for space with one extra page.
836 	 */
837 	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
838 	if (entry == NULL)
839 		return ENOMEM;
840 
841 	/* Walk the tree until we find an entry that fits.  */
842 	while (uvm_addr_fitspace(&min, &max,
843 	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
844 	    sz, align, offset, 0, guardsz) != 0) {
845 		entry = RBT_NEXT(uaddr_free_rbtree, entry);
846 		if (entry == NULL)
847 			return ENOMEM;
848 	}
849 
850 	/* Return the address that generates the least fragmentation. */
851 	*entry_out = entry;
852 	*addr_out = (min - VMMAP_FREE_START(entry) <=
853 	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
854 	    min : max);
855 	return 0;
856 }
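/*
 * Worked example (hypothetical values): fitting sz = 0x2000 with
 * align = 0x4000 into a free range [0x1000, 0xA000) gives min = 0x4000
 * and max = 0x8000.  Placing at min would leave two fragments (0x3000
 * and 0x4000 bytes); placing at max leaves a single 0x7000 byte
 * fragment, so max is chosen.
 */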
857 #endif /* !SMALL_KERNEL */
858 
859 
860 #ifndef SMALL_KERNEL
861 /*
862  * A userspace allocator based on pivots.
863  */
864 
865 const struct uvm_addr_functions uaddr_pivot_functions = {
866 	.uaddr_select = &uaddr_pivot_select,
867 	.uaddr_free_insert = &uaddr_pivot_insert,
868 	.uaddr_free_remove = &uaddr_pivot_remove,
869 	.uaddr_destroy = &uaddr_pivot_destroy,
870 #if defined(DEBUG) || defined(DDB)
871 	.uaddr_print = &uaddr_pivot_print,
872 #endif /* DEBUG || DDB */
873 	.uaddr_name = "uaddr_pivot"
874 };
875 
876 /*
877  * A special random function for pivots.
878  *
879  * This function will return:
880  * - a random number
881  * - a multiple of PAGE_SIZE
882  * - at least PAGE_SIZE
883  *
884  * The random function has a slightly higher chance of returning a small number.
885  */
886 vsize_t
887 uaddr_pivot_random(void)
888 {
889 	int			r;
890 
891 	/*
892 	 * The sum of two uniform values in [0, PIVOT_RND) has a triangular
893 	 * distribution that peaks at PIVOT_RND - 1.  We fold the curve at
894 	 * that peak (think of a graph on a piece of paper that you fold):
895 	 * the most probable sum maps to 0, so small results are favored,
896 	 * while results of up to PIVOT_RND - 1 pages (before the minimum
897 	 * page is added below) remain possible.
898 	 */
899 	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
900 	    (PIVOT_RND - 1);
901 	if (r < 0)
902 		r = -r;
903 
904 	/*
905 	 * Make the returned value at least PAGE_SIZE and a multiple of
906 	 * PAGE_SIZE.
907 	 */
908 	return (vaddr_t)(1 + r) << PAGE_SHIFT;
909 }
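/*
 * For illustration, with PIVOT_RND = 8: r takes values in [-7, 7];
 * after folding, |r| = 0 has probability 8/64 while |r| = 7 has
 * probability 2/64, so the returned gap is usually only a few pages
 * but can reach PIVOT_RND pages.
 */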
910 
911 /*
912  * Select a new pivot.
913  *
914  * A pivot must:
915  * - be chosen at random,
916  * - have a randomly chosen gap between it and the start of the uaddr_state,
917  * - have a randomly chosen gap between it and the end of the uaddr_state.
918  *
919  * Furthermore, the pivot must provide sufficient space for the allocation.
920  * The addr will be set to the selected address.
921  *
922  * Returns ENOMEM on failure.
923  */
924 int
925 uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
926     struct uaddr_pivot *pivot,
927     struct vm_map_entry **entry_out, vaddr_t *addr_out,
928     vsize_t sz, vaddr_t align, vaddr_t offset,
929     vsize_t before_gap, vsize_t after_gap)
930 {
931 	struct vm_map_entry		*entry, *found;
932 	vaddr_t				 minaddr, maxaddr;
933 	vsize_t				 dist;
934 	vaddr_t				 found_minaddr, found_maxaddr;
935 	vaddr_t				 min, max;
936 	vsize_t				 arc4_arg;
937 	int				 fit_error;
938 	u_int32_t			 path;
939 
940 	minaddr = uaddr->up_uaddr.uaddr_minaddr;
941 	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
942 	KASSERT(minaddr < maxaddr);
943 #ifdef DIAGNOSTIC
944 	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
945 		panic("uaddr_pivot_newpivot: cannot grant random pivot "
946 		    "in area less than 2 pages (size = 0x%lx)",
947 		    maxaddr - minaddr);
948 	}
949 #endif /* DIAGNOSTIC */
950 
951 	/*
952 	 * Gap calculation: 1/32 of the size of the managed area,
953 	 * but at least 2 * PAGE_SIZE.
954 	 *
955 	 * If the gap in pages does not fit arc4random_uniform()'s 32-bit
956 	 * argument, plain arc4random() is used instead.
957 	 * minaddr and maxaddr are moved inward by the randomly chosen gaps.
958 	 */
959 	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
960 	if (dist >> PAGE_SHIFT > 0xffffffff) {
961 		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
962 		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
963 	} else {
964 		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
965 		    PAGE_SHIFT;
966 		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
967 		    PAGE_SHIFT;
968 	}
969 
970 	/*
971 	 * A very fast way to find an entry that will be large enough
972 	 * to hold the allocation, but still is found more or less
973 	 * randomly: the tree path selector has a 50% chance to go for
974 	 * a bigger or smaller entry.
975 	 *
976 	 * Note that the memory may actually be available,
977 	 * but the fragmentation may be so bad and the gaps chosen
978 	 * so unfortunately, that the allocation will not succeed.
979 	 * Or the alignment can only be satisfied by an entry that
980 	 * is not visited in the randomly selected path.
981 	 *
982 	 * This code finds an entry with sufficient space in O(log n) time.
983 	 */
984 	path = arc4random();
985 	found = NULL;
986 	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
987 	while (entry != NULL) {
988 		fit_error = uvm_addr_fitspace(&min, &max,
989 		    MAX(VMMAP_FREE_START(entry), minaddr),
990 		    MIN(VMMAP_FREE_END(entry), maxaddr),
991 		    sz, align, offset, before_gap, after_gap);
992 
993 		/* It fits, save this entry. */
994 		if (fit_error == 0) {
995 			found = entry;
996 			found_minaddr = min;
997 			found_maxaddr = max;
998 		}
999 
1000 		/* Next. */
1001 		if (fit_error != 0)
1002 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1003 		else if	((path & 0x1) == 0) {
1004 			path >>= 1;
1005 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1006 		} else {
1007 			path >>= 1;
1008 			entry = RBT_LEFT(uaddr_free_rbtree, entry);
1009 		}
1010 	}
1011 	if (found == NULL)
1012 		return ENOMEM;	/* No sufficiently large region found. */
1013 
1014 	/*
1015 	 * Calculate a random address within found.
1016 	 *
1017 	 * found_minaddr and found_maxaddr are already aligned, so be sure
1018 	 * to select a multiple of align as the offset in the entry.
1019 	 * Preferably, arc4random_uniform is used to provide no bias within
1020 	 * the entry.
1021 	 * However, if the size of the range exceeds arc4random_uniform()'s
1022 	 * 32-bit argument limit, we simply use arc4random() (limiting the
1023 	 * offset to at most 4 GB).
1024 	 */
1025 	if (found_maxaddr == found_minaddr)
1026 		*addr_out = found_minaddr;
1027 	else {
1028 		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
1029 		arc4_arg = found_maxaddr - found_minaddr;
1030 		if (arc4_arg > 0xffffffff) {
1031 			*addr_out = found_minaddr +
1032 			    (arc4random() & ~(align - 1));
1033 		} else {
1034 			*addr_out = found_minaddr +
1035 			    (arc4random_uniform(arc4_arg) & ~(align - 1));
1036 		}
1037 	}
1038 	/* Address was found in this entry. */
1039 	*entry_out = found;
1040 
1041 	/*
1042 	 * Set up new pivot and return selected address.
1043 	 *
1044 	 * Depending on the direction of the pivot, the pivot must be placed
1045 	 * at the bottom or the top of the allocation:
1046 	 * - if the pivot moves upwards, place the pivot at the top of the
1047 	 *   allocation,
1048 	 * - if the pivot moves downwards, place the pivot at the bottom
1049 	 *   of the allocation.
1050 	 */
1051 	pivot->entry = found;
1052 	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
1053 	if (pivot->dir > 0)
1054 		pivot->addr = *addr_out + sz;
1055 	else
1056 		pivot->addr = *addr_out;
1057 	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
1058 	return 0;
1059 }
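/*
 * For illustration: during the descent above, each node that can
 * satisfy the request consumes one bit of `path' to pick the left or
 * right child, while nodes that cannot satisfy it force a step towards
 * the larger ranges on the right.  The last fitting node seen on the
 * way down is used, so a fitting entry is found in O(log n) steps.
 */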
1060 
1061 /*
1062  * Pivot selector.
1063  *
1064  * Each time the selector is invoked, it will select a random pivot, which
1065  * it will use to select memory with. The memory will be placed at the pivot,
1066  * with a randomly sized gap between the allocation and the pivot.
1067  * The pivot will then move so it will never revisit this address.
1068  *
1069  * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
1070  * On each allocation, the pivot's expiry counter ticks down. Once a pivot
1071  * expires, it is replaced with a newly created pivot. Pivots also
1072  *
1073  * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
1074  * which will ensure the pivot points at memory in such a way that the
1075  * allocation will succeed.
1076  * As an added bonus, the uaddr_pivot_newpivot() function will perform the
1077  * allocation immediately and move the pivot as appropriate.
1078  *
1079  * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
1080  * allocation to succeed, it will not create a new pivot and the allocation
1081  * will fail.
1082  *
1083  * A pivot running into used memory will automatically expire (because it will
1084  * fail to allocate).
1085  *
1086  * Characteristics of the allocator:
1087  * - best case, an allocation is O(log N)
1088  *   (it would be O(1), if it weren't for the need to check if the memory is
1089  *   free; although that can be avoided...)
1090  * - worst case, an allocation is O(log N)
1091  *   (the uaddr_pivot_newpivot() function has that complexity)
1092  * - failed allocations always take O(log N)
1093  *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
1094  */
1095 int
1096 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1097     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1098     vsize_t sz, vaddr_t align, vaddr_t offset,
1099     vm_prot_t prot, vaddr_t hint)
1100 {
1101 	struct uaddr_pivot_state	*uaddr;
1102 	struct vm_map_entry		*entry;
1103 	struct uaddr_pivot		*pivot;
1104 	vaddr_t				 min, max;
1105 	vsize_t				 before_gap, after_gap;
1106 	int				 err;
1107 
1108 	/*
1109 	 * When we have a hint, use the rnd allocator that finds the
1110 	 * area that is closest to the hint, if there is such an area.
1111 	 */
1112 	if (hint != 0) {
1113 		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
1114 		    sz, align, offset, prot, hint) == 0)
1115 			return 0;
1116 		return ENOMEM;
1117 	}
1118 
1119 	/*
1120 	 * Select a random pivot and random gap sizes around the allocation.
1121 	 */
1122 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1123 	pivot = &uaddr->up_pivots[
1124 	    arc4random_uniform(nitems(uaddr->up_pivots))];
1125 	before_gap = uaddr_pivot_random();
1126 	after_gap = uaddr_pivot_random();
1127 	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
1128 		goto expired;	/* Pivot is invalid (null or expired). */
1129 
1130 	/* Attempt to use the pivot to map the entry. */
1131 	entry = pivot->entry;
1132 	if (pivot->dir > 0) {
1133 		if (uvm_addr_fitspace(&min, &max,
1134 		    MAX(VMMAP_FREE_START(entry), pivot->addr),
1135 		    VMMAP_FREE_END(entry), sz, align, offset,
1136 		    before_gap, after_gap) == 0) {
1137 			*addr_out = min;
1138 			*entry_out = entry;
1139 			pivot->addr = min + sz;
1140 			pivot->expire--;
1141 			return 0;
1142 		}
1143 	} else {
1144 		if (uvm_addr_fitspace(&min, &max,
1145 		    VMMAP_FREE_START(entry),
1146 		    MIN(VMMAP_FREE_END(entry), pivot->addr),
1147 		    sz, align, offset, before_gap, after_gap) == 0) {
1148 			*addr_out = max;
1149 			*entry_out = entry;
1150 			pivot->addr = max;
1151 			pivot->expire--;
1152 			return 0;
1153 		}
1154 	}
1155 
1156 expired:
1157 	/*
1158 	 * Pivot expired or allocation failed.
1159 	 * Use pivot selector to do the allocation and find a new pivot.
1160 	 */
1161 	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
1162 	    sz, align, offset, before_gap, after_gap);
1163 	return err;
1164 }
1165 
1166 /*
1167  * Free the pivot selector state.
1168  */
1169 void
1170 uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
1171 {
1172 	pool_put(&uaddr_pivot_pool, uaddr);
1173 }
1174 
1175 /*
1176  * Insert an entry with free space in the space tree.
1177  */
1178 void
1179 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1180     struct vm_map_entry *entry)
1181 {
1182 	struct uaddr_pivot_state	*uaddr;
1183 	struct vm_map_entry		*rb_rv;
1184 	struct uaddr_pivot		*p;
1185 	vaddr_t				 check_addr;
1186 	vaddr_t				 start, end;
1187 
1188 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1189 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1190 	    NULL) {
1191 		panic("%s: duplicate insertion: state %p "
1192 		    "inserting entry %p which collides with %p", __func__,
1193 		    uaddr, entry, rb_rv);
1194 	}
1195 
1196 	start = VMMAP_FREE_START(entry);
1197 	end = VMMAP_FREE_END(entry);
1198 
1199 	/*
1200 	 * Update all pivots that are contained in this entry.
1201 	 */
1202 	for (p = &uaddr->up_pivots[0];
1203 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1204 		check_addr = p->addr;
1205 		if (check_addr == 0)
1206 			continue;
1207 		if (p->dir < 0)
1208 			check_addr--;
1209 
1210 		if (start <= check_addr &&
1211 		    check_addr < end) {
1212 			KASSERT(p->entry == NULL);
1213 			p->entry = entry;
1214 		}
1215 	}
1216 }
1217 
1218 /*
1219  * Remove an entry with free space from the space tree.
1220  */
1221 void
1222 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1223     struct vm_map_entry *entry)
1224 {
1225 	struct uaddr_pivot_state	*uaddr;
1226 	struct uaddr_pivot		*p;
1227 
1228 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1229 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1230 		panic("%s: entry was not in tree", __func__);
1231 
1232 	/*
1233 	 * Inform any pivot with this entry that the entry is gone.
1234 	 * Note that this does not automatically invalidate the pivot.
1235 	 */
1236 	for (p = &uaddr->up_pivots[0];
1237 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1238 		if (p->entry == entry)
1239 			p->entry = NULL;
1240 	}
1241 }
1242 
1243 /*
1244  * Create a new pivot selector.
1245  *
1246  * Initially, all pivots are in the expired state.
1247  * Two reasons for this:
1248  * - it keeps creation cheap: no pivot selection work is done up front,
1249  * - pivots select better on demand, because pivot selection is
1250  *   influenced by preceding allocations:
1251  *   later pivots tend to end up in different segments of free memory,
1252  *   segmented by earlier allocations, which gives a better spread.
1253  */
1254 struct uvm_addr_state *
1255 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1256 {
1257 	struct uaddr_pivot_state *uaddr;
1258 
1259 	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1260 	uaddr->up_uaddr.uaddr_minaddr = minaddr;
1261 	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1262 	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
1263 	RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
1264 	memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
1265 
1266 	return &uaddr->up_uaddr;
1267 }
1268 
1269 #if defined(DEBUG) || defined(DDB)
1270 /*
1271  * Print the uaddr_pivot_state.
1272  *
1273  * If full, a listing of all entries in the state will be provided.
1274  */
1275 void
1276 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1277     int (*pr)(const char *, ...))
1278 {
1279 	struct uaddr_pivot_state	*uaddr;
1280 	struct uaddr_pivot		*pivot;
1281 	struct vm_map_entry		*entry;
1282 	int				 i;
1283 	vaddr_t				 check_addr;
1284 
1285 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1286 
1287 	for (i = 0; i < NUM_PIVOTS; i++) {
1288 		pivot = &uaddr->up_pivots[i];
1289 
1290 		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
1291 		    pivot->addr, pivot->expire, pivot->dir);
1292 	}
1293 	if (!full)
1294 		return;
1295 
1296 	if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
1297 		(*pr)("\tempty\n");
1298 	/* Print list of free space. */
1299 	RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1300 		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1301 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1302 		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1303 
1304 		for (i = 0; i < NUM_PIVOTS; i++) {
1305 			pivot = &uaddr->up_pivots[i];
1306 			check_addr = pivot->addr;
1307 			if (check_addr == 0)
1308 				continue;
1309 			if (pivot->dir < 0)
1310 				check_addr--;
1311 
1312 			if (VMMAP_FREE_START(entry) <= check_addr &&
1313 			    check_addr < VMMAP_FREE_END(entry)) {
1314 				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
1315 				    i, pivot->addr);
1316 			}
1317 		}
1318 	}
1319 }
1320 #endif /* DEBUG || DDB */
1321 #endif /* !SMALL_KERNEL */
1322 
1323 #ifndef SMALL_KERNEL
1324 /*
1325  * Stack/break allocator.
1326  *
1327  * The stack area is filled in the direction opposite to stack growth;
1328  * the brk area is filled downward (because sbrk() grows upward).
1329  *
1330  * The brk area is tried first; if the allocation does not fit there,
1331  * the stack area is tried, with randomly sized guard gaps placed
1332  * around the allocation.
1333  */
1334 const struct uvm_addr_functions uaddr_stack_brk_functions = {
1335 	.uaddr_select = &uaddr_stack_brk_select,
1336 	.uaddr_destroy = &uaddr_destroy,
1337 	.uaddr_name = "uaddr_stckbrk"
1338 };
1339 
1340 /*
1341  * Stack/brk address selector.
1342  */
1343 int
1344 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1345     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1346     vsize_t sz, vaddr_t align, vaddr_t offset,
1347     vm_prot_t prot, vaddr_t hint)
1348 {
1349 	vaddr_t			start;
1350 	vaddr_t			end;
1351 	vsize_t			before_gap;
1352 	vsize_t			after_gap;
1353 	int			dir;
1354 
1355 	/* Set up brk search strategy. */
1356 	start = MAX(map->b_start, uaddr->uaddr_minaddr);
1357 	end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1358 	before_gap = 0;
1359 	after_gap = 0;
1360 	dir = -1;	/* Opposite of brk() growth. */
1361 
1362 	if (end - start >= sz) {
1363 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1364 		    0, sz, align, offset, dir, start, end - sz,
1365 		    before_gap, after_gap) == 0)
1366 			return 0;
1367 	}
1368 
1369 	/* Set up stack search strategy. */
1370 	start = MAX(map->s_start, uaddr->uaddr_minaddr);
1371 	end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1372 	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1373 	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1374 #ifdef MACHINE_STACK_GROWS_UP
1375 	dir = -1;
1376 #else
1377 	dir =  1;
1378 #endif
1379 	if (end - start >= before_gap + after_gap &&
1380 	    end - start - before_gap - after_gap >= sz) {
1381 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1382 		    0, sz, align, offset, dir, start, end - sz,
1383 		    before_gap, after_gap) == 0)
1384 			return 0;
1385 	}
1386 
1387 	return ENOMEM;
1388 }
1389 
1390 struct uvm_addr_state *
1391 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1392 {
1393 	struct uvm_addr_state* uaddr;
1394 
1395 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1396 	uaddr->uaddr_minaddr = minaddr;
1397 	uaddr->uaddr_maxaddr = maxaddr;
1398 	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1399 	return uaddr;
1400 }
1401 #endif /* !SMALL_KERNEL */
1402 
1403 
1404 #ifndef SMALL_KERNEL
1405 /*
1406  * Free space comparison.
1407  * Orders smaller free space before larger; ties are broken by start address.
1408  */
1409 static inline int
1410 uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
1411     const struct vm_map_entry *e2)
1412 {
1413 	if (e1->fspace != e2->fspace)
1414 		return (e1->fspace < e2->fspace ? -1 : 1);
1415 	return (e1->start < e2->start ? -1 : e1->start > e2->start);
1416 }
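/*
 * For illustration: entries with fspace 0x2000, 0x5000 and 0x5000 sort
 * with the 0x2000 entry first and the two 0x5000 entries ordered by
 * start address, giving the strict total order the rbtree requires.
 */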
1417 
1418 RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
1419     uvm_mapent_fspace_cmp);
1420 #endif /* !SMALL_KERNEL */
1421