1 /*	$OpenBSD: uvm_addr.c,v 1.32 2022/11/04 09:36:44 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /* #define DEBUG */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <uvm/uvm.h>
24 #include <uvm/uvm_addr.h>
25 #include <sys/pool.h>
26 
27 /* Max gap between hint allocations. */
28 #define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
29 /* Number of pivots in pivot allocator. */
30 #define NUM_PIVOTS		16
31 /*
32  * Max number (inclusive) of pages the pivot allocator
33  * will place between allocations.
34  *
35  * The uaddr_pivot_random() function attempts to bias towards
36  * small space between allocations, so putting a large number here is fine.
37  */
38 #define PIVOT_RND		8
39 /*
40  * Number of allocations that a pivot can supply before expiring.
41  * When a pivot expires, a new pivot has to be found.
42  *
43  * Must be at least 1.
44  */
45 #define PIVOT_EXPIRE		1024
46 
47 
48 /* Pool with uvm_addr_state structures. */
49 struct pool uaddr_pool;
50 struct pool uaddr_bestfit_pool;
51 struct pool uaddr_pivot_pool;
52 struct pool uaddr_rnd_pool;
53 
54 /* uvm_addr state for bestfit selector. */
55 struct uaddr_bestfit_state {
56 	struct uvm_addr_state		 ubf_uaddr;
57 	struct uaddr_free_rbtree	 ubf_free;
58 };
59 
60 /* uvm_addr state for rnd selector. */
61 struct uaddr_rnd_state {
62 	struct uvm_addr_state		 ur_uaddr;
63 #if 0
64 	TAILQ_HEAD(, vm_map_entry)	 ur_free;
65 #endif
66 };
67 
68 /*
69  * Definition of a pivot in pivot selector.
70  */
71 struct uaddr_pivot {
72 	vaddr_t				 addr;	/* End of prev. allocation. */
	int				 expire;	/* Best before date. */
74 	int				 dir;	/* Direction. */
75 	struct vm_map_entry		*entry; /* Will contain next alloc. */
76 };
77 /* uvm_addr state for pivot selector. */
78 struct uaddr_pivot_state {
79 	struct uvm_addr_state		 up_uaddr;
80 
81 	/* Free space tree, for fast pivot selection. */
82 	struct uaddr_free_rbtree	 up_free;
83 
84 	/* List of pivots. The pointers point to after the last allocation. */
85 	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
86 };
87 
88 /* Forward declaration (see below). */
89 extern const struct uvm_addr_functions uaddr_kernel_functions;
90 struct uvm_addr_state uaddr_kbootstrap;
91 
92 
93 /*
94  * Support functions.
95  */
96 
97 #ifndef SMALL_KERNEL
98 struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
99 			    vsize_t);
100 #endif /* !SMALL_KERNEL */
101 void			 uaddr_kinsert(struct vm_map *,
102 			    struct uvm_addr_state *, struct vm_map_entry *);
103 void			 uaddr_kremove(struct vm_map *,
104 			    struct uvm_addr_state *, struct vm_map_entry *);
105 void			 uaddr_kbootstrapdestroy(struct uvm_addr_state *);
106 
107 void			 uaddr_destroy(struct uvm_addr_state *);
108 void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
109 void			 uaddr_rnd_destroy(struct uvm_addr_state *);
110 void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
111 void			 uaddr_pivot_destroy(struct uvm_addr_state *);
112 
113 #if 0
114 int			 uaddr_lin_select(struct vm_map *,
115 			    struct uvm_addr_state *, struct vm_map_entry **,
116 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
117 			    vaddr_t);
118 #endif
119 int			 uaddr_kbootstrap_select(struct vm_map *,
120 			    struct uvm_addr_state *, struct vm_map_entry **,
121 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
122 			    vaddr_t);
123 int			 uaddr_rnd_select(struct vm_map *,
124 			    struct uvm_addr_state *, struct vm_map_entry **,
125 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
126 			    vaddr_t);
127 int			 uaddr_bestfit_select(struct vm_map *,
128 			    struct uvm_addr_state*, struct vm_map_entry **,
129 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
130 			    vaddr_t);
131 #ifndef SMALL_KERNEL
132 int			 uaddr_pivot_select(struct vm_map *,
133 			    struct uvm_addr_state *, struct vm_map_entry **,
134 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
135 			    vaddr_t);
136 int			 uaddr_stack_brk_select(struct vm_map *,
137 			    struct uvm_addr_state *, struct vm_map_entry **,
138 			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
139 			    vaddr_t);
140 #endif /* !SMALL_KERNEL */
141 
142 void			 uaddr_rnd_insert(struct vm_map *,
143 			    struct uvm_addr_state *, struct vm_map_entry *);
144 void			 uaddr_rnd_remove(struct vm_map *,
145 			    struct uvm_addr_state *, struct vm_map_entry *);
146 void			 uaddr_bestfit_insert(struct vm_map *,
147 			    struct uvm_addr_state *, struct vm_map_entry *);
148 void			 uaddr_bestfit_remove(struct vm_map *,
149 			    struct uvm_addr_state *, struct vm_map_entry *);
150 void			 uaddr_pivot_insert(struct vm_map *,
151 			    struct uvm_addr_state *, struct vm_map_entry *);
152 void			 uaddr_pivot_remove(struct vm_map *,
153 			    struct uvm_addr_state *, struct vm_map_entry *);
154 
155 #ifndef SMALL_KERNEL
156 vsize_t			 uaddr_pivot_random(void);
157 int			 uaddr_pivot_newpivot(struct vm_map *,
158 			    struct uaddr_pivot_state *, struct uaddr_pivot *,
159 			    struct vm_map_entry **, vaddr_t *,
160 			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
161 #endif /* !SMALL_KERNEL */
162 
163 #if defined(DEBUG) || defined(DDB)
164 void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
165 			    int (*)(const char *, ...));
166 #if 0
167 void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
168 			    int (*)(const char *, ...));
169 #endif
170 #endif /* DEBUG || DDB */
171 
172 
173 #ifndef SMALL_KERNEL
174 /*
175  * Find smallest entry in tree that will fit sz bytes.
176  */
177 struct vm_map_entry *
178 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
179 {
180 	struct vm_map_entry	*tmp, *res;
181 
182 	tmp = RBT_ROOT(uaddr_free_rbtree, free);
183 	res = NULL;
184 	while (tmp) {
185 		if (tmp->fspace >= sz) {
186 			res = tmp;
187 			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
188 		} else if (tmp->fspace < sz)
189 			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
190 	}
191 	return res;
192 }
193 #endif /* !SMALL_KERNEL */
194 
195 static inline vaddr_t
196 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
197 {
198 	vaddr_t adjusted;
199 
200 	KASSERT(offset < align || (align == 0 && offset == 0));
201 	KASSERT((align & (align - 1)) == 0);
202 	KASSERT((offset & PAGE_MASK) == 0);
203 
204 	align = MAX(align, PAGE_SIZE);
205 	adjusted = addr & ~(align - 1);
206 	adjusted += offset;
207 	return (adjusted < addr ? adjusted + align : adjusted);
208 }
209 
210 static inline vaddr_t
211 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
212 {
213 	vaddr_t adjusted;
214 
215 	KASSERT(offset < align || (align == 0 && offset == 0));
216 	KASSERT((align & (align - 1)) == 0);
217 	KASSERT((offset & PAGE_MASK) == 0);
218 
219 	align = MAX(align, PAGE_SIZE);
220 	adjusted = addr & ~(align - 1);
221 	adjusted += offset;
222 	return (adjusted > addr ? adjusted - align : adjusted);
223 }
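
/*
 * Worked example for the helpers above (assuming 4 KB pages, i.e.
 * PAGE_SIZE == 0x1000):
 *
 *   uvm_addr_align_forward(0x13000, 0x4000, 0x1000)  == 0x15000
 *   uvm_addr_align_backward(0x13000, 0x4000, 0x1000) == 0x11000
 *
 * Both results satisfy (result & (0x4000 - 1)) == 0x1000; the forward
 * variant never returns less than addr, the backward variant never more.
 */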
224 
225 /*
226  * Try to fit the requested space into the entry.
227  */
228 int
229 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
230     vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
231     vaddr_t align, vaddr_t offset,
232     vsize_t before_gap, vsize_t after_gap)
233 {
234 	vaddr_t	tmp;
235 	vsize_t	fspace;
236 
237 	if (low_addr > high_addr)
238 		return ENOMEM;
239 	fspace = high_addr - low_addr;
240 	if (fspace < before_gap + after_gap)
241 		return ENOMEM;
242 	if (fspace - before_gap - after_gap < sz)
243 		return ENOMEM;
244 
245 	/*
246 	 * Calculate lowest address.
247 	 */
248 	low_addr += before_gap;
249 	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
250 	if (low_addr < tmp)	/* Overflow during alignment. */
251 		return ENOMEM;
252 	if (high_addr - after_gap - sz < low_addr)
253 		return ENOMEM;
254 
255 	/*
256 	 * Calculate highest address.
257 	 */
258 	high_addr -= after_gap + sz;
259 	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
260 	if (high_addr > tmp)	/* Overflow during alignment. */
261 		return ENOMEM;
262 	if (low_addr > high_addr)
263 		return ENOMEM;
264 
265 	*min_result = low_addr;
266 	*max_result = high_addr;
267 	return 0;
268 }
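
/*
 * Worked example (again assuming PAGE_SIZE == 0x1000):
 *
 *   uvm_addr_fitspace(&min, &max, 0x10000, 0x20000, 0x3000,
 *       0x4000, 0, 0x1000, 0x1000)
 *
 * yields min == 0x14000 and max == 0x1c000: the gaps and size restrict
 * candidate start addresses to [0x11000, 0x1c000], and alignment rounds
 * the lower bound up to 0x14000, so the allocation may start at 0x14000,
 * 0x18000 or 0x1c000.
 */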
269 
270 
271 /*
272  * Initialize uvm_addr.
273  */
274 void
275 uvm_addr_init(void)
276 {
277 	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
278 	    IPL_VM, PR_WAITOK, "uaddr", NULL);
279 	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
280 	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
281 	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
282 	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
283 	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
284 	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);
285 
286 	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
287 	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
288 	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
289 }
290 
291 /*
292  * Invoke destructor function of uaddr.
293  */
294 void
295 uvm_addr_destroy(struct uvm_addr_state *uaddr)
296 {
297 	if (uaddr)
298 		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
299 }
300 
301 /*
302  * Move address forward to satisfy align, offset.
303  */
304 vaddr_t
305 uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
306 {
307 	vaddr_t result = (addr & ~(align - 1)) + offset;
308 	if (result < addr)
309 		result += align;
310 	return result;
311 }
312 
313 /*
314  * Move address backwards to satisfy align, offset.
315  */
316 vaddr_t
317 uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
318 {
319 	vaddr_t result = (addr & ~(align - 1)) + offset;
320 	if (result > addr)
321 		result -= align;
322 	return result;
323 }
324 
325 /*
326  * Directional first fit.
327  *
328  * Do a linear search for free space, starting at addr in entry.
329  * direction ==  1: search forward
330  * direction == -1: search backward
331  *
332  * Output: low <= addr <= high and entry will contain addr.
 * ENOMEM will be returned if no space is available.
 *
 * before_gap and after_gap describe the space that must be kept free
 * between the allocation and the preceding and following entries.
336  */
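/*
 * For example, the stack/brk selector below searches the brk area with
 *
 *   uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz, align,
 *       offset, -1, start, end - sz, 0, 0);
 *
 * i.e. no hint, searching backwards from end - sz down to start.
 */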
337 int
338 uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
339     struct vm_map_entry **entry_out, vaddr_t *addr_out,
340     vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
341     int direction, vaddr_t low, vaddr_t high,
342     vsize_t before_gap, vsize_t after_gap)
343 {
344 	struct vm_map_entry	*entry;
345 	vaddr_t			 low_addr, high_addr;
346 
347 	KASSERT(entry_out != NULL && addr_out != NULL);
348 	KASSERT(direction == -1 || direction == 1);
349 	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
350 	    (low & PAGE_MASK) == 0 &&
351 	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
352 	KASSERT(high + sz > high); /* Check for overflow. */
353 
354 	/*
355 	 * Hint magic.
356 	 */
357 	if (hint == 0)
358 		hint = (direction == 1 ? low : high);
359 	else if (hint > high) {
360 		if (direction != -1)
361 			return ENOMEM;
362 		hint = high;
363 	} else if (hint < low) {
364 		if (direction != 1)
365 			return ENOMEM;
366 		hint = low;
367 	}
368 
369 	for (entry = uvm_map_entrybyaddr(&map->addr,
370 	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
371 	    entry = (direction == 1 ?
372 	    RBT_NEXT(uvm_map_addr, entry) :
373 	    RBT_PREV(uvm_map_addr, entry))) {
374 		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
375 		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
376 			break;
377 		}
378 
379 		if (uvm_addr_fitspace(&low_addr, &high_addr,
380 		    MAX(low, VMMAP_FREE_START(entry)),
381 		    MIN(high, VMMAP_FREE_END(entry)),
382 		    sz, align, offset, before_gap, after_gap) == 0) {
383 			*entry_out = entry;
384 			if (hint >= low_addr && hint <= high_addr) {
385 				*addr_out = hint;
386 			} else {
387 				*addr_out = (direction == 1 ?
388 				    low_addr : high_addr);
389 			}
390 			return 0;
391 		}
392 	}
393 
394 	return ENOMEM;
395 }
396 
397 /*
398  * Invoke address selector of uaddr.
399  * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
400  *
 * Will invoke uvm_map_isavail to fill in last_out.
402  */
403 int
404 uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
405     struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
406     vaddr_t *addr_out,
407     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
408 {
409 	int error;
410 
411 	if (uaddr == NULL)
412 		return ENOMEM;
413 
414 	hint &= ~((vaddr_t)PAGE_MASK);
415 	if (hint != 0 &&
416 	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
417 		return ENOMEM;
418 
419 	vm_map_assert_anylock(map);
420 
421 	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
422 	    entry_out, addr_out, sz, align, offset, prot, hint);
423 
424 	if (error == 0) {
425 		KASSERT(*entry_out != NULL);
426 		*last_out = NULL;
427 		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
428 		    *addr_out, sz)) {
429 			panic("uvm_addr_invoke: address selector %p "
430 			    "(%s 0x%lx-0x%lx) "
431 			    "returned unavailable address 0x%lx sz 0x%lx",
432 			    uaddr, uaddr->uaddr_functions->uaddr_name,
433 			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
434 			    *addr_out, sz);
435 		}
436 	}
437 
438 	return error;
439 }
440 
441 #if defined(DEBUG) || defined(DDB)
442 void
443 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
444     int (*pr)(const char *, ...))
445 {
446 	if (uaddr == NULL) {
447 		(*pr)("- uvm_addr %s: NULL\n", slot);
448 		return;
449 	}
450 
451 	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
452 	    uaddr->uaddr_functions->uaddr_name,
453 	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
454 	if (uaddr->uaddr_functions->uaddr_print == NULL)
455 		return;
456 
457 	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
458 }
459 #endif /* DEBUG || DDB */
460 
461 /*
462  * Destroy a uvm_addr_state structure.
463  * The uaddr must have been previously allocated from uaddr_state_pool.
464  */
465 void
466 uaddr_destroy(struct uvm_addr_state *uaddr)
467 {
468 	pool_put(&uaddr_pool, uaddr);
469 }
470 
471 
472 #if 0
473 /*
474  * Linear allocator.
475  * This allocator uses a first-fit algorithm.
476  *
477  * If hint is set, search will start at the hint position.
478  * Only searches forward.
479  */
480 
481 const struct uvm_addr_functions uaddr_lin_functions = {
482 	.uaddr_select = &uaddr_lin_select,
483 	.uaddr_destroy = &uaddr_destroy,
484 	.uaddr_name = "uaddr_lin"
485 };
486 
487 struct uvm_addr_state *
488 uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
489 {
490 	struct uvm_addr_state *uaddr;
491 
492 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
493 	uaddr->uaddr_minaddr = minaddr;
494 	uaddr->uaddr_maxaddr = maxaddr;
495 	uaddr->uaddr_functions = &uaddr_lin_functions;
496 	return uaddr;
497 }
498 
499 int
500 uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
501     struct vm_map_entry **entry_out, vaddr_t *addr_out,
502     vsize_t sz, vaddr_t align, vaddr_t offset,
503     vm_prot_t prot, vaddr_t hint)
504 {
505 	vaddr_t guard_sz;
506 
507 	/*
508 	 * Deal with guardpages: search for space with one extra page.
509 	 */
510 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
511 
512 	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
513 		return ENOMEM;
514 	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
515 	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
516 	    0, guard_sz);
517 }
518 #endif
519 
520 /*
521  * Randomized allocator.
522  * This allocator use uvm_map_hint to acquire a random address and searches
523  * from there.
524  */
525 
526 const struct uvm_addr_functions uaddr_rnd_functions = {
527 	.uaddr_select = &uaddr_rnd_select,
528 	.uaddr_free_insert = &uaddr_rnd_insert,
529 	.uaddr_free_remove = &uaddr_rnd_remove,
530 	.uaddr_destroy = &uaddr_rnd_destroy,
531 #if defined(DEBUG) || defined(DDB)
532 #if 0
533 	.uaddr_print = &uaddr_rnd_print,
534 #endif
535 #endif /* DEBUG || DDB */
536 	.uaddr_name = "uaddr_rnd"
537 };
538 
539 struct uvm_addr_state *
540 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
541 {
542 	struct uaddr_rnd_state *uaddr;
543 
544 	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
545 	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
546 	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
547 	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
548 #if 0
549 	TAILQ_INIT(&uaddr->ur_free);
550 #endif
551 	return &uaddr->ur_uaddr;
552 }
553 
554 int
555 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
556     struct vm_map_entry **entry_out, vaddr_t *addr_out,
557     vsize_t sz, vaddr_t align, vaddr_t offset,
558     vm_prot_t prot, vaddr_t hint)
559 {
560 	struct vmspace		*vm;
561 	vaddr_t			 minaddr, maxaddr;
562 	vaddr_t			 guard_sz;
563 	vaddr_t			 low_addr, high_addr;
564 	struct vm_map_entry	*entry, *next;
565 	vsize_t			 before_gap, after_gap;
566 	vaddr_t			 tmp;
567 
568 	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
569 	vm = (struct vmspace *)map;
570 
571 	/* Deal with guardpages: search for space with one extra page. */
572 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
573 
574 	if (uaddr->uaddr_maxaddr - guard_sz < sz)
575 		return ENOMEM;
576 	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
577 	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
578 	    align, offset);
579 
580 	/* Quick fail if the allocation won't fit. */
581 	if (minaddr >= maxaddr)
582 		return ENOMEM;
583 
584 	/* Select a hint. */
585 	if (hint == 0)
586 		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
587 	/* Clamp hint to uaddr range. */
588 	hint = MIN(MAX(hint, minaddr), maxaddr);
589 
590 	/* Align hint to align,offset parameters. */
591 	tmp = hint;
592 	hint = uvm_addr_align_forward(tmp, align, offset);
593 	/* Check for overflow during alignment. */
594 	if (hint < tmp || hint > maxaddr)
595 		return ENOMEM; /* Compatibility mode: never look backwards. */
596 
597 	before_gap = 0;
598 	after_gap = guard_sz;
599 	hint -= MIN(hint, before_gap);
600 
601 	/*
602 	 * Use the augmented address tree to look up the first entry
603 	 * at or after hint with sufficient space.
604 	 *
	 * This code is the original optimized code, but it can fail when the
	 * subtree it inspects has sufficient space yet no address in it
	 * satisfies the alignment constraint.
608 	 *
609 	 * Guard: subtree is not exhausted and max(fspace) >= required.
610 	 */
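	/*
	 * fspace_augment caches the largest fspace in the subtree rooted at
	 * each entry (maintained by uvm_map), which is what allows whole
	 * subtrees to be skipped in the walk below.
	 */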
611 	entry = uvm_map_entrybyaddr(&map->addr, hint);
612 
613 	/* Walk up the tree, until there is at least sufficient space. */
614 	while (entry != NULL &&
615 	    entry->fspace_augment < before_gap + after_gap + sz)
616 		entry = RBT_PARENT(uvm_map_addr, entry);
617 
618 	while (entry != NULL) {
619 		/* Test if this fits. */
620 		if (VMMAP_FREE_END(entry) > hint &&
621 		    uvm_map_uaddr_e(map, entry) == uaddr &&
622 		    uvm_addr_fitspace(&low_addr, &high_addr,
623 		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
624 		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
625 		    sz, align, offset, before_gap, after_gap) == 0) {
626 			*entry_out = entry;
627 			if (hint >= low_addr && hint <= high_addr)
628 				*addr_out = hint;
629 			else
630 				*addr_out = low_addr;
631 			return 0;
632 		}
633 
		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
635 		next = RBT_RIGHT(uvm_map_addr, entry);
636 		if (next != NULL &&
637 		    next->fspace_augment >= before_gap + after_gap + sz) {
638 			entry = next;
639 			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
640 			    NULL)
641 				entry = next;
642 		} else {
643 do_parent:
644 			next = RBT_PARENT(uvm_map_addr, entry);
645 			if (next == NULL)
646 				entry = NULL;
647 			else if (RBT_LEFT(uvm_map_addr, next) == entry)
648 				entry = next;
649 			else {
650 				entry = next;
651 				goto do_parent;
652 			}
653 		}
654 	}
655 
656 	/* Lookup failed. */
657 	return ENOMEM;
658 }
659 
660 /*
661  * Destroy a uaddr_rnd_state structure.
662  */
663 void
664 uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
665 {
666 	pool_put(&uaddr_rnd_pool, uaddr);
667 }
668 
669 /*
 * Add entry to tailq (no-op: the rnd allocator no longer keeps its own
 * free list, see the #if 0 above).
671  */
672 void
673 uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
674     struct vm_map_entry *entry)
675 {
676 	return;
677 }
678 
679 /*
 * Remove entry from tailq (likewise a no-op).
681  */
682 void
683 uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
684     struct vm_map_entry *entry)
685 {
686 	return;
687 }
688 
689 #if 0
690 #if defined(DEBUG) || defined(DDB)
691 void
692 uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
693     int (*pr)(const char*, ...))
694 {
695 	struct vm_map_entry	*entry;
696 	struct uaddr_rnd_state	*uaddr;
697 	vaddr_t			 addr;
698 	size_t			 count;
699 	vsize_t			 space;
700 
701 	uaddr = (struct uaddr_rnd_state *)uaddr_p;
702 	addr = 0;
703 	count = 0;
704 	space = 0;
705 	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
706 		count++;
707 		space += entry->fspace;
708 
709 		if (full) {
710 			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
711 			    entry, entry->start, entry->end,
712 			    entry->guard, entry->fspace);
713 			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
714 			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
715 		}
716 		if (entry->start < addr) {
717 			if (!full)
718 				(*pr)("\tentry %p: 0x%lx-0x%lx "
719 				    "G=0x%lx F=0x%lx\n",
720 				    entry, entry->start, entry->end,
721 				    entry->guard, entry->fspace);
722 			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
723 			    entry->start, addr);
724 		}
725 
726 		addr = VMMAP_FREE_END(entry);
727 	}
728 	(*pr)("\t0x%lu entries, 0x%lx free bytes\n", count, space);
729 }
730 #endif /* DEBUG || DDB */
731 #endif
732 
733 /*
734  * Kernel allocation bootstrap logic.
735  */
736 
737 const struct uvm_addr_functions uaddr_kernel_functions = {
738 	.uaddr_select = &uaddr_kbootstrap_select,
739 	.uaddr_destroy = &uaddr_kbootstrap_destroy,
740 	.uaddr_name = "uaddr_kbootstrap"
741 };
742 
743 /*
744  * Select an address from the map.
745  *
746  * This function ignores the uaddr spec and instead uses the map directly.
747  * Because of that property, the uaddr algorithm can be shared across all
748  * kernel maps.
749  */
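/*
 * Only free space that ends at or below uvm_maxkaddr is considered, and
 * no guard gaps are requested (before_gap == after_gap == 0).
 */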
750 int
751 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
752     struct vm_map_entry **entry_out, vaddr_t *addr_out,
753     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
754 {
755 	vaddr_t tmp;
756 
757 	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
758 		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
759 		    uvm_addr_fitspace(addr_out, &tmp,
760 		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
761 		    sz, align, offset, 0, 0) == 0)
762 			return 0;
763 	}
764 
765 	return ENOMEM;
766 }
767 
768 /*
769  * Don't destroy the kernel bootstrap allocator.
770  */
771 void
772 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
773 {
774 	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
775 }
776 
777 #ifndef SMALL_KERNEL
778 /*
779  * Best fit algorithm.
780  */
781 
782 const struct uvm_addr_functions uaddr_bestfit_functions = {
783 	.uaddr_select = &uaddr_bestfit_select,
784 	.uaddr_free_insert = &uaddr_bestfit_insert,
785 	.uaddr_free_remove = &uaddr_bestfit_remove,
786 	.uaddr_destroy = &uaddr_bestfit_destroy,
787 	.uaddr_name = "uaddr_bestfit"
788 };
789 
790 struct uvm_addr_state *
791 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
792 {
793 	struct uaddr_bestfit_state *uaddr;
794 
795 	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
796 	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
797 	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
798 	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
799 	RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
800 	return &uaddr->ubf_uaddr;
801 }
802 
803 void
804 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
805 {
806 	pool_put(&uaddr_bestfit_pool, uaddr);
807 }
808 
809 void
810 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
811     struct vm_map_entry *entry)
812 {
813 	struct uaddr_bestfit_state	*uaddr;
814 	struct vm_map_entry		*rb_rv;
815 
816 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
817 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
818 	    NULL) {
819 		panic("%s: duplicate insertion: state %p "
820 		    "inserting %p, colliding with %p", __func__,
821 		    uaddr, entry, rb_rv);
822 	}
823 }
824 
825 void
826 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
827     struct vm_map_entry *entry)
828 {
829 	struct uaddr_bestfit_state	*uaddr;
830 
831 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
832 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
833 		panic("%s: entry was not in tree", __func__);
834 }
835 
836 int
837 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
838     struct vm_map_entry **entry_out, vaddr_t *addr_out,
839     vsize_t sz, vaddr_t align, vaddr_t offset,
840     vm_prot_t prot, vaddr_t hint)
841 {
842 	vaddr_t				 min, max;
843 	struct uaddr_bestfit_state	*uaddr;
844 	struct vm_map_entry		*entry;
845 	vsize_t				 guardsz;
846 
847 	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
848 	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
849 	if (sz + guardsz < sz)
850 		return ENOMEM;
851 
852 	/*
853 	 * Find smallest item on freelist capable of holding item.
854 	 * Deal with guardpages: search for space with one extra page.
855 	 */
856 	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
857 	if (entry == NULL)
858 		return ENOMEM;
859 
860 	/*
861 	 * Walk the tree until we find an entry that fits.
862 	 */
863 	while (uvm_addr_fitspace(&min, &max,
864 	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
865 	    sz, align, offset, 0, guardsz) != 0) {
866 		entry = RBT_NEXT(uaddr_free_rbtree, entry);
867 		if (entry == NULL)
868 			return ENOMEM;
869 	}
870 
871 	/*
872 	 * Return the address that generates the least fragmentation.
873 	 */
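	/*
	 * Placing the allocation at min leaves a fragment of
	 * min - VMMAP_FREE_START(entry) below it; placing it at max leaves
	 * VMMAP_FREE_END(entry) - guardsz - sz - max above it.  Pick the
	 * placement with the smaller leftover fragment.
	 */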
874 	*entry_out = entry;
875 	*addr_out = (min - VMMAP_FREE_START(entry) <=
876 	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
877 	    min : max);
878 	return 0;
879 }
880 #endif /* !SMALL_KERNEL */
881 
882 
883 #ifndef SMALL_KERNEL
884 /*
885  * A userspace allocator based on pivots.
886  */
887 
888 const struct uvm_addr_functions uaddr_pivot_functions = {
889 	.uaddr_select = &uaddr_pivot_select,
890 	.uaddr_free_insert = &uaddr_pivot_insert,
891 	.uaddr_free_remove = &uaddr_pivot_remove,
892 	.uaddr_destroy = &uaddr_pivot_destroy,
893 #if defined(DEBUG) || defined(DDB)
894 	.uaddr_print = &uaddr_pivot_print,
895 #endif /* DEBUG || DDB */
896 	.uaddr_name = "uaddr_pivot"
897 };
898 
899 /*
900  * A special random function for pivots.
901  *
902  * This function will return:
903  * - a random number
904  * - a multiple of PAGE_SIZE
905  * - at least PAGE_SIZE
906  *
 * The random function is slightly more likely to return a small number.
908  */
909 vsize_t
910 uaddr_pivot_random(void)
911 {
912 	int			r;
913 
914 	/*
	 * The sum of two uniform dice (each 0..PIVOT_RND-1) has a triangular
	 * distribution that peaks in the middle.  We fold that curve at its
	 * peak, PIVOT_RND - 1 (think of a graph on a piece of paper, that
	 * you fold).
	 *
	 * Because of the fold, small values of r are the most likely outcome.
921 	 */
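	/*
	 * With the current PIVOT_RND of 8, r = |d1 + d2 - 7| for d1, d2
	 * uniform in 0..7, so P(r=0) = 8/64, P(r=1) = 14/64, P(r=2) = 12/64,
	 * ..., P(r=7) = 2/64.  The returned gap is therefore 1..8 pages,
	 * with a 2 page gap the most likely outcome.
	 */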
922 	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
923 	    (PIVOT_RND - 1);
924 	if (r < 0)
925 		r = -r;
926 
927 	/*
928 	 * Make the returned value at least PAGE_SIZE and a multiple of
929 	 * PAGE_SIZE.
930 	 */
931 	return (vaddr_t)(1 + r) << PAGE_SHIFT;
932 }
933 
934 /*
935  * Select a new pivot.
936  *
937  * A pivot must:
 * - be chosen at random
 * - have a randomly chosen gap between it and the start of the uaddr range
 * - have a randomly chosen gap between it and the end of the uaddr range
941  *
942  * Furthermore, the pivot must provide sufficient space for the allocation.
943  * The addr will be set to the selected address.
944  *
945  * Returns ENOMEM on failure.
946  */
947 int
948 uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
949     struct uaddr_pivot *pivot,
950     struct vm_map_entry **entry_out, vaddr_t *addr_out,
951     vsize_t sz, vaddr_t align, vaddr_t offset,
952     vsize_t before_gap, vsize_t after_gap)
953 {
954 	struct vm_map_entry		*entry, *found;
955 	vaddr_t				 minaddr, maxaddr;
956 	vsize_t				 dist;
957 	vaddr_t				 found_minaddr, found_maxaddr;
958 	vaddr_t				 min, max;
959 	vsize_t				 arc4_arg;
960 	int				 fit_error;
961 	u_int32_t			 path;
962 
963 	minaddr = uaddr->up_uaddr.uaddr_minaddr;
964 	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
965 	KASSERT(minaddr < maxaddr);
966 #ifdef DIAGNOSTIC
967 	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
968 		panic("uaddr_pivot_newpivot: cannot grant random pivot "
969 		    "in area less than 2 pages (size = 0x%lx)",
970 		    maxaddr - minaddr);
971 	}
972 #endif /* DIAGNOSTIC */
973 
974 	/*
975 	 * Gap calculation: 1/32 of the size of the managed area.
976 	 *
977 	 * At most: sufficient to not get truncated at arc4random.
978 	 * At least: 2 PAGE_SIZE
979 	 *
980 	 * minaddr and maxaddr will be changed according to arc4random.
981 	 */
982 	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
983 	if (dist >> PAGE_SHIFT > 0xffffffff) {
984 		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
985 		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
986 	} else {
987 		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
988 		    PAGE_SHIFT;
989 		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
990 		    PAGE_SHIFT;
991 	}
992 
993 	/*
994 	 * A very fast way to find an entry that will be large enough
995 	 * to hold the allocation, but still is found more or less
996 	 * randomly: the tree path selector has a 50% chance to go for
997 	 * a bigger or smaller entry.
998 	 *
999 	 * Note that the memory may actually be available,
1000 	 * but the fragmentation may be so bad and the gaps chosen
1001 	 * so unfortunately, that the allocation will not succeed.
1002 	 * Or the alignment can only be satisfied by an entry that
1003 	 * is not visited in the randomly selected path.
1004 	 *
1005 	 * This code finds an entry with sufficient space in O(log n) time.
1006 	 */
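	/*
	 * path supplies the random left/right decisions for the descent,
	 * one bit per step, consumed least-significant bit first; no bit is
	 * consumed when the walk is forced right because the current entry
	 * cannot fit the request.
	 */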
1007 	path = arc4random();
1008 	found = NULL;
1009 	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
1010 	while (entry != NULL) {
1011 		fit_error = uvm_addr_fitspace(&min, &max,
1012 		    MAX(VMMAP_FREE_START(entry), minaddr),
1013 		    MIN(VMMAP_FREE_END(entry), maxaddr),
1014 		    sz, align, offset, before_gap, after_gap);
1015 
1016 		/* It fits, save this entry. */
1017 		if (fit_error == 0) {
1018 			found = entry;
1019 			found_minaddr = min;
1020 			found_maxaddr = max;
1021 		}
1022 
1023 		/* Next. */
1024 		if (fit_error != 0)
1025 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1026 		else if	((path & 0x1) == 0) {
1027 			path >>= 1;
1028 			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
1029 		} else {
1030 			path >>= 1;
1031 			entry = RBT_LEFT(uaddr_free_rbtree, entry);
1032 		}
1033 	}
1034 	if (found == NULL)
		return ENOMEM;	/* No large enough region found. */
1036 
1037 	/*
1038 	 * Calculate a random address within found.
1039 	 *
1040 	 * found_minaddr and found_maxaddr are already aligned, so be sure
1041 	 * to select a multiple of align as the offset in the entry.
1042 	 * Preferably, arc4random_uniform is used to provide no bias within
1043 	 * the entry.
	 * However, if the size of the entry exceeds arc4random_uniform's
	 * argument limit, we simply use arc4random() (thus limiting the
	 * offset to at most 4 GB).
1047 	 */
1048 	if (found_maxaddr == found_minaddr)
1049 		*addr_out = found_minaddr;
1050 	else {
1051 		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
1052 		arc4_arg = found_maxaddr - found_minaddr;
1053 		if (arc4_arg > 0xffffffff) {
1054 			*addr_out = found_minaddr +
1055 			    (arc4random() & ~(align - 1));
1056 		} else {
1057 			*addr_out = found_minaddr +
1058 			    (arc4random_uniform(arc4_arg) & ~(align - 1));
1059 		}
1060 	}
1061 	/* Address was found in this entry. */
1062 	*entry_out = found;
1063 
1064 	/*
1065 	 * Set up new pivot and return selected address.
1066 	 *
1067 	 * Depending on the direction of the pivot, the pivot must be placed
1068 	 * at the bottom or the top of the allocation:
1069 	 * - if the pivot moves upwards, place the pivot at the top of the
1070 	 *   allocation,
1071 	 * - if the pivot moves downwards, place the pivot at the bottom
1072 	 *   of the allocation.
1073 	 */
1074 	pivot->entry = found;
1075 	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
1076 	if (pivot->dir > 0)
1077 		pivot->addr = *addr_out + sz;
1078 	else
1079 		pivot->addr = *addr_out;
1080 	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
1081 	return 0;
1082 }
1083 
1084 /*
1085  * Pivot selector.
1086  *
1087  * Each time the selector is invoked, it will select a random pivot, which
1088  * it will use to select memory with. The memory will be placed at the pivot,
1089  * with a randomly sized gap between the allocation and the pivot.
1090  * The pivot will then move so it will never revisit this address.
1091  *
1092  * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
1093  * expired, it will be replaced with a newly created pivot. Pivots also
1094  * automatically expire if they fail to provide memory for an allocation.
1095  *
1096  * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
1097  * which will ensure the pivot points at memory in such a way that the
1098  * allocation will succeed.
1099  * As an added bonus, the uaddr_pivot_newpivot() function will perform the
1100  * allocation immediately and move the pivot as appropriate.
1101  *
1102  * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
1103  * allocation to succeed, it will not create a new pivot and the allocation
1104  * will fail.
1105  *
1106  * A pivot running into used memory will automatically expire (because it will
1107  * fail to allocate).
1108  *
1109  * Characteristics of the allocator:
1110  * - best case, an allocation is O(log N)
 *   (it would be O(1), if it weren't for the need to check if the memory is
1112  *   free; although that can be avoided...)
1113  * - worst case, an allocation is O(log N)
1114  *   (the uaddr_pivot_newpivot() function has that complexity)
1115  * - failed allocations always take O(log N)
1116  *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
1117  */
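/*
 * With the current tuning (NUM_PIVOTS == 16, PIVOT_EXPIRE == 1024), every
 * successful allocation through a pivot consumes one tick of that pivot,
 * so on average one pivot is re-randomized per PIVOT_EXPIRE allocations
 * (more often when pivots fail and are replaced early).
 */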
1118 int
1119 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1120     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1121     vsize_t sz, vaddr_t align, vaddr_t offset,
1122     vm_prot_t prot, vaddr_t hint)
1123 {
1124 	struct uaddr_pivot_state	*uaddr;
1125 	struct vm_map_entry		*entry;
1126 	struct uaddr_pivot		*pivot;
1127 	vaddr_t				 min, max;
1128 	vsize_t				 before_gap, after_gap;
1129 	int				 err;
1130 
1131 	/*
1132 	 * When we have a hint, use the rnd allocator that finds the
1133 	 * area that is closest to the hint, if there is such an area.
1134 	 */
1135 	if (hint != 0) {
1136 		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
1137 		    sz, align, offset, prot, hint) == 0)
1138 			return 0;
1139 		return ENOMEM;
1140 	}
1141 
1142 	/*
	 * Select a random pivot and random gap sizes around the allocation.
1144 	 */
1145 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1146 	pivot = &uaddr->up_pivots[
1147 	    arc4random_uniform(nitems(uaddr->up_pivots))];
1148 	before_gap = uaddr_pivot_random();
1149 	after_gap = uaddr_pivot_random();
1150 	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
1151 		goto expired;	/* Pivot is invalid (null or expired). */
1152 
1153 	/*
1154 	 * Attempt to use the pivot to map the entry.
1155 	 */
1156 	entry = pivot->entry;
1157 	if (pivot->dir > 0) {
1158 		if (uvm_addr_fitspace(&min, &max,
1159 		    MAX(VMMAP_FREE_START(entry), pivot->addr),
1160 		    VMMAP_FREE_END(entry), sz, align, offset,
1161 		    before_gap, after_gap) == 0) {
1162 			*addr_out = min;
1163 			*entry_out = entry;
1164 			pivot->addr = min + sz;
1165 			pivot->expire--;
1166 			return 0;
1167 		}
1168 	} else {
1169 		if (uvm_addr_fitspace(&min, &max,
1170 		    VMMAP_FREE_START(entry),
1171 		    MIN(VMMAP_FREE_END(entry), pivot->addr),
1172 		    sz, align, offset, before_gap, after_gap) == 0) {
1173 			*addr_out = max;
1174 			*entry_out = entry;
1175 			pivot->addr = max;
1176 			pivot->expire--;
1177 			return 0;
1178 		}
1179 	}
1180 
1181 expired:
1182 	/*
1183 	 * Pivot expired or allocation failed.
1184 	 * Use pivot selector to do the allocation and find a new pivot.
1185 	 */
1186 	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
1187 	    sz, align, offset, before_gap, after_gap);
1188 	return err;
1189 }
1190 
1191 /*
1192  * Free the pivot.
1193  */
1194 void
1195 uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
1196 {
1197 	pool_put(&uaddr_pivot_pool, uaddr);
1198 }
1199 
1200 /*
1201  * Insert an entry with free space in the space tree.
1202  */
1203 void
1204 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1205     struct vm_map_entry *entry)
1206 {
1207 	struct uaddr_pivot_state	*uaddr;
1208 	struct vm_map_entry		*rb_rv;
1209 	struct uaddr_pivot		*p;
1210 	vaddr_t				 check_addr;
1211 	vaddr_t				 start, end;
1212 
1213 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1214 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1215 	    NULL) {
1216 		panic("%s: duplicate insertion: state %p "
1217 		    "inserting entry %p which collides with %p", __func__,
1218 		    uaddr, entry, rb_rv);
1219 	}
1220 
1221 	start = VMMAP_FREE_START(entry);
1222 	end = VMMAP_FREE_END(entry);
1223 
1224 	/*
1225 	 * Update all pivots that are contained in this entry.
1226 	 */
1227 	for (p = &uaddr->up_pivots[0];
1228 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1229 		check_addr = p->addr;
1230 		if (check_addr == 0)
1231 			continue;
1232 		if (p->dir < 0)
1233 			check_addr--;
1234 
1235 		if (start <= check_addr &&
1236 		    check_addr < end) {
1237 			KASSERT(p->entry == NULL);
1238 			p->entry = entry;
1239 		}
1240 	}
1241 }
1242 
1243 /*
1244  * Remove an entry with free space from the space tree.
1245  */
1246 void
1247 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1248     struct vm_map_entry *entry)
1249 {
1250 	struct uaddr_pivot_state	*uaddr;
1251 	struct uaddr_pivot		*p;
1252 
1253 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1254 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1255 		panic("%s: entry was not in tree", __func__);
1256 
1257 	/*
1258 	 * Inform any pivot with this entry that the entry is gone.
1259 	 * Note that this does not automatically invalidate the pivot.
1260 	 */
1261 	for (p = &uaddr->up_pivots[0];
1262 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1263 		if (p->entry == entry)
1264 			p->entry = NULL;
1265 	}
1266 }
1267 
1268 /*
1269  * Create a new pivot selector.
1270  *
1271  * Initially, all pivots are in the expired state.
1272  * Two reasons for this:
 * - it keeps creation cheap: no up-front pivot selection is required
 * - pivots selected on demand spread better, because each selection is
 *   influenced by the allocations that preceded it: later pivots tend to
 *   end up in different segments of free memory, left behind by earlier
 *   allocations.
1278  */
1279 struct uvm_addr_state *
1280 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1281 {
1282 	struct uaddr_pivot_state *uaddr;
1283 
1284 	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1285 	uaddr->up_uaddr.uaddr_minaddr = minaddr;
1286 	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1287 	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
1288 	RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
1289 	memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
1290 
1291 	return &uaddr->up_uaddr;
1292 }
1293 
1294 #if defined(DEBUG) || defined(DDB)
1295 /*
1296  * Print the uaddr_pivot_state.
1297  *
1298  * If full, a listing of all entries in the state will be provided.
1299  */
1300 void
1301 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1302     int (*pr)(const char *, ...))
1303 {
1304 	struct uaddr_pivot_state	*uaddr;
1305 	struct uaddr_pivot		*pivot;
1306 	struct vm_map_entry		*entry;
1307 	int				 i;
1308 	vaddr_t				 check_addr;
1309 
1310 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1311 
1312 	for (i = 0; i < NUM_PIVOTS; i++) {
1313 		pivot = &uaddr->up_pivots[i];
1314 
		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
1316 		    pivot->addr, pivot->expire, pivot->dir);
1317 	}
1318 	if (!full)
1319 		return;
1320 
1321 	if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
1322 		(*pr)("\tempty\n");
1323 	/* Print list of free space. */
1324 	RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1325 		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1326 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1327 		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1328 
1329 		for (i = 0; i < NUM_PIVOTS; i++) {
1330 			pivot = &uaddr->up_pivots[i];
1331 			check_addr = pivot->addr;
1332 			if (check_addr == 0)
1333 				continue;
1334 			if (pivot->dir < 0)
1335 				check_addr--;
1336 
1337 			if (VMMAP_FREE_START(entry) <= check_addr &&
1338 			    check_addr < VMMAP_FREE_END(entry)) {
1339 				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
1340 				    i, pivot->addr);
1341 			}
1342 		}
1343 	}
1344 }
1345 #endif /* DEBUG || DDB */
1346 #endif /* !SMALL_KERNEL */
1347 
1348 #ifndef SMALL_KERNEL
1349 /*
1350  * Stack/break allocator.
1351  *
 * The stack area is grown in the direction opposite to stack growth;
 * the brk area is grown downward (because sbrk() grows upward).
 *
 * The brk area is searched first; if that fails, the stack area is
 * searched, with randomly sized guard gaps placed around the allocation.
1358  */
1359 const struct uvm_addr_functions uaddr_stack_brk_functions = {
1360 	.uaddr_select = &uaddr_stack_brk_select,
1361 	.uaddr_destroy = &uaddr_destroy,
1362 	.uaddr_name = "uaddr_stckbrk"
1363 };
1364 
1365 /*
1366  * Stack/brk address selector.
1367  */
1368 int
1369 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1370     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1371     vsize_t sz, vaddr_t align, vaddr_t offset,
1372     vm_prot_t prot, vaddr_t hint)
1373 {
1374 	vaddr_t			start;
1375 	vaddr_t			end;
1376 	vsize_t			before_gap;
1377 	vsize_t			after_gap;
1378 	int			dir;
1379 
1380 	/* Set up brk search strategy. */
1381 	start = MAX(map->b_start, uaddr->uaddr_minaddr);
1382 	end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1383 	before_gap = 0;
1384 	after_gap = 0;
1385 	dir = -1;	/* Opposite of brk() growth. */
1386 
1387 	if (end - start >= sz) {
1388 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1389 		    0, sz, align, offset, dir, start, end - sz,
1390 		    before_gap, after_gap) == 0)
1391 			return 0;
1392 	}
1393 
1394 	/* Set up stack search strategy. */
1395 	start = MAX(map->s_start, uaddr->uaddr_minaddr);
1396 	end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1397 	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1398 	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1399 #ifdef MACHINE_STACK_GROWS_UP
1400 	dir = -1;
1401 #else
1402 	dir =  1;
1403 #endif
1404 	if (end - start >= before_gap + after_gap &&
1405 	    end - start - before_gap - after_gap >= sz) {
1406 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1407 		    0, sz, align, offset, dir, start, end - sz,
1408 		    before_gap, after_gap) == 0)
			return 0;
1410 	}
1411 
1412 	return ENOMEM;
1413 }
1414 
1415 struct uvm_addr_state *
1416 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1417 {
1418 	struct uvm_addr_state* uaddr;
1419 
1420 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1421 	uaddr->uaddr_minaddr = minaddr;
1422 	uaddr->uaddr_maxaddr = maxaddr;
1423 	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1424 	return uaddr;
1425 }
1426 #endif /* !SMALL_KERNEL */
1427 
1428 
1429 #ifndef SMALL_KERNEL
1430 /*
1431  * Free space comparison.
1432  * Compares smaller free-space before larger free-space.
1433  */
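/*
 * Entries with equal fspace are ordered by start address, so the tree
 * has a total order and can hold multiple entries of the same size.
 */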
1434 static inline int
1435 uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
1436     const struct vm_map_entry *e2)
1437 {
1438 	if (e1->fspace != e2->fspace)
1439 		return (e1->fspace < e2->fspace ? -1 : 1);
1440 	return (e1->start < e2->start ? -1 : e1->start > e2->start);
1441 }
1442 
1443 RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
1444     uvm_mapent_fspace_cmp);
1445 #endif /* !SMALL_KERNEL */
1446