1 /*	$OpenBSD: uvm_addr.c,v 1.4 2012/06/06 04:54:36 matthew Exp $	*/
2 
3 /*
4  * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 /* #define DEBUG */
20 
21 #include <sys/param.h>
22 #include <uvm/uvm.h>
23 #include <uvm/uvm_addr.h>
24 #include <sys/pool.h>
25 #include <dev/rndvar.h>
26 
27 /* Max gap between hint allocations. */
28 #define UADDR_HINT_MAXGAP	(4 * PAGE_SIZE)
29 /* Number of pivots in pivot allocator. */
30 #define NUM_PIVOTS		16
31 /*
32  * Max number (inclusive) of pages the pivot allocator
33  * will place between allocations.
34  *
35  * The uaddr_pivot_random() function attempts to bias towards
36  * small space between allocations, so putting a large number here is fine.
37  */
38 #define PIVOT_RND		8
39 /*
40  * Number of allocations that a pivot can supply before expiring.
41  * When a pivot expires, a new pivot has to be found.
42  *
43  * Must be at least 1.
44  */
45 #define PIVOT_EXPIRE		1024
46 
47 
48 /* Pool with uvm_addr_state structures. */
49 struct pool uaddr_pool;
50 struct pool uaddr_hint_pool;
51 struct pool uaddr_bestfit_pool;
52 struct pool uaddr_pivot_pool;
53 struct pool uaddr_rnd_pool;
54 
55 /* uvm_addr state for hint based selector. */
56 struct uaddr_hint_state {
57 	struct uvm_addr_state		 uaddr;
58 	vsize_t				 max_dist;
59 };
60 
61 /* uvm_addr state for bestfit selector. */
62 struct uaddr_bestfit_state {
63 	struct uvm_addr_state		 ubf_uaddr;
64 	struct uaddr_free_rbtree	 ubf_free;
65 };
66 
67 /* uvm_addr state for rnd selector. */
68 struct uaddr_rnd_state {
69 	struct uvm_addr_state		 ur_uaddr;
70 	TAILQ_HEAD(, vm_map_entry)	 ur_free;
71 };
72 
73 /*
74  * Definition of a pivot in pivot selector.
75  */
76 struct uaddr_pivot {
77 	vaddr_t				 addr;	/* End of prev. allocation. */
78 	int				 expire;/* Best before date. */
79 	int				 dir;	/* Direction. */
80 	struct vm_map_entry		*entry; /* Will contain next alloc. */
81 };
82 /* uvm_addr state for pivot selector. */
83 struct uaddr_pivot_state {
84 	struct uvm_addr_state		 up_uaddr;
85 
86 	/* Free space tree, for fast pivot selection. */
87 	struct uaddr_free_rbtree	 up_free;
88 
89 	/* List of pivots. The pointers point to after the last allocation. */
90 	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
91 };
92 
93 /*
94  * Free space comparison.
95  * Compares smaller free-space before larger free-space.
96  */
97 static __inline int
98 uvm_mapent_fspace_cmp(struct vm_map_entry *e1, struct vm_map_entry *e2)
99 {
100 	if (e1->fspace != e2->fspace)
101 		return (e1->fspace < e2->fspace ? -1 : 1);
102 	return (e1->start < e2->start ? -1 : e1->start > e2->start);
103 }
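/*
 * Example ordering (illustrative): three entries with
 * (start, fspace) = (0x2000, 0x1000), (0x7000, 0x1000) and
 * (0x5000, 0x4000) sort as
 *	(0x2000, 0x1000) < (0x7000, 0x1000) < (0x5000, 0x4000)
 * i.e. smaller free space first, ties broken by lower start address.
 */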
104 
105 /* Forward declaration (see below). */
106 extern const struct uvm_addr_functions uaddr_kernel_functions;
107 struct uvm_addr_state uaddr_kbootstrap;
108 
109 
110 /*
111  * Support functions.
112  */
113 
114 #ifndef SMALL_KERNEL
115 struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
116 			    vsize_t);
117 #endif /* !SMALL_KERNEL */
118 void			 uaddr_kinsert(struct vm_map*, struct uvm_addr_state*,
119 			    struct vm_map_entry*);
120 void			 uaddr_kremove(struct vm_map*, struct uvm_addr_state*,
121 			    struct vm_map_entry*);
122 void			 uaddr_kbootstrapdestroy(struct uvm_addr_state*);
123 
124 void			 uaddr_destroy(struct uvm_addr_state*);
125 void			 uaddr_hint_destroy(struct uvm_addr_state*);
126 void			 uaddr_kbootstrap_destroy(struct uvm_addr_state*);
127 void			 uaddr_rnd_destroy(struct uvm_addr_state*);
128 void			 uaddr_bestfit_destroy(struct uvm_addr_state*);
129 void			 uaddr_pivot_destroy(struct uvm_addr_state*);
130 
131 int			 uaddr_lin_select(struct vm_map*,
132 			    struct uvm_addr_state*, struct vm_map_entry**,
133 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
134 			    vaddr_t);
135 int			 uaddr_kbootstrap_select(struct vm_map*,
136 			    struct uvm_addr_state*, struct vm_map_entry**,
137 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
138 			    vaddr_t);
139 int			 uaddr_rnd_select(struct vm_map*,
140 			    struct uvm_addr_state*, struct vm_map_entry**,
141 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
142 			    vaddr_t);
143 int			 uaddr_hint_select(struct vm_map*,
144 			    struct uvm_addr_state*, struct vm_map_entry**,
145 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
146 			    vaddr_t);
147 int			 uaddr_bestfit_select(struct vm_map*,
148 			    struct uvm_addr_state*, struct vm_map_entry**,
149 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
150 			    vaddr_t);
151 #ifndef SMALL_KERNEL
152 int			 uaddr_pivot_select(struct vm_map*,
153 			    struct uvm_addr_state*, struct vm_map_entry**,
154 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
155 			    vaddr_t);
156 int			 uaddr_stack_brk_select(struct vm_map*,
157 			    struct uvm_addr_state*, struct vm_map_entry**,
158 			    vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
159 			    vaddr_t);
160 #endif /* !SMALL_KERNEL */
161 
162 void			 uaddr_rnd_insert(struct vm_map*,
163 			    struct uvm_addr_state*, struct vm_map_entry*);
164 void			 uaddr_rnd_remove(struct vm_map*,
165 			    struct uvm_addr_state*, struct vm_map_entry*);
166 void			 uaddr_bestfit_insert(struct vm_map*,
167 			    struct uvm_addr_state*, struct vm_map_entry*);
168 void			 uaddr_bestfit_remove(struct vm_map*,
169 			    struct uvm_addr_state*, struct vm_map_entry*);
170 void			 uaddr_pivot_insert(struct vm_map*,
171 			    struct uvm_addr_state*, struct vm_map_entry*);
172 void			 uaddr_pivot_remove(struct vm_map*,
173 			    struct uvm_addr_state*, struct vm_map_entry*);
174 
175 #ifndef SMALL_KERNEL
176 vsize_t			 uaddr_pivot_random(void);
177 int			 uaddr_pivot_newpivot(struct vm_map*,
178 			    struct uaddr_pivot_state*, struct uaddr_pivot*,
179 			    struct vm_map_entry**, vaddr_t*,
180 			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
181 #endif /* !SMALL_KERNEL */
182 
183 #if defined(DEBUG) || defined(DDB)
184 void			 uaddr_pivot_print(struct uvm_addr_state*, boolean_t,
185 			    int (*)(const char*, ...));
186 void			 uaddr_rnd_print(struct uvm_addr_state*, boolean_t,
187 			    int (*)(const char*, ...));
188 #endif /* DEBUG || DDB */
189 
190 
191 #ifndef SMALL_KERNEL
192 /*
193  * Find smallest entry in tree that will fit sz bytes.
194  */
195 struct vm_map_entry*
196 uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
197 {
198 	struct vm_map_entry	*tmp, *res;
199 
200 	tmp = RB_ROOT(free);
201 	res = NULL;
202 	while (tmp) {
203 		if (tmp->fspace >= sz) {
204 			res = tmp;
205 			tmp = RB_LEFT(tmp, dfree.rbtree);
206 		} else if (tmp->fspace < sz)
207 			tmp = RB_RIGHT(tmp, dfree.rbtree);
208 	}
209 	return res;
210 }
211 #endif /* !SMALL_KERNEL */
212 
213 static __inline vaddr_t
214 uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
215 {
216 	vaddr_t adjusted;
217 
218 	KASSERT(offset < align || (align == 0 && offset == 0));
219 	KASSERT((align & (align - 1)) == 0);
220 	KASSERT((offset & PAGE_MASK) == 0);
221 
222 	align = MAX(align, PAGE_SIZE);
223 	adjusted = addr & ~(align - 1);
224 	adjusted += offset;
225 	return (adjusted < addr ? adjusted + align : adjusted);
226 }
227 
228 static __inline vaddr_t
229 uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
230 {
231 	vaddr_t adjusted;
232 
233 	KASSERT(offset < align || (align == 0 && offset == 0));
234 	KASSERT((align & (align - 1)) == 0);
235 	KASSERT((offset & PAGE_MASK) == 0);
236 
237 	align = MAX(align, PAGE_SIZE);
238 	adjusted = addr & ~(align - 1);
239 	adjusted += offset;
240 	return (adjusted > addr ? adjusted - align : adjusted);
241 }
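/*
 * Worked example for the two helpers above (illustrative; the numbers
 * assume a 4 KB PAGE_SIZE, which only matters when align < PAGE_SIZE):
 *
 *	addr = 0x123456, align = 0x10000, offset = 0x2000
 *
 * forward:  0x123456 & ~0xffff = 0x120000, plus offset = 0x122000;
 *	     that is below addr, so align is added once, yielding
 *	     0x132000 (the lowest conforming address >= addr).
 * backward: 0x120000 + 0x2000 = 0x122000; that is not above addr,
 *	     so 0x122000 is returned (the highest conforming
 *	     address <= addr).
 */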
242 
243 /*
244  * Try to fit the requested space into the entry.
245  */
246 int
247 uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
248     vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
249     vaddr_t align, vaddr_t offset,
250     vsize_t before_gap, vsize_t after_gap)
251 {
252 	vaddr_t	tmp;
253 	vsize_t	fspace;
254 
255 	if (low_addr > high_addr)
256 		return ENOMEM;
257 	fspace = high_addr - low_addr;
258 	if (fspace < sz + before_gap + after_gap)
259 		return ENOMEM;
260 
261 	/*
262 	 * Calculate lowest address.
263 	 */
264 	low_addr += before_gap;
265 	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
266 	if (low_addr < tmp)	/* Overflow during alignment. */
267 		return ENOMEM;
268 	if (high_addr - after_gap - sz < low_addr)
269 		return ENOMEM;
270 
271 	/*
272 	 * Calculate highest address.
273 	 */
274 	high_addr -= after_gap + sz;
275 	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
276 	if (high_addr > tmp)	/* Overflow during alignment. */
277 		return ENOMEM;
278 	if (low_addr > high_addr)
279 		return ENOMEM;
280 
281 	*min_result = low_addr;
282 	*max_result = high_addr;
283 	return 0;
284 }
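/*
 * Worked example (illustrative, assuming a 4 KB PAGE_SIZE):
 *
 *	low_addr = 0x1000, high_addr = 0x20000, sz = 0x3000,
 *	align = 0x4000, offset = 0, before_gap = after_gap = 0x1000
 *
 * Lowest candidate:  0x1000 + 0x1000 = 0x2000, aligned forward
 *		      to 0x4000.
 * Highest candidate: 0x20000 - 0x1000 - 0x3000 = 0x1c000, which is
 *		      already aligned.
 *
 * Hence *min_result = 0x4000 and *max_result = 0x1c000: any
 * 0x4000-aligned start in that range keeps both gaps free and still
 * fits the allocation.
 */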
285 
286 
287 /*
288  * Initialize uvm_addr.
289  */
290 void
291 uvm_addr_init()
292 {
293 	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state),
294 	    0, 0, 0, "uaddr", &pool_allocator_nointr);
295 	pool_init(&uaddr_hint_pool, sizeof(struct uaddr_hint_state),
296 	    0, 0, 0, "uaddrhint", &pool_allocator_nointr);
297 	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state),
298 	    0, 0, 0, "uaddrbestfit", &pool_allocator_nointr);
299 	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state),
300 	    0, 0, 0, "uaddrpivot", &pool_allocator_nointr);
301 	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state),
302 	    0, 0, 0, "uaddrrnd", &pool_allocator_nointr);
303 
304 	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
305 	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
306 	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
307 }
308 
309 /*
310  * Invoke destructor function of uaddr.
311  */
312 void
313 uvm_addr_destroy(struct uvm_addr_state *uaddr)
314 {
315 	if (uaddr)
316 		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
317 }
318 
319 /*
320  * Move address forward to satisfy align, offset.
321  */
322 vaddr_t
323 uvm_addr_align(vaddr_t addr, vaddr_t align, vaddr_t offset)
324 {
325 	vaddr_t result = (addr & ~(align - 1)) + offset;
326 	if (result < addr)
327 		result += align;
328 	return result;
329 }
330 
331 /*
332  * Move address backwards to satisfy align, offset.
333  */
334 vaddr_t
335 uvm_addr_align_back(vaddr_t addr, vaddr_t align, vaddr_t offset)
336 {
337 	vaddr_t result = (addr & ~(align - 1)) + offset;
338 	if (result > addr)
339 		result -= align;
340 	return result;
341 }
342 
343 /*
344  * Directional first fit.
345  *
346  * Do a linear search for free space, starting at addr in entry.
347  * direction ==  1: search forward
348  * direction == -1: search backward
349  *
350  * Output: low <= addr <= high and entry will contain addr.
351  * 0 will be returned if no space is available.
352  *
353  * before_gap and after_gap describe the space that must remain free before and after the allocation.
354  */
355 int
356 uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
357     struct vm_map_entry**entry_out, vaddr_t *addr_out,
358     vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
359     int direction, vaddr_t low, vaddr_t high,
360     vsize_t before_gap, vsize_t after_gap)
361 {
362 	struct vm_map_entry	*entry;
363 	vaddr_t			 low_addr, high_addr;
364 
365 	KASSERT(entry_out != NULL && addr_out != NULL);
366 	KASSERT(direction == -1 || direction == 1);
367 	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
368 	    (low & PAGE_MASK) == 0 &&
369 	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
370 	KASSERT(high + sz > high); /* Check for overflow. */
371 
372 	/*
373 	 * Hint magic.
374 	 */
375 	if (hint == 0)
376 		hint = (direction == 1 ? low : high);
377 	else if (hint > high) {
378 		if (direction != -1)
379 			return ENOMEM;
380 		hint = high;
381 	} else if (hint < low) {
382 		if (direction != 1)
383 			return ENOMEM;
384 		hint = low;
385 	}
386 
387 	for (entry = uvm_map_entrybyaddr(&map->addr,
388 	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
389 	    entry = (direction == 1 ?
390 	    RB_NEXT(uvm_map_addr, &map->addr, entry) :
391 	    RB_PREV(uvm_map_addr, &map->addr, entry))) {
392 		if (VMMAP_FREE_START(entry) > high ||
393 		    VMMAP_FREE_END(entry) < low) {
394 			break;
395 		}
396 
397 		if (uvm_addr_fitspace(&low_addr, &high_addr,
398 		    MAX(low, VMMAP_FREE_START(entry)),
399 		    MIN(high, VMMAP_FREE_END(entry)),
400 		    sz, align, offset, before_gap, after_gap) == 0) {
401 			*entry_out = entry;
402 			if (hint >= low_addr && hint <= high_addr) {
403 				*addr_out = hint;
404 			} else {
405 				*addr_out = (direction == 1 ?
406 				    low_addr : high_addr);
407 			}
408 			return 0;
409 		}
410 	}
411 
412 	return ENOMEM;
413 }
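/*
 * Illustrative example of the hint handling above (not from a real
 * caller; low, high and the sizes are made-up, page-aligned values):
 *
 *	uvm_addr_linsearch(map, uaddr, &entry, &addr,
 *	    0x20000000, 0x4000, 0, 0, -1, low, high, 0, 0)
 *
 * starts at the entry containing 0x1fffffff and walks towards lower
 * addresses; for the first free range that fits 0x4000 bytes it
 * returns the hint itself when the hint lies inside that range,
 * otherwise the highest fitting address in it.
 */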
414 
415 /*
416  * Invoke address selector of uaddr.
417  * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
418  *
419  * Will invoke uvm_addr_isavail to fill in last_out.
420  */
421 int
422 uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
423     struct vm_map_entry**entry_out, struct vm_map_entry**last_out,
424     vaddr_t *addr_out,
425     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
426 {
427 	int error;
428 
429 	if (uaddr == NULL)
430 		return ENOMEM;
431 
432 	hint &= ~((vaddr_t)PAGE_MASK);
433 	if (hint != 0 &&
434 	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
435 		return ENOMEM;
436 
437 	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
438 	    entry_out, addr_out, sz, align, offset, prot, hint);
439 
440 	if (error == 0) {
441 		KASSERT(*entry_out != NULL);
442 		*last_out = NULL;
443 		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
444 		    *addr_out, sz)) {
445 			panic("uvm_addr_invoke: address selector %p "
446 			    "(%s 0x%lx-0x%lx) "
447 			    "returned unavailable address 0x%lx",
448 			    uaddr, uaddr->uaddr_functions->uaddr_name,
449 			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
450 			    *addr_out);
451 		}
452 	}
453 
454 	return error;
455 }
456 
457 #if defined(DEBUG) || defined(DDB)
458 void
459 uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
460     int (*pr)(const char*, ...))
461 {
462 	if (uaddr == NULL) {
463 		(*pr)("- uvm_addr %s: NULL\n", slot);
464 		return;
465 	}
466 
467 	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
468 	    uaddr->uaddr_functions->uaddr_name,
469 	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
470 	if (uaddr->uaddr_functions->uaddr_print == NULL)
471 		return;
472 
473 	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
474 }
475 #endif /* DEBUG || DDB */
476 
477 /*
478  * Destroy a uvm_addr_state structure.
479  * The uaddr must have been previously allocated from uaddr_state_pool.
480  */
481 void
482 uaddr_destroy(struct uvm_addr_state *uaddr)
483 {
484 	pool_put(&uaddr_pool, uaddr);
485 }
486 
487 
488 /*
489  * Linear allocator.
490  * This allocator uses a first-fit algorithm.
491  *
492  * If hint is set, search will start at the hint position.
493  * Only searches forward.
494  */
495 
496 const struct uvm_addr_functions uaddr_lin_functions = {
497 	.uaddr_select = &uaddr_lin_select,
498 	.uaddr_destroy = &uaddr_destroy,
499 	.uaddr_name = "uaddr_lin"
500 };
501 
502 struct uvm_addr_state*
503 uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
504 {
505 	struct uvm_addr_state* uaddr;
506 
507 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
508 	uaddr->uaddr_minaddr = minaddr;
509 	uaddr->uaddr_maxaddr = maxaddr;
510 	uaddr->uaddr_functions = &uaddr_lin_functions;
511 	return uaddr;
512 }
513 
514 int
515 uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
516     struct vm_map_entry**entry_out, vaddr_t *addr_out,
517     vsize_t sz, vaddr_t align, vaddr_t offset,
518     vm_prot_t prot, vaddr_t hint)
519 {
520 	vaddr_t guard_sz;
521 
522 	/*
523 	 * Deal with guardpages: search for space with one extra page.
524 	 */
525 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
526 
527 	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
528 		return ENOMEM;
529 	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
530 	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
531 	    0, guard_sz);
532 }
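/*
 * Sketch of how a caller might plug this selector into a map
 * (illustrative only; map, minaddr, maxaddr and the page-aligned sz
 * are assumed to be supplied by the caller):
 *
 *	struct uvm_addr_state *uaddr;
 *	struct vm_map_entry *entry, *last;
 *	vaddr_t addr;
 *
 *	uaddr = uaddr_lin_create(minaddr, maxaddr);
 *	if (uvm_addr_invoke(map, uaddr, &entry, &last, &addr,
 *	    sz, 0, 0, VM_PROT_READ | VM_PROT_WRITE, 0) == 0)
 *		addr now holds the lowest address that fits sz bytes
 */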
533 
534 
535 /*
536  * Randomized allocator.
537  * This allocator uses uvm_map_hint to acquire a random address and searches
538  * from there.
539  */
540 
541 const struct uvm_addr_functions uaddr_rnd_functions = {
542 	.uaddr_select = &uaddr_rnd_select,
543 	.uaddr_free_insert = &uaddr_rnd_insert,
544 	.uaddr_free_remove = &uaddr_rnd_remove,
545 	.uaddr_destroy = &uaddr_rnd_destroy,
546 #if defined(DEBUG) || defined(DDB)
547 	.uaddr_print = &uaddr_rnd_print,
548 #endif /* DEBUG || DDB */
549 	.uaddr_name = "uaddr_rnd"
550 };
551 
552 struct uvm_addr_state*
553 uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
554 {
555 	struct uaddr_rnd_state* uaddr;
556 
557 	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
558 	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
559 	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
560 	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
561 	TAILQ_INIT(&uaddr->ur_free);
562 	return &uaddr->ur_uaddr;
563 }
564 
565 int
566 uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
567     struct vm_map_entry**entry_out, vaddr_t *addr_out,
568     vsize_t sz, vaddr_t align, vaddr_t offset,
569     vm_prot_t prot, vaddr_t hint)
570 {
571 	struct vmspace		*vm;
572 	vaddr_t			 guard_sz;
573 	vaddr_t			 low_addr, high_addr;
574 	struct vm_map_entry	*entry, *next;
575 	vsize_t			 before_gap, after_gap;
576 	vaddr_t			 tmp;
577 
578 	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
579 	vm = (struct vmspace*)map;
580 
581 	/* Deal with guardpages: search for space with one extra page. */
582 	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);
583 
584 	/* Quick fail if the allocation won't fit. */
585 	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr < sz + guard_sz)
586 		return ENOMEM;
587 
588 	/* Select a hint. */
589 	if (hint == 0)
590 		hint = uvm_map_hint(vm, prot);
591 	/* Clamp hint to uaddr range. */
592 	hint = MIN(MAX(hint, uaddr->uaddr_minaddr),
593 	    uaddr->uaddr_maxaddr - sz - guard_sz);
594 
595 	/* Align hint to align,offset parameters. */
596 	tmp = hint;
597 	hint = uvm_addr_align_forward(tmp, align, offset);
598 	/* Check for overflow during alignment. */
599 	if (hint < tmp || hint > uaddr->uaddr_maxaddr - sz - guard_sz)
600 		return ENOMEM; /* Compatibility mode: never look backwards. */
601 
602 	before_gap = 0;
603 	after_gap = guard_sz;
604 	hint -= MIN(hint, before_gap);
605 
606 	/*
607 	 * Use the augmented address tree to look up the first entry
608 	 * at or after hint with sufficient space.
609 	 *
610 	 * This is the original optimized code.  It can fail when the
611 	 * subtree it looks at has sufficient space overall, but none of
612 	 * that space satisfies the align constraint.
613 	 *
614 	 * Guard: subtree is not exhausted and max(fspace) >= required.
615 	 */
616 	entry = uvm_map_entrybyaddr(&map->addr, hint);
617 
618 	/* Walk up the tree, until there is at least sufficient space. */
619 	while (entry != NULL &&
620 	    entry->fspace_augment < before_gap + after_gap + sz)
621 		entry = RB_PARENT(entry, daddrs.addr_entry);
622 
623 	while (entry != NULL) {
624 		/* Test if this fits. */
625 		if (VMMAP_FREE_END(entry) > hint &&
626 		    uvm_map_uaddr_e(map, entry) == uaddr &&
627 		    uvm_addr_fitspace(&low_addr, &high_addr,
628 		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
629 		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
630 		    sz, align, offset, before_gap, after_gap) == 0) {
631 			*entry_out = entry;
632 			if (hint >= low_addr && hint <= high_addr)
633 				*addr_out = hint;
634 			else
635 				*addr_out = low_addr;
636 			return 0;
637 		}
638 
639 		/* RB_NEXT, but skip subtrees that cannot possibly fit. */
640 		next = RB_RIGHT(entry, daddrs.addr_entry);
641 		if (next != NULL &&
642 		    next->fspace_augment >= before_gap + after_gap + sz) {
643 			entry = next;
644 			while ((next = RB_LEFT(entry, daddrs.addr_entry)) !=
645 			    NULL)
646 				entry = next;
647 		} else {
648 do_parent:
649 			next = RB_PARENT(entry, daddrs.addr_entry);
650 			if (next == NULL)
651 				entry = NULL;
652 			else if (RB_LEFT(next, daddrs.addr_entry) == entry)
653 				entry = next;
654 			else {
655 				entry = next;
656 				goto do_parent;
657 			}
658 		}
659 	}
660 
661 	/* Lookup failed. */
662 	return ENOMEM;
663 }
664 
665 /*
666  * Destroy a uaddr_rnd_state structure.
667  */
668 void
669 uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
670 {
671 	pool_put(&uaddr_rnd_pool, uaddr);
672 }
673 
674 /*
675  * Add entry to tailq.
676  */
677 void
678 uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
679     struct vm_map_entry *entry)
680 {
681 	return;
682 }
683 
684 /*
685  * Remove entry from tailq.
686  */
687 void
688 uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
689     struct vm_map_entry *entry)
690 {
691 	return;
692 }
693 
694 #if defined(DEBUG) || defined(DDB)
695 void
696 uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
697     int (*pr)(const char*, ...))
698 {
699 	struct vm_map_entry	*entry;
700 	struct uaddr_rnd_state	*uaddr;
701 	vaddr_t			 addr;
702 	size_t			 count;
703 	vsize_t			 space;
704 
705 	uaddr = (struct uaddr_rnd_state*)uaddr_p;
706 	addr = 0;
707 	count = 0;
708 	space = 0;
709 	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
710 		count++;
711 		space += entry->fspace;
712 
713 		if (full) {
714 			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
715 			    entry, entry->start, entry->end,
716 			    entry->guard, entry->fspace);
717 			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
718 			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
719 		}
720 		if (entry->start < addr) {
721 			if (!full)
722 				(*pr)("\tentry %p: 0x%lx-0x%lx "
723 				    "G=0x%lx F=0x%lx\n",
724 				    entry, entry->start, entry->end,
725 				    entry->guard, entry->fspace);
726 			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
727 			    entry->start, addr);
728 		}
729 
730 		addr = VMMAP_FREE_END(entry);
731 	}
732 	(*pr)("\t%lu entries, 0x%lx free bytes\n", count, space);
733 }
734 #endif /* DEBUG || DDB */
735 
736 
737 /*
738  * An allocator that selects an address within distance of the hint.
739  *
740  * If no hint is given, the allocator refuses to allocate.
741  */
742 
743 const struct uvm_addr_functions uaddr_hint_functions = {
744 	.uaddr_select = &uaddr_hint_select,
745 	.uaddr_destroy = &uaddr_hint_destroy,
746 	.uaddr_name = "uaddr_hint"
747 };
748 
749 /*
750  * Create uaddr_hint state.
751  */
752 struct uvm_addr_state*
753 uaddr_hint_create(vaddr_t minaddr, vaddr_t maxaddr, vsize_t max_dist)
754 {
755 	struct uaddr_hint_state* ua_hint;
756 
757 	KASSERT(uaddr_hint_pool.pr_size == sizeof(*ua_hint));
758 
759 	ua_hint = pool_get(&uaddr_hint_pool, PR_WAITOK);
760 	ua_hint->uaddr.uaddr_minaddr = minaddr;
761 	ua_hint->uaddr.uaddr_maxaddr = maxaddr;
762 	ua_hint->uaddr.uaddr_functions = &uaddr_hint_functions;
763 	ua_hint->max_dist = max_dist;
764 	return &ua_hint->uaddr;
765 }
766 
767 /*
768  * Destroy uaddr_hint state.
769  */
770 void
771 uaddr_hint_destroy(struct uvm_addr_state *uaddr)
772 {
773 	pool_put(&uaddr_hint_pool, uaddr);
774 }
775 
776 /*
777  * Hint selector.
778  *
779  * Attempts to find an address that is within max_dist of the hint.
780  */
781 int
782 uaddr_hint_select(struct vm_map *map, struct uvm_addr_state *uaddr_param,
783     struct vm_map_entry**entry_out, vaddr_t *addr_out,
784     vsize_t sz, vaddr_t align, vaddr_t offset,
785     vm_prot_t prot, vaddr_t hint)
786 {
787 	struct uaddr_hint_state	*uaddr = (struct uaddr_hint_state*)uaddr_param;
788 	vsize_t			 before_gap, after_gap;
789 	vaddr_t			 low, high;
790 	int			 dir;
791 
792 	if (hint == 0)
793 		return ENOMEM;
794 
795 	/*
796 	 * Calculate upper and lower bound for selected address.
797 	 */
798 	high = hint + uaddr->max_dist;
799 	if (high < hint)	/* overflow */
800 		high = map->max_offset;
801 	high = MIN(high, uaddr->uaddr.uaddr_maxaddr);
802 	if (high < sz)
803 		return ENOMEM;	/* Protect against underflow. */
804 	high -= sz;
805 
806 	/* Calculate lower bound for selected address. */
807 	low = hint - uaddr->max_dist;
808 	if (low > hint)		/* underflow */
809 		low = map->min_offset;
810 	low = MAX(low, uaddr->uaddr.uaddr_minaddr);
811 
812 	/* Search strategy setup. */
813 	before_gap = PAGE_SIZE +
814 	    (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
815 	after_gap = PAGE_SIZE +
816 	    (arc4random_uniform(UADDR_HINT_MAXGAP) & ~(vaddr_t)PAGE_MASK);
817 	dir = (arc4random() & 0x01) ? 1 : -1;
818 
819 	/*
820 	 * Try to search:
821 	 * - forward,  with gap
822 	 * - backward, with gap
823 	 * - forward,  without gap
824 	 * - backward, without gap
825 	 * (Where forward is in the direction specified by dir and
826 	 * backward is in the direction specified by -dir).
827 	 */
828 	if (uvm_addr_linsearch(map, uaddr_param,
829 	    entry_out, addr_out, hint, sz, align, offset,
830 	    dir, low, high, before_gap, after_gap) == 0)
831 		return 0;
832 	if (uvm_addr_linsearch(map, uaddr_param,
833 	    entry_out, addr_out, hint, sz, align, offset,
834 	    -dir, low, high, before_gap, after_gap) == 0)
835 		return 0;
836 
837 	if (uvm_addr_linsearch(map, uaddr_param,
838 	    entry_out, addr_out, hint, sz, align, offset,
839 	    dir, low, high, 0, 0) == 0)
840 		return 0;
841 	if (uvm_addr_linsearch(map, uaddr_param,
842 	    entry_out, addr_out, hint, sz, align, offset,
843 	    -dir, low, high, 0, 0) == 0)
844 		return 0;
845 
846 	return ENOMEM;
847 }
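/*
 * Note on the gap sizes above (illustrative arithmetic): with
 * UADDR_HINT_MAXGAP == 4 * PAGE_SIZE, the arc4random_uniform() result
 * masked to a page multiple is 0-3 pages, so before_gap and after_gap
 * each end up between 1 and 4 pages.
 */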
848 
849 /*
850  * Kernel allocation bootstrap logic.
851  */
852 
853 const struct uvm_addr_functions uaddr_kernel_functions = {
854 	.uaddr_select = &uaddr_kbootstrap_select,
855 	.uaddr_destroy = &uaddr_kbootstrap_destroy,
856 	.uaddr_name = "uaddr_kbootstrap"
857 };
858 
859 /*
860  * Select an address from the map.
861  *
862  * This function ignores the uaddr spec and instead uses the map directly.
863  * Because of that property, the uaddr algorithm can be shared across all
864  * kernel maps.
865  */
866 int
867 uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
868     struct vm_map_entry **entry_out, vaddr_t *addr_out,
869     vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
870 {
871 	vaddr_t tmp;
872 
873 	RB_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
874 		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
875 		    uvm_addr_fitspace(addr_out, &tmp,
876 		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
877 		    sz, align, offset, 0, 0) == 0)
878 			return 0;
879 	}
880 
881 	return ENOMEM;
882 }
883 
884 /*
885  * Don't destroy the kernel bootstrap allocator.
886  */
887 void
888 uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
889 {
890 	KASSERT(uaddr == (struct uvm_addr_state*)&uaddr_kbootstrap);
891 }
892 
893 #ifndef SMALL_KERNEL
894 /*
895  * Best fit algorithm.
896  */
897 
898 const struct uvm_addr_functions uaddr_bestfit_functions = {
899 	.uaddr_select = &uaddr_bestfit_select,
900 	.uaddr_free_insert = &uaddr_bestfit_insert,
901 	.uaddr_free_remove = &uaddr_bestfit_remove,
902 	.uaddr_destroy = &uaddr_bestfit_destroy,
903 	.uaddr_name = "uaddr_bestfit"
904 };
905 
906 struct uvm_addr_state*
907 uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
908 {
909 	struct uaddr_bestfit_state *uaddr;
910 
911 	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
912 	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
913 	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
914 	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
915 	RB_INIT(&uaddr->ubf_free);
916 	return &uaddr->ubf_uaddr;
917 }
918 
919 void
920 uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
921 {
922 	pool_put(&uaddr_bestfit_pool, uaddr);
923 }
924 
925 void
926 uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
927     struct vm_map_entry *entry)
928 {
929 	struct uaddr_bestfit_state	*uaddr;
930 	struct vm_map_entry		*rb_rv;
931 
932 	uaddr = (struct uaddr_bestfit_state*)uaddr_p;
933 	if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
934 	    NULL) {
935 		panic("%s: duplicate insertion: state %p "
936 		    "inserting %p, colliding with %p", __func__,
937 		    uaddr, entry, rb_rv);
938 	}
939 }
940 
941 void
942 uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
943     struct vm_map_entry *entry)
944 {
945 	struct uaddr_bestfit_state	*uaddr;
946 
947 	uaddr = (struct uaddr_bestfit_state*)uaddr_p;
948 	if (RB_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
949 		panic("%s: entry was not in tree", __func__);
950 }
951 
952 int
953 uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
954     struct vm_map_entry**entry_out, vaddr_t *addr_out,
955     vsize_t sz, vaddr_t align, vaddr_t offset,
956     vm_prot_t prot, vaddr_t hint)
957 {
958 	vaddr_t				 min, max;
959 	struct uaddr_bestfit_state	*uaddr;
960 	struct vm_map_entry		*entry;
961 	vsize_t				 guardsz;
962 
963 	uaddr = (struct uaddr_bestfit_state*)uaddr_p;
964 	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
965 
966 	/*
967 	 * Find smallest item on freelist capable of holding item.
968 	 * Deal with guardpages: search for space with one extra page.
969 	 */
970 	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
971 	if (entry == NULL)
972 		return ENOMEM;
973 
974 	/*
975 	 * Walk the tree until we find an entry that fits.
976 	 */
977 	while (uvm_addr_fitspace(&min, &max,
978 	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
979 	    sz, align, offset, 0, guardsz) != 0) {
980 		entry = RB_NEXT(uaddr_free_rbtree, &uaddr->ubf_free, entry);
981 		if (entry == NULL)
982 			return ENOMEM;
983 	}
984 
985 	/*
986 	 * Return the address that generates the least fragmentation.
987 	 */
988 	*entry_out = entry;
989 	*addr_out = (min - VMMAP_FREE_START(entry) <=
990 	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
991 	    min : max);
992 	return 0;
993 }
994 #endif /* !SMALL_KERNEL */
995 
996 
997 #ifndef SMALL_KERNEL
998 /*
999  * A userspace allocator based on pivots.
1000  */
1001 
1002 const struct uvm_addr_functions uaddr_pivot_functions = {
1003 	.uaddr_select = &uaddr_pivot_select,
1004 	.uaddr_free_insert = &uaddr_pivot_insert,
1005 	.uaddr_free_remove = &uaddr_pivot_remove,
1006 	.uaddr_destroy = &uaddr_pivot_destroy,
1007 #if defined(DEBUG) || defined(DDB)
1008 	.uaddr_print = &uaddr_pivot_print,
1009 #endif /* DEBUG || DDB */
1010 	.uaddr_name = "uaddr_pivot"
1011 };
1012 
1013 /*
1014  * A special random function for pivots.
1015  *
1016  * This function will return:
1017  * - a random number
1018  * - a multiple of PAGE_SIZE
1019  * - at least PAGE_SIZE
1020  *
1021  * The random function has a slightly higher chance of returning a small number.
1022  */
1023 vsize_t
1024 uaddr_pivot_random()
1025 {
1026 	int			r;
1027 
1028 	/*
1029 	 * The sum of two uniform values in [0, PIVOT_RND) has a triangular
1030 	 * distribution that peaks at PIVOT_RND - 1.  We map that peak onto
1031 	 * small results by folding the curve at PIVOT_RND - 1
1032 	 * (think of a graph on a piece of paper, that you fold).
1033 	 *
1034 	 * Because of the fold, small values are the most probable outcome.
1035 	 */
1036 	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
1037 	    (PIVOT_RND - 1);
1038 	if (r < 0)
1039 		r = -r;
1040 
1041 	/*
1042 	 * Make the returned value at least PAGE_SIZE and a multiple of
1043 	 * PAGE_SIZE.
1044 	 */
1045 	return (vaddr_t)(1 + r) << PAGE_SHIFT;
1046 }
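/*
 * Worked example of the bias (with PIVOT_RND == 8 as defined above):
 * the folded value is |U(0..7) + U(0..7) - 7|, so out of the 64
 * equally likely outcomes the returned gap is
 *
 *	1 page :  8/64		5 pages:  8/64
 *	2 pages: 14/64		6 pages:  6/64
 *	3 pages: 12/64		7 pages:  4/64
 *	4 pages: 10/64		8 pages:  2/64
 *
 * i.e. roughly two thirds of the gaps are at most 4 pages.
 */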
1047 
1048 /*
1049  * Select a new pivot.
1050  *
1051  * A pivot must:
1052  * - be chosen at random
1053  * - have a randomly chosen gap before it, where the uaddr_state starts
1054  * - have a randomly chosen gap after it, before the uaddr_state ends
1055  *
1056  * Furthermore, the pivot must provide sufficient space for the allocation.
1057  * The addr will be set to the selected address.
1058  *
1059  * Returns ENOMEM on failure.
1060  */
1061 int
1062 uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
1063     struct uaddr_pivot *pivot,
1064     struct vm_map_entry**entry_out, vaddr_t *addr_out,
1065     vsize_t sz, vaddr_t align, vaddr_t offset,
1066     vsize_t before_gap, vsize_t after_gap)
1067 {
1068 	struct vm_map_entry		*entry, *found;
1069 	vaddr_t				 minaddr, maxaddr;
1070 	vsize_t				 dist;
1071 	vaddr_t				 found_minaddr, found_maxaddr;
1072 	vaddr_t				 min, max;
1073 	vsize_t				 arc4_arg;
1074 	int				 fit_error;
1075 	u_int32_t			 path;
1076 
1077 	minaddr = uaddr->up_uaddr.uaddr_minaddr;
1078 	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
1079 	KASSERT(minaddr < maxaddr);
1080 #ifdef DIAGNOSTIC
1081 	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
1082 		panic("uaddr_pivot_newpivot: cannot grant random pivot "
1083 		    "in area less than 2 pages (size = 0x%lx)",
1084 		    maxaddr - minaddr);
1085 	}
1086 #endif /* DIAGNOSTIC */
1087 
1088 	/*
1089 	 * Gap calculation: 1/32 of the size of the managed area.
1090 	 *
1091 	 * At most: sufficient to not get truncated at arc4random.
1092 	 * At least: 2 PAGE_SIZE
1093 	 *
1094 	 * minaddr and maxaddr will be changed according to arc4random.
1095 	 */
1096 	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
1097 	if (dist >> PAGE_SHIFT > 0xffffffff) {
1098 		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
1099 		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
1100 	} else {
1101 		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
1102 		    PAGE_SHIFT;
1103 		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
1104 		    PAGE_SHIFT;
1105 	}
1106 
1107 	/*
1108 	 * A very fast way to find an entry that will be large enough
1109 	 * to hold the allocation, but still is found more or less
1110 	 * randomly: the tree path selector has a 50% chance to go for
1111 	 * a bigger or smaller entry.
1112 	 *
1113 	 * Note that the memory may actually be available,
1114 	 * but the fragmentation may be so bad and the gaps chosen
1115 	 * so unfortunately, that the allocation will not succeed.
1116 	 * Or the alignment can only be satisfied by an entry that
1117 	 * is not visited in the randomly selected path.
1118 	 *
1119 	 * This code finds an entry with sufficient space in O(log n) time.
1120 	 */
1121 	path = arc4random();
1122 	found = NULL;
1123 	entry = RB_ROOT(&uaddr->up_free);
1124 	while (entry != NULL) {
1125 		fit_error = uvm_addr_fitspace(&min, &max,
1126 		    MAX(VMMAP_FREE_START(entry), minaddr),
1127 		    MIN(VMMAP_FREE_END(entry), maxaddr),
1128 		    sz, align, offset, before_gap, after_gap);
1129 
1130 		/* It fits, save this entry. */
1131 		if (fit_error == 0) {
1132 			found = entry;
1133 			found_minaddr = min;
1134 			found_maxaddr = max;
1135 		}
1136 
1137 		/* Next. */
1138 		if (fit_error != 0)
1139 			entry = RB_RIGHT(entry, dfree.rbtree);
1140 		else if	((path & 0x1) == 0) {
1141 			path >>= 1;
1142 			entry = RB_RIGHT(entry, dfree.rbtree);
1143 		} else {
1144 			path >>= 1;
1145 			entry = RB_LEFT(entry, dfree.rbtree);
1146 		}
1147 	}
1148 	if (found == NULL)
1149 		return ENOMEM;	/* Not found a large enough region. */
1150 
1151 	/*
1152 	 * Calculate a random address within found.
1153 	 *
1154 	 * found_minaddr and found_maxaddr are already aligned, so be sure
1155 	 * to select a multiple of align as the offset in the entry.
1156 	 * Preferably, arc4random_uniform is used to provide no bias within
1157 	 * the entry.
1158 	 * However, if the size of the entry exceeds arc4random_uniform's
1159 	 * argument limit, we simply use arc4random (thus limiting ourselves
1160 	 * to 4G * PAGE_SIZE bytes offset).
1161 	 */
1162 	if (found_maxaddr == found_minaddr)
1163 		*addr_out = found_minaddr;
1164 	else {
1165 		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
1166 		arc4_arg = found_maxaddr - found_minaddr;
1167 		if (arc4_arg > 0xffffffff) {
1168 			*addr_out = found_minaddr +
1169 			    (arc4random() & (align - 1));
1170 		} else {
1171 			*addr_out = found_minaddr +
1172 			    (arc4random_uniform(arc4_arg) & (align - 1));
1173 		}
1174 	}
1175 	/* Address was found in this entry. */
1176 	*entry_out = found;
1177 
1178 	/*
1179 	 * Set up new pivot and return selected address.
1180 	 *
1181 	 * Depending on the direction of the pivot, the pivot must be placed
1182 	 * at the bottom or the top of the allocation:
1183 	 * - if the pivot moves upwards, place the pivot at the top of the
1184 	 *   allocation,
1185 	 * - if the pivot moves downwards, place the pivot at the bottom
1186 	 *   of the allocation.
1187 	 */
1188 	pivot->entry = found;
1189 	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
1190 	if (pivot->dir > 0)
1191 		pivot->addr = *addr_out + sz;
1192 	else
1193 		pivot->addr = *addr_out;
1194 	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
1195 	return 0;
1196 }
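/*
 * Illustrative note on the gap calculation above: for a 512 MB managed
 * area (and a 4 KB PAGE_SIZE), dist is 16 MB, so minaddr and maxaddr
 * each move inward by a random page-aligned amount of 0 to just under
 * 16 MB before the tree walk starts.
 */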
1197 
1198 /*
1199  * Pivot selector.
1200  *
1201  * Each time the selector is invoked, it will select a random pivot, which
1202  * it will use to select memory with. The memory will be placed at the pivot,
1203  * with a randomly sized gap between the allocation and the pivot.
1204  * The pivot will then move so it will never revisit this address.
1205  *
1206  * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
1207  * expired, it will be replaced with a newly created pivot. Pivots also
1208  * automatically expire if they fail to provide memory for an allocation.
1209  *
1210  * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
1211  * which will ensure the pivot points at memory in such a way that the
1212  * allocation will succeed.
1213  * As an added bonus, the uaddr_pivot_newpivot() function will perform the
1214  * allocation immediately and move the pivot as appropriate.
1215  *
1216  * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
1217  * allocation to succeed, it will not create a new pivot and the allocation
1218  * will fail.
1219  *
1220  * A pivot running into used memory will automatically expire (because it will
1221  * fail to allocate).
1222  *
1223  * Characteristics of the allocator:
1224  * - best case, an allocation is O(log N)
1225  *   (it would be O(1), if it weren't for the need to check if the memory is
1226  *   free; although that can be avoided...)
1227  * - worst case, an allocation is O(log N)
1228  *   (the uaddr_pivot_newpivot() function has that complexity)
1229  * - failed allocations always take O(log N)
1230  *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
1231  */
1232 int
1233 uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1234     struct vm_map_entry**entry_out, vaddr_t *addr_out,
1235     vsize_t sz, vaddr_t align, vaddr_t offset,
1236     vm_prot_t prot, vaddr_t hint)
1237 {
1238 	struct uaddr_pivot_state	*uaddr;
1239 	struct vm_map_entry		*entry;
1240 	struct uaddr_pivot		*pivot;
1241 	vaddr_t				 min, max;
1242 	vsize_t				 before_gap, after_gap;
1243 	int				 err;
1244 
1245 	/* Hint must be handled by dedicated hint allocator. */
1246 	if (hint != 0)
1247 		return EINVAL;
1248 
1249 	/*
1250 	 * Select a random pivot and a random gap sizes around the allocation.
1251 	 */
1252 	uaddr = (struct uaddr_pivot_state*)uaddr_p;
1253 	pivot = &uaddr->up_pivots[
1254 	    arc4random_uniform(nitems(uaddr->up_pivots))];
1255 	before_gap = uaddr_pivot_random();
1256 	after_gap = uaddr_pivot_random();
1257 	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
1258 		goto expired;	/* Pivot is invalid (null or expired). */
1259 
1260 	/*
1261 	 * Attempt to use the pivot to map the entry.
1262 	 */
1263 	entry = pivot->entry;
1264 	if (pivot->dir > 0) {
1265 		if (uvm_addr_fitspace(&min, &max,
1266 		    MAX(VMMAP_FREE_START(entry), pivot->addr),
1267 		    VMMAP_FREE_END(entry), sz, align, offset,
1268 		    before_gap, after_gap) == 0) {
1269 			*addr_out = min;
1270 			*entry_out = entry;
1271 			pivot->addr = min + sz;
1272 			pivot->expire--;
1273 			return 0;
1274 		}
1275 	} else {
1276 		if (uvm_addr_fitspace(&min, &max,
1277 		    VMMAP_FREE_START(entry),
1278 		    MIN(VMMAP_FREE_END(entry), pivot->addr),
1279 		    sz, align, offset, before_gap, after_gap) == 0) {
1280 			*addr_out = max;
1281 			*entry_out = entry;
1282 			pivot->addr = max;
1283 			pivot->expire--;
1284 			return 0;
1285 		}
1286 	}
1287 
1288 expired:
1289 	/*
1290 	 * Pivot expired or allocation failed.
1291 	 * Use pivot selector to do the allocation and find a new pivot.
1292 	 */
1293 	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
1294 	    sz, align, offset, before_gap, after_gap);
1295 	return err;
1296 }
1297 
1298 /*
1299  * Free the pivot.
1300  */
1301 void
1302 uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
1303 {
1304 	pool_put(&uaddr_pivot_pool, uaddr);
1305 }
1306 
1307 /*
1308  * Insert an entry with free space in the space tree.
1309  */
1310 void
1311 uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1312     struct vm_map_entry *entry)
1313 {
1314 	struct uaddr_pivot_state	*uaddr;
1315 	struct vm_map_entry		*rb_rv;
1316 	struct uaddr_pivot		*p;
1317 	vaddr_t				 check_addr;
1318 	vaddr_t				 start, end;
1319 
1320 	uaddr = (struct uaddr_pivot_state*)uaddr_p;
1321 	if ((rb_rv = RB_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1322 	    NULL) {
1323 		panic("%s: duplicate insertion: state %p "
1324 		    "inserting entry %p which collides with %p", __func__,
1325 		    uaddr, entry, rb_rv);
1326 	}
1327 
1328 	start = VMMAP_FREE_START(entry);
1329 	end = VMMAP_FREE_END(entry);
1330 
1331 	/*
1332 	 * Update all pivots that are contained in this entry.
1333 	 */
1334 	for (p = &uaddr->up_pivots[0];
1335 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1336 		check_addr = p->addr;
1337 		if (check_addr == 0)
1338 			continue;
1339 		if (p->dir < 0)
1340 			check_addr--;
1341 
1342 		if (start <= check_addr &&
1343 		    check_addr < end) {
1344 			KASSERT(p->entry == NULL);
1345 			p->entry = entry;
1346 		}
1347 	}
1348 }
1349 
1350 /*
1351  * Remove an entry with free space from the space tree.
1352  */
1353 void
1354 uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1355     struct vm_map_entry *entry)
1356 {
1357 	struct uaddr_pivot_state	*uaddr;
1358 	struct uaddr_pivot		*p;
1359 
1360 	uaddr = (struct uaddr_pivot_state*)uaddr_p;
1361 	if (RB_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1362 		panic("%s: entry was not in tree", __func__);
1363 
1364 	/*
1365 	 * Inform any pivot with this entry that the entry is gone.
1366 	 * Note that this does not automatically invalidate the pivot.
1367 	 */
1368 	for (p = &uaddr->up_pivots[0];
1369 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1370 		if (p->entry == entry)
1371 			p->entry = NULL;
1372 	}
1373 }
1374 
1375 /*
1376  * Create a new pivot selector.
1377  *
1378  * Initially, all pivots are in the expired state.
1379  * Two reasons for this:
1380  * - it means this allocator will not take a huge amount of time
1381  * - pivots select better on demand, because the pivot selection will be
1382  *   affected by preceding allocations:
1383  *   the next pivots will likely end up in different segments of free memory,
1384  *   that was segmented by an earlier allocation; better spread.
1385  */
1386 struct uvm_addr_state*
1387 uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1388 {
1389 	struct uaddr_pivot_state *uaddr;
1390 
1391 	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1392 	uaddr->up_uaddr.uaddr_minaddr = minaddr;
1393 	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1394 	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
1395 	RB_INIT(&uaddr->up_free);
1396 	bzero(uaddr->up_pivots, sizeof(uaddr->up_pivots));
1397 
1398 	return &uaddr->up_uaddr;
1399 }
1400 
1401 #if defined(DEBUG) || defined(DDB)
1402 /*
1403  * Print the uaddr_pivot_state.
1404  *
1405  * If full, a listing of all entries in the state will be provided.
1406  */
1407 void
1408 uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1409     int (*pr)(const char*, ...))
1410 {
1411 	struct uaddr_pivot_state	*uaddr;
1412 	struct uaddr_pivot		*pivot;
1413 	struct vm_map_entry		*entry;
1414 	int				 i;
1415 	vaddr_t				 check_addr;
1416 
1417 	uaddr = (struct uaddr_pivot_state*)uaddr_p;
1418 
1419 	for (i = 0; i < NUM_PIVOTS; i++) {
1420 		pivot = &uaddr->up_pivots[i];
1421 
1422 		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
1423 		    pivot->addr, pivot->expire, pivot->dir);
1424 	}
1425 	if (!full)
1426 		return;
1427 
1428 	if (RB_EMPTY(&uaddr->up_free))
1429 		(*pr)("\tempty\n");
1430 	/* Print list of free space. */
1431 	RB_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1432 		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1433 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1434 		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1435 
1436 		for (i = 0; i < NUM_PIVOTS; i++) {
1437 			pivot = &uaddr->up_pivots[i];
1438 			check_addr = pivot->addr;
1439 			if (check_addr == 0)
1440 				continue;
1441 			if (pivot->dir < 0)
1442 				check_addr--;
1443 
1444 			if (VMMAP_FREE_START(entry) <= check_addr &&
1445 			    check_addr < VMMAP_FREE_END(entry)) {
1446 				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
1447 				    i, pivot->addr);
1448 			}
1449 		}
1450 	}
1451 }
1452 #endif /* DEBUG || DDB */
1453 #endif /* !SMALL_KERNEL */
1454 
1455 #ifndef SMALL_KERNEL
1456 /*
1457  * Strategy for uaddr_stack_brk_select.
1458  */
1459 struct uaddr_bs_strat {
1460 	vaddr_t			start;		/* Start of area. */
1461 	vaddr_t			end;		/* End of area. */
1462 	int			dir;		/* Search direction. */
1463 };
1464 
1465 /*
1466  * Stack/break allocator.
1467  *
1468  * The stack area is filled in the direction opposite to stack growth,
1469  * the brk area is filled downward (because sbrk() grows upward).
1470  *
1471  * Both areas are filled proportionally: a weighted chance is used to
1472  * select which one (stack or brk area) to try. If the allocation fails,
1473  * the other one is tested.
1474  */
1475 
1476 const struct uvm_addr_functions uaddr_stack_brk_functions = {
1477 	.uaddr_select = &uaddr_stack_brk_select,
1478 	.uaddr_destroy = &uaddr_destroy,
1479 	.uaddr_name = "uaddr_stckbrk"
1480 };
1481 
1482 /*
1483  * Stack/brk address selector.
1484  */
1485 int
1486 uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1487     struct vm_map_entry**entry_out, vaddr_t *addr_out,
1488     vsize_t sz, vaddr_t align, vaddr_t offset,
1489     vm_prot_t prot, vaddr_t hint)
1490 {
1491 	vsize_t			before_gap, after_gap;
1492 	int			stack_idx, brk_idx;
1493 	struct uaddr_bs_strat	strat[2], *s;
1494 	vsize_t			sb_size;
1495 
1496 	/*
1497 	 * Choose gap size and if the stack is searched before or after the
1498 	 * brk area.
1499 	 */
1500 	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1501 	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1502 
1503 	sb_size = (map->s_end - map->s_start) + (map->b_end - map->b_start);
1504 	sb_size >>= PAGE_SHIFT;
1505 	if (arc4random_uniform(MAX(sb_size, 0xffffffff)) >
1506 	    map->b_end - map->b_start) {
1507 		brk_idx = 1;
1508 		stack_idx = 0;
1509 	} else {
1510 		brk_idx = 0;
1511 		stack_idx = 1;
1512 	}
1513 
1514 	/*
1515 	 * Set up stack search strategy.
1516 	 */
1517 	s = &strat[stack_idx];
1518 	s->start = MAX(map->s_start, uaddr->uaddr_minaddr);
1519 	s->end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1520 #ifdef MACHINE_STACK_GROWS_UP
1521 	s->dir = -1;
1522 #else
1523 	s->dir =  1;
1524 #endif
1525 
1526 	/*
1527 	 * Set up brk search strategy.
1528 	 */
1529 	s = &strat[brk_idx];
1530 	s->start = MAX(map->b_start, uaddr->uaddr_minaddr);
1531 	s->end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1532 	s->dir = -1;	/* Opposite of brk() growth. */
1533 
1534 	/*
1535 	 * Linear search for space.
1536 	 */
1537 	for (s = &strat[0]; s < &strat[nitems(strat)]; s++) {
1538 		if (s->end - s->start < sz)
1539 			continue;
1540 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1541 		    0, sz, align, offset, s->dir, s->start, s->end - sz,
1542 		    before_gap, after_gap) == 0)
1543 			return 0;
1544 	}
1545 
1546 	return ENOMEM;
1547 }
1548 
1549 struct uvm_addr_state*
1550 uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1551 {
1552 	struct uvm_addr_state* uaddr;
1553 
1554 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1555 	uaddr->uaddr_minaddr = minaddr;
1556 	uaddr->uaddr_maxaddr = maxaddr;
1557 	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1558 	return uaddr;
1559 }
1560 #endif /* !SMALL_KERNEL */
1561 
1562 
1563 #ifndef SMALL_KERNEL
1564 RB_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
1565     uvm_mapent_fspace_cmp);
1566 #endif /* !SMALL_KERNEL */
1567