/*	$OpenBSD: uvm_addr.c,v 1.37 2024/09/04 07:54:53 mglocker Exp $	*/

/*
 * Copyright (c) 2011 Ariane van der Steldt <ariane@stack.nl>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* #define DEBUG */

#include <sys/param.h>
#include <sys/systm.h>
#include <uvm/uvm.h>
#include <uvm/uvm_addr.h>
#include <sys/pool.h>

/* Number of pivots in pivot allocator. */
#define NUM_PIVOTS		16
/*
 * Max number (inclusive) of pages the pivot allocator
 * will place between allocations.
 *
 * The uaddr_pivot_random() function attempts to bias towards
 * small space between allocations, so putting a large number here is fine.
 */
#define PIVOT_RND		8
/*
 * Number of allocations that a pivot can supply before expiring.
 * When a pivot expires, a new pivot has to be found.
 *
 * Must be at least 1.
 */
#define PIVOT_EXPIRE		1024


/* Pool with uvm_addr_state structures. */
struct pool uaddr_pool;
struct pool uaddr_bestfit_pool;
struct pool uaddr_pivot_pool;
struct pool uaddr_rnd_pool;

/* uvm_addr state for bestfit selector. */
struct uaddr_bestfit_state {
	struct uvm_addr_state		 ubf_uaddr;
	struct uaddr_free_rbtree	 ubf_free;
};

/* uvm_addr state for rnd selector. */
struct uaddr_rnd_state {
	struct uvm_addr_state		 ur_uaddr;
#if 0
	TAILQ_HEAD(, vm_map_entry)	 ur_free;
#endif
};

/*
 * Definition of a pivot in pivot selector.
 */
struct uaddr_pivot {
	vaddr_t				 addr;	/* End of prev. allocation. */
	int				 expire;/* Best before date. */
	int				 dir;	/* Direction. */
	struct vm_map_entry		*entry; /* Will contain next alloc. */
};
/* uvm_addr state for pivot selector. */
struct uaddr_pivot_state {
	struct uvm_addr_state		 up_uaddr;

	/* Free space tree, for fast pivot selection. */
	struct uaddr_free_rbtree	 up_free;

	/* List of pivots. The pointers point to after the last allocation. */
	struct uaddr_pivot		 up_pivots[NUM_PIVOTS];
};

/* Forward declaration (see below). */
extern const struct uvm_addr_functions uaddr_kernel_functions;
struct uvm_addr_state uaddr_kbootstrap;


/*
 * Support functions.
 */

#ifndef SMALL_KERNEL
struct vm_map_entry	*uvm_addr_entrybyspace(struct uaddr_free_rbtree*,
			    vsize_t);
#endif /* !SMALL_KERNEL */
void			 uaddr_destroy(struct uvm_addr_state *);
void			 uaddr_kbootstrap_destroy(struct uvm_addr_state *);
void			 uaddr_rnd_destroy(struct uvm_addr_state *);
void			 uaddr_bestfit_destroy(struct uvm_addr_state *);
void			 uaddr_pivot_destroy(struct uvm_addr_state *);

#if 0
int			 uaddr_lin_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif
int			 uaddr_kbootstrap_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_rnd_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_bestfit_select(struct vm_map *,
			    struct uvm_addr_state*, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#ifndef SMALL_KERNEL
int			 uaddr_pivot_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
int			 uaddr_stack_brk_select(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry **,
			    vaddr_t *, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
			    vaddr_t);
#endif /* !SMALL_KERNEL */

void			 uaddr_rnd_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_rnd_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_bestfit_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_insert(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);
void			 uaddr_pivot_remove(struct vm_map *,
			    struct uvm_addr_state *, struct vm_map_entry *);

#ifndef SMALL_KERNEL
vsize_t			 uaddr_pivot_random(void);
int			 uaddr_pivot_newpivot(struct vm_map *,
			    struct uaddr_pivot_state *, struct uaddr_pivot *,
			    struct vm_map_entry **, vaddr_t *,
			    vsize_t, vaddr_t, vaddr_t, vsize_t, vsize_t);
#endif /* !SMALL_KERNEL */

#if defined(DEBUG) || defined(DDB)
void			 uaddr_pivot_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#if 0
void			 uaddr_rnd_print(struct uvm_addr_state *, boolean_t,
			    int (*)(const char *, ...));
#endif
#endif /* DEBUG || DDB */


#ifndef SMALL_KERNEL
/*
 * Find smallest entry in tree that will fit sz bytes.
 */
struct vm_map_entry *
uvm_addr_entrybyspace(struct uaddr_free_rbtree *free, vsize_t sz)
{
	struct vm_map_entry	*tmp, *res;

	tmp = RBT_ROOT(uaddr_free_rbtree, free);
	res = NULL;
	while (tmp) {
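		/*
		 * The tree is keyed by fspace: an entry that fits may
		 * still hide a smaller fitting entry in its left
		 * subtree, while an entry that is too small can only
		 * be improved upon in its right subtree.
		 */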
		if (tmp->fspace >= sz) {
			res = tmp;
			tmp = RBT_LEFT(uaddr_free_rbtree, tmp);
		} else
			tmp = RBT_RIGHT(uaddr_free_rbtree, tmp);
	}
	return res;
}
#endif /* !SMALL_KERNEL */

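/*
 * Round addr up to the next address that is offset modulo align
 * (align is clamped to at least PAGE_SIZE).
 *
 * Example: align = 0x4000, offset = 0x1000, addr = 0x5432: the
 * candidate 0x4000 + 0x1000 = 0x5000 lies below addr, so 0x9000
 * is returned.
 */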
static inline vaddr_t
uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted < addr ? adjusted + align : adjusted);
}

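/*
 * Round addr down to the last address that is offset modulo align
 * (align is clamped to at least PAGE_SIZE).
 *
 * Example: align = 0x4000, offset = 0x1000, addr = 0x5432: the
 * candidate 0x5000 does not exceed addr, so it is returned as-is.
 */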
static inline vaddr_t
uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
{
	vaddr_t adjusted;

	KASSERT(offset < align || (align == 0 && offset == 0));
	KASSERT((align & (align - 1)) == 0);
	KASSERT((offset & PAGE_MASK) == 0);

	align = MAX(align, PAGE_SIZE);
	adjusted = addr & ~(align - 1);
	adjusted += offset;
	return (adjusted > addr ? adjusted - align : adjusted);
}

/*
 * Try to fit the requested space into the entry.
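 *
 * On success, *min_result and *max_result hold the lowest and highest
 * start addresses that satisfy sz, align, offset and the gaps.
 *
 * Example: low_addr = 0x1000, high_addr = 0x20000, sz = 0x3000,
 * align = 0x2000, offset = 0 and no gaps yields *min_result = 0x2000
 * and *max_result = 0x1c000 (0x20000 - 0x3000, aligned downward).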
 */
int
uvm_addr_fitspace(vaddr_t *min_result, vaddr_t *max_result,
    vaddr_t low_addr, vaddr_t high_addr, vsize_t sz,
    vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	vaddr_t	tmp;
	vsize_t	fspace;

	if (low_addr > high_addr)
		return ENOMEM;
	fspace = high_addr - low_addr;
	if (fspace < before_gap + after_gap)
		return ENOMEM;
	if (fspace - before_gap - after_gap < sz)
		return ENOMEM;

	/*
	 * Calculate lowest address.
	 */
	low_addr += before_gap;
	low_addr = uvm_addr_align_forward(tmp = low_addr, align, offset);
	if (low_addr < tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (high_addr - after_gap - sz < low_addr)
		return ENOMEM;

	/*
	 * Calculate highest address.
	 */
	high_addr -= after_gap + sz;
	high_addr = uvm_addr_align_backward(tmp = high_addr, align, offset);
	if (high_addr > tmp)	/* Overflow during alignment. */
		return ENOMEM;
	if (low_addr > high_addr)
		return ENOMEM;

	*min_result = low_addr;
	*max_result = high_addr;
	return 0;
}


/*
 * Initialize uvm_addr.
 */
void
uvm_addr_init(void)
{
	pool_init(&uaddr_pool, sizeof(struct uvm_addr_state), 0,
	    IPL_VM, PR_WAITOK, "uaddr", NULL);
	pool_init(&uaddr_bestfit_pool, sizeof(struct uaddr_bestfit_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrbest", NULL);
	pool_init(&uaddr_pivot_pool, sizeof(struct uaddr_pivot_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrpivot", NULL);
	pool_init(&uaddr_rnd_pool, sizeof(struct uaddr_rnd_state), 0,
	    IPL_VM, PR_WAITOK, "uaddrrnd", NULL);

	uaddr_kbootstrap.uaddr_minaddr = PAGE_SIZE;
	uaddr_kbootstrap.uaddr_maxaddr = -(vaddr_t)PAGE_SIZE;
	uaddr_kbootstrap.uaddr_functions = &uaddr_kernel_functions;
}

/*
 * Invoke destructor function of uaddr.
 */
void
uvm_addr_destroy(struct uvm_addr_state *uaddr)
{
	if (uaddr)
		(*uaddr->uaddr_functions->uaddr_destroy)(uaddr);
}

/*
 * Directional first fit.
 *
 * Do a linear search for free space, starting at addr in entry.
 * direction ==  1: search forward
 * direction == -1: search backward
 *
 * Output: low <= addr <= high and entry will contain addr.
 * ENOMEM will be returned if no space is available.
 *
 * before_gap and after_gap describe the space that must be kept free
 * between the allocation and the neighbouring entries.
 */
int
uvm_addr_linsearch(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vaddr_t hint, vsize_t sz, vaddr_t align, vaddr_t offset,
    int direction, vaddr_t low, vaddr_t high,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry	*entry;
	vaddr_t			 low_addr, high_addr;

	KASSERT(entry_out != NULL && addr_out != NULL);
	KASSERT(direction == -1 || direction == 1);
	KASSERT((hint & PAGE_MASK) == 0 && (high & PAGE_MASK) == 0 &&
	    (low & PAGE_MASK) == 0 &&
	    (before_gap & PAGE_MASK) == 0 && (after_gap & PAGE_MASK) == 0);
	KASSERT(high + sz > high); /* Check for overflow. */

	/*
	 * Hint magic.
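	 *
	 * A zero hint starts the search at the boundary implied by the
	 * direction; a hint outside [low, high] is clamped, but only if
	 * the search direction still points back into that range.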
	 */
	if (hint == 0)
		hint = (direction == 1 ? low : high);
	else if (hint > high) {
		if (direction != -1)
			return ENOMEM;
		hint = high;
	} else if (hint < low) {
		if (direction != 1)
			return ENOMEM;
		hint = low;
	}

	for (entry = uvm_map_entrybyaddr(&map->addr,
	    hint - (direction == -1 ? 1 : 0)); entry != NULL;
	    entry = (direction == 1 ?
	    RBT_NEXT(uvm_map_addr, entry) :
	    RBT_PREV(uvm_map_addr, entry))) {
		if ((direction == 1 && VMMAP_FREE_START(entry) > high) ||
		    (direction == -1 && VMMAP_FREE_END(entry) < low)) {
			break;
		}

		if (uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(low, VMMAP_FREE_START(entry)),
		    MIN(high, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr) {
				*addr_out = hint;
			} else {
				*addr_out = (direction == 1 ?
				    low_addr : high_addr);
			}
			return 0;
		}
	}

	return ENOMEM;
}

/*
 * Invoke address selector of uaddr.
 * uaddr may be NULL, in which case the algorithm will fail with ENOMEM.
 *
 * Will invoke uvm_map_isavail to fill in last_out.
 */
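/*
 * Sketch of a typical call, with hypothetical values: on a map whose
 * free space is managed by uaddr, something like
 *
 *	error = uvm_addr_invoke(map, uaddr, &entry, &last, &addr,
 *	    sz, PAGE_SIZE, 0, PROT_READ | PROT_WRITE, 0);
 *
 * leaves the selected address in addr and the entry that contains it
 * in entry when 0 is returned.
 */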
int
uvm_addr_invoke(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, struct vm_map_entry **last_out,
    vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	int error;

	if (uaddr == NULL)
		return ENOMEM;

	hint &= ~((vaddr_t)PAGE_MASK);
	if (hint != 0 &&
	    !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
		return ENOMEM;

	vm_map_assert_anylock(map);

	error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);

	if (error == 0) {
		KASSERT(*entry_out != NULL);
		*last_out = NULL;
		if (!uvm_map_isavail(map, uaddr, entry_out, last_out,
		    *addr_out, sz)) {
			panic("uvm_addr_invoke: address selector %p "
			    "(%s 0x%lx-0x%lx) "
			    "returned unavailable address 0x%lx sz 0x%lx",
			    uaddr, uaddr->uaddr_functions->uaddr_name,
			    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr,
			    *addr_out, sz);
		}
	}

	return error;
}

#if defined(DEBUG) || defined(DDB)
void
uvm_addr_print(struct uvm_addr_state *uaddr, const char *slot, boolean_t full,
    int (*pr)(const char *, ...))
{
	if (uaddr == NULL) {
		(*pr)("- uvm_addr %s: NULL\n", slot);
		return;
	}

	(*pr)("- uvm_addr %s: %p (%s 0x%lx-0x%lx)\n", slot, uaddr,
	    uaddr->uaddr_functions->uaddr_name,
	    uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr);
	if (uaddr->uaddr_functions->uaddr_print == NULL)
		return;

	(*uaddr->uaddr_functions->uaddr_print)(uaddr, full, pr);
}
#endif /* DEBUG || DDB */

/*
 * Destroy a uvm_addr_state structure.
 * The uaddr must have been previously allocated from uaddr_pool.
 */
void
uaddr_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pool, uaddr);
}


#if 0
/*
 * Linear allocator.
 * This allocator uses a first-fit algorithm.
 *
 * If hint is set, search will start at the hint position.
 * Only searches forward.
 */

const struct uvm_addr_functions uaddr_lin_functions = {
	.uaddr_select = &uaddr_lin_select,
	.uaddr_destroy = &uaddr_destroy,
	.uaddr_name = "uaddr_lin"
};

struct uvm_addr_state *
uaddr_lin_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uvm_addr_state *uaddr;

	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
	uaddr->uaddr_minaddr = minaddr;
	uaddr->uaddr_maxaddr = maxaddr;
	uaddr->uaddr_functions = &uaddr_lin_functions;
	return uaddr;
}

int
uaddr_lin_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t guard_sz;

	/*
	 * Deal with guardpages: search for space with one extra page.
	 */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - uaddr->uaddr_minaddr - guard_sz < sz)
		return ENOMEM;
	return uvm_addr_linsearch(map, uaddr, entry_out, addr_out, 0, sz,
	    align, offset, 1, uaddr->uaddr_minaddr, uaddr->uaddr_maxaddr - sz,
	    0, guard_sz);
}
#endif

/*
 * Randomized allocator.
 * This allocator uses uvm_map_hint to acquire a random address and
 * searches from there.
 */

const struct uvm_addr_functions uaddr_rnd_functions = {
	.uaddr_select = &uaddr_rnd_select,
	.uaddr_free_insert = &uaddr_rnd_insert,
	.uaddr_free_remove = &uaddr_rnd_remove,
	.uaddr_destroy = &uaddr_rnd_destroy,
#if defined(DEBUG) || defined(DDB)
#if 0
	.uaddr_print = &uaddr_rnd_print,
#endif
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_rnd"
};

struct uvm_addr_state *
uaddr_rnd_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_rnd_state *uaddr;

	uaddr = pool_get(&uaddr_rnd_pool, PR_WAITOK);
	uaddr->ur_uaddr.uaddr_minaddr = minaddr;
	uaddr->ur_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ur_uaddr.uaddr_functions = &uaddr_rnd_functions;
#if 0
	TAILQ_INIT(&uaddr->ur_free);
#endif
	return &uaddr->ur_uaddr;
}

int
uaddr_rnd_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct vmspace		*vm;
	vaddr_t			 minaddr, maxaddr;
	vaddr_t			 guard_sz;
	vaddr_t			 low_addr, high_addr;
	struct vm_map_entry	*entry, *next;
	vsize_t			 before_gap, after_gap;
	vaddr_t			 tmp;

	KASSERT((map->flags & VM_MAP_ISVMSPACE) != 0);
	vm = (struct vmspace *)map;

	/* Deal with guardpages: search for space with one extra page. */
	guard_sz = ((map->flags & VM_MAP_GUARDPAGES) == 0 ? 0 : PAGE_SIZE);

	if (uaddr->uaddr_maxaddr - guard_sz < sz)
		return ENOMEM;
	minaddr = uvm_addr_align_forward(uaddr->uaddr_minaddr, align, offset);
	maxaddr = uvm_addr_align_backward(uaddr->uaddr_maxaddr - sz - guard_sz,
	    align, offset);

	/* Quick fail if the allocation won't fit. */
	if (minaddr >= maxaddr)
		return ENOMEM;

	/* Select a hint. */
	if (hint == 0)
		hint = uvm_map_hint(vm, prot, minaddr, maxaddr);
	/* Clamp hint to uaddr range. */
	hint = MIN(MAX(hint, minaddr), maxaddr);

	/* Align hint to align,offset parameters. */
	tmp = hint;
	hint = uvm_addr_align_forward(tmp, align, offset);
	/* Check for overflow during alignment. */
	if (hint < tmp || hint > maxaddr)
		return ENOMEM; /* Compatibility mode: never look backwards. */

	before_gap = 0;
	after_gap = guard_sz;
	hint -= MIN(hint, before_gap);

	/*
	 * Use the augmented address tree to look up the first entry
	 * at or after hint with sufficient space.
	 *
	 * This is the original optimized code; note that it can fail
	 * when the subtree it inspects has sufficient raw space, but
	 * none of that space satisfies the align constraint.
	 *
	 * Guard: subtree is not exhausted and max(fspace) >= required.
	 */
	entry = uvm_map_entrybyaddr(&map->addr, hint);

	/* Walk up the tree, until there is at least sufficient space. */
	while (entry != NULL &&
	    entry->fspace_augment < before_gap + after_gap + sz)
		entry = RBT_PARENT(uvm_map_addr, entry);

	while (entry != NULL) {
		/* Test if this fits. */
		if (VMMAP_FREE_END(entry) > hint &&
		    uvm_map_uaddr_e(map, entry) == uaddr &&
		    uvm_addr_fitspace(&low_addr, &high_addr,
		    MAX(uaddr->uaddr_minaddr, VMMAP_FREE_START(entry)),
		    MIN(uaddr->uaddr_maxaddr, VMMAP_FREE_END(entry)),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*entry_out = entry;
			if (hint >= low_addr && hint <= high_addr)
				*addr_out = hint;
			else
				*addr_out = low_addr;
			return 0;
		}

		/* RBT_NEXT, but skip subtrees that cannot possibly fit. */
		next = RBT_RIGHT(uvm_map_addr, entry);
		if (next != NULL &&
		    next->fspace_augment >= before_gap + after_gap + sz) {
			entry = next;
			while ((next = RBT_LEFT(uvm_map_addr, entry)) !=
			    NULL)
				entry = next;
		} else {
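			/*
			 * Ascend until we arrive from a left child;
			 * that parent is the in-order successor.
			 */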
do_parent:
			next = RBT_PARENT(uvm_map_addr, entry);
			if (next == NULL)
				entry = NULL;
			else if (RBT_LEFT(uvm_map_addr, next) == entry)
				entry = next;
			else {
				entry = next;
				goto do_parent;
			}
		}
	}

	/* Lookup failed. */
	return ENOMEM;
}

/*
 * Destroy a uaddr_rnd_state structure.
 */
void
uaddr_rnd_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_rnd_pool, uaddr);
}

/*
 * Add entry to tailq (no-op: the free-space tailq is currently disabled).
 */
void
uaddr_rnd_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

/*
 * Remove entry from tailq (no-op: the free-space tailq is currently
 * disabled).
 */
void
uaddr_rnd_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	return;
}

#if 0
#if defined(DEBUG) || defined(DDB)
void
uaddr_rnd_print(struct uvm_addr_state *uaddr_p, boolean_t full,
    int (*pr)(const char*, ...))
{
	struct vm_map_entry	*entry;
	struct uaddr_rnd_state	*uaddr;
	vaddr_t			 addr;
	size_t			 count;
	vsize_t			 space;

	uaddr = (struct uaddr_rnd_state *)uaddr_p;
	addr = 0;
	count = 0;
	space = 0;
	TAILQ_FOREACH(entry, &uaddr->ur_free, dfree.tailq) {
		count++;
		space += entry->fspace;

		if (full) {
			(*pr)("\tentry %p: 0x%lx-0x%lx G=0x%lx F=0x%lx\n",
			    entry, entry->start, entry->end,
			    entry->guard, entry->fspace);
			(*pr)("\t\tfree: 0x%lx-0x%lx\n",
			    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
		}
		if (entry->start < addr) {
			if (!full)
				(*pr)("\tentry %p: 0x%lx-0x%lx "
				    "G=0x%lx F=0x%lx\n",
				    entry, entry->start, entry->end,
				    entry->guard, entry->fspace);
			(*pr)("\t\tstart=0x%lx, expected at least 0x%lx\n",
			    entry->start, addr);
		}

		addr = VMMAP_FREE_END(entry);
	}
	(*pr)("\t%lu entries, 0x%lx free bytes\n", count, space);
}
#endif /* DEBUG || DDB */
#endif

/*
 * Kernel allocation bootstrap logic.
 */

const struct uvm_addr_functions uaddr_kernel_functions = {
	.uaddr_select = &uaddr_kbootstrap_select,
	.uaddr_destroy = &uaddr_kbootstrap_destroy,
	.uaddr_name = "uaddr_kbootstrap"
};

/*
 * Select an address from the map.
 *
 * This function ignores the uaddr spec and instead uses the map directly.
 * Because of that property, the uaddr algorithm can be shared across all
 * kernel maps.
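 *
 * Note that this is a linear scan over all map entries; the bootstrap
 * map is presumably small enough for this to be acceptable.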
 */
int
uaddr_kbootstrap_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	vaddr_t tmp;

	RBT_FOREACH(*entry_out, uvm_map_addr, &map->addr) {
		if (VMMAP_FREE_END(*entry_out) <= uvm_maxkaddr &&
		    uvm_addr_fitspace(addr_out, &tmp,
		    VMMAP_FREE_START(*entry_out), VMMAP_FREE_END(*entry_out),
		    sz, align, offset, 0, 0) == 0)
			return 0;
	}

	return ENOMEM;
}

/*
 * Don't destroy the kernel bootstrap allocator.
 */
void
uaddr_kbootstrap_destroy(struct uvm_addr_state *uaddr)
{
	KASSERT(uaddr == (struct uvm_addr_state *)&uaddr_kbootstrap);
}

#ifndef SMALL_KERNEL
/*
 * Best fit algorithm.
 */

const struct uvm_addr_functions uaddr_bestfit_functions = {
	.uaddr_select = &uaddr_bestfit_select,
	.uaddr_free_insert = &uaddr_bestfit_insert,
	.uaddr_free_remove = &uaddr_bestfit_remove,
	.uaddr_destroy = &uaddr_bestfit_destroy,
	.uaddr_name = "uaddr_bestfit"
};

struct uvm_addr_state *
uaddr_bestfit_create(vaddr_t minaddr, vaddr_t maxaddr)
{
	struct uaddr_bestfit_state *uaddr;

	uaddr = pool_get(&uaddr_bestfit_pool, PR_WAITOK);
	uaddr->ubf_uaddr.uaddr_minaddr = minaddr;
	uaddr->ubf_uaddr.uaddr_maxaddr = maxaddr;
	uaddr->ubf_uaddr.uaddr_functions = &uaddr_bestfit_functions;
	RBT_INIT(uaddr_free_rbtree, &uaddr->ubf_free);
	return &uaddr->ubf_uaddr;
}

void
uaddr_bestfit_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_bestfit_pool, uaddr);
}

void
uaddr_bestfit_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*rb_rv;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->ubf_free, entry)) !=
	    NULL) {
		panic("%s: duplicate insertion: state %p "
		    "inserting %p, colliding with %p", __func__,
		    uaddr, entry, rb_rv);
	}
}

void
uaddr_bestfit_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry *entry)
{
	struct uaddr_bestfit_state	*uaddr;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->ubf_free, entry) != entry)
		panic("%s: entry was not in tree", __func__);
}

int
uaddr_bestfit_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	vaddr_t				 min, max;
	struct uaddr_bestfit_state	*uaddr;
	struct vm_map_entry		*entry;
	vsize_t				 guardsz;

	uaddr = (struct uaddr_bestfit_state *)uaddr_p;
	guardsz = ((map->flags & VM_MAP_GUARDPAGES) ? PAGE_SIZE : 0);
	if (sz + guardsz < sz)
		return ENOMEM;

	/*
	 * Find smallest item on freelist capable of holding item.
	 * Deal with guardpages: search for space with one extra page.
	 */
	entry = uvm_addr_entrybyspace(&uaddr->ubf_free, sz + guardsz);
	if (entry == NULL)
		return ENOMEM;

	/*
	 * Walk the tree until we find an entry that fits.
	 */
	while (uvm_addr_fitspace(&min, &max,
	    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
	    sz, align, offset, 0, guardsz) != 0) {
		entry = RBT_NEXT(uaddr_free_rbtree, entry);
		if (entry == NULL)
			return ENOMEM;
	}

	/*
	 * Return the address that generates the least fragmentation.
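	 *
	 * Placing the allocation at min leaves a hole of
	 * min - VMMAP_FREE_START(entry) bytes in front of it; placing
	 * it at max leaves VMMAP_FREE_END(entry) - guardsz - sz - max
	 * bytes behind it.  Pick whichever hole is smaller.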
	 */
	*entry_out = entry;
	*addr_out = (min - VMMAP_FREE_START(entry) <=
	    VMMAP_FREE_END(entry) - guardsz - sz - max ?
	    min : max);
	return 0;
}
#endif /* !SMALL_KERNEL */


#ifndef SMALL_KERNEL
/*
 * A userspace allocator based on pivots.
 */

const struct uvm_addr_functions uaddr_pivot_functions = {
	.uaddr_select = &uaddr_pivot_select,
	.uaddr_free_insert = &uaddr_pivot_insert,
	.uaddr_free_remove = &uaddr_pivot_remove,
	.uaddr_destroy = &uaddr_pivot_destroy,
#if defined(DEBUG) || defined(DDB)
	.uaddr_print = &uaddr_pivot_print,
#endif /* DEBUG || DDB */
	.uaddr_name = "uaddr_pivot"
};

/*
 * A special random function for pivots.
 *
 * This function will return:
 * - a random number
 * - a multiple of PAGE_SIZE
 * - at least PAGE_SIZE
 *
 * The random function has a slightly higher chance to return a small number.
 */
vsize_t
uaddr_pivot_random(void)
{
	int			r;

	/*
	 * The sum of two six-sided dice will have a normal distribution.
	 * We map the highest probable number to 1, by folding the curve
	 * (think of a graph on a piece of paper, that you fold).
	 *
	 * Because the fold happens at PIVOT_RND - 1, the numbers 0 and 1
	 * have the same and highest probability of happening.
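	 *
	 * With PIVOT_RND = 8, r ends up in [0, 7], so the value returned
	 * below lies in [PAGE_SIZE, 8 * PAGE_SIZE], with the probability
	 * of a gap falling off roughly linearly towards the maximum.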
	 */
	r = arc4random_uniform(PIVOT_RND) + arc4random_uniform(PIVOT_RND) -
	    (PIVOT_RND - 1);
	if (r < 0)
		r = -r;

	/*
	 * Make the returned value at least PAGE_SIZE and a multiple of
	 * PAGE_SIZE.
	 */
	return (vaddr_t)(1 + r) << PAGE_SHIFT;
}

/*
 * Select a new pivot.
 *
 * A pivot must:
 * - be chosen at random
 * - have a randomly chosen gap between it and the start of the uaddr_state
 * - have a randomly chosen gap between it and the end of the uaddr_state
 *
 * Furthermore, the pivot must provide sufficient space for the allocation.
 * The addr will be set to the selected address.
 *
 * Returns ENOMEM on failure.
 */
int
uaddr_pivot_newpivot(struct vm_map *map, struct uaddr_pivot_state *uaddr,
    struct uaddr_pivot *pivot,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vsize_t before_gap, vsize_t after_gap)
{
	struct vm_map_entry		*entry, *found;
	vaddr_t				 minaddr, maxaddr;
	vsize_t				 dist;
	vaddr_t				 found_minaddr, found_maxaddr;
	vaddr_t				 min, max;
	vsize_t				 arc4_arg;
	int				 fit_error;
	u_int32_t			 path;

	minaddr = uaddr->up_uaddr.uaddr_minaddr;
	maxaddr = uaddr->up_uaddr.uaddr_maxaddr;
	KASSERT(minaddr < maxaddr);
#ifdef DIAGNOSTIC
	if (minaddr + 2 * PAGE_SIZE > maxaddr) {
		panic("uaddr_pivot_newpivot: cannot grant random pivot "
		    "in area less than 2 pages (size = 0x%lx)",
		    maxaddr - minaddr);
	}
#endif /* DIAGNOSTIC */

	/*
	 * Gap calculation: 1/32 of the size of the managed area.
	 *
	 * At most: sufficient to not get truncated at arc4random.
	 * At least: 2 PAGE_SIZE
	 *
	 * minaddr and maxaddr will be changed according to arc4random.
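	 *
	 * Example: for a 512 MB managed range, dist is 16 MB, so each
	 * boundary moves inward by a random amount of less than 16 MB.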
	 */
	dist = MAX((maxaddr - minaddr) / 32, 2 * (vaddr_t)PAGE_SIZE);
	if (dist >> PAGE_SHIFT > 0xffffffff) {
		minaddr += (vsize_t)arc4random() << PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random() << PAGE_SHIFT;
	} else {
		minaddr += (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
		maxaddr -= (vsize_t)arc4random_uniform(dist >> PAGE_SHIFT) <<
		    PAGE_SHIFT;
	}

	/*
	 * A very fast way to find an entry that will be large enough
	 * to hold the allocation, but still is found more or less
	 * randomly: the tree path selector has a 50% chance to go for
	 * a bigger or smaller entry.
	 *
	 * Note that the memory may actually be available,
	 * but the fragmentation may be so bad and the gaps chosen
	 * so unfortunately, that the allocation will not succeed.
	 * Or the alignment can only be satisfied by an entry that
	 * is not visited in the randomly selected path.
	 *
	 * This code finds an entry with sufficient space in O(log n) time.
	 */
	path = arc4random();
	found = NULL;
	entry = RBT_ROOT(uaddr_free_rbtree, &uaddr->up_free);
	while (entry != NULL) {
		fit_error = uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), minaddr),
		    MIN(VMMAP_FREE_END(entry), maxaddr),
		    sz, align, offset, before_gap, after_gap);

		/* It fits, save this entry. */
		if (fit_error == 0) {
			found = entry;
			found_minaddr = min;
			found_maxaddr = max;
		}

		/* Next. */
		if (fit_error != 0)
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		else if ((path & 0x1) == 0) {
			path >>= 1;
			entry = RBT_RIGHT(uaddr_free_rbtree, entry);
		} else {
			path >>= 1;
			entry = RBT_LEFT(uaddr_free_rbtree, entry);
		}
	}
	if (found == NULL)
		return ENOMEM;	/* No large enough region found. */

	/*
	 * Calculate a random address within found.
	 *
	 * found_minaddr and found_maxaddr are already aligned, so be sure
	 * to select a multiple of align as the offset in the entry.
	 * Preferably, arc4random_uniform is used to provide no bias within
	 * the entry.
	 * However if the size of the entry exceeds arc4random_uniform's
	 * argument limit, we simply use arc4random (thus limiting ourselves
	 * to a 4 GB offset).
	 */
	if (found_maxaddr == found_minaddr)
		*addr_out = found_minaddr;
	else {
		KASSERT(align >= PAGE_SIZE && (align & (align - 1)) == 0);
		arc4_arg = found_maxaddr - found_minaddr;
		if (arc4_arg > 0xffffffff) {
			*addr_out = found_minaddr +
			    (arc4random() & ~(align - 1));
		} else {
			*addr_out = found_minaddr +
			    (arc4random_uniform(arc4_arg) & ~(align - 1));
		}
	}
	/* Address was found in this entry. */
	*entry_out = found;

	/*
	 * Set up new pivot and return selected address.
	 *
	 * Depending on the direction of the pivot, the pivot must be placed
	 * at the bottom or the top of the allocation:
	 * - if the pivot moves upwards, place the pivot at the top of the
	 *   allocation,
	 * - if the pivot moves downwards, place the pivot at the bottom
	 *   of the allocation.
	 */
	pivot->entry = found;
	pivot->dir = (arc4random() & 0x1 ? 1 : -1);
	if (pivot->dir > 0)
		pivot->addr = *addr_out + sz;
	else
		pivot->addr = *addr_out;
	pivot->expire = PIVOT_EXPIRE - 1; /* First use is right now. */
	return 0;
}

/*
 * Pivot selector.
 *
 * Each time the selector is invoked, it will select a random pivot, which
 * it will use to select memory. The memory will be placed at the pivot,
 * with a randomly sized gap between the allocation and the pivot.
 * The pivot will then move so it will never revisit this address.
 *
 * Each allocation, the pivot expiry timer ticks. Once the pivot becomes
 * expired, it will be replaced with a newly created pivot. Pivots also
 * automatically expire if they fail to provide memory for an allocation.
 *
 * Expired pivots are replaced using the uaddr_pivot_newpivot() function,
 * which will ensure the pivot points at memory in such a way that the
 * allocation will succeed.
 * As an added bonus, the uaddr_pivot_newpivot() function will perform the
 * allocation immediately and move the pivot as appropriate.
 *
 * If uaddr_pivot_newpivot() fails to find a new pivot that will allow the
 * allocation to succeed, it will not create a new pivot and the allocation
 * will fail.
 *
 * A pivot running into used memory will automatically expire (because it will
 * fail to allocate).
 *
 * Characteristics of the allocator:
 * - best case, an allocation is O(log N)
 *   (it would be O(1), if it weren't for the need to check if the memory is
 *   free; although that can be avoided...)
 * - worst case, an allocation is O(log N)
 *   (the uaddr_pivot_newpivot() function has that complexity)
 * - failed allocations always take O(log N)
 *   (the uaddr_pivot_newpivot() function will walk that deep into the tree).
 */
int
uaddr_pivot_select(struct vm_map *map, struct uvm_addr_state *uaddr_p,
    struct vm_map_entry **entry_out, vaddr_t *addr_out,
    vsize_t sz, vaddr_t align, vaddr_t offset,
    vm_prot_t prot, vaddr_t hint)
{
	struct uaddr_pivot_state	*uaddr;
	struct vm_map_entry		*entry;
	struct uaddr_pivot		*pivot;
	vaddr_t				 min, max;
	vsize_t				 before_gap, after_gap;
	int				 err;

	/*
	 * When we have a hint, use the rnd allocator that finds the
	 * area that is closest to the hint, if there is such an area.
	 */
	if (hint != 0) {
		if (uaddr_rnd_select(map, uaddr_p, entry_out, addr_out,
		    sz, align, offset, prot, hint) == 0)
			return 0;
		return ENOMEM;
	}

	/*
	 * Select a random pivot and random gap sizes around the allocation.
	 */
	uaddr = (struct uaddr_pivot_state *)uaddr_p;
	pivot = &uaddr->up_pivots[
	    arc4random_uniform(nitems(uaddr->up_pivots))];
	before_gap = uaddr_pivot_random();
	after_gap = uaddr_pivot_random();
	if (pivot->addr == 0 || pivot->entry == NULL || pivot->expire == 0)
		goto expired;	/* Pivot is invalid (null or expired). */

	/*
	 * Attempt to use the pivot to map the entry.
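	 *
	 * A pivot with dir > 0 only considers free space above pivot->addr
	 * and then advances past the new allocation; a pivot with dir < 0
	 * only considers free space below pivot->addr and moves downward.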
	 */
	entry = pivot->entry;
	if (pivot->dir > 0) {
		if (uvm_addr_fitspace(&min, &max,
		    MAX(VMMAP_FREE_START(entry), pivot->addr),
		    VMMAP_FREE_END(entry), sz, align, offset,
		    before_gap, after_gap) == 0) {
			*addr_out = min;
			*entry_out = entry;
			pivot->addr = min + sz;
			pivot->expire--;
			return 0;
		}
	} else {
		if (uvm_addr_fitspace(&min, &max,
		    VMMAP_FREE_START(entry),
		    MIN(VMMAP_FREE_END(entry), pivot->addr),
		    sz, align, offset, before_gap, after_gap) == 0) {
			*addr_out = max;
			*entry_out = entry;
			pivot->addr = max;
			pivot->expire--;
			return 0;
		}
	}

expired:
	/*
	 * Pivot expired or allocation failed.
	 * Use pivot selector to do the allocation and find a new pivot.
	 */
	err = uaddr_pivot_newpivot(map, uaddr, pivot, entry_out, addr_out,
	    sz, align, offset, before_gap, after_gap);
	return err;
}

/*
 * Free the pivot selector state.
 */
void
uaddr_pivot_destroy(struct uvm_addr_state *uaddr)
{
	pool_put(&uaddr_pivot_pool, uaddr);
}

1168181c6205Sariane /*
1169181c6205Sariane  * Insert an entry with free space in the space tree.
1170181c6205Sariane  */
1171181c6205Sariane void
1172181c6205Sariane uaddr_pivot_insert(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1173181c6205Sariane     struct vm_map_entry *entry)
1174181c6205Sariane {
1175181c6205Sariane 	struct uaddr_pivot_state	*uaddr;
1176181c6205Sariane 	struct vm_map_entry		*rb_rv;
1177181c6205Sariane 	struct uaddr_pivot		*p;
1178181c6205Sariane 	vaddr_t				 check_addr;
1179181c6205Sariane 	vaddr_t				 start, end;
1180181c6205Sariane 
1181181c6205Sariane 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
118271b0131cSdlg 	if ((rb_rv = RBT_INSERT(uaddr_free_rbtree, &uaddr->up_free, entry)) !=
1183181c6205Sariane 	    NULL) {
1184181c6205Sariane 		panic("%s: duplicate insertion: state %p "
1185181c6205Sariane 		    "inserting entry %p which collides with %p", __func__,
1186181c6205Sariane 		    uaddr, entry, rb_rv);
1187181c6205Sariane 	}
1188181c6205Sariane 
1189181c6205Sariane 	start = VMMAP_FREE_START(entry);
1190181c6205Sariane 	end = VMMAP_FREE_END(entry);
1191181c6205Sariane 
1192181c6205Sariane 	/*
1193181c6205Sariane 	 * Update all pivots that are contained in this entry.
1194181c6205Sariane 	 */
1195181c6205Sariane 	for (p = &uaddr->up_pivots[0];
1196181c6205Sariane 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1197181c6205Sariane 		check_addr = p->addr;
1198181c6205Sariane 		if (check_addr == 0)
1199181c6205Sariane 			continue;
1200181c6205Sariane 		if (p->dir < 0)
1201181c6205Sariane 			check_addr--;
1202181c6205Sariane 
1203181c6205Sariane 		if (start <= check_addr &&
1204181c6205Sariane 		    check_addr < end) {
1205181c6205Sariane 			KASSERT(p->entry == NULL);
1206181c6205Sariane 			p->entry = entry;
1207181c6205Sariane 		}
1208181c6205Sariane 	}
1209181c6205Sariane }
1210181c6205Sariane 
1211181c6205Sariane /*
1212181c6205Sariane  * Remove an entry with free space from the space tree.
1213181c6205Sariane  */
1214181c6205Sariane void
1215181c6205Sariane uaddr_pivot_remove(struct vm_map *map, struct uvm_addr_state *uaddr_p,
1216181c6205Sariane     struct vm_map_entry *entry)
1217181c6205Sariane {
1218181c6205Sariane 	struct uaddr_pivot_state	*uaddr;
1219181c6205Sariane 	struct uaddr_pivot		*p;
1220181c6205Sariane 
1221181c6205Sariane 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
122271b0131cSdlg 	if (RBT_REMOVE(uaddr_free_rbtree, &uaddr->up_free, entry) != entry)
1223181c6205Sariane 		panic("%s: entry was not in tree", __func__);
1224181c6205Sariane 
1225181c6205Sariane 	/*
1226181c6205Sariane 	 * Inform any pivot with this entry that the entry is gone.
1227181c6205Sariane 	 * Note that this does not automatically invalidate the pivot.
1228181c6205Sariane 	 */
1229181c6205Sariane 	for (p = &uaddr->up_pivots[0];
1230181c6205Sariane 	    p != &uaddr->up_pivots[nitems(uaddr->up_pivots)]; p++) {
1231181c6205Sariane 		if (p->entry == entry)
1232181c6205Sariane 			p->entry = NULL;
1233181c6205Sariane 	}
1234181c6205Sariane }
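/*
 * Consequence of the note above, sketched as a hypothetical helper: a
 * pivot that lost its entry keeps its address and expiry count, and a
 * later uaddr_pivot_insert() of free space covering that address will
 * re-link it.  A selector therefore only considers a pivot usable when
 * all three conditions below hold (this mirrors, but is not literally,
 * the checks made in uaddr_pivot_select()).
 */
static int
pivot_model_usable(const struct uaddr_pivot *p)
{
	return p->addr != 0 && p->expire > 0 && p->entry != NULL;
}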
1235181c6205Sariane 
1236181c6205Sariane /*
1237181c6205Sariane  * Create a new pivot selector.
1238181c6205Sariane  *
1239181c6205Sariane  * Initially, all pivots are in the expired state.
1240181c6205Sariane  * Two reasons for this:
1241181c6205Sariane  * - creating the allocator stays cheap: no pivots are selected up front
1242181c6205Sariane  * - pivots select better on demand, because pivot selection is
1243181c6205Sariane  *   affected by preceding allocations:
1244181c6205Sariane  *   later pivots tend to land in different segments of free memory,
1245181c6205Sariane  *   segments carved out by earlier allocations; this gives a better spread.
1246181c6205Sariane  */
1247181c6205Sariane struct uvm_addr_state *
1248181c6205Sariane uaddr_pivot_create(vaddr_t minaddr, vaddr_t maxaddr)
1249181c6205Sariane {
1250181c6205Sariane 	struct uaddr_pivot_state *uaddr;
1251181c6205Sariane 
1252181c6205Sariane 	uaddr = pool_get(&uaddr_pivot_pool, PR_WAITOK);
1253181c6205Sariane 	uaddr->up_uaddr.uaddr_minaddr = minaddr;
1254181c6205Sariane 	uaddr->up_uaddr.uaddr_maxaddr = maxaddr;
1255181c6205Sariane 	uaddr->up_uaddr.uaddr_functions = &uaddr_pivot_functions;
125671b0131cSdlg 	RBT_INIT(uaddr_free_rbtree, &uaddr->up_free);
12576c0aa6dcStedu 	memset(uaddr->up_pivots, 0, sizeof(uaddr->up_pivots));
1258181c6205Sariane 
1259181c6205Sariane 	return &uaddr->up_uaddr;
1260181c6205Sariane }
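/*
 * Usage sketch with made-up addresses (pivot_example() itself is not
 * part of this file): create a pivot selector for a range and destroy
 * it again.  Assumes the uaddr pools have already been set up by
 * uvm_addr_init().
 */
void
pivot_example(void)
{
	struct uvm_addr_state *uaddr;

	uaddr = uaddr_pivot_create(0x1000, 0x7fff0000);
	/* ... install into a vm_map and serve allocations from it ... */
	uaddr_pivot_destroy(uaddr);
}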
1261181c6205Sariane 
1262181c6205Sariane #if defined(DEBUG) || defined(DDB)
1263181c6205Sariane /*
1264181c6205Sariane  * Print the uaddr_pivot_state.
1265181c6205Sariane  *
1266181c6205Sariane  * If full, a listing of all entries in the state will be provided.
1267181c6205Sariane  */
1268181c6205Sariane void
1269181c6205Sariane uaddr_pivot_print(struct uvm_addr_state *uaddr_p, boolean_t full,
1270181c6205Sariane     int (*pr)(const char *, ...))
1271181c6205Sariane {
1272181c6205Sariane 	struct uaddr_pivot_state	*uaddr;
1273181c6205Sariane 	struct uaddr_pivot		*pivot;
1274181c6205Sariane 	struct vm_map_entry		*entry;
1275181c6205Sariane 	int				 i;
1276181c6205Sariane 	vaddr_t				 check_addr;
1277181c6205Sariane 
1278181c6205Sariane 	uaddr = (struct uaddr_pivot_state *)uaddr_p;
1279181c6205Sariane 
1280181c6205Sariane 	for (i = 0; i < NUM_PIVOTS; i++) {
1281181c6205Sariane 		pivot = &uaddr->up_pivots[i];
1282181c6205Sariane 
1283181c6205Sariane 		(*pr)("\tpivot 0x%lx, expires in %d, direction %d\n",
1284181c6205Sariane 		    pivot->addr, pivot->expire, pivot->dir);
1285181c6205Sariane 	}
1286181c6205Sariane 	if (!full)
1287181c6205Sariane 		return;
1288181c6205Sariane 
128971b0131cSdlg 	if (RBT_EMPTY(uaddr_free_rbtree, &uaddr->up_free))
1290181c6205Sariane 		(*pr)("\tempty\n");
1291181c6205Sariane 	/* Print list of free space. */
129271b0131cSdlg 	RBT_FOREACH(entry, uaddr_free_rbtree, &uaddr->up_free) {
1293181c6205Sariane 		(*pr)("\t0x%lx - 0x%lx free (0x%lx bytes)\n",
1294181c6205Sariane 		    VMMAP_FREE_START(entry), VMMAP_FREE_END(entry),
1295181c6205Sariane 		    VMMAP_FREE_END(entry) - VMMAP_FREE_START(entry));
1296181c6205Sariane 
1297181c6205Sariane 		for (i = 0; i < NUM_PIVOTS; i++) {
1298181c6205Sariane 			pivot = &uaddr->up_pivots[i];
1299181c6205Sariane 			check_addr = pivot->addr;
1300181c6205Sariane 			if (check_addr == 0)
1301181c6205Sariane 				continue;
1302181c6205Sariane 			if (pivot->dir < 0)
1303181c6205Sariane 				check_addr--;
1304181c6205Sariane 
1305181c6205Sariane 			if (VMMAP_FREE_START(entry) <= check_addr &&
1306181c6205Sariane 			    check_addr < VMMAP_FREE_END(entry)) {
1307181c6205Sariane 				(*pr)("\t\tcontains pivot %d (0x%lx)\n",
1308181c6205Sariane 				    i, pivot->addr);
1309181c6205Sariane 			}
1310181c6205Sariane 		}
1311181c6205Sariane 	}
1312181c6205Sariane }
1313181c6205Sariane #endif /* DEBUG || DDB */
131459399f65Sariane #endif /* !SMALL_KERNEL */
1315181c6205Sariane 
131659399f65Sariane #ifndef SMALL_KERNEL
1317181c6205Sariane /*
1318181c6205Sariane  * Stack/break allocator.
1319181c6205Sariane  *
1320181c6205Sariane  * The stack area is grown in the direction opposite to stack growth;
1321181c6205Sariane  * the brk area is grown downward (because sbrk() grows upward).
1322181c6205Sariane  *
1323181c6205Sariane  * Allocations are served from both areas: the brk area is searched
1324181c6205Sariane  * first; if the allocation does not fit there, the stack area is
1325181c6205Sariane  * tried next.
1326181c6205Sariane  */
1327181c6205Sariane const struct uvm_addr_functions uaddr_stack_brk_functions = {
1328181c6205Sariane 	.uaddr_select = &uaddr_stack_brk_select,
1329181c6205Sariane 	.uaddr_destroy = &uaddr_destroy,
1330181c6205Sariane 	.uaddr_name = "uaddr_stckbrk"
1331181c6205Sariane };
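/*
 * Dispatch sketch (hypothetical wrapper; the real dispatch lives in
 * uvm_map.c): uvm_map drives any selector, this one included, through
 * its function table, so a select call reduces to the indirect call
 * below.
 */
static int
uaddr_model_select(struct vm_map *map, struct uvm_addr_state *uaddr,
    struct vm_map_entry **entry_out, vaddr_t *addr_out, vsize_t sz,
    vaddr_t align, vaddr_t offset, vm_prot_t prot, vaddr_t hint)
{
	if (uaddr->uaddr_functions->uaddr_select == NULL)
		return ENOMEM;
	return (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
	    entry_out, addr_out, sz, align, offset, prot, hint);
}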
1332181c6205Sariane 
1333181c6205Sariane /*
1334181c6205Sariane  * Stack/brk address selector.
1335181c6205Sariane  */
1336181c6205Sariane int
1337181c6205Sariane uaddr_stack_brk_select(struct vm_map *map, struct uvm_addr_state *uaddr,
1338181c6205Sariane     struct vm_map_entry **entry_out, vaddr_t *addr_out,
1339181c6205Sariane     vsize_t sz, vaddr_t align, vaddr_t offset,
1340181c6205Sariane     vm_prot_t prot, vaddr_t hint)
1341181c6205Sariane {
1342d13399acSotto 	vaddr_t			start;
1343d13399acSotto 	vaddr_t			end;
1344d13399acSotto 	vsize_t			before_gap;
1345d13399acSotto 	vsize_t			after_gap;
1346d13399acSotto 	int			dir;
1347181c6205Sariane 
1348d13399acSotto 	/* Set up brk search strategy. */
1349d13399acSotto 	start = MAX(map->b_start, uaddr->uaddr_minaddr);
1350d13399acSotto 	end = MIN(map->b_end, uaddr->uaddr_maxaddr);
1351d13399acSotto 	before_gap = 0;
1352d13399acSotto 	after_gap = 0;
1353d13399acSotto 	dir = -1;	/* Opposite of brk() growth. */
1354181c6205Sariane 
1355d13399acSotto 	if (end - start >= sz) {
1356d13399acSotto 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1357d13399acSotto 		    0, sz, align, offset, dir, start, end - sz,
1358d13399acSotto 		    before_gap, after_gap) == 0)
1359d13399acSotto 			return 0;
1360181c6205Sariane 	}
1361181c6205Sariane 
136235164244Stedu 	/* Set up stack search strategy. */
1363d13399acSotto 	start = MAX(map->s_start, uaddr->uaddr_minaddr);
1364d13399acSotto 	end = MIN(map->s_end, uaddr->uaddr_maxaddr);
1365d13399acSotto 	before_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1366d13399acSotto 	after_gap = ((arc4random() & 0x3) + 1) << PAGE_SHIFT;
1367181c6205Sariane #ifdef MACHINE_STACK_GROWS_UP
1368d13399acSotto 	dir = -1;
1369181c6205Sariane #else
1370d13399acSotto 	dir =  1;
1371181c6205Sariane #endif
1372f510bcddSotto 	if (end - start >= before_gap + after_gap &&
1373f510bcddSotto 	    end - start - before_gap - after_gap >= sz) {
1374181c6205Sariane 		if (uvm_addr_linsearch(map, uaddr, entry_out, addr_out,
1375d13399acSotto 		    0, sz, align, offset, dir, start, end - sz,
1376181c6205Sariane 		    before_gap, after_gap) == 0)
1377181c6205Sariane 			return 0;
1378181c6205Sariane 	}
1379181c6205Sariane 
1380181c6205Sariane 	return ENOMEM;
1381181c6205Sariane }
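/*
 * Worked example of the stack guard gaps above: (arc4random() & 0x3)
 * yields 0-3, so each gap spans 1-4 pages.  With 4K pages
 * (PAGE_SHIFT == 12):
 *
 *	smallest gap: (0 + 1) << 12 =  4096 bytes (1 page)
 *	largest gap:  (3 + 1) << 12 = 16384 bytes (4 pages)
 *
 * The search window must therefore hold before_gap + after_gap + sz
 * bytes, which is exactly the two-part size check guarding the stack
 * call to uvm_addr_linsearch() above.
 */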
1382181c6205Sariane 
1383181c6205Sariane struct uvm_addr_state *
1384181c6205Sariane uaddr_stack_brk_create(vaddr_t minaddr, vaddr_t maxaddr)
1385181c6205Sariane {
1386181c6205Sariane 	struct uvm_addr_state* uaddr;
1387181c6205Sariane 
1388181c6205Sariane 	uaddr = pool_get(&uaddr_pool, PR_WAITOK);
1389181c6205Sariane 	uaddr->uaddr_minaddr = minaddr;
1390181c6205Sariane 	uaddr->uaddr_maxaddr = maxaddr;
1391181c6205Sariane 	uaddr->uaddr_functions = &uaddr_stack_brk_functions;
1392181c6205Sariane 	return uaddr;
1393181c6205Sariane }
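/*
 * Usage sketch with made-up addresses (stack_brk_example() is not part
 * of this file): the selector clamps the map's stack and brk ranges
 * against [minaddr, maxaddr], so it only returns addresses inside the
 * range it was created for.  Destruction goes through the generic
 * uaddr_destroy() named in the function table above.
 */
void
stack_brk_example(void)
{
	struct uvm_addr_state *uaddr;

	uaddr = uaddr_stack_brk_create(0x1000, 0xffff0000);
	/* ... hand to a vm_map; selection uses its s_* and b_* ranges ... */
	uaddr_destroy(uaddr);
}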
139459399f65Sariane #endif /* !SMALL_KERNEL */
1395181c6205Sariane 
1396181c6205Sariane 
139759399f65Sariane #ifndef SMALL_KERNEL
139812f0bc51Spatrick /*
139912f0bc51Spatrick  * Free space comparison.
140012f0bc51Spatrick  * Sorts smaller free space before larger free space.
140112f0bc51Spatrick  */
140212f0bc51Spatrick static inline int
140312f0bc51Spatrick uvm_mapent_fspace_cmp(const struct vm_map_entry *e1,
140412f0bc51Spatrick     const struct vm_map_entry *e2)
140512f0bc51Spatrick {
140612f0bc51Spatrick 	if (e1->fspace != e2->fspace)
140712f0bc51Spatrick 		return (e1->fspace < e2->fspace ? -1 : 1);
140812f0bc51Spatrick 	return (e1->start < e2->start ? -1 : e1->start > e2->start);
140912f0bc51Spatrick }
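/*
 * Ordering example for the comparator above, with made-up values.
 * Entries sort primarily by the size of their free space, ties broken
 * by start address so that tree keys stay unique:
 *
 *	e1 = { fspace 0x2000, start 0x9000 }
 *	e2 = { fspace 0x4000, start 0x1000 }
 *	e3 = { fspace 0x4000, start 0x5000 }
 *
 * sorts as e1 < e2 < e3: e1 comes first on size alone, then e2 precedes
 * e3 on start address.  A selector walking upward can thus stop at the
 * first entry whose free space is large enough.
 */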
141012f0bc51Spatrick 
141171b0131cSdlg RBT_GENERATE(uaddr_free_rbtree, vm_map_entry, dfree.rbtree,
1412181c6205Sariane     uvm_mapent_fspace_cmp);
141359399f65Sariane #endif /* !SMALL_KERNEL */
1414