xref: /openbsd-src/sys/kern/subr_extent.c (revision 91f110e064cd7c194e59e019b83bb7496c1c84d4)
1 /*	$OpenBSD: subr_extent.c,v 1.50 2014/01/21 21:45:58 kettenis Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * General purpose extent manager.
35  */
36 
37 #ifdef _KERNEL
38 #include <sys/param.h>
39 #include <sys/extent.h>
40 #include <sys/malloc.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/pool.h>
46 #include <ddb/db_output.h>
47 #else
48 /*
49  * user-land definitions, so it can fit into a testing harness.
50  */
51 #include <sys/param.h>
52 #include <sys/pool.h>
53 #include <sys/extent.h>
54 #include <sys/queue.h>
55 #include <errno.h>
56 #include <stdlib.h>
57 #include <stdio.h>
58 #include <string.h>
59 
60 #define	malloc(s, t, flags)		malloc(s)
61 #define	free(p, t)			free(p)
62 #define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
63 #define	wakeup(chan)			((void)0)
64 #define	pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
65 #define	pool_init(a, b, c, d, e, f, g)	(a)->pr_size = (b)
66 #define	pool_setipl(pool, ipl)		/* nothing */
67 #define	pool_put(pool, rp)		free((rp), 0)
68 #define	panic				printf
69 #endif
70 
71 #if defined(DIAGNOSTIC) || defined(DDB)
72 void	extent_print1(struct extent *, int (*)(const char *, ...)
73 	    __attribute__((__format__(__kprintf__,1,2))));
74 #endif
75 
76 static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
77 	    struct extent_region *, struct extent_region *);
78 static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
79 static	void extent_free_region_descriptor(struct extent *,
80 	    struct extent_region *);
81 int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
82 	    u_long, u_long, int, struct extent_region *, u_long *);
83 
84 /*
85  * Shortcut to align to an arbitrary power-of-two boundary.
86  */
87 static __inline__ u_long
88 extent_align(u_long start, u_long align, u_long skew)
89 {
90 	return ((((start - skew) + (align - 1)) & (-align)) + skew);
91 }
92 
93 
94 #if defined(DIAGNOSTIC) || defined(DDB)
95 /*
96  * Register the extent on a doubly linked list.
97  * Should work, no?
98  */
99 static LIST_HEAD(listhead, extent) ext_list;
100 static	void extent_register(struct extent *);
101 
static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	/* Lazily initialize the global list of extents on first use. */
	if (!initialized){
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	/* Catch callers registering the same extent twice. */
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
125 #endif	/* DIAGNOSTIC || DDB */
126 
127 struct pool ex_region_pl;
128 
129 static void
130 extent_pool_init(void)
131 {
132 	static int inited;
133 
134 	if (!inited) {
135 		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
136 		    "extentpl", NULL);
137 		pool_setipl(&ex_region_pl, IPL_VM);
138 		inited = 1;
139 	}
140 }
141 
142 #ifdef DDB
143 /*
144  * Print out all extents registered.  This is used in
145  * DDB show extents
146  */
147 void
148 extent_print_all(void)
149 {
150 	struct extent *ep;
151 
152 	LIST_FOREACH(ep, &ext_list, ex_link) {
153 		extent_print1(ep, db_printf);
154 	}
155 }
156 #endif
157 
158 /*
159  * Allocate and initialize an extent map.
160  */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	/* Caller-supplied storage makes this a "fixed" extent. */
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 * Carve as many as will fit out of the remaining storage.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	/*
	 * EX_FILLED: start out with the entire range marked allocated,
	 * by inserting a single region covering [start, end].
	 */
	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
252 
253 /*
254  * Destroy an extent map.
255  */
256 void
257 extent_destroy(struct extent *ex)
258 {
259 	struct extent_region *rp, *orp;
260 
261 #ifdef DIAGNOSTIC
262 	/* Check arguments. */
263 	if (ex == NULL)
264 		panic("%s: NULL extent", __func__);
265 #endif
266 
267 	/* Free all region descriptors in extent. */
268 	for (rp = LIST_FIRST(&ex->ex_regions);
269 	    rp != LIST_END(&ex->ex_regions); ) {
270 		orp = rp;
271 		rp = LIST_NEXT(rp, er_link);
272 		LIST_REMOVE(orp, er_link);
273 		extent_free_region_descriptor(ex, orp);
274 	}
275 
276 #if defined(DIAGNOSTIC) || defined(DDB)
277 	/* Remove from the list of all extents. */
278 	LIST_REMOVE(ex, ex_link);
279 #endif
280 
281 	/* If we're not a fixed extent, free the extent descriptor itself. */
282 	if ((ex->ex_flags & EXF_FIXED) == 0)
283 		free(ex, ex->ex_mtype);
284 }
285 
286 /*
287  * Insert a region descriptor into the sorted region list after the
288  * entry "after" or at the head of the list (if "after" is NULL).
289  * The region descriptor we insert is passed in "rp".  We must
290  * allocate the region descriptor before calling this function!
291  * If we don't need the region descriptor, it will be freed here.
292  */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;	/* set once we've merged into "after" */

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 * Note er_end is inclusive: [start, start + size - 1].
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		/* Merged either way; the caller's descriptor is unused. */
		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
394 
395 /*
396  * Allocate a specific region in an extent map.
397  */
398 int
399 extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
400 {
401 	struct extent_region *rp, *last, *myrp;
402 	u_long end = start + (size - 1);
403 	int error;
404 
405 #ifdef DIAGNOSTIC
406 	/* Check arguments. */
407 	if (ex == NULL)
408 		panic("%s: NULL extent", __func__);
409 	if (size < 1) {
410 		printf("%s: extent `%s', size 0x%lx\n",
411 		    __func__, ex->ex_name, size);
412 		panic("%s: bad size", __func__);
413 	}
414 	if (end < start) {
415 		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
416 		    __func__, ex->ex_name, start, size);
417 		panic("%s: overflow", __func__);
418 	}
419 	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
420 		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
421 		    "are mutually exclusive", __func__);
422 #endif
423 
424 	/*
425 	 * Make sure the requested region lies within the
426 	 * extent.
427 	 */
428 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
429 #ifdef DIAGNOSTIC
430 		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
431 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
432 		printf("%s: start 0x%lx, end 0x%lx\n",
433 		    __func__, start, end);
434 		panic("%s: region lies outside extent", __func__);
435 #else
436 		return (EINVAL);
437 #endif
438 	}
439 
440 	/*
441 	 * Allocate the region descriptor.  It will be freed later
442 	 * if we can coalesce with another region.
443 	 */
444 	myrp = extent_alloc_region_descriptor(ex, flags);
445 	if (myrp == NULL) {
446 #ifdef DIAGNOSTIC
447 		printf(
448 		    "%s: can't allocate region descriptor\n", __func__);
449 #endif
450 		return (ENOMEM);
451 	}
452 
453  alloc_start:
454 	/*
455 	 * Attempt to place ourselves in the desired area of the
456 	 * extent.  We save ourselves some work by keeping the list sorted.
457 	 * In other words, if the start of the current region is greater
458 	 * than the end of our region, we don't have to search any further.
459 	 */
460 
461 	/*
462 	 * Keep a pointer to the last region we looked at so
463 	 * that we don't have to traverse the list again when
464 	 * we insert ourselves.  If "last" is NULL when we
465 	 * finally insert ourselves, we go at the head of the
466 	 * list.  See extent_insert_and_optimize() for details.
467 	 */
468 	last = NULL;
469 
470 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
471 		if (rp->er_start > end) {
472 			/*
473 			 * We lie before this region and don't
474 			 * conflict.
475 			 */
476 			break;
477 		}
478 
479 		/*
480 		 * The current region begins before we end.
481 		 * Check for a conflict.
482 		 */
483 		if (rp->er_end >= start) {
484 			/*
485 			 * We conflict.  If we can (and want to) wait,
486 			 * do so.
487 			 */
488 			if (flags & EX_WAITSPACE) {
489 				ex->ex_flags |= EXF_WANTED;
490 				error = tsleep(ex,
491 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
492 				    "extnt", 0);
493 				if (error)
494 					return (error);
495 				goto alloc_start;
496 			}
497 
498 			/*
499 			 * If we tolerate conflicts adjust things such
500 			 * that all space in the requested region is
501 			 * allocated.
502 			 */
503 			if (flags & EX_CONFLICTOK) {
504 				/*
505 				 * There are four possibilities:
506 				 *
507 				 * 1. The current region overlaps with
508 				 *    the start of the requested region.
509 				 *    Adjust the requested region to
510 				 *    start at the end of the current
511 				 *    region and try again.
512 				 *
513 				 * 2. The current region falls
514 				 *    completely within the requested
515 				 *    region.  Free the current region
516 				 *    and try again.
517 				 *
518 				 * 3. The current region overlaps with
519 				 *    the end of the requested region.
520 				 *    Adjust the requested region to
521 				 *    end at the start of the current
522 				 *    region and try again.
523 				 *
524 				 * 4. The requested region falls
525 				 *    completely within the current
526 				 *    region.  We're done.
527 				 */
528 				if (rp->er_start <= start) {
529 					if (rp->er_end < ex->ex_end) {
530 						start = rp->er_end + 1;
531 						size = end - start + 1;
532 						goto alloc_start;
533 					}
534 				} else if (rp->er_end < end) {
535 					LIST_REMOVE(rp, er_link);
536 					extent_free_region_descriptor(ex, rp);
537 					goto alloc_start;
538 				} else if (rp->er_start < end) {
539 					if (rp->er_start > ex->ex_start) {
540 						end = rp->er_start - 1;
541 						size = end - start + 1;
542 						goto alloc_start;
543 					}
544 				}
545 				return (0);
546 			}
547 
548 			extent_free_region_descriptor(ex, myrp);
549 			return (EAGAIN);
550 		}
551 		/*
552 		 * We don't conflict, but this region lies before
553 		 * us.  Keep a pointer to this region, and keep
554 		 * trying.
555 		 */
556 		last = rp;
557 	}
558 
559 	/*
560 	 * We don't conflict with any regions.  "last" points
561 	 * to the region we fall after, or is NULL if we belong
562 	 * at the beginning of the region list.  Insert ourselves.
563 	 */
564 	extent_insert_and_optimize(ex, start, size, last, myrp);
565 	return (0);
566 }
567 
568 /*
569  * Macro to check (x + y) <= z.  This check is designed to fail
570  * if an overflow occurs.
571  */
572 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
573 
574 /*
575  * Allocate a region in an extent map subregion.
576  *
577  * If EX_FAST is specified, we return the first fit in the map.
578  * Otherwise, we try to minimize fragmentation by finding the
579  * smallest gap that will hold the request.
580  *
581  * The allocated region is aligned to "alignment", which must be
582  * a power of 2.
583  */
584 int
585 extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
586     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
587     struct extent_region *myrp, u_long *result)
588 {
589 	struct extent_region *rp, *last, *bestlast;
590 	u_long newstart, newend, exend, beststart, bestovh, ovh;
591 	u_long dontcross;
592 	int error;
593 
594 #ifdef DIAGNOSTIC
595 	/* Check arguments. */
596 	if (ex == NULL)
597 		panic("%s: NULL extent", __func__);
598 	if (myrp == NULL)
599 		panic("%s: NULL region descriptor", __func__);
600 	if (result == NULL)
601 		panic("%s: NULL result pointer", __func__);
602 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
603 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
604 		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
605 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
606 		printf("%s: substart 0x%lx, subend 0x%lx\n",
607 		    __func__, substart, subend);
608 		panic("%s: bad subregion", __func__);
609 	}
610 	if ((size < 1) || ((size - 1) > (subend - substart))) {
611 		printf("%s: extent `%s', size 0x%lx\n",
612 		    __func__, ex->ex_name, size);
613 		panic("%s: bad size", __func__);
614 	}
615 	if (alignment == 0)
616 		panic("%s: bad alignment", __func__);
617 	if (boundary && (boundary < size)) {
618 		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
619 		    __func__, ex->ex_name, size, boundary);
620 		panic("%s: bad boundary", __func__);
621 	}
622 #endif
623 
624  alloc_start:
625 	/*
626 	 * Keep a pointer to the last region we looked at so
627 	 * that we don't have to traverse the list again when
628 	 * we insert ourselves.  If "last" is NULL when we
629 	 * finally insert ourselves, we go at the head of the
630 	 * list.  See extent_insert_and_optimize() for details.
631 	 */
632 	last = NULL;
633 
634 	/*
635 	 * Keep track of size and location of the smallest
636 	 * chunk we fit in.
637 	 *
638 	 * Since the extent can be as large as the numeric range
639 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
640 	 * best overhead value can be the maximum unsigned integer.
641 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
642 	 * into the region list immediately on an exact match (which
643 	 * is the only case where "bestovh" would be set to 0).
644 	 */
645 	bestovh = 0;
646 	beststart = 0;
647 	bestlast = NULL;
648 
649 	/*
650 	 * Keep track of end of free region.  This is either the end of extent
651 	 * or the start of a region past the subend.
652 	 */
653 	exend = ex->ex_end;
654 
655 	/*
656 	 * For N allocated regions, we must make (N + 1)
657 	 * checks for unallocated space.  The first chunk we
658 	 * check is the area from the beginning of the subregion
659 	 * to the first allocated region after that point.
660 	 */
661 	newstart = extent_align(substart, alignment, skew);
662 	if (newstart < ex->ex_start) {
663 #ifdef DIAGNOSTIC
664 		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
665 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
666 		    alignment);
667 		panic("%s: overflow after alignment", __func__);
668 #else
669 		extent_free_region_descriptor(ex, myrp);
670 		return (EINVAL);
671 #endif
672 	}
673 
674 	/*
675 	 * Find the first allocated region that begins on or after
676 	 * the subregion start, advancing the "last" pointer along
677 	 * the way.
678 	 */
679 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
680 		if (rp->er_start >= newstart)
681 			break;
682 		last = rp;
683 	}
684 
685 	/*
686 	 * Relocate the start of our candidate region to the end of
687 	 * the last allocated region (if there was one overlapping
688 	 * our subrange).
689 	 */
690 	if (last != NULL && last->er_end >= newstart)
691 		newstart = extent_align((last->er_end + 1), alignment, skew);
692 
693 	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
694 		/*
695 		 * If the region pasts the subend, bail out and see
696 		 * if we fit against the subend.
697 		 */
698 		if (rp->er_start > subend) {
699 			exend = rp->er_start;
700 			break;
701 		}
702 
703 		/*
704 		 * Check the chunk before "rp".  Note that our
705 		 * comparison is safe from overflow conditions.
706 		 */
707 		if (LE_OV(newstart, size, rp->er_start)) {
708 			/*
709 			 * Do a boundary check, if necessary.  Note
710 			 * that a region may *begin* on the boundary,
711 			 * but it must end before the boundary.
712 			 */
713 			if (boundary) {
714 				newend = newstart + (size - 1);
715 
716 				/*
717 				 * Calculate the next boundary after the start
718 				 * of this region.
719 				 */
720 				dontcross = extent_align(newstart+1, boundary,
721 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
722 				    - 1;
723 
724 #if 0
725 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
726 				    newstart, newend, ex->ex_start, ex->ex_end,
727 				    boundary, dontcross);
728 #endif
729 
730 				/* Check for overflow */
731 				if (dontcross < ex->ex_start)
732 					dontcross = ex->ex_end;
733 				else if (newend > dontcross) {
734 					/*
735 					 * Candidate region crosses boundary.
736 					 * Throw away the leading part and see
737 					 * if we still fit.
738 					 */
739 					newstart = dontcross + 1;
740 					newend = newstart + (size - 1);
741 					dontcross += boundary;
742 					if (!LE_OV(newstart, size, rp->er_start))
743 						goto skip;
744 				}
745 
746 				/*
747 				 * If we run past the end of
748 				 * the extent or the boundary
749 				 * overflows, then the request
750 				 * can't fit.
751 				 */
752 				if (newstart + size - 1 > ex->ex_end ||
753 				    dontcross < newstart)
754 					goto fail;
755 			}
756 
757 			/*
758 			 * We would fit into this space.  Calculate
759 			 * the overhead (wasted space).  If we exactly
760 			 * fit, or we're taking the first fit, insert
761 			 * ourselves into the region list.
762 			 */
763 			ovh = rp->er_start - newstart - size;
764 			if ((flags & EX_FAST) || (ovh == 0))
765 				goto found;
766 
767 			/*
768 			 * Don't exactly fit, but check to see
769 			 * if we're better than any current choice.
770 			 */
771 			if ((bestovh == 0) || (ovh < bestovh)) {
772 				bestovh = ovh;
773 				beststart = newstart;
774 				bestlast = last;
775 			}
776 		}
777 
778 skip:
779 		/*
780 		 * Skip past the current region and check again.
781 		 */
782 		newstart = extent_align((rp->er_end + 1), alignment, skew);
783 		if (newstart < rp->er_end) {
784 			/*
785 			 * Overflow condition.  Don't error out, since
786 			 * we might have a chunk of space that we can
787 			 * use.
788 			 */
789 			goto fail;
790 		}
791 
792 		last = rp;
793 	}
794 
795 	/*
796 	 * The final check is from the current starting point to the
797 	 * end of the subregion.  If there were no allocated regions,
798 	 * "newstart" is set to the beginning of the subregion, or
799 	 * just past the end of the last allocated region, adjusted
800 	 * for alignment in either case.
801 	 */
802 	if (LE_OV(newstart, (size - 1), subend)) {
803 		/*
804 		 * Do a boundary check, if necessary.  Note
805 		 * that a region may *begin* on the boundary,
806 		 * but it must end before the boundary.
807 		 */
808 		if (boundary) {
809 			newend = newstart + (size - 1);
810 
811 			/*
812 			 * Calculate the next boundary after the start
813 			 * of this region.
814 			 */
815 			dontcross = extent_align(newstart+1, boundary,
816 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
817 			    - 1;
818 
819 #if 0
820 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
821 			    newstart, newend, ex->ex_start, ex->ex_end,
822 			    boundary, dontcross);
823 #endif
824 
825 			/* Check for overflow */
826 			if (dontcross < ex->ex_start)
827 				dontcross = ex->ex_end;
828 			else if (newend > dontcross) {
829 				/*
830 				 * Candidate region crosses boundary.
831 				 * Throw away the leading part and see
832 				 * if we still fit.
833 				 */
834 				newstart = dontcross + 1;
835 				newend = newstart + (size - 1);
836 				dontcross += boundary;
837 				if (!LE_OV(newstart, (size - 1), subend))
838 					goto fail;
839 			}
840 
841 			/*
842 			 * If we run past the end of
843 			 * the extent or the boundary
844 			 * overflows, then the request
845 			 * can't fit.
846 			 */
847 			if (newstart + size - 1 > ex->ex_end ||
848 			    dontcross < newstart)
849 				goto fail;
850 		}
851 
852 		/*
853 		 * We would fit into this space.  Calculate
854 		 * the overhead (wasted space).  If we exactly
855 		 * fit, or we're taking the first fit, insert
856 		 * ourselves into the region list.
857 		 */
858 		ovh = exend - newstart - (size - 1);
859 		if ((flags & EX_FAST) || (ovh == 0))
860 			goto found;
861 
862 		/*
863 		 * Don't exactly fit, but check to see
864 		 * if we're better than any current choice.
865 		 */
866 		if ((bestovh == 0) || (ovh < bestovh)) {
867 			bestovh = ovh;
868 			beststart = newstart;
869 			bestlast = last;
870 		}
871 	}
872 
873  fail:
874 	/*
875 	 * One of the following two conditions have
876 	 * occurred:
877 	 *
878 	 *	There is no chunk large enough to hold the request.
879 	 *
880 	 *	If EX_FAST was not specified, there is not an
881 	 *	exact match for the request.
882 	 *
883 	 * Note that if we reach this point and EX_FAST is
884 	 * set, then we know there is no space in the extent for
885 	 * the request.
886 	 */
887 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
888 		/*
889 		 * We have a match that's "good enough".
890 		 */
891 		newstart = beststart;
892 		last = bestlast;
893 		goto found;
894 	}
895 
896 	/*
897 	 * No space currently available.  Wait for it to free up,
898 	 * if possible.
899 	 */
900 	if (flags & EX_WAITSPACE) {
901 		ex->ex_flags |= EXF_WANTED;
902 		error = tsleep(ex,
903 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
904 		if (error)
905 			return (error);
906 		goto alloc_start;
907 	}
908 
909 	extent_free_region_descriptor(ex, myrp);
910 	return (EAGAIN);
911 
912  found:
913 	/*
914 	 * Insert ourselves into the region list.
915 	 */
916 	extent_insert_and_optimize(ex, newstart, size, last, myrp);
917 	*result = newstart;
918 	return (0);
919 }
920 
921 int
922 extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
923     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
924     u_long *result)
925 {
926 	struct extent_region *rp;
927 
928 	/*
929 	 * Allocate the region descriptor.  It will be freed later
930 	 * if we can coalesce with another region.
931 	 */
932 	rp = extent_alloc_region_descriptor(ex, flags);
933 	if (rp == NULL) {
934 #ifdef DIAGNOSTIC
935 		printf("%s: can't allocate region descriptor\n", __func__);
936 #endif
937 		return (ENOMEM);
938 	}
939 
940 	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
941 	    boundary, flags, rp, result);
942 }
943 
944 int
945 extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
946     u_long subend, u_long size, u_long alignment, u_long skew,
947     u_long boundary, int flags, struct extent_region *rp, u_long *result)
948 {
949 #ifdef DIAGNOSTIC
950 	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
951 		panic("%s: EX_NOCOALESCE not set", __func__);
952 #endif
953 
954 	rp->er_flags = ER_DISCARD;
955 	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
956 	    boundary, flags, rp, result);
957 }
958 
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	/*
	 * NOTE(review): in the kernel panic() does not return; in the
	 * userland harness panic is #defined to printf, so execution
	 * falls through to "done" below -- confirm that is intended.
	 */
	panic("%s: region not found", __func__);

 done:
	/* Release the spare descriptor if no fragmentation consumed it. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	/* Wake anyone in extent_alloc_*() waiting for space to free up. */
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}
1098 
/*
 * Allocate a region descriptor for use by the extent map code.
 *
 * For a fixed extent (EXF_FIXED), descriptors normally come from the
 * extent's private freelist.  If the freelist is empty, behavior is
 * governed by `flags': EX_MALLOCOK falls back to the pool allocator,
 * EX_WAITOK sleeps until a descriptor is freed (EX_CATCH makes the
 * sleep interruptible by signals), otherwise we fail immediately.
 *
 * For a non-fixed extent, descriptors always come from ex_region_pl.
 *
 * Returns a descriptor on success, or NULL on failure (no memory, or
 * an interrupted/failed sleep).
 */
static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			/* Caller permits a dynamic allocation instead. */
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			/*
			 * Note we want a wakeup *before* sleeping, so the
			 * freeing side (extent_free_region_descriptor)
			 * knows someone is waiting.
			 */
			ex->ex_flags |= EXF_FLWANTED;
			/* Nonzero return means the sleep was interrupted. */
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	/* Dynamic allocation; PR_WAITOK may sleep in pool_get(). */
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;	/* mark for pool_put() on free */

	return (rp);
}
1140 
/*
 * Release a region descriptor previously obtained from
 * extent_alloc_region_descriptor().
 *
 * ER_DISCARD descriptors are owned by the caller (e.g. stack or
 * caller-managed storage) and must not be recycled or freed here.
 *
 * For fixed extents, a dynamically allocated descriptor (ER_ALLOC) is
 * donated to the extent's freelist if someone is waiting for one
 * (EXF_FLWANTED), otherwise it goes back to the pool; a descriptor
 * from the fixed storage always returns to the freelist.  Any waiter
 * is then woken.  For non-fixed extents, the descriptor is always
 * pool-allocated and simply returned to the pool.
 */
static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				/* Jump straight to the wakeup below. */
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}
1184 
1185 
1186 #if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1187 
/*
 * Print an extent map to the console; convenience wrapper around
 * extent_print1() using the standard printf as the output function.
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1193 
1194 void
1195 extent_print1(struct extent *ex,
1196     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1197 {
1198 	struct extent_region *rp;
1199 
1200 	if (ex == NULL)
1201 		panic("%s: NULL extent", __func__);
1202 
1203 #ifdef _KERNEL
1204 	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1205 	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1206 #else
1207 	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1208 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1209 #endif
1210 
1211 	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1212 		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1213 }
1214 #endif
1215