xref: /openbsd-src/sys/kern/subr_extent.c (revision a98d2d9e7dd554a5d9eb9cd2616a4d629c2d7baf)
1 /*	$OpenBSD: subr_extent.c,v 1.52 2014/08/17 09:48:55 dlg Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * General purpose extent manager.
35  */
36 
37 #ifdef _KERNEL
38 #include <sys/param.h>
39 #include <sys/extent.h>
40 #include <sys/malloc.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/pool.h>
46 #include <ddb/db_output.h>
47 #else
48 /*
49  * user-land definitions, so it can fit into a testing harness.
50  */
51 #include <sys/param.h>
52 #include <sys/extent.h>
53 #include <sys/queue.h>
54 #include <errno.h>
55 #include <err.h>
56 #include <stdlib.h>
57 #include <stdio.h>
58 #include <string.h>
59 
60 #define	malloc(s, t, flags)		malloc(s)
61 #define	free(p, t, s)			free(p)
62 
63 #define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
64 #define	wakeup(chan)			((void)0)
65 
66 struct pool {
67 	size_t pr_size;
68 };
69 
70 #define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
71 #define	pool_setipl(pp, ipl)		/* nothing */
72 #define pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
73 #define	pool_put(pp, rp)		free((rp), 0, 0)
74 
75 #define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
76 #endif
77 
78 #if defined(DIAGNOSTIC) || defined(DDB)
79 void	extent_print1(struct extent *, int (*)(const char *, ...)
80 	    __attribute__((__format__(__kprintf__,1,2))));
81 #endif
82 
83 static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
84 	    struct extent_region *, struct extent_region *);
85 static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
86 static	void extent_free_region_descriptor(struct extent *,
87 	    struct extent_region *);
88 int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
89 	    u_long, u_long, int, struct extent_region *, u_long *);
90 
91 /*
92  * Shortcut to align to an arbitrary power-of-two boundary.
93  */
94 static __inline__ u_long
95 extent_align(u_long start, u_long align, u_long skew)
96 {
97 	return ((((start - skew) + (align - 1)) & (-align)) + skew);
98 }
99 
100 
101 #if defined(DIAGNOSTIC) || defined(DDB)
102 /*
103  * Register the extent on a doubly linked list.
104  * Should work, no?
105  */
106 static LIST_HEAD(listhead, extent) ext_list;
107 static	void extent_register(struct extent *);
108 
109 static void
110 extent_register(struct extent *ex)
111 {
112 #ifdef DIAGNOSTIC
113 	struct extent *ep;
114 #endif
115 	static int initialized;
116 
117 	if (!initialized){
118 		LIST_INIT(&ext_list);
119 		initialized = 1;
120 	}
121 
122 #ifdef DIAGNOSTIC
123 	LIST_FOREACH(ep, &ext_list, ex_link) {
124 		if (ep == ex)
125 			panic("%s: already registered", __func__);
126 	}
127 #endif
128 
129 	/* Insert into list */
130 	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
131 }
132 #endif	/* DIAGNOSTIC || DDB */
133 
134 struct pool ex_region_pl;
135 
136 static void
137 extent_pool_init(void)
138 {
139 	static int inited;
140 
141 	if (!inited) {
142 		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
143 		    "extentpl", NULL);
144 		pool_setipl(&ex_region_pl, IPL_VM);
145 		inited = 1;
146 	}
147 }
148 
149 #ifdef DDB
150 /*
151  * Print out all extents registered.  This is used in
152  * DDB show extents
153  */
154 void
155 extent_print_all(void)
156 {
157 	struct extent *ep;
158 
159 	LIST_FOREACH(ep, &ext_list, ex_link) {
160 		extent_print1(ep, db_printf);
161 	}
162 }
163 #endif
164 
165 /*
166  * Allocate and initialize an extent map.
167  */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;		/* carving cursor into `storage' */
	size_t sz = storagesize;	/* bytes of `storage' still uncarved */
	struct extent_region *rp;
	/* A caller-supplied buffer makes this a "fixed" extent. */
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	/* Fixed storage must at least hold the extent_fixed header itself. */
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	/* Make sure the region descriptor pool exists before first use. */
	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		/* Carve as many region descriptors as the leftover space holds. */
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	/* EX_FILLED: start life with the entire range marked allocated. */
	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			/* Fixed-extent storage belongs to the caller. */
			if (!fixed_extent)
				free(ex, mtype, 0);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
259 
260 /*
261  * Destroy an extent map.
262  */
263 void
264 extent_destroy(struct extent *ex)
265 {
266 	struct extent_region *rp, *orp;
267 
268 #ifdef DIAGNOSTIC
269 	/* Check arguments. */
270 	if (ex == NULL)
271 		panic("%s: NULL extent", __func__);
272 #endif
273 
274 	/* Free all region descriptors in extent. */
275 	for (rp = LIST_FIRST(&ex->ex_regions);
276 	    rp != LIST_END(&ex->ex_regions); ) {
277 		orp = rp;
278 		rp = LIST_NEXT(rp, er_link);
279 		LIST_REMOVE(orp, er_link);
280 		extent_free_region_descriptor(ex, orp);
281 	}
282 
283 #if defined(DIAGNOSTIC) || defined(DDB)
284 	/* Remove from the list of all extents. */
285 	LIST_REMOVE(ex, ex_link);
286 #endif
287 
288 	/* If we're not a fixed extent, free the extent descriptor itself. */
289 	if ((ex->ex_flags & EXF_FIXED) == 0)
290 		free(ex, ex->ex_mtype, 0);
291 }
292 
293 /*
294  * Insert a region descriptor into the sorted region list after the
295  * entry "after" or at the head of the list (if "after" is NULL).
296  * The region descriptor we insert is passed in "rp".  We must
297  * allocate the region descriptor before calling this function!
298  * If we don't need the region descriptor, it will be freed here.
299  */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;	/* set once we grow `after' to cover us */

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * in, and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		/* Either way the new range is covered; rp is unneeded. */
		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
401 
402 /*
403  * Allocate a specific region in an extent map.
404  */
405 int
406 extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
407 {
408 	struct extent_region *rp, *last, *myrp;
409 	u_long end = start + (size - 1);
410 	int error;
411 
412 #ifdef DIAGNOSTIC
413 	/* Check arguments. */
414 	if (ex == NULL)
415 		panic("%s: NULL extent", __func__);
416 	if (size < 1) {
417 		printf("%s: extent `%s', size 0x%lx\n",
418 		    __func__, ex->ex_name, size);
419 		panic("%s: bad size", __func__);
420 	}
421 	if (end < start) {
422 		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
423 		    __func__, ex->ex_name, start, size);
424 		panic("%s: overflow", __func__);
425 	}
426 	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
427 		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
428 		    "are mutually exclusive", __func__);
429 #endif
430 
431 	/*
432 	 * Make sure the requested region lies within the
433 	 * extent.
434 	 */
435 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
436 #ifdef DIAGNOSTIC
437 		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
438 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
439 		printf("%s: start 0x%lx, end 0x%lx\n",
440 		    __func__, start, end);
441 		panic("%s: region lies outside extent", __func__);
442 #else
443 		return (EINVAL);
444 #endif
445 	}
446 
447 	/*
448 	 * Allocate the region descriptor.  It will be freed later
449 	 * if we can coalesce with another region.
450 	 */
451 	myrp = extent_alloc_region_descriptor(ex, flags);
452 	if (myrp == NULL) {
453 #ifdef DIAGNOSTIC
454 		printf(
455 		    "%s: can't allocate region descriptor\n", __func__);
456 #endif
457 		return (ENOMEM);
458 	}
459 
460  alloc_start:
461 	/*
462 	 * Attempt to place ourselves in the desired area of the
463 	 * extent.  We save ourselves some work by keeping the list sorted.
464 	 * In other words, if the start of the current region is greater
465 	 * than the end of our region, we don't have to search any further.
466 	 */
467 
468 	/*
469 	 * Keep a pointer to the last region we looked at so
470 	 * that we don't have to traverse the list again when
471 	 * we insert ourselves.  If "last" is NULL when we
472 	 * finally insert ourselves, we go at the head of the
473 	 * list.  See extent_insert_and_optimize() for details.
474 	 */
475 	last = NULL;
476 
477 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
478 		if (rp->er_start > end) {
479 			/*
480 			 * We lie before this region and don't
481 			 * conflict.
482 			 */
483 			break;
484 		}
485 
486 		/*
487 		 * The current region begins before we end.
488 		 * Check for a conflict.
489 		 */
490 		if (rp->er_end >= start) {
491 			/*
492 			 * We conflict.  If we can (and want to) wait,
493 			 * do so.
494 			 */
495 			if (flags & EX_WAITSPACE) {
496 				ex->ex_flags |= EXF_WANTED;
497 				error = tsleep(ex,
498 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
499 				    "extnt", 0);
500 				if (error)
501 					return (error);
502 				goto alloc_start;
503 			}
504 
505 			/*
506 			 * If we tolerate conflicts adjust things such
507 			 * that all space in the requested region is
508 			 * allocated.
509 			 */
510 			if (flags & EX_CONFLICTOK) {
511 				/*
512 				 * There are four possibilities:
513 				 *
514 				 * 1. The current region overlaps with
515 				 *    the start of the requested region.
516 				 *    Adjust the requested region to
517 				 *    start at the end of the current
518 				 *    region and try again.
519 				 *
520 				 * 2. The current region falls
521 				 *    completely within the requested
522 				 *    region.  Free the current region
523 				 *    and try again.
524 				 *
525 				 * 3. The current region overlaps with
526 				 *    the end of the requested region.
527 				 *    Adjust the requested region to
528 				 *    end at the start of the current
529 				 *    region and try again.
530 				 *
531 				 * 4. The requested region falls
532 				 *    completely within the current
533 				 *    region.  We're done.
534 				 */
535 				if (rp->er_start <= start) {
536 					if (rp->er_end < ex->ex_end) {
537 						start = rp->er_end + 1;
538 						size = end - start + 1;
539 						goto alloc_start;
540 					}
541 				} else if (rp->er_end < end) {
542 					LIST_REMOVE(rp, er_link);
543 					extent_free_region_descriptor(ex, rp);
544 					goto alloc_start;
545 				} else if (rp->er_start < end) {
546 					if (rp->er_start > ex->ex_start) {
547 						end = rp->er_start - 1;
548 						size = end - start + 1;
549 						goto alloc_start;
550 					}
551 				}
552 				return (0);
553 			}
554 
555 			extent_free_region_descriptor(ex, myrp);
556 			return (EAGAIN);
557 		}
558 		/*
559 		 * We don't conflict, but this region lies before
560 		 * us.  Keep a pointer to this region, and keep
561 		 * trying.
562 		 */
563 		last = rp;
564 	}
565 
566 	/*
567 	 * We don't conflict with any regions.  "last" points
568 	 * to the region we fall after, or is NULL if we belong
569 	 * at the beginning of the region list.  Insert ourselves.
570 	 */
571 	extent_insert_and_optimize(ex, start, size, last, myrp);
572 	return (0);
573 }
574 
575 /*
576  * Macro to check (x + y) <= z.  This check is designed to fail
577  * if an overflow occurs.
578  */
579 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
580 
581 /*
582  * Allocate a region in an extent map subregion.
583  *
584  * If EX_FAST is specified, we return the first fit in the map.
585  * Otherwise, we try to minimize fragmentation by finding the
586  * smallest gap that will hold the request.
587  *
588  * The allocated region is aligned to "alignment", which must be
589  * a power of 2.
590  */
591 int
592 extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
593     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
594     struct extent_region *myrp, u_long *result)
595 {
596 	struct extent_region *rp, *last, *bestlast;
597 	u_long newstart, newend, exend, beststart, bestovh, ovh;
598 	u_long dontcross;
599 	int error;
600 
601 #ifdef DIAGNOSTIC
602 	/* Check arguments. */
603 	if (ex == NULL)
604 		panic("%s: NULL extent", __func__);
605 	if (myrp == NULL)
606 		panic("%s: NULL region descriptor", __func__);
607 	if (result == NULL)
608 		panic("%s: NULL result pointer", __func__);
609 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
610 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
611 		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
612 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
613 		printf("%s: substart 0x%lx, subend 0x%lx\n",
614 		    __func__, substart, subend);
615 		panic("%s: bad subregion", __func__);
616 	}
617 	if ((size < 1) || ((size - 1) > (subend - substart))) {
618 		printf("%s: extent `%s', size 0x%lx\n",
619 		    __func__, ex->ex_name, size);
620 		panic("%s: bad size", __func__);
621 	}
622 	if (alignment == 0)
623 		panic("%s: bad alignment", __func__);
624 	if (boundary && (boundary < size)) {
625 		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
626 		    __func__, ex->ex_name, size, boundary);
627 		panic("%s: bad boundary", __func__);
628 	}
629 #endif
630 
631  alloc_start:
632 	/*
633 	 * Keep a pointer to the last region we looked at so
634 	 * that we don't have to traverse the list again when
635 	 * we insert ourselves.  If "last" is NULL when we
636 	 * finally insert ourselves, we go at the head of the
637 	 * list.  See extent_insert_and_optimize() for details.
638 	 */
639 	last = NULL;
640 
641 	/*
642 	 * Keep track of size and location of the smallest
643 	 * chunk we fit in.
644 	 *
645 	 * Since the extent can be as large as the numeric range
646 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
647 	 * best overhead value can be the maximum unsigned integer.
648 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
649 	 * into the region list immediately on an exact match (which
650 	 * is the only case where "bestovh" would be set to 0).
651 	 */
652 	bestovh = 0;
653 	beststart = 0;
654 	bestlast = NULL;
655 
656 	/*
657 	 * Keep track of end of free region.  This is either the end of extent
658 	 * or the start of a region past the subend.
659 	 */
660 	exend = ex->ex_end;
661 
662 	/*
663 	 * For N allocated regions, we must make (N + 1)
664 	 * checks for unallocated space.  The first chunk we
665 	 * check is the area from the beginning of the subregion
666 	 * to the first allocated region after that point.
667 	 */
668 	newstart = extent_align(substart, alignment, skew);
669 	if (newstart < ex->ex_start) {
670 #ifdef DIAGNOSTIC
671 		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
672 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
673 		    alignment);
674 		panic("%s: overflow after alignment", __func__);
675 #else
676 		extent_free_region_descriptor(ex, myrp);
677 		return (EINVAL);
678 #endif
679 	}
680 
681 	/*
682 	 * Find the first allocated region that begins on or after
683 	 * the subregion start, advancing the "last" pointer along
684 	 * the way.
685 	 */
686 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
687 		if (rp->er_start >= newstart)
688 			break;
689 		last = rp;
690 	}
691 
692 	/*
693 	 * Relocate the start of our candidate region to the end of
694 	 * the last allocated region (if there was one overlapping
695 	 * our subrange).
696 	 */
697 	if (last != NULL && last->er_end >= newstart)
698 		newstart = extent_align((last->er_end + 1), alignment, skew);
699 
700 	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
701 		/*
702 		 * If the region pasts the subend, bail out and see
703 		 * if we fit against the subend.
704 		 */
705 		if (rp->er_start > subend) {
706 			exend = rp->er_start;
707 			break;
708 		}
709 
710 		/*
711 		 * Check the chunk before "rp".  Note that our
712 		 * comparison is safe from overflow conditions.
713 		 */
714 		if (LE_OV(newstart, size, rp->er_start)) {
715 			/*
716 			 * Do a boundary check, if necessary.  Note
717 			 * that a region may *begin* on the boundary,
718 			 * but it must end before the boundary.
719 			 */
720 			if (boundary) {
721 				newend = newstart + (size - 1);
722 
723 				/*
724 				 * Calculate the next boundary after the start
725 				 * of this region.
726 				 */
727 				dontcross = extent_align(newstart+1, boundary,
728 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
729 				    - 1;
730 
731 #if 0
732 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
733 				    newstart, newend, ex->ex_start, ex->ex_end,
734 				    boundary, dontcross);
735 #endif
736 
737 				/* Check for overflow */
738 				if (dontcross < ex->ex_start)
739 					dontcross = ex->ex_end;
740 				else if (newend > dontcross) {
741 					/*
742 					 * Candidate region crosses boundary.
743 					 * Throw away the leading part and see
744 					 * if we still fit.
745 					 */
746 					newstart = dontcross + 1;
747 					newend = newstart + (size - 1);
748 					dontcross += boundary;
749 					if (!LE_OV(newstart, size, rp->er_start))
750 						goto skip;
751 				}
752 
753 				/*
754 				 * If we run past the end of
755 				 * the extent or the boundary
756 				 * overflows, then the request
757 				 * can't fit.
758 				 */
759 				if (newstart + size - 1 > ex->ex_end ||
760 				    dontcross < newstart)
761 					goto fail;
762 			}
763 
764 			/*
765 			 * We would fit into this space.  Calculate
766 			 * the overhead (wasted space).  If we exactly
767 			 * fit, or we're taking the first fit, insert
768 			 * ourselves into the region list.
769 			 */
770 			ovh = rp->er_start - newstart - size;
771 			if ((flags & EX_FAST) || (ovh == 0))
772 				goto found;
773 
774 			/*
775 			 * Don't exactly fit, but check to see
776 			 * if we're better than any current choice.
777 			 */
778 			if ((bestovh == 0) || (ovh < bestovh)) {
779 				bestovh = ovh;
780 				beststart = newstart;
781 				bestlast = last;
782 			}
783 		}
784 
785 skip:
786 		/*
787 		 * Skip past the current region and check again.
788 		 */
789 		newstart = extent_align((rp->er_end + 1), alignment, skew);
790 		if (newstart < rp->er_end) {
791 			/*
792 			 * Overflow condition.  Don't error out, since
793 			 * we might have a chunk of space that we can
794 			 * use.
795 			 */
796 			goto fail;
797 		}
798 
799 		last = rp;
800 	}
801 
802 	/*
803 	 * The final check is from the current starting point to the
804 	 * end of the subregion.  If there were no allocated regions,
805 	 * "newstart" is set to the beginning of the subregion, or
806 	 * just past the end of the last allocated region, adjusted
807 	 * for alignment in either case.
808 	 */
809 	if (LE_OV(newstart, (size - 1), subend)) {
810 		/*
811 		 * Do a boundary check, if necessary.  Note
812 		 * that a region may *begin* on the boundary,
813 		 * but it must end before the boundary.
814 		 */
815 		if (boundary) {
816 			newend = newstart + (size - 1);
817 
818 			/*
819 			 * Calculate the next boundary after the start
820 			 * of this region.
821 			 */
822 			dontcross = extent_align(newstart+1, boundary,
823 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
824 			    - 1;
825 
826 #if 0
827 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
828 			    newstart, newend, ex->ex_start, ex->ex_end,
829 			    boundary, dontcross);
830 #endif
831 
832 			/* Check for overflow */
833 			if (dontcross < ex->ex_start)
834 				dontcross = ex->ex_end;
835 			else if (newend > dontcross) {
836 				/*
837 				 * Candidate region crosses boundary.
838 				 * Throw away the leading part and see
839 				 * if we still fit.
840 				 */
841 				newstart = dontcross + 1;
842 				newend = newstart + (size - 1);
843 				dontcross += boundary;
844 				if (!LE_OV(newstart, (size - 1), subend))
845 					goto fail;
846 			}
847 
848 			/*
849 			 * If we run past the end of
850 			 * the extent or the boundary
851 			 * overflows, then the request
852 			 * can't fit.
853 			 */
854 			if (newstart + size - 1 > ex->ex_end ||
855 			    dontcross < newstart)
856 				goto fail;
857 		}
858 
859 		/*
860 		 * We would fit into this space.  Calculate
861 		 * the overhead (wasted space).  If we exactly
862 		 * fit, or we're taking the first fit, insert
863 		 * ourselves into the region list.
864 		 */
865 		ovh = exend - newstart - (size - 1);
866 		if ((flags & EX_FAST) || (ovh == 0))
867 			goto found;
868 
869 		/*
870 		 * Don't exactly fit, but check to see
871 		 * if we're better than any current choice.
872 		 */
873 		if ((bestovh == 0) || (ovh < bestovh)) {
874 			bestovh = ovh;
875 			beststart = newstart;
876 			bestlast = last;
877 		}
878 	}
879 
880  fail:
881 	/*
882 	 * One of the following two conditions have
883 	 * occurred:
884 	 *
885 	 *	There is no chunk large enough to hold the request.
886 	 *
887 	 *	If EX_FAST was not specified, there is not an
888 	 *	exact match for the request.
889 	 *
890 	 * Note that if we reach this point and EX_FAST is
891 	 * set, then we know there is no space in the extent for
892 	 * the request.
893 	 */
894 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
895 		/*
896 		 * We have a match that's "good enough".
897 		 */
898 		newstart = beststart;
899 		last = bestlast;
900 		goto found;
901 	}
902 
903 	/*
904 	 * No space currently available.  Wait for it to free up,
905 	 * if possible.
906 	 */
907 	if (flags & EX_WAITSPACE) {
908 		ex->ex_flags |= EXF_WANTED;
909 		error = tsleep(ex,
910 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
911 		if (error)
912 			return (error);
913 		goto alloc_start;
914 	}
915 
916 	extent_free_region_descriptor(ex, myrp);
917 	return (EAGAIN);
918 
919  found:
920 	/*
921 	 * Insert ourselves into the region list.
922 	 */
923 	extent_insert_and_optimize(ex, newstart, size, last, myrp);
924 	*result = newstart;
925 	return (0);
926 }
927 
928 int
929 extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
930     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
931     u_long *result)
932 {
933 	struct extent_region *rp;
934 
935 	/*
936 	 * Allocate the region descriptor.  It will be freed later
937 	 * if we can coalesce with another region.
938 	 */
939 	rp = extent_alloc_region_descriptor(ex, flags);
940 	if (rp == NULL) {
941 #ifdef DIAGNOSTIC
942 		printf("%s: can't allocate region descriptor\n", __func__);
943 #endif
944 		return (ENOMEM);
945 	}
946 
947 	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
948 	    boundary, flags, rp, result);
949 }
950 
951 int
952 extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
953     u_long subend, u_long size, u_long alignment, u_long skew,
954     u_long boundary, int flags, struct extent_region *rp, u_long *result)
955 {
956 #ifdef DIAGNOSTIC
957 	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
958 		panic("%s: EX_NOCOALESCE not set", __func__);
959 #endif
960 
961 	rp->er_flags = ER_DISCARD;
962 	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
963 	    boundary, flags, rp, result);
964 }
965 
int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor (needed for case 4 below). */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1: the freed chunk is exactly one region. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2: trim the front of the region. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3: trim the tail of the region. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4: punch a hole; split the region in two. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	/* Return the spare descriptor if the split case didn't consume it. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	/* Wake any allocator sleeping in EX_WAITSPACE. */
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}
1105 
1106 static struct extent_region *
1107 extent_alloc_region_descriptor(struct extent *ex, int flags)
1108 {
1109 	struct extent_region *rp;
1110 
1111 	if (ex->ex_flags & EXF_FIXED) {
1112 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1113 
1114 		while (LIST_EMPTY(&fex->fex_freelist)) {
1115 			if (flags & EX_MALLOCOK)
1116 				goto alloc;
1117 
1118 			if ((flags & EX_WAITOK) == 0)
1119 				return (NULL);
1120 			ex->ex_flags |= EXF_FLWANTED;
1121 			if (tsleep(&fex->fex_freelist,
1122 			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1123 			    "extnt", 0))
1124 				return (NULL);
1125 		}
1126 		rp = LIST_FIRST(&fex->fex_freelist);
1127 		LIST_REMOVE(rp, er_link);
1128 
1129 		/*
1130 		 * Don't muck with flags after pulling it off the
1131 		 * freelist; it may be a dynamically allocated
1132 		 * region pointer that was kindly given to us,
1133 		 * and we need to preserve that information.
1134 		 */
1135 
1136 		return (rp);
1137 	}
1138 
1139  alloc:
1140 	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
1141 	    PR_NOWAIT);
1142 	if (rp != NULL)
1143 		rp->er_flags = ER_ALLOC;
1144 
1145 	return (rp);
1146 }
1147 
1148 static void
1149 extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
1150 {
1151 	if (rp->er_flags & ER_DISCARD)
1152 		return;
1153 
1154 	if (ex->ex_flags & EXF_FIXED) {
1155 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1156 
1157 		/*
1158 		 * If someone's waiting for a region descriptor,
1159 		 * be nice and give them this one, rather than
1160 		 * just free'ing it back to the system.
1161 		 */
1162 		if (rp->er_flags & ER_ALLOC) {
1163 			if (ex->ex_flags & EXF_FLWANTED) {
1164 				/* Clear all but ER_ALLOC flag. */
1165 				rp->er_flags = ER_ALLOC;
1166 				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1167 				    er_link);
1168 				goto wake_em_up;
1169 			} else {
1170 				pool_put(&ex_region_pl, rp);
1171 			}
1172 		} else {
1173 			/* Clear all flags. */
1174 			rp->er_flags = 0;
1175 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1176 		}
1177 
1178 		if (ex->ex_flags & EXF_FLWANTED) {
1179  wake_em_up:
1180 			ex->ex_flags &= ~EXF_FLWANTED;
1181 			wakeup(&fex->fex_freelist);
1182 		}
1183 		return;
1184 	}
1185 
1186 	/*
1187 	 * We know it's dynamically allocated if we get here.
1188 	 */
1189 	pool_put(&ex_region_pl, rp);
1190 }
1191 
1192 
1193 #if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1194 
/*
 * Print the state of an extent (its range, flags, and allocated
 * regions) to the console; convenience wrapper around extent_print1()
 * using printf as the output function.
 */
void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}
1200 
1201 void
1202 extent_print1(struct extent *ex,
1203     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1204 {
1205 	struct extent_region *rp;
1206 
1207 	if (ex == NULL)
1208 		panic("%s: NULL extent", __func__);
1209 
1210 #ifdef _KERNEL
1211 	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1212 	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1213 #else
1214 	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1215 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1216 #endif
1217 
1218 	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1219 		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1220 }
1221 #endif
1222