/*	$OpenBSD: subr_extent.c,v 1.53 2014/09/13 16:06:37 doug Exp $	*/
/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <ddb/db_output.h>
#else
/*
 * user-land definitions, so it can fit into a testing harness.
 */
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/queue.h>
#include <errno.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define	malloc(s, t, flags)		malloc(s)
#define	free(p, t, s)			free(p)

#define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
#define	wakeup(chan)			((void)0)

struct pool {
	size_t pr_size;
};

#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
#define	pool_setipl(pp, ipl)		/* nothing */
#define	pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
#define	pool_put(pp, rp)		free((rp), 0, 0)

#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
#endif

#if defined(DIAGNOSTIC) || defined(DDB)
void	extent_print1(struct extent *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
#endif

static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
	    struct extent_region *, struct extent_region *);
static	struct extent_region *extent_alloc_region_descriptor(struct extent *,
	    int);
static	void extent_free_region_descriptor(struct extent *,
	    struct extent_region *);
int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
	    u_long, u_long, int, struct extent_region *, u_long *);

/*
 * Shortcut to align to an arbitrary power-of-two boundary.
 */
static __inline__ u_long
extent_align(u_long start, u_long align, u_long skew)
{
	return ((((start - skew) + (align - 1)) & (-align)) + skew);
}
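
/*
 * Worked example (illustrative only, not part of the original code):
 * with align = 0x100 and skew = 0, extent_align(0x1003, 0x100, 0)
 * yields (0x1003 + 0xff) & ~0xff == 0x1100.  With skew = 3,
 * extent_align(0x1003, 0x100, 3) aligns the skew-relative offset
 * instead: ((0x1000 + 0xff) & ~0xff) + 3 == 0x1003, so boundaries
 * fall at multiples of the alignment plus the skew.
 */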

#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Register the extent on a doubly linked list.
 * Should work, no?
 */
static LIST_HEAD(listhead, extent) ext_list;
static	void extent_register(struct extent *);

static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	if (!initialized) {
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
#endif	/* DIAGNOSTIC || DDB */

struct pool ex_region_pl;

static void
extent_pool_init(void)
{
	static int inited;

	if (!inited) {
		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
		    "extentpl", NULL);
		pool_setipl(&ex_region_pl, IPL_VM);
		inited = 1;
	}
}

#ifdef DDB
/*
 * Print out all registered extents.  This is used by the
 * "show extents" command in DDB.
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, 0);
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}
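
/*
 * Usage sketch (illustrative only; the names below are hypothetical):
 * a driver managing a 16-bit I/O port space might create a dynamic
 * extent like this:
 *
 *	struct extent *ioport_ex;
 *
 *	ioport_ex = extent_create("ioport", 0x0, 0xffff, M_DEVBUF,
 *	    NULL, 0, EX_NOWAIT | EX_NOCOALESCE);
 *	if (ioport_ex == NULL)
 *		panic("can't create ioport extent");
 *
 * Passing a "storage" buffer and its size instead creates a fixed
 * extent whose region descriptors are carved out of that buffer.
 */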

/*
 * Destroy an extent map.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	/* Remove from the list of all extents. */
	LIST_REMOVE(ex, ex_link);
#endif

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		free(ex, ex->ex_mtype, 0);
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert us at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}
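
/*
 * Coalescing example (illustrative only): with EXF_NOCOALESCE clear
 * and regions [0x000-0x0ff] and [0x200-0x2ff] in the list, inserting
 * start = 0x100, size = 0x100 after the first region appends to it
 * ([0x000-0x1ff]), then merges with the second, leaving the single
 * region [0x000-0x2ff] and freeing both spare descriptors.
 */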

/*
 * Allocate a specific region in an extent map.
 */
int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *last, *myrp;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (size < 1) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
		    "are mutually exclusive", __func__);
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: start 0x%lx, end 0x%lx\n",
		    __func__, start, end);
		panic("%s: region lies outside extent", __func__);
#else
		return (EINVAL);
#endif
	}

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	myrp = extent_alloc_region_descriptor(ex, flags);
	if (myrp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

 alloc_start:
	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				ex->ex_flags |= EXF_WANTED;
				error = tsleep(ex,
				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
				    "extnt", 0);
				if (error) {
					/*
					 * Don't leak the region
					 * descriptor on an interrupted
					 * sleep.
					 */
					extent_free_region_descriptor(ex,
					    myrp);
					return (error);
				}
				goto alloc_start;
			}

			/*
			 * If we tolerate conflicts, adjust things such
			 * that all space in the requested region is
			 * allocated.
			 */
			if (flags & EX_CONFLICTOK) {
				/*
				 * There are four possibilities:
				 *
				 * 1. The current region overlaps with
				 *    the start of the requested region.
				 *    Adjust the requested region to
				 *    start at the end of the current
				 *    region and try again.
				 *
				 * 2. The current region falls
				 *    completely within the requested
				 *    region.  Free the current region
				 *    and try again.
				 *
				 * 3. The current region overlaps with
				 *    the end of the requested region.
				 *    Adjust the requested region to
				 *    end at the start of the current
				 *    region and try again.
				 *
				 * 4. The requested region falls
				 *    completely within the current
				 *    region.  We're done.
				 */
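
				/*
				 * Example (illustrative): requesting
				 * 0x10-0x4f while 0x20-0x2f is
				 * already allocated hits case 2: the
				 * existing region is freed and the
				 * request is retried, now without a
				 * conflict in that range.
				 */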
				if (rp->er_start <= start) {
					if (rp->er_end < ex->ex_end) {
						start = rp->er_end + 1;
						size = end - start + 1;
						goto alloc_start;
					}
				} else if (rp->er_end < end) {
					LIST_REMOVE(rp, er_link);
					extent_free_region_descriptor(ex, rp);
					goto alloc_start;
				} else if (rp->er_start < end) {
					if (rp->er_start > ex->ex_start) {
						end = rp->er_start - 1;
						size = end - start + 1;
						goto alloc_start;
					}
				}
				/*
				 * The request is covered entirely by
				 * existing allocations.  Free the
				 * unused descriptor before returning
				 * success.
				 */
				extent_free_region_descriptor(ex, myrp);
				return (0);
			}

			extent_free_region_descriptor(ex, myrp);
			return (EAGAIN);
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, last, myrp);
	return (0);
}
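
/*
 * Usage sketch (illustrative, continuing the hypothetical "ioport_ex"
 * example above): reserving a specific range and releasing it again:
 *
 *	if (extent_alloc_region(ioport_ex, 0x3f8, 0x8, EX_NOWAIT))
 *		...the range is taken, or no descriptor was available...
 *	...
 *	extent_free(ioport_ex, 0x3f8, 0x8, EX_NOWAIT);
 */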

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define	LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
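
/*
 * For example (illustrative), on a 32-bit u_long with x = 0xffffff00
 * and y = 0x200, x + y wraps to 0x100, which fails the (x + y) >= x
 * test even though the wrapped sum alone would satisfy (x + y) <= z.
 */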

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
int
extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    struct extent_region *myrp, u_long *result)
{
	struct extent_region *rp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (myrp == NULL)
		panic("%s: NULL region descriptor", __func__);
	if (result == NULL)
		panic("%s: NULL result pointer", __func__);
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: substart 0x%lx, subend 0x%lx\n",
		    __func__, substart, subend);
		panic("%s: bad subregion", __func__);
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (alignment == 0)
		panic("%s: bad alignment", __func__);
	if (boundary && (boundary < size)) {
		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
		    __func__, ex->ex_name, size, boundary);
		panic("%s: bad boundary", __func__);
	}
#endif

 alloc_start:
	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of end of free region.  This is either the end of extent
	 * or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = extent_align(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
		    alignment);
		panic("%s: overflow after alignment", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = extent_align((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and
		 * see if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = extent_align(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

 skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = extent_align((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  If there were no allocated regions,
	 * "newstart" is set to the beginning of the subregion, or
	 * just past the end of the last allocated region, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = extent_align(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 *	There is no chunk large enough to hold the request.
	 *
	 *	If EX_FAST was not specified, there is not an
	 *	exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		ex->ex_flags |= EXF_WANTED;
		error = tsleep(ex,
		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
		if (error) {
			/*
			 * Don't leak the region descriptor on an
			 * interrupted sleep.  (For a caller-supplied
			 * ER_DISCARD descriptor this is a no-op.)
			 */
			extent_free_region_descriptor(ex, myrp);
			return (error);
		}
		goto alloc_start;
	}

	extent_free_region_descriptor(ex, myrp);
	return (EAGAIN);

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, last, myrp);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    u_long *result)
{
	struct extent_region *rp;

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	rp = extent_alloc_region_descriptor(ex, flags);
	if (rp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}
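
/*
 * Usage sketch (illustrative): allocating 0x1000 bytes anywhere in
 * the extent, aligned to 0x1000 and not crossing a 0x10000 boundary:
 *
 *	u_long addr;
 *	int error;
 *
 *	error = extent_alloc_subregion(ex, ex->ex_start, ex->ex_end,
 *	    0x1000, 0x1000, 0, 0x10000, EX_NOWAIT, &addr);
 *
 * The extent_alloc() wrapper in <sys/extent.h> issues this call with
 * the subregion spanning the whole extent.
 */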

int
extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
    u_long subend, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, struct extent_region *rp, u_long *result)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

	rp->er_flags = ER_DISCARD;
	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before the chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}

static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}

#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)

void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}

void
extent_print1(struct extent *ex,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
#endif