xref: /openbsd-src/sys/kern/subr_extent.c (revision 46035553bfdd96e63c94e32da0210227ec2e3cf1)
1 /*	$OpenBSD: subr_extent.c,v 1.63 2020/01/11 14:30:24 mpi Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * General purpose extent manager.
35  */
36 
37 #ifdef _KERNEL
38 #include <sys/param.h>
39 #include <sys/extent.h>
40 #include <sys/malloc.h>
41 #include <sys/systm.h>
42 #include <sys/queue.h>
43 #include <sys/pool.h>
44 #include <ddb/db_output.h>
45 #else
46 /*
47  * user-land definitions, so it can fit into a testing harness.
48  */
49 #include <sys/param.h>
50 #include <sys/extent.h>
51 #include <sys/queue.h>
52 #include <errno.h>
53 #include <err.h>
54 #include <stdlib.h>
55 #include <stdio.h>
56 #include <string.h>
57 
58 #define	malloc(s, t, flags)		malloc(s)
59 #define	free(p, t, s)			free(p)
60 
61 #define	tsleep_nsec(c, p, s, t)		(EWOULDBLOCK)
62 #define	wakeup(chan)			((void)0)
63 
64 struct pool {
65 	size_t pr_size;
66 };
67 
68 #define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
69 #define	pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
70 #define	pool_put(pp, rp)		free((rp), 0, 0)
71 
72 #define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
73 #endif
74 
75 #if defined(DIAGNOSTIC) || defined(DDB)
76 void	extent_print1(struct extent *, int (*)(const char *, ...)
77 	    __attribute__((__format__(__kprintf__,1,2))));
78 #endif
79 
80 static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
81 	    struct extent_region *, struct extent_region *);
82 static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
83 static	void extent_free_region_descriptor(struct extent *,
84 	    struct extent_region *);
85 int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
86 	    u_long, u_long, int, struct extent_region *, u_long *);
87 
88 /*
89  * Shortcut to align to an arbitrary power-of-two boundary.
90  */
91 static __inline__ u_long
92 extent_align(u_long start, u_long align, u_long skew)
93 {
94 	return ((((start - skew) + (align - 1)) & (-align)) + skew);
95 }
96 
97 
98 #if defined(DIAGNOSTIC) || defined(DDB)
99 /*
100  * Register the extent on a doubly linked list.
101  * Should work, no?
102  */
103 static LIST_HEAD(listhead, extent) ext_list;
104 static	void extent_register(struct extent *);
105 
106 static void
107 extent_register(struct extent *ex)
108 {
109 #ifdef DIAGNOSTIC
110 	struct extent *ep;
111 #endif
112 	static int initialized;
113 
114 	if (!initialized){
115 		LIST_INIT(&ext_list);
116 		initialized = 1;
117 	}
118 
119 #ifdef DIAGNOSTIC
120 	LIST_FOREACH(ep, &ext_list, ex_link) {
121 		if (ep == ex)
122 			panic("%s: already registered", __func__);
123 	}
124 #endif
125 
126 	/* Insert into list */
127 	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
128 }
129 #endif	/* DIAGNOSTIC || DDB */
130 
131 struct pool ex_region_pl;
132 
133 static void
134 extent_pool_init(void)
135 {
136 	static int inited;
137 
138 	if (!inited) {
139 		pool_init(&ex_region_pl, sizeof(struct extent_region), 0,
140 		    IPL_VM, 0, "extentpl", NULL);
141 		inited = 1;
142 	}
143 }
144 
145 #ifdef DDB
146 /*
147  * Print out all extents registered.  This is used in
148  * DDB show extents
149  */
150 void
151 extent_print_all(void)
152 {
153 	struct extent *ep;
154 
155 	LIST_FOREACH(ep, &ext_list, ex_link) {
156 		extent_print1(ep, db_printf);
157 	}
158 }
159 #endif
160 
161 /*
162  * Allocate and initialize an extent map.
163  */
164 struct extent *
165 extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
166     size_t storagesize, int flags)
167 {
168 	struct extent *ex;
169 	caddr_t cp = storage;
170 	size_t sz = storagesize;
171 	struct extent_region *rp;
172 	int fixed_extent = (storage != NULL);
173 
174 #ifdef DIAGNOSTIC
175 	/* Check arguments. */
176 	if (name == NULL)
177 		panic("%s: name == NULL", __func__);
178 	if (end < start) {
179 		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
180 		    __func__, name, start, end);
181 		panic("%s: end < start", __func__);
182 	}
183 	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
184 		panic("%s: fixed extent, bad storagesize 0x%lx",
185 		    __func__, (u_long)storagesize);
186 	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
187 		panic("%s: storage provided for non-fixed", __func__);
188 #endif
189 
190 	extent_pool_init();
191 
192 	/* Allocate extent descriptor. */
193 	if (fixed_extent) {
194 		struct extent_fixed *fex;
195 
196 		memset(storage, 0, storagesize);
197 
198 		/*
199 		 * Align all descriptors on "long" boundaries.
200 		 */
201 		fex = (struct extent_fixed *)cp;
202 		ex = (struct extent *)fex;
203 		cp += ALIGN(sizeof(struct extent_fixed));
204 		sz -= ALIGN(sizeof(struct extent_fixed));
205 		fex->fex_storage = storage;
206 		fex->fex_storagesize = storagesize;
207 
208 		/*
209 		 * In a fixed extent, we have to pre-allocate region
210 		 * descriptors and place them in the extent's freelist.
211 		 */
212 		LIST_INIT(&fex->fex_freelist);
213 		while (sz >= ALIGN(sizeof(struct extent_region))) {
214 			rp = (struct extent_region *)cp;
215 			cp += ALIGN(sizeof(struct extent_region));
216 			sz -= ALIGN(sizeof(struct extent_region));
217 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
218 		}
219 	} else {
220 		ex = (struct extent *)malloc(sizeof(struct extent),
221 		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
222 		if (ex == NULL)
223 			return (NULL);
224 	}
225 
226 	/* Fill in the extent descriptor and return it to the caller. */
227 	LIST_INIT(&ex->ex_regions);
228 	ex->ex_name = name;
229 	ex->ex_start = start;
230 	ex->ex_end = end;
231 	ex->ex_mtype = mtype;
232 	ex->ex_flags = 0;
233 	if (fixed_extent)
234 		ex->ex_flags |= EXF_FIXED;
235 	if (flags & EX_NOCOALESCE)
236 		ex->ex_flags |= EXF_NOCOALESCE;
237 
238 	if (flags & EX_FILLED) {
239 		rp = extent_alloc_region_descriptor(ex, flags);
240 		if (rp == NULL) {
241 			if (!fixed_extent)
242 				free(ex, mtype, sizeof(struct extent));
243 			return (NULL);
244 		}
245 		rp->er_start = start;
246 		rp->er_end = end;
247 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
248 	}
249 
250 #if defined(DIAGNOSTIC) || defined(DDB)
251 	extent_register(ex);
252 #endif
253 	return (ex);
254 }
255 
256 /*
257  * Destroy an extent map.
258  */
259 void
260 extent_destroy(struct extent *ex)
261 {
262 	struct extent_region *rp, *orp;
263 
264 #ifdef DIAGNOSTIC
265 	/* Check arguments. */
266 	if (ex == NULL)
267 		panic("%s: NULL extent", __func__);
268 #endif
269 
270 	/* Free all region descriptors in extent. */
271 	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
272 		orp = rp;
273 		rp = LIST_NEXT(rp, er_link);
274 		LIST_REMOVE(orp, er_link);
275 		extent_free_region_descriptor(ex, orp);
276 	}
277 
278 #if defined(DIAGNOSTIC) || defined(DDB)
279 	/* Remove from the list of all extents. */
280 	LIST_REMOVE(ex, ex_link);
281 #endif
282 
283 	/* If we're not a fixed extent, free the extent descriptor itself. */
284 	if ((ex->ex_flags & EXF_FIXED) == 0)
285 		free(ex, ex->ex_mtype, sizeof(*ex));
286 }
287 
288 /*
289  * Insert a region descriptor into the sorted region list after the
290  * entry "after" or at the head of the list (if "after" is NULL).
291  * The region descriptor we insert is passed in "rp".  We must
292  * allocate the region descriptor before calling this function!
293  * If we don't need the region descriptor, it will be freed here.
294  */
295 static void
296 extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
297     struct extent_region *after, struct extent_region *rp)
298 {
299 	struct extent_region *nextr;
300 	int appended = 0;
301 
302 	if (after == NULL) {
303 		/*
304 		 * We're the first in the region list.  If there's
305 		 * a region after us, attempt to coalesce to save
306 		 * descriptor overhead.
307 		 */
308 		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
309 		    !LIST_EMPTY(&ex->ex_regions) &&
310 		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
311 			/*
312 			 * We can coalesce.  Prepend us to the first region.
313 			 */
314 			LIST_FIRST(&ex->ex_regions)->er_start = start;
315 			extent_free_region_descriptor(ex, rp);
316 			return;
317 		}
318 
319 		/*
320 		 * Can't coalesce.  Fill in the region descriptor
321 		 * in, and insert us at the head of the region list.
322 		 */
323 		rp->er_start = start;
324 		rp->er_end = start + (size - 1);
325 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
326 		return;
327 	}
328 
329 	/*
330 	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
331 	 */
332 	if (ex->ex_flags & EXF_NOCOALESCE)
333 		goto cant_coalesce;
334 
335 	/*
336 	 * Attempt to coalesce with the region before us.
337 	 */
338 	if ((after->er_end + 1) == start) {
339 		/*
340 		 * We can coalesce.  Append ourselves and make
341 		 * note of it.
342 		 */
343 		after->er_end = start + (size - 1);
344 		appended = 1;
345 	}
346 
347 	/*
348 	 * Attempt to coalesce with the region after us.
349 	 */
350 	if (LIST_NEXT(after, er_link) != NULL &&
351 	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
352 		/*
353 		 * We can coalesce.  Note that if we appended ourselves
354 		 * to the previous region, we exactly fit the gap, and
355 		 * can free the "next" region descriptor.
356 		 */
357 		if (appended) {
358 			/*
359 			 * Yup, we can free it up.
360 			 */
361 			after->er_end = LIST_NEXT(after, er_link)->er_end;
362 			nextr = LIST_NEXT(after, er_link);
363 			LIST_REMOVE(nextr, er_link);
364 			extent_free_region_descriptor(ex, nextr);
365 		} else {
366 			/*
367 			 * Nope, just prepend us to the next region.
368 			 */
369 			LIST_NEXT(after, er_link)->er_start = start;
370 		}
371 
372 		extent_free_region_descriptor(ex, rp);
373 		return;
374 	}
375 
376 	/*
377 	 * We weren't able to coalesce with the next region, but
378 	 * we don't need to allocate a region descriptor if we
379 	 * appended ourselves to the previous region.
380 	 */
381 	if (appended) {
382 		extent_free_region_descriptor(ex, rp);
383 		return;
384 	}
385 
386  cant_coalesce:
387 
388 	/*
389 	 * Fill in the region descriptor and insert ourselves
390 	 * into the region list.
391 	 */
392 	rp->er_start = start;
393 	rp->er_end = start + (size - 1);
394 	LIST_INSERT_AFTER(after, rp, er_link);
395 }
396 
397 /*
398  * Allocate a specific region in an extent map.
399  */
400 int
401 extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
402 {
403 	struct extent_region *rp, *last, *myrp;
404 	u_long end = start + (size - 1);
405 	int error;
406 
407 #ifdef DIAGNOSTIC
408 	/* Check arguments. */
409 	if (ex == NULL)
410 		panic("%s: NULL extent", __func__);
411 	if (size < 1) {
412 		printf("%s: extent `%s', size 0x%lx\n",
413 		    __func__, ex->ex_name, size);
414 		panic("%s: bad size", __func__);
415 	}
416 	if (end < start) {
417 		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
418 		    __func__, ex->ex_name, start, size);
419 		panic("%s: overflow", __func__);
420 	}
421 	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
422 		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
423 		    "are mutually exclusive", __func__);
424 #endif
425 
426 	/*
427 	 * Make sure the requested region lies within the
428 	 * extent.
429 	 */
430 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
431 #ifdef DIAGNOSTIC
432 		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
433 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
434 		printf("%s: start 0x%lx, end 0x%lx\n",
435 		    __func__, start, end);
436 		panic("%s: region lies outside extent", __func__);
437 #else
438 		return (EINVAL);
439 #endif
440 	}
441 
442 	/*
443 	 * Allocate the region descriptor.  It will be freed later
444 	 * if we can coalesce with another region.
445 	 */
446 	myrp = extent_alloc_region_descriptor(ex, flags);
447 	if (myrp == NULL) {
448 #ifdef DIAGNOSTIC
449 		printf(
450 		    "%s: can't allocate region descriptor\n", __func__);
451 #endif
452 		return (ENOMEM);
453 	}
454 
455  alloc_start:
456 	/*
457 	 * Attempt to place ourselves in the desired area of the
458 	 * extent.  We save ourselves some work by keeping the list sorted.
459 	 * In other words, if the start of the current region is greater
460 	 * than the end of our region, we don't have to search any further.
461 	 */
462 
463 	/*
464 	 * Keep a pointer to the last region we looked at so
465 	 * that we don't have to traverse the list again when
466 	 * we insert ourselves.  If "last" is NULL when we
467 	 * finally insert ourselves, we go at the head of the
468 	 * list.  See extent_insert_and_optimize() for details.
469 	 */
470 	last = NULL;
471 
472 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
473 		if (rp->er_start > end) {
474 			/*
475 			 * We lie before this region and don't
476 			 * conflict.
477 			 */
478 			break;
479 		}
480 
481 		/*
482 		 * The current region begins before we end.
483 		 * Check for a conflict.
484 		 */
485 		if (rp->er_end >= start) {
486 			/*
487 			 * We conflict.  If we can (and want to) wait,
488 			 * do so.
489 			 */
490 			if (flags & EX_WAITSPACE) {
491 				ex->ex_flags |= EXF_WANTED;
492 				error = tsleep_nsec(ex,
493 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
494 				    "extnt", INFSLP);
495 				if (error)
496 					return (error);
497 				goto alloc_start;
498 			}
499 
500 			/*
501 			 * If we tolerate conflicts adjust things such
502 			 * that all space in the requested region is
503 			 * allocated.
504 			 */
505 			if (flags & EX_CONFLICTOK) {
506 				/*
507 				 * There are four possibilities:
508 				 *
509 				 * 1. The current region overlaps with
510 				 *    the start of the requested region.
511 				 *    Adjust the requested region to
512 				 *    start at the end of the current
513 				 *    region and try again.
514 				 *
515 				 * 2. The current region falls
516 				 *    completely within the requested
517 				 *    region.  Free the current region
518 				 *    and try again.
519 				 *
520 				 * 3. The current region overlaps with
521 				 *    the end of the requested region.
522 				 *    Adjust the requested region to
523 				 *    end at the start of the current
524 				 *    region and try again.
525 				 *
526 				 * 4. The requested region falls
527 				 *    completely within the current
528 				 *    region.  We're done.
529 				 */
530 				if (rp->er_start <= start) {
531 					if (rp->er_end < ex->ex_end) {
532 						start = rp->er_end + 1;
533 						size = end - start + 1;
534 						goto alloc_start;
535 					}
536 				} else if (rp->er_end < end) {
537 					LIST_REMOVE(rp, er_link);
538 					extent_free_region_descriptor(ex, rp);
539 					goto alloc_start;
540 				} else if (rp->er_start < end) {
541 					if (rp->er_start > ex->ex_start) {
542 						end = rp->er_start - 1;
543 						size = end - start + 1;
544 						goto alloc_start;
545 					}
546 				}
547 				return (0);
548 			}
549 
550 			extent_free_region_descriptor(ex, myrp);
551 			return (EAGAIN);
552 		}
553 		/*
554 		 * We don't conflict, but this region lies before
555 		 * us.  Keep a pointer to this region, and keep
556 		 * trying.
557 		 */
558 		last = rp;
559 	}
560 
561 	/*
562 	 * We don't conflict with any regions.  "last" points
563 	 * to the region we fall after, or is NULL if we belong
564 	 * at the beginning of the region list.  Insert ourselves.
565 	 */
566 	extent_insert_and_optimize(ex, start, size, last, myrp);
567 	return (0);
568 }
569 
570 /*
571  * Macro to check (x + y) <= z.  This check is designed to fail
572  * if an overflow occurs.
573  */
574 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
575 
576 /*
577  * Allocate a region in an extent map subregion.
578  *
579  * If EX_FAST is specified, we return the first fit in the map.
580  * Otherwise, we try to minimize fragmentation by finding the
581  * smallest gap that will hold the request.
582  *
583  * The allocated region is aligned to "alignment", which must be
584  * a power of 2.
585  */
586 int
587 extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
588     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
589     struct extent_region *myrp, u_long *result)
590 {
591 	struct extent_region *rp, *last, *bestlast;
592 	u_long newstart, newend, exend, beststart, bestovh, ovh;
593 	u_long dontcross;
594 	int error;
595 
596 #ifdef DIAGNOSTIC
597 	/* Check arguments. */
598 	if (ex == NULL)
599 		panic("%s: NULL extent", __func__);
600 	if (myrp == NULL)
601 		panic("%s: NULL region descriptor", __func__);
602 	if (result == NULL)
603 		panic("%s: NULL result pointer", __func__);
604 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
605 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
606 		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
607 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
608 		printf("%s: substart 0x%lx, subend 0x%lx\n",
609 		    __func__, substart, subend);
610 		panic("%s: bad subregion", __func__);
611 	}
612 	if ((size < 1) || ((size - 1) > (subend - substart))) {
613 		printf("%s: extent `%s', size 0x%lx\n",
614 		    __func__, ex->ex_name, size);
615 		panic("%s: bad size", __func__);
616 	}
617 	if (alignment == 0)
618 		panic("%s: bad alignment", __func__);
619 	if (boundary && (boundary < size)) {
620 		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
621 		    __func__, ex->ex_name, size, boundary);
622 		panic("%s: bad boundary", __func__);
623 	}
624 #endif
625 
626  alloc_start:
627 	/*
628 	 * Keep a pointer to the last region we looked at so
629 	 * that we don't have to traverse the list again when
630 	 * we insert ourselves.  If "last" is NULL when we
631 	 * finally insert ourselves, we go at the head of the
632 	 * list.  See extent_insert_and_optimize() for details.
633 	 */
634 	last = NULL;
635 
636 	/*
637 	 * Keep track of size and location of the smallest
638 	 * chunk we fit in.
639 	 *
640 	 * Since the extent can be as large as the numeric range
641 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
642 	 * best overhead value can be the maximum unsigned integer.
643 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
644 	 * into the region list immediately on an exact match (which
645 	 * is the only case where "bestovh" would be set to 0).
646 	 */
647 	bestovh = 0;
648 	beststart = 0;
649 	bestlast = NULL;
650 
651 	/*
652 	 * Keep track of end of free region.  This is either the end of extent
653 	 * or the start of a region past the subend.
654 	 */
655 	exend = ex->ex_end;
656 
657 	/*
658 	 * For N allocated regions, we must make (N + 1)
659 	 * checks for unallocated space.  The first chunk we
660 	 * check is the area from the beginning of the subregion
661 	 * to the first allocated region after that point.
662 	 */
663 	newstart = extent_align(substart, alignment, skew);
664 	if (newstart < ex->ex_start) {
665 #ifdef DIAGNOSTIC
666 		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
667 		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
668 		    alignment);
669 		panic("%s: overflow after alignment", __func__);
670 #else
671 		extent_free_region_descriptor(ex, myrp);
672 		return (EINVAL);
673 #endif
674 	}
675 
676 	/*
677 	 * Find the first allocated region that begins on or after
678 	 * the subregion start, advancing the "last" pointer along
679 	 * the way.
680 	 */
681 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
682 		if (rp->er_start >= newstart)
683 			break;
684 		last = rp;
685 	}
686 
687 	/*
688 	 * Relocate the start of our candidate region to the end of
689 	 * the last allocated region (if there was one overlapping
690 	 * our subrange).
691 	 */
692 	if (last != NULL && last->er_end >= newstart)
693 		newstart = extent_align((last->er_end + 1), alignment, skew);
694 
695 	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
696 		/*
697 		 * If the region pasts the subend, bail out and see
698 		 * if we fit against the subend.
699 		 */
700 		if (rp->er_start > subend) {
701 			exend = rp->er_start;
702 			break;
703 		}
704 
705 		/*
706 		 * Check the chunk before "rp".  Note that our
707 		 * comparison is safe from overflow conditions.
708 		 */
709 		if (LE_OV(newstart, size, rp->er_start)) {
710 			/*
711 			 * Do a boundary check, if necessary.  Note
712 			 * that a region may *begin* on the boundary,
713 			 * but it must end before the boundary.
714 			 */
715 			if (boundary) {
716 				newend = newstart + (size - 1);
717 
718 				/*
719 				 * Calculate the next boundary after the start
720 				 * of this region.
721 				 */
722 				dontcross = extent_align(newstart+1, boundary,
723 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
724 				    - 1;
725 
726 #if 0
727 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
728 				    newstart, newend, ex->ex_start, ex->ex_end,
729 				    boundary, dontcross);
730 #endif
731 
732 				/* Check for overflow */
733 				if (dontcross < ex->ex_start)
734 					dontcross = ex->ex_end;
735 				else if (newend > dontcross) {
736 					/*
737 					 * Candidate region crosses boundary.
738 					 * Throw away the leading part and see
739 					 * if we still fit.
740 					 */
741 					newstart = dontcross + 1;
742 					newend = newstart + (size - 1);
743 					dontcross += boundary;
744 					if (!LE_OV(newstart, size, rp->er_start))
745 						goto skip;
746 				}
747 
748 				/*
749 				 * If we run past the end of
750 				 * the extent or the boundary
751 				 * overflows, then the request
752 				 * can't fit.
753 				 */
754 				if (newstart + size - 1 > ex->ex_end ||
755 				    dontcross < newstart)
756 					goto fail;
757 			}
758 
759 			/*
760 			 * We would fit into this space.  Calculate
761 			 * the overhead (wasted space).  If we exactly
762 			 * fit, or we're taking the first fit, insert
763 			 * ourselves into the region list.
764 			 */
765 			ovh = rp->er_start - newstart - size;
766 			if ((flags & EX_FAST) || (ovh == 0))
767 				goto found;
768 
769 			/*
770 			 * Don't exactly fit, but check to see
771 			 * if we're better than any current choice.
772 			 */
773 			if ((bestovh == 0) || (ovh < bestovh)) {
774 				bestovh = ovh;
775 				beststart = newstart;
776 				bestlast = last;
777 			}
778 		}
779 
780 skip:
781 		/*
782 		 * Skip past the current region and check again.
783 		 */
784 		newstart = extent_align((rp->er_end + 1), alignment, skew);
785 		if (newstart < rp->er_end) {
786 			/*
787 			 * Overflow condition.  Don't error out, since
788 			 * we might have a chunk of space that we can
789 			 * use.
790 			 */
791 			goto fail;
792 		}
793 
794 		last = rp;
795 	}
796 
797 	/*
798 	 * The final check is from the current starting point to the
799 	 * end of the subregion.  If there were no allocated regions,
800 	 * "newstart" is set to the beginning of the subregion, or
801 	 * just past the end of the last allocated region, adjusted
802 	 * for alignment in either case.
803 	 */
804 	if (LE_OV(newstart, (size - 1), subend)) {
805 		/*
806 		 * Do a boundary check, if necessary.  Note
807 		 * that a region may *begin* on the boundary,
808 		 * but it must end before the boundary.
809 		 */
810 		if (boundary) {
811 			newend = newstart + (size - 1);
812 
813 			/*
814 			 * Calculate the next boundary after the start
815 			 * of this region.
816 			 */
817 			dontcross = extent_align(newstart+1, boundary,
818 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
819 			    - 1;
820 
821 #if 0
822 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
823 			    newstart, newend, ex->ex_start, ex->ex_end,
824 			    boundary, dontcross);
825 #endif
826 
827 			/* Check for overflow */
828 			if (dontcross < ex->ex_start)
829 				dontcross = ex->ex_end;
830 			else if (newend > dontcross) {
831 				/*
832 				 * Candidate region crosses boundary.
833 				 * Throw away the leading part and see
834 				 * if we still fit.
835 				 */
836 				newstart = dontcross + 1;
837 				newend = newstart + (size - 1);
838 				dontcross += boundary;
839 				if (!LE_OV(newstart, (size - 1), subend))
840 					goto fail;
841 			}
842 
843 			/*
844 			 * If we run past the end of
845 			 * the extent or the boundary
846 			 * overflows, then the request
847 			 * can't fit.
848 			 */
849 			if (newstart + size - 1 > ex->ex_end ||
850 			    dontcross < newstart)
851 				goto fail;
852 		}
853 
854 		/*
855 		 * We would fit into this space.  Calculate
856 		 * the overhead (wasted space).  If we exactly
857 		 * fit, or we're taking the first fit, insert
858 		 * ourselves into the region list.
859 		 */
860 		ovh = exend - newstart - (size - 1);
861 		if ((flags & EX_FAST) || (ovh == 0))
862 			goto found;
863 
864 		/*
865 		 * Don't exactly fit, but check to see
866 		 * if we're better than any current choice.
867 		 */
868 		if ((bestovh == 0) || (ovh < bestovh)) {
869 			bestovh = ovh;
870 			beststart = newstart;
871 			bestlast = last;
872 		}
873 	}
874 
875  fail:
876 	/*
877 	 * One of the following two conditions have
878 	 * occurred:
879 	 *
880 	 *	There is no chunk large enough to hold the request.
881 	 *
882 	 *	If EX_FAST was not specified, there is not an
883 	 *	exact match for the request.
884 	 *
885 	 * Note that if we reach this point and EX_FAST is
886 	 * set, then we know there is no space in the extent for
887 	 * the request.
888 	 */
889 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
890 		/*
891 		 * We have a match that's "good enough".
892 		 */
893 		newstart = beststart;
894 		last = bestlast;
895 		goto found;
896 	}
897 
898 	/*
899 	 * No space currently available.  Wait for it to free up,
900 	 * if possible.
901 	 */
902 	if (flags & EX_WAITSPACE) {
903 		ex->ex_flags |= EXF_WANTED;
904 		error = tsleep_nsec(ex,
905 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
906 		    "extnt", INFSLP);
907 		if (error)
908 			return (error);
909 		goto alloc_start;
910 	}
911 
912 	extent_free_region_descriptor(ex, myrp);
913 	return (EAGAIN);
914 
915  found:
916 	/*
917 	 * Insert ourselves into the region list.
918 	 */
919 	extent_insert_and_optimize(ex, newstart, size, last, myrp);
920 	*result = newstart;
921 	return (0);
922 }
923 
924 int
925 extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
926     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
927     u_long *result)
928 {
929 	struct extent_region *rp;
930 
931 	/*
932 	 * Allocate the region descriptor.  It will be freed later
933 	 * if we can coalesce with another region.
934 	 */
935 	rp = extent_alloc_region_descriptor(ex, flags);
936 	if (rp == NULL) {
937 #ifdef DIAGNOSTIC
938 		printf("%s: can't allocate region descriptor\n", __func__);
939 #endif
940 		return (ENOMEM);
941 	}
942 
943 	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
944 	    boundary, flags, rp, result);
945 }
946 
947 int
948 extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
949     u_long subend, u_long size, u_long alignment, u_long skew,
950     u_long boundary, int flags, struct extent_region *rp, u_long *result)
951 {
952 #ifdef DIAGNOSTIC
953 	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
954 		panic("%s: EX_NOCOALESCE not set", __func__);
955 #endif
956 
957 	rp->er_flags = ER_DISCARD;
958 	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
959 	    boundary, flags, rp, result);
960 }
961 
962 int
963 extent_free(struct extent *ex, u_long start, u_long size, int flags)
964 {
965 	struct extent_region *rp, *nrp = NULL;
966 	struct extent_region *tmp;
967 	u_long end = start + (size - 1);
968 	int exflags;
969 	int error = 0;
970 
971 #ifdef DIAGNOSTIC
972 	/* Check arguments. */
973 	if (ex == NULL)
974 		panic("%s: NULL extent", __func__);
975 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
976 		extent_print(ex);
977 		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
978 		    __func__, ex->ex_name, start, size);
979 		panic("%s: extent `%s', region not within extent",
980 		    __func__, ex->ex_name);
981 	}
982 	/* Check for an overflow. */
983 	if (end < start) {
984 		extent_print(ex);
985 		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
986 		    __func__, ex->ex_name, start, size);
987 		panic("%s: overflow", __func__);
988 	}
989 #endif
990 
991 	/*
992 	 * If we're allowing coalescing, we must allocate a region
993 	 * descriptor now, since it might block.
994 	 *
995 	 * XXX Make a static, create-time flags word, so we don't
996 	 * XXX have to lock to read it!
997 	 */
998 	exflags = ex->ex_flags;
999 
1000 	if ((exflags & EXF_NOCOALESCE) == 0) {
1001 		/* Allocate a region descriptor. */
1002 		nrp = extent_alloc_region_descriptor(ex, flags);
1003 		if (nrp == NULL)
1004 			return (ENOMEM);
1005 	}
1006 
1007 	/*
1008 	 * Find region and deallocate.  Several possibilities:
1009 	 *
1010 	 *	1. (start == er_start) && (end == er_end):
1011 	 *	   Free descriptor.
1012 	 *
1013 	 *	2. (start == er_start) && (end < er_end):
1014 	 *	   Adjust er_start.
1015 	 *
1016 	 *	3. (start > er_start) && (end == er_end):
1017 	 *	   Adjust er_end.
1018 	 *
1019 	 *	4. (start > er_start) && (end < er_end):
1020 	 *	   Fragment region.  Requires descriptor alloc.
1021 	 *
1022 	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
1023 	 * is not set.
1024 	 *
1025 	 * If the EX_CONFLICTOK flag is set, partially overlapping
1026 	 * regions are allowed.  This is handled in cases 1a, 2a and
1027 	 * 3a below.
1028 	 */
1029 	LIST_FOREACH_SAFE(rp, &ex->ex_regions, er_link, tmp) {
1030 		/*
1031 		 * Save ourselves some comparisons; does the current
1032 		 * region end before chunk to be freed begins?  If so,
1033 		 * then we haven't found the appropriate region descriptor.
1034 		 */
1035 		if (rp->er_end < start)
1036 			continue;
1037 
1038 		/*
1039 		 * Save ourselves some traversal; does the current
1040 		 * region begin after the chunk to be freed ends?  If so,
1041 		 * then we've already passed any possible region descriptors
1042 		 * that might have contained the chunk to be freed.
1043 		 */
1044 		if (rp->er_start > end)
1045 			break;
1046 
1047 		/* Case 1. */
1048 		if ((start == rp->er_start) && (end == rp->er_end)) {
1049 			LIST_REMOVE(rp, er_link);
1050 			extent_free_region_descriptor(ex, rp);
1051 			goto done;
1052 		}
1053 
1054 		/*
1055 		 * The following cases all require that EXF_NOCOALESCE
1056 		 * is not set.
1057 		 */
1058 		if (ex->ex_flags & EXF_NOCOALESCE)
1059 			continue;
1060 
1061 		/* Case 2. */
1062 		if ((start == rp->er_start) && (end < rp->er_end)) {
1063 			rp->er_start = (end + 1);
1064 			goto done;
1065 		}
1066 
1067 		/* Case 3. */
1068 		if ((start > rp->er_start) && (end == rp->er_end)) {
1069 			rp->er_end = (start - 1);
1070 			goto done;
1071 		}
1072 
1073 		/* Case 4. */
1074 		if ((start > rp->er_start) && (end < rp->er_end)) {
1075 			/* Fill in new descriptor. */
1076 			nrp->er_start = end + 1;
1077 			nrp->er_end = rp->er_end;
1078 
1079 			/* Adjust current descriptor. */
1080 			rp->er_end = start - 1;
1081 
1082 			/* Insert new descriptor after current. */
1083 			LIST_INSERT_AFTER(rp, nrp, er_link);
1084 
1085 			/* We used the new descriptor, so don't free it below */
1086 			nrp = NULL;
1087 			goto done;
1088 		}
1089 
1090 		if ((flags & EX_CONFLICTOK) == 0)
1091 			continue;
1092 
1093 		/* Case 1a. */
1094 		if ((start <= rp->er_start && end >= rp->er_end)) {
1095 			LIST_REMOVE(rp, er_link);
1096 			extent_free_region_descriptor(ex, rp);
1097 			continue;
1098 		}
1099 
1100 		/* Case 2a. */
1101 		if ((start <= rp->er_start) && (end >= rp->er_start))
1102 			rp->er_start = (end + 1);
1103 
1104 		/* Case 3a. */
1105 		if ((start <= rp->er_end) && (end >= rp->er_end))
1106 			rp->er_end = (start - 1);
1107 	}
1108 
1109 	if (flags & EX_CONFLICTOK)
1110 		goto done;
1111 
1112 	/* Region not found, or request otherwise invalid. */
1113 #if defined(DIAGNOSTIC) || defined(DDB)
1114 	extent_print(ex);
1115 #endif
1116 	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
1117 	panic("%s: region not found", __func__);
1118 
1119  done:
1120 	if (nrp != NULL)
1121 		extent_free_region_descriptor(ex, nrp);
1122 	if (ex->ex_flags & EXF_WANTED) {
1123 		ex->ex_flags &= ~EXF_WANTED;
1124 		wakeup(ex);
1125 	}
1126 	return (error);
1127 }
1128 
1129 static struct extent_region *
1130 extent_alloc_region_descriptor(struct extent *ex, int flags)
1131 {
1132 	struct extent_region *rp;
1133 
1134 	if (ex->ex_flags & EXF_FIXED) {
1135 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1136 
1137 		while (LIST_EMPTY(&fex->fex_freelist)) {
1138 			if (flags & EX_MALLOCOK)
1139 				goto alloc;
1140 
1141 			if ((flags & EX_WAITOK) == 0)
1142 				return (NULL);
1143 			ex->ex_flags |= EXF_FLWANTED;
1144 			if (tsleep_nsec(&fex->fex_freelist,
1145 			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1146 			    "extnt", INFSLP))
1147 				return (NULL);
1148 		}
1149 		rp = LIST_FIRST(&fex->fex_freelist);
1150 		LIST_REMOVE(rp, er_link);
1151 
1152 		/*
1153 		 * Don't muck with flags after pulling it off the
1154 		 * freelist; it may be a dynamically allocated
1155 		 * region pointer that was kindly given to us,
1156 		 * and we need to preserve that information.
1157 		 */
1158 
1159 		return (rp);
1160 	}
1161 
1162  alloc:
1163 	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
1164 	    PR_NOWAIT);
1165 	if (rp != NULL)
1166 		rp->er_flags = ER_ALLOC;
1167 
1168 	return (rp);
1169 }
1170 
1171 static void
1172 extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
1173 {
1174 	if (rp->er_flags & ER_DISCARD)
1175 		return;
1176 
1177 	if (ex->ex_flags & EXF_FIXED) {
1178 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1179 
1180 		/*
1181 		 * If someone's waiting for a region descriptor,
1182 		 * be nice and give them this one, rather than
1183 		 * just free'ing it back to the system.
1184 		 */
1185 		if (rp->er_flags & ER_ALLOC) {
1186 			if (ex->ex_flags & EXF_FLWANTED) {
1187 				/* Clear all but ER_ALLOC flag. */
1188 				rp->er_flags = ER_ALLOC;
1189 				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1190 				    er_link);
1191 				goto wake_em_up;
1192 			} else {
1193 				pool_put(&ex_region_pl, rp);
1194 			}
1195 		} else {
1196 			/* Clear all flags. */
1197 			rp->er_flags = 0;
1198 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1199 		}
1200 
1201 		if (ex->ex_flags & EXF_FLWANTED) {
1202  wake_em_up:
1203 			ex->ex_flags &= ~EXF_FLWANTED;
1204 			wakeup(&fex->fex_freelist);
1205 		}
1206 		return;
1207 	}
1208 
1209 	/*
1210 	 * We know it's dynamically allocated if we get here.
1211 	 */
1212 	pool_put(&ex_region_pl, rp);
1213 }
1214 
1215 
1216 #if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1217 
1218 void
1219 extent_print(struct extent *ex)
1220 {
1221 	extent_print1(ex, printf);
1222 }
1223 
1224 void
1225 extent_print1(struct extent *ex,
1226     int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
1227 {
1228 	struct extent_region *rp;
1229 
1230 	if (ex == NULL)
1231 		panic("%s: NULL extent", __func__);
1232 
1233 #ifdef _KERNEL
1234 	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1235 	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1236 #else
1237 	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1238 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1239 #endif
1240 
1241 	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1242 		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1243 }
1244 #endif
1245