xref: /openbsd-src/sys/kern/subr_extent.c (revision 98f5fe29df64c678cff6cef1723c98f7754f5583)
1 /*	$OpenBSD: subr_extent.c,v 1.16 2001/07/05 10:00:45 art Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * General purpose extent manager.
42  */
43 
44 #ifdef _KERNEL
45 #include <sys/param.h>
46 #include <sys/extent.h>
47 #include <sys/malloc.h>
48 #include <sys/time.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/queue.h>
52 #include <ddb/db_output.h>
53 #else
54 /*
55  * user-land definitions, so it can fit into a testing harness.
56  */
57 #include <sys/param.h>
58 #include <sys/extent.h>
59 #include <sys/queue.h>
60 #include <errno.h>
61 #include <stdlib.h>
62 #include <stdio.h>
63 
64 #define	malloc(s, t, flags)		malloc(s)
65 #define	free(p, t)			free(p)
66 #define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
67 #define	wakeup(chan)			((void)0)
68 #define db_printf printf
69 #endif
70 
71 static	void extent_insert_and_optimize __P((struct extent *, u_long, u_long,
72 	    int, struct extent_region *, struct extent_region *));
73 static	struct extent_region *extent_alloc_region_descriptor
74 	    __P((struct extent *, int));
75 static	void extent_free_region_descriptor __P((struct extent *,
76 	    struct extent_region *));
77 static	void extent_register __P((struct extent *));
78 
79 /*
80  * Macro to align to an arbitrary power-of-two boundary.
81  */
82 #define EXTENT_ALIGN(_start, _align, _skew)		\
83 	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
84 
85 
86 /*
87  * Register the extent on a doubly linked list.
88  * Should work, no?
89  */
90 static LIST_HEAD(listhead, extent) ext_list;
91 static struct listhead *ext_listp;
92 
93 static void
94 extent_register(ex)
95 	struct extent *ex;
96 {
97 	/* Is this redundant? */
98 	if (ext_listp == NULL){
99 		LIST_INIT(&ext_list);
100 		ext_listp = &ext_list;
101 	}
102 
103 	/* Insert into list */
104 	LIST_INSERT_HEAD(ext_listp, ex, ex_link);
105 }
106 
107 /*
108  * Find a given extent, and return a pointer to
109  * it so that other extent functions can be used
110  * on it.
111  *
112  * Returns NULL on failure.
113  */
114 struct extent *
115 extent_find(name)
116 	char *name;
117 {
118 	struct extent *ep;
119 
120 	for(ep = ext_listp->lh_first; ep != NULL; ep = ep->ex_link.le_next){
121 		if (!strcmp(ep->ex_name, name))
122 			return(ep);
123 	}
124 
125 	return(NULL);
126 }
127 
128 #ifdef DDB
129 /*
130  * Print out all extents registered.  This is used in
131  * DDB show extents
132  */
133 void
134 extent_print_all(void)
135 {
136 	struct extent *ep;
137 
138 	for(ep = ext_listp->lh_first; ep != NULL; ep = ep->ex_link.le_next){
139 		extent_print(ep);
140 	}
141 }
142 #endif
143 
144 /*
145  * Allocate and initialize an extent map.
146  */
147 struct extent *
148 extent_create(name, start, end, mtype, storage, storagesize, flags)
149 	char *name;
150 	u_long start, end;
151 	int mtype;
152 	caddr_t storage;
153 	size_t storagesize;
154 	int flags;
155 {
156 	struct extent *ex;
157 	caddr_t cp = storage;
158 	size_t sz = storagesize;
159 	struct extent_region *rp;
160 	int fixed_extent = (storage != NULL);
161 
162 #ifdef DIAGNOSTIC
163 	/* Check arguments. */
164 	if (name == NULL)
165 		panic("extent_create: name == NULL");
166 	if (end < start) {
167 		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
168 		    name, start, end);
169 		panic("extent_create: end < start");
170 	}
171 	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
172 		panic("extent_create: fixed extent, bad storagesize 0x%x",
173 		    storagesize);
174 	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
175 		panic("extent_create: storage provided for non-fixed");
176 #endif
177 
178 	/* Allocate extent descriptor. */
179 	if (fixed_extent) {
180 		struct extent_fixed *fex;
181 
182 		bzero(storage, storagesize);
183 
184 		/*
185 		 * Align all descriptors on "long" boundaries.
186 		 */
187 		fex = (struct extent_fixed *)cp;
188 		ex = (struct extent *)fex;
189 		cp += ALIGN(sizeof(struct extent_fixed));
190 		sz -= ALIGN(sizeof(struct extent_fixed));
191 		fex->fex_storage = storage;
192 		fex->fex_storagesize = storagesize;
193 
194 		/*
195 		 * In a fixed extent, we have to pre-allocate region
196 		 * descriptors and place them in the extent's freelist.
197 		 */
198 		LIST_INIT(&fex->fex_freelist);
199 		while (sz >= ALIGN(sizeof(struct extent_region))) {
200 			rp = (struct extent_region *)cp;
201 			cp += ALIGN(sizeof(struct extent_region));
202 			sz -= ALIGN(sizeof(struct extent_region));
203 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
204 		}
205 	} else {
206 		ex = (struct extent *)malloc(sizeof(struct extent),
207 		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
208 		if (ex == NULL)
209 			return (NULL);
210 	}
211 
212 	/* Fill in the extent descriptor and return it to the caller. */
213 	LIST_INIT(&ex->ex_regions);
214 	ex->ex_name = name;
215 	ex->ex_start = start;
216 	ex->ex_end = end;
217 	ex->ex_mtype = mtype;
218 	ex->ex_flags = 0;
219 	if (fixed_extent)
220 		ex->ex_flags |= EXF_FIXED;
221 	if (flags & EX_NOCOALESCE)
222 		ex->ex_flags |= EXF_NOCOALESCE;
223 
224 	extent_register(ex);
225 	return (ex);
226 }
227 
228 /*
229  * Destroy an extent map.
230  */
231 void
232 extent_destroy(ex)
233 	struct extent *ex;
234 {
235 	struct extent_region *rp, *orp;
236 
237 #ifdef DIAGNOSTIC
238 	/* Check arguments. */
239 	if (ex == NULL)
240 		panic("extent_destroy: NULL extent");
241 #endif
242 
243 	/* Free all region descriptors in extent. */
244 	for (rp = ex->ex_regions.lh_first; rp != NULL; ) {
245 		orp = rp;
246 		rp = rp->er_link.le_next;
247 		LIST_REMOVE(orp, er_link);
248 		extent_free_region_descriptor(ex, orp);
249 	}
250 
251 	/* If we're not a fixed extent, free the extent descriptor itself. */
252 	if ((ex->ex_flags & EXF_FIXED) == 0)
253 		free(ex, ex->ex_mtype);
254 }
255 
256 /*
257  * Insert a region descriptor into the sorted region list after the
258  * entry "after" or at the head of the list (if "after" is NULL).
259  * The region descriptor we insert is passed in "rp".  We must
260  * allocate the region descriptor before calling this function!
261  * If we don't need the region descriptor, it will be freed here.
262  */
263 static void
264 extent_insert_and_optimize(ex, start, size, flags, after, rp)
265 	struct extent *ex;
266 	u_long start, size;
267 	int flags;
268 	struct extent_region *after, *rp;
269 {
270 	struct extent_region *nextr;
271 	int appended = 0;
272 
273 	if (after == NULL) {
274 		/*
275 		 * We're the first in the region list.  If there's
276 		 * a region after us, attempt to coalesce to save
277 		 * descriptor overhead.
278 		 */
279 		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
280 		    (ex->ex_regions.lh_first != NULL) &&
281 		    ((start + size) == ex->ex_regions.lh_first->er_start)) {
282 			/*
283 			 * We can coalesce.  Prepend us to the first region.
284 			 */
285 			ex->ex_regions.lh_first->er_start = start;
286 			extent_free_region_descriptor(ex, rp);
287 			return;
288 		}
289 
290 		/*
291 		 * Can't coalesce.  Fill in the region descriptor
292 		 * in, and insert us at the head of the region list.
293 		 */
294 		rp->er_start = start;
295 		rp->er_end = start + (size - 1);
296 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
297 		return;
298 	}
299 
300 	/*
301 	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
302 	 */
303 	if (ex->ex_flags & EXF_NOCOALESCE)
304 		goto cant_coalesce;
305 
306 	/*
307 	 * Attempt to coalesce with the region before us.
308 	 */
309 	if ((after->er_end + 1) == start) {
310 		/*
311 		 * We can coalesce.  Append ourselves and make
312 		 * note of it.
313 		 */
314 		after->er_end = start + (size - 1);
315 		appended = 1;
316 	}
317 
318 	/*
319 	 * Attempt to coalesce with the region after us.
320 	 */
321 	if ((after->er_link.le_next != NULL) &&
322 	    ((start + size) == after->er_link.le_next->er_start)) {
323 		/*
324 		 * We can coalesce.  Note that if we appended ourselves
325 		 * to the previous region, we exactly fit the gap, and
326 		 * can free the "next" region descriptor.
327 		 */
328 		if (appended) {
329 			/*
330 			 * Yup, we can free it up.
331 			 */
332 			after->er_end = after->er_link.le_next->er_end;
333 			nextr = after->er_link.le_next;
334 			LIST_REMOVE(nextr, er_link);
335 			extent_free_region_descriptor(ex, nextr);
336 		} else {
337 			/*
338 			 * Nope, just prepend us to the next region.
339 			 */
340 			after->er_link.le_next->er_start = start;
341 		}
342 
343 		extent_free_region_descriptor(ex, rp);
344 		return;
345 	}
346 
347 	/*
348 	 * We weren't able to coalesce with the next region, but
349 	 * we don't need to allocate a region descriptor if we
350 	 * appended ourselves to the previous region.
351 	 */
352 	if (appended) {
353 		extent_free_region_descriptor(ex, rp);
354 		return;
355 	}
356 
357  cant_coalesce:
358 
359 	/*
360 	 * Fill in the region descriptor and insert ourselves
361 	 * into the region list.
362 	 */
363 	rp->er_start = start;
364 	rp->er_end = start + (size - 1);
365 	LIST_INSERT_AFTER(after, rp, er_link);
366 }
367 
368 /*
369  * Allocate a specific region in an extent map.
370  */
371 int
372 extent_alloc_region(ex, start, size, flags)
373 	struct extent *ex;
374 	u_long start, size;
375 	int flags;
376 {
377 	struct extent_region *rp, *last, *myrp;
378 	u_long end = start + (size - 1);
379 	int error;
380 
381 #ifdef DIAGNOSTIC
382 	/* Check arguments. */
383 	if (ex == NULL)
384 		panic("extent_alloc_region: NULL extent");
385 	if (size < 1) {
386 		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
387 		    ex->ex_name, size);
388 		panic("extent_alloc_region: bad size");
389 	}
390 	if (end < start) {
391 		printf(
392 		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
393 		 ex->ex_name, start, size);
394 		panic("extent_alloc_region: overflow");
395 	}
396 #endif
397 
398 	/*
399 	 * Make sure the requested region lies within the
400 	 * extent.
401 	 */
402 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
403 #ifdef DIAGNOSTIC
404 		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
405 		    ex->ex_name, ex->ex_start, ex->ex_end);
406 		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
407 		    start, end);
408 		panic("extent_alloc_region: region lies outside extent");
409 #else
410 		return (EINVAL);
411 #endif
412 	}
413 
414 	/*
415 	 * Allocate the region descriptor.  It will be freed later
416 	 * if we can coalesce with another region.
417 	 */
418 	myrp = extent_alloc_region_descriptor(ex, flags);
419 	if (myrp == NULL) {
420 #ifdef DIAGNOSTIC
421 		printf(
422 		    "extent_alloc_region: can't allocate region descriptor\n");
423 #endif
424 		return (ENOMEM);
425 	}
426 
427  alloc_start:
428 	/*
429 	 * Attempt to place ourselves in the desired area of the
430 	 * extent.  We save ourselves some work by keeping the list sorted.
431 	 * In other words, if the start of the current region is greater
432 	 * than the end of our region, we don't have to search any further.
433 	 */
434 
435 	/*
436 	 * Keep a pointer to the last region we looked at so
437 	 * that we don't have to traverse the list again when
438 	 * we insert ourselves.  If "last" is NULL when we
439 	 * finally insert ourselves, we go at the head of the
440 	 * list.  See extent_insert_and_optimize() for details.
441 	 */
442 	last = NULL;
443 
444 	for (rp = ex->ex_regions.lh_first; rp != NULL;
445 	    rp = rp->er_link.le_next) {
446 		if (rp->er_start > end) {
447 			/*
448 			 * We lie before this region and don't
449 			 * conflict.
450 			 */
451 			break;
452 		}
453 
454 		/*
455 		 * The current region begins before we end.
456 		 * Check for a conflict.
457 		 */
458 		if (rp->er_end >= start) {
459 			/*
460 			 * We conflict.  If we can (and want to) wait,
461 			 * do so.
462 			 */
463 			if (flags & EX_WAITSPACE) {
464 				ex->ex_flags |= EXF_WANTED;
465 				error = tsleep(ex,
466 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
467 				    "extnt", 0);
468 				if (error)
469 					return (error);
470 				goto alloc_start;
471 			}
472 			extent_free_region_descriptor(ex, myrp);
473 			return (EAGAIN);
474 		}
475 		/*
476 		 * We don't conflict, but this region lies before
477 		 * us.  Keep a pointer to this region, and keep
478 		 * trying.
479 		 */
480 		last = rp;
481 	}
482 
483 	/*
484 	 * We don't conflict with any regions.  "last" points
485 	 * to the region we fall after, or is NULL if we belong
486 	 * at the beginning of the region list.  Insert ourselves.
487 	 */
488 	extent_insert_and_optimize(ex, start, size, flags, last, myrp);
489 	return (0);
490 }
491 
492 /*
493  * Macro to check (x + y) <= z.  This check is designed to fail
494  * if an overflow occurs.
495  */
496 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
497 
498 /*
499  * Allocate a region in an extent map subregion.
500  *
501  * If EX_FAST is specified, we return the first fit in the map.
502  * Otherwise, we try to minimize fragmentation by finding the
503  * smallest gap that will hold the request.
504  *
505  * The allocated region is aligned to "alignment", which must be
506  * a power of 2.
507  */
508 int
509 extent_alloc_subregion(ex, substart, subend, size, alignment, skew, boundary,
510     flags, result)
511 	struct extent *ex;
512 	u_long substart, subend, size, alignment, skew, boundary;
513 	int flags;
514 	u_long *result;
515 {
516 	struct extent_region *rp, *myrp, *last, *bestlast;
517 	u_long newstart, newend, beststart, bestovh, ovh;
518 	u_long dontcross;
519 	int error;
520 
521 #ifdef DIAGNOSTIC
522 	/* Check arguments. */
523 	if (ex == NULL)
524 		panic("extent_alloc_subregion: NULL extent");
525 	if (result == NULL)
526 		panic("extent_alloc_subregion: NULL result pointer");
527 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
528 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
529   printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
530 		    ex->ex_name, ex->ex_start, ex->ex_end);
531 		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
532 		    substart, subend);
533 		panic("extent_alloc_subregion: bad subregion");
534 	}
535 	if ((size < 1) || ((size - 1) > (subend - substart))) {
536 		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
537 		    ex->ex_name, size);
538 		panic("extent_alloc_subregion: bad size");
539 	}
540 	if (alignment == 0)
541 		panic("extent_alloc_subregion: bad alignment");
542 	if (boundary && (boundary < size)) {
543 		printf(
544 		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
545 		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
546 		panic("extent_alloc_subregion: bad boundary");
547 	}
548 #endif
549 
550 	/*
551 	 * Allocate the region descriptor.  It will be freed later
552 	 * if we can coalesce with another region.
553 	 */
554 	myrp = extent_alloc_region_descriptor(ex, flags);
555 	if (myrp == NULL) {
556 #ifdef DIAGNOSTIC
557 		printf(
558 		 "extent_alloc_subregion: can't allocate region descriptor\n");
559 #endif
560 		return (ENOMEM);
561 	}
562 
563  alloc_start:
564 	/*
565 	 * Keep a pointer to the last region we looked at so
566 	 * that we don't have to traverse the list again when
567 	 * we insert ourselves.  If "last" is NULL when we
568 	 * finally insert ourselves, we go at the head of the
569 	 * list.  See extent_insert_and_optimize() for deatails.
570 	 */
571 	last = NULL;
572 
573 	/*
574 	 * Keep track of size and location of the smallest
575 	 * chunk we fit in.
576 	 *
577 	 * Since the extent can be as large as the numeric range
578 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
579 	 * best overhead value can be the maximum unsigned integer.
580 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
581 	 * into the region list immediately on an exact match (which
582 	 * is the only case where "bestovh" would be set to 0).
583 	 */
584 	bestovh = 0;
585 	beststart = 0;
586 	bestlast = NULL;
587 
588 	/*
589 	 * For N allocated regions, we must make (N + 1)
590 	 * checks for unallocated space.  The first chunk we
591 	 * check is the area from the beginning of the subregion
592 	 * to the first allocated region after that point.
593 	 */
594 	newstart = EXTENT_ALIGN(substart, alignment, skew);
595 	if (newstart < ex->ex_start) {
596 #ifdef DIAGNOSTIC
597 		printf(
598       "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
599 		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
600 		panic("extent_alloc_subregion: overflow after alignment");
601 #else
602 		extent_free_region_descriptor(ex, myrp);
603 		return (EINVAL);
604 #endif
605 	}
606 
607 	/*
608 	 * Find the first allocated region that begins on or after
609 	 * the subregion start, advancing the "last" pointer along
610 	 * the way.
611 	 */
612 	for (rp = ex->ex_regions.lh_first; rp != NULL;
613 	     rp = rp->er_link.le_next) {
614 		if (rp->er_start >= newstart)
615 			break;
616 		last = rp;
617 	}
618 
619 	/*
620 	 * Relocate the start of our candidate region to the end of
621 	 * the last allocated region (if there was one overlapping
622 	 * our subrange).
623 	 */
624 	if (last != NULL && last->er_end >= newstart)
625 		newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew);
626 
627 	for (; rp != NULL; rp = rp->er_link.le_next) {
628 		/*
629 		 * Check the chunk before "rp".  Note that our
630 		 * comparison is safe from overflow conditions.
631 		 */
632 		if (LE_OV(newstart, size, rp->er_start)) {
633 			/*
634 			 * Do a boundary check, if necessary.  Note
635 			 * that a region may *begin* on the boundary,
636 			 * but it must end before the boundary.
637 			 */
638 			if (boundary) {
639 				newend = newstart + (size - 1);
640 
641 				/*
642 				 * Calculate the next boundary after the start
643 				 * of this region.
644 				 */
645 				dontcross = EXTENT_ALIGN(newstart+1, boundary,
646 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
647 				    - 1;
648 
649 #if 0
650 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
651 				    newstart, newend, ex->ex_start, ex->ex_end,
652 				    boundary, dontcross);
653 #endif
654 
655 				/* Check for overflow */
656 				if (dontcross < ex->ex_start)
657 					dontcross = ex->ex_end;
658 				else if (newend > dontcross) {
659 					/*
660 					 * Candidate region crosses boundary.
661 					 * Throw away the leading part and see
662 					 * if we still fit.
663 					 */
664 					newstart = dontcross + 1;
665 					newend = newstart + (size - 1);
666 					dontcross += boundary;
667 					if (!LE_OV(newstart, size, rp->er_start))
668 						continue;
669 				}
670 
671 				/*
672 				 * If we run past the end of
673 				 * the extent or the boundary
674 				 * overflows, then the request
675 				 * can't fit.
676 				 */
677 				if (newstart + size - 1 > ex->ex_end ||
678 				    dontcross < newstart)
679 					goto fail;
680 			}
681 
682 			/*
683 			 * We would fit into this space.  Calculate
684 			 * the overhead (wasted space).  If we exactly
685 			 * fit, or we're taking the first fit, insert
686 			 * ourselves into the region list.
687 			 */
688 			ovh = rp->er_start - newstart - size;
689 			if ((flags & EX_FAST) || (ovh == 0))
690 				goto found;
691 
692 			/*
693 			 * Don't exactly fit, but check to see
694 			 * if we're better than any current choice.
695 			 */
696 			if ((bestovh == 0) || (ovh < bestovh)) {
697 				bestovh = ovh;
698 				beststart = newstart;
699 				bestlast = last;
700 			}
701 		}
702 
703 		/*
704 		 * Skip past the current region and check again.
705 		 */
706 		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
707 		if (newstart < rp->er_end) {
708 			/*
709 			 * Overflow condition.  Don't error out, since
710 			 * we might have a chunk of space that we can
711 			 * use.
712 			 */
713 			goto fail;
714 		}
715 
716 		/*
717 		 * Check that the current region don't run past the
718 		 * end of the subregion.
719 		 */
720 		if (!LE_OV(newstart, (size - 1), subend))
721 			goto fail;
722 
723 		last = rp;
724 	}
725 
726 	/*
727 	 * The final check is from the current starting point to the
728 	 * end of the subregion.  If there were no allocated regions,
729 	 * "newstart" is set to the beginning of the subregion, or
730 	 * just past the end of the last allocated region, adjusted
731 	 * for alignment in either case.
732 	 */
733 	if (LE_OV(newstart, (size - 1), subend)) {
734 		/*
735 		 * Do a boundary check, if necessary.  Note
736 		 * that a region may *begin* on the boundary,
737 		 * but it must end before the boundary.
738 		 */
739 		if (boundary) {
740 			newend = newstart + (size - 1);
741 
742 			/*
743 			 * Calculate the next boundary after the start
744 			 * of this region.
745 			 */
746 			dontcross = EXTENT_ALIGN(newstart+1, boundary,
747 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
748 			    - 1;
749 
750 #if 0
751 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
752 			    newstart, newend, ex->ex_start, ex->ex_end,
753 			    boundary, dontcross);
754 #endif
755 
756 			/* Check for overflow */
757 			if (dontcross < ex->ex_start)
758 				dontcross = ex->ex_end;
759 			else if (newend > dontcross) {
760 				/*
761 				 * Candidate region crosses boundary.
762 				 * Throw away the leading part and see
763 				 * if we still fit.
764 				 */
765 				newstart = dontcross + 1;
766 				newend = newstart + (size - 1);
767 				dontcross += boundary;
768 				if (!LE_OV(newstart, (size - 1), subend))
769 					goto fail;
770 			}
771 
772 			/*
773 			 * If we run past the end of
774 			 * the extent or the boundary
775 			 * overflows, then the request
776 			 * can't fit.
777 			 */
778 			if (newstart + size - 1 > ex->ex_end ||
779 			    dontcross < newstart)
780 				goto fail;
781 		}
782 
783 		/*
784 		 * We would fit into this space.  Calculate
785 		 * the overhead (wasted space).  If we exactly
786 		 * fit, or we're taking the first fit, insert
787 		 * ourselves into the region list.
788 		 */
789 		ovh = ex->ex_end - newstart - (size - 1);
790 		if ((flags & EX_FAST) || (ovh == 0))
791 			goto found;
792 
793 		/*
794 		 * Don't exactly fit, but check to see
795 		 * if we're better than any current choice.
796 		 */
797 		if ((bestovh == 0) || (ovh < bestovh)) {
798 			bestovh = ovh;
799 			beststart = newstart;
800 			bestlast = last;
801 		}
802 	}
803 
804  fail:
805 	/*
806 	 * One of the following two conditions have
807 	 * occurred:
808 	 *
809 	 *	There is no chunk large enough to hold the request.
810 	 *
811 	 *	If EX_FAST was not specified, there is not an
812 	 *	exact match for the request.
813 	 *
814 	 * Note that if we reach this point and EX_FAST is
815 	 * set, then we know there is no space in the extent for
816 	 * the request.
817 	 */
818 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
819 		/*
820 		 * We have a match that's "good enough".
821 		 */
822 		newstart = beststart;
823 		last = bestlast;
824 		goto found;
825 	}
826 
827 	/*
828 	 * No space currently available.  Wait for it to free up,
829 	 * if possible.
830 	 */
831 	if (flags & EX_WAITSPACE) {
832 		ex->ex_flags |= EXF_WANTED;
833 		error = tsleep(ex,
834 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
835 		if (error)
836 			return (error);
837 		goto alloc_start;
838 	}
839 
840 	extent_free_region_descriptor(ex, myrp);
841 	return (EAGAIN);
842 
843  found:
844 	/*
845 	 * Insert ourselves into the region list.
846 	 */
847 	extent_insert_and_optimize(ex, newstart, size, flags, last, myrp);
848 	*result = newstart;
849 	return (0);
850 }
851 
852 int
853 extent_free(ex, start, size, flags)
854 	struct extent *ex;
855 	u_long start, size;
856 	int flags;
857 {
858 	struct extent_region *rp, *nrp = NULL;
859 	u_long end = start + (size - 1);
860 	int exflags;
861 
862 #ifdef DIAGNOSTIC
863 	/* Check arguments. */
864 	if (ex == NULL)
865 		panic("extent_free: NULL extent");
866 	if ((start < ex->ex_start) || (start > ex->ex_end)) {
867 		extent_print(ex);
868 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
869 		    ex->ex_name, start, size);
870 		panic("extent_free: extent `%s', region not within extent",
871 		    ex->ex_name);
872 	}
873 	/* Check for an overflow. */
874 	if (end < start) {
875 		extent_print(ex);
876 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
877 		    ex->ex_name, start, size);
878 		panic("extent_free: overflow");
879 	}
880 #endif
881 
882 	/*
883 	 * If we're allowing coalescing, we must allocate a region
884 	 * descriptor now, since it might block.
885 	 *
886 	 * XXX Make a static, create-time flags word, so we don't
887 	 * XXX have to lock to read it!
888 	 */
889 	exflags = ex->ex_flags;
890 
891 	if ((exflags & EXF_NOCOALESCE) == 0) {
892 		/* Allocate a region descriptor. */
893 		nrp = extent_alloc_region_descriptor(ex, flags);
894 		if (nrp == NULL)
895 			return (ENOMEM);
896 	}
897 
898 	/*
899 	 * Find region and deallocate.  Several possibilities:
900 	 *
901 	 *	1. (start == er_start) && (end == er_end):
902 	 *	   Free descriptor.
903 	 *
904 	 *	2. (start == er_start) && (end < er_end):
905 	 *	   Adjust er_start.
906 	 *
907 	 *	3. (start > er_start) && (end == er_end):
908 	 *	   Adjust er_end.
909 	 *
910 	 *	4. (start > er_start) && (end < er_end):
911 	 *	   Fragment region.  Requires descriptor alloc.
912 	 *
913 	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
914 	 * is not set.
915 	 */
916 	for (rp = ex->ex_regions.lh_first; rp != NULL;
917 	    rp = rp->er_link.le_next) {
918 		/*
919 		 * Save ourselves some comparisons; does the current
920 		 * region end before chunk to be freed begins?  If so,
921 		 * then we haven't found the appropriate region descriptor.
922 		 */
923 		if (rp->er_end < start)
924 			continue;
925 
926 		/*
927 		 * Save ourselves some traversal; does the current
928 		 * region begin after the chunk to be freed ends?  If so,
929 		 * then we've already passed any possible region descriptors
930 		 * that might have contained the chunk to be freed.
931 		 */
932 		if (rp->er_start > end)
933 			break;
934 
935 		/* Case 1. */
936 		if ((start == rp->er_start) && (end == rp->er_end)) {
937 			LIST_REMOVE(rp, er_link);
938 			extent_free_region_descriptor(ex, rp);
939 			goto done;
940 		}
941 
942 		/*
943 		 * The following cases all require that EXF_NOCOALESCE
944 		 * is not set.
945 		 */
946 		if (ex->ex_flags & EXF_NOCOALESCE)
947 			continue;
948 
949 		/* Case 2. */
950 		if ((start == rp->er_start) && (end < rp->er_end)) {
951 			rp->er_start = (end + 1);
952 			goto done;
953 		}
954 
955 		/* Case 3. */
956 		if ((start > rp->er_start) && (end == rp->er_end)) {
957 			rp->er_end = (start - 1);
958 			goto done;
959 		}
960 
961 		/* Case 4. */
962 		if ((start > rp->er_start) && (end < rp->er_end)) {
963 			/* Fill in new descriptor. */
964 			nrp->er_start = end + 1;
965 			nrp->er_end = rp->er_end;
966 
967 			/* Adjust current descriptor. */
968 			rp->er_end = start - 1;
969 
970 			/* Insert new descriptor after current. */
971 			LIST_INSERT_AFTER(rp, nrp, er_link);
972 
973 			/* We used the new descriptor, so don't free it below */
974 			nrp = NULL;
975 			goto done;
976 		}
977 	}
978 
979 	/* Region not found, or request otherwise invalid. */
980 #if defined(DIAGNOSTIC) || defined(DDB)
981 	extent_print(ex);
982 #endif
983 	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
984 	panic("extent_free: region not found");
985 
986  done:
987 	if (nrp != NULL)
988 		extent_free_region_descriptor(ex, nrp);
989 	if (ex->ex_flags & EXF_WANTED) {
990 		ex->ex_flags &= ~EXF_WANTED;
991 		wakeup(ex);
992 	}
993 	return (0);
994 }
995 
996 static struct extent_region *
997 extent_alloc_region_descriptor(ex, flags)
998 	struct extent *ex;
999 	int flags;
1000 {
1001 	struct extent_region *rp;
1002 
1003 	if (ex->ex_flags & EXF_FIXED) {
1004 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1005 
1006 		while (fex->fex_freelist.lh_first == NULL) {
1007 			if (flags & EX_MALLOCOK)
1008 				goto alloc;
1009 
1010 			if ((flags & EX_WAITOK) == 0)
1011 				return (NULL);
1012 			ex->ex_flags |= EXF_FLWANTED;
1013 			if (tsleep(&fex->fex_freelist,
1014 			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1015 			    "extnt", 0))
1016 				return (NULL);
1017 		}
1018 		rp = fex->fex_freelist.lh_first;
1019 		LIST_REMOVE(rp, er_link);
1020 
1021 		/*
1022 		 * Don't muck with flags after pulling it off the
1023 		 * freelist; it may be a dynamiclly allocated
1024 		 * region pointer that was kindly given to us,
1025 		 * and we need to preserve that information.
1026 		 */
1027 
1028 		return (rp);
1029 	}
1030 
1031  alloc:
1032 	rp = (struct extent_region *)
1033 	    malloc(sizeof(struct extent_region), ex->ex_mtype,
1034 	    (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
1035 
1036 	if (rp != NULL)
1037 		rp->er_flags = ER_ALLOC;
1038 
1039 	return (rp);
1040 }
1041 
1042 static void
1043 extent_free_region_descriptor(ex, rp)
1044 	struct extent *ex;
1045 	struct extent_region *rp;
1046 {
1047 
1048 	if (ex->ex_flags & EXF_FIXED) {
1049 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1050 
1051 		/*
1052 		 * If someone's waiting for a region descriptor,
1053 		 * be nice and give them this one, rather than
1054 		 * just free'ing it back to the system.
1055 		 */
1056 		if (rp->er_flags & ER_ALLOC) {
1057 			if (ex->ex_flags & EXF_FLWANTED) {
1058 				/* Clear all but ER_ALLOC flag. */
1059 				rp->er_flags = ER_ALLOC;
1060 				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1061 				    er_link);
1062 				goto wake_em_up;
1063 			} else {
1064 				free(rp, ex->ex_mtype);
1065 			}
1066 		} else {
1067 			/* Clear all flags. */
1068 			rp->er_flags = 0;
1069 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1070 		}
1071 
1072 		if (ex->ex_flags & EXF_FLWANTED) {
1073  wake_em_up:
1074 			ex->ex_flags &= ~EXF_FLWANTED;
1075 			wakeup(&fex->fex_freelist);
1076 		}
1077 		return;
1078 	}
1079 
1080 	/*
1081 	 * We know it's dynamically allocated if we get here.
1082 	 */
1083 	free(rp, ex->ex_mtype);
1084 }
1085 
1086 #ifndef DDB
1087 #define db_printf printf
1088 #endif
1089 
1090 #if defined(DIAGNOSTIC) || defined(DDB)
1091 void
1092 extent_print(ex)
1093 	struct extent *ex;
1094 {
1095 	struct extent_region *rp;
1096 
1097 	if (ex == NULL)
1098 		panic("extent_print: NULL extent");
1099 
1100 	db_printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1101 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1102 
1103 	for (rp = ex->ex_regions.lh_first; rp != NULL;
1104 	    rp = rp->er_link.le_next)
1105 		db_printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1106 }
1107 #endif
1108