xref: /openbsd-src/sys/kern/subr_extent.c (revision ae1f2cca0e5df8ba8e1c01ec84cb1c994a827afa)
1 /*	$OpenBSD: subr_extent.c,v 1.17 2001/07/26 14:34:43 art Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * General purpose extent manager.
42  */
43 
44 #ifdef _KERNEL
45 #include <sys/param.h>
46 #include <sys/extent.h>
47 #include <sys/malloc.h>
48 #include <sys/time.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/queue.h>
52 #include <ddb/db_output.h>
53 #else
54 /*
55  * user-land definitions, so it can fit into a testing harness.
56  */
57 #include <sys/param.h>
58 #include <sys/extent.h>
59 #include <sys/queue.h>
60 #include <errno.h>
61 #include <stdlib.h>
62 #include <stdio.h>
63 
64 #define	malloc(s, t, flags)		malloc(s)
65 #define	free(p, t)			free(p)
66 #define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
67 #define	wakeup(chan)			((void)0)
68 #define db_printf printf
69 #endif
70 
71 static	void extent_insert_and_optimize __P((struct extent *, u_long, u_long,
72 	    int, struct extent_region *, struct extent_region *));
73 static	struct extent_region *extent_alloc_region_descriptor
74 	    __P((struct extent *, int));
75 static	void extent_free_region_descriptor __P((struct extent *,
76 	    struct extent_region *));
77 static	void extent_register __P((struct extent *));
78 
79 /*
80  * Macro to align to an arbitrary power-of-two boundary.
81  */
82 #define EXTENT_ALIGN(_start, _align, _skew)		\
83 	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
84 
85 
86 /*
87  * Register the extent on a doubly linked list.
88  * Should work, no?
89  */
90 static LIST_HEAD(listhead, extent) ext_list;
91 static struct listhead *ext_listp;
92 
93 static void
94 extent_register(ex)
95 	struct extent *ex;
96 {
97 	/* Is this redundant? */
98 	if (ext_listp == NULL){
99 		LIST_INIT(&ext_list);
100 		ext_listp = &ext_list;
101 	}
102 
103 	/* Insert into list */
104 	LIST_INSERT_HEAD(ext_listp, ex, ex_link);
105 }
106 
107 /*
108  * Find a given extent, and return a pointer to
109  * it so that other extent functions can be used
110  * on it.
111  *
112  * Returns NULL on failure.
113  */
114 struct extent *
115 extent_find(name)
116 	char *name;
117 {
118 	struct extent *ep;
119 
120 	for(ep = ext_listp->lh_first; ep != NULL; ep = ep->ex_link.le_next){
121 		if (!strcmp(ep->ex_name, name))
122 			return(ep);
123 	}
124 
125 	return(NULL);
126 }
127 
128 #ifdef DDB
129 /*
130  * Print out all extents registered.  This is used in
131  * DDB show extents
132  */
133 void
134 extent_print_all(void)
135 {
136 	struct extent *ep;
137 
138 	for(ep = ext_listp->lh_first; ep != NULL; ep = ep->ex_link.le_next){
139 		extent_print(ep);
140 	}
141 }
142 #endif
143 
144 /*
145  * Allocate and initialize an extent map.
146  */
147 struct extent *
148 extent_create(name, start, end, mtype, storage, storagesize, flags)
149 	char *name;
150 	u_long start, end;
151 	int mtype;
152 	caddr_t storage;
153 	size_t storagesize;
154 	int flags;
155 {
156 	struct extent *ex;
157 	caddr_t cp = storage;
158 	size_t sz = storagesize;
159 	struct extent_region *rp;
160 	int fixed_extent = (storage != NULL);
161 
162 #ifdef DIAGNOSTIC
163 	/* Check arguments. */
164 	if (name == NULL)
165 		panic("extent_create: name == NULL");
166 	if (end < start) {
167 		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
168 		    name, start, end);
169 		panic("extent_create: end < start");
170 	}
171 	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
172 		panic("extent_create: fixed extent, bad storagesize 0x%x",
173 		    storagesize);
174 	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
175 		panic("extent_create: storage provided for non-fixed");
176 #endif
177 
178 	/* Allocate extent descriptor. */
179 	if (fixed_extent) {
180 		struct extent_fixed *fex;
181 
182 		bzero(storage, storagesize);
183 
184 		/*
185 		 * Align all descriptors on "long" boundaries.
186 		 */
187 		fex = (struct extent_fixed *)cp;
188 		ex = (struct extent *)fex;
189 		cp += ALIGN(sizeof(struct extent_fixed));
190 		sz -= ALIGN(sizeof(struct extent_fixed));
191 		fex->fex_storage = storage;
192 		fex->fex_storagesize = storagesize;
193 
194 		/*
195 		 * In a fixed extent, we have to pre-allocate region
196 		 * descriptors and place them in the extent's freelist.
197 		 */
198 		LIST_INIT(&fex->fex_freelist);
199 		while (sz >= ALIGN(sizeof(struct extent_region))) {
200 			rp = (struct extent_region *)cp;
201 			cp += ALIGN(sizeof(struct extent_region));
202 			sz -= ALIGN(sizeof(struct extent_region));
203 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
204 		}
205 	} else {
206 		ex = (struct extent *)malloc(sizeof(struct extent),
207 		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
208 		if (ex == NULL)
209 			return (NULL);
210 	}
211 
212 	/* Fill in the extent descriptor and return it to the caller. */
213 	LIST_INIT(&ex->ex_regions);
214 	ex->ex_name = name;
215 	ex->ex_start = start;
216 	ex->ex_end = end;
217 	ex->ex_mtype = mtype;
218 	ex->ex_flags = 0;
219 	if (fixed_extent)
220 		ex->ex_flags |= EXF_FIXED;
221 	if (flags & EX_NOCOALESCE)
222 		ex->ex_flags |= EXF_NOCOALESCE;
223 
224 	extent_register(ex);
225 	return (ex);
226 }
227 
228 /*
229  * Destroy an extent map.
230  */
231 void
232 extent_destroy(ex)
233 	struct extent *ex;
234 {
235 	struct extent_region *rp, *orp;
236 
237 #ifdef DIAGNOSTIC
238 	/* Check arguments. */
239 	if (ex == NULL)
240 		panic("extent_destroy: NULL extent");
241 #endif
242 
243 	/* Free all region descriptors in extent. */
244 	for (rp = ex->ex_regions.lh_first; rp != NULL; ) {
245 		orp = rp;
246 		rp = rp->er_link.le_next;
247 		LIST_REMOVE(orp, er_link);
248 		extent_free_region_descriptor(ex, orp);
249 	}
250 
251 	/* Remove from the list of all extents. */
252 	LIST_REMOVE(ex, ex_link);
253 
254 	/* If we're not a fixed extent, free the extent descriptor itself. */
255 	if ((ex->ex_flags & EXF_FIXED) == 0)
256 		free(ex, ex->ex_mtype);
257 }
258 
259 /*
260  * Insert a region descriptor into the sorted region list after the
261  * entry "after" or at the head of the list (if "after" is NULL).
262  * The region descriptor we insert is passed in "rp".  We must
263  * allocate the region descriptor before calling this function!
264  * If we don't need the region descriptor, it will be freed here.
265  */
266 static void
267 extent_insert_and_optimize(ex, start, size, flags, after, rp)
268 	struct extent *ex;
269 	u_long start, size;
270 	int flags;
271 	struct extent_region *after, *rp;
272 {
273 	struct extent_region *nextr;
274 	int appended = 0;
275 
276 	if (after == NULL) {
277 		/*
278 		 * We're the first in the region list.  If there's
279 		 * a region after us, attempt to coalesce to save
280 		 * descriptor overhead.
281 		 */
282 		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
283 		    (ex->ex_regions.lh_first != NULL) &&
284 		    ((start + size) == ex->ex_regions.lh_first->er_start)) {
285 			/*
286 			 * We can coalesce.  Prepend us to the first region.
287 			 */
288 			ex->ex_regions.lh_first->er_start = start;
289 			extent_free_region_descriptor(ex, rp);
290 			return;
291 		}
292 
293 		/*
294 		 * Can't coalesce.  Fill in the region descriptor
295 		 * in, and insert us at the head of the region list.
296 		 */
297 		rp->er_start = start;
298 		rp->er_end = start + (size - 1);
299 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
300 		return;
301 	}
302 
303 	/*
304 	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
305 	 */
306 	if (ex->ex_flags & EXF_NOCOALESCE)
307 		goto cant_coalesce;
308 
309 	/*
310 	 * Attempt to coalesce with the region before us.
311 	 */
312 	if ((after->er_end + 1) == start) {
313 		/*
314 		 * We can coalesce.  Append ourselves and make
315 		 * note of it.
316 		 */
317 		after->er_end = start + (size - 1);
318 		appended = 1;
319 	}
320 
321 	/*
322 	 * Attempt to coalesce with the region after us.
323 	 */
324 	if ((after->er_link.le_next != NULL) &&
325 	    ((start + size) == after->er_link.le_next->er_start)) {
326 		/*
327 		 * We can coalesce.  Note that if we appended ourselves
328 		 * to the previous region, we exactly fit the gap, and
329 		 * can free the "next" region descriptor.
330 		 */
331 		if (appended) {
332 			/*
333 			 * Yup, we can free it up.
334 			 */
335 			after->er_end = after->er_link.le_next->er_end;
336 			nextr = after->er_link.le_next;
337 			LIST_REMOVE(nextr, er_link);
338 			extent_free_region_descriptor(ex, nextr);
339 		} else {
340 			/*
341 			 * Nope, just prepend us to the next region.
342 			 */
343 			after->er_link.le_next->er_start = start;
344 		}
345 
346 		extent_free_region_descriptor(ex, rp);
347 		return;
348 	}
349 
350 	/*
351 	 * We weren't able to coalesce with the next region, but
352 	 * we don't need to allocate a region descriptor if we
353 	 * appended ourselves to the previous region.
354 	 */
355 	if (appended) {
356 		extent_free_region_descriptor(ex, rp);
357 		return;
358 	}
359 
360  cant_coalesce:
361 
362 	/*
363 	 * Fill in the region descriptor and insert ourselves
364 	 * into the region list.
365 	 */
366 	rp->er_start = start;
367 	rp->er_end = start + (size - 1);
368 	LIST_INSERT_AFTER(after, rp, er_link);
369 }
370 
371 /*
372  * Allocate a specific region in an extent map.
373  */
374 int
375 extent_alloc_region(ex, start, size, flags)
376 	struct extent *ex;
377 	u_long start, size;
378 	int flags;
379 {
380 	struct extent_region *rp, *last, *myrp;
381 	u_long end = start + (size - 1);
382 	int error;
383 
384 #ifdef DIAGNOSTIC
385 	/* Check arguments. */
386 	if (ex == NULL)
387 		panic("extent_alloc_region: NULL extent");
388 	if (size < 1) {
389 		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
390 		    ex->ex_name, size);
391 		panic("extent_alloc_region: bad size");
392 	}
393 	if (end < start) {
394 		printf(
395 		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
396 		 ex->ex_name, start, size);
397 		panic("extent_alloc_region: overflow");
398 	}
399 #endif
400 
401 	/*
402 	 * Make sure the requested region lies within the
403 	 * extent.
404 	 */
405 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
406 #ifdef DIAGNOSTIC
407 		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
408 		    ex->ex_name, ex->ex_start, ex->ex_end);
409 		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
410 		    start, end);
411 		panic("extent_alloc_region: region lies outside extent");
412 #else
413 		return (EINVAL);
414 #endif
415 	}
416 
417 	/*
418 	 * Allocate the region descriptor.  It will be freed later
419 	 * if we can coalesce with another region.
420 	 */
421 	myrp = extent_alloc_region_descriptor(ex, flags);
422 	if (myrp == NULL) {
423 #ifdef DIAGNOSTIC
424 		printf(
425 		    "extent_alloc_region: can't allocate region descriptor\n");
426 #endif
427 		return (ENOMEM);
428 	}
429 
430  alloc_start:
431 	/*
432 	 * Attempt to place ourselves in the desired area of the
433 	 * extent.  We save ourselves some work by keeping the list sorted.
434 	 * In other words, if the start of the current region is greater
435 	 * than the end of our region, we don't have to search any further.
436 	 */
437 
438 	/*
439 	 * Keep a pointer to the last region we looked at so
440 	 * that we don't have to traverse the list again when
441 	 * we insert ourselves.  If "last" is NULL when we
442 	 * finally insert ourselves, we go at the head of the
443 	 * list.  See extent_insert_and_optimize() for details.
444 	 */
445 	last = NULL;
446 
447 	for (rp = ex->ex_regions.lh_first; rp != NULL;
448 	    rp = rp->er_link.le_next) {
449 		if (rp->er_start > end) {
450 			/*
451 			 * We lie before this region and don't
452 			 * conflict.
453 			 */
454 			break;
455 		}
456 
457 		/*
458 		 * The current region begins before we end.
459 		 * Check for a conflict.
460 		 */
461 		if (rp->er_end >= start) {
462 			/*
463 			 * We conflict.  If we can (and want to) wait,
464 			 * do so.
465 			 */
466 			if (flags & EX_WAITSPACE) {
467 				ex->ex_flags |= EXF_WANTED;
468 				error = tsleep(ex,
469 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
470 				    "extnt", 0);
471 				if (error)
472 					return (error);
473 				goto alloc_start;
474 			}
475 			extent_free_region_descriptor(ex, myrp);
476 			return (EAGAIN);
477 		}
478 		/*
479 		 * We don't conflict, but this region lies before
480 		 * us.  Keep a pointer to this region, and keep
481 		 * trying.
482 		 */
483 		last = rp;
484 	}
485 
486 	/*
487 	 * We don't conflict with any regions.  "last" points
488 	 * to the region we fall after, or is NULL if we belong
489 	 * at the beginning of the region list.  Insert ourselves.
490 	 */
491 	extent_insert_and_optimize(ex, start, size, flags, last, myrp);
492 	return (0);
493 }
494 
495 /*
496  * Macro to check (x + y) <= z.  This check is designed to fail
497  * if an overflow occurs.
498  */
499 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
500 
501 /*
502  * Allocate a region in an extent map subregion.
503  *
504  * If EX_FAST is specified, we return the first fit in the map.
505  * Otherwise, we try to minimize fragmentation by finding the
506  * smallest gap that will hold the request.
507  *
508  * The allocated region is aligned to "alignment", which must be
509  * a power of 2.
510  */
511 int
512 extent_alloc_subregion(ex, substart, subend, size, alignment, skew, boundary,
513     flags, result)
514 	struct extent *ex;
515 	u_long substart, subend, size, alignment, skew, boundary;
516 	int flags;
517 	u_long *result;
518 {
519 	struct extent_region *rp, *myrp, *last, *bestlast;
520 	u_long newstart, newend, beststart, bestovh, ovh;
521 	u_long dontcross;
522 	int error;
523 
524 #ifdef DIAGNOSTIC
525 	/* Check arguments. */
526 	if (ex == NULL)
527 		panic("extent_alloc_subregion: NULL extent");
528 	if (result == NULL)
529 		panic("extent_alloc_subregion: NULL result pointer");
530 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
531 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
532   printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
533 		    ex->ex_name, ex->ex_start, ex->ex_end);
534 		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
535 		    substart, subend);
536 		panic("extent_alloc_subregion: bad subregion");
537 	}
538 	if ((size < 1) || ((size - 1) > (subend - substart))) {
539 		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
540 		    ex->ex_name, size);
541 		panic("extent_alloc_subregion: bad size");
542 	}
543 	if (alignment == 0)
544 		panic("extent_alloc_subregion: bad alignment");
545 	if (boundary && (boundary < size)) {
546 		printf(
547 		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
548 		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
549 		panic("extent_alloc_subregion: bad boundary");
550 	}
551 #endif
552 
553 	/*
554 	 * Allocate the region descriptor.  It will be freed later
555 	 * if we can coalesce with another region.
556 	 */
557 	myrp = extent_alloc_region_descriptor(ex, flags);
558 	if (myrp == NULL) {
559 #ifdef DIAGNOSTIC
560 		printf(
561 		 "extent_alloc_subregion: can't allocate region descriptor\n");
562 #endif
563 		return (ENOMEM);
564 	}
565 
566  alloc_start:
567 	/*
568 	 * Keep a pointer to the last region we looked at so
569 	 * that we don't have to traverse the list again when
570 	 * we insert ourselves.  If "last" is NULL when we
571 	 * finally insert ourselves, we go at the head of the
572 	 * list.  See extent_insert_and_optimize() for deatails.
573 	 */
574 	last = NULL;
575 
576 	/*
577 	 * Keep track of size and location of the smallest
578 	 * chunk we fit in.
579 	 *
580 	 * Since the extent can be as large as the numeric range
581 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
582 	 * best overhead value can be the maximum unsigned integer.
583 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
584 	 * into the region list immediately on an exact match (which
585 	 * is the only case where "bestovh" would be set to 0).
586 	 */
587 	bestovh = 0;
588 	beststart = 0;
589 	bestlast = NULL;
590 
591 	/*
592 	 * For N allocated regions, we must make (N + 1)
593 	 * checks for unallocated space.  The first chunk we
594 	 * check is the area from the beginning of the subregion
595 	 * to the first allocated region after that point.
596 	 */
597 	newstart = EXTENT_ALIGN(substart, alignment, skew);
598 	if (newstart < ex->ex_start) {
599 #ifdef DIAGNOSTIC
600 		printf(
601       "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
602 		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
603 		panic("extent_alloc_subregion: overflow after alignment");
604 #else
605 		extent_free_region_descriptor(ex, myrp);
606 		return (EINVAL);
607 #endif
608 	}
609 
610 	/*
611 	 * Find the first allocated region that begins on or after
612 	 * the subregion start, advancing the "last" pointer along
613 	 * the way.
614 	 */
615 	for (rp = ex->ex_regions.lh_first; rp != NULL;
616 	     rp = rp->er_link.le_next) {
617 		if (rp->er_start >= newstart)
618 			break;
619 		last = rp;
620 	}
621 
622 	/*
623 	 * Relocate the start of our candidate region to the end of
624 	 * the last allocated region (if there was one overlapping
625 	 * our subrange).
626 	 */
627 	if (last != NULL && last->er_end >= newstart)
628 		newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew);
629 
630 	for (; rp != NULL; rp = rp->er_link.le_next) {
631 		/*
632 		 * Check the chunk before "rp".  Note that our
633 		 * comparison is safe from overflow conditions.
634 		 */
635 		if (LE_OV(newstart, size, rp->er_start)) {
636 			/*
637 			 * Do a boundary check, if necessary.  Note
638 			 * that a region may *begin* on the boundary,
639 			 * but it must end before the boundary.
640 			 */
641 			if (boundary) {
642 				newend = newstart + (size - 1);
643 
644 				/*
645 				 * Calculate the next boundary after the start
646 				 * of this region.
647 				 */
648 				dontcross = EXTENT_ALIGN(newstart+1, boundary,
649 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
650 				    - 1;
651 
652 #if 0
653 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
654 				    newstart, newend, ex->ex_start, ex->ex_end,
655 				    boundary, dontcross);
656 #endif
657 
658 				/* Check for overflow */
659 				if (dontcross < ex->ex_start)
660 					dontcross = ex->ex_end;
661 				else if (newend > dontcross) {
662 					/*
663 					 * Candidate region crosses boundary.
664 					 * Throw away the leading part and see
665 					 * if we still fit.
666 					 */
667 					newstart = dontcross + 1;
668 					newend = newstart + (size - 1);
669 					dontcross += boundary;
670 					if (!LE_OV(newstart, size, rp->er_start))
671 						continue;
672 				}
673 
674 				/*
675 				 * If we run past the end of
676 				 * the extent or the boundary
677 				 * overflows, then the request
678 				 * can't fit.
679 				 */
680 				if (newstart + size - 1 > ex->ex_end ||
681 				    dontcross < newstart)
682 					goto fail;
683 			}
684 
685 			/*
686 			 * We would fit into this space.  Calculate
687 			 * the overhead (wasted space).  If we exactly
688 			 * fit, or we're taking the first fit, insert
689 			 * ourselves into the region list.
690 			 */
691 			ovh = rp->er_start - newstart - size;
692 			if ((flags & EX_FAST) || (ovh == 0))
693 				goto found;
694 
695 			/*
696 			 * Don't exactly fit, but check to see
697 			 * if we're better than any current choice.
698 			 */
699 			if ((bestovh == 0) || (ovh < bestovh)) {
700 				bestovh = ovh;
701 				beststart = newstart;
702 				bestlast = last;
703 			}
704 		}
705 
706 		/*
707 		 * Skip past the current region and check again.
708 		 */
709 		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
710 		if (newstart < rp->er_end) {
711 			/*
712 			 * Overflow condition.  Don't error out, since
713 			 * we might have a chunk of space that we can
714 			 * use.
715 			 */
716 			goto fail;
717 		}
718 
719 		/*
720 		 * Check that the current region don't run past the
721 		 * end of the subregion.
722 		 */
723 		if (!LE_OV(newstart, (size - 1), subend))
724 			goto fail;
725 
726 		last = rp;
727 	}
728 
729 	/*
730 	 * The final check is from the current starting point to the
731 	 * end of the subregion.  If there were no allocated regions,
732 	 * "newstart" is set to the beginning of the subregion, or
733 	 * just past the end of the last allocated region, adjusted
734 	 * for alignment in either case.
735 	 */
736 	if (LE_OV(newstart, (size - 1), subend)) {
737 		/*
738 		 * Do a boundary check, if necessary.  Note
739 		 * that a region may *begin* on the boundary,
740 		 * but it must end before the boundary.
741 		 */
742 		if (boundary) {
743 			newend = newstart + (size - 1);
744 
745 			/*
746 			 * Calculate the next boundary after the start
747 			 * of this region.
748 			 */
749 			dontcross = EXTENT_ALIGN(newstart+1, boundary,
750 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
751 			    - 1;
752 
753 #if 0
754 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
755 			    newstart, newend, ex->ex_start, ex->ex_end,
756 			    boundary, dontcross);
757 #endif
758 
759 			/* Check for overflow */
760 			if (dontcross < ex->ex_start)
761 				dontcross = ex->ex_end;
762 			else if (newend > dontcross) {
763 				/*
764 				 * Candidate region crosses boundary.
765 				 * Throw away the leading part and see
766 				 * if we still fit.
767 				 */
768 				newstart = dontcross + 1;
769 				newend = newstart + (size - 1);
770 				dontcross += boundary;
771 				if (!LE_OV(newstart, (size - 1), subend))
772 					goto fail;
773 			}
774 
775 			/*
776 			 * If we run past the end of
777 			 * the extent or the boundary
778 			 * overflows, then the request
779 			 * can't fit.
780 			 */
781 			if (newstart + size - 1 > ex->ex_end ||
782 			    dontcross < newstart)
783 				goto fail;
784 		}
785 
786 		/*
787 		 * We would fit into this space.  Calculate
788 		 * the overhead (wasted space).  If we exactly
789 		 * fit, or we're taking the first fit, insert
790 		 * ourselves into the region list.
791 		 */
792 		ovh = ex->ex_end - newstart - (size - 1);
793 		if ((flags & EX_FAST) || (ovh == 0))
794 			goto found;
795 
796 		/*
797 		 * Don't exactly fit, but check to see
798 		 * if we're better than any current choice.
799 		 */
800 		if ((bestovh == 0) || (ovh < bestovh)) {
801 			bestovh = ovh;
802 			beststart = newstart;
803 			bestlast = last;
804 		}
805 	}
806 
807  fail:
808 	/*
809 	 * One of the following two conditions have
810 	 * occurred:
811 	 *
812 	 *	There is no chunk large enough to hold the request.
813 	 *
814 	 *	If EX_FAST was not specified, there is not an
815 	 *	exact match for the request.
816 	 *
817 	 * Note that if we reach this point and EX_FAST is
818 	 * set, then we know there is no space in the extent for
819 	 * the request.
820 	 */
821 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
822 		/*
823 		 * We have a match that's "good enough".
824 		 */
825 		newstart = beststart;
826 		last = bestlast;
827 		goto found;
828 	}
829 
830 	/*
831 	 * No space currently available.  Wait for it to free up,
832 	 * if possible.
833 	 */
834 	if (flags & EX_WAITSPACE) {
835 		ex->ex_flags |= EXF_WANTED;
836 		error = tsleep(ex,
837 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
838 		if (error)
839 			return (error);
840 		goto alloc_start;
841 	}
842 
843 	extent_free_region_descriptor(ex, myrp);
844 	return (EAGAIN);
845 
846  found:
847 	/*
848 	 * Insert ourselves into the region list.
849 	 */
850 	extent_insert_and_optimize(ex, newstart, size, flags, last, myrp);
851 	*result = newstart;
852 	return (0);
853 }
854 
855 int
856 extent_free(ex, start, size, flags)
857 	struct extent *ex;
858 	u_long start, size;
859 	int flags;
860 {
861 	struct extent_region *rp, *nrp = NULL;
862 	u_long end = start + (size - 1);
863 	int exflags;
864 
865 #ifdef DIAGNOSTIC
866 	/* Check arguments. */
867 	if (ex == NULL)
868 		panic("extent_free: NULL extent");
869 	if ((start < ex->ex_start) || (start > ex->ex_end)) {
870 		extent_print(ex);
871 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
872 		    ex->ex_name, start, size);
873 		panic("extent_free: extent `%s', region not within extent",
874 		    ex->ex_name);
875 	}
876 	/* Check for an overflow. */
877 	if (end < start) {
878 		extent_print(ex);
879 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
880 		    ex->ex_name, start, size);
881 		panic("extent_free: overflow");
882 	}
883 #endif
884 
885 	/*
886 	 * If we're allowing coalescing, we must allocate a region
887 	 * descriptor now, since it might block.
888 	 *
889 	 * XXX Make a static, create-time flags word, so we don't
890 	 * XXX have to lock to read it!
891 	 */
892 	exflags = ex->ex_flags;
893 
894 	if ((exflags & EXF_NOCOALESCE) == 0) {
895 		/* Allocate a region descriptor. */
896 		nrp = extent_alloc_region_descriptor(ex, flags);
897 		if (nrp == NULL)
898 			return (ENOMEM);
899 	}
900 
901 	/*
902 	 * Find region and deallocate.  Several possibilities:
903 	 *
904 	 *	1. (start == er_start) && (end == er_end):
905 	 *	   Free descriptor.
906 	 *
907 	 *	2. (start == er_start) && (end < er_end):
908 	 *	   Adjust er_start.
909 	 *
910 	 *	3. (start > er_start) && (end == er_end):
911 	 *	   Adjust er_end.
912 	 *
913 	 *	4. (start > er_start) && (end < er_end):
914 	 *	   Fragment region.  Requires descriptor alloc.
915 	 *
916 	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
917 	 * is not set.
918 	 */
919 	for (rp = ex->ex_regions.lh_first; rp != NULL;
920 	    rp = rp->er_link.le_next) {
921 		/*
922 		 * Save ourselves some comparisons; does the current
923 		 * region end before chunk to be freed begins?  If so,
924 		 * then we haven't found the appropriate region descriptor.
925 		 */
926 		if (rp->er_end < start)
927 			continue;
928 
929 		/*
930 		 * Save ourselves some traversal; does the current
931 		 * region begin after the chunk to be freed ends?  If so,
932 		 * then we've already passed any possible region descriptors
933 		 * that might have contained the chunk to be freed.
934 		 */
935 		if (rp->er_start > end)
936 			break;
937 
938 		/* Case 1. */
939 		if ((start == rp->er_start) && (end == rp->er_end)) {
940 			LIST_REMOVE(rp, er_link);
941 			extent_free_region_descriptor(ex, rp);
942 			goto done;
943 		}
944 
945 		/*
946 		 * The following cases all require that EXF_NOCOALESCE
947 		 * is not set.
948 		 */
949 		if (ex->ex_flags & EXF_NOCOALESCE)
950 			continue;
951 
952 		/* Case 2. */
953 		if ((start == rp->er_start) && (end < rp->er_end)) {
954 			rp->er_start = (end + 1);
955 			goto done;
956 		}
957 
958 		/* Case 3. */
959 		if ((start > rp->er_start) && (end == rp->er_end)) {
960 			rp->er_end = (start - 1);
961 			goto done;
962 		}
963 
964 		/* Case 4. */
965 		if ((start > rp->er_start) && (end < rp->er_end)) {
966 			/* Fill in new descriptor. */
967 			nrp->er_start = end + 1;
968 			nrp->er_end = rp->er_end;
969 
970 			/* Adjust current descriptor. */
971 			rp->er_end = start - 1;
972 
973 			/* Insert new descriptor after current. */
974 			LIST_INSERT_AFTER(rp, nrp, er_link);
975 
976 			/* We used the new descriptor, so don't free it below */
977 			nrp = NULL;
978 			goto done;
979 		}
980 	}
981 
982 	/* Region not found, or request otherwise invalid. */
983 #if defined(DIAGNOSTIC) || defined(DDB)
984 	extent_print(ex);
985 #endif
986 	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
987 	panic("extent_free: region not found");
988 
989  done:
990 	if (nrp != NULL)
991 		extent_free_region_descriptor(ex, nrp);
992 	if (ex->ex_flags & EXF_WANTED) {
993 		ex->ex_flags &= ~EXF_WANTED;
994 		wakeup(ex);
995 	}
996 	return (0);
997 }
998 
999 static struct extent_region *
1000 extent_alloc_region_descriptor(ex, flags)
1001 	struct extent *ex;
1002 	int flags;
1003 {
1004 	struct extent_region *rp;
1005 
1006 	if (ex->ex_flags & EXF_FIXED) {
1007 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1008 
1009 		while (fex->fex_freelist.lh_first == NULL) {
1010 			if (flags & EX_MALLOCOK)
1011 				goto alloc;
1012 
1013 			if ((flags & EX_WAITOK) == 0)
1014 				return (NULL);
1015 			ex->ex_flags |= EXF_FLWANTED;
1016 			if (tsleep(&fex->fex_freelist,
1017 			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1018 			    "extnt", 0))
1019 				return (NULL);
1020 		}
1021 		rp = fex->fex_freelist.lh_first;
1022 		LIST_REMOVE(rp, er_link);
1023 
1024 		/*
1025 		 * Don't muck with flags after pulling it off the
1026 		 * freelist; it may be a dynamiclly allocated
1027 		 * region pointer that was kindly given to us,
1028 		 * and we need to preserve that information.
1029 		 */
1030 
1031 		return (rp);
1032 	}
1033 
1034  alloc:
1035 	rp = (struct extent_region *)
1036 	    malloc(sizeof(struct extent_region), ex->ex_mtype,
1037 	    (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
1038 
1039 	if (rp != NULL)
1040 		rp->er_flags = ER_ALLOC;
1041 
1042 	return (rp);
1043 }
1044 
1045 static void
1046 extent_free_region_descriptor(ex, rp)
1047 	struct extent *ex;
1048 	struct extent_region *rp;
1049 {
1050 
1051 	if (ex->ex_flags & EXF_FIXED) {
1052 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1053 
1054 		/*
1055 		 * If someone's waiting for a region descriptor,
1056 		 * be nice and give them this one, rather than
1057 		 * just free'ing it back to the system.
1058 		 */
1059 		if (rp->er_flags & ER_ALLOC) {
1060 			if (ex->ex_flags & EXF_FLWANTED) {
1061 				/* Clear all but ER_ALLOC flag. */
1062 				rp->er_flags = ER_ALLOC;
1063 				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1064 				    er_link);
1065 				goto wake_em_up;
1066 			} else {
1067 				free(rp, ex->ex_mtype);
1068 			}
1069 		} else {
1070 			/* Clear all flags. */
1071 			rp->er_flags = 0;
1072 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1073 		}
1074 
1075 		if (ex->ex_flags & EXF_FLWANTED) {
1076  wake_em_up:
1077 			ex->ex_flags &= ~EXF_FLWANTED;
1078 			wakeup(&fex->fex_freelist);
1079 		}
1080 		return;
1081 	}
1082 
1083 	/*
1084 	 * We know it's dynamically allocated if we get here.
1085 	 */
1086 	free(rp, ex->ex_mtype);
1087 }
1088 
1089 #ifndef DDB
1090 #define db_printf printf
1091 #endif
1092 
1093 #if defined(DIAGNOSTIC) || defined(DDB)
1094 void
1095 extent_print(ex)
1096 	struct extent *ex;
1097 {
1098 	struct extent_region *rp;
1099 
1100 	if (ex == NULL)
1101 		panic("extent_print: NULL extent");
1102 
1103 	db_printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1104 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1105 
1106 	for (rp = ex->ex_regions.lh_first; rp != NULL;
1107 	    rp = rp->er_link.le_next)
1108 		db_printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1109 }
1110 #endif
1111