xref: /openbsd-src/sys/kern/subr_extent.c (revision 16a54b9c619f0860eefbb44dd2e4345a69e3b279)
1 /*	$OpenBSD: subr_extent.c,v 1.22 2002/06/11 05:59:25 art Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * General purpose extent manager.
42  */
43 
44 #ifdef _KERNEL
45 #include <sys/param.h>
46 #include <sys/extent.h>
47 #include <sys/malloc.h>
48 #include <sys/time.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/queue.h>
52 #include <sys/pool.h>
53 #include <ddb/db_output.h>
54 #else
55 /*
56  * user-land definitions, so it can fit into a testing harness.
57  */
58 #include <sys/param.h>
59 #include <sys/extent.h>
60 #include <sys/queue.h>
61 #include <errno.h>
62 #include <stdlib.h>
63 #include <stdio.h>
64 
65 #define	malloc(s, t, flags)		malloc(s)
66 #define	free(p, t)			free(p)
67 #define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
68 #define	wakeup(chan)			((void)0)
69 #define db_printf printf
70 #endif
71 
72 static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
73 	    int, struct extent_region *, struct extent_region *);
74 static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
75 static	void extent_free_region_descriptor(struct extent *,
76 	    struct extent_region *);
77 static	void extent_register(struct extent *);
78 
79 /*
80  * Macro to align to an arbitrary power-of-two boundary.
81  */
82 #define EXTENT_ALIGN(_start, _align, _skew)		\
83 	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
84 
85 
86 /*
87  * Register the extent on a doubly linked list.
88  * Should work, no?
89  */
90 static LIST_HEAD(listhead, extent) ext_list;
91 static struct listhead *ext_listp;
92 
93 static void
94 extent_register(ex)
95 	struct extent *ex;
96 {
97 	/* Is this redundant? */
98 	if (ext_listp == NULL){
99 		LIST_INIT(&ext_list);
100 		ext_listp = &ext_list;
101 	}
102 
103 	/* Insert into list */
104 	LIST_INSERT_HEAD(ext_listp, ex, ex_link);
105 }
106 
107 struct pool ex_region_pl;
108 
109 static void
110 extent_pool_init(void)
111 {
112 	static int inited;
113 
114 	if (!inited) {
115 		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
116 		    "extentpl", NULL);
117 		inited = 1;
118 	}
119 }
120 
121 /*
122  * Find a given extent, and return a pointer to
123  * it so that other extent functions can be used
124  * on it.
125  *
126  * Returns NULL on failure.
127  */
128 struct extent *
129 extent_find(name)
130 	char *name;
131 {
132 	struct extent *ep;
133 
134 	for(ep = ext_listp->lh_first; ep != NULL; ep = ep->ex_link.le_next){
135 		if (!strcmp(ep->ex_name, name))
136 			return(ep);
137 	}
138 
139 	return(NULL);
140 }
141 
142 #ifdef DDB
143 /*
144  * Print out all extents registered.  This is used in
145  * DDB show extents
146  */
147 void
148 extent_print_all(void)
149 {
150 	struct extent *ep;
151 
152 	for(ep = ext_listp->lh_first; ep != NULL; ep = ep->ex_link.le_next){
153 		extent_print(ep);
154 	}
155 }
156 #endif
157 
158 /*
159  * Allocate and initialize an extent map.
160  */
161 struct extent *
162 extent_create(name, start, end, mtype, storage, storagesize, flags)
163 	char *name;
164 	u_long start, end;
165 	int mtype;
166 	caddr_t storage;
167 	size_t storagesize;
168 	int flags;
169 {
170 	struct extent *ex;
171 	caddr_t cp = storage;
172 	size_t sz = storagesize;
173 	struct extent_region *rp;
174 	int fixed_extent = (storage != NULL);
175 
176 #ifdef DIAGNOSTIC
177 	/* Check arguments. */
178 	if (name == NULL)
179 		panic("extent_create: name == NULL");
180 	if (end < start) {
181 		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
182 		    name, start, end);
183 		panic("extent_create: end < start");
184 	}
185 	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
186 		panic("extent_create: fixed extent, bad storagesize 0x%lx",
187 		    (u_long)storagesize);
188 	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
189 		panic("extent_create: storage provided for non-fixed");
190 #endif
191 
192 	extent_pool_init();
193 
194 	/* Allocate extent descriptor. */
195 	if (fixed_extent) {
196 		struct extent_fixed *fex;
197 
198 		bzero(storage, storagesize);
199 
200 		/*
201 		 * Align all descriptors on "long" boundaries.
202 		 */
203 		fex = (struct extent_fixed *)cp;
204 		ex = (struct extent *)fex;
205 		cp += ALIGN(sizeof(struct extent_fixed));
206 		sz -= ALIGN(sizeof(struct extent_fixed));
207 		fex->fex_storage = storage;
208 		fex->fex_storagesize = storagesize;
209 
210 		/*
211 		 * In a fixed extent, we have to pre-allocate region
212 		 * descriptors and place them in the extent's freelist.
213 		 */
214 		LIST_INIT(&fex->fex_freelist);
215 		while (sz >= ALIGN(sizeof(struct extent_region))) {
216 			rp = (struct extent_region *)cp;
217 			cp += ALIGN(sizeof(struct extent_region));
218 			sz -= ALIGN(sizeof(struct extent_region));
219 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
220 		}
221 	} else {
222 		ex = (struct extent *)malloc(sizeof(struct extent),
223 		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
224 		if (ex == NULL)
225 			return (NULL);
226 	}
227 
228 	/* Fill in the extent descriptor and return it to the caller. */
229 	LIST_INIT(&ex->ex_regions);
230 	ex->ex_name = name;
231 	ex->ex_start = start;
232 	ex->ex_end = end;
233 	ex->ex_mtype = mtype;
234 	ex->ex_flags = 0;
235 	if (fixed_extent)
236 		ex->ex_flags |= EXF_FIXED;
237 	if (flags & EX_NOCOALESCE)
238 		ex->ex_flags |= EXF_NOCOALESCE;
239 
240 	extent_register(ex);
241 	return (ex);
242 }
243 
244 /*
245  * Destroy an extent map.
246  */
247 void
248 extent_destroy(ex)
249 	struct extent *ex;
250 {
251 	struct extent_region *rp, *orp;
252 
253 #ifdef DIAGNOSTIC
254 	/* Check arguments. */
255 	if (ex == NULL)
256 		panic("extent_destroy: NULL extent");
257 #endif
258 
259 	/* Free all region descriptors in extent. */
260 	for (rp = ex->ex_regions.lh_first; rp != NULL; ) {
261 		orp = rp;
262 		rp = rp->er_link.le_next;
263 		LIST_REMOVE(orp, er_link);
264 		extent_free_region_descriptor(ex, orp);
265 	}
266 
267 	/* Remove from the list of all extents. */
268 	LIST_REMOVE(ex, ex_link);
269 
270 	/* If we're not a fixed extent, free the extent descriptor itself. */
271 	if ((ex->ex_flags & EXF_FIXED) == 0)
272 		free(ex, ex->ex_mtype);
273 }
274 
275 /*
276  * Insert a region descriptor into the sorted region list after the
277  * entry "after" or at the head of the list (if "after" is NULL).
278  * The region descriptor we insert is passed in "rp".  We must
279  * allocate the region descriptor before calling this function!
280  * If we don't need the region descriptor, it will be freed here.
281  */
282 static void
283 extent_insert_and_optimize(ex, start, size, flags, after, rp)
284 	struct extent *ex;
285 	u_long start, size;
286 	int flags;
287 	struct extent_region *after, *rp;
288 {
289 	struct extent_region *nextr;
290 	int appended = 0;
291 
292 	if (after == NULL) {
293 		/*
294 		 * We're the first in the region list.  If there's
295 		 * a region after us, attempt to coalesce to save
296 		 * descriptor overhead.
297 		 */
298 		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
299 		    (ex->ex_regions.lh_first != NULL) &&
300 		    ((start + size) == ex->ex_regions.lh_first->er_start)) {
301 			/*
302 			 * We can coalesce.  Prepend us to the first region.
303 			 */
304 			ex->ex_regions.lh_first->er_start = start;
305 			extent_free_region_descriptor(ex, rp);
306 			return;
307 		}
308 
309 		/*
310 		 * Can't coalesce.  Fill in the region descriptor
311 		 * in, and insert us at the head of the region list.
312 		 */
313 		rp->er_start = start;
314 		rp->er_end = start + (size - 1);
315 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
316 		return;
317 	}
318 
319 	/*
320 	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
321 	 */
322 	if (ex->ex_flags & EXF_NOCOALESCE)
323 		goto cant_coalesce;
324 
325 	/*
326 	 * Attempt to coalesce with the region before us.
327 	 */
328 	if ((after->er_end + 1) == start) {
329 		/*
330 		 * We can coalesce.  Append ourselves and make
331 		 * note of it.
332 		 */
333 		after->er_end = start + (size - 1);
334 		appended = 1;
335 	}
336 
337 	/*
338 	 * Attempt to coalesce with the region after us.
339 	 */
340 	if ((after->er_link.le_next != NULL) &&
341 	    ((start + size) == after->er_link.le_next->er_start)) {
342 		/*
343 		 * We can coalesce.  Note that if we appended ourselves
344 		 * to the previous region, we exactly fit the gap, and
345 		 * can free the "next" region descriptor.
346 		 */
347 		if (appended) {
348 			/*
349 			 * Yup, we can free it up.
350 			 */
351 			after->er_end = after->er_link.le_next->er_end;
352 			nextr = after->er_link.le_next;
353 			LIST_REMOVE(nextr, er_link);
354 			extent_free_region_descriptor(ex, nextr);
355 		} else {
356 			/*
357 			 * Nope, just prepend us to the next region.
358 			 */
359 			after->er_link.le_next->er_start = start;
360 		}
361 
362 		extent_free_region_descriptor(ex, rp);
363 		return;
364 	}
365 
366 	/*
367 	 * We weren't able to coalesce with the next region, but
368 	 * we don't need to allocate a region descriptor if we
369 	 * appended ourselves to the previous region.
370 	 */
371 	if (appended) {
372 		extent_free_region_descriptor(ex, rp);
373 		return;
374 	}
375 
376  cant_coalesce:
377 
378 	/*
379 	 * Fill in the region descriptor and insert ourselves
380 	 * into the region list.
381 	 */
382 	rp->er_start = start;
383 	rp->er_end = start + (size - 1);
384 	LIST_INSERT_AFTER(after, rp, er_link);
385 }
386 
387 /*
388  * Allocate a specific region in an extent map.
389  */
390 int
391 extent_alloc_region(ex, start, size, flags)
392 	struct extent *ex;
393 	u_long start, size;
394 	int flags;
395 {
396 	struct extent_region *rp, *last, *myrp;
397 	u_long end = start + (size - 1);
398 	int error;
399 
400 #ifdef DIAGNOSTIC
401 	/* Check arguments. */
402 	if (ex == NULL)
403 		panic("extent_alloc_region: NULL extent");
404 	if (size < 1) {
405 		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
406 		    ex->ex_name, size);
407 		panic("extent_alloc_region: bad size");
408 	}
409 	if (end < start) {
410 		printf(
411 		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
412 		 ex->ex_name, start, size);
413 		panic("extent_alloc_region: overflow");
414 	}
415 #endif
416 
417 	/*
418 	 * Make sure the requested region lies within the
419 	 * extent.
420 	 */
421 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
422 #ifdef DIAGNOSTIC
423 		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
424 		    ex->ex_name, ex->ex_start, ex->ex_end);
425 		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
426 		    start, end);
427 		panic("extent_alloc_region: region lies outside extent");
428 #else
429 		return (EINVAL);
430 #endif
431 	}
432 
433 	/*
434 	 * Allocate the region descriptor.  It will be freed later
435 	 * if we can coalesce with another region.
436 	 */
437 	myrp = extent_alloc_region_descriptor(ex, flags);
438 	if (myrp == NULL) {
439 #ifdef DIAGNOSTIC
440 		printf(
441 		    "extent_alloc_region: can't allocate region descriptor\n");
442 #endif
443 		return (ENOMEM);
444 	}
445 
446  alloc_start:
447 	/*
448 	 * Attempt to place ourselves in the desired area of the
449 	 * extent.  We save ourselves some work by keeping the list sorted.
450 	 * In other words, if the start of the current region is greater
451 	 * than the end of our region, we don't have to search any further.
452 	 */
453 
454 	/*
455 	 * Keep a pointer to the last region we looked at so
456 	 * that we don't have to traverse the list again when
457 	 * we insert ourselves.  If "last" is NULL when we
458 	 * finally insert ourselves, we go at the head of the
459 	 * list.  See extent_insert_and_optimize() for details.
460 	 */
461 	last = NULL;
462 
463 	for (rp = ex->ex_regions.lh_first; rp != NULL;
464 	    rp = rp->er_link.le_next) {
465 		if (rp->er_start > end) {
466 			/*
467 			 * We lie before this region and don't
468 			 * conflict.
469 			 */
470 			break;
471 		}
472 
473 		/*
474 		 * The current region begins before we end.
475 		 * Check for a conflict.
476 		 */
477 		if (rp->er_end >= start) {
478 			/*
479 			 * We conflict.  If we can (and want to) wait,
480 			 * do so.
481 			 */
482 			if (flags & EX_WAITSPACE) {
483 				ex->ex_flags |= EXF_WANTED;
484 				error = tsleep(ex,
485 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
486 				    "extnt", 0);
487 				if (error)
488 					return (error);
489 				goto alloc_start;
490 			}
491 			extent_free_region_descriptor(ex, myrp);
492 			return (EAGAIN);
493 		}
494 		/*
495 		 * We don't conflict, but this region lies before
496 		 * us.  Keep a pointer to this region, and keep
497 		 * trying.
498 		 */
499 		last = rp;
500 	}
501 
502 	/*
503 	 * We don't conflict with any regions.  "last" points
504 	 * to the region we fall after, or is NULL if we belong
505 	 * at the beginning of the region list.  Insert ourselves.
506 	 */
507 	extent_insert_and_optimize(ex, start, size, flags, last, myrp);
508 	return (0);
509 }
510 
511 /*
512  * Macro to check (x + y) <= z.  This check is designed to fail
513  * if an overflow occurs.
514  */
515 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
516 
517 /*
518  * Allocate a region in an extent map subregion.
519  *
520  * If EX_FAST is specified, we return the first fit in the map.
521  * Otherwise, we try to minimize fragmentation by finding the
522  * smallest gap that will hold the request.
523  *
524  * The allocated region is aligned to "alignment", which must be
525  * a power of 2.
526  */
527 int
528 extent_alloc_subregion(ex, substart, subend, size, alignment, skew, boundary,
529     flags, result)
530 	struct extent *ex;
531 	u_long substart, subend, size, alignment, skew, boundary;
532 	int flags;
533 	u_long *result;
534 {
535 	struct extent_region *rp, *myrp, *last, *bestlast;
536 	u_long newstart, newend, exend, beststart, bestovh, ovh;
537 	u_long dontcross;
538 	int error;
539 
540 #ifdef DIAGNOSTIC
541 	/* Check arguments. */
542 	if (ex == NULL)
543 		panic("extent_alloc_subregion: NULL extent");
544 	if (result == NULL)
545 		panic("extent_alloc_subregion: NULL result pointer");
546 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
547 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
548   printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
549 		    ex->ex_name, ex->ex_start, ex->ex_end);
550 		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
551 		    substart, subend);
552 		panic("extent_alloc_subregion: bad subregion");
553 	}
554 	if ((size < 1) || ((size - 1) > (subend - substart))) {
555 		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
556 		    ex->ex_name, size);
557 		panic("extent_alloc_subregion: bad size");
558 	}
559 	if (alignment == 0)
560 		panic("extent_alloc_subregion: bad alignment");
561 	if (boundary && (boundary < size)) {
562 		printf(
563 		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
564 		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
565 		panic("extent_alloc_subregion: bad boundary");
566 	}
567 #endif
568 
569 	/*
570 	 * Allocate the region descriptor.  It will be freed later
571 	 * if we can coalesce with another region.
572 	 */
573 	myrp = extent_alloc_region_descriptor(ex, flags);
574 	if (myrp == NULL) {
575 #ifdef DIAGNOSTIC
576 		printf(
577 		 "extent_alloc_subregion: can't allocate region descriptor\n");
578 #endif
579 		return (ENOMEM);
580 	}
581 
582  alloc_start:
583 	/*
584 	 * Keep a pointer to the last region we looked at so
585 	 * that we don't have to traverse the list again when
586 	 * we insert ourselves.  If "last" is NULL when we
587 	 * finally insert ourselves, we go at the head of the
588 	 * list.  See extent_insert_and_optimize() for deatails.
589 	 */
590 	last = NULL;
591 
592 	/*
593 	 * Keep track of size and location of the smallest
594 	 * chunk we fit in.
595 	 *
596 	 * Since the extent can be as large as the numeric range
597 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
598 	 * best overhead value can be the maximum unsigned integer.
599 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
600 	 * into the region list immediately on an exact match (which
601 	 * is the only case where "bestovh" would be set to 0).
602 	 */
603 	bestovh = 0;
604 	beststart = 0;
605 	bestlast = NULL;
606 
607 	/*
608 	 * Keep track of end of free region.  This is either the end of extent
609 	 * or the start of a region past the subend.
610 	 */
611 	exend = ex->ex_end;
612 
613 	/*
614 	 * For N allocated regions, we must make (N + 1)
615 	 * checks for unallocated space.  The first chunk we
616 	 * check is the area from the beginning of the subregion
617 	 * to the first allocated region after that point.
618 	 */
619 	newstart = EXTENT_ALIGN(substart, alignment, skew);
620 	if (newstart < ex->ex_start) {
621 #ifdef DIAGNOSTIC
622 		printf(
623       "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
624 		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
625 		panic("extent_alloc_subregion: overflow after alignment");
626 #else
627 		extent_free_region_descriptor(ex, myrp);
628 		return (EINVAL);
629 #endif
630 	}
631 
632 	/*
633 	 * Find the first allocated region that begins on or after
634 	 * the subregion start, advancing the "last" pointer along
635 	 * the way.
636 	 */
637 	for (rp = ex->ex_regions.lh_first; rp != NULL;
638 	     rp = rp->er_link.le_next) {
639 		if (rp->er_start >= newstart)
640 			break;
641 		last = rp;
642 	}
643 
644 	/*
645 	 * Relocate the start of our candidate region to the end of
646 	 * the last allocated region (if there was one overlapping
647 	 * our subrange).
648 	 */
649 	if (last != NULL && last->er_end >= newstart)
650 		newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew);
651 
652 	for (; rp != NULL; rp = rp->er_link.le_next) {
653 		/*
654 		 * If the region pasts the subend, bail out and see
655 		 * if we fit against the subend.
656 		 */
657 		if (rp->er_start >= subend) {
658 			exend = rp->er_start;
659 			break;
660 		}
661 
662 		/*
663 		 * Check the chunk before "rp".  Note that our
664 		 * comparison is safe from overflow conditions.
665 		 */
666 		if (LE_OV(newstart, size, rp->er_start)) {
667 			/*
668 			 * Do a boundary check, if necessary.  Note
669 			 * that a region may *begin* on the boundary,
670 			 * but it must end before the boundary.
671 			 */
672 			if (boundary) {
673 				newend = newstart + (size - 1);
674 
675 				/*
676 				 * Calculate the next boundary after the start
677 				 * of this region.
678 				 */
679 				dontcross = EXTENT_ALIGN(newstart+1, boundary,
680 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
681 				    - 1;
682 
683 #if 0
684 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
685 				    newstart, newend, ex->ex_start, ex->ex_end,
686 				    boundary, dontcross);
687 #endif
688 
689 				/* Check for overflow */
690 				if (dontcross < ex->ex_start)
691 					dontcross = ex->ex_end;
692 				else if (newend > dontcross) {
693 					/*
694 					 * Candidate region crosses boundary.
695 					 * Throw away the leading part and see
696 					 * if we still fit.
697 					 */
698 					newstart = dontcross + 1;
699 					newend = newstart + (size - 1);
700 					dontcross += boundary;
701 					if (!LE_OV(newstart, size, rp->er_start))
702 						goto skip;
703 				}
704 
705 				/*
706 				 * If we run past the end of
707 				 * the extent or the boundary
708 				 * overflows, then the request
709 				 * can't fit.
710 				 */
711 				if (newstart + size - 1 > ex->ex_end ||
712 				    dontcross < newstart)
713 					goto fail;
714 			}
715 
716 			/*
717 			 * We would fit into this space.  Calculate
718 			 * the overhead (wasted space).  If we exactly
719 			 * fit, or we're taking the first fit, insert
720 			 * ourselves into the region list.
721 			 */
722 			ovh = rp->er_start - newstart - size;
723 			if ((flags & EX_FAST) || (ovh == 0))
724 				goto found;
725 
726 			/*
727 			 * Don't exactly fit, but check to see
728 			 * if we're better than any current choice.
729 			 */
730 			if ((bestovh == 0) || (ovh < bestovh)) {
731 				bestovh = ovh;
732 				beststart = newstart;
733 				bestlast = last;
734 			}
735 		}
736 
737 skip:
738 		/*
739 		 * Skip past the current region and check again.
740 		 */
741 		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
742 		if (newstart < rp->er_end) {
743 			/*
744 			 * Overflow condition.  Don't error out, since
745 			 * we might have a chunk of space that we can
746 			 * use.
747 			 */
748 			goto fail;
749 		}
750 
751 		last = rp;
752 	}
753 
754 	/*
755 	 * The final check is from the current starting point to the
756 	 * end of the subregion.  If there were no allocated regions,
757 	 * "newstart" is set to the beginning of the subregion, or
758 	 * just past the end of the last allocated region, adjusted
759 	 * for alignment in either case.
760 	 */
761 	if (LE_OV(newstart, (size - 1), subend)) {
762 		/*
763 		 * Do a boundary check, if necessary.  Note
764 		 * that a region may *begin* on the boundary,
765 		 * but it must end before the boundary.
766 		 */
767 		if (boundary) {
768 			newend = newstart + (size - 1);
769 
770 			/*
771 			 * Calculate the next boundary after the start
772 			 * of this region.
773 			 */
774 			dontcross = EXTENT_ALIGN(newstart+1, boundary,
775 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
776 			    - 1;
777 
778 #if 0
779 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
780 			    newstart, newend, ex->ex_start, ex->ex_end,
781 			    boundary, dontcross);
782 #endif
783 
784 			/* Check for overflow */
785 			if (dontcross < ex->ex_start)
786 				dontcross = ex->ex_end;
787 			else if (newend > dontcross) {
788 				/*
789 				 * Candidate region crosses boundary.
790 				 * Throw away the leading part and see
791 				 * if we still fit.
792 				 */
793 				newstart = dontcross + 1;
794 				newend = newstart + (size - 1);
795 				dontcross += boundary;
796 				if (!LE_OV(newstart, (size - 1), subend))
797 					goto fail;
798 			}
799 
800 			/*
801 			 * If we run past the end of
802 			 * the extent or the boundary
803 			 * overflows, then the request
804 			 * can't fit.
805 			 */
806 			if (newstart + size - 1 > ex->ex_end ||
807 			    dontcross < newstart)
808 				goto fail;
809 		}
810 
811 		/*
812 		 * We would fit into this space.  Calculate
813 		 * the overhead (wasted space).  If we exactly
814 		 * fit, or we're taking the first fit, insert
815 		 * ourselves into the region list.
816 		 */
817 		ovh = exend - newstart - (size - 1);
818 		if ((flags & EX_FAST) || (ovh == 0))
819 			goto found;
820 
821 		/*
822 		 * Don't exactly fit, but check to see
823 		 * if we're better than any current choice.
824 		 */
825 		if ((bestovh == 0) || (ovh < bestovh)) {
826 			bestovh = ovh;
827 			beststart = newstart;
828 			bestlast = last;
829 		}
830 	}
831 
832  fail:
833 	/*
834 	 * One of the following two conditions have
835 	 * occurred:
836 	 *
837 	 *	There is no chunk large enough to hold the request.
838 	 *
839 	 *	If EX_FAST was not specified, there is not an
840 	 *	exact match for the request.
841 	 *
842 	 * Note that if we reach this point and EX_FAST is
843 	 * set, then we know there is no space in the extent for
844 	 * the request.
845 	 */
846 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
847 		/*
848 		 * We have a match that's "good enough".
849 		 */
850 		newstart = beststart;
851 		last = bestlast;
852 		goto found;
853 	}
854 
855 	/*
856 	 * No space currently available.  Wait for it to free up,
857 	 * if possible.
858 	 */
859 	if (flags & EX_WAITSPACE) {
860 		ex->ex_flags |= EXF_WANTED;
861 		error = tsleep(ex,
862 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
863 		if (error)
864 			return (error);
865 		goto alloc_start;
866 	}
867 
868 	extent_free_region_descriptor(ex, myrp);
869 	return (EAGAIN);
870 
871  found:
872 	/*
873 	 * Insert ourselves into the region list.
874 	 */
875 	extent_insert_and_optimize(ex, newstart, size, flags, last, myrp);
876 	*result = newstart;
877 	return (0);
878 }
879 
/*
 * Free (deallocate) the region [start, start + size - 1] in an extent
 * map.  The region must match an allocated region exactly, or lie
 * entirely within one; the partial-free cases (2-4 below) are only
 * handled when the extent allows coalescing.  Wakes up any thread
 * sleeping with EX_WAITSPACE in the allocators.  Panics if the region
 * cannot be found.  Returns 0 on success, ENOMEM if a descriptor
 * needed for a possible split could not be allocated.
 */
int
extent_free(ex, start, size, flags)
	struct extent *ex;
	u_long start, size;
	int flags;
{
	struct extent_region *rp, *nrp = NULL;
	u_long end = start + (size - 1);
	int exflags;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("extent_free: NULL extent");
	if ((start < ex->ex_start) || (start > ex->ex_end)) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: extent `%s', region not within extent",
		    ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
		    ex->ex_name, start, size);
		panic("extent_free: overflow");
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.  It is only actually
	 * needed if we end up splitting a region (case 4 below).
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 */
	for (rp = ex->ex_regions.lh_first; rp != NULL;
	    rp = rp->er_link.le_next) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below */
			nrp = NULL;
			goto done;
		}
	}

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
	panic("extent_free: region not found");

 done:
	/* Release the pre-allocated descriptor if it went unused. */
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (0);
}
1023 
/*
 * Obtain a region descriptor.  Fixed extents take descriptors from
 * their pre-allocated freelist; if the freelist is empty, they fall
 * back to the pool when EX_MALLOCOK is set, sleep for one to be freed
 * when EX_WAITOK is set, and fail otherwise.  Non-fixed extents always
 * allocate from the global ex_region_pl pool.  Returns NULL on failure.
 */
static struct extent_region *
extent_alloc_region_descriptor(ex, flags)
	struct extent *ex;
	int flags;
{
	struct extent_region *rp;
	int s;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (fex->fex_freelist.lh_first == NULL) {
			/* Freelist empty: dynamic allocation allowed? */
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			/*
			 * Wait for extent_free_region_descriptor() to
			 * put one back on the freelist and wake us.
			 */
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", 0))
				return (NULL);
		}
		rp = fex->fex_freelist.lh_first;
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	s = splvm();
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK : 0);
	splx(s);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}
1069 
/*
 * Release a region descriptor.  For fixed extents, the descriptor is
 * returned to the extent's freelist -- or, if it was dynamically
 * allocated (ER_ALLOC) and a thread is waiting for one (EXF_FLWANTED),
 * it is donated to the freelist so the waiter can use it.  Descriptors
 * belonging to non-fixed extents always go back to the region pool.
 */
static void
extent_free_region_descriptor(ex, rp)
	struct extent *ex;
	struct extent_region *rp;
{
	int s;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				s = splvm();
				pool_put(&ex_region_pl, rp);
				splx(s);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		/* Wake any thread sleeping for a free descriptor. */
		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	s = splvm();
	pool_put(&ex_region_pl, rp);
	splx(s);
}
1118 
1119 #ifndef DDB
1120 #define db_printf printf
1121 #endif
1122 
1123 #if defined(DIAGNOSTIC) || defined(DDB)
1124 void
1125 extent_print(ex)
1126 	struct extent *ex;
1127 {
1128 	struct extent_region *rp;
1129 
1130 	if (ex == NULL)
1131 		panic("extent_print: NULL extent");
1132 
1133 	db_printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1134 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1135 
1136 	for (rp = ex->ex_regions.lh_first; rp != NULL;
1137 	    rp = rp->er_link.le_next)
1138 		db_printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1139 }
1140 #endif
1141