xref: /openbsd-src/sys/kern/subr_extent.c (revision d874cce4b1d9fe6b41c9e4f2117a77d8a4a37b92)
1 /*	$OpenBSD: subr_extent.c,v 1.34 2008/06/26 05:42:20 ray Exp $	*/
2 /*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe and Matthias Drochner.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * General purpose extent manager.
35  */
36 
37 #ifdef _KERNEL
38 #include <sys/param.h>
39 #include <sys/extent.h>
40 #include <sys/malloc.h>
41 #include <sys/time.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/queue.h>
45 #include <sys/pool.h>
46 #include <ddb/db_output.h>
47 #else
48 /*
49  * user-land definitions, so it can fit into a testing harness.
50  */
51 #include <sys/param.h>
52 #include <sys/pool.h>
53 #include <sys/extent.h>
54 #include <sys/queue.h>
55 #include <errno.h>
56 #include <stdlib.h>
57 #include <stdio.h>
58 
59 #define	malloc(s, t, flags)		malloc(s)
60 #define	free(p, t)			free(p)
61 #define	tsleep(chan, pri, str, timo)	(EWOULDBLOCK)
62 #define	wakeup(chan)			((void)0)
63 #define	pool_get(pool, flags)		malloc((pool)->pr_size, 0, 0)
64 #define	pool_init(a, b, c, d, e, f, g)	(a)->pr_size = (b)
65 #define	pool_put(pool, rp)		free((rp), 0)
66 #define	panic				printf
67 #define	splvm()				(1)
68 #define	splx(s)				((void)(s))
69 #endif
70 
71 static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
72 	    struct extent_region *, struct extent_region *);
73 static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
74 static	void extent_free_region_descriptor(struct extent *,
75 	    struct extent_region *);
76 
77 /*
78  * Shortcut to align to an arbitrary power-of-two boundary.
79  */
80 static __inline__ u_long
81 extent_align(u_long start, u_long align, u_long skew)
82 {
83 	return ((((start - skew) + (align - 1)) & (-align)) + skew);
84 }
85 
86 
87 #if defined(DIAGNOSTIC) || defined(DDB)
88 /*
89  * Register the extent on a doubly linked list.
90  * Should work, no?
91  */
92 static LIST_HEAD(listhead, extent) ext_list;
93 static	void extent_register(struct extent *);
94 
95 static void
96 extent_register(struct extent *ex)
97 {
98 #ifdef DIAGNOSTIC
99 	struct extent *ep;
100 #endif
101 	static int initialized;
102 
103 	if (!initialized){
104 		LIST_INIT(&ext_list);
105 		initialized = 1;
106 	}
107 
108 #ifdef DIAGNOSTIC
109 	LIST_FOREACH(ep, &ext_list, ex_link) {
110 		if (ep == ex)
111 			panic("extent_register: already registered");
112 	}
113 #endif
114 
115 	/* Insert into list */
116 	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
117 }
118 #endif	/* DIAGNOSTIC || DDB */
119 
120 struct pool ex_region_pl;
121 
122 static void
123 extent_pool_init(void)
124 {
125 	static int inited;
126 
127 	if (!inited) {
128 		pool_init(&ex_region_pl, sizeof(struct extent_region), 0, 0, 0,
129 		    "extentpl", NULL);
130 		inited = 1;
131 	}
132 }
133 
134 #ifdef DDB
135 /*
136  * Print out all extents registered.  This is used in
137  * DDB show extents
138  */
139 void
140 extent_print_all(void)
141 {
142 	struct extent *ep;
143 
144 	LIST_FOREACH(ep, &ext_list, ex_link) {
145 		extent_print(ep);
146 	}
147 }
148 #endif
149 
150 /*
151  * Allocate and initialize an extent map.
152  */
153 struct extent *
154 extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
155     size_t storagesize, int flags)
156 {
157 	struct extent *ex;
158 	caddr_t cp = storage;
159 	size_t sz = storagesize;
160 	struct extent_region *rp;
161 	int fixed_extent = (storage != NULL);
162 
163 #ifdef DIAGNOSTIC
164 	/* Check arguments. */
165 	if (name == NULL)
166 		panic("extent_create: name == NULL");
167 	if (end < start) {
168 		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
169 		    name, start, end);
170 		panic("extent_create: end < start");
171 	}
172 	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
173 		panic("extent_create: fixed extent, bad storagesize 0x%lx",
174 		    (u_long)storagesize);
175 	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
176 		panic("extent_create: storage provided for non-fixed");
177 #endif
178 
179 	extent_pool_init();
180 
181 	/* Allocate extent descriptor. */
182 	if (fixed_extent) {
183 		struct extent_fixed *fex;
184 
185 		bzero(storage, storagesize);
186 
187 		/*
188 		 * Align all descriptors on "long" boundaries.
189 		 */
190 		fex = (struct extent_fixed *)cp;
191 		ex = (struct extent *)fex;
192 		cp += ALIGN(sizeof(struct extent_fixed));
193 		sz -= ALIGN(sizeof(struct extent_fixed));
194 		fex->fex_storage = storage;
195 		fex->fex_storagesize = storagesize;
196 
197 		/*
198 		 * In a fixed extent, we have to pre-allocate region
199 		 * descriptors and place them in the extent's freelist.
200 		 */
201 		LIST_INIT(&fex->fex_freelist);
202 		while (sz >= ALIGN(sizeof(struct extent_region))) {
203 			rp = (struct extent_region *)cp;
204 			cp += ALIGN(sizeof(struct extent_region));
205 			sz -= ALIGN(sizeof(struct extent_region));
206 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
207 		}
208 	} else {
209 		ex = (struct extent *)malloc(sizeof(struct extent),
210 		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
211 		if (ex == NULL)
212 			return (NULL);
213 	}
214 
215 	/* Fill in the extent descriptor and return it to the caller. */
216 	LIST_INIT(&ex->ex_regions);
217 	ex->ex_name = name;
218 	ex->ex_start = start;
219 	ex->ex_end = end;
220 	ex->ex_mtype = mtype;
221 	ex->ex_flags = 0;
222 	if (fixed_extent)
223 		ex->ex_flags |= EXF_FIXED;
224 	if (flags & EX_NOCOALESCE)
225 		ex->ex_flags |= EXF_NOCOALESCE;
226 
227 #if defined(DIAGNOSTIC) || defined(DDB)
228 	extent_register(ex);
229 #endif
230 	return (ex);
231 }
232 
233 /*
234  * Destroy an extent map.
235  */
236 void
237 extent_destroy(struct extent *ex)
238 {
239 	struct extent_region *rp, *orp;
240 
241 #ifdef DIAGNOSTIC
242 	/* Check arguments. */
243 	if (ex == NULL)
244 		panic("extent_destroy: NULL extent");
245 #endif
246 
247 	/* Free all region descriptors in extent. */
248 	for (rp = LIST_FIRST(&ex->ex_regions);
249 	    rp != LIST_END(&ex->ex_regions); ) {
250 		orp = rp;
251 		rp = LIST_NEXT(rp, er_link);
252 		LIST_REMOVE(orp, er_link);
253 		extent_free_region_descriptor(ex, orp);
254 	}
255 
256 #if defined(DIAGNOSTIC) || defined(DDB)
257 	/* Remove from the list of all extents. */
258 	LIST_REMOVE(ex, ex_link);
259 #endif
260 
261 	/* If we're not a fixed extent, free the extent descriptor itself. */
262 	if ((ex->ex_flags & EXF_FIXED) == 0)
263 		free(ex, ex->ex_mtype);
264 }
265 
266 /*
267  * Insert a region descriptor into the sorted region list after the
268  * entry "after" or at the head of the list (if "after" is NULL).
269  * The region descriptor we insert is passed in "rp".  We must
270  * allocate the region descriptor before calling this function!
271  * If we don't need the region descriptor, it will be freed here.
272  */
273 static void
274 extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
275     struct extent_region *after, struct extent_region *rp)
276 {
277 	struct extent_region *nextr;
278 	int appended = 0;
279 
280 	if (after == NULL) {
281 		/*
282 		 * We're the first in the region list.  If there's
283 		 * a region after us, attempt to coalesce to save
284 		 * descriptor overhead.
285 		 */
286 		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
287 		    !LIST_EMPTY(&ex->ex_regions) &&
288 		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
289 			/*
290 			 * We can coalesce.  Prepend us to the first region.
291 			 */
292 			LIST_FIRST(&ex->ex_regions)->er_start = start;
293 			extent_free_region_descriptor(ex, rp);
294 			return;
295 		}
296 
297 		/*
298 		 * Can't coalesce.  Fill in the region descriptor
299 		 * in, and insert us at the head of the region list.
300 		 */
301 		rp->er_start = start;
302 		rp->er_end = start + (size - 1);
303 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
304 		return;
305 	}
306 
307 	/*
308 	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
309 	 */
310 	if (ex->ex_flags & EXF_NOCOALESCE)
311 		goto cant_coalesce;
312 
313 	/*
314 	 * Attempt to coalesce with the region before us.
315 	 */
316 	if ((after->er_end + 1) == start) {
317 		/*
318 		 * We can coalesce.  Append ourselves and make
319 		 * note of it.
320 		 */
321 		after->er_end = start + (size - 1);
322 		appended = 1;
323 	}
324 
325 	/*
326 	 * Attempt to coalesce with the region after us.
327 	 */
328 	if (LIST_NEXT(after, er_link) != NULL &&
329 	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
330 		/*
331 		 * We can coalesce.  Note that if we appended ourselves
332 		 * to the previous region, we exactly fit the gap, and
333 		 * can free the "next" region descriptor.
334 		 */
335 		if (appended) {
336 			/*
337 			 * Yup, we can free it up.
338 			 */
339 			after->er_end = LIST_NEXT(after, er_link)->er_end;
340 			nextr = LIST_NEXT(after, er_link);
341 			LIST_REMOVE(nextr, er_link);
342 			extent_free_region_descriptor(ex, nextr);
343 		} else {
344 			/*
345 			 * Nope, just prepend us to the next region.
346 			 */
347 			LIST_NEXT(after, er_link)->er_start = start;
348 		}
349 
350 		extent_free_region_descriptor(ex, rp);
351 		return;
352 	}
353 
354 	/*
355 	 * We weren't able to coalesce with the next region, but
356 	 * we don't need to allocate a region descriptor if we
357 	 * appended ourselves to the previous region.
358 	 */
359 	if (appended) {
360 		extent_free_region_descriptor(ex, rp);
361 		return;
362 	}
363 
364  cant_coalesce:
365 
366 	/*
367 	 * Fill in the region descriptor and insert ourselves
368 	 * into the region list.
369 	 */
370 	rp->er_start = start;
371 	rp->er_end = start + (size - 1);
372 	LIST_INSERT_AFTER(after, rp, er_link);
373 }
374 
375 /*
376  * Allocate a specific region in an extent map.
377  */
378 int
379 extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
380 {
381 	struct extent_region *rp, *last, *myrp;
382 	u_long end = start + (size - 1);
383 	int error;
384 
385 #ifdef DIAGNOSTIC
386 	/* Check arguments. */
387 	if (ex == NULL)
388 		panic("extent_alloc_region: NULL extent");
389 	if (size < 1) {
390 		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
391 		    ex->ex_name, size);
392 		panic("extent_alloc_region: bad size");
393 	}
394 	if (end < start) {
395 		printf(
396 		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
397 		 ex->ex_name, start, size);
398 		panic("extent_alloc_region: overflow");
399 	}
400 #endif
401 
402 	/*
403 	 * Make sure the requested region lies within the
404 	 * extent.
405 	 */
406 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
407 #ifdef DIAGNOSTIC
408 		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
409 		    ex->ex_name, ex->ex_start, ex->ex_end);
410 		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
411 		    start, end);
412 		panic("extent_alloc_region: region lies outside extent");
413 #else
414 		return (EINVAL);
415 #endif
416 	}
417 
418 	/*
419 	 * Allocate the region descriptor.  It will be freed later
420 	 * if we can coalesce with another region.
421 	 */
422 	myrp = extent_alloc_region_descriptor(ex, flags);
423 	if (myrp == NULL) {
424 #ifdef DIAGNOSTIC
425 		printf(
426 		    "extent_alloc_region: can't allocate region descriptor\n");
427 #endif
428 		return (ENOMEM);
429 	}
430 
431  alloc_start:
432 	/*
433 	 * Attempt to place ourselves in the desired area of the
434 	 * extent.  We save ourselves some work by keeping the list sorted.
435 	 * In other words, if the start of the current region is greater
436 	 * than the end of our region, we don't have to search any further.
437 	 */
438 
439 	/*
440 	 * Keep a pointer to the last region we looked at so
441 	 * that we don't have to traverse the list again when
442 	 * we insert ourselves.  If "last" is NULL when we
443 	 * finally insert ourselves, we go at the head of the
444 	 * list.  See extent_insert_and_optimize() for details.
445 	 */
446 	last = NULL;
447 
448 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
449 		if (rp->er_start > end) {
450 			/*
451 			 * We lie before this region and don't
452 			 * conflict.
453 			 */
454 			break;
455 		}
456 
457 		/*
458 		 * The current region begins before we end.
459 		 * Check for a conflict.
460 		 */
461 		if (rp->er_end >= start) {
462 			/*
463 			 * We conflict.  If we can (and want to) wait,
464 			 * do so.
465 			 */
466 			if (flags & EX_WAITSPACE) {
467 				ex->ex_flags |= EXF_WANTED;
468 				error = tsleep(ex,
469 				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
470 				    "extnt", 0);
471 				if (error)
472 					return (error);
473 				goto alloc_start;
474 			}
475 			extent_free_region_descriptor(ex, myrp);
476 			return (EAGAIN);
477 		}
478 		/*
479 		 * We don't conflict, but this region lies before
480 		 * us.  Keep a pointer to this region, and keep
481 		 * trying.
482 		 */
483 		last = rp;
484 	}
485 
486 	/*
487 	 * We don't conflict with any regions.  "last" points
488 	 * to the region we fall after, or is NULL if we belong
489 	 * at the beginning of the region list.  Insert ourselves.
490 	 */
491 	extent_insert_and_optimize(ex, start, size, last, myrp);
492 	return (0);
493 }
494 
495 /*
496  * Macro to check (x + y) <= z.  This check is designed to fail
497  * if an overflow occurs.
498  */
499 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
500 
501 /*
502  * Allocate a region in an extent map subregion.
503  *
504  * If EX_FAST is specified, we return the first fit in the map.
505  * Otherwise, we try to minimize fragmentation by finding the
506  * smallest gap that will hold the request.
507  *
508  * The allocated region is aligned to "alignment", which must be
509  * a power of 2.
510  */
511 int
512 extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
513     u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
514     u_long *result)
515 {
516 	struct extent_region *rp, *myrp, *last, *bestlast;
517 	u_long newstart, newend, exend, beststart, bestovh, ovh;
518 	u_long dontcross;
519 	int error;
520 
521 #ifdef DIAGNOSTIC
522 	/* Check arguments. */
523 	if (ex == NULL)
524 		panic("extent_alloc_subregion: NULL extent");
525 	if (result == NULL)
526 		panic("extent_alloc_subregion: NULL result pointer");
527 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
528 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
529   printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
530 		    ex->ex_name, ex->ex_start, ex->ex_end);
531 		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
532 		    substart, subend);
533 		panic("extent_alloc_subregion: bad subregion");
534 	}
535 	if ((size < 1) || ((size - 1) > (subend - substart))) {
536 		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
537 		    ex->ex_name, size);
538 		panic("extent_alloc_subregion: bad size");
539 	}
540 	if (alignment == 0)
541 		panic("extent_alloc_subregion: bad alignment");
542 	if (boundary && (boundary < size)) {
543 		printf(
544 		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
545 		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
546 		panic("extent_alloc_subregion: bad boundary");
547 	}
548 #endif
549 
550 	/*
551 	 * Allocate the region descriptor.  It will be freed later
552 	 * if we can coalesce with another region.
553 	 */
554 	myrp = extent_alloc_region_descriptor(ex, flags);
555 	if (myrp == NULL) {
556 #ifdef DIAGNOSTIC
557 		printf(
558 		 "extent_alloc_subregion: can't allocate region descriptor\n");
559 #endif
560 		return (ENOMEM);
561 	}
562 
563  alloc_start:
564 	/*
565 	 * Keep a pointer to the last region we looked at so
566 	 * that we don't have to traverse the list again when
567 	 * we insert ourselves.  If "last" is NULL when we
568 	 * finally insert ourselves, we go at the head of the
569 	 * list.  See extent_insert_and_optimize() for deatails.
570 	 */
571 	last = NULL;
572 
573 	/*
574 	 * Keep track of size and location of the smallest
575 	 * chunk we fit in.
576 	 *
577 	 * Since the extent can be as large as the numeric range
578 	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
579 	 * best overhead value can be the maximum unsigned integer.
580 	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
581 	 * into the region list immediately on an exact match (which
582 	 * is the only case where "bestovh" would be set to 0).
583 	 */
584 	bestovh = 0;
585 	beststart = 0;
586 	bestlast = NULL;
587 
588 	/*
589 	 * Keep track of end of free region.  This is either the end of extent
590 	 * or the start of a region past the subend.
591 	 */
592 	exend = ex->ex_end;
593 
594 	/*
595 	 * For N allocated regions, we must make (N + 1)
596 	 * checks for unallocated space.  The first chunk we
597 	 * check is the area from the beginning of the subregion
598 	 * to the first allocated region after that point.
599 	 */
600 	newstart = extent_align(substart, alignment, skew);
601 	if (newstart < ex->ex_start) {
602 #ifdef DIAGNOSTIC
603 		printf(
604       "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
605 		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
606 		panic("extent_alloc_subregion: overflow after alignment");
607 #else
608 		extent_free_region_descriptor(ex, myrp);
609 		return (EINVAL);
610 #endif
611 	}
612 
613 	/*
614 	 * Find the first allocated region that begins on or after
615 	 * the subregion start, advancing the "last" pointer along
616 	 * the way.
617 	 */
618 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
619 		if (rp->er_start >= newstart)
620 			break;
621 		last = rp;
622 	}
623 
624 	/*
625 	 * Relocate the start of our candidate region to the end of
626 	 * the last allocated region (if there was one overlapping
627 	 * our subrange).
628 	 */
629 	if (last != NULL && last->er_end >= newstart)
630 		newstart = extent_align((last->er_end + 1), alignment, skew);
631 
632 	for (; rp != LIST_END(&ex->ex_regions); rp = LIST_NEXT(rp, er_link)) {
633 		/*
634 		 * If the region pasts the subend, bail out and see
635 		 * if we fit against the subend.
636 		 */
637 		if (rp->er_start > subend) {
638 			exend = rp->er_start;
639 			break;
640 		}
641 
642 		/*
643 		 * Check the chunk before "rp".  Note that our
644 		 * comparison is safe from overflow conditions.
645 		 */
646 		if (LE_OV(newstart, size, rp->er_start)) {
647 			/*
648 			 * Do a boundary check, if necessary.  Note
649 			 * that a region may *begin* on the boundary,
650 			 * but it must end before the boundary.
651 			 */
652 			if (boundary) {
653 				newend = newstart + (size - 1);
654 
655 				/*
656 				 * Calculate the next boundary after the start
657 				 * of this region.
658 				 */
659 				dontcross = extent_align(newstart+1, boundary,
660 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
661 				    - 1;
662 
663 #if 0
664 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
665 				    newstart, newend, ex->ex_start, ex->ex_end,
666 				    boundary, dontcross);
667 #endif
668 
669 				/* Check for overflow */
670 				if (dontcross < ex->ex_start)
671 					dontcross = ex->ex_end;
672 				else if (newend > dontcross) {
673 					/*
674 					 * Candidate region crosses boundary.
675 					 * Throw away the leading part and see
676 					 * if we still fit.
677 					 */
678 					newstart = dontcross + 1;
679 					newend = newstart + (size - 1);
680 					dontcross += boundary;
681 					if (!LE_OV(newstart, size, rp->er_start))
682 						goto skip;
683 				}
684 
685 				/*
686 				 * If we run past the end of
687 				 * the extent or the boundary
688 				 * overflows, then the request
689 				 * can't fit.
690 				 */
691 				if (newstart + size - 1 > ex->ex_end ||
692 				    dontcross < newstart)
693 					goto fail;
694 			}
695 
696 			/*
697 			 * We would fit into this space.  Calculate
698 			 * the overhead (wasted space).  If we exactly
699 			 * fit, or we're taking the first fit, insert
700 			 * ourselves into the region list.
701 			 */
702 			ovh = rp->er_start - newstart - size;
703 			if ((flags & EX_FAST) || (ovh == 0))
704 				goto found;
705 
706 			/*
707 			 * Don't exactly fit, but check to see
708 			 * if we're better than any current choice.
709 			 */
710 			if ((bestovh == 0) || (ovh < bestovh)) {
711 				bestovh = ovh;
712 				beststart = newstart;
713 				bestlast = last;
714 			}
715 		}
716 
717 skip:
718 		/*
719 		 * Skip past the current region and check again.
720 		 */
721 		newstart = extent_align((rp->er_end + 1), alignment, skew);
722 		if (newstart < rp->er_end) {
723 			/*
724 			 * Overflow condition.  Don't error out, since
725 			 * we might have a chunk of space that we can
726 			 * use.
727 			 */
728 			goto fail;
729 		}
730 
731 		last = rp;
732 	}
733 
734 	/*
735 	 * The final check is from the current starting point to the
736 	 * end of the subregion.  If there were no allocated regions,
737 	 * "newstart" is set to the beginning of the subregion, or
738 	 * just past the end of the last allocated region, adjusted
739 	 * for alignment in either case.
740 	 */
741 	if (LE_OV(newstart, (size - 1), subend)) {
742 		/*
743 		 * Do a boundary check, if necessary.  Note
744 		 * that a region may *begin* on the boundary,
745 		 * but it must end before the boundary.
746 		 */
747 		if (boundary) {
748 			newend = newstart + (size - 1);
749 
750 			/*
751 			 * Calculate the next boundary after the start
752 			 * of this region.
753 			 */
754 			dontcross = extent_align(newstart+1, boundary,
755 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
756 			    - 1;
757 
758 #if 0
759 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
760 			    newstart, newend, ex->ex_start, ex->ex_end,
761 			    boundary, dontcross);
762 #endif
763 
764 			/* Check for overflow */
765 			if (dontcross < ex->ex_start)
766 				dontcross = ex->ex_end;
767 			else if (newend > dontcross) {
768 				/*
769 				 * Candidate region crosses boundary.
770 				 * Throw away the leading part and see
771 				 * if we still fit.
772 				 */
773 				newstart = dontcross + 1;
774 				newend = newstart + (size - 1);
775 				dontcross += boundary;
776 				if (!LE_OV(newstart, (size - 1), subend))
777 					goto fail;
778 			}
779 
780 			/*
781 			 * If we run past the end of
782 			 * the extent or the boundary
783 			 * overflows, then the request
784 			 * can't fit.
785 			 */
786 			if (newstart + size - 1 > ex->ex_end ||
787 			    dontcross < newstart)
788 				goto fail;
789 		}
790 
791 		/*
792 		 * We would fit into this space.  Calculate
793 		 * the overhead (wasted space).  If we exactly
794 		 * fit, or we're taking the first fit, insert
795 		 * ourselves into the region list.
796 		 */
797 		ovh = exend - newstart - (size - 1);
798 		if ((flags & EX_FAST) || (ovh == 0))
799 			goto found;
800 
801 		/*
802 		 * Don't exactly fit, but check to see
803 		 * if we're better than any current choice.
804 		 */
805 		if ((bestovh == 0) || (ovh < bestovh)) {
806 			bestovh = ovh;
807 			beststart = newstart;
808 			bestlast = last;
809 		}
810 	}
811 
812  fail:
813 	/*
814 	 * One of the following two conditions have
815 	 * occurred:
816 	 *
817 	 *	There is no chunk large enough to hold the request.
818 	 *
819 	 *	If EX_FAST was not specified, there is not an
820 	 *	exact match for the request.
821 	 *
822 	 * Note that if we reach this point and EX_FAST is
823 	 * set, then we know there is no space in the extent for
824 	 * the request.
825 	 */
826 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
827 		/*
828 		 * We have a match that's "good enough".
829 		 */
830 		newstart = beststart;
831 		last = bestlast;
832 		goto found;
833 	}
834 
835 	/*
836 	 * No space currently available.  Wait for it to free up,
837 	 * if possible.
838 	 */
839 	if (flags & EX_WAITSPACE) {
840 		ex->ex_flags |= EXF_WANTED;
841 		error = tsleep(ex,
842 		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0), "extnt", 0);
843 		if (error)
844 			return (error);
845 		goto alloc_start;
846 	}
847 
848 	extent_free_region_descriptor(ex, myrp);
849 	return (EAGAIN);
850 
851  found:
852 	/*
853 	 * Insert ourselves into the region list.
854 	 */
855 	extent_insert_and_optimize(ex, newstart, size, last, myrp);
856 	*result = newstart;
857 	return (0);
858 }
859 
860 int
861 extent_free(struct extent *ex, u_long start, u_long size, int flags)
862 {
863 	struct extent_region *rp, *nrp = NULL;
864 	u_long end = start + (size - 1);
865 	int exflags;
866 
867 #ifdef DIAGNOSTIC
868 	/* Check arguments. */
869 	if (ex == NULL)
870 		panic("extent_free: NULL extent");
871 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
872 		extent_print(ex);
873 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
874 		    ex->ex_name, start, size);
875 		panic("extent_free: extent `%s', region not within extent",
876 		    ex->ex_name);
877 	}
878 	/* Check for an overflow. */
879 	if (end < start) {
880 		extent_print(ex);
881 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
882 		    ex->ex_name, start, size);
883 		panic("extent_free: overflow");
884 	}
885 #endif
886 
887 	/*
888 	 * If we're allowing coalescing, we must allocate a region
889 	 * descriptor now, since it might block.
890 	 *
891 	 * XXX Make a static, create-time flags word, so we don't
892 	 * XXX have to lock to read it!
893 	 */
894 	exflags = ex->ex_flags;
895 
896 	if ((exflags & EXF_NOCOALESCE) == 0) {
897 		/* Allocate a region descriptor. */
898 		nrp = extent_alloc_region_descriptor(ex, flags);
899 		if (nrp == NULL)
900 			return (ENOMEM);
901 	}
902 
903 	/*
904 	 * Find region and deallocate.  Several possibilities:
905 	 *
906 	 *	1. (start == er_start) && (end == er_end):
907 	 *	   Free descriptor.
908 	 *
909 	 *	2. (start == er_start) && (end < er_end):
910 	 *	   Adjust er_start.
911 	 *
912 	 *	3. (start > er_start) && (end == er_end):
913 	 *	   Adjust er_end.
914 	 *
915 	 *	4. (start > er_start) && (end < er_end):
916 	 *	   Fragment region.  Requires descriptor alloc.
917 	 *
918 	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
919 	 * is not set.
920 	 */
921 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
922 		/*
923 		 * Save ourselves some comparisons; does the current
924 		 * region end before chunk to be freed begins?  If so,
925 		 * then we haven't found the appropriate region descriptor.
926 		 */
927 		if (rp->er_end < start)
928 			continue;
929 
930 		/*
931 		 * Save ourselves some traversal; does the current
932 		 * region begin after the chunk to be freed ends?  If so,
933 		 * then we've already passed any possible region descriptors
934 		 * that might have contained the chunk to be freed.
935 		 */
936 		if (rp->er_start > end)
937 			break;
938 
939 		/* Case 1. */
940 		if ((start == rp->er_start) && (end == rp->er_end)) {
941 			LIST_REMOVE(rp, er_link);
942 			extent_free_region_descriptor(ex, rp);
943 			goto done;
944 		}
945 
946 		/*
947 		 * The following cases all require that EXF_NOCOALESCE
948 		 * is not set.
949 		 */
950 		if (ex->ex_flags & EXF_NOCOALESCE)
951 			continue;
952 
953 		/* Case 2. */
954 		if ((start == rp->er_start) && (end < rp->er_end)) {
955 			rp->er_start = (end + 1);
956 			goto done;
957 		}
958 
959 		/* Case 3. */
960 		if ((start > rp->er_start) && (end == rp->er_end)) {
961 			rp->er_end = (start - 1);
962 			goto done;
963 		}
964 
965 		/* Case 4. */
966 		if ((start > rp->er_start) && (end < rp->er_end)) {
967 			/* Fill in new descriptor. */
968 			nrp->er_start = end + 1;
969 			nrp->er_end = rp->er_end;
970 
971 			/* Adjust current descriptor. */
972 			rp->er_end = start - 1;
973 
974 			/* Insert new descriptor after current. */
975 			LIST_INSERT_AFTER(rp, nrp, er_link);
976 
977 			/* We used the new descriptor, so don't free it below */
978 			nrp = NULL;
979 			goto done;
980 		}
981 	}
982 
983 	/* Region not found, or request otherwise invalid. */
984 #if defined(DIAGNOSTIC) || defined(DDB)
985 	extent_print(ex);
986 #endif
987 	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
988 	panic("extent_free: region not found");
989 
990  done:
991 	if (nrp != NULL)
992 		extent_free_region_descriptor(ex, nrp);
993 	if (ex->ex_flags & EXF_WANTED) {
994 		ex->ex_flags &= ~EXF_WANTED;
995 		wakeup(ex);
996 	}
997 	return (0);
998 }
999 
1000 static struct extent_region *
1001 extent_alloc_region_descriptor(struct extent *ex, int flags)
1002 {
1003 	struct extent_region *rp;
1004 	int s;
1005 
1006 	if (ex->ex_flags & EXF_FIXED) {
1007 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1008 
1009 		while (LIST_EMPTY(&fex->fex_freelist)) {
1010 			if (flags & EX_MALLOCOK)
1011 				goto alloc;
1012 
1013 			if ((flags & EX_WAITOK) == 0)
1014 				return (NULL);
1015 			ex->ex_flags |= EXF_FLWANTED;
1016 			if (tsleep(&fex->fex_freelist,
1017 			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
1018 			    "extnt", 0))
1019 				return (NULL);
1020 		}
1021 		rp = LIST_FIRST(&fex->fex_freelist);
1022 		LIST_REMOVE(rp, er_link);
1023 
1024 		/*
1025 		 * Don't muck with flags after pulling it off the
1026 		 * freelist; it may be a dynamiclly allocated
1027 		 * region pointer that was kindly given to us,
1028 		 * and we need to preserve that information.
1029 		 */
1030 
1031 		return (rp);
1032 	}
1033 
1034  alloc:
1035 	s = splvm();
1036 	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK : 0);
1037 	splx(s);
1038 	if (rp != NULL)
1039 		rp->er_flags = ER_ALLOC;
1040 
1041 	return (rp);
1042 }
1043 
1044 static void
1045 extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
1046 {
1047 	int s;
1048 
1049 	if (ex->ex_flags & EXF_FIXED) {
1050 		struct extent_fixed *fex = (struct extent_fixed *)ex;
1051 
1052 		/*
1053 		 * If someone's waiting for a region descriptor,
1054 		 * be nice and give them this one, rather than
1055 		 * just free'ing it back to the system.
1056 		 */
1057 		if (rp->er_flags & ER_ALLOC) {
1058 			if (ex->ex_flags & EXF_FLWANTED) {
1059 				/* Clear all but ER_ALLOC flag. */
1060 				rp->er_flags = ER_ALLOC;
1061 				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
1062 				    er_link);
1063 				goto wake_em_up;
1064 			} else {
1065 				s = splvm();
1066 				pool_put(&ex_region_pl, rp);
1067 				splx(s);
1068 			}
1069 		} else {
1070 			/* Clear all flags. */
1071 			rp->er_flags = 0;
1072 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
1073 		}
1074 
1075 		if (ex->ex_flags & EXF_FLWANTED) {
1076  wake_em_up:
1077 			ex->ex_flags &= ~EXF_FLWANTED;
1078 			wakeup(&fex->fex_freelist);
1079 		}
1080 		return;
1081 	}
1082 
1083 	/*
1084 	 * We know it's dynamically allocated if we get here.
1085 	 */
1086 	s = splvm();
1087 	pool_put(&ex_region_pl, rp);
1088 	splx(s);
1089 }
1090 
1091 #ifndef DDB
1092 #define db_printf printf
1093 #endif
1094 
1095 #if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)
1096 void
1097 extent_print(struct extent *ex)
1098 {
1099 	struct extent_region *rp;
1100 
1101 	if (ex == NULL)
1102 		panic("extent_print: NULL extent");
1103 
1104 #ifdef _KERNEL
1105 	db_printf("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
1106 	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
1107 #else
1108 	db_printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1109 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1110 #endif
1111 
1112 	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1113 		db_printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1114 }
1115 #endif
1116