1 /*	$NetBSD: subr_extent.c,v 1.71 2008/03/17 17:05:54 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1996, 1998, 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe and Matthias Drochner.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the NetBSD
21  *	Foundation, Inc. and its contributors.
22  * 4. Neither the name of The NetBSD Foundation nor the names of its
23  *    contributors may be used to endorse or promote products derived
24  *    from this software without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36  * POSSIBILITY OF SUCH DAMAGE.
37  */
38 
39 /*
40  * General purpose extent manager.
41  */
42 
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: subr_extent.c,v 1.71 2008/03/17 17:05:54 ad Exp $");
45 
46 #ifdef _KERNEL
47 #include "opt_lockdebug.h"
48 
49 #include <sys/param.h>
50 #include <sys/extent.h>
51 #include <sys/malloc.h>
52 #include <sys/pool.h>
53 #include <sys/time.h>
54 #include <sys/systm.h>
55 #include <sys/proc.h>
56 
57 #include <uvm/uvm_extern.h>
58 
59 #define	KMEM_IS_RUNNING		(kmem_map != NULL)
60 #elif defined(_EXTENT_TESTING)
61 /*
62  * user-land definitions, so it can fit into a testing harness.
63  */
64 #include <sys/param.h>
65 #include <sys/pool.h>
66 #include <sys/extent.h>
67 
68 #include <errno.h>
69 #include <stdlib.h>
70 #include <stdio.h>
71 #include <string.h>
72 
73 /*
74  * Use multi-line #defines to avoid screwing up the kernel tags file;
75  * without this, ctags produces a tags file where panic() shows up
76  * in subr_extent.c rather than subr_prf.c.
77  */
78 #define	\
79 malloc(s, t, flags)		malloc(s)
80 #define	\
81 free(p, t)			free(p)
82 #define	\
83 cv_wait_sig(cv, lock)		(EWOULDBLOCK)
84 #define	\
85 pool_get(pool, flags)		malloc((pool)->pr_size,0,0)
86 #define	\
87 pool_put(pool, rp)		free(rp,0)
88 #define	\
89 panic(a)			printf(a)
90 #define	mutex_init(a, b, c)
91 #define	mutex_destroy(a)
92 #define	mutex_enter(l)
93 #define	mutex_exit(l)
94 #define	cv_wait(cv, lock)
95 #define	cv_broadcast(cv)
96 #define	cv_init(a, b)
97 #define	cv_destroy(a)
98 #define	KMEM_IS_RUNNING			(1)
99 #define	IPL_VM				(0)
100 #define	MUTEX_DEFAULT			(0)
101 #endif
102 
103 static struct pool expool;
104 
105 /*
106  * Macro to align to an arbitrary power-of-two boundary.
107  */
108 #define EXTENT_ALIGN(_start, _align, _skew)		\
109 	(((((_start) - (_skew)) + ((_align) - 1)) & (-(_align))) + (_skew))
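
/*
 * Worked example (editorial addition): with _align a power of two,
 * -(_align) equals ~((_align) - 1), so the expression rounds
 * (_start - _skew) up to a multiple of _align and then adds the skew
 * back in.  For instance:
 *
 *	EXTENT_ALIGN(0x1234, 0x100, 0)    == 0x1300
 *	EXTENT_ALIGN(0x1234, 0x100, 0x10) == 0x1310
 *	EXTENT_ALIGN(0x1300, 0x100, 0)    == 0x1300	(already aligned)
 */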
110 
111 /*
112  * Create the extent_region pool.
113  */
114 void
115 extent_init(void)
116 {
117 
118 #if defined(_KERNEL)
119 	pool_init(&expool, sizeof(struct extent_region), 0, 0, 0,
120 	    "extent", NULL, IPL_VM);
121 #else
122 	expool.pr_size = sizeof(struct extent_region);
123 #endif
124 }
125 
126 /*
127  * Allocate an extent region descriptor.  EXTENT MUST NOT BE LOCKED.
128  * We will handle any locking we may need.
129  */
130 static struct extent_region *
131 extent_alloc_region_descriptor(struct extent *ex, int flags)
132 {
133 	struct extent_region *rp;
134 	int exflags, error;
135 
136 	/*
137 	 * If the kernel memory allocator is not yet running, we can't
138 	 * use it (obviously).
139 	 */
140 	if (KMEM_IS_RUNNING == 0)
141 		flags &= ~EX_MALLOCOK;
142 
143 	/*
144 	 * XXX Make a static, create-time flags word, so we don't
145 	 * XXX have to lock to read it!
146 	 */
147 	mutex_enter(&ex->ex_lock);
148 	exflags = ex->ex_flags;
149 	mutex_exit(&ex->ex_lock);
150 
151 	if (exflags & EXF_FIXED) {
152 		struct extent_fixed *fex = (struct extent_fixed *)ex;
153 
154 		mutex_enter(&ex->ex_lock);
155 		for (;;) {
156 			if ((rp = LIST_FIRST(&fex->fex_freelist)) != NULL) {
157 				/*
158 				 * Don't muck with flags after pulling it off
159 				 * the freelist; it may have been dynamically
160 				 * allocated, and kindly given to us.  We
161 				 * need to remember that information.
162 				 */
163 				LIST_REMOVE(rp, er_link);
164 				mutex_exit(&ex->ex_lock);
165 				return (rp);
166 			}
167 			if (flags & EX_MALLOCOK) {
168 				mutex_exit(&ex->ex_lock);
169 				goto alloc;
170 			}
171 			if ((flags & EX_WAITOK) == 0) {
172 				mutex_exit(&ex->ex_lock);
173 				return (NULL);
174 			}
175 			ex->ex_flags |= EXF_FLWANTED;
176 			if ((flags & EX_CATCH) != 0)
177 				error = cv_wait_sig(&ex->ex_cv, &ex->ex_lock);
178 			else {
179 				cv_wait(&ex->ex_cv, &ex->ex_lock);
180 				error = 0;
181 			}
182 			if (error != 0) {
183 				mutex_exit(&ex->ex_lock);
184 				return (NULL);
185 			}
186 		}
187 	}
188 
189  alloc:
190 	rp = pool_get(&expool, (flags & EX_WAITOK) ? PR_WAITOK : 0);
191 
192 	if (rp != NULL)
193 		rp->er_flags = ER_ALLOC;
194 
195 	return (rp);
196 }
197 
198 /*
199  * Free an extent region descriptor.  EXTENT _MUST_ BE LOCKED!
200  */
201 static void
202 extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
203 {
204 
205 	if (ex->ex_flags & EXF_FIXED) {
206 		struct extent_fixed *fex = (struct extent_fixed *)ex;
207 
208 		/*
209 		 * If someone's waiting for a region descriptor,
210 		 * be nice and give them this one, rather than
211 		 * just free'ing it back to the system.
212 		 */
213 		if (rp->er_flags & ER_ALLOC) {
214 			if (ex->ex_flags & EXF_FLWANTED) {
215 				/* Clear all but ER_ALLOC flag. */
216 				rp->er_flags = ER_ALLOC;
217 				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
218 				    er_link);
219 				goto wake_em_up;
220 			} else
221 				pool_put(&expool, rp);
222 		} else {
223 			/* Clear all flags. */
224 			rp->er_flags = 0;
225 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
226 		}
227 
228  wake_em_up:
229 		ex->ex_flags &= ~EXF_FLWANTED;
230 		cv_broadcast(&ex->ex_cv);
231 		return;
232 	}
233 
234 	/*
235 	 * We know it's dynamically allocated if we get here.
236 	 */
237 	pool_put(&expool, rp);
238 }
239 
240 /*
241  * Allocate and initialize an extent map.
242  */
243 struct extent *
244 extent_create(const char *name, u_long start, u_long end,
245     struct malloc_type *mtype, void *storage, size_t storagesize, int flags)
246 {
247 	struct extent *ex;
248 	char *cp = storage;
249 	size_t sz = storagesize;
250 	struct extent_region *rp;
251 	int fixed_extent = (storage != NULL);
252 
253 #ifndef _KERNEL
254 	extent_init();
255 #endif
256 
257 #ifdef DIAGNOSTIC
258 	/* Check arguments. */
259 	if (name == NULL)
260 		panic("extent_create: name == NULL");
261 	if (end < start) {
262 		printf("extent_create: extent `%s', start 0x%lx, end 0x%lx\n",
263 		    name, start, end);
264 		panic("extent_create: end < start");
265 	}
266 	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
267 		panic("extent_create: fixed extent, bad storagesize 0x%lx",
268 		    (u_long)storagesize);
269 	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
270 		panic("extent_create: storage provided for non-fixed");
271 #endif
272 
273 	/* Allocate extent descriptor. */
274 	if (fixed_extent) {
275 		struct extent_fixed *fex;
276 
277 		memset(storage, 0, storagesize);
278 
279 		/*
280 		 * Align all descriptors on "long" boundaries.
281 		 */
282 		fex = (struct extent_fixed *)cp;
283 		ex = (struct extent *)fex;
284 		cp += ALIGN(sizeof(struct extent_fixed));
285 		sz -= ALIGN(sizeof(struct extent_fixed));
286 		fex->fex_storage = storage;
287 		fex->fex_storagesize = storagesize;
288 
289 		/*
290 		 * In a fixed extent, we have to pre-allocate region
291 		 * descriptors and place them in the extent's freelist.
292 		 */
293 		LIST_INIT(&fex->fex_freelist);
294 		while (sz >= ALIGN(sizeof(struct extent_region))) {
295 			rp = (struct extent_region *)cp;
296 			cp += ALIGN(sizeof(struct extent_region));
297 			sz -= ALIGN(sizeof(struct extent_region));
298 			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
299 		}
300 	} else {
301 		ex = (struct extent *)malloc(sizeof(struct extent),
302 		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
303 		if (ex == NULL)
304 			return (NULL);
305 	}
306 
307 	/* Fill in the extent descriptor and return it to the caller. */
308 	mutex_init(&ex->ex_lock, MUTEX_DEFAULT, IPL_VM);
309 	cv_init(&ex->ex_cv, "extent");
310 	LIST_INIT(&ex->ex_regions);
311 	ex->ex_name = name;
312 	ex->ex_start = start;
313 	ex->ex_end = end;
314 	ex->ex_mtype = mtype;
315 	ex->ex_flags = 0;
316 	if (fixed_extent)
317 		ex->ex_flags |= EXF_FIXED;
318 	if (flags & EX_NOCOALESCE)
319 		ex->ex_flags |= EXF_NOCOALESCE;
320 	return (ex);
321 }
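
/*
 * Usage sketch (editorial addition, not part of the original source;
 * the "example_*" names are hypothetical).  The first call shows the
 * early-boot pattern: caller-supplied fixed storage, sized with
 * EXTENT_FIXED_STORAGE_SIZE() from <sys/extent.h>, because malloc(9)
 * may not be running yet.  The second shows the ordinary case where
 * extent_create() allocates the descriptor itself and may sleep.
 */
#if 0
static long example_storage[EXTENT_FIXED_STORAGE_SIZE(8) / sizeof(long)];

static struct extent *example_iomem_ex, *example_foo_ex;

static void
example_create(void)
{

	/*
	 * Fixed extent: the extent descriptor and 8 region descriptors
	 * live inside example_storage.
	 */
	example_iomem_ex = extent_create("iomem", 0x0, 0xffffffff,
	    M_DEVBUF, (void *)example_storage, sizeof(example_storage),
	    EX_NOCOALESCE | EX_NOWAIT);

	/* Dynamic extent: may sleep because EX_WAITOK is given. */
	example_foo_ex = extent_create("foo", 0, 0xffff, M_DEVBUF,
	    NULL, 0, EX_WAITOK);
}
#endif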
322 
323 /*
324  * Destroy an extent map.
325  * Since we're freeing the data, there can't be any references
326  * so we don't need any locking.
327  */
328 void
329 extent_destroy(struct extent *ex)
330 {
331 	struct extent_region *rp, *orp;
332 
333 #ifdef DIAGNOSTIC
334 	/* Check arguments. */
335 	if (ex == NULL)
336 		panic("extent_destroy: NULL extent");
337 #endif
338 
339 	/* Free all region descriptors in extent. */
340 	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
341 		orp = rp;
342 		rp = LIST_NEXT(rp, er_link);
343 		LIST_REMOVE(orp, er_link);
344 		extent_free_region_descriptor(ex, orp);
345 	}
346 
347 	cv_destroy(&ex->ex_cv);
348 	mutex_destroy(&ex->ex_lock);
349 
350 	/* If we're not a fixed extent, free the extent descriptor itself. */
351 	if ((ex->ex_flags & EXF_FIXED) == 0)
352 		free(ex, ex->ex_mtype);
353 }
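
/*
 * Usage note (editorial addition): extent_destroy() is the teardown
 * matching extent_create().  For a fixed extent the caller-supplied
 * storage is not freed here; disposing of it, if it was not static,
 * remains the caller's responsibility.
 */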
354 
355 /*
356  * Insert a region descriptor into the sorted region list after the
357  * entry "after" or at the head of the list (if "after" is NULL).
358  * The region descriptor we insert is passed in "rp".  We must
359  * allocate the region descriptor before calling this function!
360  * If we don't need the region descriptor, it will be freed here.
361  */
362 static void
363 extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
364     int flags, struct extent_region *after, struct extent_region *rp)
365 {
366 	struct extent_region *nextr;
367 	int appended = 0;
368 
369 	if (after == NULL) {
370 		/*
371 		 * We're the first in the region list.  If there's
372 		 * a region after us, attempt to coalesce to save
373 		 * descriptor overhead.
374 		 */
375 		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
376 		    (LIST_FIRST(&ex->ex_regions) != NULL) &&
377 		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
378 			/*
379 			 * We can coalesce.  Prepend us to the first region.
380 			 */
381 			LIST_FIRST(&ex->ex_regions)->er_start = start;
382 			extent_free_region_descriptor(ex, rp);
383 			return;
384 		}
385 
386 		/*
387 		 * Can't coalesce.  Fill in the region descriptor
388 		 * and insert us at the head of the region list.
389 		 */
390 		rp->er_start = start;
391 		rp->er_end = start + (size - 1);
392 		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
393 		return;
394 	}
395 
396 	/*
397 	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
398 	 */
399 	if (ex->ex_flags & EXF_NOCOALESCE)
400 		goto cant_coalesce;
401 
402 	/*
403 	 * Attempt to coalesce with the region before us.
404 	 */
405 	if ((after->er_end + 1) == start) {
406 		/*
407 		 * We can coalesce.  Append ourselves and make
408 		 * note of it.
409 		 */
410 		after->er_end = start + (size - 1);
411 		appended = 1;
412 	}
413 
414 	/*
415 	 * Attempt to coalesce with the region after us.
416 	 */
417 	if ((LIST_NEXT(after, er_link) != NULL) &&
418 	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
419 		/*
420 		 * We can coalesce.  Note that if we appended ourselves
421 		 * to the previous region, we exactly fit the gap, and
422 		 * can free the "next" region descriptor.
423 		 */
424 		if (appended) {
425 			/*
426 			 * Yup, we can free it up.
427 			 */
428 			after->er_end = LIST_NEXT(after, er_link)->er_end;
429 			nextr = LIST_NEXT(after, er_link);
430 			LIST_REMOVE(nextr, er_link);
431 			extent_free_region_descriptor(ex, nextr);
432 		} else {
433 			/*
434 			 * Nope, just prepend us to the next region.
435 			 */
436 			LIST_NEXT(after, er_link)->er_start = start;
437 		}
438 
439 		extent_free_region_descriptor(ex, rp);
440 		return;
441 	}
442 
443 	/*
444 	 * We weren't able to coalesce with the next region, but
445 	 * we don't need to allocate a region descriptor if we
446 	 * appended ourselves to the previous region.
447 	 */
448 	if (appended) {
449 		extent_free_region_descriptor(ex, rp);
450 		return;
451 	}
452 
453  cant_coalesce:
454 
455 	/*
456 	 * Fill in the region descriptor and insert ourselves
457 	 * into the region list.
458 	 */
459 	rp->er_start = start;
460 	rp->er_end = start + (size - 1);
461 	LIST_INSERT_AFTER(after, rp, er_link);
462 }
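
/*
 * Illustration (editorial addition): in an extent created without
 * EX_NOCOALESCE, allocating 0x100 - 0x1ff and then 0x200 - 0x2ff leaves
 * a single region descriptor covering 0x100 - 0x2ff; the second
 * descriptor is freed above.  With EX_NOCOALESCE the two regions keep
 * separate descriptors, so each allocation can later be freed exactly
 * as it was made.
 */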
463 
464 /*
465  * Allocate a specific region in an extent map.
466  */
467 int
468 extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
469 {
470 	struct extent_region *rp, *last, *myrp;
471 	u_long end = start + (size - 1);
472 	int error;
473 
474 #ifdef DIAGNOSTIC
475 	/* Check arguments. */
476 	if (ex == NULL)
477 		panic("extent_alloc_region: NULL extent");
478 	if (size < 1) {
479 		printf("extent_alloc_region: extent `%s', size 0x%lx\n",
480 		    ex->ex_name, size);
481 		panic("extent_alloc_region: bad size");
482 	}
483 	if (end < start) {
484 		printf(
485 		 "extent_alloc_region: extent `%s', start 0x%lx, size 0x%lx\n",
486 		 ex->ex_name, start, size);
487 		panic("extent_alloc_region: overflow");
488 	}
489 #endif
490 #ifdef LOCKDEBUG
491 	if (flags & EX_WAITSPACE) {
492 		ASSERT_SLEEPABLE();
493 	}
494 #endif
495 
496 	/*
497 	 * Make sure the requested region lies within the
498 	 * extent.
499 	 *
500 	 * We don't lock to check the range, because those values
501 	 * are never modified, and if another thread deletes the
502 	 * extent, we're screwed anyway.
503 	 */
504 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
505 #ifdef DIAGNOSTIC
506 		printf("extent_alloc_region: extent `%s' (0x%lx - 0x%lx)\n",
507 		    ex->ex_name, ex->ex_start, ex->ex_end);
508 		printf("extent_alloc_region: start 0x%lx, end 0x%lx\n",
509 		    start, end);
510 		panic("extent_alloc_region: region lies outside extent");
511 #else
512 		return (EINVAL);
513 #endif
514 	}
515 
516 	/*
517 	 * Allocate the region descriptor.  It will be freed later
518 	 * if we can coalesce with another region.  Don't lock before
519 	 * here!  This could block.
520 	 */
521 	myrp = extent_alloc_region_descriptor(ex, flags);
522 	if (myrp == NULL) {
523 #ifdef DIAGNOSTIC
524 		printf(
525 		    "extent_alloc_region: can't allocate region descriptor\n");
526 #endif
527 		return (ENOMEM);
528 	}
529 
530 	mutex_enter(&ex->ex_lock);
531  alloc_start:
532 
533 	/*
534 	 * Attempt to place ourselves in the desired area of the
535 	 * extent.  We save ourselves some work by keeping the list sorted.
536 	 * In other words, if the start of the current region is greater
537 	 * than the end of our region, we don't have to search any further.
538 	 */
539 
540 	/*
541 	 * Keep a pointer to the last region we looked at so
542 	 * that we don't have to traverse the list again when
543 	 * we insert ourselves.  If "last" is NULL when we
544 	 * finally insert ourselves, we go at the head of the
545 	 * list.  See extent_insert_and_optimize() for details.
546 	 */
547 	last = NULL;
548 
549 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
550 		if (rp->er_start > end) {
551 			/*
552 			 * We lie before this region and don't
553 			 * conflict.
554 			 */
555 			break;
556 		}
557 
558 		/*
559 		 * The current region begins before we end.
560 		 * Check for a conflict.
561 		 */
562 		if (rp->er_end >= start) {
563 			/*
564 			 * We conflict.  If we can (and want to) wait,
565 			 * do so.
566 			 */
567 			if (flags & EX_WAITSPACE) {
568 				if ((flags & EX_CATCH) != 0)
569 					error = cv_wait_sig(&ex->ex_cv,
570 					    &ex->ex_lock);
571 				else {
572 					cv_wait(&ex->ex_cv, &ex->ex_lock);
573 					error = 0;
574 				}
575 				if (error == 0)
576 					goto alloc_start;
577 				mutex_exit(&ex->ex_lock);
578 			} else {
579 				mutex_exit(&ex->ex_lock);
580 				error = EAGAIN;
581 			}
582 			extent_free_region_descriptor(ex, myrp);
583 			return error;
584 		}
585 		/*
586 		 * We don't conflict, but this region lies before
587 		 * us.  Keep a pointer to this region, and keep
588 		 * trying.
589 		 */
590 		last = rp;
591 	}
592 
593 	/*
594 	 * We don't conflict with any regions.  "last" points
595 	 * to the region we fall after, or is NULL if we belong
596 	 * at the beginning of the region list.  Insert ourselves.
597 	 */
598 	extent_insert_and_optimize(ex, start, size, flags, last, myrp);
599 	mutex_exit(&ex->ex_lock);
600 	return (0);
601 }
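
/*
 * Usage sketch (editorial addition; "example_iomem_ex" and the range
 * are hypothetical): reserve a known, fixed range up front so that
 * later extent_alloc() calls can never hand it out.
 */
#if 0
	if (extent_alloc_region(example_iomem_ex, 0xa0000, 0x20000,
	    EX_NOWAIT) != 0)
		printf("iomem: unable to reserve 0xa0000 - 0xbffff\n");
#endif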
602 
603 /*
604  * Macro to check (x + y) <= z.  This check is designed to fail
605  * if an overflow occurs.
606  */
607 #define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
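
/*
 * Worked example (editorial addition): LE_OV(0x1000, 0x100, 0x2000) is
 * true, since 0x1100 neither wraps nor exceeds 0x2000.  With x close to
 * ULONG_MAX, e.g. LE_OV(ULONG_MAX - 0xf, 0x100, ULONG_MAX), the sum
 * wraps around, the first clause ((x) + (y)) >= (x) fails, and the
 * macro correctly reports that the request does not fit.
 */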
608 
609 /*
610  * Allocate a region in an extent map subregion.
611  *
612  * If EX_FAST is specified, we return the first fit in the map.
613  * Otherwise, we try to minimize fragmentation by finding the
614  * smallest gap that will hold the request.
615  *
616  * The allocated region is aligned to "alignment", which must be
617  * a power of 2.
618  */
619 int
620 extent_alloc_subregion1(struct extent *ex, u_long substart, u_long subend,
621     u_long size, u_long alignment, u_long skew, u_long boundary,
622     int flags, u_long *result)
623 {
624 	struct extent_region *rp, *myrp, *last, *bestlast;
625 	u_long newstart, newend, exend, beststart, bestovh, ovh;
626 	u_long dontcross;
627 	int error;
628 
629 #ifdef DIAGNOSTIC
630 	/*
631 	 * Check arguments.
632 	 *
633 	 * We don't lock to check these, because these values
634 	 * are never modified, and if another thread deletes the
635 	 * extent, we're screwed anyway.
636 	 */
637 	if (ex == NULL)
638 		panic("extent_alloc_subregion: NULL extent");
639 	if (result == NULL)
640 		panic("extent_alloc_subregion: NULL result pointer");
641 	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
642 	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
643   printf("extent_alloc_subregion: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
644 		    ex->ex_name, ex->ex_start, ex->ex_end);
645 		printf("extent_alloc_subregion: substart 0x%lx, subend 0x%lx\n",
646 		    substart, subend);
647 		panic("extent_alloc_subregion: bad subregion");
648 	}
649 	if ((size < 1) || ((size - 1) > (subend - substart))) {
650 		printf("extent_alloc_subregion: extent `%s', size 0x%lx\n",
651 		    ex->ex_name, size);
652 		panic("extent_alloc_subregion: bad size");
653 	}
654 	if (alignment == 0)
655 		panic("extent_alloc_subregion: bad alignment");
656 	if (boundary && (boundary < size)) {
657 		printf(
658 		    "extent_alloc_subregion: extent `%s', size 0x%lx, "
659 		    "boundary 0x%lx\n", ex->ex_name, size, boundary);
660 		panic("extent_alloc_subregion: bad boundary");
661 	}
662 #endif
663 #ifdef LOCKDEBUG
664 	if (flags & EX_WAITSPACE) {
665 		ASSERT_SLEEPABLE();
666 	}
667 #endif
668 
669 	/*
670 	 * Allocate the region descriptor.  It will be freed later
671 	 * if we can coalesce with another region.  Don't lock before
672 	 * here!  This could block.
673 	 */
674 	myrp = extent_alloc_region_descriptor(ex, flags);
675 	if (myrp == NULL) {
676 #ifdef DIAGNOSTIC
677 		printf(
678 		 "extent_alloc_subregion: can't allocate region descriptor\n");
679 #endif
680 		return (ENOMEM);
681 	}
682 
683  alloc_start:
684 	mutex_enter(&ex->ex_lock);
685 
686 	/*
687 	 * Keep a pointer to the last region we looked at so
688 	 * that we don't have to traverse the list again when
689 	 * we insert ourselves.  If "last" is NULL when we
690 	 * finally insert ourselves, we go at the head of the
691 	 * list.  See extent_insert_and_optimize() for details.
692 	 */
693 	last = NULL;
694 
695 	/*
696 	 * Keep track of size and location of the smallest
697 	 * chunk we fit in.
698 	 *
699 	 * Since the extent can span the full numeric range of the
700 	 * CPU (0 - 0xffffffff on 32-bit systems), a legitimate overhead
701 	 * can be as large as the maximum unsigned integer, so that can't
702 	 * serve as a "nothing found yet" marker.  Instead, "bestovh"
703 	 * starts at 0, which is safe because an exact fit (overhead 0)
704 	 * is inserted immediately and never recorded in "bestovh".
705 	 */
706 	bestovh = 0;
707 	beststart = 0;
708 	bestlast = NULL;
709 
710 	/*
711 	 * Keep track of the end of the free region.  This is either the end
712 	 * of the extent or the start of the first region past the subend.
713 	 */
714 	exend = ex->ex_end;
715 
716 	/*
717 	 * For N allocated regions, we must make (N + 1)
718 	 * checks for unallocated space.  The first chunk we
719 	 * check is the area from the beginning of the subregion
720 	 * to the first allocated region after that point.
721 	 */
722 	newstart = EXTENT_ALIGN(substart, alignment, skew);
723 	if (newstart < ex->ex_start) {
724 #ifdef DIAGNOSTIC
725 		printf(
726       "extent_alloc_subregion: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
727 		 ex->ex_name, ex->ex_start, ex->ex_end, alignment);
728 		mutex_exit(&ex->ex_lock);
729 		panic("extent_alloc_subregion: overflow after alignment");
730 #else
731 		extent_free_region_descriptor(ex, myrp);
732 		mutex_exit(&ex->ex_lock);
733 		return (EINVAL);
734 #endif
735 	}
736 
737 	/*
738 	 * Find the first allocated region that begins on or after
739 	 * the subregion start, advancing the "last" pointer along
740 	 * the way.
741 	 */
742 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
743 		if (rp->er_start >= newstart)
744 			break;
745 		last = rp;
746 	}
747 
748 	/*
749 	 * Relocate the start of our candidate region to the end of
750 	 * the last allocated region (if there was one overlapping
751 	 * our subrange).
752 	 */
753 	if (last != NULL && last->er_end >= newstart)
754 		newstart = EXTENT_ALIGN((last->er_end + 1), alignment, skew);
755 
756 	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
757 		/*
758 		 * If the region starts past the subend, bail out and see
759 		 * if we fit against the subend.
760 		 */
761 		if (rp->er_start > subend) {
762 			exend = rp->er_start;
763 			break;
764 		}
765 
766 		/*
767 		 * Check the chunk before "rp".  Note that our
768 		 * comparison is safe from overflow conditions.
769 		 */
770 		if (LE_OV(newstart, size, rp->er_start)) {
771 			/*
772 			 * Do a boundary check, if necessary.  Note
773 			 * that a region may *begin* on the boundary,
774 			 * but it must end before the boundary.
775 			 */
776 			if (boundary) {
777 				newend = newstart + (size - 1);
778 
779 				/*
780 				 * Calculate the next boundary after the start
781 				 * of this region.
782 				 */
783 				dontcross = EXTENT_ALIGN(newstart+1, boundary,
784 				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
785 				    - 1;
786 
787 #if 0
788 				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
789 				    newstart, newend, ex->ex_start, ex->ex_end,
790 				    boundary, dontcross);
791 #endif
792 
793 				/* Check for overflow */
794 				if (dontcross < ex->ex_start)
795 					dontcross = ex->ex_end;
796 				else if (newend > dontcross) {
797 					/*
798 					 * Candidate region crosses boundary.
799 					 * Throw away the leading part and see
800 					 * if we still fit.
801 					 */
802 					newstart = dontcross + 1;
803 					newend = newstart + (size - 1);
804 					dontcross += boundary;
805 					if (!LE_OV(newstart, size, rp->er_start))
806 						goto skip;
807 				}
808 
809 				/*
810 				 * If we run past the end of
811 				 * the extent or the boundary
812 				 * overflows, then the request
813 				 * can't fit.
814 				 */
815 				if (newstart + size - 1 > ex->ex_end ||
816 				    dontcross < newstart)
817 					goto fail;
818 			}
819 
820 			/*
821 			 * We would fit into this space.  Calculate
822 			 * the overhead (wasted space).  If we exactly
823 			 * fit, or we're taking the first fit, insert
824 			 * ourselves into the region list.
825 			 */
826 			ovh = rp->er_start - newstart - size;
827 			if ((flags & EX_FAST) || (ovh == 0))
828 				goto found;
829 
830 			/*
831 			 * Don't exactly fit, but check to see
832 			 * if we're better than any current choice.
833 			 */
834 			if ((bestovh == 0) || (ovh < bestovh)) {
835 				bestovh = ovh;
836 				beststart = newstart;
837 				bestlast = last;
838 			}
839 		}
840 
841 skip:
842 		/*
843 		 * Skip past the current region and check again.
844 		 */
845 		newstart = EXTENT_ALIGN((rp->er_end + 1), alignment, skew);
846 		if (newstart < rp->er_end) {
847 			/*
848 			 * Overflow condition.  Don't error out, since
849 			 * we might have a chunk of space that we can
850 			 * use.
851 			 */
852 			goto fail;
853 		}
854 
855 		last = rp;
856 	}
857 
858 	/*
859 	 * The final check is from the current starting point to the
860 	 * end of the subregion.  If there were no allocated regions,
861 	 * "newstart" is set to the beginning of the subregion, or
862 	 * just past the end of the last allocated region, adjusted
863 	 * for alignment in either case.
864 	 */
865 	if (LE_OV(newstart, (size - 1), subend)) {
866 		/*
867 		 * Do a boundary check, if necessary.  Note
868 		 * that a region may *begin* on the boundary,
869 		 * but it must end before the boundary.
870 		 */
871 		if (boundary) {
872 			newend = newstart + (size - 1);
873 
874 			/*
875 			 * Calculate the next boundary after the start
876 			 * of this region.
877 			 */
878 			dontcross = EXTENT_ALIGN(newstart+1, boundary,
879 			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
880 			    - 1;
881 
882 #if 0
883 			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
884 			    newstart, newend, ex->ex_start, ex->ex_end,
885 			    boundary, dontcross);
886 #endif
887 
888 			/* Check for overflow */
889 			if (dontcross < ex->ex_start)
890 				dontcross = ex->ex_end;
891 			else if (newend > dontcross) {
892 				/*
893 				 * Candidate region crosses boundary.
894 				 * Throw away the leading part and see
895 				 * if we still fit.
896 				 */
897 				newstart = dontcross + 1;
898 				newend = newstart + (size - 1);
899 				dontcross += boundary;
900 				if (!LE_OV(newstart, (size - 1), subend))
901 					goto fail;
902 			}
903 
904 			/*
905 			 * If we run past the end of
906 			 * the extent or the boundary
907 			 * overflows, then the request
908 			 * can't fit.
909 			 */
910 			if (newstart + size - 1 > ex->ex_end ||
911 			    dontcross < newstart)
912 				goto fail;
913 		}
914 
915 		/*
916 		 * We would fit into this space.  Calculate
917 		 * the overhead (wasted space).  If we exactly
918 		 * fit, or we're taking the first fit, insert
919 		 * ourselves into the region list.
920 		 */
921 		ovh = exend - newstart - (size - 1);
922 		if ((flags & EX_FAST) || (ovh == 0))
923 			goto found;
924 
925 		/*
926 		 * Don't exactly fit, but check to see
927 		 * if we're better than any current choice.
928 		 */
929 		if ((bestovh == 0) || (ovh < bestovh)) {
930 			bestovh = ovh;
931 			beststart = newstart;
932 			bestlast = last;
933 		}
934 	}
935 
936  fail:
937 	/*
938 	 * One of the following two conditions has
939 	 * occurred:
940 	 *
941 	 *	There is no chunk large enough to hold the request.
942 	 *
943 	 *	If EX_FAST was not specified, there is not an
944 	 *	exact match for the request.
945 	 *
946 	 * Note that if we reach this point and EX_FAST is
947 	 * set, then we know there is no space in the extent for
948 	 * the request.
949 	 */
950 	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
951 		/*
952 		 * We have a match that's "good enough".
953 		 */
954 		newstart = beststart;
955 		last = bestlast;
956 		goto found;
957 	}
958 
959 	/*
960 	 * No space currently available.  Wait for it to free up,
961 	 * if possible.
962 	 */
963 	if (flags & EX_WAITSPACE) {
964 		if ((flags & EX_CATCH) != 0) {
965 			error = cv_wait_sig(&ex->ex_cv, &ex->ex_lock);
966 		} else {
967 			cv_wait(&ex->ex_cv, &ex->ex_lock);
968 			error = 0;
969 		}
970 		if (error == 0)
971 			goto alloc_start;
972 		mutex_exit(&ex->ex_lock);
973 	} else {
974 		mutex_exit(&ex->ex_lock);
975 		error = EAGAIN;
976 	}
977 
978 	extent_free_region_descriptor(ex, myrp);
979 	return error;
980 
981  found:
982 	/*
983 	 * Insert ourselves into the region list.
984 	 */
985 	extent_insert_and_optimize(ex, newstart, size, flags, last, myrp);
986 	mutex_exit(&ex->ex_lock);
987 	*result = newstart;
988 	return (0);
989 }
990 
991 int
992 extent_alloc_subregion(struct extent *ex, u_long start, u_long end, u_long size,
993     u_long alignment, u_long boundary, int flags, u_long *result)
994 {
995 
996 	return (extent_alloc_subregion1(ex, start, end, size, alignment,
997 					0, boundary, flags, result));
998 }
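
/*
 * Usage sketch (editorial addition; "example_isa_ex" is hypothetical):
 * restrict the search to a sub-range of the extent, here the low 16MB,
 * asking for a page-sized, page-aligned chunk with no boundary
 * constraint.
 */
#if 0
	u_long addr;

	if (extent_alloc_subregion(example_isa_ex, 0, 0xffffff, 0x1000,
	    0x1000, 0, EX_NOWAIT, &addr) == 0)
		printf("allocated 0x%lx - 0x%lx\n", addr, addr + 0xfff);
#endif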
999 
1000 int
1001 extent_alloc(struct extent *ex, u_long size, u_long alignment, u_long boundary,
1002     int flags, u_long *result)
1003 {
1004 
1005 	return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end,
1006 					size, alignment, 0, boundary,
1007 					flags, result));
1008 }
1009 
1010 int
1011 extent_alloc1(struct extent *ex, u_long size, u_long alignment, u_long skew,
1012     u_long boundary, int flags, u_long *result)
1013 {
1014 
1015 	return (extent_alloc_subregion1(ex, ex->ex_start, ex->ex_end,
1016 					size, alignment, skew, boundary,
1017 					flags, result));
1018 }
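
/*
 * Usage sketch (editorial addition; "example_dma_ex" is hypothetical):
 * allocate a 4KB, 4KB-aligned chunk anywhere in the extent that does
 * not cross a 64KB boundary, sleeping both for memory (EX_WAITOK) and
 * for free space (EX_WAITSPACE) if necessary, then free it again.
 */
#if 0
	u_long start;
	int error;

	error = extent_alloc(example_dma_ex, 0x1000, 0x1000, 0x10000,
	    EX_WAITOK | EX_WAITSPACE, &start);
	if (error == 0) {
		/* ... use [start, start + 0xfff] ... */
		extent_free(example_dma_ex, start, 0x1000, EX_NOWAIT);
	}
#endif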
1019 
1020 int
1021 extent_free(struct extent *ex, u_long start, u_long size, int flags)
1022 {
1023 	struct extent_region *rp, *nrp = NULL;
1024 	u_long end = start + (size - 1);
1025 	int coalesce;
1026 
1027 #ifdef DIAGNOSTIC
1028 	/*
1029 	 * Check arguments.
1030 	 *
1031 	 * We don't lock to check these, because these values
1032 	 * are never modified, and if another thread deletes the
1033 	 * extent, we're screwed anyway.
1034 	 */
1035 	if (ex == NULL)
1036 		panic("extent_free: NULL extent");
1037 	if ((start < ex->ex_start) || (end > ex->ex_end)) {
1038 		extent_print(ex);
1039 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
1040 		    ex->ex_name, start, size);
1041 		panic("extent_free: extent `%s', region not within extent",
1042 		    ex->ex_name);
1043 	}
1044 	/* Check for an overflow. */
1045 	if (end < start) {
1046 		extent_print(ex);
1047 		printf("extent_free: extent `%s', start 0x%lx, size 0x%lx\n",
1048 		    ex->ex_name, start, size);
1049 		panic("extent_free: overflow");
1050 	}
1051 #endif
1052 
1053 	/*
1054 	 * If we're allowing coalescing, we must allocate a region
1055 	 * descriptor now, since it might block.
1056 	 *
1057 	 * XXX Make a static, create-time flags word, so we don't
1058 	 * XXX have to lock to read it!
1059 	 */
1060 	mutex_enter(&ex->ex_lock);
1061 	coalesce = (ex->ex_flags & EXF_NOCOALESCE) == 0;
1062 	mutex_exit(&ex->ex_lock);
1063 
1064 	if (coalesce) {
1065 		/* Allocate a region descriptor. */
1066 		nrp = extent_alloc_region_descriptor(ex, flags);
1067 		if (nrp == NULL)
1068 			return (ENOMEM);
1069 	}
1070 
1071 	mutex_enter(&ex->ex_lock);
1072 
1073 	/*
1074 	 * Find region and deallocate.  Several possibilities:
1075 	 *
1076 	 *	1. (start == er_start) && (end == er_end):
1077 	 *	   Free descriptor.
1078 	 *
1079 	 *	2. (start == er_start) && (end < er_end):
1080 	 *	   Adjust er_start.
1081 	 *
1082 	 *	3. (start > er_start) && (end == er_end):
1083 	 *	   Adjust er_end.
1084 	 *
1085 	 *	4. (start > er_start) && (end < er_end):
1086 	 *	   Fragment region.  Requires descriptor alloc.
1087 	 *
1088 	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
1089 	 * is not set.
1090 	 */
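	/*
	 * Concrete illustration (editorial addition): with a region
	 * 0x1000 - 0x1fff on the list, freeing 0x1400 - 0x17ff is case 4:
	 * the region is split into 0x1000 - 0x13ff and 0x1800 - 0x1fff,
	 * consuming the descriptor pre-allocated in "nrp" above.
	 */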
1091 	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
1092 		/*
1093 		 * Save ourselves some comparisons; does the current
1094 		 * region end before the chunk to be freed begins?  If so,
1095 		 * then we haven't found the appropriate region descriptor.
1096 		 */
1097 		if (rp->er_end < start)
1098 			continue;
1099 
1100 		/*
1101 		 * Save ourselves some traversal; does the current
1102 		 * region begin after the chunk to be freed ends?  If so,
1103 		 * then we've already passed any possible region descriptors
1104 		 * that might have contained the chunk to be freed.
1105 		 */
1106 		if (rp->er_start > end)
1107 			break;
1108 
1109 		/* Case 1. */
1110 		if ((start == rp->er_start) && (end == rp->er_end)) {
1111 			LIST_REMOVE(rp, er_link);
1112 			extent_free_region_descriptor(ex, rp);
1113 			goto done;
1114 		}
1115 
1116 		/*
1117 		 * The following cases all require that EXF_NOCOALESCE
1118 		 * is not set.
1119 		 */
1120 		if (!coalesce)
1121 			continue;
1122 
1123 		/* Case 2. */
1124 		if ((start == rp->er_start) && (end < rp->er_end)) {
1125 			rp->er_start = (end + 1);
1126 			goto done;
1127 		}
1128 
1129 		/* Case 3. */
1130 		if ((start > rp->er_start) && (end == rp->er_end)) {
1131 			rp->er_end = (start - 1);
1132 			goto done;
1133 		}
1134 
1135 		/* Case 4. */
1136 		if ((start > rp->er_start) && (end < rp->er_end)) {
1137 			/* Fill in new descriptor. */
1138 			nrp->er_start = end + 1;
1139 			nrp->er_end = rp->er_end;
1140 
1141 			/* Adjust current descriptor. */
1142 			rp->er_end = start - 1;
1143 
1144 			/* Insert new descriptor after current. */
1145 			LIST_INSERT_AFTER(rp, nrp, er_link);
1146 
1147 			/* We used the new descriptor, so don't free it below */
1148 			nrp = NULL;
1149 			goto done;
1150 		}
1151 	}
1152 
1153 	/* Region not found, or request otherwise invalid. */
1154 	mutex_exit(&ex->ex_lock);
1155 	extent_print(ex);
1156 	printf("extent_free: start 0x%lx, end 0x%lx\n", start, end);
1157 	panic("extent_free: region not found");
1158 
1159  done:
1160 	if (nrp != NULL)
1161 		extent_free_region_descriptor(ex, nrp);
1162 	cv_broadcast(&ex->ex_cv);
1163 	mutex_exit(&ex->ex_lock);
1164 	return (0);
1165 }
1166 
1167 void
1168 extent_print(struct extent *ex)
1169 {
1170 	struct extent_region *rp;
1171 
1172 	if (ex == NULL)
1173 		panic("extent_print: NULL extent");
1174 
1175 	mutex_enter(&ex->ex_lock);
1176 
1177 	printf("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
1178 	    ex->ex_start, ex->ex_end, ex->ex_flags);
1179 
1180 	LIST_FOREACH(rp, &ex->ex_regions, er_link)
1181 		printf("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
1182 
1183 	mutex_exit(&ex->ex_lock);
1184 }
1185